On 3월19일, 오전4시56분, mjc <mjco...@[EMAIL PROTECTED]> wrote:
> On Mar 17, 9:30 pm, iaminsik <iamin...@[EMAIL PROTECTED]
> wrote:
>
>
>
>
>
> > On 3월18일, 오후12시41분, Joel Reicher <j...@[EMAIL PROTECTED]> wrote:
>
> > > iaminsik <iamin...@[EMAIL PROTECTED]
> writes:
> > > > Hi, I compared one element with others in a Hash (size: about 7,000).
>
> > > >
--------------------------------------------------------------------=
----
> > > > for(word1 in arrword)
> > > > {
> > > > for(word2 in arrword)
> > > > {
> > > > # compare word1 with word2.
> > > > }
> > > > }
> > > >
--------------------------------------------------------------------=
----
>
> > > > But, in every iteration, the memory-usage increased.
>
> > > > Somebody help me?
>
> > > I would guess your comparison procedure is causing memory
allocation,
> > > but you haven't said what it's doing.
>
> > I experimented word-similarity with predicates.
> > My source code is very long.... In the sub-routines, I frequently
> > split a string into an array, and delete the array.
>
> > If you have some time, please let me know of any memory bugs...
>
> > print "calc similarity";
> > for(word1 in arrword)
> > {
> > I1 =3D I_of_word(arrword[word1], cnt_all_noun_type);
> > for(word2 in arrword)
> > {
> > if(word1 !=3D word2 && arrwordfreq[word1]>10 &&
> > arrwordfreq[word2]>10 )
> > #if(word1 !=3D word2 )
> > {
> > I2 =3D I_of_word(arrword[word2], cnt_all_noun_type);
> > ret =3D Co_I_of_words(arrword[word1], cnt_all_noun_type,
word2);=
> > ln=3Dsplit(ret, arrret, SUBSEP);
> > ISame =3D arrret[1];
> > ISame =3D ISame * 2;
> > I =3D ISame / ( I1 + I2);
> > if( I )
> > print word1 "(" arrwordfreq[word1] ")", I, word2 "("
> > arrwordfreq[word2] ")", arrret[2], arrret[3] > "Dekang_over10.txt";
> > delete arrret;
> > }
> > }
> > }
>
> > }
>
> > # Co-Information Sum between Two Words
> > function Co_I_of_words (features,cnt_all_type,word2) # divided by [|]
> > {
> > cofeatures1=3Dcofeatures2=3D"";
> > I=3D0.0;
> > word1num=3Dsplit(features,arrfeature2,"[|]");
> > for(i=3D1;i<=3Dword1num;i++)
> > {
> > # check whether word2 has the same feature.
> > if(arrwordfeaturerelation[word2 SUBSEP arrfeature2[i]] !=3D 0)
> > {
> > I +=3D I_of_feature(arrfeature2[i], cnt_all_type);
> > arrfeaturestr=3Darrfeature2[i];
> > gsub(SUBSEP, "/", arrfeaturestr);
> > cofeatures2=3Dcofeatures2 (cofeatures2=3D=3D""?"":"|")
arrfeatures=
tr \
> > "(" arrwordfeaturerelation[word2 SUBSEP arrfeature2[i]]
> > ")";
> > cofeatures1=3Dcofeatures1 (cofeatures1=3D=3D""?"":"|")
arrfeatures=
tr \
> > "(" arrwordfeaturerelation[word1 SUBSEP arrfeature2[i]]
> > ")";
> > }
> > else
> > delete arrwordfeaturerelation[word2 SUBSEP arrfeature2[i]];
> > }
> > delete arrfeature2;
>
> > retval=3DI SUBSEP cofeatures1 SUBSEP cofeatures2;
> > return retval;
>
> > }
>
> > # Information Sum of Word
> > function I_of_word (features,cnt_all_type) # divided by [|]
> > {
> > I=3D0.0;
> > word1num=3Dsplit(features,arrfeature1,"[|]");
> > for(i=3D1;i<=3Dword1num;i++)
> > {
> > I +=3D I_of_feature(arrfeature1[i], cnt_all_type);
> > }
> > delete arrfeature1;
>
> > return I;
>
> > }
>
> > # Information A Feature
> > function I_of_feature (feature_relation,cnt_all_type) # divided by [|]
> > {
> > lchild=3Dsplit(arrfeature[feature_relation],div,"[|]");
> > delete div;
> > return -log(lchild / cnt_all_type);
>
> > }
>
> > # NonDuplicateString
> > function InsertArrayString (ArrString, newbie)
> > {
> > n=3Dsplit(ArrString, Container, "[|]");
> > bflag=3D0;
> > for(i=3D1;i<=3Dn;i++)
> > {
> > if(Container[i]=3D=3Dnewbie) bflag=3D1;
> > }
> > delete Container;
> > if(bflag=3D=3D0)
> > ArrString =3D ArrString (ArrString=3D=3D""?"":"|") newbie;
> > return ArrString;
>
> > }
>
> > # Get Function String
> > function CleanStringToFunc (String)
> > {
> > retstr=3D"";
> > n=3Dsplit(String, Arrstr, " [+] ");
> > for(i=3D1;i<=3Dn;i++)
> > {
> > if(Arrstr[i] ~ /\/fjc/)
> > {
> > tempstr=3DArrstr[i];
> > gsub(/^[^\/]*\//,"",tempstr);
> > delete Arrstr;
> > return tempstr;
> > }
> > }
> > delete Arrstr;
> > return "NULL";
>
> > }
>
> > # CleanString
> > function CleanStringToVerb (String)
> > {
> > retstr=3D"";
> > n=3Dsplit(String, Arrstr, " [+] ");
> > for(i=3D1;i<=3Dn;i++)
> > {
> > if(Arrstr[i] ~ /\/CMC/ || Arrstr[i] ~ /\/YBDO/ \
> > || Arrstr[i] ~ /\/YBHO/ \
> > || Arrstr[i] ~ /\/fpd/ || Arrstr[i] ~ /\/fph/)
> > {
> > if(retstr=3D=3D"") retstr =3D retstr Arrstr[i];
> > else retstr =3D retstr " + " Arrstr[i];
> > }
> > }
> > delete Arrstr;
> > return retstr;
>
> > }
>
> > Thanks!
> > Remi.
>
> > > Cheers,
>
> > > - Joel- 따온 텍스트 숨기기 -
>
> > > - 따온 텍스트 보기 -
>
> One thing I noticed is that there are no local variables in your
> subroutines.
I've never heard of declaring variables as 'local' in awk.
>
> I would declare them (in the "function" statement) so there would be,
> for example, no chance of "i" in one routine conflicting with "i" in
> another.- 따온 텍스트 숨기기 -
Can you show an example?
>
> - 따온 텍스트 보기 -
Regards.
Remi.


|