On 3=BF=F918=C0=CF, =BF=C0=C8=C412=BD=C341=BA=D0, Joel Reicher
<j...@[EMAIL PROTECTED]
> wrote:
> iaminsik <iamin...@[EMAIL PROTECTED]
> writes:
> > Hi, I compared one element with others in a Hash (size : about 7,000).
>
> >
------------------------------------------------------------------------=
> > for(word1 in arrword)
> > {
> > for(word2 in arrword)
> > {
> > # compare word1 with word2.
> > }
> > }
> >
------------------------------------------------------------------------=
>
> > But, in every iteration, the memory-usage increased.
>
> > Somebody help me?
>
> I would guess your comparison procedure is causing memory allocation,
> but you haven't said what it's doing.
I experimented word-similarity with predicates.
My source code is very long.... In the sub-routines, I frequently
split a tring into an array, and delete the array.
If you have some times, let me know memory bugs...
print "calc similarity";
for(word1 in arrword)
{
I1 =3D I_of_word(arrword[word1], cnt_all_noun_type);
for(word2 in arrword)
{
if(word1 !=3D word2 && arrwordfreq[word1]>10 &&
arrwordfreq[word2]>10 )
#if(word1 !=3D word2 )
{
I2 =3D I_of_word(arrword[word2], cnt_all_noun_type);
ret =3D Co_I_of_words(arrword[word1], cnt_all_noun_type, word2);
ln=3Dsplit(ret, arrret, SUBSEP);
ISame =3D arrret[1];
ISame =3D ISame * 2;
I =3D ISame / ( I1 + I2);
if( I )
print word1 "(" arrwordfreq[word1] ")", I, word2 "("
arrwordfreq[word2] ")", arrret[2], arrret[3] > "Dekang_over10.txt";
delete arrret;
}
}
}
}
# Co-Information Sum between Two Words
function Co_I_of_words (features,cnt_all_type,word2) # divided by [|]
{
cofeatures1=3Dcofeatures2=3D"";
I=3D0.0;
word1num=3Dsplit(features,arrfeature2,"[|]");
for(i=3D1;i<=3Dword1num;i++)
{
# check whether word2 has the same feature.
if(arrwordfeaturerelation[word2 SUBSEP arrfeature2[i]] !=3D 0)
{
I +=3D I_of_feature(arrfeature2[i], cnt_all_type);
arrfeaturestr=3Darrfeature2[i];
gsub(SUBSEP, "/", arrfeaturestr);
cofeatures2=3Dcofeatures2 (cofeatures2=3D=3D""?"":"|") arrfeaturestr
\=
"(" arrwordfeaturerelation[word2 SUBSEP arrfeature2[i]]
")";
cofeatures1=3Dcofeatures1 (cofeatures1=3D=3D""?"":"|") arrfeaturestr
\=
"(" arrwordfeaturerelation[word1 SUBSEP arrfeature2[i]]
")";
}
else
delete arrwordfeaturerelation[word2 SUBSEP arrfeature2[i]];
}
delete arrfeature2;
retval=3DI SUBSEP cofeatures1 SUBSEP cofeatures2;
return retval;
}
# Information Sum of Word
function I_of_word (features,cnt_all_type) # divided by [|]
{
I=3D0.0;
word1num=3Dsplit(features,arrfeature1,"[|]");
for(i=3D1;i<=3Dword1num;i++)
{
I +=3D I_of_feature(arrfeature1[i], cnt_all_type);
}
delete arrfeature1;
return I;
}
# Information A Feature
function I_of_feature (feature_relation,cnt_all_type) # divided by [|]
{
lchild=3Dsplit(arrfeature[feature_relation],div,"[|]");
delete div;
return -log(lchild / cnt_all_type);
}
# NonDuplicateString
function InsertArrayString (ArrString, newbie)
{
n=3Dsplit(ArrString, Container, "[|]");
bflag=3D0;
for(i=3D1;i<=3Dn;i++)
{
if(Container[i]=3D=3Dnewbie) bflag=3D1;
}
delete Container;
if(bflag=3D=3D0)
ArrString =3D ArrString (ArrString=3D=3D""?"":"|") newbie;
return ArrString;
}
# Get Function String
function CleanStringToFunc (String)
{
retstr=3D"";
n=3Dsplit(String, Arrstr, " [+] ");
for(i=3D1;i<=3Dn;i++)
{
if(Arrstr[i] ~ /\/fjc/)
{
tempstr=3DArrstr[i];
gsub(/^[^\/]*\//,"",tempstr);
delete Arrstr;
return tempstr;
}
}
delete Arrstr;
return "NULL";
}
# CleanString
function CleanStringToVerb (String)
{
retstr=3D"";
n=3Dsplit(String, Arrstr, " [+] ");
for(i=3D1;i<=3Dn;i++)
{
if(Arrstr[i] ~ /\/CMC/ || Arrstr[i] ~ /\/YBDO/ \
|| Arrstr[i] ~ /\/YBHO/ \
|| Arrstr[i] ~ /\/fpd/ || Arrstr[i] ~ /\/fph/)
{
if(retstr=3D=3D"") retstr =3D retstr Arrstr[i];
else retstr =3D retstr " + " Arrstr[i];
}
}
delete Arrstr;
return retstr;
}
Thanks!
Remi.
>
> Cheers,
>
> - Joel- =B5=FB=BF=C2 =C5=D8=BD=BA=C6=AE =BC=FB=B1=E2=B1=E2 -
>
> - =B5=FB=BF=C2 =C5=D8=BD=BA=C6=AE =BA=B8=B1=E2 -


|