import sys; sys.path.append(_dh[0].split("knowknow")[0])
from knowknow import *
# Name of the knowknow database whose count tables are loaded below.
database_name = "sociology-wos"
# Load the 'fy', 'c.fj' and 'fj' count tables for "<database_name>.doc".
# As used later in this script, cnt['fj'] is keyed by 1-tuples (fj,) and
# cnt['c.fj'] maps (c, fj) tuples to numeric counts.
# NOTE(review): presumably "fj" is a citing journal and "c" a cited work --
# confirm against the knowknow documentation.
cnt = get_cnt('%s.doc'%database_name, ['fy','c.fj','fj'])
# Peek at the first five keys of the 'fj' table (notebook display expression).
list(cnt['fj'])[:5]
# Build the undirected fj-fj co-occurrence counts.
#
# For every pair (j1, j2) with j1 < j2, jj_counter[(j1, j2)] is the sum, over
# every c that has an entry under both j1 and j2 in cnt['c.fj'], of
#     cnt['c.fj'][(c, j1)] * cnt['c.fj'][(c, j2)].
# NOTE(review): presumably "fj" is a citing journal and "c" a cited work, which
# would make this a co-citation network between journals -- confirm.
#
# The table is indexed once (by c and by fj) so each j1 is only compared with
# the fj values it actually shares a c with, instead of rescanning the whole
# cnt['c.fj'] table for every entry of cnt['fj'].
entries_by_c = defaultdict(list)   # c  -> [(table position, fj, count), ...]
entries_by_fj = defaultdict(list)  # fj -> [(c, count), ...]
for position, ((c, fj), count) in enumerate(cnt['c.fj'].items()):
    entries_by_c[c].append((position, fj, count))
    entries_by_fj[fj].append((c, count))

jj_counter = defaultdict(int)
for (j1,) in cnt['fj']:
    shared = [
        (position, j2, count1 * count2)
        # .get() avoids inserting empty lists for fj values with no entries.
        for c, count1 in entries_by_fj.get(j1, ())
        for position, j2, count2 in entries_by_c[c]
        # undirected edges: keep each pair once, ordered so that j1 < j2
        if j1 < j2
    ]
    # Each (c, j2) entry has a unique table position, so sorting on it replays
    # the contributions in cnt['c.fj'] iteration order. This keeps the key
    # insertion order of jj_counter the same as a full scan of the table.
    shared.sort(key=lambda entry: entry[0])
    for _, j2, weight in shared:
        jj_counter[(j1, j2)] += weight
# cosine similarity
#
# jmag[j] is the Euclidean norm of the vector of counts stored under j in
# cnt['c.fj'], i.e. sqrt(sum of count**2 over all (c, j) entries). It is the
# denominator term used when normalising the pair counts.
#
# The squared counts are accumulated in a single pass over cnt['c.fj'] rather
# than rescanning the whole table once per entry of cnt['fj'].
squared_sums = {}
for (_, fj), count in cnt['c.fj'].items():
    squared_sums[fj] = squared_sums.get(fj, 0) + count**2

# Keys follow cnt['fj']; an fj with no entries in cnt['c.fj'] gets 0.0.
jmag = {j: np.sqrt(squared_sums.get(j, 0)) for (j,) in cnt['fj']}
from csv import writer

# Export the network as an edge list: one row per (j1, j2) pair, weighted by
# the pair count divided by the product of the two magnitudes in jmag
# (cosine similarity).
#
# newline='' is required by the csv module: the writer emits its own '\r\n'
# line terminators, and without it Windows text mode would turn them into
# '\r\r\n', which shows up as a blank line after every row.
with open('jj_net.csv', 'w', encoding='utf8', newline='') as cf:
    w = writer(cf)
    w.writerow(['Source', 'Target', 'weight'])
    w.writerows(
        [j1, j2, c / (jmag[j1] * jmag[j2])]
        for (j1, j2), c in jj_counter.items()
    )