In [2]:
import sys; sys.path.append(_dh[0].split("knowknow")[0])
from knowknow import *
In [3]:
# Name of the dataset; it is formatted into the '<database_name>.doc' counter name passed to get_cnt.
database_name = "sociology-wos"
In [6]:
# Request the 'fy', 'c.fj' and 'fj' counters of the '<database_name>.doc' counter set.
cnt = get_cnt(
    '%s.doc' % database_name,
    ['fy', 'c.fj', 'fj'],
)
Loaded keys: dict_keys(['fy', 'c.fj', 'fj'])
Available keys: ['c', 'c.fj', 'c.fy', 'c.fy.j', 'fa', 'fa.c', 'fa.fj', 'fa.fj.fy', 'fa.fy', 'fj', 'fj.fy', 'fj.ta', 'fj.ty', 'fy', 'fy.ta', 'fy.ty', 'ta', 'ty']
In [7]:
# Sanity check: display the first five keys of the 'fj' counter.
journal_keys = list(cnt['fj'])
journal_keys[:5]
Out[7]:
[fj(fj='SOCIAL INDICATORS RESEARCH'),
 fj(fj='SOCIAL NETWORKS'),
 fj(fj='SOCIAL SCIENCE RESEARCH'),
 fj(fj='SOCIETY & NATURAL RESOURCES'),
 fj(fj='SOCIOLOGIA RURALIS')]
In [9]:
from itertools import combinations

# Build the undirected journal-journal edge weights:
#   jj_counter[(j1, j2)] = sum over cited items c of count(c, j1) * count(c, j2)
# i.e. the dot product of the two journals' count vectors in cnt['c.fj'].
# Each pair is stored once, with j1 < j2.
#
# Grouping the counts by cited item first means each (c, journal) entry is read
# once, instead of rescanning all of cnt['c.fj'] for every journal.

# Only journals listed in cnt['fj'] may appear as the first endpoint of an edge.
fj_names = {journal for (journal,) in cnt['fj']}

# cited item -> {journal: count}
counts_by_cited = defaultdict(dict)
for (cited, journal), n_cites in cnt['c.fj'].items():
    counts_by_cited[cited][journal] = n_cites

jj_counter = defaultdict(int)

for journal_counts in counts_by_cited.values():
    # sorted() + combinations() yields every unordered pair exactly once, with j1 < j2
    for j1, j2 in combinations(sorted(journal_counts), 2):
        if j1 not in fj_names:
            continue
        jj_counter[(j1, j2)] += journal_counts[j1] * journal_counts[j2]
In [15]:
# cosine similarity
# jmag[j] is the Euclidean norm of journal j's count vector in cnt['c.fj'];
# it is the denominator that turns a dot product of counts into a cosine similarity.
#
# A single pass accumulates every journal's sum of squared counts, instead of
# rescanning all of cnt['c.fj'] once per journal.
# (Loop variables are named explicitly so the cell does not rebind `_`,
# which IPython uses for the last output.)
squared_sums = defaultdict(int)
for (cited_item, journal_name), n_cites in cnt['c.fj'].items():
    squared_sums[journal_name] += n_cites**2

# .get() gives journals without any 'c.fj' entry a magnitude of 0
# without inserting new keys into squared_sums.
jmag = {j: np.sqrt(squared_sums.get(j, 0)) for (j,) in cnt['fj']}
In [19]:
from csv import writer

# Export the journal-journal network as an edge list, one row per (j1, j2) pair.
# The weight is the cosine similarity: the summed count product divided by the
# product of the two journals' magnitudes.
#
# newline='' is required by the csv module: the writer emits its own line
# terminators, and without it platforms that translate '\n' (e.g. Windows)
# produce '\r\r\n', which shows up as a blank row after every edge.
with open('jj_net.csv', 'w', encoding='utf8', newline='') as cf:
    edge_writer = writer(cf)
    edge_writer.writerow(['Source','Target','weight'])
    for (j1, j2), dot in jj_counter.items():
        edge_writer.writerow([j1, j2, dot / (jmag[j1] * jmag[j2])])
In [ ]: