import sys; sys.path.append(_dh[0].split("knowknow")[0])
from knowknow import *
# Load the 'c.fa' counter of the "sociology-wos.ind" dataset.
docs = get_cnt("sociology-wos.ind", keys=['c.fa'])
len(docs['c.fa'])  # bare expression: no effect beyond evaluating the size

key = 'c.fa'
keysp = key.split(".")

# Row axis: the sorted distinct `c` values found on the counter keys,
# plus a lookup from each value to its position in that ordering.
names = sorted({entry.c for entry in docs['c.fa']})
namesi = dict(zip(names, range(len(names))))

# Column axis: the sorted distinct `fa` values, with the same kind of lookup.
# NOTE(review): the variables are called `years` although they are filled
# from the `fa` field -- confirm the naming is intentional.
years = sorted({entry.fa for entry in docs['c.fa']})
yearsi = dict(zip(years, range(len(years))))
len(years)  # bare expression: no effect beyond evaluating the size
from scipy.sparse import csr_matrix
# Freeze the counter into a list of (key, count) pairs so the three parallel
# lists below are built from the same ordering.
flatten = list(docs['c.fa'].items())

# Triplet form: one count per entry, with its row (`c`) and column (`fa`)
# positions looked up in the index dicts.
data = [count for _, count in flatten]
row_ind = [namesi[combo.c] for combo, _ in flatten]
col_ind = [yearsi[combo.fa] for combo, _ in flatten]
len(data)  # bare expression: no effect beyond evaluating the size

# Assemble a CSR matrix with one row per entry of `names` and one column
# per entry of `years`.
spmat = csr_matrix(
    (data, (row_ind, col_ind)),
    shape=(len(names), len(years)),
)

# Store the axis labels together with the matrix.
# NOTE(review): the saved name uses "sociology-wos.doc" while the counter was
# loaded from "sociology-wos.ind" -- confirm this is the intended name.
save_variable("sociology-wos.doc - c.fa - sparse", ((names, years), spmat))