import sys; sys.path.append(_dh[0].split("knowknow")[0])
from knowknow import *
# Default run configuration: the database to analyse and the count type to load.
database_name = "sociology-wos"
dtype = 'c'
# Parameters
# NOTE(review): these assignments override the defaults above; this looks like
# a parameter block injected by a notebook runner — confirm before removing
# either pair.
database_name = "sociology-jstor"
dtype = "t"
# Request three count tables from "<database_name>.ind": the combination of
# dtype with 'fy', 'fy' alone, and dtype alone.
# NOTE(review): `cits` is not used in the visible code — confirm whether a
# later part of the file relies on it.
cits = get_cnt("%s.ind"%database_name, [comb(dtype,'fy'),'fy',dtype])
# Load the variable named "<database_name>.<dtype>.ysum"; the code below reads
# it via .items() and expects each value to carry a 'total' entry.
cysum = load_variable("%s.%s.ysum"%(database_name,dtype))
# Histogram of received citations: cc[k] is the number of entries in `cysum`
# whose 'total' equals k.
cc = Counter(int(x['total']) for _, x in cysum.items())

# Dense version of the histogram over every value 0..max, so that position i
# of `counts` lines up with totals[i]. Counter returns 0 for absent keys.
totals = range(0, max(cc.keys()) + 1)
counts = [cc[i] for i in totals]

# Share of entries cited more than 5 times. The denominator is the number of
# entries (sum of the histogram bins), not the sum of the citation-count axis.
n_entries = sum(counts)
n_over_5 = sum(counts[i] for i in totals if i > 5)
print("%0.2f%% were cited more than 5 times" % (100 * n_over_5 / n_entries))
# Convert to arrays so the histogram can be transformed element-wise.
counts = np.array(counts)
totals = np.array(totals)

# Log-log view of the distribution. Zero entries (totals[0] == 0 and any empty
# bins) become -inf under log10; the values passed to plot are the same as
# before, only the divide-by-zero RuntimeWarning is silenced.
with np.errstate(divide="ignore"):
    log_totals = np.log10(totals)
    log_counts = np.log10(counts)
plt.plot(log_totals, log_counts)

# Tick positions are in log10 units; label them with the original magnitudes.
xlab = np.array([1,2,3])
plt.xticks(xlab, np.power(10, xlab))
ylab = np.array([1,2,3,4,5])
plt.yticks(ylab, np.power(10, ylab))

# x axis: how many citations were received; y axis: how many entries received
# that many.
plt.xlabel("Number of citations")
plt.ylabel("Number of cited works")

# The title doubles as the name passed to save_figure.
title = "Received citations distribution (%s, %s)" % (database_name,dtype)
plt.title(title)
save_figure(title)
import statsmodels.api as sm

# Only bins strictly above this citation count enter the regression.
CUTOFF = 10

# Select the non-empty bins above the cutoff with a single boolean mask
# (empty bins are excluded because their log is undefined).
_totals_arr = np.asarray(totals)
_counts_arr = np.asarray(counts)
_in_tail = (_totals_arr > CUTOFF) & (_counts_arr > 0)

# Natural-log transform of both axes for a linear fit in log-log space.
lt = np.log(_totals_arr[_in_tail])
lc = np.log(_counts_arr[_in_tail])

# Ordinary least squares of log(count) on log(total) with an intercept term.
X = sm.add_constant(lt)
y = lc
mod = sm.OLS(y, X)
res = mod.fit()
res.summary()