import sys; sys.path.append(_dh[0].split("knowknow")[0])
from knowknow import *
# Journal titles whose entries are to be dropped from the counters.
# Some journals appear under more than one spelling (with/without a leading
# "The", or with an embedded line break); each variant is listed explicitly
# because titles are compared by exact string equality.
to_remove = [
    'Canadian Journal of Sociology / Cahiers canadiens de sociologie',
    'Contagion',
    'Contagion: Journal of Violence, Mimesis, and Culture',
    'Contexts',
    'Journal of Applied Social Science',
    'Max Weber Studies',
    'Race, Poverty & the Environment',
    'Social Thought & Research',
    'The Canadian Journal of Sociology / Cahiers canadiens de\n sociologie',
]

# Counter-name component that identifies the kind of item being removed;
# it is matched against the dot-separated parts of each counter name.
remove_type = 'fj'
# Dataset whose counters are loaded and pruned.
database_name = "sociology-jstor"

# Load the ".doc" counters, asking for every key that get_cnt_keys reports.
# NOTE(review): get_cnt / get_cnt_keys presumably come from the
# `from knowknow import *` line -- confirm.
cnt_name = database_name + ".doc"
doc_keys = get_cnt_keys(cnt_name)
cnt_doc = get_cnt(cnt_name, keys=doc_keys)

# Load the ".ind" counters the same way. `cnt_name` is deliberately left
# pointing at the ".ind" name, because the key selection below reads it.
cnt_name = database_name + ".ind"
ind_keys = get_cnt_keys(cnt_name)
cnt_ind = get_cnt(cnt_name, keys=ind_keys)

# Report what is about to be removed, with a couple of sample entries.
n_targets = len(to_remove)
examples = list(to_remove)[:2]
print("Consolidating %s of type `%s`." % (n_targets, remove_type))
print("Here are some examples: %s." % (examples,))

# Select every counter whose dot-separated name contains `remove_type`
# as one of its components (e.g. the component alone, or paired with others).
to_prune = [
    counter_name
    for counter_name in get_cnt_keys(cnt_name)
    if remove_type in counter_name.split(".")
]
# Bare expression kept from the original; it only has a visible effect when
# it is the last statement of an interactive/notebook cell.
to_prune
# Remove every entry that refers to one of the titles in `to_remove` from
# each selected counter, in both the ".doc" and ".ind" counter sets.

# Build the lookup set once so the per-key membership tests are O(1)
# instead of scanning the `to_remove` list for every counter key.
removal_set = set(to_remove)

for tp in to_prune:
    # Position of `remove_type` among the dot-separated components of the
    # counter name; used to pick the matching component out of each key.
    whichT = tp.split(".").index(remove_type)
    print("pruning '%s'..." % tp)

    if tp == remove_type:
        # Counter named by `remove_type` alone: the titles themselves are
        # used as the keys to delete.
        # NOTE(review): this assumes keys of this counter are the bare
        # title strings -- confirm against how get_cnt builds its keys.
        tydels = to_remove
    else:
        # Counter with several components: collect every key whose component
        # at position `whichT` is one of the titles to remove. The list is
        # materialised before deleting so the dict is not mutated while it
        # is being iterated.
        tydels = [x for x in cnt_doc[tp] if x[whichT] in removal_set]

    print("old size:", len(cnt_doc[tp]))
    for tr in tydels:
        # pop(..., None) instead of `del`: a title missing from a counter
        # (or present in only one of doc/ind) must not abort the run with a
        # KeyError and leave the counters half-pruned. The old/new size
        # printout still shows whether anything was actually removed.
        cnt_doc[tp].pop(tr, None)
        cnt_ind[tp].pop(tr, None)
    print("new size:", len(cnt_doc[tp]))