imports

In [1]:
import sys; sys.path.append(_dh[0].split("knowknow")[0])
from knowknow import *

User settings

In [2]:
database_name = "sociology-wos"
In [3]:
pubyears = None
if 'wos' in database_name:
    pubyears = load_variable("%s.pubyears" % database_name)
    print("Pubyears loaded for %s entries" % len(pubyears.keys()))
    RELIABLE_DATA_ENDS_HERE = 2019
if 'jstor' in database_name:
    RELIABLE_DATA_ENDS_HERE = 2010
Pubyears loaded for 397702 entries
In [4]:
import re

def create_cysum(cits, typ):
    
    meta_counters = defaultdict(int)

    cy = defaultdict(lambda:defaultdict(int))

    for (c,y),count in cits['c.fy'].items():
        cy[c][y] = count
        
    if 'fy' in cits:
        fyc = cits['fy']
    else:
        fyc = cits['y']

    cysum = {}
    for ci,c in enumerate(cy):
        meta_counters['at least one citation'] += 1

        count = cy[c]
        prop =  {
            y: county / fyc[y]
            for y,county in count.items()
        }

        res = {
            'first': min(count),
            'last': max(count),
            'maxcounty': max(count, key=lambda y:(count[y],y)),
            'maxpropy': max(count, key=lambda y:(prop[y],y))
        }

        res['maxprop'] = prop[ res['maxpropy'] ]
        res['maxcount'] = count[ res['maxcounty'] ]
        res['total'] = sum(count.values())
        res['totalprop'] = sum(prop.values())
        res['name'] = c
        
        # gotta do something here...
        res['type'] = 'article'
        
        if typ == 'wos':
            sp = c.split("|")
            if len(sp) < 2:
                continue
            try:
                res['pub'] = int(sp[1])
                res['type'] = 'article'
            except ValueError:
                res['type'] = 'book'
                res['pub'] = pubyears[c]
                
        elif typ == 'jstor':
            inparens = re.findall(r'\(([^)]+)\)', c)[0]
            res['pub'] = int(inparens)
                

        
        
        
        
        
        
        
        
        




            
            
            
            
        # DEFINING DEATH1
        # death1 is max, as long as it's before RELIABLE_DATA_ENDS_HERE
        res['death1'] = None
        if res['maxpropy'] <= RELIABLE_DATA_ENDS_HERE:
            res['death1'] = res['maxcounty']
            
            
            
            
        # DEFINING DEATH2 
        
        # this list has an entry for each year after and including the maximum citations ever received (the last time)
        # look ahead to the next ten years and take the average
        next_year_sums = [
            (ycheck, sum( c for y,c in count.items() if ycheck + 10 >= y > ycheck ))
            for ycheck in range(res['maxcounty'], RELIABLE_DATA_ENDS_HERE - 10) 
        ]

        # need to make sure ALL subsequent decade intervals are also less...
        my_death_year = None

        l = len(next_year_sums)
        for i in range(l):
            not_this_one = False
            for j in range(i,l):
                if next_year_sums[j][1] >= res['maxcount']:
                    not_this_one = True
                if not_this_one:
                    break

            if not_this_one:
                continue

            my_death_year = next_year_sums[i][0]
            break

        if not len(next_year_sums):
            res['death2'] = None
        else:
            res['death2'] = my_death_year
            
            

        # DEATH3 is last, as long as it's before RELIABLE_DATA_ENDS_HERE
        res['death3'] = None
        if res['last'] <= RELIABLE_DATA_ENDS_HERE:
            res['death3'] = res['last']
        
            
            
            
            
        # DEATH5
        # 90% of their citations were received before death4, and it's been at least 30% of their lifespan
        myspan = np.array( [cits['c.fy'][(c,ycheck)] for ycheck in range(1900, 2020)] )
        
        res['death5'] = None

        Ea = np.sum(myspan)
        csum = np.sum(myspan)

        nonzeroyears = list(np.where(myspan>0))
        if not len(nonzeroyears):
            continue
        
        try:
            firsti = np.min(nonzeroyears)
        except:
            print("some strange error, that shouldn't happen, right??")
            
        first_year = firsti + 1900

        for cci, cc in enumerate(myspan[firsti:]):

            this_year = first_year+cci
            
            # running residual... 
            Ea -= cc

            # don't let them die too soon
            if cc == 0:
                continue

            if Ea/csum < 0.1 and (RELIABLE_DATA_ENDS_HERE - this_year)/(RELIABLE_DATA_ENDS_HERE - first_year) > 0.3:
                res['death5'] = this_year
                break
            
            
            
            
            
        
        if res['death2'] is not None and res['death2'] < res['pub']:
            meta_counters['death2 < pub!? dropped.'] += 1
            # small error catch
            continue

        #small error catch
        if res['maxpropy'] < res['pub']:
            meta_counters['maxpropy < pub!? dropped.'] += 1
            continue

        # don't care about those with only a single citation
        if res['total'] <= 1:
            meta_counters['literally 1 citation. dropped.'] += 1
            continue

        # we really don't care about those that never rise in use
        #if res['first'] == res['maxpropy']:
        #    continue
        meta_counters['passed tests pre-blacklist'] += 1

        cysum[c] = res
    
    
    
    
    
    
    blacklist = []
    for b in blacklist:
        if b in cysum:
            del cysum[b]
    
    todelete = []

    for c in todelete:
        if c in cysum:
            meta_counters['passed all other tests but was blacklisted'] += 1
            del cysum[c]
    
    print(dict(meta_counters))
    
    return cysum
In [5]:
OVERWRITE_EXISTING = True

print("Processing database '%s'"%database_name)

varname = "%s.cysum"%database_name

run = True # run 
if not OVERWRITE_EXISTING:
    try: 
        load_variable(varname)
        run = False
    except FileNotFoundError:
        pass

if run:
    cits = get_cnt("%s.doc"%database_name, ['c.fy','fy'])

    if 'wos' in database_name and 'jstor' in database_name:
        raise Exception("Please put 'wos' or 'jstor' but not both in any database_name.")
    elif 'wos' in database_name:
        cysum = create_cysum(cits, 'wos')
    elif 'jstor' in database_name:
        cysum = create_cysum(cits, 'jstor')
    else:
        raise Exception("Please include either 'wos' or 'jstor' in the name of the variable. This keys which data processing algorithm you used.")

    save_variable(varname, cysum)

    print("%s cysum entries for database '%s'" % (len(cysum), database_name))
Processing database 'sociology-wos'
Loaded keys: dict_keys(['c.fy', 'fy'])
Available keys: ['c', 'c.fj', 'c.fy', 'c.fy.j', 'fa', 'fa.c', 'fa.fj', 'fa.fj.fy', 'fa.fy', 'fj', 'fj.fy', 'fj.ta', 'fj.ty', 'fy', 'fy.ta', 'fy.ty', 'ta', 'ty']
c:\users\amcga\envs\citation-deaths\lib\site-packages\ipykernel_launcher.py:159: RuntimeWarning: invalid value encountered in longlong_scalars
some strange error, that shouldn't happen, right??
some strange error, that shouldn't happen, right??
some strange error, that shouldn't happen, right??
some strange error, that shouldn't happen, right??
some strange error, that shouldn't happen, right??
some strange error, that shouldn't happen, right??
some strange error, that shouldn't happen, right??
some strange error, that shouldn't happen, right??
some strange error, that shouldn't happen, right??
some strange error, that shouldn't happen, right??
some strange error, that shouldn't happen, right??
some strange error, that shouldn't happen, right??
some strange error, that shouldn't happen, right??
some strange error, that shouldn't happen, right??
some strange error, that shouldn't happen, right??
some strange error, that shouldn't happen, right??
some strange error, that shouldn't happen, right??
some strange error, that shouldn't happen, right??
some strange error, that shouldn't happen, right??
some strange error, that shouldn't happen, right??
some strange error, that shouldn't happen, right??
some strange error, that shouldn't happen, right??
some strange error, that shouldn't happen, right??
some strange error, that shouldn't happen, right??
some strange error, that shouldn't happen, right??
some strange error, that shouldn't happen, right??
some strange error, that shouldn't happen, right??
some strange error, that shouldn't happen, right??
some strange error, that shouldn't happen, right??
some strange error, that shouldn't happen, right??
some strange error, that shouldn't happen, right??
some strange error, that shouldn't happen, right??
some strange error, that shouldn't happen, right??
some strange error, that shouldn't happen, right??
some strange error, that shouldn't happen, right??
some strange error, that shouldn't happen, right??
some strange error, that shouldn't happen, right??
some strange error, that shouldn't happen, right??
some strange error, that shouldn't happen, right??
some strange error, that shouldn't happen, right??
some strange error, that shouldn't happen, right??
some strange error, that shouldn't happen, right??
some strange error, that shouldn't happen, right??
some strange error, that shouldn't happen, right??
some strange error, that shouldn't happen, right??
some strange error, that shouldn't happen, right??
some strange error, that shouldn't happen, right??
some strange error, that shouldn't happen, right??
some strange error, that shouldn't happen, right??
some strange error, that shouldn't happen, right??
some strange error, that shouldn't happen, right??
some strange error, that shouldn't happen, right??
some strange error, that shouldn't happen, right??
some strange error, that shouldn't happen, right??
some strange error, that shouldn't happen, right??
some strange error, that shouldn't happen, right??
some strange error, that shouldn't happen, right??
some strange error, that shouldn't happen, right??
some strange error, that shouldn't happen, right??
some strange error, that shouldn't happen, right??
some strange error, that shouldn't happen, right??
some strange error, that shouldn't happen, right??
some strange error, that shouldn't happen, right??
some strange error, that shouldn't happen, right??
some strange error, that shouldn't happen, right??
some strange error, that shouldn't happen, right??
some strange error, that shouldn't happen, right??
some strange error, that shouldn't happen, right??
some strange error, that shouldn't happen, right??
some strange error, that shouldn't happen, right??
some strange error, that shouldn't happen, right??
some strange error, that shouldn't happen, right??
some strange error, that shouldn't happen, right??
some strange error, that shouldn't happen, right??
some strange error, that shouldn't happen, right??
some strange error, that shouldn't happen, right??
some strange error, that shouldn't happen, right??
some strange error, that shouldn't happen, right??
some strange error, that shouldn't happen, right??
some strange error, that shouldn't happen, right??
some strange error, that shouldn't happen, right??
some strange error, that shouldn't happen, right??
some strange error, that shouldn't happen, right??
some strange error, that shouldn't happen, right??
some strange error, that shouldn't happen, right??
some strange error, that shouldn't happen, right??
some strange error, that shouldn't happen, right??
some strange error, that shouldn't happen, right??
some strange error, that shouldn't happen, right??
some strange error, that shouldn't happen, right??
some strange error, that shouldn't happen, right??
some strange error, that shouldn't happen, right??
some strange error, that shouldn't happen, right??
some strange error, that shouldn't happen, right??
some strange error, that shouldn't happen, right??
some strange error, that shouldn't happen, right??
some strange error, that shouldn't happen, right??
some strange error, that shouldn't happen, right??
some strange error, that shouldn't happen, right??
some strange error, that shouldn't happen, right??
some strange error, that shouldn't happen, right??
some strange error, that shouldn't happen, right??
some strange error, that shouldn't happen, right??
some strange error, that shouldn't happen, right??
some strange error, that shouldn't happen, right??
some strange error, that shouldn't happen, right??
some strange error, that shouldn't happen, right??
some strange error, that shouldn't happen, right??
some strange error, that shouldn't happen, right??
some strange error, that shouldn't happen, right??
some strange error, that shouldn't happen, right??
some strange error, that shouldn't happen, right??
some strange error, that shouldn't happen, right??
some strange error, that shouldn't happen, right??
some strange error, that shouldn't happen, right??
some strange error, that shouldn't happen, right??
some strange error, that shouldn't happen, right??
some strange error, that shouldn't happen, right??
some strange error, that shouldn't happen, right??
some strange error, that shouldn't happen, right??
some strange error, that shouldn't happen, right??
some strange error, that shouldn't happen, right??
some strange error, that shouldn't happen, right??
some strange error, that shouldn't happen, right??
some strange error, that shouldn't happen, right??
some strange error, that shouldn't happen, right??
some strange error, that shouldn't happen, right??
some strange error, that shouldn't happen, right??
some strange error, that shouldn't happen, right??
some strange error, that shouldn't happen, right??
some strange error, that shouldn't happen, right??
some strange error, that shouldn't happen, right??
some strange error, that shouldn't happen, right??
some strange error, that shouldn't happen, right??
some strange error, that shouldn't happen, right??
some strange error, that shouldn't happen, right??
some strange error, that shouldn't happen, right??
some strange error, that shouldn't happen, right??
some strange error, that shouldn't happen, right??
some strange error, that shouldn't happen, right??
some strange error, that shouldn't happen, right??
some strange error, that shouldn't happen, right??
some strange error, that shouldn't happen, right??
some strange error, that shouldn't happen, right??
some strange error, that shouldn't happen, right??
some strange error, that shouldn't happen, right??
some strange error, that shouldn't happen, right??
some strange error, that shouldn't happen, right??
some strange error, that shouldn't happen, right??
some strange error, that shouldn't happen, right??
some strange error, that shouldn't happen, right??
some strange error, that shouldn't happen, right??
some strange error, that shouldn't happen, right??
some strange error, that shouldn't happen, right??
some strange error, that shouldn't happen, right??
some strange error, that shouldn't happen, right??
some strange error, that shouldn't happen, right??
some strange error, that shouldn't happen, right??
some strange error, that shouldn't happen, right??
some strange error, that shouldn't happen, right??
some strange error, that shouldn't happen, right??
some strange error, that shouldn't happen, right??
some strange error, that shouldn't happen, right??
some strange error, that shouldn't happen, right??
some strange error, that shouldn't happen, right??
some strange error, that shouldn't happen, right??
some strange error, that shouldn't happen, right??
some strange error, that shouldn't happen, right??
some strange error, that shouldn't happen, right??
some strange error, that shouldn't happen, right??
some strange error, that shouldn't happen, right??
some strange error, that shouldn't happen, right??
some strange error, that shouldn't happen, right??
some strange error, that shouldn't happen, right??
some strange error, that shouldn't happen, right??
some strange error, that shouldn't happen, right??
some strange error, that shouldn't happen, right??
some strange error, that shouldn't happen, right??
some strange error, that shouldn't happen, right??
some strange error, that shouldn't happen, right??
some strange error, that shouldn't happen, right??
some strange error, that shouldn't happen, right??
some strange error, that shouldn't happen, right??
some strange error, that shouldn't happen, right??
some strange error, that shouldn't happen, right??
some strange error, that shouldn't happen, right??
some strange error, that shouldn't happen, right??
some strange error, that shouldn't happen, right??
some strange error, that shouldn't happen, right??
some strange error, that shouldn't happen, right??
some strange error, that shouldn't happen, right??
some strange error, that shouldn't happen, right??
some strange error, that shouldn't happen, right??
some strange error, that shouldn't happen, right??
some strange error, that shouldn't happen, right??
some strange error, that shouldn't happen, right??
some strange error, that shouldn't happen, right??
some strange error, that shouldn't happen, right??
some strange error, that shouldn't happen, right??
some strange error, that shouldn't happen, right??
some strange error, that shouldn't happen, right??
some strange error, that shouldn't happen, right??
some strange error, that shouldn't happen, right??
some strange error, that shouldn't happen, right??
some strange error, that shouldn't happen, right??
some strange error, that shouldn't happen, right??
some strange error, that shouldn't happen, right??
some strange error, that shouldn't happen, right??
some strange error, that shouldn't happen, right??
some strange error, that shouldn't happen, right??
some strange error, that shouldn't happen, right??
some strange error, that shouldn't happen, right??
some strange error, that shouldn't happen, right??
some strange error, that shouldn't happen, right??
some strange error, that shouldn't happen, right??
some strange error, that shouldn't happen, right??
some strange error, that shouldn't happen, right??
some strange error, that shouldn't happen, right??
some strange error, that shouldn't happen, right??
some strange error, that shouldn't happen, right??
some strange error, that shouldn't happen, right??
some strange error, that shouldn't happen, right??
some strange error, that shouldn't happen, right??
some strange error, that shouldn't happen, right??
some strange error, that shouldn't happen, right??
some strange error, that shouldn't happen, right??
some strange error, that shouldn't happen, right??
some strange error, that shouldn't happen, right??
some strange error, that shouldn't happen, right??
some strange error, that shouldn't happen, right??
some strange error, that shouldn't happen, right??
some strange error, that shouldn't happen, right??
some strange error, that shouldn't happen, right??
some strange error, that shouldn't happen, right??
some strange error, that shouldn't happen, right??
some strange error, that shouldn't happen, right??
some strange error, that shouldn't happen, right??
some strange error, that shouldn't happen, right??
some strange error, that shouldn't happen, right??
some strange error, that shouldn't happen, right??
some strange error, that shouldn't happen, right??
some strange error, that shouldn't happen, right??
some strange error, that shouldn't happen, right??
some strange error, that shouldn't happen, right??
some strange error, that shouldn't happen, right??
some strange error, that shouldn't happen, right??
some strange error, that shouldn't happen, right??
some strange error, that shouldn't happen, right??
some strange error, that shouldn't happen, right??
some strange error, that shouldn't happen, right??
some strange error, that shouldn't happen, right??
some strange error, that shouldn't happen, right??
some strange error, that shouldn't happen, right??
some strange error, that shouldn't happen, right??
some strange error, that shouldn't happen, right??
some strange error, that shouldn't happen, right??
some strange error, that shouldn't happen, right??
some strange error, that shouldn't happen, right??
some strange error, that shouldn't happen, right??
some strange error, that shouldn't happen, right??
some strange error, that shouldn't happen, right??
some strange error, that shouldn't happen, right??
some strange error, that shouldn't happen, right??
some strange error, that shouldn't happen, right??
some strange error, that shouldn't happen, right??
some strange error, that shouldn't happen, right??
some strange error, that shouldn't happen, right??
some strange error, that shouldn't happen, right??
some strange error, that shouldn't happen, right??
some strange error, that shouldn't happen, right??
some strange error, that shouldn't happen, right??
some strange error, that shouldn't happen, right??
some strange error, that shouldn't happen, right??
some strange error, that shouldn't happen, right??
some strange error, that shouldn't happen, right??
some strange error, that shouldn't happen, right??
some strange error, that shouldn't happen, right??
some strange error, that shouldn't happen, right??
some strange error, that shouldn't happen, right??
some strange error, that shouldn't happen, right??
some strange error, that shouldn't happen, right??
some strange error, that shouldn't happen, right??
{'at least one citation': 296716, 'passed tests pre-blacklist': 264969, 'literally 1 citation. dropped.': 31671, 'maxpropy < pub!? dropped.': 50, 'death2 < pub!? dropped.': 26}
264969 cysum entries for database 'sociology-wos'

only necessary if you plan on filtering based on this set

In [6]:
save_variable("%s.included_citations"%database_name, set(cysum.keys()))