# default_exp indicators

These indicators were originally SQL scripts. We ported them to Python, and now we are working to make the scripts even better.

Undone -> Yet to be Transcribed From SQL

??? 199 Number of Trees Planted treeplntXX TreeBaltimore

49 nomail - R

NO INFORMATION LOCATED.

This data comes already aggregated, so it may be quicker to process.

129 artevnt - G

events_2017_csa, indicator number 130:

```sql
SELECT bAll.csa AS Bound,
       sum(bQuery.events_2017) * (1000 / bAll.the_pop) AS events_2017
FROM boundaries.csa2010 bAll
LEFT JOIN (
    SELECT bounds.csa AS Boundary,
           (count(Tables.gid ::numeric(20,4))::numeric(20,2)) AS events_2017
    FROM arts.events_2017 AS Tables
    JOIN boundaries.csa2010 AS bounds
      ON st_contains(bounds.the_geom, Tables.the_geom)
    GROUP BY bounds.csa
    ORDER BY bounds.csa
) bQuery ON bAll.csa = bQuery.Boundary
GROUP BY Bound, the_pop
ORDER BY Bound;
```
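Since the goal of this notebook is to port these queries to Python, here is a minimal sketch of the same calculation (events per 1,000 residents by CSA) outside the database. It is not the production transcription: the file paths, the use of geopandas, and the assumption that the exported layers keep the database column names (`csa`, `the_pop`) are all assumptions.

```python
# Hypothetical geopandas transcription of the artevnt query above.
# Assumes the CSA boundaries and event points have been exported to files
# (shapefile/GeoJSON) with the same column names as the database tables.
import geopandas as gpd

def artevnt(csa_path, events_path):
    csas = gpd.read_file(csa_path)       # boundaries.csa2010
    events = gpd.read_file(events_path)  # arts.events_2017

    # st_contains(bounds.the_geom, Tables.the_geom) -> point-in-polygon join
    joined = gpd.sjoin(events, csas, how='inner', predicate='within')

    # Count events per CSA, then normalize per 1,000 residents
    counts = joined.groupby('csa').size().rename('events')
    out = csas.set_index('csa').join(counts).fillna(0)
    return (out['events'] * 1000 / out['the_pop']).sort_index()
```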

168 weather - R

Waiting on weatherization data. Normalization source -> MD Property View (acquired).

```sql
with tbl AS (
    select (count(job_number)::real) * (1000 / the_pop::real) as result, csa
    from vital_signs.match_csas_and_bc_by_geom('sustainability.weatherization_2017', 'gid', 'the_geom') a
    left join sustainability.weatherization_2017 b on a.gid = b.gid
    group by csa, the_pop
)
update vital_signs.data
set weather = result from tbl
where data.csa = tbl.csa and data_year = '2017';
```

```sql
with numerator AS (
    select (count(case when csa_present then 1 else NULL end)::numeric) as result, csa
    from vital_signs.match_csas_and_bc_by_geom('sustainability.weatherization_2017', 'gid', 'the_geom') a
    left join sustainability.weatherization_2017 b on a.gid = b.gid
    group by csa
),
denominator AS (
    select (sum(case
                when (address != $$NULL$$)
                 AND (desclu = $$Apartments$$
                   OR desclu = $$Residential$$
                   OR desclu = $$Residential Commercial$$
                   OR desclu = $$Residential Condominium$$)
                then 1 else NULL end)::numeric) as result, csa
    from vital_signs.match_csas_and_bc_by_geom('housing.mdprop_2017v2', 'gid', 'the_geom') a
    left join housing.mdprop_2017v2 b on a.gid = b.gid
    group by csa, the_pop
),
tbl AS (
    select denominator.csa,
           (numerator.result / denominator.result) * (100::numeric) as result
    from numerator
    left join denominator on numerator.csa = denominator.csa
)
select * from tbl where 1 = 1 ORDER BY csa ASC;
```
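A minimal pandas sketch of the percentage query above (weatherization jobs as a percent of residential properties, by CSA). It assumes the two tables have already been matched to CSAs and loaded as DataFrames with the column names used in the SQL (`csa`, `address`, `desclu`); the address check approximates the SQL's comparison against the literal string `NULL`.

```python
# Hypothetical pandas transcription of the weatherization percentage above.
import pandas as pd

RESIDENTIAL = ['Apartments', 'Residential',
               'Residential Commercial', 'Residential Condominium']

def weather_pct(weatherization: pd.DataFrame, mdprop: pd.DataFrame) -> pd.Series:
    # Numerator: weatherization jobs matched to a CSA
    num = weatherization.groupby('csa').size()

    # Denominator: residential properties with an address, per CSA
    mask = mdprop['address'].notna() & mdprop['desclu'].isin(RESIDENTIAL)
    den = mdprop[mask].groupby('csa').size()

    # pandas aligns the two Series on the csa index
    return (num / den * 100).sort_index()
```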

affordm.py

```python
#export
#File: affordm.py
#Author: Charles Karpati
#Date: 1/25/19
#Section: Bnia
#Email: karpati1@umbc.edu
#Description:
# Uses ACS Table B25091 - MORTGAGE STATUS BY SELECTED MONTHLY OWNER COSTS
# AS A PERCENTAGE OF HOUSEHOLD INCOME IN THE PAST 12 MONTHS
# Universe: Owner-occupied housing units
# Table Creates:
#purpose: Produce Housing and Community Development - Affordability Index - Mortgage Indicator
#input: Year
#output:

import pandas as pd
import glob

def affordm(year):
    def getColName(df, col): return df.columns[df.columns.str.contains(pat=col)][0]
    def getColByName(df, col): return df[getColName(df, col)]
    def addKey(df, fi, col):
        key = getColName(df, col)
        val = getColByName(df, col)
        fi[key] = val
        return fi
    def nullIfEqual(df, c1, c2):
        return df.apply(lambda x: x[getColName(df, c1)] + x[getColName(df, c2)]
                        if x[getColName(df, c1)] + x[getColName(df, c2)] != 0 else 0, axis=1)
    def sumInts(df): return df.sum(numeric_only=True)

    #~~~~~~~~~~~~~~~
    # Step 1)
    # Fetch Tract Files w/ CSA Labels by Name from the 2_cleaned folder.
    #~~~~~~~~~~~~~~~
    fileName = ''
    for name in glob.glob('AcsDataClean/B25091*5y' + str(year) + '_est.csv'):
        fileName = name
    df = pd.read_csv(fileName, index_col=0)

    # Group By CSA so that they may be operated on
    df = df.groupby('CSA')

    # Aggregate Numeric Values by Sum
    df = sumInts(df)
    # Add 'BALTIMORE' which is the SUM of all the CSAs

    #~~~~~~~~~~~~~~~
    # Step 2)
    # Prepare the columns
    #~~~~~~~~~~~~~~~
    # Final Dataframe
    fi = pd.DataFrame()
    columns = ['B25091_008E', 'B25091_009E', 'B25091_010E', 'B25091_011E', 'B25091_002E']
    for col in columns:
        fi = addKey(df, fi, col)

    # Numerators
    numerators = pd.DataFrame()
    columns = ['B25091_008E', 'B25091_009E', 'B25091_010E', 'B25091_011E']
    for col in columns:
        numerators = addKey(df, numerators, col)

    # Denominators
    denominators = pd.DataFrame()
    columns = ['B25091_002E']
    for col in columns:
        denominators = addKey(df, denominators, col)

    #~~~~~~~~~~~~~~~
    # Step 3)
    # Run the Calculation
    # ( (value[1]+value[2]+value[3]+value[4]) / nullif(value[5],0) )*100
    #~~~~~~~~~~~~~~~
    fi['numerator'] = numerators.sum(axis=1)
    fi['denominator'] = denominators.sum(axis=1)
    fi = fi[fi['denominator'] != 0]  # Delete Rows where the 'denominator' column is 0
    fi['final'] = (fi['numerator'] / fi['denominator']) * 100
    return fi['final']

"""
WITH tbl AS (
    select csa,
        ( (value[1]+value[2]+value[3]+value[4]) / nullif(value[5],0) )*100::numeric as result
    from vital_signs.get_acs_vars_csa_and_bc('2014',
        ARRAY['B25091_008E','B25091_009E','B25091_010E','B25091_011E','B25091_002E'])
)
update vital_signs.data
set affordm = result from tbl
where data2.csa = tbl.csa and update_data_year = '2014' and data_year = '2014';
"""
```
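Every ACS-based indicator below follows the same calling convention: pass the two-digit year of the 5-year vintage (as it appears in the cleaned file names) and get back a pandas Series of values indexed by CSA. A hypothetical call, assuming a matching cleaned file exists under `AcsDataClean/`:

```python
# Hypothetical usage; assumes a file like AcsDataClean/B25091_5y17_est.csv exists.
mortgage_burden = affordm('17')
print(mortgage_burden.head())
```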

affordr.py

```python
#export
#File: affordr.py
#Author: Charles Karpati
#Date: 1/17/19
#Section: Bnia
#Email: karpati1@umbc.edu
#Description:
# Uses ACS Table B25070 - GROSS RENT AS A PERCENTAGE OF HOUSEHOLD INCOME
# IN THE PAST 12 MONTHS
# Universe: Renter-occupied housing units
#purpose: Produce Housing and Community Development - Affordability Index - Rent Indicator
#input: Year
#output:

import pandas as pd
import glob

def affordr(year):
    def getColName(df, col): return df.columns[df.columns.str.contains(pat=col)][0]
    def getColByName(df, col): return df[getColName(df, col)]
    def addKey(df, fi, col):
        key = getColName(df, col)
        val = getColByName(df, col)
        fi[key] = val
        return fi
    def nullIfEqual(df, c1, c2):
        return df.apply(lambda x: x[getColName(df, c1)] + x[getColName(df, c2)]
                        if x[getColName(df, c1)] + x[getColName(df, c2)] != 0 else 0, axis=1)
    def sumInts(df): return df.sum(numeric_only=True)

    #~~~~~~~~~~~~~~~
    # Step 1)
    # Fetch Tract Files w/ CSA Labels by Name from the 2_cleaned folder.
    #~~~~~~~~~~~~~~~
    fileName = ''
    for name in glob.glob('AcsDataClean/B25070*5y' + str(year) + '_est.csv'):
        fileName = name
    df = pd.read_csv(fileName, index_col=0)

    # Group By CSA so that they may be operated on
    df = df.groupby('CSA')

    # Aggregate Numeric Values by Sum
    df = sumInts(df)
    # Add 'BALTIMORE' which is the SUM of all the CSAs

    #~~~~~~~~~~~~~~~
    # Step 2)
    # Prepare the columns
    #~~~~~~~~~~~~~~~
    # Final Dataframe
    fi = pd.DataFrame()
    columns = ['B25070_007E', 'B25070_008E', 'B25070_009E', 'B25070_010E', 'B25070_001E']
    for col in columns:
        fi = addKey(df, fi, col)

    # Numerators
    numerators = pd.DataFrame()
    columns = ['B25070_007E', 'B25070_008E', 'B25070_009E', 'B25070_010E']
    for col in columns:
        numerators = addKey(df, numerators, col)

    # Denominators
    denominators = pd.DataFrame()
    columns = ['B25070_001E']
    for col in columns:
        denominators = addKey(df, denominators, col)

    #~~~~~~~~~~~~~~~
    # Step 3)
    # Run the Calculation
    # ( (value[1]+value[2]+value[3]+value[4]) / nullif(value[5],0) )*100
    #~~~~~~~~~~~~~~~
    fi['numerator'] = numerators.sum(axis=1)
    fi['denominator'] = denominators.sum(axis=1)
    fi = fi[fi['denominator'] != 0]  # Delete Rows where the 'denominator' column is 0
    fi['final'] = (fi['numerator'] / fi['denominator']) * 100
    return fi['final']

"""
WITH tbl AS (
    select csa,
        ( (value[1]+value[2]+value[3]+value[4]) / nullif(value[5],0) )*100::numeric as result
    from vital_signs.get_acs_vars_csa_and_bc('2014',
        ARRAY['B25070_007E','B25070_008E','B25070_009E','B25070_010E','B25070_001E'])
)
update vital_signs.data
set affordr = result from tbl
where data2.csa = tbl.csa and update_data_year = '2014' and data_year = '2014';
"""
```

age5.py

```python
#export
#File: age5.py
#Author: Charles Karpati
#Date: 4/16/19
#Section: Bnia
#Email: karpati1@umbc.edu
#Description:
# Uses ACS Table B01001 - SEX BY AGE
# Universe: Total population
# Table Creates: tpop, female, male, age5, age18, age24, age64, age65
#purpose:
#input: Year
#output:

import pandas as pd
import glob

def age5(year):
    def getColName(df, col): return df.columns[df.columns.str.contains(pat=col)][0]
    def getColByName(df, col): return df[getColName(df, col)]
    def addKey(df, fi, col):
        key = getColName(df, col)
        val = getColByName(df, col)
        fi[key] = val
        return fi
    def nullIfEqual(df, c1, c2):
        return df.apply(lambda x: x[getColName(df, c1)] + x[getColName(df, c2)]
                        if x[getColName(df, c1)] + x[getColName(df, c2)] != 0 else 0, axis=1)
    def sumInts(df): return df.sum(numeric_only=True)

    #~~~~~~~~~~~~~~~
    # Step 1)
    # Fetch Tract Files w/ CSA Labels by Name from the 2_cleaned folder.
    #~~~~~~~~~~~~~~~
    fileName = ''
    for name in glob.glob('AcsDataClean/B01001*5y' + str(year) + '_est.csv'):
        fileName = name
    df = pd.read_csv(fileName, index_col=0)

    # Group By CSA so that they may be operated on
    df = df.groupby('CSA')

    # Aggregate Numeric Values by Sum
    df = df.sum(numeric_only=True)

    total = df['B01001_001E_Total']
    df1 = pd.DataFrame()
    df1['CSA'] = df.index
    df1.set_index('CSA', drop=True, inplace=True)

    # Under 5
    df1['under_5'] = (df['B01001_003E_Total_Male_Under_5_years']
                      + df['B01001_027E_Total_Female_Under_5_years']
                      ) / total * 100
    return df1['under_5']
```

age18.py

```python
#export
#File: age18.py
#Author: Charles Karpati
#Date: 4/16/19
#Section: Bnia
#Email: karpati1@umbc.edu
#Description:
# Uses ACS Table B01001 - SEX BY AGE
# Universe: Total population
# Table Creates: tpop, female, male, age5, age18, age24, age64, age65
#purpose:
#input: Year
#output:

import pandas as pd
import glob

def age18(year):
    def getColName(df, col): return df.columns[df.columns.str.contains(pat=col)][0]
    def getColByName(df, col): return df[getColName(df, col)]
    def addKey(df, fi, col):
        key = getColName(df, col)
        val = getColByName(df, col)
        fi[key] = val
        return fi
    def nullIfEqual(df, c1, c2):
        return df.apply(lambda x: x[getColName(df, c1)] + x[getColName(df, c2)]
                        if x[getColName(df, c1)] + x[getColName(df, c2)] != 0 else 0, axis=1)
    def sumInts(df): return df.sum(numeric_only=True)

    #~~~~~~~~~~~~~~~
    # Step 1)
    # Fetch Tract Files w/ CSA Labels by Name from the 2_cleaned folder.
    #~~~~~~~~~~~~~~~
    fileName = ''
    for name in glob.glob('AcsDataClean/B01001*5y' + str(year) + '_est.csv'):
        fileName = name
    df = pd.read_csv(fileName, index_col=0)

    # Group By CSA so that they may be operated on
    df = df.groupby('CSA')

    # Aggregate Numeric Values by Sum
    df = df.sum(numeric_only=True)

    total = df['B01001_001E_Total']
    df1 = pd.DataFrame()
    df1['CSA'] = df.index
    df1.set_index('CSA', drop=True, inplace=True)

    df1['five_to_17'] = (df['B01001_004E_Total_Male_5_to_9_years']
                         + df['B01001_005E_Total_Male_10_to_14_years']
                         + df['B01001_006E_Total_Male_15_to_17_years']
                         + df['B01001_028E_Total_Female_5_to_9_years']
                         + df['B01001_029E_Total_Female_10_to_14_years']
                         + df['B01001_030E_Total_Female_15_to_17_years']
                         ) / total * 100
    return df1['five_to_17']
```

age24.py

```python
#export
#File: age24.py
#Author: Charles Karpati
#Date: 4/16/19
#Section: Bnia
#Email: karpati1@umbc.edu
#Description:
# Uses ACS Table B01001 - SEX BY AGE
# Universe: Total population
# Table Creates: tpop, female, male, age5, age18, age24, age64, age65
#purpose:
#input: Year
#output:

import pandas as pd
import glob

def age24(year):
    def getColName(df, col): return df.columns[df.columns.str.contains(pat=col)][0]
    def getColByName(df, col): return df[getColName(df, col)]
    def addKey(df, fi, col):
        key = getColName(df, col)
        val = getColByName(df, col)
        fi[key] = val
        return fi
    def nullIfEqual(df, c1, c2):
        return df.apply(lambda x: x[getColName(df, c1)] + x[getColName(df, c2)]
                        if x[getColName(df, c1)] + x[getColName(df, c2)] != 0 else 0, axis=1)
    def sumInts(df): return df.sum(numeric_only=True)

    #~~~~~~~~~~~~~~~
    # Step 1)
    # Fetch Tract Files w/ CSA Labels by Name from the 2_cleaned folder.
    #~~~~~~~~~~~~~~~
    fileName = ''
    for name in glob.glob('AcsDataClean/B01001*5y' + str(year) + '_est.csv'):
        fileName = name
    df = pd.read_csv(fileName, index_col=0)

    # Group By CSA so that they may be operated on
    df = df.groupby('CSA')

    # Aggregate Numeric Values by Sum
    df = df.sum(numeric_only=True)

    total = df['B01001_001E_Total']
    df1 = pd.DataFrame()
    df1['CSA'] = df.index
    df1.set_index('CSA', drop=True, inplace=True)

    df1['eighteen_to_24'] = (df['B01001_007E_Total_Male_18_and_19_years']
                             + df['B01001_008E_Total_Male_20_years']
                             + df['B01001_009E_Total_Male_21_years']
                             + df['B01001_010E_Total_Male_22_to_24_years']
                             + df['B01001_031E_Total_Female_18_and_19_years']
                             + df['B01001_032E_Total_Female_20_years']
                             + df['B01001_033E_Total_Female_21_years']
                             + df['B01001_034E_Total_Female_22_to_24_years']
                             ) / total * 100
    return df1['eighteen_to_24']
```

age64.py

```python
#export
#File: age64.py
#Author: Charles Karpati
#Date: 4/16/19
#Section: Bnia
#Email: karpati1@umbc.edu
#Description:
# Uses ACS Table B01001 - SEX BY AGE
# Universe: Total population
# Table Creates: tpop, female, male, age5, age18, age24, age64, age65
#purpose:
#input: Year
#output:

import pandas as pd
import glob

def age64(year):
    def getColName(df, col): return df.columns[df.columns.str.contains(pat=col)][0]
    def getColByName(df, col): return df[getColName(df, col)]
    def addKey(df, fi, col):
        key = getColName(df, col)
        val = getColByName(df, col)
        fi[key] = val
        return fi
    def nullIfEqual(df, c1, c2):
        return df.apply(lambda x: x[getColName(df, c1)] + x[getColName(df, c2)]
                        if x[getColName(df, c1)] + x[getColName(df, c2)] != 0 else 0, axis=1)
    def sumInts(df): return df.sum(numeric_only=True)

    #~~~~~~~~~~~~~~~
    # Step 1)
    # Fetch Tract Files w/ CSA Labels by Name from the 2_cleaned folder.
    #~~~~~~~~~~~~~~~
    fileName = ''
    for name in glob.glob('AcsDataClean/B01001*5y' + str(year) + '_est.csv'):
        fileName = name
    df = pd.read_csv(fileName, index_col=0)

    # Group By CSA so that they may be operated on
    df = df.groupby('CSA')

    # Aggregate Numeric Values by Sum
    df = df.sum(numeric_only=True)

    total = df['B01001_001E_Total']
    df1 = pd.DataFrame()
    df1['CSA'] = df.index
    df1.set_index('CSA', drop=True, inplace=True)

    df1['twentyfive_to_64'] = (df['B01001_011E_Total_Male_25_to_29_years']
                               + df['B01001_012E_Total_Male_30_to_34_years']
                               + df['B01001_013E_Total_Male_35_to_39_years']
                               + df['B01001_014E_Total_Male_40_to_44_years']
                               + df['B01001_015E_Total_Male_45_to_49_years']
                               + df['B01001_016E_Total_Male_50_to_54_years']
                               + df['B01001_017E_Total_Male_55_to_59_years']
                               + df['B01001_018E_Total_Male_60_and_61_years']
                               + df['B01001_019E_Total_Male_62_to_64_years']
                               + df['B01001_035E_Total_Female_25_to_29_years']
                               + df['B01001_036E_Total_Female_30_to_34_years']
                               + df['B01001_037E_Total_Female_35_to_39_years']
                               + df['B01001_038E_Total_Female_40_to_44_years']
                               + df['B01001_039E_Total_Female_45_to_49_years']
                               + df['B01001_040E_Total_Female_50_to_54_years']
                               + df['B01001_041E_Total_Female_55_to_59_years']
                               + df['B01001_042E_Total_Female_60_and_61_years']
                               + df['B01001_043E_Total_Female_62_to_64_years']
                               ) / total * 100
    return df1['twentyfive_to_64']
```

age65.py

```python
#export
#File: age65.py
#Author: Charles Karpati
#Date: 4/16/19
#Section: Bnia
#Email: karpati1@umbc.edu
#Description:
# Uses ACS Table B01001 - SEX BY AGE
# Universe: Total population
# Table Creates: tpop, female, male, age5, age18, age24, age64, age65
#purpose:
#input: Year
#output:

import pandas as pd
import glob

def age65(year):
    def getColName(df, col): return df.columns[df.columns.str.contains(pat=col)][0]
    def getColByName(df, col): return df[getColName(df, col)]
    def addKey(df, fi, col):
        key = getColName(df, col)
        val = getColByName(df, col)
        fi[key] = val
        return fi
    def nullIfEqual(df, c1, c2):
        return df.apply(lambda x: x[getColName(df, c1)] + x[getColName(df, c2)]
                        if x[getColName(df, c1)] + x[getColName(df, c2)] != 0 else 0, axis=1)
    def sumInts(df): return df.sum(numeric_only=True)

    #~~~~~~~~~~~~~~~
    # Step 1)
    # Fetch Tract Files w/ CSA Labels by Name from the 2_cleaned folder.
    #~~~~~~~~~~~~~~~
    fileName = ''
    for name in glob.glob('AcsDataClean/B01001*5y' + str(year) + '_est.csv'):
        fileName = name
    df = pd.read_csv(fileName, index_col=0)

    # Group By CSA so that they may be operated on
    df = df.groupby('CSA')

    # Aggregate Numeric Values by Sum
    df = df.sum(numeric_only=True)

    total = df['B01001_001E_Total']
    df1 = pd.DataFrame()
    df1['CSA'] = df.index
    df1.set_index('CSA', drop=True, inplace=True)

    df1['sixtyfive_and_up'] = (df['B01001_020E_Total_Male_65_and_66_years']
                               + df['B01001_021E_Total_Male_67_to_69_years']
                               + df['B01001_022E_Total_Male_70_to_74_years']
                               + df['B01001_023E_Total_Male_75_to_79_years']
                               + df['B01001_024E_Total_Male_80_to_84_years']
                               + df['B01001_025E_Total_Male_85_years_and_over']
                               + df['B01001_044E_Total_Female_65_and_66_years']
                               + df['B01001_045E_Total_Female_67_to_69_years']
                               + df['B01001_046E_Total_Female_70_to_74_years']
                               + df['B01001_047E_Total_Female_75_to_79_years']
                               + df['B01001_048E_Total_Female_80_to_84_years']
                               + df['B01001_049E_Total_Female_85_years_and_over']
                               ) / total * 100
    return df1['sixtyfive_and_up']
```
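Taken together, the five B01001 age functions partition the total population, so their values for a given CSA should sum to roughly 100. A hypothetical combined view, assuming the exported `indicators` module (per the `default_exp` directive above) is importable and the cleaned 2017 B01001 file is present:

```python
# Hypothetical usage: combine the age-band indicators into one profile table.
import pandas as pd
from indicators import age5, age18, age24, age64, age65  # assumed module path

profile = pd.DataFrame({
    'under_5': age5('17'),
    '5_17':    age18('17'),
    '18_24':   age24('17'),
    '25_64':   age64('17'),
    '65_up':   age65('17'),
})
# Sanity check: each CSA's bands should total about 100 percent.
print(profile.sum(axis=1).head())
```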

bahigher.py

```python
#export
#File: bahigher.py
#Author: Charles Karpati
#Date: 1/17/19
#Section: Bnia
#Email: karpati1@umbc.edu
#Description:
# Uses ACS Table B06009 - PLACE OF BIRTH BY EDUCATIONAL ATTAINMENT IN THE UNITED STATES
#purpose: Produce Workforce and Economic Development - Percent Population (25 Years and over) with a Bachelor's Degree or Above
#Table Uses: B06009 - lesshs, hsdipl, bahigher
#input: Year
#output:

import pandas as pd
import glob

def bahigher(year):
    def getColName(df, col): return df.columns[df.columns.str.contains(pat=col)][0]
    def getColByName(df, col): return df[getColName(df, col)]
    def addKey(df, fi, col):
        key = getColName(df, col)
        val = getColByName(df, col)
        fi[key] = val
        return fi
    def nullIfEqual(df, c1, c2):
        return df.apply(lambda x: x[getColName(df, c1)] + x[getColName(df, c2)]
                        if x[getColName(df, c1)] + x[getColName(df, c2)] != 0 else 0, axis=1)
    def sumInts(df): return df.sum(numeric_only=True)

    #~~~~~~~~~~~~~~~
    # Step 1)
    # Fetch Tract Files w/ CSA Labels by Name from the 2_cleaned folder.
    #~~~~~~~~~~~~~~~
    fileName = ''
    for name in glob.glob('AcsDataClean/B06009*5y' + str(year) + '_est.csv'):
        fileName = name
    df = pd.read_csv(fileName, index_col=0)

    # Group By CSA so that they may be operated on
    df = df.groupby('CSA')

    # Aggregate Numeric Values by Sum
    df = sumInts(df)
    # Add 'BALTIMORE' which is the SUM of all the CSAs

    #~~~~~~~~~~~~~~~
    # Step 2)
    # Prepare the columns
    #~~~~~~~~~~~~~~~
    # Final Dataframe
    fi = pd.DataFrame()
    columns = ['B06009_005E', 'B06009_006E', 'B06009_001E']
    for col in columns:
        fi = addKey(df, fi, col)

    # Numerators
    numerators = pd.DataFrame()
    columns = ['B06009_005E', 'B06009_006E']
    for col in columns:
        numerators = addKey(df, numerators, col)

    # Denominators
    denominators = pd.DataFrame()
    columns = ['B06009_001E']
    for col in columns:
        denominators = addKey(df, denominators, col)

    #~~~~~~~~~~~~~~~
    # Step 3)
    # Run the Calculation + final mods
    # ( ( value[1] + value[2] ) / nullif(value[3],0) )*100
    #~~~~~~~~~~~~~~~
    fi['numerator'] = numerators.sum(axis=1)
    fi['denominator'] = denominators.sum(axis=1)
    fi = fi[fi['denominator'] != 0]  # Delete Rows where the 'denominator' column is 0
    fi['final'] = (fi['numerator'] / fi['denominator']) * 100
    return fi['final']

"""
WITH tbl AS (
    select csa,
        ( ( value[1] + value[2] ) / nullif(value[3],0) )*100::numeric as result
    from vital_signs.get_acs_vars_csa_and_bc('2014',ARRAY['B06009_005E','B06009_006E','B06009_001E'])
)
update vital_signs.data
set bahigher = result from tbl
where data2.csa = tbl.csa and update_data_year = '2014' and data_year = '2014';

B06009_001E label "Estimate!!Total"
B06009_002E label "Estimate!!Total!!Less than high school graduate"
B06009_003E label "Estimate!!Total!!High school graduate (includes equivalency)"
B06009_004E label "Estimate!!Total!!Some college or associate's degree"
B06009_005E label "Estimate!!Total!!Bachelor's degree"
B06009_006E label "Estimate!!Total!!Graduate or professional degree"
"""
```

carpool.py

```python
#export
#File: carpool.py
#Author: Charles Karpati
#Date: 1/17/19
#Section: Bnia
#Email: karpati1@umbc.edu
#Description:
# Uses ACS Table B08101 - MEANS OF TRANSPORTATION TO WORK BY AGE
# Universe: Workers 16 Years and Over
# Table Creates: othrcom, drvalone, carpool, pubtran, walked
#purpose: Produce Sustainability - Percent of Population that Carpool to Work Indicator
#input: Year
#output:

import pandas as pd
import glob

def carpool(year):
    def getColName(df, col): return df.columns[df.columns.str.contains(pat=col)][0]
    def getColByName(df, col): return df[getColName(df, col)]
    def addKey(df, fi, col):
        key = getColName(df, col)
        val = getColByName(df, col)
        fi[key] = val
        return fi
    def nullIfEqual(df, c1, c2):
        return df.apply(lambda x: x[getColName(df, c1)] + x[getColName(df, c2)]
                        if x[getColName(df, c1)] + x[getColName(df, c2)] != 0 else 0, axis=1)
    def sumInts(df): return df.sum(numeric_only=True)

    #~~~~~~~~~~~~~~~
    # Step 1)
    # Fetch Tract Files w/ CSA Labels by Name from the 2_cleaned folder.
    #~~~~~~~~~~~~~~~
    fileName = ''
    for name in glob.glob('AcsDataClean/B08101*5y' + str(year) + '_est.csv'):
        fileName = name
    df = pd.read_csv(fileName, index_col=0)

    # Group By CSA so that they may be operated on
    df = df.groupby('CSA')

    # Aggregate Numeric Values by Sum
    df = sumInts(df)
    # Add 'BALTIMORE' which is the SUM of all the CSAs

    #~~~~~~~~~~~~~~~
    # Step 2)
    # Prepare the columns
    #~~~~~~~~~~~~~~~
    # Final Dataframe
    fi = pd.DataFrame()
    columns = ['B08101_001E', 'B08101_049E', 'B08101_017E']
    for col in columns:
        fi = addKey(df, fi, col)

    # Numerators
    numerators = pd.DataFrame()
    columns = ['B08101_017E']
    for col in columns:
        numerators = addKey(df, numerators, col)

    # Denominators
    denominators = pd.DataFrame()
    columns = ['B08101_001E', 'B08101_049E']
    for col in columns:
        denominators = addKey(df, denominators, col)

    #~~~~~~~~~~~~~~~
    # Step 3)
    # Run the Calculation + final mods
    # ( value[3] / (value[1]-value[2]) )*100
    #~~~~~~~~~~~~~~~
    fi['numerator'] = numerators.sum(axis=1)
    fi['denominator'] = denominators.iloc[:, 0] - denominators.iloc[:, 1]
    fi = fi[fi['denominator'] != 0]  # Delete Rows where the 'denominator' column is 0
    fi['final'] = (fi['numerator'] / fi['denominator']) * 100

    #~~~~~~~~~~~~~~~
    # Step 4)
    # Add Special Baltimore City Data
    #~~~~~~~~~~~~~~~
    url = ('https://api.census.gov/data/20' + str(year)
           + '/acs/acs5/subject?get=NAME,S0801_C01_004E&for=county%3A510&in=state%3A24'
           + '&key=829bf6f2e037372acbba32ba5731647c5127fdb0')
    table = pd.read_json(url, orient='records')
    fi['final']['Baltimore City'] = float(table.loc[1, table.columns[1]])
    return fi['final']

"""
WITH tbl AS (
    select csa,
        ( value[3] / nullif( (value[1]-value[2]) ,0) )*100::numeric as result
    from vital_signs.get_acs_vars_csa_and_bc('2013',ARRAY['B08101_001E','B08101_049E','B08101_017E'])
)
update vital_signs.data
set carpool = result from tbl
where data2.csa = tbl.csa and update_data_year = '2014' and data_year = '2013';
"""
```
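Step 4 works because the Census API returns a JSON array whose first row is a header, so `pd.read_json` places the header at row 0 and Baltimore City's value at row 1; that is why the code reads `table.loc[1, table.columns[1]]`. A minimal illustration of that shape, with a made-up value:

```python
# Illustration only: mimics the shape of a Census API response.
import pandas as pd

raw = [["NAME", "S0801_C01_004E", "state", "county"],      # header row
       ["Baltimore city, Maryland", "9.1", "24", "510"]]   # example value, not real data
table = pd.DataFrame(raw)
print(float(table.loc[1, table.columns[1]]))  # -> 9.1
```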

drvalone.py

```python
#export
#File: drvalone.py
#Author: Charles Karpati
#Date: 1/17/19
#Section: Bnia
#Email: karpati1@umbc.edu
#Description:
# Uses ACS Table B08101 - MEANS OF TRANSPORTATION TO WORK BY AGE
# Universe: Workers 16 Years and Over
# Table Creates: othrcom, drvalone, carpool, pubtran, walked
#purpose: Produce Sustainability - Percent of Population that Drove Alone to Work Indicator
#input: Year
#output:

import pandas as pd
import glob

def drvalone(year):
    def getColName(df, col): return df.columns[df.columns.str.contains(pat=col)][0]
    def getColByName(df, col): return df[getColName(df, col)]
    def addKey(df, fi, col):
        key = getColName(df, col)
        val = getColByName(df, col)
        fi[key] = val
        return fi
    def nullIfEqual(df, c1, c2):
        return df.apply(lambda x: x[getColName(df, c1)] + x[getColName(df, c2)]
                        if x[getColName(df, c1)] + x[getColName(df, c2)] != 0 else 0, axis=1)
    def sumInts(df): return df.sum(numeric_only=True)

    #~~~~~~~~~~~~~~~
    # Step 1)
    # Fetch Tract Files w/ CSA Labels by Name from the 2_cleaned folder.
    #~~~~~~~~~~~~~~~
    fileName = ''
    for name in glob.glob('AcsDataClean/B08101*5y' + str(year) + '_est.csv'):
        fileName = name
    df = pd.read_csv(fileName, index_col=0)

    # Group By CSA so that they may be operated on
    df = df.groupby('CSA')

    # Aggregate Numeric Values by Sum
    df = sumInts(df)
    # Add 'BALTIMORE' which is the SUM of all the CSAs

    #~~~~~~~~~~~~~~~
    # Step 2)
    # Prepare the columns
    #~~~~~~~~~~~~~~~
    # Final Dataframe
    fi = pd.DataFrame()
    columns = ['B08101_001E', 'B08101_049E', 'B08101_009E']
    for col in columns:
        fi = addKey(df, fi, col)

    # Numerators
    numerators = pd.DataFrame()
    columns = ['B08101_009E']
    for col in columns:
        numerators = addKey(df, numerators, col)

    # Denominators
    denominators = pd.DataFrame()
    columns = ['B08101_001E', 'B08101_049E']
    for col in columns:
        denominators = addKey(df, denominators, col)

    #~~~~~~~~~~~~~~~
    # Step 3)
    # Run the Calculation
    # ( value[3] / nullif((value[1]-value[2]),0) )*100
    #~~~~~~~~~~~~~~~
    fi['numerator'] = numerators.sum(axis=1)
    fi['denominator'] = denominators.iloc[:, 0] - denominators.iloc[:, 1]
    fi = fi[fi['denominator'] != 0]  # Delete Rows where the 'denominator' column is 0
    fi['final'] = (fi['numerator'] / fi['denominator']) * 100

    #~~~~~~~~~~~~~~~
    # Step 4)
    # Add Special Baltimore City Data
    #~~~~~~~~~~~~~~~
    url = ('https://api.census.gov/data/20' + str(year)
           + '/acs/acs5/subject?get=NAME,S0801_C01_003E&for=county%3A510&in=state%3A24'
           + '&key=829bf6f2e037372acbba32ba5731647c5127fdb0')
    table = pd.read_json(url, orient='records')
    fi['final']['Baltimore City'] = float(table.loc[1, table.columns[1]])
    return fi['final']

"""
WITH tbl AS (
    select csa,
        ( value[3] / nullif((value[1]-value[2]),0) )*100::numeric as result
    from vital_signs.get_acs_vars_csa_and_bc('2013',ARRAY['B08101_001E','B08101_049E','B08101_009E'])
)
update vital_signs.data
set drvalone = result from tbl
where data2.csa = tbl.csa and update_data_year = '2014' and data_year = '2013';
"""
```

elheat.py

```python
#export
#File: elheat.py
#Author: Charles Karpati
#Date: 1/17/19
#Section: Bnia
#Email: karpati1@umbc.edu
#Description:
# Uses ACS Table B25040 - HOUSE HEATING FUEL
# Universe: Occupied housing units
# Table Creates: elheat, heatgas
#purpose: Produce Sustainability - Percent of Residences Heated by Electricity Indicator
#input: Year
#output:

import pandas as pd
import glob

def elheat(year):
    def getColName(df, col): return df.columns[df.columns.str.contains(pat=col)][0]
    def getColByName(df, col): return df[getColName(df, col)]
    def addKey(df, fi, col):
        key = getColName(df, col)
        val = getColByName(df, col)
        fi[key] = val
        return fi
    def nullIfEqual(df, c1, c2):
        return df.apply(lambda x: x[getColName(df, c1)] + x[getColName(df, c2)]
                        if x[getColName(df, c1)] + x[getColName(df, c2)] != 0 else 0, axis=1)
    def sumInts(df): return df.sum(numeric_only=True)

    #~~~~~~~~~~~~~~~
    # Step 1)
    # Fetch Tract Files w/ CSA Labels by Name from the 2_cleaned folder.
    #~~~~~~~~~~~~~~~
    fileName = ''
    for name in glob.glob('AcsDataClean/B25040*5y' + str(year) + '_est.csv'):
        fileName = name
    df = pd.read_csv(fileName, index_col=0)

    # Group By CSA so that they may be operated on
    df = df.groupby('CSA')

    # Aggregate Numeric Values by Sum
    df = sumInts(df)
    # Add 'BALTIMORE' which is the SUM of all the CSAs

    #~~~~~~~~~~~~~~~
    # Step 2)
    # Prepare the columns
    #~~~~~~~~~~~~~~~
    # Final Dataframe
    fi = pd.DataFrame()
    columns = ['B25040_004E', 'B25040_001E']
    for col in columns:
        fi = addKey(df, fi, col)

    # Numerators
    numerators = pd.DataFrame()
    columns = ['B25040_004E']
    for col in columns:
        numerators = addKey(df, numerators, col)

    # Denominators
    denominators = pd.DataFrame()
    columns = ['B25040_001E']
    for col in columns:
        denominators = addKey(df, denominators, col)

    #~~~~~~~~~~~~~~~
    # Step 3)
    # Run the Calculation + final mods
    # ( value[1] / nullif(value[2],0) )*100
    #~~~~~~~~~~~~~~~
    fi['numerator'] = numerators.sum(axis=1)
    fi['denominator'] = denominators.sum(axis=1)
    fi = fi[fi['denominator'] != 0]  # Delete Rows where the 'denominator' column is 0
    fi['final'] = (fi['numerator'] / fi['denominator']) * 100
    return fi['final']

"""
WITH tbl AS (
    select csa,
        ( value[1] / nullif(value[2],0) )*100::numeric as result
    from vital_signs.get_acs_vars_csa_and_bc('2014',ARRAY['B25040_004E','B25040_001E'])
)
update vital_signs.data
set elheat = result from tbl
where data2.csa = tbl.csa and update_data_year = '2014' and data_year = '2014';
"""
```

empl.py

```python
#export
#File: empl.py
#Author: Charles Karpati
#Date: 1/17/19
#Section: Bnia
#Email: karpati1@umbc.edu
#Description:
# Uses ACS Table B23001 - SEX BY AGE BY EMPLOYMENT STATUS FOR THE POPULATION 16 YEARS AND OVER
# Universe: Population 16 years and over
# Table Creates: empl, unempl, unempr, nilf
#purpose: Produce Workforce and Economic Development - Percent Population 16-64 Employed Indicator
#input: Year
#output:

import pandas as pd
import glob

def empl(year):
    def getColName(df, col): return df.columns[df.columns.str.contains(pat=col)][0]
    def getColByName(df, col): return df[getColName(df, col)]
    def addKey(df, fi, col):
        key = getColName(df, col)
        val = getColByName(df, col)
        fi[key] = val
        return fi
    def nullIfEqual(df, c1, c2):
        return df.apply(lambda x: x[getColName(df, c1)] + x[getColName(df, c2)]
                        if x[getColName(df, c1)] + x[getColName(df, c2)] != 0 else 0, axis=1)
    def sumInts(df): return df.sum(numeric_only=True)

    #~~~~~~~~~~~~~~~
    # Step 1)
    # Fetch Tract Files w/ CSA Labels by Name from the 2_cleaned folder.
    #~~~~~~~~~~~~~~~
    fileName = ''
    for name in glob.glob('AcsDataClean/B23001*5y' + str(year) + '_est.csv'):
        fileName = name
    df = pd.read_csv(fileName, index_col=0)

    # Group By CSA so that they may be operated on
    df = df.groupby('CSA')

    # Aggregate Numeric Values by Sum
    df = sumInts(df)
    # Add 'BALTIMORE' which is the SUM of all the CSAs

    #~~~~~~~~~~~~~~~
    # Step 2)
    # Prepare the columns
    #~~~~~~~~~~~~~~~
    # Final Dataframe
    fi = pd.DataFrame()
    columns = ['B23001_003E', 'B23001_010E', 'B23001_017E', 'B23001_024E', 'B23001_031E',
               'B23001_038E', 'B23001_045E', 'B23001_052E', 'B23001_059E', 'B23001_066E',
               'B23001_089E', 'B23001_096E', 'B23001_103E', 'B23001_110E', 'B23001_117E',
               'B23001_124E', 'B23001_131E', 'B23001_138E', 'B23001_145E', 'B23001_152E',
               'B23001_007E', 'B23001_014E', 'B23001_021E', 'B23001_028E', 'B23001_035E',
               'B23001_042E', 'B23001_049E', 'B23001_056E', 'B23001_063E', 'B23001_070E',
               'B23001_093E', 'B23001_100E', 'B23001_107E', 'B23001_114E', 'B23001_121E',
               'B23001_128E', 'B23001_135E', 'B23001_142E', 'B23001_149E', 'B23001_156E']
    for col in columns:
        fi = addKey(df, fi, col)

    # Numerators
    numerators = pd.DataFrame()
    columns = ['B23001_007E', 'B23001_014E', 'B23001_021E', 'B23001_028E', 'B23001_035E',
               'B23001_042E', 'B23001_049E', 'B23001_056E', 'B23001_063E', 'B23001_070E',
               'B23001_093E', 'B23001_100E', 'B23001_107E', 'B23001_114E', 'B23001_121E',
               'B23001_128E', 'B23001_135E', 'B23001_142E', 'B23001_149E', 'B23001_156E']
    for col in columns:
        numerators = addKey(df, numerators, col)

    # Denominators
    denominators = pd.DataFrame()
    columns = ['B23001_003E', 'B23001_010E', 'B23001_017E', 'B23001_024E', 'B23001_031E',
               'B23001_038E', 'B23001_045E', 'B23001_052E', 'B23001_059E', 'B23001_066E',
               'B23001_089E', 'B23001_096E', 'B23001_103E', 'B23001_110E', 'B23001_117E',
               'B23001_124E', 'B23001_131E', 'B23001_138E', 'B23001_145E', 'B23001_152E']
    for col in columns:
        denominators = addKey(df, denominators, col)

    #~~~~~~~~~~~~~~~
    # Step 3)
    # Run the Calculation
    # ( (value[21]+value[22]+...+value[40])        -- civilian labor force employed, 16-64
    #   / nullif( (value[1]+value[2]+...+value[20]), 0)  -- population 16 to 64
    # )*100
    #~~~~~~~~~~~~~~~
    fi['numerator'] = numerators.sum(axis=1)
    fi['denominator'] = denominators.sum(axis=1)
    fi = fi[fi['denominator'] != 0]  # Delete Rows where the 'denominator' column is 0
    fi['final'] = (fi['numerator'] / fi['denominator']) * 100
    return fi['final']

"""
WITH tbl AS (
    select csa,
        ( (value[21]+value[22]+value[23]+value[24]+value[25]+value[26]+value[27]+value[28]
           +value[29]+value[30]+value[31]+value[32]+value[33]+value[34]+value[35]+value[36]
           +value[37]+value[38]+value[39]+value[40])  -- civil labor force empl 16-64
          / nullif( (value[1]+value[2]+value[3]+value[4]+value[5]+value[6]+value[7]+value[8]
           +value[9]+value[10]+value[11]+value[12]+value[13]+value[14]+value[15]+value[16]
           +value[17]+value[18]+value[19]+value[20])  -- population 16 to 64
          ,0) )*100::numeric as result
    from vital_signs.get_acs_vars_csa_and_bc('2014',ARRAY[
        'B23001_003E','B23001_010E','B23001_017E','B23001_024E','B23001_031E','B23001_038E',
        'B23001_045E','B23001_052E','B23001_059E','B23001_066E','B23001_089E','B23001_096E',
        'B23001_103E','B23001_110E','B23001_117E','B23001_124E','B23001_131E','B23001_138E',
        'B23001_145E','B23001_152E','B23001_007E','B23001_014E','B23001_021E','B23001_028E',
        'B23001_035E','B23001_042E','B23001_049E','B23001_056E','B23001_063E','B23001_070E',
        'B23001_093E','B23001_100E','B23001_107E','B23001_114E','B23001_121E','B23001_128E',
        'B23001_135E','B23001_142E','B23001_149E','B23001_156E'])
)
update vital_signs.data
set empl = result from tbl
where data2.csa = tbl.csa and update_data_year = '2014' and data_year = '2014';
"""
```

fam.py

```python
#export
#File: fam.py
#Author: Charles Karpati
#Date: 4/16/19
#Section: Bnia
#Email: karpati1@umbc.edu
#Description:
# Uses ACS Table B11005 - HOUSEHOLDS BY PRESENCE OF PEOPLE UNDER 18 YEARS BY HOUSEHOLD TYPE
# Universe: Households
# Table Creates: hhs, fam, femhhs
#purpose:
#input: Year
#output:

import pandas as pd
import glob

def fam(year):
    def getColName(df, col): return df.columns[df.columns.str.contains(pat=col)][0]
    def getColByName(df, col): return df[getColName(df, col)]
    def addKey(df, fi, col):
        key = getColName(df, col)
        val = getColByName(df, col)
        fi[key] = val
        return fi
    def nullIfEqual(df, c1, c2):
        return df.apply(lambda x: x[getColName(df, c1)] + x[getColName(df, c2)]
                        if x[getColName(df, c1)] + x[getColName(df, c2)] != 0 else 0, axis=1)
    def sumInts(df): return df.sum(numeric_only=True)

    #~~~~~~~~~~~~~~~
    # Step 1)
    # Fetch Tract Files w/ CSA Labels by Name from the 2_cleaned folder.
    #~~~~~~~~~~~~~~~
    fileName = ''
    for name in glob.glob('AcsDataClean/B11005*5y' + str(year) + '_est.csv'):
        fileName = name
    df = pd.read_csv(fileName, index_col=0)

    # Group By CSA so that they may be operated on
    df = df.groupby('CSA')

    # Aggregate Numeric Values by Sum
    df = df.sum(numeric_only=True)

    df1 = pd.DataFrame()
    df1['CSA'] = df.index
    df1.set_index('CSA', drop=True, inplace=True)

    # DIFFERENCES IN TABLE NAMES EXIST BETWEEN 16 and 17. 17 has no comma.
    rootStr = 'B11005_007E_Total_Households_with_one_or_more_people_under_18_years_Family_households_Other_family_Female_householder'
    str16 = rootStr + ',_no_husband_present'
    str17 = rootStr + '_no_husband_present'

    # Group By CSA so that they may be operated on
    df = df.groupby('CSA')

    # Aggregate Numeric Values by Sum
    df = df.sum(numeric_only=True)

    # Delete Unassigned--Jail
    df = df[df.index != 'Unassigned--Jail']

    # Move Baltimore to Bottom
    bc = df.loc['Baltimore City']
    df = df.drop(df.index[1])
    df.loc['Baltimore City'] = bc

    df1 = pd.DataFrame()
    df1['CSA'] = df.index
    df1.set_index('CSA', drop=True, inplace=True)

    # Actually produce the data
    df1['total'] = df['B11005_001E_Total']
    df1['18Under'] = df['B11005_002E_Total_Households_with_one_or_more_people_under_18_years'] / df1['total'] * 100
    return df1['18Under']
```

female.py

```python
#export
#File: female.py
#Author: Charles Karpati
#Date: 4/16/19
#Section: Bnia
#Email: karpati1@umbc.edu
#Description:
# Uses ACS Table B01001 - SEX BY AGE
# Universe: Total population
# Table Creates: tpop, female, male, age5, age18, age24, age64, age65
#purpose:
#input: Year
#output:

import pandas as pd
import glob

def female(year):
    def getColName(df, col): return df.columns[df.columns.str.contains(pat=col)][0]
    def getColByName(df, col): return df[getColName(df, col)]
    def addKey(df, fi, col):
        key = getColName(df, col)
        val = getColByName(df, col)
        fi[key] = val
        return fi
    def nullIfEqual(df, c1, c2):
        return df.apply(lambda x: x[getColName(df, c1)] + x[getColName(df, c2)]
                        if x[getColName(df, c1)] + x[getColName(df, c2)] != 0 else 0, axis=1)
    def sumInts(df): return df.sum(numeric_only=True)

    #~~~~~~~~~~~~~~~
    # Step 1)
    # Fetch Tract Files w/ CSA Labels by Name from the 2_cleaned folder.
    #~~~~~~~~~~~~~~~
    fileName = ''
    for name in glob.glob('AcsDataClean/B01001*5y' + str(year) + '_est.csv'):
        fileName = name
    df = pd.read_csv(fileName, index_col=0)

    # Group By CSA so that they may be operated on
    df = df.groupby('CSA')

    # Aggregate Numeric Values by Sum
    df = df.sum(numeric_only=True)

    total = df['B01001_001E_Total']
    df1 = pd.DataFrame()
    df1['CSA'] = df.index
    df1.set_index('CSA', drop=True, inplace=True)

    df1['onlyTheLadies'] = df['B01001_026E_Total_Female']
    return df1['onlyTheLadies']
```

femhhs.py

```python
#export
#File: femhhs.py
#Author: Charles Karpati
#Date: 4/16/19
#Section: Bnia
#Email: karpati1@umbc.edu
#Description:
# Uses ACS Table B11005 - HOUSEHOLDS BY PRESENCE OF PEOPLE UNDER 18 YEARS BY HOUSEHOLD TYPE
# Universe: Households
# Table Creates: male, hhs, fam, femhhs
#purpose:
#input: Year
#output:

import pandas as pd
import glob

def femhhs(year):
    def getColName(df, col): return df.columns[df.columns.str.contains(pat=col)][0]
    def getColByName(df, col): return df[getColName(df, col)]
    def addKey(df, fi, col):
        key = getColName(df, col)
        val = getColByName(df, col)
        fi[key] = val
        return fi
    def nullIfEqual(df, c1, c2):
        return df.apply(lambda x: x[getColName(df, c1)] + x[getColName(df, c2)]
                        if x[getColName(df, c1)] + x[getColName(df, c2)] != 0 else 0, axis=1)
    def sumInts(df): return df.sum(numeric_only=True)

    #~~~~~~~~~~~~~~~
    # Step 1)
    # Fetch Tract Files w/ CSA Labels by Name from the 2_cleaned folder.
    #~~~~~~~~~~~~~~~
    fileName = ''
    for name in glob.glob('AcsDataClean/B11005*5y' + str(year) + '_est.csv'):
        fileName = name
    df = pd.read_csv(fileName, index_col=0)

    # Group By CSA so that they may be operated on
    df = df.groupby('CSA')

    # Aggregate Numeric Values by Sum
    df = df.sum(numeric_only=True)

    df1 = pd.DataFrame()
    df1['CSA'] = df.index
    df1.set_index('CSA', drop=True, inplace=True)

    # DIFFERENCES IN TABLE NAMES EXIST BETWEEN 16 and 17. 17 has no comma.
    rootStr = 'B11005_007E_Total_Households_with_one_or_more_people_under_18_years_Family_households_Other_family_Female_householder'
    str16 = rootStr + ',_no_husband_present'
    str17 = rootStr + '_no_husband_present'
    str19 = rootStr + ',_no_spouse_present'
    femhh = str17 if year == '17' else str19 if year == '19' else str16

    # Actually produce the data
    df1['total'] = df['B11005_001E_Total']
    df1['18Under'] = df['B11005_002E_Total_Households_with_one_or_more_people_under_18_years'] / df1['total'] * 100
    df1['FemaleHH'] = df[femhh] / df['B11005_002E_Total_Households_with_one_or_more_people_under_18_years'] * 100
    df1['FamHHChildrenUnder18'] = df['B11005_003E_Total_Households_with_one_or_more_people_under_18_years_Family_households']
    df1['FamHHChildrenOver18'] = df['B11005_012E_Total_Households_with_no_people_under_18_years_Family_households']
    df1['FamHH'] = df1['FamHHChildrenOver18'] + df1['FamHHChildrenUnder18']
    return df1['FemaleHH']
```

heatgas.py

```python
#export
#File: heatgas.py
#Author: Charles Karpati
#Date: 1/17/19
#Section: Bnia
#Email: karpati1@umbc.edu
#Description:
# Uses ACS Table B25040 - HOUSE HEATING FUEL
# Universe: Occupied housing units
# Table Creates: elheat, heatgas
#purpose: Produce Sustainability - Percent of Residences Heated by Gas Indicator
#input: Year
#output:

import pandas as pd
import glob

def heatgas(year):
    def getColName(df, col): return df.columns[df.columns.str.contains(pat=col)][0]
    def getColByName(df, col): return df[getColName(df, col)]
    def addKey(df, fi, col):
        key = getColName(df, col)
        val = getColByName(df, col)
        fi[key] = val
        return fi
    def nullIfEqual(df, c1, c2):
        return df.apply(lambda x: x[getColName(df, c1)] + x[getColName(df, c2)]
                        if x[getColName(df, c1)] + x[getColName(df, c2)] != 0 else 0, axis=1)
    def sumInts(df): return df.sum(numeric_only=True)

    #~~~~~~~~~~~~~~~
    # Step 1)
    # Fetch Tract Files w/ CSA Labels by Name from the 2_cleaned folder.
    #~~~~~~~~~~~~~~~
    fileName = ''
    for name in glob.glob('AcsDataClean/B25040*5y' + str(year) + '_est.csv'):
        fileName = name
    df = pd.read_csv(fileName, index_col=0)

    # Group By CSA so that they may be operated on
    df = df.groupby('CSA')

    # Aggregate Numeric Values by Sum
    df = sumInts(df)
    # Add 'BALTIMORE' which is the SUM of all the CSAs

    #~~~~~~~~~~~~~~~
    # Step 2)
    # Prepare the columns
    #~~~~~~~~~~~~~~~
    # Final Dataframe
    fi = pd.DataFrame()
    columns = ['B25040_002E', 'B25040_001E']
    for col in columns:
        fi = addKey(df, fi, col)

    # Numerators
    numerators = pd.DataFrame()
    columns = ['B25040_002E']
    for col in columns:
        numerators = addKey(df, numerators, col)

    # Denominators
    denominators = pd.DataFrame()
    columns = ['B25040_001E']
    for col in columns:
        denominators = addKey(df, denominators, col)

    #~~~~~~~~~~~~~~~
    # Step 3)
    # Run the Calculation
    # ( value[1] / nullif(value[2],0) )*100
    #~~~~~~~~~~~~~~~
    fi['numerator'] = numerators.sum(axis=1)
    fi['denominator'] = denominators.sum(axis=1)
    fi = fi[fi['denominator'] != 0]  # Delete Rows where the 'denominator' column is 0
    fi['final'] = (fi['numerator'] / fi['denominator']) * 100
    return fi['final']

"""
WITH tbl AS (
    select csa,
        ( value[1] / nullif(value[2],0) )*100::numeric as result
    from vital_signs.get_acs_vars_csa_and_bc('2014',ARRAY['B25040_002E','B25040_001E'])
)
update vital_signs.data
set heatgas = result from tbl
where data2.csa = tbl.csa and update_data_year = '2014' and data_year = '2014';
"""
```

hh25inc.py

```python
#export
#File: hh25inc.py
#Author: Charles Karpati
#Date: 1/17/19
#Section: Bnia
#Email: karpati1@umbc.edu
#Description:
# Uses ACS Table B19001 - HOUSEHOLD INCOME IN THE PAST 12 MONTHS
# (IN 2017 INFLATION-ADJUSTED DOLLARS)
# Table Creates: hh25, hh40, hh60, hh75, hhm75, mhhi
#purpose: Produce Household Income Under 25K Indicator
#input: Year
#output:

import pandas as pd
import glob

def hh25inc(year):
    def getColName(df, col): return df.columns[df.columns.str.contains(pat=col)][0]
    def getColByName(df, col): return df[getColName(df, col)]
    def nullIfEqual(df, c1, c2):
        return df.apply(lambda x: x[getColName(df, c1)] + x[getColName(df, c2)]
                        if x[getColName(df, c1)] + x[getColName(df, c2)] != 0 else 0, axis=1)
    def sumInts(df): return df.sum(numeric_only=True)

    #~~~~~~~~~~~~~~~
    # Step 1)
    # Fetch Tract Files w/ CSA Labels by Name from the 2_cleaned folder.
    #~~~~~~~~~~~~~~~
    fileName = ''
    for name in glob.glob('AcsDataClean/B19001*5y' + str(year) + '_est.csv'):
        fileName = name
    df = pd.read_csv(fileName, index_col=0)

    # Group By CSA so that they may be operated on
    df = df.groupby('CSA')

    # Aggregate Numeric Values by Sum
    df = sumInts(df)
    # Add 'BALTIMORE' which is the SUM of all the CSAs

    #~~~~~~~~~~~~~~~
    # Step 2)
    # Prepare the columns
    #~~~~~~~~~~~~~~~
    # Create a new dataframe and append cols 001 (total) and 002-005 (brackets under $25K)
    fi = pd.DataFrame()
    for col in ['001', '002', '003', '004', '005']:
        fi[getColName(df, col)] = getColByName(df, col)

    # Delete Rows where the 'denominator' column is 0 -> like the Jail
    fi = fi[fi[fi.columns[0]] != 0]

    #~~~~~~~~~~~~~~~
    # Step 3)
    # Run the Calculation
    #~~~~~~~~~~~~~~~
    return fi.apply(lambda x:
        ((x[fi.columns[1]] + x[fi.columns[2]] + x[fi.columns[3]] + x[fi.columns[4]])
         / x[fi.columns[0]]) * 100, axis=1)
```

hh40inc.py

```python
#export
#File: hh40inc.py
#Author: Charles Karpati
#Date: 1/17/19
#Section: Bnia
#Email: karpati1@umbc.edu
#Description:
# Uses ACS Table B19001 - HOUSEHOLD INCOME IN THE PAST 12 MONTHS
# (IN 2017 INFLATION-ADJUSTED DOLLARS)
# Table Creates: hh25, hh40, hh60, hh75, hhm75, mhhi
#purpose: Produce Household Income 25K-40K Indicator
#input: Year
#output:

import pandas as pd
import glob

def hh40inc(year):
    def getColName(df, col): return df.columns[df.columns.str.contains(pat=col)][0]
    def getColByName(df, col): return df[getColName(df, col)]
    def nullIfEqual(df, c1, c2):
        return df.apply(lambda x: x[getColName(df, c1)] + x[getColName(df, c2)]
                        if x[getColName(df, c1)] + x[getColName(df, c2)] != 0 else 0, axis=1)
    def sumInts(df): return df.sum(numeric_only=True)

    #~~~~~~~~~~~~~~~
    # Step 1)
    # Fetch Tract Files w/ CSA Labels by Name from the 2_cleaned folder.
    #~~~~~~~~~~~~~~~
    fileName = ''
    for name in glob.glob('AcsDataClean/B19001*5y' + str(year) + '_est.csv'):
        fileName = name
    df = pd.read_csv(fileName, index_col=0)

    # Group By CSA so that they may be operated on
    df = df.groupby('CSA')

    # Aggregate Numeric Values by Sum
    df = sumInts(df)
    # Add 'BALTIMORE' which is the SUM of all the CSAs

    #~~~~~~~~~~~~~~~
    # Step 2)
    # Prepare the columns
    #~~~~~~~~~~~~~~~
    # Create a new dataframe and append cols 001 (total) and 006-008 ($25K-$40K brackets)
    fi = pd.DataFrame()
    for col in ['001', '006', '007', '008']:
        fi[getColName(df, col)] = getColByName(df, col)

    # Delete Rows where the 'denominator' column is 0 -> like the Jail
    fi = fi[fi[fi.columns[0]] != 0]

    #~~~~~~~~~~~~~~~
    # Step 3)
    # Run the Calculation
    #~~~~~~~~~~~~~~~
    return fi.apply(lambda x:
        ((x[fi.columns[1]] + x[fi.columns[2]] + x[fi.columns[3]])
         / x[fi.columns[0]]) * 100, axis=1)

"""
/* hh40inc */ --
WITH tbl AS (
    select csa,
        ( (value[1] + value[2] + value[3]) / value[4] )*100 as result
    from vital_signs.get_acs_vars_csa_and_bc('2013',ARRAY['B19001_006E','B19001_007E','B19001_008E','B19001_001E'])
)
UPDATE vital_signs.data
set hh40inc = result from tbl
where data.csa = tbl.csa and data_year = '2013';
"""
```

hh60inc.py

```python
#export
#File: hh60inc.py
#Author: Charles Karpati
#Date: 1/17/19
#Section: Bnia
#Email: karpati1@umbc.edu
#Description:
# Uses ACS Table B19001 - HOUSEHOLD INCOME IN THE PAST 12 MONTHS
# (IN 2017 INFLATION-ADJUSTED DOLLARS)
# Table Creates: hh25, hh40, hh60, hh75, hhm75, mhhi
#purpose: Produce Household Income 40K-60K Indicator
#input: Year
#output:

import pandas as pd
import glob

def hh60inc(year):
    def getColName(df, col): return df.columns[df.columns.str.contains(pat=col)][0]
    def getColByName(df, col): return df[getColName(df, col)]
    def nullIfEqual(df, c1, c2):
        return df.apply(lambda x: x[getColName(df, c1)] + x[getColName(df, c2)]
                        if x[getColName(df, c1)] + x[getColName(df, c2)] != 0 else 0, axis=1)
    def sumInts(df): return df.sum(numeric_only=True)

    #~~~~~~~~~~~~~~~
    # Step 1)
    # Fetch Tract Files w/ CSA Labels by Name from the 2_cleaned folder.
    #~~~~~~~~~~~~~~~
    fileName = ''
    for name in glob.glob('AcsDataClean/B19001*5y' + str(year) + '_est.csv'):
        fileName = name
    df = pd.read_csv(fileName, index_col=0)

    # Group By CSA so that they may be operated on
    df = df.groupby('CSA')

    # Aggregate Numeric Values by Sum
    df = sumInts(df)
    # Add 'BALTIMORE' which is the SUM of all the CSAs

    #~~~~~~~~~~~~~~~
    # Step 2)
    # Prepare the columns
    #~~~~~~~~~~~~~~~
    # Create a new dataframe and append cols 001 (total) and 009-011 ($40K-$60K brackets)
    fi = pd.DataFrame()
    for col in ['001', '009', '010', '011']:
        fi[getColName(df, col)] = getColByName(df, col)

    # Delete Rows where the 'denominator' column is 0 -> like the Jail
    fi = fi[fi[fi.columns[0]] != 0]

    #~~~~~~~~~~~~~~~
    # Step 3)
    # Run the Calculation
    #~~~~~~~~~~~~~~~
    return fi.apply(lambda x:
        ((x[fi.columns[1]] + x[fi.columns[2]] + x[fi.columns[3]])
         / x[fi.columns[0]]) * 100, axis=1)

"""
/* hh60inc */ --
WITH tbl AS (
    select csa,
        ( (value[1] + value[2] + value[3]) / value[4] )*100 as result
    from vital_signs.get_acs_vars_csa_and_bc('2013',ARRAY['B19001_009E','B19001_010E','B19001_011E','B19001_001E'])
)
UPDATE vital_signs.data
set hh60inc = result from tbl
where data.csa = tbl.csa and data_year = '2013';
"""
```

hh75inc.py

```python
#export
#File: hh75inc.py
#Author: Charles Karpati
#Date: 1/17/19
#Section: Bnia
#Email: karpati1@umbc.edu
#Description:
# Uses ACS Table B19001 - HOUSEHOLD INCOME IN THE PAST 12 MONTHS
# (IN 2017 INFLATION-ADJUSTED DOLLARS)
# Table Creates: hh25, hh40, hh60, hh75, hhm75, mhhi
#purpose: Produce Household Income 60K-75K Indicator
#input: Year
#output:

import pandas as pd
import glob

def hh75inc(year):
    def getColName(df, col): return df.columns[df.columns.str.contains(pat=col)][0]
    def getColByName(df, col): return df[getColName(df, col)]
    def nullIfEqual(df, c1, c2):
        return df.apply(lambda x: x[getColName(df, c1)] + x[getColName(df, c2)]
                        if x[getColName(df, c1)] + x[getColName(df, c2)] != 0 else 0, axis=1)
    def sumInts(df): return df.sum(numeric_only=True)

    #~~~~~~~~~~~~~~~
    # Step 1)
    # Fetch Tract Files w/ CSA Labels by Name from the 2_cleaned folder.
    #~~~~~~~~~~~~~~~
    fileName = ''
    for name in glob.glob('AcsDataClean/B19001*5y' + str(year) + '_est.csv'):
        fileName = name
    df = pd.read_csv(fileName, index_col=0)

    # Group By CSA so that they may be operated on
    df = df.groupby('CSA')

    # Aggregate Numeric Values by Sum
    df = sumInts(df)
    # Add 'BALTIMORE' which is the SUM of all the CSAs

    #~~~~~~~~~~~~~~~
    # Step 2)
    # Prepare the columns
    #~~~~~~~~~~~~~~~
    # Create a new dataframe and append cols 001 (total) and 012 ($60K-$75K bracket)
    fi = pd.DataFrame()
    for col in ['001', '012']:
        fi[getColName(df, col)] = getColByName(df, col)

    # Delete Rows where the 'denominator' column is 0 -> like the Jail
    fi = fi[fi[fi.columns[0]] != 0]

    #~~~~~~~~~~~~~~~
    # Step 3)
    # Run the Calculation
    # 12/1
    #~~~~~~~~~~~~~~~
    return fi.apply(lambda x: (x[fi.columns[1]] / x[fi.columns[0]]) * 100, axis=1)

"""
/* hh75inc */ --
WITH tbl AS (
    select csa,
        ( value[1] / value[2] )*100 as result
    from vital_signs.get_acs_vars_csa_and_bc('2013',ARRAY['B19001_012E','B19001_001E'])
)
UPDATE vital_signs.data
set hh75inc = result from tbl
where data.csa = tbl.csa and data_year = '2013';
"""
```

hhchpov.py

```python
#export
#File: hhchpov.py
#Author: Charles Karpati
#Date: 1/17/19
#Section: Bnia
#Email: karpati1@umbc.edu
#Description:
# Uses ACS Table B17001 - POVERTY STATUS IN THE PAST 12 MONTHS BY SEX BY AGE
# Universe: Population for whom poverty status is determined
#purpose: Produce Child Poverty Indicator
#input: Year
#output:

import pandas as pd
import glob

def hhchpov(year):
    def getColName(df, col): return df.columns[df.columns.str.contains(pat=col)][0]
    def getColByName(df, col): return df[getColName(df, col)]
    def addKey(df, fi, col):
        key = getColName(df, col)
        val = getColByName(df, col)
        fi[key] = val
        return fi
    def nullIfEqual(df, c1, c2):
        return df.apply(lambda x: x[getColName(df, c1)] + x[getColName(df, c2)]
                        if x[getColName(df, c1)] + x[getColName(df, c2)] != 0 else 0, axis=1)
    def sumInts(df): return df.sum(numeric_only=True)

    #~~~~~~~~~~~~~~~
    # Step 1)
    # Fetch Tract Files w/ CSA Labels by Name from the 2_cleaned folder.
    #~~~~~~~~~~~~~~~
    fileName = ''
    for name in glob.glob('AcsDataClean/B17001*5y' + str(year) + '_est.csv'):
        fileName = name
    df = pd.read_csv(fileName, index_col=0)

    # Group By CSA so that they may be operated on
    df = df.groupby('CSA')

    # Aggregate Numeric Values by Sum
    df = sumInts(df)
    # Add 'BALTIMORE' which is the SUM of all the CSAs

    #~~~~~~~~~~~~~~~
    # Step 2)
    # Prepare the columns
    #~~~~~~~~~~~~~~~
    # Final Dataframe
    fi = pd.DataFrame()
    columns = ['B17001_004E', 'B17001_005E', 'B17001_006E', 'B17001_007E', 'B17001_008E',
               'B17001_009E', 'B17001_018E', 'B17001_019E', 'B17001_020E', 'B17001_021E',
               'B17001_022E', 'B17001_023E', 'B17001_033E', 'B17001_034E', 'B17001_035E',
               'B17001_036E', 'B17001_037E', 'B17001_038E', 'B17001_047E', 'B17001_048E',
               'B17001_049E', 'B17001_050E', 'B17001_051E', 'B17001_052E']
    for col in columns:
        fi = addKey(df, fi, col)

    # Numerators
    numerators = pd.DataFrame()
    columns = ['B17001_004E', 'B17001_005E', 'B17001_006E', 'B17001_007E', 'B17001_008E',
               'B17001_009E', 'B17001_018E', 'B17001_019E', 'B17001_020E', 'B17001_021E',
               'B17001_022E', 'B17001_023E']
    for col in columns:
        numerators = addKey(df, numerators, col)

    # Denominators
    denominators = pd.DataFrame()
    columns = ['B17001_004E', 'B17001_005E', 'B17001_006E', 'B17001_007E', 'B17001_008E',
               'B17001_009E', 'B17001_018E', 'B17001_019E', 'B17001_020E', 'B17001_021E',
               'B17001_022E', 'B17001_023E', 'B17001_033E', 'B17001_034E', 'B17001_035E',
               'B17001_036E', 'B17001_037E', 'B17001_038E', 'B17001_047E', 'B17001_048E',
               'B17001_049E', 'B17001_050E', 'B17001_051E', 'B17001_052E']
    for col in columns:
        denominators = addKey(df, denominators, col)

    #~~~~~~~~~~~~~~~
    # Step 3)
    # Run the Calculation
    #~~~~~~~~~~~~~~~
    fi['numerator'] = numerators.sum(axis=1)
    fi['denominator'] = denominators.sum(axis=1)
    fi = fi[fi['denominator'] != 0]  # Delete Rows where the 'denominator' column is 0
    fi['final'] = (fi['numerator'] / fi['denominator']) * 100

    #~~~~~~~~~~~~~~~
    # Step 4)
    # Add Special Baltimore City Data
    #~~~~~~~~~~~~~~~
    url = ('https://api.census.gov/data/20' + str(year)
           + '/acs/acs5/subject?get=NAME,S1701_C03_002E&for=county%3A510&in=state%3A24'
           + '&key=829bf6f2e037372acbba32ba5731647c5127fdb0')
    table = pd.read_json(url, orient='records')
    fi['final']['Baltimore City'] = float(table.loc[1, table.columns[1]])
    return fi['final']

"""
WITH tbl AS (
    select csa,
        ( (value[1] + value[2] + value[3] + value[4] + value[5] + value[6] + value[7] + value[8]
           + value[9] + value[10] + value[11] + value[12])
          / nullif( (value[1] + value[2] + value[3] + value[4] + value[5] + value[6] + value[7]
           + value[8] + value[9] + value[10] + value[11] + value[12] + value[13] + value[14]
           + value[15] + value[16] + value[17] + value[18] + value[19] + value[20] + value[21]
           + value[22] + value[23] + value[24] ), 0) ) * 100::numeric as result
    from vital_signs.get_acs_vars_csa_and_bc('2014',ARRAY[
        'B17001_004E','B17001_005E','B17001_006E','B17001_007E','B17001_008E','B17001_009E',
        'B17001_018E','B17001_019E','B17001_020E','B17001_021E','B17001_022E','B17001_023E',
        'B17001_033E','B17001_034E','B17001_035E','B17001_036E','B17001_037E','B17001_038E',
        'B17001_047E','B17001_048E','B17001_049E','B17001_050E','B17001_051E','B17001_052E'])
)
update vital_signs.data
set hhchpov = result from tbl
where data2.csa = tbl.csa and update_data_year = '2014' and data_year = '2014';
"""
```

hhm75.py

```python
#export
#File: hhm75.py
#Author: Charles Karpati
#Date: 1/17/19
#Section: Bnia
#Email: karpati1@umbc.edu
#Description:
# Uses ACS Table B19001 - HOUSEHOLD INCOME IN THE PAST 12 MONTHS
# (IN 2017 INFLATION-ADJUSTED DOLLARS)
# Table Creates: hh25, hh40, hh60, hh75, hhm75, mhhi
#purpose: Produce Household Income Over 75K Indicator
#input: Year
#output:

import pandas as pd
import glob

def hhm75(year):
    def getColName(df, col): return df.columns[df.columns.str.contains(pat=col)][0]
    def getColByName(df, col): return df[getColName(df, col)]
    def nullIfEqual(df, c1, c2):
        return df.apply(lambda x: x[getColName(df, c1)] + x[getColName(df, c2)]
                        if x[getColName(df, c1)] + x[getColName(df, c2)] != 0 else 0, axis=1)
    def sumInts(df): return df.sum(numeric_only=True)

    #~~~~~~~~~~~~~~~
    # Step 1)
    # Fetch Tract Files w/ CSA Labels by Name from the 2_cleaned folder.
    #~~~~~~~~~~~~~~~
    fileName = ''
    for name in glob.glob('AcsDataClean/B19001*5y' + str(year) + '_est.csv'):
        fileName = name
    df = pd.read_csv(fileName, index_col=0)

    # Group By CSA so that they may be operated on
    df = df.groupby('CSA')

    # Aggregate Numeric Values by Sum
    df = sumInts(df)
    # Add 'BALTIMORE' which is the SUM of all the CSAs

    #~~~~~~~~~~~~~~~
    # Step 2)
    # Prepare the columns
    #~~~~~~~~~~~~~~~
    # Create a new dataframe and append cols 001 (total) and 002-012 (brackets under $75K)
    fi = pd.DataFrame()
    for col in ['001', '002', '003', '004', '005', '006',
                '007', '008', '009', '010', '011', '012']:
        fi[getColName(df, col)] = getColByName(df, col)

    # Delete Rows where the 'denominator' column is 0 -> like the Jail
    fi = fi[fi[fi.columns[0]] != 0]

    #~~~~~~~~~~~~~~~
    # Step 3)
    # Run the Calculation: everything not in a bracket under $75K, over the total
    #~~~~~~~~~~~~~~~
    return fi.apply(lambda x:
        ((x[fi.columns[0]]
          - (x[fi.columns[1]] + x[fi.columns[2]] + x[fi.columns[3]] + x[fi.columns[4]]
             + x[fi.columns[5]] + x[fi.columns[6]] + x[fi.columns[7]] + x[fi.columns[8]]
             + x[fi.columns[9]] + x[fi.columns[10]] + x[fi.columns[11]]))
         / x[fi.columns[0]]) * 100, axis=1)
```

hhpov.py

#export
#File: hhpov.py
#Author: Charles Karpati
#Date: 1/17/19
#Section: Bnia
#Email: karpati1@umbc.edu
#Description:
#  Uses ACS Table B17017 - Poverty Status in the Past 12 Months by Household Type by Age of Householder
#  Universe: Households
#purpose: Produce Household Poverty Indicator
#input: Year
#output:

import pandas as pd
import glob

def hhpov( year ):
    def getColName (df, col): return df.columns[df.columns.str.contains(pat = col)][0]
    def getColByName (df, col): return df[getColName(df, col)]
    # Row-wise sum of c1 and c2; the result is 0 iff both columns are 0.
    def nullIfEqual(df, c1, c2):
        return df.apply(lambda x: x[getColName(df, c1)] + x[getColName(df, c2)]
                        if x[getColName(df, c1)] + x[getColName(df, c2)] != 0 else 0, axis=1)
    def sumInts(df): return df.sum(numeric_only=True)

    # Step 1) Fetch Tract Files w/CSA Labels by Name from the 2_cleaned folder.
    fileName = ''
    for name in glob.glob('AcsDataClean/B17017*5y' + str(year) + '_est.csv'):
        fileName = name
    df = pd.read_csv(fileName, index_col=0)
    # Group By CSA so that the tracts may be operated on,
    # then aggregate the numeric values by sum.
    df = df.groupby('CSA')
    df = sumInts(df)
    # Add 'BALTIMORE' which is the SUM of all the CSAs

    # Step 2) Prepare the columns.
    fi = pd.DataFrame()
    for col in ['003', '032']:
        fi[getColName(df, col)] = getColByName(df, col)

    # Construct the denominator: the sum of columns 003 and 032 (0 iff both are 0).
    fi['denominator'] = nullIfEqual(df, '003', '032')
    # Delete Rows where the 'denominator' column is 0
    fi = fi[fi['denominator'] != 0]

    # Step 3) Run the Calculation.
    return fi.apply(lambda x: (x[fi.columns[0]] / x['denominator']) * 100, axis=1)

hhs.py

#export
#File: hhs.py
#Author: Charles Karpati
#Date: 4/16/19
#Section: Bnia
#Email: karpati1@umbc.edu
#Description:
#  Uses ACS Table B11005 - HOUSEHOLDS BY PRESENCE OF PEOPLE UNDER 18 YEARS BY HOUSEHOLD TYPE
#  Universe: Households
#  Table Creates: hhs, fam, femhhs
#purpose:
#input: Year
#output:

import pandas as pd
import glob

def hhs( year ):
    def getColName (df, col): return df.columns[df.columns.str.contains(pat = col)][0]
    def getColByName (df, col): return df[getColName(df, col)]
    # Copy one column from the aggregated table into the working frame.
    def addKey(df, fi, col): fi[getColName(df, col)] = getColByName(df, col); return fi
    # Row-wise sum of c1 and c2; the result is 0 iff both columns are 0.
    def nullIfEqual(df, c1, c2):
        return df.apply(lambda x: x[getColName(df, c1)] + x[getColName(df, c2)]
                        if x[getColName(df, c1)] + x[getColName(df, c2)] != 0 else 0, axis=1)
    def sumInts(df): return df.sum(numeric_only=True)

    # Step 1) Fetch Tract Files w/CSA Labels by Name from the 2_cleaned folder.
    fileName = ''
    for name in glob.glob('AcsDataClean/B11005*5y' + str(year) + '_est.csv'):
        fileName = name
    df = pd.read_csv(fileName, index_col=0)
    # Group By CSA so that the tracts may be operated on,
    # then aggregate the numeric values by sum.
    df = df.groupby('CSA')
    df = df.sum(numeric_only=True)

    # Step 2) Return the total-households column, indexed by CSA.
    df1 = pd.DataFrame()
    df1['CSA'] = df.index
    df1.set_index('CSA', drop=True, inplace=True)
    df1['tot'] = df['B11005_001E_Total']
    return df1['tot']
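Several of these modules carry the comment "Add 'BALTIMORE' which is the SUM of all the CSAs" without acting on it. If a city-wide row were wanted from the tract data itself (rather than fetched from the Census API, as some modules below do), one hedged way to append it to a result frame like df1 above:

    # A sketch only: append a city-wide row as the sum of all CSA rows.
    # Whether a plain CSA sum matches the separately published city value is an assumption.
    df1.loc['Baltimore City'] = df1.sum()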

hsdipl.py

#export
#File: hsdipl.py
#Author: Charles Karpati
#Date: 1/17/19
#Section: Bnia
#Email: karpati1@umbc.edu
#Description:
#  Uses ACS Table B06009 - PLACE OF BIRTH BY EDUCATIONAL ATTAINMENT IN THE UNITED STATES
#purpose: Produce Workforce and Economic Development - Percent Population (25 Years and over)
#         With High School Diploma and Some College or Associates Degree
#Table Uses: B06009 - lesshs, hsdipl, bahigher
#input: Year
#output:

import pandas as pd
import glob

def hsdipl( year ):
    def getColName (df, col): return df.columns[df.columns.str.contains(pat = col)][0]
    def getColByName (df, col): return df[getColName(df, col)]
    # Copy one column from the aggregated table into the working frame.
    def addKey(df, fi, col): fi[getColName(df, col)] = getColByName(df, col); return fi
    # Row-wise sum of c1 and c2; the result is 0 iff both columns are 0.
    def nullIfEqual(df, c1, c2):
        return df.apply(lambda x: x[getColName(df, c1)] + x[getColName(df, c2)]
                        if x[getColName(df, c1)] + x[getColName(df, c2)] != 0 else 0, axis=1)
    def sumInts(df): return df.sum(numeric_only=True)

    # Step 1) Fetch Tract Files w/CSA Labels by Name from the 2_cleaned folder.
    fileName = ''
    for name in glob.glob('AcsDataClean/B06009*5y' + str(year) + '_est.csv'):
        fileName = name
    df = pd.read_csv(fileName, index_col=0)
    # Group By CSA so that the tracts may be operated on,
    # then aggregate the numeric values by sum.
    df = df.groupby('CSA')
    df = sumInts(df)
    # Add 'BALTIMORE' which is the SUM of all the CSAs

    # Step 2) Prepare the columns.
    # Final Dataframe
    fi = pd.DataFrame()
    for col in ['B06009_003E', 'B06009_004E', 'B06009_001E']:
        fi = addKey(df, fi, col)
    # Numerators
    numerators = pd.DataFrame()
    for col in ['B06009_003E', 'B06009_004E']:
        numerators = addKey(df, numerators, col)
    # Denominators
    denominators = pd.DataFrame()
    for col in ['B06009_001E']:
        denominators = addKey(df, denominators, col)

    # Step 3) Run the Calculation + final mods:
    # ( ( value[1] + value[2] ) / nullif(value[3],0) )*100
    fi['numerator'] = numerators.sum(axis=1)
    fi['denominator'] = denominators.sum(axis=1)
    # Delete Rows where the 'denominator' column is 0
    fi = fi[fi['denominator'] != 0]
    fi['final'] = (fi['numerator'] / fi['denominator']) * 100
    return fi['final']

""" /* */ --
WITH tbl AS (
    select csa, ( ( value[1] + value[2] ) / nullif(value[3],0) )*100::numeric as result
    from vital_signs.get_acs_vars_csa_and_bc('2014', ARRAY['B06009_003E','B06009_004E','B06009_001E'])
)
update vital_signs.data set hsdipl = result from tbl
where data2.csa = tbl.csa and update_data_year = '2014' and data_year = '2014';
"""
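hsdipl above, and lesshs, novhcl, nohhint, sclemp, nilf, and trav14 below, all follow the same numerator-sum over denominator-sum pattern. A hedged sketch of that pattern as one parameterized function (acs_percent is a made-up name):

    import pandas as pd

    def acs_percent(df, numerator_cols, denominator_cols):
        # A sketch: df is the CSA-aggregated ACS table, and the column lists hold
        # exact column names (the real scripts match partial names via getColName).
        num = df[numerator_cols].sum(axis=1)
        den = df[denominator_cols].sum(axis=1)
        pct = (num / den) * 100
        return pct[den != 0]  # drop CSAs whose denominator is 0, as the scripts do

    # e.g. hsdipl would become:
    # acs_percent(df, ['B06009_003E', 'B06009_004E'], ['B06009_001E'])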

lesshs.py

#export
#File: lesshs.py
#Author: Charles Karpati
#Date: 1/17/19
#Section: Bnia
#Email: karpati1@umbc.edu
#Description:
#  Uses ACS Table B06009 - PLACE OF BIRTH BY EDUCATIONAL ATTAINMENT IN THE UNITED STATES
#purpose: Produce Workforce and Economic Development - Percent Population (25 Years and over)
#         With Less Than a High School Diploma or GED Indicator
#Table Uses: B06009 - lesshs, hsdipl, bahigher
#input: Year
#output:

import pandas as pd
import glob

def lesshs( year ):
    def getColName (df, col): return df.columns[df.columns.str.contains(pat = col)][0]
    def getColByName (df, col): return df[getColName(df, col)]
    # Copy one column from the aggregated table into the working frame.
    def addKey(df, fi, col): fi[getColName(df, col)] = getColByName(df, col); return fi
    # Row-wise sum of c1 and c2; the result is 0 iff both columns are 0.
    def nullIfEqual(df, c1, c2):
        return df.apply(lambda x: x[getColName(df, c1)] + x[getColName(df, c2)]
                        if x[getColName(df, c1)] + x[getColName(df, c2)] != 0 else 0, axis=1)
    def sumInts(df): return df.sum(numeric_only=True)

    # Step 1) Fetch Tract Files w/CSA Labels by Name from the 2_cleaned folder.
    fileName = ''
    for name in glob.glob('AcsDataClean/B06009*5y' + str(year) + '_est.csv'):
        fileName = name
    df = pd.read_csv(fileName, index_col=0)
    # Group By CSA so that the tracts may be operated on,
    # then aggregate the numeric values by sum.
    df = df.groupby('CSA')
    df = sumInts(df)
    # Add 'BALTIMORE' which is the SUM of all the CSAs

    # Step 2) Prepare the columns.
    # Final Dataframe
    fi = pd.DataFrame()
    for col in ['B06009_002E', 'B06009_001E']:
        fi = addKey(df, fi, col)
    # Numerators
    numerators = pd.DataFrame()
    for col in ['B06009_002E']:
        numerators = addKey(df, numerators, col)
    # Denominators
    denominators = pd.DataFrame()
    for col in ['B06009_001E']:
        denominators = addKey(df, denominators, col)

    # Step 3) Run the Calculation + final mods:
    # ( value[1] / nullif(value[2],0) )*100
    fi['numerator'] = numerators.sum(axis=1)
    fi['denominator'] = denominators.sum(axis=1)
    # Delete Rows where the 'denominator' column is 0
    fi = fi[fi['denominator'] != 0]
    fi['final'] = (fi['numerator'] / fi['denominator']) * 100
    return fi['final']

""" /* */ --
WITH tbl AS (
    select csa, ( value[1] / nullif(value[2],0) )*100::numeric as result
    from vital_signs.get_acs_vars_csa_and_bc('2014', ARRAY['B06009_002E','B06009_001E'])
)
update vital_signs.data set lesshs = result from tbl
where data2.csa = tbl.csa and update_data_year = '2014' and data_year = '2014';
"""

male.py

#export
#File: male.py
#Author: Charles Karpati
#Date: 4/16/19
#Section: Bnia
#Email: karpati1@umbc.edu
#Description:
#  Uses ACS Table B01001 - SEX BY AGE
#  Universe: Total population
#  Table Creates: tpop, female, male, age5 age18 age24 age64 age65
#purpose:
#input: Year
#output:

import pandas as pd
import glob

def male( year ):
    def getColName (df, col): return df.columns[df.columns.str.contains(pat = col)][0]
    def getColByName (df, col): return df[getColName(df, col)]
    # Copy one column from the aggregated table into the working frame.
    def addKey(df, fi, col): fi[getColName(df, col)] = getColByName(df, col); return fi
    # Row-wise sum of c1 and c2; the result is 0 iff both columns are 0.
    def nullIfEqual(df, c1, c2):
        return df.apply(lambda x: x[getColName(df, c1)] + x[getColName(df, c2)]
                        if x[getColName(df, c1)] + x[getColName(df, c2)] != 0 else 0, axis=1)
    def sumInts(df): return df.sum(numeric_only=True)

    # Step 1) Fetch Tract Files w/CSA Labels by Name from the 2_cleaned folder.
    fileName = ''
    for name in glob.glob('AcsDataClean/B01001*5y' + str(year) + '_est.csv'):
        fileName = name
    df = pd.read_csv(fileName, index_col=0)
    # Group By CSA so that the tracts may be operated on,
    # then aggregate the numeric values by sum.
    df = df.groupby('CSA')
    df = df.sum(numeric_only=True)

    # Step 2) Return the male-population column, indexed by CSA.
    df1 = pd.DataFrame()
    df1['CSA'] = df.index
    df1.set_index('CSA', drop=True, inplace=True)
    df1['onlyTheFellas'] = df['B01001_002E_Total_Male']
    return df1['onlyTheFellas']

mhhi.py

#export
#File: mhhi.py
#Author: Charles Karpati
#Date: 1/24/19
#Section: Bnia
#Email: karpati1@umbc.edu
#Description:
#  Uses ACS Table B19001 - HOUSEHOLD INCOME IN THE PAST 12 MONTHS (IN 2016 INFLATION-ADJUSTED DOLLARS)
#  Universe: Households
#  Table Creates: hh25 hh40 hh60 hh75 hhm75, mhhi
#purpose: Produce Median Household Income Indicator
#input: Year
#output:

import pandas as pd
import glob

def mhhi( year ):
    def getColName (df, col): return df.columns[df.columns.str.contains(pat = col)][0]
    def getColByName (df, col): return df[getColName(df, col)]
    # Copy one column from the aggregated table into the working frame.
    def addKey(df, fi, col): fi[getColName(df, col)] = getColByName(df, col); return fi
    # Row-wise sum of c1 and c2; the result is 0 iff both columns are 0.
    def nullIfEqual(df, c1, c2):
        return df.apply(lambda x: x[getColName(df, c1)] + x[getColName(df, c2)]
                        if x[getColName(df, c1)] + x[getColName(df, c2)] != 0 else 0, axis=1)
    def sumInts(df): return df.sum(numeric_only=True)

    # Step 1) Fetch Tract Files w/CSA Labels by Name from the 2_cleaned folder.
    fileName = ''
    for name in glob.glob('AcsDataClean/B19001*5y' + str(year) + '_est.csv'):
        fileName = name
    df = pd.read_csv(fileName, index_col=0)
    # Group By CSA so that the tracts may be operated on,
    # then aggregate the numeric values by sum.
    df = df.groupby('CSA')
    df = sumInts(df)
    # Add 'BALTIMORE' which is the SUM of all the CSAs

    # Step 2) Prepare the columns.
    # Each B19001 variable is an income bracket: its lower bound and the width of the bracket.
    info = pd.DataFrame(
        [
            ['B19001_002E', 0, 10000],
            ['B19001_003E', 10000, 4999],
            ['B19001_004E', 15000, 4999],
            ['B19001_005E', 20000, 4999],
            ['B19001_006E', 25000, 4999],
            ['B19001_007E', 30000, 4999],
            ['B19001_008E', 35000, 4999],
            ['B19001_009E', 40000, 4999],
            ['B19001_010E', 45000, 4999],
            ['B19001_011E', 50000, 9999],
            ['B19001_012E', 60000, 14999],
            ['B19001_013E', 75000, 24999],
            ['B19001_014E', 100000, 24999],
            ['B19001_015E', 125000, 24999],
            ['B19001_016E', 150000, 49000],
            ['B19001_017E', 200000, 1000000000000000000000000],
        ], columns=['variable', 'lower', 'range'])

    # Final Dataframe
    data_table = pd.DataFrame()
    for index, row in info.iterrows():
        data_table = addKey(df, data_table, row['variable'])

    # Create a table of the accumulating total across the columns, left to right, for each CSA.
    temp_table = data_table.cumsum(axis=1)

    # Get the CSA midpoint by halving the last column (the cumulative total).  # V3
    temp_table['midpoint'] = (temp_table.iloc[:, -1:] / 2)
    temp_table['midpoint_index'] = False
    temp_table['midpoint_index_value'] = False                      # Z3
    temp_table['midpoint_index_lower'] = False                      # W3
    temp_table['midpoint_index_range'] = False                      # X3
    temp_table['midpoint_index_minus_one_cumulative_sum'] = False   # Y3

    # Get the CSA midpoint index using the breakpoints in our info table
    # (in SQL: "when midpoint > agg[1] and midpoint <= agg[2] then 2").
    for index, row in temp_table.iterrows():
        # Find the last column whose cumulative value is still at or below the midpoint.
        # Do not use the six temp columns we just created.
        midpoint = row['midpoint']
        midpoint_index = 0
        for column in row.iloc[:-6]:
            if( midpoint >= int(column) ):
                temp_table.loc[index, 'midpoint_index'] = midpoint_index + 1
            midpoint_index += 1

    temp_table = temp_table.drop('Unassigned--Jail')

    for index, row in temp_table.iterrows():
        temp_table.loc[index, 'midpoint_index_value'] = data_table.loc[index, data_table.columns[row['midpoint_index']]]
        temp_table.loc[index, 'midpoint_index_lower'] = info.loc[row['midpoint_index']]['lower']
        temp_table.loc[index, 'midpoint_index_range'] = info.loc[row['midpoint_index']]['range']
        temp_table.loc[index, 'midpoint_index_minus_one_cumulative_sum'] = row[row['midpoint_index'] - 1]

    # Step 3) Run the Calculation.
    # Calculation = midpoint_lower::numeric + (midpoint_range::numeric * ( (midpoint - midpoint_upto_agg) / nullif(midpoint_total,0) ))
    # Calculation = W3 + X3*((V3-Y3)/Z3)
    # V3 -> midpoint of households == sum / 2
    # W3 -> lower limit of the income range containing the midpoint of the household total == row[lower]
    # X3 -> width of the interval containing the median == row[range]
    # Z3 -> number of households within the interval containing the median == row[total]
    # Y3 -> cumulative frequency up to, but NOT including, the median interval
    temp_table['final'] = temp_table['midpoint_index_lower'] + temp_table['midpoint_index_range'] * (
        (temp_table['midpoint'] - temp_table['midpoint_index_minus_one_cumulative_sum'])
        / temp_table['midpoint_index_value'])

    # Step 4) Add Special Baltimore City Data.
    url = 'https://api.census.gov/data/20'+str(year)+'/acs/acs5/subject?get=NAME,S1901_C01_012E&for=county%3A510&in=state%3A24&key=829bf6f2e037372acbba32ba5731647c5127fdb0'
    table = pd.read_json(url, orient='records')
    temp_table['final']['Baltimore City'] = float(table.loc[1, table.columns[1]])

    return temp_table['final']

""" /* */ --
with tbl_csa as (
    select a.*, b.count
    from vital_signs.get_acs_vars_csa_and_bc('2014', ARRAY['B19001_002E','B19001_003E','B19001_004E',
        'B19001_005E','B19001_006E','B19001_007E','B19001_008E','B19001_009E','B19001_010E',
        'B19001_011E','B19001_012E','B19001_013E','B19001_014E','B19001_015E','B19001_016E',
        'B19001_017E','B19013_001E']) a
    left join (select csa, count(*) as count from vital_signs.tracts group by csa) b on a.csa = b.csa
), info as (
    select 'B19001_002E' as variable, 0 as lower, 10000 as range
    union all select 'B19001_003E' as variable, 10000 as lower, 4999 as range
    union all select 'B19001_004E' as variable, 15000 as lower, 4999 as range
    union all select 'B19001_005E' as variable, 20000 as lower, 4999 as range
    union all select 'B19001_006E' as variable, 25000 as lower, 4999 as range
    union all select 'B19001_007E' as variable, 30000 as lower, 4999 as range
    union all select 'B19001_008E' as variable, 35000 as lower, 4999 as range
    union all select 'B19001_009E' as variable, 40000 as lower, 4999 as range
    union all select 'B19001_010E' as variable, 45000 as lower, 4999 as range
    union all select 'B19001_011E' as variable, 50000 as lower, 9999 as range
    union all select 'B19001_012E' as variable, 60000 as lower, 14999 as range
    union all select 'B19001_013E' as variable, 75000 as lower, 24999 as range
    union all select 'B19001_014E' as variable, 100000 as lower, 24999 as range
    union all select 'B19001_015E' as variable, 125000 as lower, 24999 as range
    union all select 'B19001_016E' as variable, 150000 as lower, 49000 as range
    union all select 'B19001_017E' as variable, 200000 as lower, null as range
), csa_agg as (
    select csa, value as total, count,
    ARRAY[
        (value[1]),
        (value[1] + value[2]),
        (value[1] + value[2] + value[3]),
        (value[1] + value[2] + value[3] + value[4]),
        (value[1] + value[2] + value[3] + value[4] + value[5]),
        (value[1] + value[2] + value[3] + value[4] + value[5] + value[6]),
        (value[1] + value[2] + value[3] + value[4] + value[5] + value[6] + value[7]),
        (value[1] + value[2] + value[3] + value[4] + value[5] + value[6] + value[7] + value[8]),
        (value[1] + value[2] + value[3] + value[4] + value[5] + value[6] + value[7] + value[8] + value[9]),
        (value[1] + value[2] + value[3] + value[4] + value[5] + value[6] + value[7] + value[8] + value[9] + value[10]),
        (value[1] + value[2] + value[3] + value[4] + value[5] + value[6] + value[7] + value[8] + value[9] + value[10] + value[11]),
        (value[1] + value[2] + value[3] + value[4] + value[5] + value[6] + value[7] + value[8] + value[9] + value[10] + value[11] + value[12]),
        (value[1] + value[2] + value[3] + value[4] + value[5] + value[6] + value[7] + value[8] + value[9] + value[10] + value[11] + value[12] + value[13]),
        (value[1] + value[2] + value[3] + value[4] + value[5] + value[6] + value[7] + value[8] + value[9] + value[10] + value[11] + value[12] + value[13] + value[14]),
        (value[1] + value[2] + value[3] + value[4] + value[5] + value[6] + value[7] + value[8] + value[9] + value[10] + value[11] + value[12] + value[13] + value[14] + value[15]),
        (value[1] + value[2] + value[3] + value[4] + value[5] + value[6] + value[7] + value[8] + value[9] + value[10] + value[11] + value[12] + value[13] + value[14] + value[15] + value[16])
    ] as agg, value[17] as median, variable
    from tbl_csa
), csa_agg2 as (
    select csa, count, median, total, agg, variable, agg[16]/2::numeric as midpoint from csa_agg
), csa_agg3 as (
    select csa, count, median, total, agg, variable, midpoint,
    (case
        when midpoint <= agg[1] then 1
        when midpoint > agg[1] and midpoint <= agg[2] then 2
        when midpoint > agg[2] and midpoint <= agg[3] then 3
        when midpoint > agg[3] and midpoint <= agg[4] then 4
        when midpoint > agg[4] and midpoint <= agg[5] then 5
        when midpoint > agg[5] and midpoint <= agg[6] then 6
        when midpoint > agg[6] and midpoint <= agg[7] then 7
        when midpoint > agg[7] and midpoint <= agg[8] then 8
        when midpoint > agg[8] and midpoint <= agg[9] then 9
        when midpoint > agg[9] and midpoint <= agg[10] then 10
        when midpoint > agg[10] and midpoint <= agg[11] then 11
        when midpoint > agg[11] and midpoint <= agg[12] then 12
        when midpoint > agg[12] and midpoint <= agg[13] then 13
        when midpoint > agg[13] and midpoint <= agg[14] then 14
        when midpoint > agg[14] and midpoint <= agg[15] then 15
        when midpoint > agg[15] and midpoint <= agg[16] then 16
        when midpoint > agg[16] then 17
    end) as midpoint_idx
    from csa_agg2
), csa_agg4 as (
    select csa, count, median, total, agg, variable, midpoint, midpoint_idx,
    total[midpoint_idx] as midpoint_total,
    (case when (midpoint_idx - 1) = 0 then 0 else total[(midpoint_idx - 1)] end) as midpoint_upto_total,
    agg[midpoint_idx] as midpoint_agg,
    (case when (midpoint_idx - 1) = 0 then 0 else agg[(midpoint_idx - 1)] end) as midpoint_upto_agg,
    variable[midpoint_idx] as midpoint_variable
    from csa_agg3
), csa_agg5 as (
    select a.*, b.lower as midpoint_lower, b.range as midpoint_range
    from csa_agg4 a left join info b on a.midpoint_variable = b.variable
), tbl as (
    select (CASE when count = 1 OR csa = 'Baltimore City' then median
                 else (midpoint_lower::numeric + (midpoint_range::numeric *
                      ( (midpoint - midpoint_upto_agg) / nullif(midpoint_total,0) ) ) )
            END) as result, csa
    from csa_agg5
)
UPDATE vital_signs.data set mhhi = result from tbl
where data2.csa = tbl.csa and update_data_year = '2014' and data_year = '2014';
"""
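The spreadsheet-style formula W3 + X3*((V3-Y3)/Z3) in Step 3 is the standard interpolated median for grouped (binned) data. In the notation of the comments:

$$\text{mhhi} = L_m + w_m \,\frac{N/2 - F_{m-1}}{f_m}$$

where $N/2$ is the household midpoint (V3), $L_m$ the lower limit of the income bracket containing it (W3), $w_m$ that bracket's width (X3), $F_{m-1}$ the cumulative household count up to but not including that bracket (Y3), and $f_m$ the count inside it (Z3).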

nilf.py

#export
#File: nilf.py
#Author: Charles Karpati
#Date: 1/17/19
#Section: Bnia
#Email: karpati1@umbc.edu
#Description:
#  Uses ACS Table B23001 - SEX BY AGE BY EMPLOYMENT STATUS FOR THE POPULATION 16 YEARS AND OVER
#  Universe: Population 16 years and over
#  Table Creates: empl, unempl, unempr, nilf
#purpose: Produce Workforce and Economic Development - Percent Population 16-64 Not in Labor Force Indicator
#input: Year
#output:

import pandas as pd
import glob

def nilf( year ):
    def getColName (df, col): return df.columns[df.columns.str.contains(pat = col)][0]
    def getColByName (df, col): return df[getColName(df, col)]
    # Copy one column from the aggregated table into the working frame.
    def addKey(df, fi, col): fi[getColName(df, col)] = getColByName(df, col); return fi
    # Row-wise sum of c1 and c2; the result is 0 iff both columns are 0.
    def nullIfEqual(df, c1, c2):
        return df.apply(lambda x: x[getColName(df, c1)] + x[getColName(df, c2)]
                        if x[getColName(df, c1)] + x[getColName(df, c2)] != 0 else 0, axis=1)
    def sumInts(df): return df.sum(numeric_only=True)

    # Step 1) Fetch Tract Files w/CSA Labels by Name from the 2_cleaned folder.
    fileName = ''
    for name in glob.glob('AcsDataClean/B23001*5y' + str(year) + '_est.csv'):
        fileName = name
    df = pd.read_csv(fileName, index_col=0)
    # Group By CSA so that the tracts may be operated on,
    # then aggregate the numeric values by sum.
    df = df.groupby('CSA')
    df = sumInts(df)
    # Add 'BALTIMORE' which is the SUM of all the CSAs

    # Step 2) Prepare the columns.
    # Population 16-64 (denominator), by sex and age bracket
    denominator_columns = [
        'B23001_003E', 'B23001_010E', 'B23001_017E', 'B23001_024E', 'B23001_031E',
        'B23001_038E', 'B23001_045E', 'B23001_052E', 'B23001_059E', 'B23001_066E',
        'B23001_089E', 'B23001_096E', 'B23001_103E', 'B23001_110E', 'B23001_117E',
        'B23001_124E', 'B23001_131E', 'B23001_138E', 'B23001_145E', 'B23001_152E']
    # Not in labor force, 16-64 (numerator)
    numerator_columns = [
        'B23001_009E', 'B23001_016E', 'B23001_023E', 'B23001_030E', 'B23001_037E',
        'B23001_044E', 'B23001_051E', 'B23001_058E', 'B23001_065E', 'B23001_072E',
        'B23001_095E', 'B23001_102E', 'B23001_109E', 'B23001_116E', 'B23001_123E',
        'B23001_130E', 'B23001_137E', 'B23001_144E', 'B23001_151E', 'B23001_158E']

    # Final Dataframe
    fi = pd.DataFrame()
    for col in denominator_columns + numerator_columns:
        fi = addKey(df, fi, col)
    # Numerators
    numerators = pd.DataFrame()
    for col in numerator_columns:
        numerators = addKey(df, numerators, col)
    # Denominators
    denominators = pd.DataFrame()
    for col in denominator_columns:
        denominators = addKey(df, denominators, col)

    # Step 3) Run the Calculation:
    # ( not in labor force 16-64 / nullif(population 16-64, 0) ) * 100
    fi['numerator'] = numerators.sum(axis=1)
    fi['denominator'] = denominators.sum(axis=1)
    # Delete Rows where the 'denominator' column is 0
    fi = fi[fi['denominator'] != 0]
    fi['final'] = (fi['numerator'] / fi['denominator']) * 100
    return fi['final']

""" /* */ --
WITH tbl AS (
    select csa, (
        (value[21]+value[22]+value[23]+value[24]+value[25]+value[26]+value[27]+value[28]+value[29]+value[30]
         +value[31]+value[32]+value[33]+value[34]+value[35]+value[36]+value[37]+value[38]+value[39]+value[40]) -- not in labor force 16-64
        / nullif(
        (value[1]+value[2]+value[3]+value[4]+value[5]+value[6]+value[7]+value[8]+value[9]+value[10]
         +value[11]+value[12]+value[13]+value[14]+value[15]+value[16]+value[17]+value[18]+value[19]+value[20]) -- population 16 to 64
        , 0) )*100::numeric as result
    from vital_signs.get_acs_vars_csa_and_bc('2014', ARRAY[
        'B23001_003E','B23001_010E','B23001_017E','B23001_024E','B23001_031E','B23001_038E','B23001_045E','B23001_052E',
        'B23001_059E','B23001_066E','B23001_089E','B23001_096E','B23001_103E','B23001_110E','B23001_117E','B23001_124E',
        'B23001_131E','B23001_138E','B23001_145E','B23001_152E','B23001_009E','B23001_016E','B23001_023E','B23001_030E',
        'B23001_037E','B23001_044E','B23001_051E','B23001_058E','B23001_065E','B23001_072E','B23001_095E','B23001_102E',
        'B23001_109E','B23001_116E','B23001_123E','B23001_130E','B23001_137E','B23001_144E','B23001_151E','B23001_158E'])
)
update vital_signs.data set nilf = result from tbl
where data2.csa = tbl.csa and update_data_year = '2014' and data_year = '2014';
"""
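The forty B23001 ids are easier to audit if rebuilt from their stride: within each sex the age-bracket totals step by 7 (003, 010, ..., 066 for men; 089, ..., 152 for women), and each "not in labor force" id sits 6 above its bracket total. A sketch, assuming the stride holds exactly as listed above:

    # Rebuild the four B23001 id lists from the stride pattern described above.
    starts = [3, 89]  # first male and first female 16-64 bracket totals
    den_ids = ['B23001_%03dE' % (s + 7 * i) for s in starts for i in range(10)]
    num_ids = ['B23001_%03dE' % (s + 6 + 7 * i) for s in starts for i in range(10)]
    assert den_ids[0] == 'B23001_003E' and num_ids[-1] == 'B23001_158E'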

nohhint.py

#export
#File: nohhint.py
#Author: Charles Karpati
#Date: 1/25/19
#Section: Bnia
#Email: karpati1@umbc.edu
#Description:
#  Uses ACS Table B28011 - INTERNET SUBSCRIPTIONS IN HOUSEHOLD
#  Universe: Households
#purpose: Produce Percent of Households with No Internet Access Indicator
#input: Year
#output:

import pandas as pd
import glob

def nohhint( year ):
    def getColName (df, col): return df.columns[df.columns.str.contains(pat = col)][0]
    def getColByName (df, col): return df[getColName(df, col)]
    # Copy one column from the aggregated table into the working frame.
    def addKey(df, fi, col): fi[getColName(df, col)] = getColByName(df, col); return fi
    # Row-wise sum of c1 and c2; the result is 0 iff both columns are 0.
    def nullIfEqual(df, c1, c2):
        return df.apply(lambda x: x[getColName(df, c1)] + x[getColName(df, c2)]
                        if x[getColName(df, c1)] + x[getColName(df, c2)] != 0 else 0, axis=1)
    def sumInts(df): return df.sum(numeric_only=True)

    # Step 1) Fetch Tract Files w/CSA Labels by Name from the 2_cleaned folder.
    fileName = ''
    for name in glob.glob('AcsDataClean/B28011*5y' + str(year) + '_est.csv'):
        fileName = name
    df = pd.read_csv(fileName, index_col=0)
    # Group By CSA so that the tracts may be operated on,
    # then aggregate the numeric values by sum.
    df = df.groupby('CSA')
    df = sumInts(df)

    # Step 2) Prepare the columns.
    # Final Dataframe
    fi = pd.DataFrame()
    for col in ['B28011_001E', 'B28011_008E']:
        fi = addKey(df, fi, col)
    # Numerators: households with no internet access
    numerators = pd.DataFrame()
    for col in ['B28011_008E']:
        numerators = addKey(df, numerators, col)
    # Denominators: all households
    denominators = pd.DataFrame()
    for col in ['B28011_001E']:
        denominators = addKey(df, denominators, col)

    # Step 3) Run the Calculation:
    # ( no-internet households / nullif(total households, 0) ) * 100
    fi['numerator'] = numerators.sum(axis=1)
    fi['denominator'] = denominators.sum(axis=1)
    # Delete Rows where the 'denominator' column is 0
    fi = fi[fi['denominator'] != 0]
    fi['final'] = (fi['numerator'] / fi['denominator']) * 100
    return fi['final']

# NOTE: the legacy SQL below appears to have been copied from affordm.py
# (it references B25091 and sets affordm, not nohhint); kept for reference.
""" WITH tbl AS (
    select csa, ( (value[1]+value[2]+value[3]+value[4]) / nullif(value[5],0) )*100::numeric as result
    from vital_signs.get_acs_vars_csa_and_bc('2014', ARRAY['B25091_008E','B25091_009E','B25091_010E','B25091_011E','B25091_002E'])
)
update vital_signs.data set affordm = result from tbl
where data2.csa = tbl.csa and update_data_year = '2014' and data_year = '2014';
"""

novhcl.py

#export
#File: novhcl.py
#Author: Charles Karpati
#Date: 1/17/19
#Section: Bnia
#Email: karpati1@umbc.edu
#Description:
#  Uses ACS Table B08201 - HOUSEHOLD SIZE BY VEHICLES AVAILABLE
#  Universe: Households
#purpose: Produce Sustainability - Percent of Households with No Vehicles Available Indicator
#input: Year
#output:

import pandas as pd
import glob

def novhcl( year ):
    def getColName (df, col): return df.columns[df.columns.str.contains(pat = col)][0]
    def getColByName (df, col): return df[getColName(df, col)]
    # Copy one column from the aggregated table into the working frame.
    def addKey(df, fi, col): fi[getColName(df, col)] = getColByName(df, col); return fi
    # Row-wise sum of c1 and c2; the result is 0 iff both columns are 0.
    def nullIfEqual(df, c1, c2):
        return df.apply(lambda x: x[getColName(df, c1)] + x[getColName(df, c2)]
                        if x[getColName(df, c1)] + x[getColName(df, c2)] != 0 else 0, axis=1)
    def sumInts(df): return df.sum(numeric_only=True)

    # Step 1) Fetch Tract Files w/CSA Labels by Name from the 2_cleaned folder.
    fileName = ''
    for name in glob.glob('AcsDataClean/B08201*5y' + str(year) + '_est.csv'):
        fileName = name
    df = pd.read_csv(fileName, index_col=0)
    # Group By CSA so that the tracts may be operated on,
    # then aggregate the numeric values by sum.
    df = df.groupby('CSA')
    df = sumInts(df)
    # Add 'BALTIMORE' which is the SUM of all the CSAs

    # Step 2) Prepare the columns.
    # Final Dataframe
    fi = pd.DataFrame()
    for col in ['B08201_002E', 'B08201_001E']:
        fi = addKey(df, fi, col)
    # Numerators
    numerators = pd.DataFrame()
    for col in ['B08201_002E']:
        numerators = addKey(df, numerators, col)
    # Denominators
    denominators = pd.DataFrame()
    for col in ['B08201_001E']:
        denominators = addKey(df, denominators, col)

    # Step 3) Run the Calculation:
    # ( value[1] / nullif(value[2],0) )*100
    fi['numerator'] = numerators.sum(axis=1)
    fi['denominator'] = denominators.sum(axis=1)
    # Delete Rows where the 'denominator' column is 0
    fi = fi[fi['denominator'] != 0]
    fi['final'] = (fi['numerator'] / fi['denominator']) * 100
    return fi['final']

""" /* */ --
WITH tbl AS (
    select csa, ( value[1] / nullif(value[2],0) )*100::numeric as result
    from vital_signs.get_acs_vars_csa_and_bc('2014', ARRAY['B08201_002E','B08201_001E'])
)
update vital_signs.data set novhcl = result from tbl
where data2.csa = tbl.csa and update_data_year = '2014' and data_year = '2014';
"""

othrcom.py

#export
#File: othrcom.py
#Author: Charles Karpati
#Date: 1/24/19
#Section: Bnia
#Email: karpati1@umbc.edu
#Description:
#  Uses ACS Table B08101 - MEANS OF TRANSPORTATION TO WORK BY AGE
#  Universe: Workers 16 years and over
#  Table Creates: othrcom, drvalone, carpool, pubtran, walked
#purpose: Produce Sustainability - Percent of Population Using Other Means to Commute to Work
#         (Taxi, Motorcycle, Bicycle, Other) Indicator
#input: Year
#output:

import pandas as pd
import glob

def othrcom( year ):
    def getColName (df, col): return df.columns[df.columns.str.contains(pat = col)][0]
    def getColByName (df, col): return df[getColName(df, col)]
    # Copy one column from the aggregated table into the working frame.
    def addKey(df, fi, col): fi[getColName(df, col)] = getColByName(df, col); return fi
    # Row-wise sum of c1 and c2; the result is 0 iff both columns are 0.
    def nullIfEqual(df, c1, c2):
        return df.apply(lambda x: x[getColName(df, c1)] + x[getColName(df, c2)]
                        if x[getColName(df, c1)] + x[getColName(df, c2)] != 0 else 0, axis=1)
    def sumInts(df): return df.sum(numeric_only=True)

    # Step 1) Fetch Tract Files w/CSA Labels by Name from the 2_cleaned folder.
    fileName = ''
    for name in glob.glob('AcsDataClean/B08101*5y' + str(year) + '_est.csv'):
        fileName = name
    df = pd.read_csv(fileName, index_col=0)
    # Group By CSA so that the tracts may be operated on,
    # then aggregate the numeric values by sum.
    df = df.groupby('CSA')
    df = sumInts(df)
    # Add 'BALTIMORE' which is the SUM of all the CSAs

    # Step 2) Prepare the columns.
    # Final Dataframe
    fi = pd.DataFrame()
    for col in ['B08101_001E', 'B08101_049E', 'B08101_041E']:
        fi = addKey(df, fi, col)
    # Numerators
    numerators = pd.DataFrame()
    for col in ['B08101_041E']:
        numerators = addKey(df, numerators, col)
    # Denominators
    denominators = pd.DataFrame()
    for col in ['B08101_001E', 'B08101_049E']:
        denominators = addKey(df, denominators, col)

    # Step 3) Run the Calculation:
    # ( value[3] / nullif((value[1]-value[2]),0) )*100
    fi['numerator'] = numerators.sum(axis=1)
    # Denominator: all workers (001) minus column 049.
    fi['denominator'] = denominators.iloc[:, 0] - denominators.iloc[:, 1]
    # Delete Rows where the 'denominator' column is 0
    fi = fi[fi['denominator'] != 0]
    fi['final'] = (fi['numerator'] / fi['denominator']) * 100

    # Step 4) Add Special Baltimore City Data.
    # 'Other means' for the city is derived as a residual:
    # 100 - (walked + drvalone + carpool + pubtran + workfromhome)
    # e.g. 100 - (6.7 + 59.8 + 9.2 + 18.4 + 3.7) = 2.2
    url = 'https://api.census.gov/data/20'+str(year)+'/acs/acs5/subject?get=NAME,S0801_C01_010E,S0801_C01_003E,S0801_C01_004E,S0801_C01_009E,S0801_C01_013E&for=county%3A510&in=state%3A24&key=829bf6f2e037372acbba32ba5731647c5127fdb0'
    table = pd.read_json(url, orient='records')
    walked = float(table.loc[1, table.columns[1]])
    drvalone = float(table.loc[1, table.columns[2]])
    carpool = float(table.loc[1, table.columns[3]])
    pubtran = float(table.loc[1, table.columns[4]])
    workfromhome = float(table.loc[1, table.columns[5]])
    fi['final']['Baltimore City'] = 100 - (walked + drvalone + carpool + pubtran + workfromhome)
    return fi['final']

""" /* */ --
WITH tbl AS (
    select csa, ( value[3] / nullif((value[1]-value[2]),0) )*100::numeric as result
    from vital_signs.get_acs_vars_csa_and_bc('2014', ARRAY['B08101_001E','B08101_049E','B08101_041E'])
)
update vital_signs.data set othrcom = result from tbl
where data2.csa = tbl.csa and update_data_year = '2014' and data_year = '2014';
"""
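The city-level override treats "other means" as a residual share. A two-line sanity check of the sample values carried in the comment:

    shares = [6.7, 59.8, 9.2, 18.4, 3.7]  # walked, drvalone, carpool, pubtran, workfromhome
    print(round(100 - sum(shares), 1))    # -> 2.2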

p2more.py

#export
#File: p2more.py
#Author: Charles Karpati
#Date: 4/16/19
#Section: Bnia
#Email: karpati1@umbc.edu
#Description:
#  Uses ACS Table B03002 - HISPANIC OR LATINO ORIGIN BY RACE
#  Universe: Total Population
#  Table Creates: racdiv, paa, pwhite, pasi, phisp, p2more, ppac
#purpose:
#input: Year
#output:

import pandas as pd
import glob

def p2more( year ):
    def getColName (df, col): return df.columns[df.columns.str.contains(pat = col)][0]
    def getColByName (df, col): return df[getColName(df, col)]
    # Copy one column from the aggregated table into the working frame.
    def addKey(df, fi, col): fi[getColName(df, col)] = getColByName(df, col); return fi
    # Row-wise sum of c1 and c2; the result is 0 iff both columns are 0.
    def nullIfEqual(df, c1, c2):
        return df.apply(lambda x: x[getColName(df, c1)] + x[getColName(df, c2)]
                        if x[getColName(df, c1)] + x[getColName(df, c2)] != 0 else 0, axis=1)
    def sumInts(df): return df.sum(numeric_only=True)

    # Step 1) Fetch Tract Files w/CSA Labels by Name from the 2_cleaned folder.
    fileName = ''
    for name in glob.glob('AcsDataClean/B03002*5y' + str(year) + '_est.csv'):
        fileName = name
    df = pd.read_csv(fileName, index_col=0)
    # Group By CSA so that the tracts may be operated on,
    # then aggregate the numeric values by sum.
    df = df.groupby('CSA')
    df = df.sum(numeric_only=True)

    # Step 2) Two-or-more-races (non-Hispanic) as a percent of total population.
    df1 = pd.DataFrame()
    df1['CSA'] = df.index
    df1.set_index('CSA', drop=True, inplace=True)
    tot = df['B03002_001E_Total']
    df1['TwoOrMore%NH'] = df['B03002_009E_Total_Not_Hispanic_or_Latino_Two_or_more_races'] / tot * 100
    return df1['TwoOrMore%NH']

paa.py

#export
#File: paa.py
#Author: Charles Karpati
#Date: 4/16/19
#Section: Bnia
#Email: karpati1@umbc.edu
#Description:
#  Uses ACS Table B03002 - HISPANIC OR LATINO ORIGIN BY RACE
#  Universe: Total Population
#  Table Creates: racdiv, paa, pwhite, pasi, phisp, p2more, ppac
#purpose:
#input: Year
#output:

import pandas as pd
import glob

def paa( year ):
    def getColName (df, col): return df.columns[df.columns.str.contains(pat = col)][0]
    def getColByName (df, col): return df[getColName(df, col)]
    # Copy one column from the aggregated table into the working frame.
    def addKey(df, fi, col): fi[getColName(df, col)] = getColByName(df, col); return fi
    # Row-wise sum of c1 and c2; the result is 0 iff both columns are 0.
    def nullIfEqual(df, c1, c2):
        return df.apply(lambda x: x[getColName(df, c1)] + x[getColName(df, c2)]
                        if x[getColName(df, c1)] + x[getColName(df, c2)] != 0 else 0, axis=1)
    def sumInts(df): return df.sum(numeric_only=True)

    # Step 1) Fetch Tract Files w/CSA Labels by Name from the 2_cleaned folder.
    fileName = ''
    for name in glob.glob('AcsDataClean/B03002*5y' + str(year) + '_est.csv'):
        fileName = name
    df = pd.read_csv(fileName, index_col=0)
    # Group By CSA so that the tracts may be operated on,
    # then aggregate the numeric values by sum.
    df = df.groupby('CSA')
    df = df.sum(numeric_only=True)

    # Step 2) Black or African American (non-Hispanic) as a percent of total population.
    df1 = pd.DataFrame()
    df1['CSA'] = df.index
    df1.set_index('CSA', drop=True, inplace=True)
    tot = df['B03002_001E_Total']
    df1['African-American%NH'] = df['B03002_004E_Total_Not_Hispanic_or_Latino_Black_or_African_American_alone'] / tot * 100
    return df1['African-American%NH']

pasi.py

#export
#File: pasi.py
#Author: Charles Karpati
#Date: 4/16/19
#Section: Bnia
#Email: karpati1@umbc.edu
#Description:
#  Uses ACS Table B03002 - HISPANIC OR LATINO ORIGIN BY RACE
#  Universe: Total Population
#  Table Creates: racdiv, paa, pwhite, pasi, phisp, p2more, ppac
#purpose:
#input: Year
#output:

import pandas as pd
import glob

def pasi( year ):
    def getColName (df, col): return df.columns[df.columns.str.contains(pat = col)][0]
    def getColByName (df, col): return df[getColName(df, col)]
    # Copy one column from the aggregated table into the working frame.
    def addKey(df, fi, col): fi[getColName(df, col)] = getColByName(df, col); return fi
    # Row-wise sum of c1 and c2; the result is 0 iff both columns are 0.
    def nullIfEqual(df, c1, c2):
        return df.apply(lambda x: x[getColName(df, c1)] + x[getColName(df, c2)]
                        if x[getColName(df, c1)] + x[getColName(df, c2)] != 0 else 0, axis=1)
    def sumInts(df): return df.sum(numeric_only=True)

    # Step 1) Fetch Tract Files w/CSA Labels by Name from the 2_cleaned folder.
    fileName = ''
    for name in glob.glob('AcsDataClean/B03002*5y' + str(year) + '_est.csv'):
        fileName = name
    df = pd.read_csv(fileName, index_col=0)
    # Group By CSA so that the tracts may be operated on,
    # then aggregate the numeric values by sum.
    df = df.groupby('CSA')
    df = df.sum(numeric_only=True)

    # Step 2) Asian (non-Hispanic) as a percent of total population.
    df1 = pd.DataFrame()
    df1['CSA'] = df.index
    df1.set_index('CSA', drop=True, inplace=True)
    tot = df['B03002_001E_Total']
    df1['Asian%NH'] = df['B03002_006E_Total_Not_Hispanic_or_Latino_Asian_alone'] / tot * 100
    return df1['Asian%NH']

phisp.py

#export
#File: phisp.py
#Author: Charles Karpati
#Date: 4/16/19
#Section: Bnia
#Email: karpati1@umbc.edu
#Description:
#  Uses ACS Table B03002 - HISPANIC OR LATINO ORIGIN BY RACE
#  Universe: Total Population
#  Table Creates: racdiv, paa, pwhite, pasi, phisp, p2more, ppac
#purpose:
#input: Year
#output:

import pandas as pd
import glob

def phisp( year ):
    def getColName (df, col): return df.columns[df.columns.str.contains(pat = col)][0]
    def getColByName (df, col): return df[getColName(df, col)]
    # Copy one column from the aggregated table into the working frame.
    def addKey(df, fi, col): fi[getColName(df, col)] = getColByName(df, col); return fi
    # Row-wise sum of c1 and c2; the result is 0 iff both columns are 0.
    def nullIfEqual(df, c1, c2):
        return df.apply(lambda x: x[getColName(df, c1)] + x[getColName(df, c2)]
                        if x[getColName(df, c1)] + x[getColName(df, c2)] != 0 else 0, axis=1)
    def sumInts(df): return df.sum(numeric_only=True)

    # Step 1) Fetch Tract Files w/CSA Labels by Name from the 2_cleaned folder.
    fileName = ''
    for name in glob.glob('AcsDataClean/B03002*5y' + str(year) + '_est.csv'):
        fileName = name
    df = pd.read_csv(fileName, index_col=0)
    # Group By CSA so that the tracts may be operated on,
    # then aggregate the numeric values by sum.
    df = df.groupby('CSA')
    df = df.sum(numeric_only=True)

    # Step 2) Hispanic or Latino as a percent of total population.
    df1 = pd.DataFrame()
    df1['CSA'] = df.index
    df1.set_index('CSA', drop=True, inplace=True)
    tot = df['B03002_001E_Total']
    df1['Hisp%'] = df['B03002_012E_Total_Hispanic_or_Latino'] / tot * 100
    return df1['Hisp%']

ppac.py

#export
#File: ppac.py
#Author: Charles Karpati
#Date: 4/16/19
#Section: Bnia
#Email: karpati1@umbc.edu
#Description:
#  Uses ACS Table B03002 - HISPANIC OR LATINO ORIGIN BY RACE
#  Universe: Total Population
#  Table Creates: racdiv, paa, pwhite, pasi, phisp, p2more, ppac
#purpose:
#input: Year
#output:

import pandas as pd
import glob

def ppac( year ):
    def getColName (df, col): return df.columns[df.columns.str.contains(pat = col)][0]
    def getColByName (df, col): return df[getColName(df, col)]
    # Copy one column from the aggregated table into the working frame.
    def addKey(df, fi, col): fi[getColName(df, col)] = getColByName(df, col); return fi
    # Row-wise sum of c1 and c2; the result is 0 iff both columns are 0.
    def nullIfEqual(df, c1, c2):
        return df.apply(lambda x: x[getColName(df, c1)] + x[getColName(df, c2)]
                        if x[getColName(df, c1)] + x[getColName(df, c2)] != 0 else 0, axis=1)
    def sumInts(df): return df.sum(numeric_only=True)

    # Step 1) Fetch Tract Files w/CSA Labels by Name from the 2_cleaned folder.
    fileName = ''
    for name in glob.glob('AcsDataClean/B03002*5y' + str(year) + '_est.csv'):
        fileName = name
    df = pd.read_csv(fileName, index_col=0)
    # Group By CSA so that the tracts may be operated on,
    # then aggregate the numeric values by sum.
    df = df.groupby('CSA')
    df = df.sum(numeric_only=True)

    # Step 2) All other races (non-Hispanic: some other race alone, American Indian/Alaska
    # Native alone, Native Hawaiian/Pacific Islander alone) as a percent of total population.
    df1 = pd.DataFrame()
    df1['CSA'] = df.index
    df1.set_index('CSA', drop=True, inplace=True)
    tot = df['B03002_001E_Total']
    df1['AllOther%NH'] = (
        df['B03002_008E_Total_Not_Hispanic_or_Latino_Some_other_race_alone']
        + df['B03002_005E_Total_Not_Hispanic_or_Latino_American_Indian_and_Alaska_Native_alone']
        + df['B03002_007E_Total_Not_Hispanic_or_Latino_Native_Hawaiian_and_Other_Pacific_Islander_alone']
    ) / tot * 100
    return df1['AllOther%NH']

pubtran.py

#export
#File: pubtran.py
#Author: Charles Karpati
#Date: 1/17/19
#Section: Bnia
#Email: karpati1@umbc.edu
#Description:
#  Uses ACS Table B08101 - MEANS OF TRANSPORTATION TO WORK BY AGE
#  Universe: Workers 16 Years and Over
#  Table Creates: othrcom, drvalone, carpool, pubtran, walked
#purpose: Produce Sustainability - Percent of Population that Uses Public Transportation
#         to Get to Work Indicator
#input: Year
#output:

import pandas as pd
import glob

def pubtran( year ):
    def getColName (df, col): return df.columns[df.columns.str.contains(pat = col)][0]
    def getColByName (df, col): return df[getColName(df, col)]
    # Copy one column from the aggregated table into the working frame.
    def addKey(df, fi, col): fi[getColName(df, col)] = getColByName(df, col); return fi
    # Row-wise sum of c1 and c2; the result is 0 iff both columns are 0.
    def nullIfEqual(df, c1, c2):
        return df.apply(lambda x: x[getColName(df, c1)] + x[getColName(df, c2)]
                        if x[getColName(df, c1)] + x[getColName(df, c2)] != 0 else 0, axis=1)
    def sumInts(df): return df.sum(numeric_only=True)

    # Step 1) Fetch Tract Files w/CSA Labels by Name from the 2_cleaned folder.
    fileName = ''
    for name in glob.glob('AcsDataClean/B08101*5y' + str(year) + '_est.csv'):
        fileName = name
    df = pd.read_csv(fileName, index_col=0)
    # Group By CSA so that the tracts may be operated on,
    # then aggregate the numeric values by sum.
    df = df.groupby('CSA')
    df = sumInts(df)
    # Add 'BALTIMORE' which is the SUM of all the CSAs

    # Step 2) Prepare the columns.
    # Final Dataframe
    fi = pd.DataFrame()
    for col in ['B08101_001E', 'B08101_049E', 'B08101_025E']:
        fi = addKey(df, fi, col)
    # Numerators
    numerators = pd.DataFrame()
    for col in ['B08101_025E']:
        numerators = addKey(df, numerators, col)
    # Denominators
    denominators = pd.DataFrame()
    for col in ['B08101_001E', 'B08101_049E']:
        denominators = addKey(df, denominators, col)

    # Step 3) Run the Calculation:
    # ( value[3] / nullif((value[1]-value[2]),0) )*100
    fi['numerator'] = numerators.sum(axis=1)
    # Denominator: all workers (001) minus column 049.
    fi['denominator'] = denominators.iloc[:, 0] - denominators.iloc[:, 1]
    # Delete Rows where the 'denominator' column is 0
    fi = fi[fi['denominator'] != 0]
    fi['final'] = (fi['numerator'] / fi['denominator']) * 100

    # Step 4) Add Special Baltimore City Data.
    url = 'https://api.census.gov/data/20'+str(year)+'/acs/acs5/subject?get=NAME,S0801_C01_009E&for=county%3A510&in=state%3A24&key=829bf6f2e037372acbba32ba5731647c5127fdb0'
    table = pd.read_json(url, orient='records')
    fi['final']['Baltimore City'] = float(table.loc[1, table.columns[1]])
    return fi['final']

""" /* */ --
WITH tbl AS (
    select csa, ( value[3] / nullif((value[1]-value[2]),0) )*100::numeric as result
    from vital_signs.get_acs_vars_csa_and_bc('2014', ARRAY['B08101_001E','B08101_049E','B08101_025E'])
)
update vital_signs.data set pubtran = result from tbl
where data2.csa = tbl.csa and update_data_year = '2014' and data_year = '2014';
"""
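mhhi, othrcom, and pubtran all repeat the same ACS subject-table request for their Baltimore City override. A hedged sketch of that call as one helper (bcity_subject is a made-up name; the variable ids and the API key come from the scripts above):

    import pandas as pd

    def bcity_subject(year, variables, key='829bf6f2e037372acbba32ba5731647c5127fdb0'):
        # Fetch city-level (state 24, county 510) values from the ACS 5-year subject tables.
        url = ('https://api.census.gov/data/20' + str(year) + '/acs/acs5/subject?get=NAME,'
               + ','.join(variables) + '&for=county%3A510&in=state%3A24&key=' + key)
        table = pd.read_json(url, orient='records')
        # Row 0 holds the header; row 1 holds the single county record.
        return [float(table.loc[1, table.columns[i + 1]]) for i in range(len(variables))]

    # e.g. pubtran's override would become:
    # fi['final']['Baltimore City'] = bcity_subject(year, ['S0801_C01_009E'])[0]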

pwhite.py

#export
#File: pwhite.py
#Author: Charles Karpati
#Date: 4/16/19
#Section: Bnia
#Email: karpati1@umbc.edu
#Description:
#  Uses ACS Table B03002 - HISPANIC OR LATINO ORIGIN BY RACE
#  Universe: Total Population
#  Table Creates: racdiv, paa, pwhite, pasi, phisp, p2more, ppac
#purpose:
#input: Year
#output:

import pandas as pd
import glob

def pwhite( year ):
    def getColName (df, col): return df.columns[df.columns.str.contains(pat = col)][0]
    def getColByName (df, col): return df[getColName(df, col)]
    # Copy one column from the aggregated table into the working frame.
    def addKey(df, fi, col): fi[getColName(df, col)] = getColByName(df, col); return fi
    # Row-wise sum of c1 and c2; the result is 0 iff both columns are 0.
    def nullIfEqual(df, c1, c2):
        return df.apply(lambda x: x[getColName(df, c1)] + x[getColName(df, c2)]
                        if x[getColName(df, c1)] + x[getColName(df, c2)] != 0 else 0, axis=1)
    def sumInts(df): return df.sum(numeric_only=True)

    # Step 1) Fetch Tract Files w/CSA Labels by Name from the 2_cleaned folder.
    fileName = ''
    for name in glob.glob('AcsDataClean/B03002*5y' + str(year) + '_est.csv'):
        fileName = name
    df = pd.read_csv(fileName, index_col=0)
    # Group By CSA so that the tracts may be operated on,
    # then aggregate the numeric values by sum.
    df = df.groupby('CSA')
    df = df.sum(numeric_only=True)

    # Step 2) White (non-Hispanic) as a percent of total population.
    df1 = pd.DataFrame()
    df1['CSA'] = df.index
    df1.set_index('CSA', drop=True, inplace=True)
    tot = df['B03002_001E_Total']
    df1['White%NH'] = df['B03002_003E_Total_Not_Hispanic_or_Latino_White_alone'] / tot * 100
    return df1['White%NH']
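paa, pwhite, pasi, phisp, p2more, and ppac differ only in which B03002 column(s) they divide by total population. A hedged consolidation sketch (race_share is a made-up name; it assumes the same AcsDataClean files and full column labels):

    import glob
    import pandas as pd

    def race_share(year, columns):
        # Percent of total population in the given B03002 column(s), by CSA.
        fileName = ''
        for name in glob.glob('AcsDataClean/B03002*5y' + str(year) + '_est.csv'):
            fileName = name
        df = pd.read_csv(fileName, index_col=0).groupby('CSA').sum(numeric_only=True)
        return df[columns].sum(axis=1) / df['B03002_001E_Total'] * 100

    # e.g. paa(year) would reduce to:
    # race_share(year, ['B03002_004E_Total_Not_Hispanic_or_Latino_Black_or_African_American_alone'])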

racdiv.py

#export
#File: racdiv.py
#Author: Charles Karpati
#Date: 4/16/19
#Section: Bnia
#Email: karpati1@umbc.edu
#Description:
#  Uses ACS Table B02001 - RACE
#  Universe: Total Population
#  Uses ACS Table B03002 - HISPANIC OR LATINO ORIGIN BY RACE
#  Universe: Total Population
#  Table Creates: racdiv, paa, pwhite, pasi, phisp, p2more, ppac
#purpose:
#input: Year
#output:

import pandas as pd
import glob

def racdiv( year ):
    def getColName (df, col): return df.columns[df.columns.str.contains(pat = col)][0]
    def getColByName (df, col): return df[getColName(df, col)]
    # Copy one column from the aggregated table into the working frame.
    def addKey(df, fi, col): fi[getColName(df, col)] = getColByName(df, col); return fi
    # Row-wise sum of c1 and c2; the result is 0 iff both columns are 0.
    def nullIfEqual(df, c1, c2):
        return df.apply(lambda x: x[getColName(df, c1)] + x[getColName(df, c2)]
                        if x[getColName(df, c1)] + x[getColName(df, c2)] != 0 else 0, axis=1)
    def sumInts(df): return df.sum(numeric_only=True)

    # Step 1) Fetch both B02001 and B03002 Tract Files w/CSA Labels from the 2_cleaned folder.
    fileName = ''
    for name in glob.glob('AcsDataClean/B02001*5y' + str(year) + '_est.csv'):
        fileName = name
    df = pd.read_csv(fileName, index_col=0)
    fileName = ''
    for name in glob.glob('AcsDataClean/B03002*5y' + str(year) + '_est.csv'):
        fileName = name
    df_hisp = pd.read_csv(fileName, index_col=0)

    # Group By CSA so that the tracts may be operated on,
    # then aggregate the numeric values by sum.
    df = df.groupby('CSA')
    df_hisp = df_hisp.groupby('CSA')
    df = df.sum(numeric_only=True)
    df_hisp = df_hisp.sum(numeric_only=True)

    # Append the one column needed from the other ACS table.
    df['B03002_012E_Total_Hispanic_or_Latino'] = df_hisp['B03002_012E_Total_Hispanic_or_Latino']

    df1 = pd.DataFrame()
    df1['CSA'] = df.index
    df1.set_index('CSA', drop=True, inplace=True)

    df1['African-American%'] = df['B02001_003E_Total_Black_or_African_American_alone'] / df['B02001_001E_Total'] * 100
    df1['White%'] = df['B02001_002E_Total_White_alone'] / df['B02001_001E_Total'] * 100
    df1['American Indian%'] = df['B02001_004E_Total_American_Indian_and_Alaska_Native_alone'] / df['B02001_001E_Total'] * 100
    df1['Asian%'] = df['B02001_005E_Total_Asian_alone'] / df['B02001_001E_Total'] * 100
    df1['Native Hawaii/Pac Islander%'] = df['B02001_006E_Total_Native_Hawaiian_and_Other_Pacific_Islander_alone'] / df['B02001_001E_Total'] * 100
    df1['Hisp %'] = df['B03002_012E_Total_Hispanic_or_Latino'] / df['B02001_001E_Total'] * 100

    # Diversity index:
    # = 1 - (POWER(%AA/100,2) + POWER(%White/100,2) + POWER(%AmerInd/100,2)
    #        + POWER(%Asian/100,2) + POWER(%NatHawPac/100,2))
    #     * (POWER(%Hispanic/100,2) + POWER(1-(%Hispanic/100),2))
    df1['Diversity_index'] = (1 - (
        (df1['African-American%'] / 100) ** 2
        + (df1['White%'] / 100) ** 2
        + (df1['American Indian%'] / 100) ** 2
        + (df1['Asian%'] / 100) ** 2
        + (df1['Native Hawaii/Pac Islander%'] / 100) ** 2
    ) * (
        (df1['Hisp %'] / 100) ** 2
        + (1 - (df1['Hisp %'] / 100)) ** 2
    )) * 100
    return df1['Diversity_index']
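Written as math, with $p_i$ the five B02001 race proportions and $h$ the B03002 Hispanic proportion, the index computed above is

$$D = 100\left(1 - \Big(\sum_i p_i^2\Big)\big(h^2 + (1-h)^2\big)\right),$$

i.e. roughly one minus the probability that two randomly chosen residents match on both the race grouping and Hispanic origin, scaled to 0-100.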

retrieveAcsData.py

#export
#File: retrieveAcsData.py
#Author: Charles Karpati
#Date: 1/9/19
#Section: Bnia
#Email: karpati1@umbc.edu
#Description: This file returns ACS data given a table ID
#purpose: Retrieves ACS data from the web
#input: ID
#output: ACS data. Prints to ../../data/2_cleaned/acs/

import pandas as pd
import csv
from urllib.parse import urlencode

# This prevents timeouts
import socket
socket.setdefaulttimeout(10.0)

def retrieve_acs_data(year, tableId):
    keys = []
    vals = []
    header = []
    getTheseKeys = ''
    getTheseKeys2 = ''
    getTheseKeys3 = ''
    getTheseKeys4 = ''
    keyCount = 0

    # Step 1) Retrieve a Meta Data Table Describing the Content of the Table.
    url = 'https://api.census.gov/data/20'+year+'/acs/acs5/groups/'+tableId+'.json'
    print(url)
    metaDataTable = pd.read_json(url, orient='records')

    # Step 2) Create a Dictionary using the Meta Data Table.
    # Multiple queries may be required: the API returns at most 50 columns per
    # query, so bin the estimate ('E') columns into groups of roughly 50.
    for key in metaDataTable['variables'].keys():
        if key[-1:] == 'E':
            keyCount = keyCount + 1
            if keyCount < 50:
                getTheseKeys = getTheseKeys+','+key
            elif keyCount < 99:
                getTheseKeys2 = getTheseKeys2+','+key
            elif keyCount < 148:
                getTheseKeys3 = getTheseKeys3+','+key
            else:
                getTheseKeys4 = getTheseKeys4+','+key
            keys.append(key)
            val = metaDataTable['variables'][key]['label']
            val = key+'_'+val.replace('Estimate!!', '').replace('!!', '_').replace(' ', '_')
            vals.append(val)
    dictionary = dict(zip(keys, vals))

    # Step 3) Get the actual data we want with all the columns (obtained using the meta data table),
    # e.g. https://api.census.gov/data/2016/acs/acs5?get=NAME,B11001_002E&for=county:005&in=state:24
    urlRoot = 'https://api.census.gov/data/20'+year+'/acs/acs5?'
    def getParams(keys):
        return { 'get': 'NAME'+keys, 'for': 'tract:*', 'in': 'state:24 county:510',
                 'key': '829bf6f2e037372acbba32ba5731647c5127fdb0' }
    def getBCityParams(keys):
        return { 'get': 'NAME'+keys, 'for': 'county:510', 'in': 'state:24',
                 'key': '829bf6f2e037372acbba32ba5731647c5127fdb0' }
    def readIn( url ):
        tbl = pd.read_json(url, orient='records')
        tbl.columns = tbl.iloc[0]
        return tbl
    def appendColumns( table, params ):
        # Get Tract and City Records For Specific Columns
        table2 = readIn( urlRoot+urlencode(getParams(params)) )
        table3 = readIn( urlRoot+urlencode(getBCityParams(params)) )
        table3['tract'] = '010000'
        # Concatenate the Records
        table2 = pd.concat([table2, table3], ignore_index=True)
        # Merge to Master Table
        table = pd.merge(table, table2, how='left',
                         left_on=["NAME","state","county","tract"],
                         right_on=["NAME","state","county","tract"])
        return table

    # Get Tract Data
    url = urlRoot+urlencode(getParams(getTheseKeys))
    table = readIn(url)
    table = table.iloc[1:]
    # Get Baltimore City's Data
    url = urlRoot+urlencode(getBCityParams(getTheseKeys))
    table2 = readIn(url)
    table2 = table2[1:]
    table2['tract'] = '010000'
    # Append Baltimore to the Tracts
    table = pd.concat([table, table2], ignore_index=True)
    if getTheseKeys2 != '': table = appendColumns(table, getTheseKeys2)
    if getTheseKeys3 != '': table = appendColumns(table, getTheseKeys3)
    if getTheseKeys4 != '': table = appendColumns(table, getTheseKeys4)

    # Step 4) Prepare Column Names using the meta data table.
    # The raw data has column names in the first row as well.
    # Replace column IDs with labels from the dictionary where applicable (should be always).
    for column in table.columns:
        if column in keys:
            header.append(dictionary[column])
        else:
            header.append(column)
    table.columns = header

    # Step 5) Everything Else.
    # Prettify Names
    table['NAME'] = table['NAME'].str.replace(', Baltimore city, Maryland', '')
    table['NAME'][table['NAME'] == 'Baltimore city, Maryland'] = 'Baltimore City'
    # Convert String Columns to Numeric where Applicable
    table = table.apply(pd.to_numeric, errors='ignore')
    return table
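A usage sketch, assuming network access and that the hard-coded API key is still valid; note the year is passed as a two-digit string because the function prefixes '20'. The file name is hypothetical but mirrors the pattern the indicator modules glob for:

    # Pull table B19001 for the 2017 5-year release and save it for the cleaning step.
    df = retrieve_acs_data('17', 'B19001')
    df.to_csv('../../data/2_cleaned/acs/B19001_5y17_est.csv')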

sclemp.py

#export
#File: sclemp.py
#Author: Charles Karpati
#Date: 1/17/19
#Section: Bnia
#Email: karpati1@umbc.edu
#Description:
#  Uses ACS Table B14005 - SEX BY SCHOOL ENROLLMENT BY EDUCATIONAL ATTAINMENT BY EMPLOYMENT STATUS
#  FOR THE POPULATION 16 TO 19 YEARS
#  Universe: Population 16 to 19 years
#purpose: Produce Education and Youth - Percentage of Population aged 16-19 in School
#         and/or Employed Indicator
#input: Year
#output:

import pandas as pd
import glob

def sclemp( year ):
    def getColName (df, col): return df.columns[df.columns.str.contains(pat = col)][0]
    def getColByName (df, col): return df[getColName(df, col)]
    # Copy one column from the aggregated table into the working frame.
    def addKey(df, fi, col): fi[getColName(df, col)] = getColByName(df, col); return fi
    # Row-wise sum of c1 and c2; the result is 0 iff both columns are 0.
    def nullIfEqual(df, c1, c2):
        return df.apply(lambda x: x[getColName(df, c1)] + x[getColName(df, c2)]
                        if x[getColName(df, c1)] + x[getColName(df, c2)] != 0 else 0, axis=1)
    def sumInts(df): return df.sum(numeric_only=True)

    # Step 1) Fetch Tract Files w/CSA Labels by Name from the 2_cleaned folder.
    fileName = ''
    for name in glob.glob('AcsDataClean/B14005*5y' + str(year) + '_est.csv'):
        fileName = name
    df = pd.read_csv(fileName, index_col=0)
    # Group By CSA so that the tracts may be operated on,
    # then aggregate the numeric values by sum.
    df = df.groupby('CSA')
    df = sumInts(df)
    # Add 'BALTIMORE' which is the SUM of all the CSAs

    # Step 2) Prepare the columns.
    # Final Dataframe
    fi = pd.DataFrame()
    for col in ['B14005_004E', 'B14005_005E', 'B14005_006E', 'B14005_009E', 'B14005_013E',
                'B14005_018E', 'B14005_019E', 'B14005_020E', 'B14005_023E', 'B14005_027E',
                'B14005_001E']:
        fi = addKey(df, fi, col)
    # Numerators
    numerators = pd.DataFrame()
    for col in ['B14005_004E', 'B14005_005E', 'B14005_006E', 'B14005_009E', 'B14005_013E',
                'B14005_018E', 'B14005_019E', 'B14005_020E', 'B14005_023E', 'B14005_027E']:
        numerators = addKey(df, numerators, col)
    # Denominators
    denominators = pd.DataFrame()
    for col in ['B14005_001E']:
        denominators = addKey(df, denominators, col)

    # Step 3) Run the Calculation:
    # ( ( value[1] + ... + value[10] ) / nullif(value[11],0) )*100
    fi['numerator'] = numerators.sum(axis=1)
    fi['denominator'] = denominators.sum(axis=1)
    # Delete Rows where the 'denominator' column is 0
    fi = fi[fi['denominator'] != 0]
    fi['final'] = (fi['numerator'] / fi['denominator']) * 100
    return fi['final']

""" /* */ --
WITH tbl AS (
    select csa, ( ( value[1] + value[2] + value[3] + value[4] + value[5] + value[6] + value[7]
                  + value[8] + value[9] + value[10] ) / nullif(value[11],0) )*100::numeric as result
    from vital_signs.get_acs_vars_csa_and_bc('2014', ARRAY['B14005_004E','B14005_005E','B14005_006E',
        'B14005_009E','B14005_013E','B14005_018E','B14005_019E','B14005_020E','B14005_023E',
        'B14005_027E','B14005_001E'])
)
update vital_signs.data set sclemp = result from tbl
where data2.csa = tbl.csa and update_data_year = '2014' and data_year = '2014';
"""

tpop.py

#export
#File: tpop.py
#Author: Charles Karpati
#Date: 4/16/19
#Section: Bnia
#Email: karpati1@umbc.edu
#Description:
#  Uses ACS Table B01001 - SEX BY AGE
#  Universe: Total population
#  Table Creates: tpop, female, male, age5, age18, age24, age64, age65
#purpose:
#input: Year
#output:

import pandas as pd
import glob

def tpop( year ):
    def getColName(df, col):
        return df.columns[df.columns.str.contains(pat=col)][0]

    def getColByName(df, col):
        return df[getColName(df, col)]

    def addKey(df, fi, col):
        key = getColName(df, col)
        val = getColByName(df, col)
        fi[key] = val
        return fi

    def nullIfEqual(df, c1, c2):
        return df.apply(lambda x: x[getColName(df, c1)] + x[getColName(df, c2)]
                        if x[getColName(df, c1)] + x[getColName(df, c2)] != 0 else 0, axis=1)

    def sumInts(df):
        return df.sum(numeric_only=True)

    #~~~~~~~~~~~~~~~
    # Step 1)
    # Fetch Tract Files w/ CSA Labels by Name from the 2_cleaned folder.
    #~~~~~~~~~~~~~~~
    fileName = ''
    for name in glob.glob('AcsDataClean/B01001*5y'+str(year)+'_est.csv'):
        fileName = name
    df = pd.read_csv(fileName, index_col=0)

    # Aggregate by CSA:
    # group by CSA so that the tracts may be operated on together,
    # then aggregate the numeric values by sum.
    df = df.groupby('CSA')
    df = df.sum(numeric_only=True)

    # df.columns
    total = df['B01001_001E_Total']
    df1 = pd.DataFrame()
    df1['CSA'] = df.index
    df1.set_index('CSA', drop=True, inplace=True)
    df1['totalPop'] = total
    return df1['totalPop']
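The header notes that B01001 also yields female, male, and age-bucket indicators, though tpop() returns only the total. A minimal sketch of the sex split from the same file (sex_split is a hypothetical helper, not one of the original scripts; B01001_002E and B01001_026E are the table's standard male and female totals, matched by prefix because the cleaned labels carry suffixes):

import glob
import pandas as pd

def sex_split(year):
    """Sketch: percent female per CSA from the same B01001 file tpop() reads."""
    fileName = ''
    for name in glob.glob('AcsDataClean/B01001*5y'+str(year)+'_est.csv'):
        fileName = name
    df = pd.read_csv(fileName, index_col=0).groupby('CSA').sum(numeric_only=True)
    def col(c):
        return df.columns[df.columns.str.contains(c)][0]
    male = df[col('B01001_002E')]
    female = df[col('B01001_026E')]
    return female / (male + female) * 100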

trav14.py

#export
#File: trav14.py
#Author: Charles Karpati
#Date: 1/17/19
#Section: Bnia
#Email: karpati1@umbc.edu
#Description:
#  Uses ACS Table B08303 - TRAVEL TIME TO WORK
#  (Universe: Workers 16 years and over who did not work at home)
#  Table Creates: trav14, trav29, trav44, trav45
#purpose: Produce Sustainability - Percent of Employed Population with Travel Time to Work of 0-14 Minutes Indicator
#input: Year
#output:

import pandas as pd
import glob

def trav14( year ):
    def getColName(df, col):
        return df.columns[df.columns.str.contains(pat=col)][0]

    def getColByName(df, col):
        return df[getColName(df, col)]

    def addKey(df, fi, col):
        key = getColName(df, col)
        val = getColByName(df, col)
        fi[key] = val
        return fi

    def nullIfEqual(df, c1, c2):
        return df.apply(lambda x: x[getColName(df, c1)] + x[getColName(df, c2)]
                        if x[getColName(df, c1)] + x[getColName(df, c2)] != 0 else 0, axis=1)

    def sumInts(df):
        return df.sum(numeric_only=True)

    #~~~~~~~~~~~~~~~
    # Step 1)
    # Fetch Tract Files w/ CSA Labels by Name from the 2_cleaned folder.
    #~~~~~~~~~~~~~~~
    fileName = ''
    for name in glob.glob('AcsDataClean/B08303*5y'+str(year)+'_est.csv'):
        fileName = name
    df = pd.read_csv(fileName, index_col=0)

    # Aggregate by CSA:
    # group by CSA so that the tracts may be operated on together,
    # then aggregate the numeric values by sum.
    df = df.groupby('CSA')
    df = sumInts(df)
    # Add 'BALTIMORE', which is the SUM of all the CSAs

    #~~~~~~~~~~~~~~~
    # Step 2)
    # Prepare the columns
    #~~~~~~~~~~~~~~~
    # Final dataframe
    fi = pd.DataFrame()
    columns = ['B08303_002E', 'B08303_003E', 'B08303_004E', 'B08303_001E']
    for col in columns:
        fi = addKey(df, fi, col)

    # Numerators
    numerators = pd.DataFrame()
    columns = ['B08303_002E', 'B08303_003E', 'B08303_004E']
    for col in columns:
        numerators = addKey(df, numerators, col)

    # Denominators
    denominators = pd.DataFrame()
    columns = ['B08303_001E']
    for col in columns:
        denominators = addKey(df, denominators, col)
    # construct the denominator, returns 0 iff the other two rows are equal.

    #~~~~~~~~~~~~~~~
    # Step 3)
    # Run the Calculation
    # ( (value[1] + value[2] + value[3]) / nullif(value[4],0) )*100
    #~~~~~~~~~~~~~~~
    fi['numerator'] = numerators.sum(axis=1)
    fi['denominator'] = denominators.sum(axis=1)
    fi = fi[fi['denominator'] != 0]  # Delete rows where the 'denominator' column is 0
    fi['final'] = (fi['numerator'] / fi['denominator']) * 100
    return fi['final']

"""
WITH tbl AS (
    select csa,
        ( (value[1] + value[2] + value[3]) / nullif(value[4],0) )*100::numeric as result
    from vital_signs.get_acs_vars_csa_and_bc('2014',
        ARRAY['B08303_002E','B08303_003E','B08303_004E','B08303_001E'])
)
update vital_signs.data
set trav14_ = result
from tbl
where data2.csa = tbl.csa and update_data_year = '2014' and data_year = '2014';
"""

trav29.py

#export
#File: trav29.py
#Author: Charles Karpati
#Date: 1/17/19
#Section: Bnia
#Email: karpati1@umbc.edu
#Description:
#  Uses ACS Table B08303 - TRAVEL TIME TO WORK
#  (Universe: Workers 16 years and over who did not work at home)
#  Table Creates: trav14, trav29, trav44, trav45
#purpose: Produce Sustainability - Percent of Employed Population with Travel Time to Work of 15-29 Minutes Indicator
#input: Year
#output:

import pandas as pd
import glob

def trav29( year ):
    def getColName(df, col):
        return df.columns[df.columns.str.contains(pat=col)][0]

    def getColByName(df, col):
        return df[getColName(df, col)]

    def addKey(df, fi, col):
        key = getColName(df, col)
        val = getColByName(df, col)
        fi[key] = val
        return fi

    def nullIfEqual(df, c1, c2):
        return df.apply(lambda x: x[getColName(df, c1)] + x[getColName(df, c2)]
                        if x[getColName(df, c1)] + x[getColName(df, c2)] != 0 else 0, axis=1)

    def sumInts(df):
        return df.sum(numeric_only=True)

    #~~~~~~~~~~~~~~~
    # Step 1)
    # Fetch Tract Files w/ CSA Labels by Name from the 2_cleaned folder.
    #~~~~~~~~~~~~~~~
    fileName = ''
    for name in glob.glob('AcsDataClean/B08303*5y'+str(year)+'_est.csv'):
        fileName = name
    df = pd.read_csv(fileName, index_col=0)

    # Aggregate by CSA:
    # group by CSA so that the tracts may be operated on together,
    # then aggregate the numeric values by sum.
    df = df.groupby('CSA')
    df = sumInts(df)
    # Add 'BALTIMORE', which is the SUM of all the CSAs

    #~~~~~~~~~~~~~~~
    # Step 2)
    # Prepare the columns
    #~~~~~~~~~~~~~~~
    # Final dataframe
    fi = pd.DataFrame()
    columns = ['B08303_005E', 'B08303_006E', 'B08303_007E', 'B08303_001E']
    for col in columns:
        fi = addKey(df, fi, col)

    # Numerators
    numerators = pd.DataFrame()
    columns = ['B08303_005E', 'B08303_006E', 'B08303_007E']
    for col in columns:
        numerators = addKey(df, numerators, col)

    # Denominators
    denominators = pd.DataFrame()
    columns = ['B08303_001E']
    for col in columns:
        denominators = addKey(df, denominators, col)
    # construct the denominator, returns 0 iff the other two rows are equal.

    #~~~~~~~~~~~~~~~
    # Step 3)
    # Run the Calculation
    # ( (value[1] + value[2] + value[3]) / nullif(value[4],0) )*100
    #~~~~~~~~~~~~~~~
    fi['numerator'] = numerators.sum(axis=1)
    fi['denominator'] = denominators.sum(axis=1)
    fi = fi[fi['denominator'] != 0]  # Delete rows where the 'denominator' column is 0
    fi['final'] = (fi['numerator'] / fi['denominator']) * 100
    return fi['final']

"""
WITH tbl AS (
    select csa,
        ( (value[1] + value[2] + value[3]) / nullif(value[4],0) )*100::numeric as result
    from vital_signs.get_acs_vars_csa_and_bc('2014',
        ARRAY['B08303_005E','B08303_006E','B08303_007E','B08303_001E'])
)
update vital_signs.data
set trav29_ = result
from tbl
where data2.csa = tbl.csa and update_data_year = '2014' and data_year = '2014';
"""

trav44.py

#export
#File: trav44.py
#Author: Charles Karpati
#Date: 1/17/19
#Section: Bnia
#Email: karpati1@umbc.edu
#Description:
#  Uses ACS Table B08303 - TRAVEL TIME TO WORK
#  (Universe: Workers 16 years and over who did not work at home)
#  Table Creates: trav14, trav29, trav44, trav45
#purpose: Produce Sustainability - Percent of Employed Population with Travel Time to Work of 30-44 Minutes Indicator
#input: Year
#output:

import pandas as pd
import glob

def trav44( year ):
    def getColName(df, col):
        return df.columns[df.columns.str.contains(pat=col)][0]

    def getColByName(df, col):
        return df[getColName(df, col)]

    def addKey(df, fi, col):
        key = getColName(df, col)
        val = getColByName(df, col)
        fi[key] = val
        return fi

    def nullIfEqual(df, c1, c2):
        return df.apply(lambda x: x[getColName(df, c1)] + x[getColName(df, c2)]
                        if x[getColName(df, c1)] + x[getColName(df, c2)] != 0 else 0, axis=1)

    def sumInts(df):
        return df.sum(numeric_only=True)

    #~~~~~~~~~~~~~~~
    # Step 1)
    # Fetch Tract Files w/ CSA Labels by Name from the 2_cleaned folder.
    #~~~~~~~~~~~~~~~
    fileName = ''
    for name in glob.glob('AcsDataClean/B08303*5y'+str(year)+'_est.csv'):
        fileName = name
    df = pd.read_csv(fileName, index_col=0)

    # Aggregate by CSA:
    # group by CSA so that the tracts may be operated on together,
    # then aggregate the numeric values by sum.
    df = df.groupby('CSA')
    df = sumInts(df)
    # Add 'BALTIMORE', which is the SUM of all the CSAs

    #~~~~~~~~~~~~~~~
    # Step 2)
    # Prepare the columns
    #~~~~~~~~~~~~~~~
    # Final dataframe
    fi = pd.DataFrame()
    columns = ['B08303_008E', 'B08303_009E', 'B08303_010E', 'B08303_001E']
    for col in columns:
        fi = addKey(df, fi, col)

    # Numerators
    numerators = pd.DataFrame()
    columns = ['B08303_008E', 'B08303_009E', 'B08303_010E']
    for col in columns:
        numerators = addKey(df, numerators, col)

    # Denominators
    denominators = pd.DataFrame()
    columns = ['B08303_001E']
    for col in columns:
        denominators = addKey(df, denominators, col)
    # construct the denominator, returns 0 iff the other two rows are equal.

    #~~~~~~~~~~~~~~~
    # Step 3)
    # Run the Calculation
    # ( (value[1] + value[2] + value[3]) / nullif(value[4],0) )*100
    #~~~~~~~~~~~~~~~
    fi['numerator'] = numerators.sum(axis=1)
    fi['denominator'] = denominators.sum(axis=1)
    fi = fi[fi['denominator'] != 0]  # Delete rows where the 'denominator' column is 0
    fi['final'] = (fi['numerator'] / fi['denominator']) * 100
    return fi['final']

"""
WITH tbl AS (
    select csa,
        ( (value[1] + value[2] + value[3]) / nullif(value[4],0) )*100::numeric as result
    from vital_signs.get_acs_vars_csa_and_bc('2014',
        ARRAY['B08303_008E','B08303_009E','B08303_010E','B08303_001E'])
)
update vital_signs.data
set trav44_ = result
from tbl
where data2.csa = tbl.csa and update_data_year = '2014' and data_year = '2014';
"""

trav45.py

#export
#File: trav45.py
#Author: Charles Karpati
#Date: 1/17/19
#Section: Bnia
#Email: karpati1@umbc.edu
#Description:
#  Uses ACS Table B08303 - TRAVEL TIME TO WORK
#  (Universe: Workers 16 years and over who did not work at home)
#  Table Creates: trav14, trav29, trav44, trav45
#purpose: Produce Sustainability - Percent of Employed Population with Travel Time to Work of 45 Minutes and Over Indicator
#input: Year
#output:

import pandas as pd
import glob

def trav45( year ):
    def getColName(df, col):
        return df.columns[df.columns.str.contains(pat=col)][0]

    def getColByName(df, col):
        return df[getColName(df, col)]

    def addKey(df, fi, col):
        key = getColName(df, col)
        val = getColByName(df, col)
        fi[key] = val
        return fi

    def nullIfEqual(df, c1, c2):
        return df.apply(lambda x: x[getColName(df, c1)] + x[getColName(df, c2)]
                        if x[getColName(df, c1)] + x[getColName(df, c2)] != 0 else 0, axis=1)

    def sumInts(df):
        return df.sum(numeric_only=True)

    #~~~~~~~~~~~~~~~
    # Step 1)
    # Fetch Tract Files w/ CSA Labels by Name from the 2_cleaned folder.
    #~~~~~~~~~~~~~~~
    fileName = ''
    for name in glob.glob('AcsDataClean/B08303*5y'+str(year)+'_est.csv'):
        fileName = name
    df = pd.read_csv(fileName, index_col=0)

    # Aggregate by CSA:
    # group by CSA so that the tracts may be operated on together,
    # then aggregate the numeric values by sum.
    df = df.groupby('CSA')
    df = sumInts(df)
    # Add 'BALTIMORE', which is the SUM of all the CSAs

    #~~~~~~~~~~~~~~~
    # Step 2)
    # Prepare the columns
    #~~~~~~~~~~~~~~~
    # Final dataframe
    fi = pd.DataFrame()
    columns = ['B08303_011E', 'B08303_012E', 'B08303_013E', 'B08303_001E']
    for col in columns:
        fi = addKey(df, fi, col)

    # Numerators
    numerators = pd.DataFrame()
    columns = ['B08303_011E', 'B08303_012E', 'B08303_013E']
    for col in columns:
        numerators = addKey(df, numerators, col)

    # Denominators
    denominators = pd.DataFrame()
    columns = ['B08303_001E']
    for col in columns:
        denominators = addKey(df, denominators, col)
    # construct the denominator, returns 0 iff the other two rows are equal.

    #~~~~~~~~~~~~~~~
    # Step 3)
    # Run the Calculation
    # ( (value[1] + value[2] + value[3]) / nullif(value[4],0) )*100
    #~~~~~~~~~~~~~~~
    fi['numerator'] = numerators.sum(axis=1)
    fi['denominator'] = denominators.sum(axis=1)
    fi = fi[fi['denominator'] != 0]  # Delete rows where the 'denominator' column is 0
    fi['final'] = (fi['numerator'] / fi['denominator']) * 100
    return fi['final']

"""
WITH tbl AS (
    select csa,
        ( (value[1] + value[2] + value[3]) / nullif(value[4],0) )*100::numeric as result
    from vital_signs.get_acs_vars_csa_and_bc('2014',
        ARRAY['B08303_011E','B08303_012E','B08303_013E','B08303_001E'])
)
update vital_signs.data
set trav45_ = result
from tbl
where data2.csa = tbl.csa and update_data_year = '2014' and data_year = '2014';
"""
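A side note, not part of the original scripts: the four travel-time indicators above are identical except for which B08303 columns feed the numerator, so they could be collapsed into one parameterized helper. A minimal sketch, assuming the same AcsDataClean/ layout (the name trav and its parameters are hypothetical):

import glob
import pandas as pd

def trav(year, numerator_cols, denominator_col='B08303_001E'):
    """Percent of workers whose commute falls in the given B08303 buckets."""
    fileName = ''
    for name in glob.glob('AcsDataClean/B08303*5y'+str(year)+'_est.csv'):
        fileName = name
    df = pd.read_csv(fileName, index_col=0).groupby('CSA').sum(numeric_only=True)
    # Cleaned column labels carry suffixes, so match on the variable ID prefix.
    def col(c):
        return df.columns[df.columns.str.contains(c)][0]
    numerator = sum(df[col(c)] for c in numerator_cols)
    denominator = df[col(denominator_col)]
    denominator = denominator.mask(denominator == 0)  # mimics SQL nullif(x, 0)
    return numerator / denominator * 100

# trav14(year) == trav(year, ['B08303_002E','B08303_003E','B08303_004E'])
# trav29(year) == trav(year, ['B08303_005E','B08303_006E','B08303_007E'])
# trav44(year) == trav(year, ['B08303_008E','B08303_009E','B08303_010E'])
# trav45(year) == trav(year, ['B08303_011E','B08303_012E','B08303_013E'])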

unempl.py

#export
#File: unempl.py
#Author: Charles Karpati
#Date: 1/17/19
#Section: Bnia
#Email: karpati1@umbc.edu
#Description:
#  Uses ACS Table B23001 - SEX BY AGE BY EMPLOYMENT STATUS FOR THE POPULATION 16 YEARS AND OVER
#  Universe: Population 16 years and over
#  Table Creates: empl, unempl, unempr, nilf
#purpose: Produce Workforce and Economic Development - Percent Population 16-64 Unemployed and Looking for Work Indicator
#input: Year
#output:

import pandas as pd
import glob

def unempl( year ):
    def getColName(df, col):
        return df.columns[df.columns.str.contains(pat=col)][0]

    def getColByName(df, col):
        return df[getColName(df, col)]

    def addKey(df, fi, col):
        key = getColName(df, col)
        val = getColByName(df, col)
        fi[key] = val
        return fi

    def nullIfEqual(df, c1, c2):
        return df.apply(lambda x: x[getColName(df, c1)] + x[getColName(df, c2)]
                        if x[getColName(df, c1)] + x[getColName(df, c2)] != 0 else 0, axis=1)

    def sumInts(df):
        return df.sum(numeric_only=True)

    #~~~~~~~~~~~~~~~
    # Step 1)
    # Fetch Tract Files w/ CSA Labels by Name from the 2_cleaned folder.
    #~~~~~~~~~~~~~~~
    fileName = ''
    for name in glob.glob('AcsDataClean/B23001*5y'+str(year)+'_est.csv'):
        fileName = name
    df = pd.read_csv(fileName, index_col=0)

    # Aggregate by CSA:
    # group by CSA so that the tracts may be operated on together,
    # then aggregate the numeric values by sum.
    df = df.groupby('CSA')
    df = sumInts(df)
    # Add 'BALTIMORE', which is the SUM of all the CSAs

    #~~~~~~~~~~~~~~~
    # Step 2)
    # Prepare the columns
    #~~~~~~~~~~~~~~~
    # Final dataframe
    fi = pd.DataFrame()
    columns = ['B23001_003E', 'B23001_010E', 'B23001_017E', 'B23001_024E', 'B23001_031E',
               'B23001_038E', 'B23001_045E', 'B23001_052E', 'B23001_059E', 'B23001_066E',
               'B23001_089E', 'B23001_096E', 'B23001_103E', 'B23001_110E', 'B23001_117E',
               'B23001_124E', 'B23001_131E', 'B23001_138E', 'B23001_145E', 'B23001_152E',
               'B23001_008E', 'B23001_015E', 'B23001_022E', 'B23001_029E', 'B23001_036E',
               'B23001_043E', 'B23001_050E', 'B23001_057E', 'B23001_064E', 'B23001_071E',
               'B23001_094E', 'B23001_101E', 'B23001_108E', 'B23001_115E', 'B23001_122E',
               'B23001_129E', 'B23001_136E', 'B23001_143E', 'B23001_150E', 'B23001_157E']
    for col in columns:
        fi = addKey(df, fi, col)

    # Numerators
    numerators = pd.DataFrame()
    columns = ['B23001_008E', 'B23001_015E', 'B23001_022E', 'B23001_029E', 'B23001_036E',
               'B23001_043E', 'B23001_050E', 'B23001_057E', 'B23001_064E', 'B23001_071E',
               'B23001_094E', 'B23001_101E', 'B23001_108E', 'B23001_115E', 'B23001_122E',
               'B23001_129E', 'B23001_136E', 'B23001_143E', 'B23001_150E', 'B23001_157E']
    for col in columns:
        numerators = addKey(df, numerators, col)

    # Denominators
    denominators = pd.DataFrame()
    columns = ['B23001_003E', 'B23001_010E', 'B23001_017E', 'B23001_024E', 'B23001_031E',
               'B23001_038E', 'B23001_045E', 'B23001_052E', 'B23001_059E', 'B23001_066E',
               'B23001_089E', 'B23001_096E', 'B23001_103E', 'B23001_110E', 'B23001_117E',
               'B23001_124E', 'B23001_131E', 'B23001_138E', 'B23001_145E', 'B23001_152E']
    for col in columns:
        denominators = addKey(df, denominators, col)
    # construct the denominator, returns 0 iff the other two rows are equal.

    #~~~~~~~~~~~~~~~
    # Step 3)
    # Run the Calculation
    # ( (value[21]+...+value[40])   -- civilian labor force unemployed 16-64
    #   / nullif( (value[1]+...+value[20]), 0 )   -- population 16 to 64
    # )*100
    #~~~~~~~~~~~~~~~
    fi['numerator'] = numerators.sum(axis=1)
    fi['denominator'] = denominators.sum(axis=1)
    fi = fi[fi['denominator'] != 0]  # Delete rows where the 'denominator' column is 0
    fi['final'] = (fi['numerator'] / fi['denominator']) * 100
    return fi['final']

"""
WITH tbl AS (
    select csa,
        ( (value[21]+value[22]+value[23]+value[24]+value[25]+value[26]+value[27]+value[28]+value[29]+value[30]+value[31]+value[32]+value[33]+value[34]+value[35]+value[36]+value[37]+value[38]+value[39]+value[40]) --civilian labor force unemployed 16-64
        / nullif( (value[1]+value[2]+value[3]+value[4]+value[5]+value[6]+value[7]+value[8]+value[9]+value[10]+value[11]+value[12]+value[13]+value[14]+value[15]+value[16]+value[17]+value[18]+value[19]+value[20]) -- population 16 to 64
        ,0) )*100::numeric as result
    from vital_signs.get_acs_vars_csa_and_bc('2014',
        ARRAY['B23001_003E','B23001_010E','B23001_017E','B23001_024E','B23001_031E','B23001_038E','B23001_045E','B23001_052E','B23001_059E','B23001_066E','B23001_089E','B23001_096E','B23001_103E','B23001_110E','B23001_117E','B23001_124E','B23001_131E','B23001_138E','B23001_145E','B23001_152E','B23001_008E','B23001_015E','B23001_022E','B23001_029E','B23001_036E','B23001_043E','B23001_050E','B23001_057E','B23001_064E','B23001_071E','B23001_094E','B23001_101E','B23001_108E','B23001_115E','B23001_122E','B23001_129E','B23001_136E','B23001_143E','B23001_150E','B23001_157E'])
)
update vital_signs.data
set unempl = result
from tbl
where data2.csa = tbl.csa and update_data_year = '2014' and data_year = '2014';
"""

unempr.py

#export
#File: unempr.py
#Author: Charles Karpati
#Date: 1/24/19
#Section: Bnia
#Email: karpati1@umbc.edu
#Description:
#  Uses ACS Table B23001 - SEX BY AGE BY EMPLOYMENT STATUS FOR THE POPULATION 16 YEARS AND OVER
#  Universe: Population 16 years and over
#  Table Creates: empl, unempl, unempr, nilf
#purpose: Produce Workforce and Economic Development - Unemployment Rate (Percent of Civilian Labor Force aged 16-64 Unemployed) Indicator
#input: Year
#output:

import pandas as pd
import glob

def unempr( year ):
    def getColName(df, col):
        return df.columns[df.columns.str.contains(pat=col)][0]

    def getColByName(df, col):
        return df[getColName(df, col)]

    def addKey(df, fi, col):
        key = getColName(df, col)
        val = getColByName(df, col)
        fi[key] = val
        return fi

    def nullIfEqual(df, c1, c2):
        return df.apply(lambda x: x[getColName(df, c1)] + x[getColName(df, c2)]
                        if x[getColName(df, c1)] + x[getColName(df, c2)] != 0 else 0, axis=1)

    def sumInts(df):
        return df.sum(numeric_only=True)

    #~~~~~~~~~~~~~~~
    # Step 1)
    # Fetch Tract Files w/ CSA Labels by Name from the 2_cleaned folder.
    #~~~~~~~~~~~~~~~
    fileName = ''
    for name in glob.glob('AcsDataClean/B23001*5y'+str(year)+'_est.csv'):
        fileName = name
    df = pd.read_csv(fileName, index_col=0)

    # Aggregate by CSA:
    # group by CSA so that the tracts may be operated on together,
    # then aggregate the numeric values by sum.
    df = df.groupby('CSA')
    df = sumInts(df)
    # Add 'BALTIMORE', which is the SUM of all the CSAs

    #~~~~~~~~~~~~~~~
    # Step 2)
    # Prepare the columns
    #~~~~~~~~~~~~~~~
    # Final dataframe
    fi = pd.DataFrame()
    columns = ['B23001_006E', 'B23001_013E', 'B23001_020E', 'B23001_027E', 'B23001_034E',
               'B23001_041E', 'B23001_048E', 'B23001_055E', 'B23001_062E', 'B23001_069E',
               'B23001_092E', 'B23001_099E', 'B23001_106E', 'B23001_113E', 'B23001_120E',
               'B23001_127E', 'B23001_134E', 'B23001_141E', 'B23001_148E', 'B23001_155E',
               'B23001_008E', 'B23001_015E', 'B23001_022E', 'B23001_029E', 'B23001_036E',
               'B23001_043E', 'B23001_050E', 'B23001_057E', 'B23001_064E', 'B23001_071E',
               'B23001_094E', 'B23001_101E', 'B23001_108E', 'B23001_115E', 'B23001_122E',
               'B23001_129E', 'B23001_136E', 'B23001_143E', 'B23001_150E', 'B23001_157E']
    for col in columns:
        fi = addKey(df, fi, col)

    # Numerators
    numerators = pd.DataFrame()
    columns = ['B23001_008E', 'B23001_015E', 'B23001_022E', 'B23001_029E', 'B23001_036E',
               'B23001_043E', 'B23001_050E', 'B23001_057E', 'B23001_064E', 'B23001_071E',
               'B23001_094E', 'B23001_101E', 'B23001_108E', 'B23001_115E', 'B23001_122E',
               'B23001_129E', 'B23001_136E', 'B23001_143E', 'B23001_150E', 'B23001_157E']
    for col in columns:
        numerators = addKey(df, numerators, col)

    # Denominators
    denominators = pd.DataFrame()
    columns = ['B23001_006E', 'B23001_013E', 'B23001_020E', 'B23001_027E', 'B23001_034E',
               'B23001_041E', 'B23001_048E', 'B23001_055E', 'B23001_062E', 'B23001_069E',
               'B23001_092E', 'B23001_099E', 'B23001_106E', 'B23001_113E', 'B23001_120E',
               'B23001_127E', 'B23001_134E', 'B23001_141E', 'B23001_148E', 'B23001_155E']
    for col in columns:
        denominators = addKey(df, denominators, col)
    # construct the denominator, returns 0 iff the other two rows are equal.

    #~~~~~~~~~~~~~~~
    # Step 3)
    # Run the Calculation
    # ( (value[21]+...+value[40])   -- civilian labor force unemployed 16-64
    #   / nullif( (value[1]+...+value[20]), 0 )   -- civilian labor force 16-64
    # )*100
    #~~~~~~~~~~~~~~~
    fi['numerator'] = numerators.sum(axis=1)
    fi['denominator'] = denominators.sum(axis=1)
    fi = fi[fi['denominator'] != 0]  # Delete rows where the 'denominator' column is 0
    fi['final'] = (fi['numerator'] / fi['denominator']) * 100
    return fi['final']

"""
WITH tbl AS (
    select csa,
        ( (value[21]+value[22]+value[23]+value[24]+value[25]+value[26]+value[27]+value[28]+value[29]+value[30]+value[31]+value[32]+value[33]+value[34]+value[35]+value[36]+value[37]+value[38]+value[39]+value[40]) --civilian labor force unemployed 16-64
        / nullif( (value[1]+value[2]+value[3]+value[4]+value[5]+value[6]+value[7]+value[8]+value[9]+value[10]+value[11]+value[12]+value[13]+value[14]+value[15]+value[16]+value[17]+value[18]+value[19]+value[20]) --civilian labor force 16-64
        ,0) )*100::numeric as result
    from vital_signs.get_acs_vars_csa_and_bc('2013',
        ARRAY['B23001_006E','B23001_013E','B23001_020E','B23001_027E','B23001_034E','B23001_041E','B23001_048E','B23001_055E','B23001_062E','B23001_069E','B23001_092E','B23001_099E','B23001_106E','B23001_113E','B23001_120E','B23001_127E','B23001_134E','B23001_141E','B23001_148E','B23001_155E','B23001_008E','B23001_015E','B23001_022E','B23001_029E','B23001_036E','B23001_043E','B23001_050E','B23001_057E','B23001_064E','B23001_071E','B23001_094E','B23001_101E','B23001_108E','B23001_115E','B23001_122E','B23001_129E','B23001_136E','B23001_143E','B23001_150E','B23001_157E'])
)
update vital_signs.data
set unempr = result
from tbl
where data2.csa = tbl.csa and update_data_year = '2013' and data_year = '2014';
"""
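Worth flagging: unempl and unempr share the same numerator (unemployed civilians aged 16-64) but divide by different universes, which is why both indicators exist. A toy comparison with made-up counts:

# unempl divides by the total population 16-64;
# unempr divides by the civilian labor force 16-64 (the conventional unemployment rate).
unemployed = 500            # hypothetical counts
population_16_64 = 10_000
labor_force_16_64 = 6_000

unempl_pct = unemployed / population_16_64 * 100   # 5.0
unempr_pct = unemployed / labor_force_16_64 * 100  # ~8.3
print(round(unempl_pct, 1), round(unempr_pct, 1))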

walked.py

#export
#File: walked.py
#Author: Charles Karpati
#Date: 1/24/19
#Section: Bnia
#Email: karpati1@umbc.edu
#Description:
#  Uses ACS Table B08101 - MEANS OF TRANSPORTATION TO WORK BY AGE
#  Universe: Workers 16 years and over
#  Table Creates: othrcom, drvalone, carpool, pubtran, walked
#purpose: Produce Sustainability - Percent of Population that Walks to Work Indicator
#input: Year
#output:

import pandas as pd
import glob

def walked( year ):
    def getColName(df, col):
        return df.columns[df.columns.str.contains(pat=col)][0]

    def getColByName(df, col):
        return df[getColName(df, col)]

    def addKey(df, fi, col):
        key = getColName(df, col)
        val = getColByName(df, col)
        fi[key] = val
        return fi

    def nullIfEqual(df, c1, c2):
        return df.apply(lambda x: x[getColName(df, c1)] + x[getColName(df, c2)]
                        if x[getColName(df, c1)] + x[getColName(df, c2)] != 0 else 0, axis=1)

    def sumInts(df):
        return df.sum(numeric_only=True)

    #~~~~~~~~~~~~~~~
    # Step 1)
    # Fetch Tract Files w/ CSA Labels by Name from the 2_cleaned folder.
    #~~~~~~~~~~~~~~~
    fileName = ''
    for name in glob.glob('AcsDataClean/B08101*5y'+str(year)+'_est.csv'):
        fileName = name
    df = pd.read_csv(fileName, index_col=0)

    # Aggregate by CSA:
    # group by CSA so that the tracts may be operated on together,
    # then aggregate the numeric values by sum.
    df = df.groupby('CSA')
    df = sumInts(df)
    # Add 'BALTIMORE', which is the SUM of all the CSAs

    #~~~~~~~~~~~~~~~
    # Step 2)
    # Prepare the columns
    #~~~~~~~~~~~~~~~
    # Final dataframe
    fi = pd.DataFrame()
    columns = ['B08101_001E', 'B08101_049E', 'B08101_033E']
    for col in columns:
        fi = addKey(df, fi, col)

    # Numerators
    numerators = pd.DataFrame()
    columns = ['B08101_033E']
    for col in columns:
        numerators = addKey(df, numerators, col)

    # Denominators
    denominators = pd.DataFrame()
    columns = ['B08101_001E', 'B08101_049E']
    for col in columns:
        denominators = addKey(df, denominators, col)
    # construct the denominator, returns 0 iff the other two rows are equal.

    #~~~~~~~~~~~~~~~
    # Step 3)
    # Run the Calculation
    # value[3] / nullif((value[1]-value[2]),0)
    #~~~~~~~~~~~~~~~
    fi['numerator'] = numerators.sum(axis=1)
    fi['denominator'] = denominators.iloc[:, 0] - denominators.iloc[:, 1]
    fi = fi[fi['denominator'] != 0]  # Delete rows where the 'denominator' column is 0
    fi['final'] = (fi['numerator'] / fi['denominator']) * 100

    #~~~~~~~~~~~~~~~
    # Step 4)
    # Add Special Baltimore City Data
    #~~~~~~~~~~~~~~~
    url = 'https://api.census.gov/data/20'+str(year)+'/acs/acs5/subject?get=NAME,S0801_C01_010E&for=county%3A510&in=state%3A24&key=829bf6f2e037372acbba32ba5731647c5127fdb0'
    table = pd.read_json(url, orient='records')
    fi.loc['Baltimore City', 'final'] = float(table.loc[1, table.columns[1]])
    return fi['final']

"""
WITH tbl AS (
    select csa,
        ( value[3] / nullif((value[1]-value[2]),0) )*100::numeric as result
    from vital_signs.get_acs_vars_csa_and_bc('2014',
        ARRAY['B08101_001E','B08101_049E','B08101_033E'])
)
update vital_signs.data
set walked = result
from tbl
where data2.csa = tbl.csa and update_data_year = '2014' and data_year = '2014';
"""
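Step 4 works because the Census API returns rows as arrays with the header in row 0, so table.loc[1, table.columns[1]] picks the single data row's S0801_C01_010E value. A minimal sketch against an illustrative payload (the values shown are made up, not real data):

import pandas as pd

sample = [["NAME", "S0801_C01_010E", "state", "county"],
          ["Baltimore city, Maryland", "6.7", "24", "510"]]  # illustrative
table = pd.DataFrame(sample)  # same shape pd.read_json(url, orient='records') yields
print(float(table.loc[1, table.columns[1]]))  # 6.7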
