# default_exp indicators
These were originally SQL scripts. We moved them over to Python, and now we are working to make these scripts even better.
Undone -> Yet to be transcribed from SQL:
??? 199 Number of Trees Planted treeplntXX TreeBaltimore
49 nomail - R
NO INFORMATION LOCATED.
This data arrives already aggregated, so it may be quicker to process.
129 artevnt - G
events_2017_csa indicator number 130
SELECT bAll.csa AS Bound, sum(bQuery.events_2017) * (1000 / bAll.the_pop) AS events_2017
FROM boundaries.csa2010 bAll
LEFT JOIN (
    SELECT bounds.csa AS Boundary, (count(Tables.gid::numeric(20,4))::numeric(20,2)) AS events_2017
    FROM arts.events_2017 AS Tables
    JOIN boundaries.csa2010 AS bounds
      ON st_contains(bounds.the_geom, Tables.the_geom)
    GROUP BY bounds.csa
    ORDER BY bounds.csa
) bQuery
ON bAll.csa = bQuery.Boundary
GROUP BY Bound, the_pop
ORDER BY Bound;
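For reference, a rough geopandas transcription of the query above (a sketch only; it assumes the two tables have been loaded into GeoDataFrames named events and csas, with the_geom as the active geometry and the_pop on the CSA table):

import geopandas as gpd

# Count events falling inside each CSA polygon, then normalize per 1,000 residents.
joined = gpd.sjoin(events, csas, how='inner', predicate='within')
counts = joined.groupby('csa').size()
pop = csas.set_index('csa')['the_pop']
events_2017 = counts.reindex(pop.index).fillna(0) * 1000 / pop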
168 weather - R
Waiting on Weatherization Data
Normalization Source -> MD Property View. Acquired.
with tbl AS (
    select (count(job_number)::real) * (1000 / the_pop::real) as result, csa
    from vital_signs.match_csas_and_bc_by_geom('sustainability.weatherization_2017', 'gid', 'the_geom') a
    left join sustainability.weatherization_2017 b on a.gid = b.gid
    group by csa, the_pop
)
update vital_signs.data
set weather = result from tbl where data.csa = tbl.csa and data_year = '2017';
with numerator AS (
    select (count(
        case
            when csa_present
            then 1
            else NULL
        end)::numeric) as result, csa
    from vital_signs.match_csas_and_bc_by_geom('sustainability.weatherization_2017', 'gid', 'the_geom') a
    left join sustainability.weatherization_2017 b on a.gid = b.gid
    group by csa
),
denominator AS (
    select (sum(
        case
            when (address != $$NULL$$)
             AND (desclu = $$Apartments$$ OR desclu = $$Residential$$ OR desclu = $$Residential Commercial$$ OR desclu = $$Residential Condominium$$)
            then 1
            else NULL
        end)::numeric
    ) as result, csa
    from vital_signs.match_csas_and_bc_by_geom('housing.mdprop_2017v2', 'gid', 'the_geom') a
    left join housing.mdprop_2017v2 b on a.gid = b.gid
    group by csa, the_pop
),
tbl AS (
    select denominator.csa, (numerator.result / denominator.result) * (100::numeric) as result
    from numerator left join denominator on numerator.csa = denominator.csa
)
select * from tbl where 1 = 1 ORDER BY csa ASC;
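The same percentage in pandas (a sketch; numerator and denominator stand for dataframes with csa and result columns, mirroring the two CTEs above):

# Percent of eligible residential properties weatherized, per CSA.
weatherized = numerator.set_index('csa')['result']
eligible = denominator.set_index('csa')['result']
weather_pct = (weatherized / eligible * 100).sort_index()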
affordm.py
#export
#File: affordm.py
#Author: Charles Karpati
#Date: 1/25/19
#Section: Bnia
#Email: karpati1@umbc.edu
#Description:
# Uses ACS Table B25091 - MORTGAGE STATUS BY SELECTED MONTHLY OWNER COSTS AS A PERCENTAGE OF HOUSEHOLD INCOME IN THE PAST 12 MONTHS
# Universe: Owner-occupied housing units
# Table Creates:
#purpose: Produce Housing and Community Development - Affordability Index - Mortgage Indicator
#input: Year
#output:
import pandas as pd
import glob
def affordm( year ):
def getColName (df, col): return df.columns[df.columns.str.contains(pat = col)][0]
def getColByName (df, col): return df[getColName(df, col)]
def addKey(df, fi, col):
key = getColName(df, col)
val = getColByName(df, col)
fi[key] = val
return fi
def nullIfEqual(df, c1, c2):
return df.apply(lambda x:
x[getColName(df, c1)]+x[getColName(df, c2)] if x[getColName(df, c1)]+x[getColName(df, c2)] != 0 else 0, axis=1)
def sumInts(df): return df.sum(numeric_only=True)
#~~~~~~~~~~~~~~~
# Step 1)
# Fetch Tract Files w/CSA Labels by Name from the 2_cleaned folder.
#~~~~~~~~~~~~~~~
fileName = ''
for name in glob.glob('AcsDataClean/B25091*5y'+str(year)+'_est.csv'):
fileName = name
df = pd.read_csv( fileName, index_col=0 )
# Aggregate by CSA
# Group By CSA so that they may be operated on
df = df.groupby('CSA')
# Aggregate Numeric Values by Sum
df = sumInts(df)
# Add 'BALTIMORE' which is the SUM of all the CSAs
#~~~~~~~~~~~~~~~
# Step 2)
# Prepare the columns
#~~~~~~~~~~~~~~~
# Final Dataframe
fi = pd.DataFrame()
columns = ['B25091_008E','B25091_009E','B25091_010E','B25091_011E','B25091_002E']
for col in columns:
fi = addKey(df, fi, col)
# Numerators
numerators = pd.DataFrame()
columns = ['B25091_008E','B25091_009E','B25091_010E','B25091_011E']
for col in columns:
numerators = addKey(df, numerators, col)
# Denominators
denominators = pd.DataFrame()
columns = ['B25091_002E']
for col in columns:
denominators = addKey(df, denominators, col)
#~~~~~~~~~~~~~~~
# Step 3)
# Run the Calculation
# ( (value[1]+value[2]+value[3]+value[4]) / nullif(value[5],0) )*100
#~~~~~~~~~~~~~~~
fi['numerator'] = numerators.sum(axis=1)
fi['denominator'] = denominators.sum(axis=1)
fi = fi[fi['denominator'] != 0] # Delete Rows where the 'denominator' column is 0
fi['final'] = (fi['numerator'] / fi['denominator'] ) * 100
return fi['final']
"""
WITH tbl AS (
select csa,
( (value[1]+value[2]+value[3]+value[4]) / nullif(value[5],0) )*100::numeric as result
from vital_signs.get_acs_vars_csa_and_bc('2014',ARRAY['B25091_008E','B25091_009E','B25091_010E','B25091_011E','B25091_002E'])
)
update vital_signs.data
set affordm = result from tbl where data2.csa = tbl.csa and update_data_year = '2014' and data_year = '2014';
"""
affordr.py
#export
#File: affordr.py
#Author: Charles Karpati
#Date: 1/17/19
#Section: Bnia
#Email: karpati1@umbc.edu
#Description:
# Uses ACS Table B25070 - GROSS RENT AS A PERCENTAGE OF HOUSEHOLD INCOME IN THE PAST 12 MONTHS
# Universe: Renter-occupied housing units
#purpose: Produce Housing and Community Development - Affordability Index - Rent Indicator
#input: Year
#output:
import pandas as pd
import glob
def affordr( year ):
def getColName (df, col): return df.columns[df.columns.str.contains(pat = col)][0]
def getColByName (df, col): return df[getColName(df, col)]
def addKey(df, fi, col):
key = getColName(df, col)
val = getColByName(df, col)
fi[key] = val
return fi
def nullIfEqual(df, c1, c2):
return df.apply(lambda x:
x[getColName(df, c1)]+x[getColName(df, c2)] if x[getColName(df, c1)]+x[getColName(df, c2)] != 0 else 0, axis=1)
def sumInts(df): return df.sum(numeric_only=True)
#~~~~~~~~~~~~~~~
# Step 1)
# Fetch Tract Files w/CSA Labels by Name from the 2_cleaned folder.
#~~~~~~~~~~~~~~~
fileName = ''
for name in glob.glob('AcsDataClean/B25070*5y'+str(year)+'_est.csv'):
fileName = name
df = pd.read_csv( fileName, index_col=0 )
# Aggregate by CSA
# Group By CSA so that they may be operated on
df = df.groupby('CSA')
# Aggregate Numeric Values by Sum
df = sumInts(df)
# Add 'BALTIMORE' which is the SUM of all the CSAs
#~~~~~~~~~~~~~~~
# Step 2)
# Prepare the columns
#~~~~~~~~~~~~~~~
# Final Dataframe
fi = pd.DataFrame()
columns = ['B25070_007E','B25070_008E','B25070_009E','B25070_010E','B25070_001E']
for col in columns:
fi = addKey(df, fi, col)
# Numerators
numerators = pd.DataFrame()
columns = ['B25070_007E','B25070_008E','B25070_009E','B25070_010E']
for col in columns:
numerators = addKey(df, numerators, col)
# Denominators
denominators = pd.DataFrame()
columns = ['B25070_001E']
for col in columns:
denominators = addKey(df, denominators, col)
#~~~~~~~~~~~~~~~
# Step 3)
# Run the Calculation
# ( (value[1]+value[2]+value[3]+value[4]) / nullif(value[5],0) )*100
#~~~~~~~~~~~~~~~
fi['numerator'] = numerators.sum(axis=1)
fi['denominator'] = denominators.sum(axis=1)
fi = fi[fi['denominator'] != 0] # Delete Rows where the 'denominator' column is 0
fi['final'] = (fi['numerator'] / fi['denominator'] ) * 100
return fi['final']
"""
WITH tbl AS (
select csa,
( (value[1]+value[2]+value[3]+value[4]) / nullif(value[5],0) )*100::numeric as result
from vital_signs.get_acs_vars_csa_and_bc('2014',ARRAY['B25070_007E','B25070_008E','B25070_009E','B25070_010E','B25070_001E'])
)
update vital_signs.data
set affordr = result from tbl where data2.csa = tbl.csa and update_data_year = '2014' and data_year = '2014';
"""
age5.py
#export
#File: age5.py
#Author: Charles Karpati
#Date: 4/16/19
#Section: Bnia
#Email: karpati1@umbc.edu
#Description:
# Uses ACS Table B01001 - SEX BY AGE
# Universe: Total population
# Table Creates: tpop, female, male, age5 age18 age24 age64 age65
#purpose:
#input: Year
#output:
import pandas as pd
import glob
def age5( year ):
def getColName (df, col): return df.columns[df.columns.str.contains(pat = col)][0]
def getColByName (df, col): return df[getColName(df, col)]
def addKey(df, fi, col):
key = getColName(df, col)
val = getColByName(df, col)
fi[key] = val
return fi
def nullIfEqual(df, c1, c2):
return df.apply(lambda x:
x[getColName(df, c1)]+x[getColName(df, c2)] if x[getColName(df, c1)]+x[getColName(df, c2)] != 0 else 0, axis=1)
def sumInts(df): return df.sum(numeric_only=True)
#~~~~~~~~~~~~~~~
# Step 1)
# Fetch Tract Files w/CSA Labels by Name from the 2_cleaned folder.
#~~~~~~~~~~~~~~~
fileName = ''
for name in glob.glob('AcsDataClean/B01001*5y'+str(year)+'_est.csv'):
fileName = name
df = pd.read_csv( fileName, index_col=0 )
# Aggregate by CSA
# Group By CSA so that they may be operated on
df = df.groupby('CSA')
# Aggregate Numeric Values by Sum
df = df.sum(numeric_only=True)
# df.columns
total = df['B01001_001E_Total']
df1 = pd.DataFrame()
df1['CSA'] = df.index
df1.set_index('CSA', drop = True, inplace = True)
# Under 5
df1['under_5'] = ( df[ 'B01001_003E_Total_Male_Under_5_years' ]
+ df[ 'B01001_027E_Total_Female_Under_5_years' ]
) / total * 100
return df1['under_5']
age18.py
#export
#File: age18.py
#Author: Charles Karpati
#Date: 4/16/19
#Section: Bnia
#Email: karpati1@umbc.edu
#Description:
# Uses ACS Table B01001 - SEX BY AGE
# Universe: Total population
# Table Creates: tpop, female, male, age5 age18 age24 age64 age65
#purpose:
#input: Year
#output:
import pandas as pd
import glob
def age18( year ):
def getColName (df, col): return df.columns[df.columns.str.contains(pat = col)][0]
def getColByName (df, col): return df[getColName(df, col)]
def addKey(df, fi, col):
key = getColName(df, col)
val = getColByName(df, col)
fi[key] = val
return fi
def nullIfEqual(df, c1, c2):
return df.apply(lambda x:
x[getColName(df, c1)]+x[getColName(df, c2)] if x[getColName(df, c1)]+x[getColName(df, c2)] != 0 else 0, axis=1)
def sumInts(df): return df.sum(numeric_only=True)
#~~~~~~~~~~~~~~~
# Step 1)
# Fetch Tract Files w/CSA Labels by Name from the 2_cleaned folder.
#~~~~~~~~~~~~~~~
fileName = ''
for name in glob.glob('AcsDataClean/B01001*5y'+str(year)+'_est.csv'):
fileName = name
df = pd.read_csv( fileName, index_col=0 )
# Aggregate by CSA
# Group By CSA so that they may be operated on
df = df.groupby('CSA')
# Aggregate Numeric Values by Sum
df = df.sum(numeric_only=True)
# df.columns
total = df['B01001_001E_Total']
df1 = pd.DataFrame()
df1['CSA'] = df.index
df1.set_index('CSA', drop = True, inplace = True)
df1['five_to_17'] = ( df[ 'B01001_004E_Total_Male_5_to_9_years' ]
+ df[ 'B01001_005E_Total_Male_10_to_14_years' ]
+ df[ 'B01001_006E_Total_Male_15_to_17_years' ]
+ df[ 'B01001_028E_Total_Female_5_to_9_years' ]
+ df[ 'B01001_029E_Total_Female_10_to_14_years' ]
+ df[ 'B01001_030E_Total_Female_15_to_17_years' ]
) / total * 100
return df1['five_to_17']
age24.py
#export
#File: age24.py
#Author: Charles Karpati
#Date: 4/16/19
#Section: Bnia
#Email: karpati1@umbc.edu
#Description:
# Uses ACS Table B01001 - SEX BY AGE
# Universe: Total population
# Table Creates: tpop, female, male, age5 age18 age24 age64 age65
#purpose:
#input: Year
#output:
import pandas as pd
import glob
def age24( year ):
def getColName (df, col): return df.columns[df.columns.str.contains(pat = col)][0]
def getColByName (df, col): return df[getColName(df, col)]
def addKey(df, fi, col):
key = getColName(df, col)
val = getColByName(df, col)
fi[key] = val
return fi
def nullIfEqual(df, c1, c2):
return df.apply(lambda x:
x[getColName(df, c1)]+x[getColName(df, c2)] if x[getColName(df, c1)]+x[getColName(df, c2)] != 0 else 0, axis=1)
def sumInts(df): return df.sum(numeric_only=True)
#~~~~~~~~~~~~~~~
# Step 1)
# Fetch Tract Files w/CSA Labels by Name from the 2_cleaned folder.
#~~~~~~~~~~~~~~~
fileName = ''
for name in glob.glob('AcsDataClean/B01001*5y'+str(year)+'_est.csv'):
fileName = name
df = pd.read_csv( fileName, index_col=0 )
# Aggregate by CSA
# Group By CSA so that they may be operated on
df = df.groupby('CSA')
# Aggregate Numeric Values by Sum
df = df.sum(numeric_only=True)
# df.columns
total = df['B01001_001E_Total']
df1 = pd.DataFrame()
df1['CSA'] = df.index
df1.set_index('CSA', drop = True, inplace = True)
df1['eighteen_to_24'] = ( df[ 'B01001_007E_Total_Male_18_and_19_years' ]
+ df[ 'B01001_008E_Total_Male_20_years' ]
+ df[ 'B01001_009E_Total_Male_21_years' ]
+ df[ 'B01001_010E_Total_Male_22_to_24_years' ]
+ df[ 'B01001_031E_Total_Female_18_and_19_years' ]
+ df[ 'B01001_032E_Total_Female_20_years' ]
+ df[ 'B01001_033E_Total_Female_21_years' ]
+ df[ 'B01001_034E_Total_Female_22_to_24_years' ]
) / total * 100
return df1['eighteen_to_24']
age64.py
#export
#File: age64.py
#Author: Charles Karpati
#Date: 4/16/19
#Section: Bnia
#Email: karpati1@umbc.edu
#Description:
# Uses ACS Table B01001 - SEX BY AGE
# Universe: Total population
# Table Creates: tpop, female, male, age5 age18 age24 age64 age65
#purpose:
#input: Year
#output:
import pandas as pd
import glob
def age64( year ):
def getColName (df, col): return df.columns[df.columns.str.contains(pat = col)][0]
def getColByName (df, col): return df[getColName(df, col)]
def addKey(df, fi, col):
key = getColName(df, col)
val = getColByName(df, col)
fi[key] = val
return fi
def nullIfEqual(df, c1, c2):
return df.apply(lambda x:
x[getColName(df, c1)]+x[getColName(df, c2)] if x[getColName(df, c1)]+x[getColName(df, c2)] != 0 else 0, axis=1)
def sumInts(df): return df.sum(numeric_only=True)
#~~~~~~~~~~~~~~~
# Step 1)
# Fetch Tract Files w/CSA Labels by Name from the 2_cleaned folder.
#~~~~~~~~~~~~~~~
fileName = ''
for name in glob.glob('AcsDataClean/B01001*5y'+str(year)+'_est.csv'):
fileName = name
df = pd.read_csv( fileName, index_col=0 )
# Aggregate by CSA
# Group By CSA so that they may be operated on
df = df.groupby('CSA')
# Aggregate Numeric Values by Sum
df = df.sum(numeric_only=True)
# df.columns
total = df['B01001_001E_Total']
df1 = pd.DataFrame()
df1['CSA'] = df.index
df1.set_index('CSA', drop = True, inplace = True)
df1['twentyfive_to_64'] = ( df[ 'B01001_011E_Total_Male_25_to_29_years' ]
+ df[ 'B01001_012E_Total_Male_30_to_34_years' ]
+ df[ 'B01001_013E_Total_Male_35_to_39_years' ]
+ df[ 'B01001_014E_Total_Male_40_to_44_years' ]
+ df[ 'B01001_015E_Total_Male_45_to_49_years' ]
+ df[ 'B01001_016E_Total_Male_50_to_54_years' ]
+ df[ 'B01001_017E_Total_Male_55_to_59_years' ]
+ df[ 'B01001_018E_Total_Male_60_and_61_years' ]
+ df[ 'B01001_019E_Total_Male_62_to_64_years' ]
+ df[ 'B01001_035E_Total_Female_25_to_29_years' ]
+ df[ 'B01001_036E_Total_Female_30_to_34_years' ]
+ df[ 'B01001_037E_Total_Female_35_to_39_years' ]
+ df[ 'B01001_038E_Total_Female_40_to_44_years' ]
+ df[ 'B01001_039E_Total_Female_45_to_49_years' ]
+ df[ 'B01001_040E_Total_Female_50_to_54_years' ]
+ df[ 'B01001_041E_Total_Female_55_to_59_years' ]
+ df[ 'B01001_042E_Total_Female_60_and_61_years' ]
+ df[ 'B01001_043E_Total_Female_62_to_64_years' ]
) / total * 100
return df1['twentyfive_to_64']
age65.py
#export
#File: age65.py
#Author: Charles Karpati
#Date: 4/16/19
#Section: Bnia
#Email: karpati1@umbc.edu
#Description:
# Uses ACS Table B01001 - SEX BY AGE
# Universe: Total population
# Table Creates: tpop, female, male, age5 age18 age24 age64 age65
#purpose:
#input: Year
#output:
import pandas as pd
import glob
def age65( year ):
def getColName (df, col): return df.columns[df.columns.str.contains(pat = col)][0]
def getColByName (df, col): return df[getColName(df, col)]
def addKey(df, fi, col):
key = getColName(df, col)
val = getColByName(df, col)
fi[key] = val
return fi
def nullIfEqual(df, c1, c2):
return df.apply(lambda x:
x[getColName(df, c1)]+x[getColName(df, c2)] if x[getColName(df, c1)]+x[getColName(df, c2)] != 0 else 0, axis=1)
def sumInts(df): return df.sum(numeric_only=True)
#~~~~~~~~~~~~~~~
# Step 1)
# Fetch Tract Files w/CSA Labels by Name from the 2_cleaned folder.
#~~~~~~~~~~~~~~~
fileName = ''
for name in glob.glob('AcsDataClean/B01001*5y'+str(year)+'_est.csv'):
fileName = name
df = pd.read_csv( fileName, index_col=0 )
# Aggregate by CSA
# Group By CSA so that they may be operated on
df = df.groupby('CSA')
# Aggregate Numeric Values by Sum
df = df.sum(numeric_only=True)
# df.columns
total = df['B01001_001E_Total']
df1 = pd.DataFrame()
df1['CSA'] = df.index
df1.set_index('CSA', drop = True, inplace = True)
df1['sixtyfive_and_up'] = ( df[ 'B01001_020E_Total_Male_65_and_66_years' ]
+ df[ 'B01001_021E_Total_Male_67_to_69_years' ]
+ df[ 'B01001_022E_Total_Male_70_to_74_years' ]
+ df[ 'B01001_023E_Total_Male_75_to_79_years' ]
+ df[ 'B01001_024E_Total_Male_80_to_84_years' ]
+ df[ 'B01001_025E_Total_Male_85_years_and_over' ]
+ df[ 'B01001_044E_Total_Female_65_and_66_years' ]
+ df[ 'B01001_045E_Total_Female_67_to_69_years' ]
+ df[ 'B01001_046E_Total_Female_70_to_74_years' ]
+ df[ 'B01001_047E_Total_Female_75_to_79_years' ]
+ df[ 'B01001_048E_Total_Female_80_to_84_years' ]
+ df[ 'B01001_049E_Total_Female_85_years_and_over' ]
) / total * 100
return df1['sixtyfive_and_up']
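age5 through age65 differ only in which B01001 cells they sum; a consolidated sketch (hypothetical helper, same inputs as the functions above) shows the shared pattern:

def age_bracket_pct(df, column_names):
    # df: B01001 tract file summed by CSA; column_names: the male+female cells for one bracket.
    total = df['B01001_001E_Total']
    return df[column_names].sum(axis=1) / total * 100

# e.g. the under-5 bracket from age5 above:
# age_bracket_pct(df, ['B01001_003E_Total_Male_Under_5_years',
#                      'B01001_027E_Total_Female_Under_5_years'])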
bahigher.py
#export
#File: bahigher.py
#Author: Charles Karpati
#Date: 1/17/19
#Section: Bnia
#Email: karpati1@umbc.edu
#Description:
# Uses ACS Table B06009 - PLACE OF BIRTH BY EDUCATIONAL ATTAINMENT IN THE UNITED STATES
#purpose: Produce Workforce and Economic Development - Percent Population (25 Years and over) with a Bachelor's Degree or Above
#Table Uses: B06009 - lesshs, hsdipl, bahigher
#input: Year
#output:
import pandas as pd
import glob
def bahigher( year ):
def getColName (df, col): return df.columns[df.columns.str.contains(pat = col)][0]
def getColByName (df, col): return df[getColName(df, col)]
def addKey(df, fi, col):
key = getColName(df, col)
val = getColByName(df, col)
fi[key] = val
return fi
def nullIfEqual(df, c1, c2):
return df.apply(lambda x:
x[getColName(df, c1)]+x[getColName(df, c2)] if x[getColName(df, c1)]+x[getColName(df, c2)] != 0 else 0, axis=1)
def sumInts(df): return df.sum(numeric_only=True)
#~~~~~~~~~~~~~~~
# Step 1)
# Fetch Tract Files w/CSA Labels by Name from the 2_cleaned folder.
#~~~~~~~~~~~~~~~
fileName = ''
for name in glob.glob('AcsDataClean/B06009*5y'+str(year)+'_est.csv'):
fileName = name
df = pd.read_csv( fileName, index_col=0 )
# Aggregate by CSA
# Group By CSA so that they may be operated on
df = df.groupby('CSA')
# Aggregate Numeric Values by Sum
df = sumInts(df)
# Add 'BALTIMORE' which is the SUM of all the CSAs
#~~~~~~~~~~~~~~~
# Step 2)
# Prepare the columns
#~~~~~~~~~~~~~~~
# Final Dataframe
fi = pd.DataFrame()
columns = ['B06009_005E','B06009_006E','B06009_001E']
for col in columns:
fi = addKey(df, fi, col)
# Numerators
numerators = pd.DataFrame()
columns = ['B06009_005E','B06009_006E']
for col in columns:
numerators = addKey(df, numerators, col)
# Denominators
denominators = pd.DataFrame()
columns = ['B06009_001E']
for col in columns:
denominators = addKey(df, denominators, col)
#~~~~~~~~~~~~~~~
# Step 3)
# Run the Calculation + final mods
# ( ( value[1] + value[2] ) / nullif(value[3],0) )*100
#~~~~~~~~~~~~~~~
fi['numerator'] = numerators.sum(axis=1)
fi['denominator'] = denominators.sum(axis=1)
fi = fi[fi['denominator'] != 0] # Delete Rows where the 'denominator' column is 0
fi['final'] = (fi['numerator'] / fi['denominator'] ) * 100
return fi['final']
"""
/* hsdipl */ --
WITH tbl AS (
select csa,
( ( value[1] + value[2] ) / nullif(value[3],0) )*100::numeric as result
from vital_signs.get_acs_vars_csa_and_bc('2014',ARRAY['B06009_003E','B06009_004E','B06009_001E'])
)
update vital_signs.data
set hsdipl = result from tbl where data2.csa = tbl.csa and update_data_year = '2014' and data_year = '2014';
B06009_004E
label "Estimate!!Total!!Some college or associate's degree"
B06009_003E
label "Estimate!!Total!!High school graduate (includes equivalency)"
B06009_002E
label "Estimate!!Total!!Less than high school graduate"
B06009_001E
label "Estimate!!Total"
B06009_005E
label "Estimate!!Total!!Bachelor's degree"
B06009_006E
label "Estimate!!Total!!Graduate or professional degree"
"""carpool.py
#export
#File: carpool.py
#Author: Charles Karpati
#Date: 1/17/19
#Section: Bnia
#Email: karpati1@umbc.edu
#Description:
# Uses ACS Table B08101 - MEANS OF TRANSPORTATION TO WORK BY AGE
# Universe: Workers 16 Years and Over
# Table Creates: othrcom, drvalone, carpool, pubtran, walked
#purpose: Produce Sustainability - Percent of Population that Carpool to Work Indicator
#input: Year
#output:
import pandas as pd
import glob
def carpool( year ):
def getColName (df, col): return df.columns[df.columns.str.contains(pat = col)][0]
def getColByName (df, col): return df[getColName(df, col)]
def addKey(df, fi, col):
key = getColName(df, col)
val = getColByName(df, col)
fi[key] = val
return fi
def nullIfEqual(df, c1, c2):
return df.apply(lambda x:
x[getColName(df, c1)]+x[getColName(df, c2)] if x[getColName(df, c1)]+x[getColName(df, c2)] != 0 else 0, axis=1)
def sumInts(df): return df.sum(numeric_only=True)
#~~~~~~~~~~~~~~~
# Step 1)
# Fetch Tract Files w/CSA Labels by Name from the 2_cleaned folder.
#~~~~~~~~~~~~~~~
fileName = ''
for name in glob.glob('AcsDataClean/B08101*5y'+str(year)+'_est.csv'):
fileName = name
df = pd.read_csv( fileName, index_col=0 )
# Aggregate by CSA
# Group By CSA so that they may be operated on
df = df.groupby('CSA')
# Aggregate Numeric Values by Sum
df = sumInts(df)
# Add 'BALTIMORE' which is the SUM of all the CSAs
#~~~~~~~~~~~~~~~
# Step 2)
# Prepare the columns
#~~~~~~~~~~~~~~~
# Final Dataframe
fi = pd.DataFrame()
columns = ['B08101_001E','B08101_049E','B08101_017E']
for col in columns:
fi = addKey(df, fi, col)
# Numerators
numerators = pd.DataFrame()
columns = ['B08101_017E']
for col in columns:
numerators = addKey(df, numerators, col)
# Denominators
denominators = pd.DataFrame()
columns = ['B08101_001E','B08101_049E']
for col in columns:
denominators = addKey(df, denominators, col)
#~~~~~~~~~~~~~~~
# Step 3)
# Run the Calculation + final mods
# ( value[3] / (value[1]-value[2]) )*100
#~~~~~~~~~~~~~~~
fi['numerator'] = numerators.sum(axis=1)
fi['denominator'] = denominators.iloc[: ,0] - denominators.iloc[: ,1]
fi = fi[fi['denominator'] != 0] # Delete Rows where the 'denominator' column is 0
fi['final'] = (fi['numerator'] / fi['denominator'] ) * 100
#~~~~~~~~~~~~~~~
# Step 4)
# Add Special Baltimore City Data
#~~~~~~~~~~~~~~~
url = 'https://api.census.gov/data/20'+str(year)+'/acs/acs5/subject?get=NAME,S0801_C01_004E&for=county%3A510&in=state%3A24&key=829bf6f2e037372acbba32ba5731647c5127fdb0'
table = pd.read_json(url, orient='records')
fi.loc['Baltimore City', 'final'] = float(table.loc[1, table.columns[1]])
return fi['final']
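carpool, drvalone, and hhchpov each overwrite the Baltimore City row with a city-wide value from a Census subject table; a shared fetch helper (hypothetical, mirroring the pd.read_json call above) could be:

def fetch_subject_value(year, variable):
    # City-wide ACS 5-year subject-table value for Baltimore City (state 24, county 510).
    # The original scripts also append an API key parameter to this URL.
    url = ('https://api.census.gov/data/20' + str(year) +
           '/acs/acs5/subject?get=NAME,' + variable +
           '&for=county%3A510&in=state%3A24')
    table = pd.read_json(url, orient='records')
    return float(table.loc[1, table.columns[1]])

# e.g. fi.loc['Baltimore City', 'final'] = fetch_subject_value(year, 'S0801_C01_004E')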
"""
WITH tbl AS (
select csa,
( value[3] / nullif( (value[1]-value[2]) ,0) )*100::numeric as result
from vital_signs.get_acs_vars_csa_and_bc('2013',ARRAY['B08101_001E','B08101_049E','B08101_017E'])
)
update vital_signs.data
set carpool = result from tbl where data2.csa = tbl.csa and update_data_year = '2014' and data_year = '2013';
"""drvalone.py
#export
#File: drvalone.py
#Author: Charles Karpati
#Date: 1/17/19
#Section: Bnia
#Email: karpati1@umbc.edu
#Description:
# Uses ACS Table B08101 - MEANS OF TRANSPORTATION TO WORK BY AGE
# Universe: Workers 16 Years and Over
# Table Creates: othrcom, drvalone, carpool, pubtran, walked
#purpose: Produce Sustainability - Percent of Population that Drove Alone to Work Indicator
#input: Year
#output:
import pandas as pd
import glob
def drvalone( year ):
def getColName (df, col): return df.columns[df.columns.str.contains(pat = col)][0]
def getColByName (df, col): return df[getColName(df, col)]
def addKey(df, fi, col):
key = getColName(df, col)
val = getColByName(df, col)
fi[key] = val
return fi
def nullIfEqual(df, c1, c2):
return df.apply(lambda x:
x[getColName(df, c1)]+x[getColName(df, c2)] if x[getColName(df, c1)]+x[getColName(df, c2)] != 0 else 0, axis=1)
def sumInts(df): return df.sum(numeric_only=True)
#~~~~~~~~~~~~~~~
# Step 1)
# Fetch Tract Files w/CSA Labels by Name from the 2_cleaned folder.
#~~~~~~~~~~~~~~~
fileName = ''
for name in glob.glob('AcsDataClean/B08101*5y'+str(year)+'_est.csv'):
fileName = name
df = pd.read_csv( fileName, index_col=0 )
# Aggregate by CSA
# Group By CSA so that they may be operated on
df = df.groupby('CSA')
# Aggregate Numeric Values by Sum
df = sumInts(df)
# Add 'BALTIMORE' which is the SUM of all the CSAs
#~~~~~~~~~~~~~~~
# Step 2)
# Prepare the columns
#~~~~~~~~~~~~~~~
# Final Dataframe
fi = pd.DataFrame()
columns = ['B08101_001E','B08101_049E','B08101_009E']
for col in columns:
fi = addKey(df, fi, col)
# Numerators
numerators = pd.DataFrame()
columns = ['B08101_009E']
for col in columns:
numerators = addKey(df, numerators, col)
# Denominators
denominators = pd.DataFrame()
columns = ['B08101_001E','B08101_049E']
for col in columns:
denominators = addKey(df, denominators, col)
#~~~~~~~~~~~~~~~
# Step 3)
# Run the Calculation
# ( value[3] / nullif((value[1]-value[2]),0) )*100
#~~~~~~~~~~~~~~~
fi['numerator'] = numerators.sum(axis=1)
fi['denominator'] = denominators.iloc[: ,0] - denominators.iloc[: ,1]
fi = fi[fi['denominator'] != 0] # Delete Rows where the 'denominator' column is 0
fi['final'] = (fi['numerator'] / fi['denominator'] ) * 100
#~~~~~~~~~~~~~~~
# Step 4)
# Add Special Baltimore City Data
#~~~~~~~~~~~~~~~
url = 'https://api.census.gov/data/20'+str(year)+'/acs/acs5/subject?get=NAME,S0801_C01_003E&for=county%3A510&in=state%3A24&key=829bf6f2e037372acbba32ba5731647c5127fdb0'
table = pd.read_json(url, orient='records')
fi.loc['Baltimore City', 'final'] = float(table.loc[1, table.columns[1]])
return fi['final']
"""
/* drvalone */ --
WITH tbl AS (
select csa,
( value[3] / nullif((value[1]-value[2]),0) )*100::numeric as result
from vital_signs.get_acs_vars_csa_and_bc('2013',ARRAY['B08101_001E','B08101_049E','B08101_009E'])
)
update vital_signs.data
set drvalone = result from tbl where data2.csa = tbl.csa and update_data_year = '2014' and data_year = '2013';
"""elheat.py
#export
#File: elheat.py
#Author: Charles Karpati
#Date: 1/17/19
#Section: Bnia
#Email: karpati1@umbc.edu
#Description:
# Uses ACS Table B25040 - HOUSE HEATING FUEL
# Universe - Occupied housing units
# Table Creates: elheat, heatgas
#purpose: Produce Sustainability - Percent of Residences Heated by Electricity Indicator
#input: Year
#output:
import pandas as pd
import glob
def elheat( year ):
def getColName (df, col): return df.columns[df.columns.str.contains(pat = col)][0]
def getColByName (df, col): return df[getColName(df, col)]
def addKey(df, fi, col):
key = getColName(df, col)
val = getColByName(df, col)
fi[key] = val
return fi
def nullIfEqual(df, c1, c2):
return df.apply(lambda x:
x[getColName(df, c1)]+x[getColName(df, c2)] if x[getColName(df, c1)]+x[getColName(df, c2)] != 0 else 0, axis=1)
def sumInts(df): return df.sum(numeric_only=True)
#~~~~~~~~~~~~~~~
# Step 1)
# Fetch Tract Files w/CSA Labels by Name from the 2_cleaned folder.
#~~~~~~~~~~~~~~~
fileName = ''
for name in glob.glob('AcsDataClean/B25040*5y'+str(year)+'_est.csv'):
fileName = name
df = pd.read_csv( fileName, index_col=0 )
# Aggregate by CSA
# Group By CSA so that they may be operated on
df = df.groupby('CSA')
# Aggregate Numeric Values by Sum
df = sumInts(df)
# Add 'BALTIMORE' which is the SUM of all the CSAs
#~~~~~~~~~~~~~~~
# Step 2)
# Prepare the columns
#~~~~~~~~~~~~~~~
# Final Dataframe
fi = pd.DataFrame()
columns = ['B25040_004E','B25040_001E']
for col in columns:
fi = addKey(df, fi, col)
# Numerators
numerators = pd.DataFrame()
columns = ['B25040_004E']
for col in columns:
numerators = addKey(df, numerators, col)
# Denominators
denominators = pd.DataFrame()
columns = ['B25040_001E']
for col in columns:
denominators = addKey(df, denominators, col)
#~~~~~~~~~~~~~~~
# Step 3)
# Run the Calculation + final mods
# ( value[1] / nullif(value[2],0) )*100
#~~~~~~~~~~~~~~~
fi['numerator'] = numerators.sum(axis=1)
fi['denominator'] = denominators.sum(axis=1)
fi = fi[fi['denominator'] != 0] # Delete Rows where the 'denominator' column is 0
fi['final'] = (fi['numerator'] / fi['denominator'] ) * 100
return fi['final']
"""
/* elheat */ --
WITH tbl AS (
select csa,
( value[1] / nullif(value[2],0) )*100::numeric as result
from vital_signs.get_acs_vars_csa_and_bc('2014',ARRAY['B25040_004E','B25040_001E'])
)
update vital_signs.data
set elheat = result from tbl where data2.csa = tbl.csa and update_data_year = '2014' and data_year = '2014';
"""empl.py
#export
#File: empl.py
#Author: Charles Karpati
#Date: 1/17/19
#Section: Bnia
#Email: karpati1@umbc.edu
#Description:
# Uses ACS Table B23001 - SEX BY AGE BY EMPLOYMENT STATUS FOR THE POPULATION 16 YEARS AND OVER
# Universe - Population 16 years and over
# Table Creates: empl, unempl, unempr, nilf
#purpose: Produce Workforce and Economic Development - Percent Population 16-64 Employed Indicator
#input: Year
#output:
import pandas as pd
import glob
def empl( year ):
def getColName (df, col): return df.columns[df.columns.str.contains(pat = col)][0]
def getColByName (df, col): return df[getColName(df, col)]
def addKey(df, fi, col):
key = getColName(df, col)
val = getColByName(df, col)
fi[key] = val
return fi
def nullIfEqual(df, c1, c2):
return df.apply(lambda x:
x[getColName(df, c1)]+x[getColName(df, c2)] if x[getColName(df, c1)]+x[getColName(df, c2)] != 0 else 0, axis=1)
def sumInts(df): return df.sum(numeric_only=True)
#~~~~~~~~~~~~~~~
# Step 1)
# Fetch Tract Files w/CSA Labels by Name from the 2_cleaned folder.
#~~~~~~~~~~~~~~~
fileName = ''
for name in glob.glob('AcsDataClean/B23001*5y'+str(year)+'_est.csv'):
fileName = name
df = pd.read_csv( fileName, index_col=0 )
# Aggregate by CSA
# Group By CSA so that they may be operated on
df = df.groupby('CSA')
# Aggregate Numeric Values by Sum
df = sumInts(df)
# Add 'BALTIMORE' which is the SUM of all the CSAs
#~~~~~~~~~~~~~~~
# Step 2)
# Prepare the columns
#~~~~~~~~~~~~~~~
# Final Dataframe
fi = pd.DataFrame()
columns = ['B23001_003E', 'B23001_010E', 'B23001_017E', 'B23001_024E', 'B23001_031E', 'B23001_038E', 'B23001_045E', 'B23001_052E', 'B23001_059E', 'B23001_066E', 'B23001_089E', 'B23001_096E', 'B23001_103E', 'B23001_110E', 'B23001_117E', 'B23001_124E', 'B23001_131E', 'B23001_138E', 'B23001_145E', 'B23001_152E', 'B23001_007E', 'B23001_014E', 'B23001_021E', 'B23001_028E', 'B23001_035E', 'B23001_042E', 'B23001_049E', 'B23001_056E', 'B23001_063E', 'B23001_070E', 'B23001_093E', 'B23001_100E', 'B23001_107E', 'B23001_114E', 'B23001_121E', 'B23001_128E', 'B23001_135E', 'B23001_142E', 'B23001_149E', 'B23001_156E']
for col in columns:
fi = addKey(df, fi, col)
# Numerators
numerators = pd.DataFrame()
columns = ['B23001_007E', 'B23001_014E', 'B23001_021E', 'B23001_028E', 'B23001_035E', 'B23001_042E', 'B23001_049E', 'B23001_056E', 'B23001_063E', 'B23001_070E', 'B23001_093E', 'B23001_100E', 'B23001_107E', 'B23001_114E', 'B23001_121E', 'B23001_128E', 'B23001_135E', 'B23001_142E', 'B23001_149E', 'B23001_156E']
for col in columns:
numerators = addKey(df, numerators, col)
# Denominators
denominators = pd.DataFrame()
columns = ['B23001_003E', 'B23001_010E', 'B23001_017E', 'B23001_024E', 'B23001_031E', 'B23001_038E', 'B23001_045E', 'B23001_052E', 'B23001_059E', 'B23001_066E', 'B23001_089E', 'B23001_096E', 'B23001_103E', 'B23001_110E', 'B23001_117E', 'B23001_124E', 'B23001_131E', 'B23001_138E', 'B23001_145E', 'B23001_152E']
for col in columns:
denominators = addKey(df, denominators, col)
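# Note: the three column lists above follow B23001's fixed layout: each sex/age
# block is 7 variables wide, and the "Employed" cell sits 4 slots after the block
# total. A sketch (hypothetical, not in the original) generating the same lists:
#   ids = list(range(3, 67, 7)) + list(range(89, 153, 7))   # block totals, ages 16-64
#   denominator_cols = ['B23001_%03dE' % i for i in ids]
#   numerator_cols   = ['B23001_%03dE' % (i + 4) for i in ids]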
#~~~~~~~~~~~~~~~
# Step 3)
# Run the Calculation
# ( (value[21]+value[22]+value[23]+value[24]+value[25]+value[26]+value[27]+value[28]+value[29]+value[30]+value[31]+value[32]+value[33]+value[34]+value[35]+value[36]+value[37]+value[38]+value[39]+value[40])   -- civilian labor force employed, ages 16-64
#   / nullif( (value[1]+value[2]+value[3]+value[4]+value[5]+value[6]+value[7]+value[8]+value[9]+value[10]+value[11]+value[12]+value[13]+value[14]+value[15]+value[16]+value[17]+value[18]+value[19]+value[20]), 0)   -- population ages 16-64
# ) * 100
#~~~~~~~~~~~~~~~
fi['numerator'] = numerators.sum(axis=1)
fi['denominator'] = denominators.sum(axis=1)
fi = fi[fi['denominator'] != 0] # Delete Rows where the 'denominator' column is 0
fi['final'] = (fi['numerator'] / fi['denominator'] ) * 100
return fi['final']
"""
/* */ --
WITH tbl AS (
select csa,
( (value[21]+value[22]+value[23]+value[24]+value[25]+value[26]+value[27]+value[28]+value[29]+value[30]+value[31]+value[32]+value[33]+value[34]+value[35]+value[36]+value[37]+value[38]+value[39]+value[40]) -- civilian labor force employed, ages 16-64
/ nullif( (value[1]+value[2]+value[3]+value[4]+value[5]+value[6]+value[7]+value[8]+value[9]+value[10]+value[11]+value[12]+value[13]+value[14]+value[15]+value[16]+value[17]+value[18]+value[19]+value[20]), 0) -- population ages 16 to 64
) * 100::numeric as result
from vital_signs.get_acs_vars_csa_and_bc('2014',ARRAY[ 'B23001_003E','B23001_010E','B23001_017E','B23001_024E','B23001_031E','B23001_038E','B23001_045E','B23001_052E','B23001_059E','B23001_066E','B23001_089E','B23001_096E','B23001_103E','B23001_110E','B23001_117E','B23001_124E','B23001_131E','B23001_138E','B23001_145E','B23001_152E','B23001_007E','B23001_014E','B23001_021E','B23001_028E','B23001_035E','B23001_042E','B23001_049E','B23001_056E','B23001_063E','B23001_070E','B23001_093E','B23001_100E','B23001_107E','B23001_114E','B23001_121E','B23001_128E','B23001_135E','B23001_142E','B23001_149E','B23001_156E'])
)
update vital_signs.data
set empl = result from tbl where data2.csa = tbl.csa and update_data_year = '2014' and data_year = '2014';
"""fam.py
#export
#File: fam.py
#Author: Charles Karpati
#Date: 4/16/19
#Section: Bnia
#Email: karpati1@umbc.edu
#Description:
# Uses ACS Table B11005 - HOUSEHOLDS BY PRESENCE OF PEOPLE UNDER 18 YEARS BY HOUSEHOLD TYPE
# Universe: Households
# Table Creates: hhs, fam, femhhs
#purpose:
#input: Year
#output:
import pandas as pd
import glob
def fam( year ):
def getColName (df, col): return df.columns[df.columns.str.contains(pat = col)][0]
def getColByName (df, col): return df[getColName(df, col)]
def addKey(df, fi, col):
key = getColName(df, col)
val = getColByName(df, col)
fi[key] = val
return fi
def nullIfEqual(df, c1, c2):
return df.apply(lambda x:
x[getColName(df, c1)]+x[getColName(df, c2)] if x[getColName(df, c1)]+x[getColName(df, c2)] != 0 else 0, axis=1)
def sumInts(df): return df.sum(numeric_only=True)
#~~~~~~~~~~~~~~~
# Step 1)
# Fetch Tract Files w/CSA Labels by Name from the 2_cleaned folder.
#~~~~~~~~~~~~~~~
fileName = ''
for name in glob.glob('AcsDataClean/B11005*5y'+str(year)+'_est.csv'):
fileName = name
df = pd.read_csv( fileName, index_col=0 )
# Aggregate by CSA
# Group By CSA so that they may be operated on
df = df.groupby('CSA')
# Aggregate Numeric Values by Sum
df = df.sum(numeric_only=True)
# Delete Unassigned--Jail
df = df[df.index != 'Unassigned--Jail']
# Move Baltimore City to the bottom (drop by label; the positional drop was fragile)
bc = df.loc['Baltimore City']
df = df.drop('Baltimore City')
df.loc['Baltimore City'] = bc
df1 = pd.DataFrame()
df1['CSA'] = df.index
df1.set_index('CSA', drop=True, inplace=True)
# Actually produce the data
df1['total'] = df[ 'B11005_001E_Total' ]
df1['18Under'] = df[ 'B11005_002E_Total_Households_with_one_or_more_people_under_18_years' ] / df1['total'] * 100
return df1['18Under']
female.py
#export
#File: female.py
#Author: Charles Karpati
#Date: 4/16/19
#Section: Bnia
#Email: karpati1@umbc.edu
#Description:
# Uses ACS Table B01001 - SEX BY AGE
# Universe: Total population
# Table Creates: tpop, female, male, age5 age18 age24 age64 age65
#purpose:
#input: Year
#output:
import pandas as pd
import glob
def female( year ):
def getColName (df, col): return df.columns[df.columns.str.contains(pat = col)][0]
def getColByName (df, col): return df[getColName(df, col)]
def addKey(df, fi, col):
key = getColName(df, col)
val = getColByName(df, col)
fi[key] = val
return fi
def nullIfEqual(df, c1, c2):
return df.apply(lambda x:
x[getColName(df, c1)]+x[getColName(df, c2)] if x[getColName(df, c1)]+x[getColName(df, c2)] != 0 else 0, axis=1)
def sumInts(df): return df.sum(numeric_only=True)
#~~~~~~~~~~~~~~~
# Step 1)
# Fetch Tract Files w/CSA Labels by Name from the 2_cleaned folder.
#~~~~~~~~~~~~~~~
fileName = ''
for name in glob.glob('AcsDataClean/B01001*5y'+str(year)+'_est.csv'):
fileName = name
df = pd.read_csv( fileName, index_col=0 )
# Aggregate by CSA
# Group By CSA so that they may be operated on
df = df.groupby('CSA')
# Aggregate Numeric Values by Sum
df = df.sum(numeric_only=True)
# df.columns
total = df['B01001_001E_Total']
df1 = pd.DataFrame()
df1['CSA'] = df.index
df1.set_index('CSA', drop = True, inplace = True)
df1['onlyTheLadies'] = df[ 'B01001_026E_Total_Female' ]
return df1['onlyTheLadies']
femhhs.py
#export
#File: femhhs.py
#Author: Charles Karpati
#Date: 4/16/19
#Section: Bnia
#Email: karpati1@umbc.edu
#Description:
# Uses ACS Table B11005 - HOUSEHOLDS BY PRESENCE OF PEOPLE UNDER 18 YEARS BY HOUSEHOLD TYPE
# Universe: Households
# Table Creates: male, hhs, fam, femhhs
#purpose:
#input: Year
#output:
import pandas as pd
import glob
def femhhs( year ):
def getColName (df, col): return df.columns[df.columns.str.contains(pat = col)][0]
def getColByName (df, col): return df[getColName(df, col)]
def addKey(df, fi, col):
key = getColName(df, col)
val = getColByName(df, col)
fi[key] = val
return fi
def nullIfEqual(df, c1, c2):
return df.apply(lambda x:
x[getColName(df, c1)]+x[getColName(df, c2)] if x[getColName(df, c1)]+x[getColName(df, c2)] != 0 else 0, axis=1)
def sumInts(df): return df.sum(numeric_only=True)
#~~~~~~~~~~~~~~~
# Step 1)
# Fetch Tract Files w/CSA Labels by Name from the 2_cleaned folder.
#~~~~~~~~~~~~~~~
fileName = ''
for name in glob.glob('AcsDataClean/B11005*5y'+str(year)+'_est.csv'):
fileName = name
df = pd.read_csv( fileName, index_col=0 )
# Aggregate by CSA
# Group By CSA so that they may be operated on
df = df.groupby('CSA')
# Aggregate Numeric Values by Sum
df = df.sum(numeric_only=True)
df1 = pd.DataFrame()
df1['CSA'] = df.index
df1.set_index('CSA', drop = True, inplace = True)
# DIFFERENCES IN TABLE NAMES EXIST BETWEEN 16 and 17. 17 has no comma.
rootStr = 'B11005_007E_Total_Households_with_one_or_more_people_under_18_years_Family_households_Other_family_Female_householder'
str16 = rootStr + ',_no_husband_present'
str17 = rootStr + '_no_husband_present'
str19 = rootStr + ',_no_spouse_present'
femhh = str17 if year == '17' else str19 if year == '19' else str16
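# A more year-proof alternative (sketch, not in the original): select the column
# by its stable prefix instead of hard-coding each year's suffix:
#   femhh = df.filter(regex='^B11005_007E').columns[0]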
# Actually produce the data
df1['total'] = df[ 'B11005_001E_Total' ]
df1['18Under'] = df[ 'B11005_002E_Total_Households_with_one_or_more_people_under_18_years' ] / df1['total'] * 100
df1['FemaleHH'] = df[ femhh ] / df['B11005_002E_Total_Households_with_one_or_more_people_under_18_years'] * 100
df1['FamHHChildrenUnder18'] = df['B11005_003E_Total_Households_with_one_or_more_people_under_18_years_Family_households']
df1['FamHHChildrenOver18'] = df['B11005_012E_Total_Households_with_no_people_under_18_years_Family_households']
df1['FamHH'] = df1['FamHHChildrenOver18'] + df1['FamHHChildrenUnder18']
return df1['FemaleHH']
heatgas.py
#export
#File: heatgas.py
#Author: Charles Karpati
#Date: 1/17/19
#Section: Bnia
#Email: karpati1@umbc.edu
#Description:
# Uses ACS Table B25040 - HOUSE HEATING FUEL
# Universe - Occupied housing units
# Table Creates: elheat, heatgas
#purpose: Produce Sustainability - Percent of Residences Heated by Gas Indicator
#input: Year
#output:
import pandas as pd
import glob
def heatgas( year ):
def getColName (df, col): return df.columns[df.columns.str.contains(pat = col)][0]
def getColByName (df, col): return df[getColName(df, col)]
def addKey(df, fi, col):
key = getColName(df, col)
val = getColByName(df, col)
fi[key] = val
return fi
def nullIfEqual(df, c1, c2):
return df.apply(lambda x:
x[getColName(df, c1)]+x[getColName(df, c2)] if x[getColName(df, c1)]+x[getColName(df, c2)] != 0 else 0, axis=1)
def sumInts(df): return df.sum(numeric_only=True)
#~~~~~~~~~~~~~~~
# Step 1)
# Fetch Tract Files w/CSA Labels by Name from the 2_cleaned folder.
#~~~~~~~~~~~~~~~
fileName = ''
for name in glob.glob('AcsDataClean/B25040*5y'+str(year)+'_est.csv'):
fileName = name
df = pd.read_csv( fileName, index_col=0 )
# Aggregate by CSA
# Group By CSA so that they may be operated on
df = df.groupby('CSA')
# Aggregate Numeric Values by Sum
df = sumInts(df)
# Add 'BALTIMORE' which is the SUM of all the CSAs
#~~~~~~~~~~~~~~~
# Step 2)
# Prepare the columns
#~~~~~~~~~~~~~~~
# Final Dataframe
fi = pd.DataFrame()
columns = ['B25040_002E','B25040_001E']
for col in columns:
fi = addKey(df, fi, col)
# Numerators
numerators = pd.DataFrame()
columns = ['B25040_002E']
for col in columns:
numerators = addKey(df, numerators, col)
# Denominators
denominators = pd.DataFrame()
columns = ['B25040_001E']
for col in columns:
denominators = addKey(df, denominators, col)
#~~~~~~~~~~~~~~~
# Step 3)
# Run the Calculation
# ( value[1] / nullif(value[2],0) )*100
#~~~~~~~~~~~~~~~
fi['numerator'] = numerators.sum(axis=1)
fi['denominator'] = denominators.sum(axis=1)
fi = fi[fi['denominator'] != 0] # Delete Rows where the 'denominator' column is 0
fi['final'] = (fi['numerator'] / fi['denominator'] ) * 100
return fi['final']
"""
/* heatgas */ --
WITH tbl AS (
select csa,
( value[1] / nullif(value[2],0) )*100::numeric as result
from vital_signs.get_acs_vars_csa_and_bc('2014',ARRAY['B25040_002E','B25040_001E'])
)
update vital_signs.data
set heatgas = result from tbl where data2.csa = tbl.csa and update_data_year = '2014' and data_year = '2014';
"""hh25inc.py
#export
#File: hh25inc.py
#Author: Charles Karpati
#Date: 1/17/19
#Section: Bnia
#Email: karpati1@umbc.edu
#Description:
# Uses ACS Table B19001 - HOUSEHOLD INCOME IN THE PAST 12 MONTHS (IN 2017 INFLATION-ADJUSTED DOLLARS)
# Table Creates: hh25 hh40 hh60 hh75 hhm75, mhhi
#purpose: Produce Household Income Under 25K Indicator
#input: Year
#output:
import pandas as pd
import glob
def hh25inc( year ):
def getColName (df, col): return df.columns[df.columns.str.contains(pat = col)][0]
def getColByName (df, col): return df[getColName(df, col)]
def nullIfEqual(df, c1, c2):
return df.apply(lambda x:
x[getColName(df, c1)]+x[getColName(df, c2)] if x[getColName(df, c1)]+x[getColName(df, c2)] != 0 else 0, axis=1)
def sumInts(df): return df.sum(numeric_only=True)
#~~~~~~~~~~~~~~~
# Step 1)
# Fetch Tract Files w/CSA Labels by Name from the 2_cleaned folder.
#~~~~~~~~~~~~~~~
fileName = ''
for name in glob.glob('AcsDataClean/B19001*5y'+str(year)+'_est.csv'):
fileName = name
df = pd.read_csv( fileName, index_col=0 )
# Aggregate by CSA
# Group By CSA so that they may be operated on
df = df.groupby('CSA')
# Aggregate Numeric Values by Sum
df = sumInts(df)
# Add 'BALTIMORE' which is the SUM of all the CSAs
#~~~~~~~~~~~~~~~
# Step 2)
# Prepare the columns
#~~~~~~~~~~~~~~~
# Build the working dataframe: total households (001) plus the four under-$25K buckets (002-005)
fi = pd.DataFrame()
for col in ['001', '002', '003', '004', '005']:
    fi[getColName(df, col)] = getColByName(df, col)
# Delete Rows where the 'denominator' column is 0 -> like the Jail
fi = fi[fi[fi.columns[0]] != 0]
#~~~~~~~~~~~~~~~
# Step 3)
# Run the Calculation
#~~~~~~~~~~~~~~~
return fi.apply(lambda x: ( ( x[fi.columns[1] ]+ x[fi.columns[2] ]+ x[fi.columns[3] ]+ x[fi.columns[4] ] ) / x[fi.columns[0]])*100, axis=1)
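hh25inc above and hh40inc, hh60inc, hh75inc, and hhm75 below repeat this shape with different B19001 buckets; a parameterized sketch (hypothetical helper, reusing getColName/getColByName) would be:

def income_bucket_pct(df, bucket_cols):
    # bucket_cols: 3-digit suffixes of the B19001 cells to sum, e.g. ['002','003','004','005'].
    fi = pd.DataFrame()
    for col in ['001'] + bucket_cols:
        fi[getColName(df, col)] = getColByName(df, col)
    fi = fi[fi[fi.columns[0]] != 0]   # drop zero-household rows, e.g. the Jail
    return fi[fi.columns[1:]].sum(axis=1) / fi[fi.columns[0]] * 100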
hh40inc.py
#export
#File: hh40inc.py
#Author: Charles Karpati
#Date: 1/17/19
#Section: Bnia
#Email: karpati1@umbc.edu
#Description:
# Uses ACS Table B19001 - HOUSEHOLD INCOME IN THE PAST 12 MONTHS (IN 2017 INFLATION-ADJUSTED DOLLARS)
# Table Creates: hh25 hh40 hh60 hh75 hhm75, mhhi
#purpose: Produce Household Income 25K-40K Indicator
#input: Year
#output:
import pandas as pd
import glob
def hh40inc( year ):
def getColName (df, col): return df.columns[df.columns.str.contains(pat = col)][0]
def getColByName (df, col): return df[getColName(df, col)]
def nullIfEqual(df, c1, c2):
return df.apply(lambda x:
x[getColName(df, c1)]+x[getColName(df, c2)] if x[getColName(df, c1)]+x[getColName(df, c2)] != 0 else 0, axis=1)
def sumInts(df): return df.sum(numeric_only=True)
#~~~~~~~~~~~~~~~
# Step 1)
# Fetch Tract Files w/CSA Labels by Name from the 2_cleaned folder.
#~~~~~~~~~~~~~~~
fileName = ''
for name in glob.glob('AcsDataClean/B19001*5y'+str(year)+'_est.csv'):
fileName = name
df = pd.read_csv( fileName, index_col=0 )
# Aggregate by CSA
# Group By CSA so that they may be operated on
df = df.groupby('CSA')
# Aggregate Numeric Values by Sum
df = sumInts(df)
# Add 'BALTIMORE' which is the SUM of all the CSAs
#~~~~~~~~~~~~~~~
# Step 2)
# Prepare the columns
#~~~~~~~~~~~~~~~
# Build the working dataframe: total households (001) plus the $25-40K buckets (006-008)
fi = pd.DataFrame()
for col in ['001', '006', '007', '008']:
    fi[getColName(df, col)] = getColByName(df, col)
# Delete Rows where the 'denominator' column is 0 -> like the Jail
fi = fi[fi[fi.columns[0]] != 0]
#~~~~~~~~~~~~~~~
# Step 3)
# Run the Calculation
#~~~~~~~~~~~~~~~
return fi.apply(lambda x: ( ( x[fi.columns[1] ]+ x[fi.columns[2] ]+ x[fi.columns[3] ] ) / x[fi.columns[0]])*100, axis=1)
"""
/* hh40inc */ --
WITH tbl AS (
select csa,
( (value[1] + value[2] + value[3]) / value[4] )*100 as result
from vital_signs.get_acs_vars_csa_and_bc('2013',ARRAY['B19001_006E','B19001_007E','B19001_008E','B19001_001E'])
)
UPDATE vital_signs.data
set hh40inc = result from tbl where data.csa = tbl.csa and data_year = '2013';
"""hh60inc.py
#export
#File: hh60inc.py
#Author: Charles Karpati
#Date: 1/17/19
#Section: Bnia
#Email: karpati1@umbc.edu
#Description:
# Uses ACS Table B19001 - HOUSEHOLD INCOME IN THE PAST 12 MONTHS (IN 2017 INFLATION-ADJUSTED DOLLARS)
# Table Creates: hh25 hh40 hh60 hh75 hhm75, mhhi
#purpose: Produce Household Income 40-60K Indicator
#input: Year
#output:
import pandas as pd
import glob
def hh60inc( year ):
def getColName (df, col): return df.columns[df.columns.str.contains(pat = col)][0]
def getColByName (df, col): return df[getColName(df, col)]
def nullIfEqual(df, c1, c2):
return df.apply(lambda x:
x[getColName(df, c1)]+x[getColName(df, c2)] if x[getColName(df, c1)]+x[getColName(df, c2)] != 0 else 0, axis=1)
def sumInts(df): return df.sum(numeric_only=True)
#~~~~~~~~~~~~~~~
# Step 1)
# Fetch Tract Files w/CSA Labels by Name from the 2_cleaned folder.
#~~~~~~~~~~~~~~~
fileName = ''
for name in glob.glob('AcsDataClean/B19001*5y'+str(year)+'_est.csv'):
fileName = name
df = pd.read_csv( fileName, index_col=0 )
# Aggregate by CSA
# Group By CSA so that they may be operated on
df = df.groupby('CSA')
# Aggregate Numeric Values by Sum
df = sumInts(df)
# Add 'BALTIMORE' which is the SUM of all the CSAs
#~~~~~~~~~~~~~~~
# Step 2)
# Prepare the columns
#~~~~~~~~~~~~~~~
# Build the working dataframe: total households (001) plus the $40-60K buckets (009-011)
fi = pd.DataFrame()
for col in ['001', '009', '010', '011']:
    fi[getColName(df, col)] = getColByName(df, col)
# Delete Rows where the 'denominator' column is 0 -> like the Jail
fi = fi[fi[fi.columns[0]] != 0]
#~~~~~~~~~~~~~~~
# Step 3)
# Run the Calculation
#~~~~~~~~~~~~~~~
return fi.apply(lambda x: ( ( x[fi.columns[1] ]+ x[fi.columns[2] ]+ x[fi.columns[3] ] ) / x[fi.columns[0]])*100, axis=1)
"""
/* hh60inc */ --
WITH tbl AS (
select csa,
( (value[1] + value[2] + value[3]) / value[4] )*100 as result
from vital_signs.get_acs_vars_csa_and_bc('2013',ARRAY['B19001_009E','B19001_010E','B19001_011E','B19001_001E'])
)
UPDATE vital_signs.data
set hh60inc = result from tbl where data.csa = tbl.csa and data_year = '2013';
"""
hh75inc.py
#export
#File: hh75inc.py
#Author: Charles Karpati
#Date: 1/17/19
#Section: Bnia
#Email: karpati1@umbc.edu
#Description:
# Uses ACS Table B19001 - HOUSEHOLD INCOME IN THE PAST 12 MONTHS (IN 2017 INFLATION-ADJUSTED DOLLARS)
# Table Creates: hh25 hh40 hh60 hh75 hhm75, mhhi
#purpose: Produce Household Income 60-75K Indicator
#input: Year
#output:
import pandas as pd
import glob
def hh75inc( year ):
def getColName (df, col): return df.columns[df.columns.str.contains(pat = col)][0]
def getColByName (df, col): return df[getColName(df, col)]
def nullIfEqual(df, c1, c2):
return df.apply(lambda x:
x[getColName(df, c1)]+x[getColName(df, c2)] if x[getColName(df, c1)]+x[getColName(df, c2)] != 0 else 0, axis=1)
def sumInts(df): return df.sum(numeric_only=True)
#~~~~~~~~~~~~~~~
# Step 1)
# Fetch Tract Files w/CSA Labels by Name from the 2_cleaned folder.
#~~~~~~~~~~~~~~~
fileName = ''
for name in glob.glob('AcsDataClean/B19001*5y'+str(year)+'_est.csv'):
fileName = name
df = pd.read_csv( fileName, index_col=0 )
# Aggregate by CSA
# Group By CSA so that they may be operated on
df = df.groupby('CSA')
# Aggregate Numeric Values by Sum
df = sumInts(df)
# Add 'BALTIMORE' which is the SUM of all the CSAs
#~~~~~~~~~~~~~~~
# Step 2)
# Prepare the columns
#~~~~~~~~~~~~~~~
# Build the working dataframe: total households (001) plus the $60-75K bucket (012)
fi = pd.DataFrame()
for col in ['001', '012']:
    fi[getColName(df, col)] = getColByName(df, col)
# Delete Rows where the 'denominator' column is 0 -> like the Jail
fi = fi[fi[fi.columns[0]] != 0]
#~~~~~~~~~~~~~~~
# Step 3)
# Run the Calculation
#~~~~~~~~~~~~~~~
#12/1
return fi.apply(lambda x: ( x[fi.columns[1] ] / x[fi.columns[0]])*100, axis=1)
"""
/* hh75inc */ --
WITH tbl AS (
select csa,
( value[1] / value[2] )*100 as result
from vital_signs.get_acs_vars_csa_and_bc('2013',ARRAY['B19001_012E','B19001_001E'])
)
UPDATE vital_signs.data
set hh75inc = result from tbl where data.csa = tbl.csa and data_year = '2013';
"""hhchpov.py
#export
#File: hhchpov.py
#Author: Charles Karpati
#Date: 1/17/19
#Section: Bnia
#Email: karpati1@umbc.edu
#Description:
# Uses ACS Table B17001 - POVERTY STATUS IN THE PAST 12 MONTHS BY SEX BY AGE
# Universe: Population for whom poverty status is determined
#purpose: Produce Household Poverty Indicator
#input: Year
#output:
import pandas as pd
import glob
def hhchpov( year ):
def getColName (df, col): return df.columns[df.columns.str.contains(pat = col)][0]
def getColByName (df, col): return df[getColName(df, col)]
def addKey(df, fi, col):
key = getColName(df, col)
val = getColByName(df, col)
fi[key] = val
return fi
def nullIfEqual(df, c1, c2):
return df.apply(lambda x:
x[getColName(df, c1)]+x[getColName(df, c2)] if x[getColName(df, c1)]+x[getColName(df, c2)] != 0 else 0, axis=1)
def sumInts(df): return df.sum(numeric_only=True)
#~~~~~~~~~~~~~~~
# Step 1)
# Fetch Tract Files w/CSA Labels by Name from the 2_cleaned folder.
#~~~~~~~~~~~~~~~
fileName = ''
for name in glob.glob('AcsDataClean/B17001*5y'+str(year)+'_est.csv'):
fileName = name
df = pd.read_csv( fileName, index_col=0 )
# Aggregate by CSA
# Group By CSA so that they may be operated on
df = df.groupby('CSA')
# Aggregate Numeric Values by Sum
df = sumInts(df)
# Add 'BALTIMORE' which is the SUM of all the CSAs
#~~~~~~~~~~~~~~~
# Step 2)
# Prepare the columns
#~~~~~~~~~~~~~~~
# Final Dataframe
fi = pd.DataFrame()
columns = ['B17001_004E', 'B17001_005E', 'B17001_006E', 'B17001_007E', 'B17001_008E', 'B17001_009E', 'B17001_018E', 'B17001_019E', 'B17001_020E', 'B17001_021E', 'B17001_022E', 'B17001_023E', 'B17001_033E', 'B17001_034E', 'B17001_035E', 'B17001_036E', 'B17001_037E', 'B17001_038E', 'B17001_047E', 'B17001_048E', 'B17001_049E', 'B17001_050E', 'B17001_051E', 'B17001_052E']
for col in columns:
fi = addKey(df, fi, col)
# Numerators
numerators = pd.DataFrame()
columns = ['B17001_004E', 'B17001_005E', 'B17001_006E', 'B17001_007E', 'B17001_008E', 'B17001_009E', 'B17001_018E', 'B17001_019E', 'B17001_020E', 'B17001_021E', 'B17001_022E', 'B17001_023E']
for col in columns:
numerators = addKey(df, numerators, col)
# Denominators
denominators = pd.DataFrame()
columns = ['B17001_004E', 'B17001_005E', 'B17001_006E', 'B17001_007E', 'B17001_008E', 'B17001_009E', 'B17001_018E', 'B17001_019E', 'B17001_020E', 'B17001_021E', 'B17001_022E', 'B17001_023E', 'B17001_033E', 'B17001_034E', 'B17001_035E', 'B17001_036E', 'B17001_037E', 'B17001_038E', 'B17001_047E', 'B17001_048E', 'B17001_049E', 'B17001_050E', 'B17001_051E', 'B17001_052E']
for col in columns:
denominators = addKey(df, denominators, col)
#~~~~~~~~~~~~~~~
# Step 3)
# Run the Calculation
#~~~~~~~~~~~~~~~
fi['numerator'] = numerators.sum(axis=1)
fi['denominator'] = denominators.sum(axis=1)
fi = fi[fi['denominator'] != 0] #Delete Rows where the 'denominator' column is 0
fi['final'] = (fi['numerator'] / fi['denominator'] ) * 100
#~~~~~~~~~~~~~~~
# Step 4)
# Add Special Baltimore City Data
#~~~~~~~~~~~~~~~
url = 'https://api.census.gov/data/20'+str(year)+'/acs/acs5/subject?get=NAME,S1701_C03_002E&for=county%3A510&in=state%3A24&key=829bf6f2e037372acbba32ba5731647c5127fdb0'
table = pd.read_json(url, orient='records')
fi.loc['Baltimore City', 'final'] = float(table.loc[1, table.columns[1]])
return fi['final']
"""
/* hhchpov */
WITH tbl AS (
select csa,
( (value[1] + value[2] + value[3] + value[4] + value[5] + value[6] + value[7] + value[8] + value[9] + value[10] + value[11] + value[12])
/ nullif(
(value[1] + value[2] + value[3] + value[4] + value[5] + value[6] + value[7] + value[8] + value[9] + value[10] + value[11] + value[12] + value[13] + value[14] + value[15] + value[16] + value[17] + value[18] + value[19] + value[20] + value[21] + value[22] + value[23] + value[24] ),
0)
) * 100::numeric as result
from vital_signs.get_acs_vars_csa_and_bc('2014',ARRAY['B17001_004E','B17001_005E','B17001_006E','B17001_007E','B17001_008E','B17001_009E','B17001_018E','B17001_019E','B17001_020E','B17001_021E','B17001_022E','B17001_023E','B17001_033E','B17001_034E','B17001_035E','B17001_036E','B17001_037E','B17001_038E','B17001_047E','B17001_048E','B17001_049E','B17001_050E','B17001_051E','B17001_052E'])
)
update vital_signs.data
set hhchpov = result from tbl where data2.csa = tbl.csa and update_data_year = '2014' and data_year = '2014';
"""hhm75.py
#export
#File: hhm75.py
#Author: Charles Karpati
#Date: 1/17/19
#Section: Bnia
#Email: karpati1@umbc.edu
#Description:
# Uses ACS Table B19001 - HOUSEHOLD INCOME IN THE PAST 12 MONTHS (IN 2017 INFLATION-ADJUSTED DOLLARS)
# Table Creates: hh25 hh40 hh60 hh75 hhm75, mhhi
#purpose: Produce Household Income Over 75K Indicator
#input: Year
#output:
import pandas as pd
import glob
def hhm75( year ):
def getColName (df, col): return df.columns[df.columns.str.contains(pat = col)][0]
def getColByName (df, col): return df[getColName(df, col)]
def nullIfEqual(df, c1, c2):
return df.apply(lambda x:
x[getColName(df, c1)]+x[getColName(df, c2)] if x[getColName(df, c1)]+x[getColName(df, c2)] != 0 else 0, axis=1)
def sumInts(df): return df.sum(numeric_only=True)
#~~~~~~~~~~~~~~~
# Step 1)
# Fetch Tract Files w/CSA Labels by Name from the 2_cleaned folder.
#~~~~~~~~~~~~~~~
fileName = ''
for name in glob.glob('AcsDataClean/B19001*5y'+str(year)+'_est.csv'):
fileName = name
df = pd.read_csv( fileName, index_col=0 )
# Aggregate by CSA
# Group By CSA so that they may be operated on
df = df.groupby('CSA')
# Aggregate Numeric Values by Sum
df = sumInts(df)
# Add 'BALTIMORE' which is the SUM of all the CSAs
#~~~~~~~~~~~~~~~
# Step 2)
# Prepare the columns
#~~~~~~~~~~~~~~~
# create a new dataframe for giggles
fi = pd.DataFrame()
# append columns 001 through 012 into that dataframe
for col in ['001','002','003','004','005','006','007','008','009','010','011','012']:
key = getColName(df, col)
val = getColByName(df, col)
fi[key] = val
# Delete rows where the total-households column (the denominator) is 0 -> like the Jail
fi = fi[fi[fi.columns[0]] != 0]
#~~~~~~~~~~~~~~~
# Step 3)
# Run the Calculation
#~~~~~~~~~~~~~~~
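# e.g. (hypothetical counts): 1,000 total households with 800 across the
# eleven brackets under $75K gives ((1000 - 800) / 1000) * 100 = 20.0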
return (fi[fi.columns[0]] - fi[fi.columns[1:12]].sum(axis=1)) / fi[fi.columns[0]] * 100
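# Aside: every one of these indicators locates its ACS columns by substring
# match on the cleaned CSV headers. Below is a minimal, self-contained sketch
# of that lookup; the toy frame and its column names are invented for
# illustration and are not part of the real pipeline.
import pandas as pd
toy = pd.DataFrame({
'B19001_001E_Total': [100, 200],
'B19001_002E_Total_Less_than_$10,000': [10, 30],
}, index=['CSA A', 'CSA B'])
def getColName(df, col): return df.columns[df.columns.str.contains(pat = col)][0]
print(getColName(toy, '002')) # B19001_002E_Total_Less_than_$10,000
print(toy[getColName(toy, '002')].tolist()) # [10, 30]
# Caveat: pat='001' would also match 'B19001_002E...' through the '19001'
# substring, so the lookup relies on the _001E column appearing first.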
hhpov.py
#export
#File: hhpov.py
#Author: Charles Karpati
#Date: 1/17/19
#Section: Bnia
#Email: karpati1@umbc.edu
#Description:
# Uses ACS Table B17017 - POVERTY STATUS IN THE PAST 12 MONTHS BY HOUSEHOLD TYPE BY AGE OF HOUSEHOLDER
# Universe: Households
#purpose: Produce Household Poverty Indicator
#input: Year
#output:
import pandas as pd
import glob
def hhpov( year ):
def getColName (df, col): return df.columns[df.columns.str.contains(pat = col)][0]
def getColByName (df, col): return df[getColName(df, col)]
def nullIfEqual(df, c1, c2):
return df.apply(lambda x:
x[getColName(df, c1)]+x[getColName(df, c2)] if x[getColName(df, c1)]+x[getColName(df, c2)] != 0 else 0, axis=1)
def sumInts(df): return df.sum(numeric_only=True)
#~~~~~~~~~~~~~~~
# Step 1)
# Fetch Tract Files w/CSA Labels by Name from the 2_cleaned folder.
#~~~~~~~~~~~~~~~
fileName = ''
for name in glob.glob('AcsDataClean/B17017*5y'+str(year)+'_est.csv'):
fileName = name
df = pd.read_csv( fileName, index_col=0 )
# Aggregate by CSA
# Group By CSA so that they may be operated on
df = df.groupby('CSA')
# Aggregate Numeric Values by Sum
df = sumInts(df)
# Add 'BALTIMORE' which is the SUM of all the CSAs
#~~~~~~~~~~~~~~~
# Step 2)
# Prepare the columns
#~~~~~~~~~~~~~~~
# create a new dataframe for giggles
fi = pd.DataFrame()
# append into that dataframe col 003
key = getColName(df, '003')
val = getColByName(df, '003')
fi[key] = val
# append into that dataframe col 032
key = getColName(df, '032')
val = getColByName(df, '032')
fi[key] = val
# Construct the denominator: the sum of the two columns (kept at 0 when both counts are 0).
fi['denominator'] = nullIfEqual( df, '003', '032')
# Delete Rows where the 'denominator' column is 0
fi = fi[fi['denominator'] != 0]
#~~~~~~~~~~~~~~~
# Step 3)
# Run the Calculation
#~~~~~~~~~~~~~~~
return fi.apply(lambda x: (x[fi.columns[0]] / x['denominator'])*100, axis=1)
hhs.py
#export
#File: hhs.py
#Author: Charles Karpati
#Date: 4/16/19
#Section: Bnia
#Email: karpati1@umbc.edu
#Description:
# Uses ACS Table B11005 - HOUSEHOLDS BY PRESENCE OF PEOPLE UNDER 18 YEARS BY HOUSEHOLD TYPE
# Universe: Households
# Table Creates: hhs, fam, femhhs
#purpose: Produce Total Number of Households Indicator
#input: Year
#output:
import pandas as pd
import glob
def hhs( year ):
def getColName (df, col): return df.columns[df.columns.str.contains(pat = col)][0]
def getColByName (df, col): return df[getColName(df, col)]
def addKey(df, fi, col):
key = getColName(df, col)
val = getColByName(df, col)
fi[key] = val
return fi
def nullIfEqual(df, c1, c2):
return df.apply(lambda x:
x[getColName(df, c1)]+x[getColName(df, c2)] if x[getColName(df, c1)]+x[getColName(df, c2)] != 0 else 0, axis=1)
def sumInts(df): return df.sum(numeric_only=True)
#~~~~~~~~~~~~~~~
# Step 1)
# Fetch Tract Files w/CSA Labels by Name from the 2_cleaned folder.
#~~~~~~~~~~~~~~~
fileName = ''
for name in glob.glob('AcsDataClean/B11005*5y'+str(year)+'_est.csv'):
fileName = name
df = pd.read_csv( fileName, index_col=0 )
# Aggregate by CSA
# Group By CSA so that they may be operated on
df = df.groupby('CSA')
# Aggregate Numeric Values by Sum
df = df.sum(numeric_only=True)
df1 = pd.DataFrame()
df1['CSA'] = df.index
df1.set_index('CSA', drop = True, inplace = True)
df1['tot'] = df[ 'B11005_001E_Total' ]
return df1['tot']
hsdipl.py
#export
#File: hsdipl.py
#Author: Charles Karpati
#Date: 1/17/19
#Section: Bnia
#Email: karpati1@umbc.edu
#Description:
# Uses ACS Table B06009 - PLACE OF BIRTH BY EDUCATIONAL ATTAINMENT IN THE UNITED STATES
#purpose: Produce Workforce and Economic Development - Percent Population (25 Years and over) With High School Diploma and Some College or Associates Degree
#Table Uses: B06009 - lesshs, hsdipl, bahigher
#input: Year
#output:
import pandas as pd
import glob
def hsdipl( year ):
def getColName (df, col): return df.columns[df.columns.str.contains(pat = col)][0]
def getColByName (df, col): return df[getColName(df, col)]
def addKey(df, fi, col):
key = getColName(df, col)
val = getColByName(df, col)
fi[key] = val
return fi
def nullIfEqual(df, c1, c2):
return df.apply(lambda x:
x[getColName(df, c1)]+x[getColName(df, c2)] if x[getColName(df, c1)]+x[getColName(df, c2)] != 0 else 0, axis=1)
def sumInts(df): return df.sum(numeric_only=True)
#~~~~~~~~~~~~~~~
# Step 1)
# Fetch Tract Files w/CSA Labels by Name from the 2_cleaned folder.
#~~~~~~~~~~~~~~~
fileName = ''
for name in glob.glob('AcsDataClean/B06009*5y'+str(year)+'_est.csv'):
fileName = name
df = pd.read_csv( fileName, index_col=0 )
# Aggregate by CSA
# Group By CSA so that they may be operated on
df = df.groupby('CSA')
# Aggregate Numeric Values by Sum
df = sumInts(df)
# Add 'BALTIMORE' which is the SUM of all the CSAs
#~~~~~~~~~~~~~~~
# Step 2)
# Prepare the columns
#~~~~~~~~~~~~~~~
# Final Dataframe
fi = pd.DataFrame()
columns = ['B06009_003E','B06009_004E','B06009_001E']
for col in columns:
fi = addKey(df, fi, col)
# Numerators
numerators = pd.DataFrame()
columns = ['B06009_003E','B06009_004E']
for col in columns:
numerators = addKey(df, numerators, col)
# Denominators
denominators = pd.DataFrame()
columns = ['B06009_001E']
for col in columns:
denominators = addKey(df, denominators, col)
# The denominator is computed in Step 3 below.
#~~~~~~~~~~~~~~~
# Step 3)
# Run the Calculation + final mods
# ( ( value[1] + value[2] ) / nullif(value[3],0) )*100
#~~~~~~~~~~~~~~~
fi['numerator'] = numerators.sum(axis=1)
fi['denominator'] = denominators.sum(axis=1)
fi = fi[fi['denominator'] != 0] # Delete Rows where the 'denominator' column is 0
fi['final'] = (fi['numerator'] / fi['denominator'] ) * 100
return fi['final']
"""
/* */ --
WITH tbl AS (
select csa,
( ( value[1] + value[2] ) / nullif(value[3],0) )*100::numeric as result
from vital_signs.get_acs_vars_csa_and_bc('2014',ARRAY['B06009_003E','B06009_004E','B06009_001E'])
)
update vital_signs.data
set hsdipl = result from tbl where data2.csa = tbl.csa and update_data_year = '2014' and data_year = '2014';
"""lesshs.py
#export
#File: lesshs.py
#Author: Charles Karpati
#Date: 1/17/19
#Section: Bnia
#Email: karpati1@umbc.edu
#Description:
# Uses ACS Table B06009 - PLACE OF BIRTH BY EDUCATIONAL ATTAINMENT IN THE UNITED STATES
#purpose: Produce Workforce and Economic Development - Percent Population (25 Years and over) With Less Than a High School Diploma or GED Indicator
#Table Uses: B06009 - lesshs, hsdipl, bahigher
#input: Year
#output:
import pandas as pd
import glob
def lesshs( year ):
def getColName (df, col): return df.columns[df.columns.str.contains(pat = col)][0]
def getColByName (df, col): return df[getColName(df, col)]
def addKey(df, fi, col):
key = getColName(df, col)
val = getColByName(df, col)
fi[key] = val
return fi
def nullIfEqual(df, c1, c2):
return df.apply(lambda x:
x[getColName(df, c1)]+x[getColName(df, c2)] if x[getColName(df, c1)]+x[getColName(df, c2)] != 0 else 0, axis=1)
def sumInts(df): return df.sum(numeric_only=True)
#~~~~~~~~~~~~~~~
# Step 1)
# Fetch Tract Files w/CSA Labels by Name from the 2_cleaned folder.
#~~~~~~~~~~~~~~~
fileName = ''
for name in glob.glob('AcsDataClean/B06009*5y'+str(year)+'_est.csv'):
fileName = name
df = pd.read_csv( fileName, index_col=0 )
# Aggregate by CSA
# Group By CSA so that they may be operated on
df = df.groupby('CSA')
# Aggregate Numeric Values by Sum
df = sumInts(df)
# Add 'BALTIMORE' which is the SUM of all the CSAs
#~~~~~~~~~~~~~~~
# Step 2)
# Prepare the columns
#~~~~~~~~~~~~~~~
# Final Dataframe
fi = pd.DataFrame()
columns = ['B06009_002E','B06009_001E']
for col in columns:
fi = addKey(df, fi, col)
# Numerators
numerators = pd.DataFrame()
columns = ['B06009_002E']
for col in columns:
numerators = addKey(df, numerators, col)
# Denominators
denominators = pd.DataFrame()
columns = ['B06009_001E']
for col in columns:
denominators = addKey(df, denominators, col)
# The denominator is computed in Step 3 below.
#~~~~~~~~~~~~~~~
# Step 3)
# Run the Calculation + final mods
# ( value[1] / nullif(value[2],0) )*100
#~~~~~~~~~~~~~~~
fi['numerator'] = numerators.sum(axis=1)
fi['denominator'] = denominators.sum(axis=1)
fi = fi[fi['denominator'] != 0] # Delete Rows where the 'denominator' column is 0
fi['final'] = (fi['numerator'] / fi['denominator'] ) * 100
return fi['final']
"""
/* */ --
WITH tbl AS (
select csa,
( value[1] / nullif(value[2],0) )*100::numeric as result
from vital_signs.get_acs_vars_csa_and_bc('2014',ARRAY['B06009_002E','B06009_001E'])
)
update vital_signs.data
set lesshs = result from tbl where data2.csa = tbl.csa and update_data_year = '2014' and data_year = '2014';
"""male.py
#export
#File: male.py
#Author: Charles Karpati
#Date: 4/16/19
#Section: Bnia
#Email: karpati1@umbc.edu
#Description:
# Uses ACS Table B01001 - SEX BY AGE
# Universe: Total population
# Table Creates: tpop, female, male, age5, age18, age24, age64, age65
#purpose: Produce Total Male Population Indicator
#input: Year
#output:
import pandas as pd
import glob
def male( year ):
def getColName (df, col): return df.columns[df.columns.str.contains(pat = col)][0]
def getColByName (df, col): return df[getColName(df, col)]
def addKey(df, fi, col):
key = getColName(df, col)
val = getColByName(df, col)
fi[key] = val
return fi
def nullIfEqual(df, c1, c2):
return df.apply(lambda x:
x[getColName(df, c1)]+x[getColName(df, c2)] if x[getColName(df, c1)]+x[getColName(df, c2)] != 0 else 0, axis=1)
def sumInts(df): return df.sum(numeric_only=True)
#~~~~~~~~~~~~~~~
# Step 1)
# Fetch Tract Files w/CSA Labels by Name from the 2_cleaned folder.
#~~~~~~~~~~~~~~~
fileName = ''
for name in glob.glob('AcsDataClean/B01001*5y'+str(year)+'_est.csv'):
fileName = name
df = pd.read_csv( fileName, index_col=0 )
# Aggregate by CSA
# Group By CSA so that they may be operated on
df = df.groupby('CSA')
# Aggregate Numeric Values by Sum
df = df.sum(numeric_only=True)
# df.columns
total = df['B01001_001E_Total']
df1 = pd.DataFrame()
df1['CSA'] = df.index
df1.set_index('CSA', drop = True, inplace = True)
df1['onlyTheFellas'] = df[ 'B01001_002E_Total_Male' ]
return df1['onlyTheFellas']
mhhi.py
#export
#File: mhhi.py
#Author: Charles Karpati
#Date: 1/24/19
#Section: Bnia
#Email: karpati1@umbc.edu
#Description:
# Uses ACS Table B19001 - HOUSEHOLD INCOME IN THE PAST 12 MONTHS (IN 2016 INFLATION-ADJUSTED DOLLARS)
# Universe: Households
# Table Creates: hh25, hh40, hh60, hh75, hhm75, mhhi
#purpose: Produce Median Household Income Indicator
#input: Year
#output:
import pandas as pd
import glob
def mhhi( year ):
def getColName (df, col): return df.columns[df.columns.str.contains(pat = col)][0]
def getColByName (df, col): return df[getColName(df, col)]
def addKey(df, fi, col):
key = getColName(df, col)
val = getColByName(df, col)
fi[key] = val
return fi
def nullIfEqual(df, c1, c2):
return df.apply(lambda x:
x[getColName(df, c1)]+x[getColName(df, c2)] if x[getColName(df, c1)]+x[getColName(df, c2)] != 0 else 0, axis=1)
def sumInts(df): return df.sum(numeric_only=True)
#~~~~~~~~~~~~~~~
# Step 1)
# Fetch Tract Files w/CSA Labels by Name from the 2_cleaned folder.
#~~~~~~~~~~~~~~~
fileName = ''
for name in glob.glob('AcsDataClean/B19001*5y'+str(year)+'_est.csv'):
fileName = name
df = pd.read_csv( fileName, index_col=0 )
# Aggregate by CSA
# Group By CSA so that they may be operated on
df = df.groupby('CSA')
# Aggregate Numeric Values by Sum
df = sumInts(df)
# Add 'BALTIMORE' which is the SUM of all the CSAs
#~~~~~~~~~~~~~~~
# Step 2)
# Prepare the columns
#~~~~~~~~~~~~~~~
info = pd.DataFrame(
[
['B19001_002E', 0, 10000],
['B19001_003E', 10000, 4999 ],
['B19001_004E', 15000, 4999 ],
['B19001_005E', 20000, 4999 ],
['B19001_006E', 25000, 4999 ],
['B19001_007E', 30000, 4999],
['B19001_008E', 35000, 4999 ],
['B19001_009E', 40000, 4999 ],
['B19001_010E', 45000, 4999 ],
['B19001_011E', 50000, 9999 ],
['B19001_012E', 60000, 14999],
['B19001_013E', 75000, 24999 ],
['B19001_014E', 100000, 24999 ],
['B19001_015E', 125000, 24999 ],
['B19001_016E', 150000, 49000 ],
['B19001_017E', 200000, 1000000000000000000000000 ], # open-ended top bracket; a huge sentinel stands in for the SQL's null range
],
columns=['variable', 'lower', 'range']
)
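# Each row holds a bracket's lower bound and width, e.g. B19001_003E counts
# households earning $10,000 to $14,999: lower = 10000, range = 4999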
# Final Dataframe
data_table = pd.DataFrame()
for index, row in info.iterrows():
#print(row['variable'], row['lower'], row['range'])
data_table = addKey(df, data_table, row['variable'])
# create a table of the accumulating total across the columns from left to right for each csa.
temp_table = data_table.cumsum(axis=1)
# get each csa's midpoint by halving the last column of the cumulative totals (the grand total)
temp_table['midpoint'] = (temp_table.iloc[ : , -1 :] /2) # V3
temp_table['midpoint_index'] = False
temp_table['midpoint_index_value'] = False # Z3
temp_table['midpoint_index_lower'] = False # W3
temp_table['midpoint_index_range'] = False # X3
temp_table['midpoint_index_minus_one_cumulative_sum'] = False #Y3
# step 3 - csa_agg3: get the midpoint index by "when midpoint > agg[1] and midpoint <= agg[2] then 2"
# Get CSA Midpoint Index using the breakpoints in our info table.
# For each CSA
for index, row in temp_table.iterrows():
# Get the index of the first column where our midpoint is greater than the columns value.
# Skip the six temp columns we just created
midpoint = row['midpoint']
midpoint_index = 0
for column in row.iloc[:-6]:
# advance midpoint_index past every cumulative column whose value the midpoint meets or exceeds
if( midpoint >= int(column) ):
# print (str(column) + ' - ' + str(midpoint))
temp_table.loc[ index, 'midpoint_index' ] = midpoint_index +1
midpoint_index += 1
temp_table = temp_table.drop('Unassigned--Jail')
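# (The 'Unassigned--Jail' pseudo-CSA has no meaningful household income
# distribution, so it is excluded before the bracket lookup below.)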
for index, row in temp_table.iterrows():
temp_table.loc[ index, 'midpoint_index_value' ] = data_table.loc[ index, data_table.columns[row['midpoint_index']] ]
temp_table.loc[ index, 'midpoint_index_lower' ] = info.loc[ row['midpoint_index'] ]['lower']
temp_table.loc[ index, 'midpoint_index_range' ] = info.loc[ row['midpoint_index'] ]['range']
temp_table.loc[ index, 'midpoint_index_minus_one_cumulative_sum'] = row[ row['midpoint_index']-1 ]
#~~~~~~~~~~~~~~~
# Step 3)
# Run the Calculation
# Calculation = midpoint_lower::numeric + (midpoint_range::numeric * ((midpoint - midpoint_upto_agg) / nullif(midpoint_total,0)))
# Calculation = W3+X3*((V3-Y3)/Z3)
# V3 -> midpoint of households == sum / 2
# W3 -> lower limit of the income range containing the midpoint == row[lower]
# X3 -> width of the interval containing the median == row[range]
# Z3 -> number of households within the interval containing the median == row[total]
# Y3 -> cumulative frequency up to, but NOT including, the median interval
#~~~~~~~~~~~~~~~
temp_table['final'] = temp_table['midpoint_index_lower']+temp_table['midpoint_index_range']*((temp_table['midpoint']-temp_table['midpoint_index_minus_one_cumulative_sum'])/temp_table['midpoint_index_value'])
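# Worked example (hypothetical counts): a CSA with 1,000 households has a
# midpoint of 500. If the cumulative total first reaches 500 in the
# $45,000 to $49,999 bracket (lower = 45000, range = 4999), with 420
# households accumulated below that bracket and 120 inside it, the estimate
# is 45000 + 4999 * ((500 - 420) / 120), roughly 48,333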
#~~~~~~~~~~~~~~~
# Step 4)
# Add Special Baltimore City Data
#~~~~~~~~~~~~~~~
url = 'https://api.census.gov/data/20'+str(year)+'/acs/acs5/subject?get=NAME,S1901_C01_012E&for=county%3A510&in=state%3A24&key=829bf6f2e037372acbba32ba5731647c5127fdb0'
table = pd.read_json(url, orient='records')
temp_table['final']['Baltimore City'] = float(table.loc[1, table.columns[1]])
return temp_table['final']
"""
/* */ --
with tbl_csa as (
select a.*,b.count from vital_signs.get_acs_vars_csa_and_bc('2014',ARRAY['B19001_002E','B19001_003E','B19001_004E','B19001_005E','B19001_006E','B19001_007E','B19001_008E','B19001_009E','B19001_010E','B19001_011E','B19001_012E','B19001_013E','B19001_014E','B19001_015E','B19001_016E','B19001_017E','B19013_001E'])
a left join (select csa,count(*) as count from vital_signs.tracts group by csa) b
on a.csa = b.csa
),
info as (
select 'B19001_002E' as variable, 0 as lower, 10000 as range
union all select 'B19001_003E' as variable, 10000 as lower, 4999 as range
union all select 'B19001_004E' as variable, 15000 as lower, 4999 as range
union all select 'B19001_005E' as variable, 20000 as lower, 4999 as range
union all select 'B19001_006E' as variable, 25000 as lower, 4999 as range
union all select 'B19001_007E' as variable, 30000 as lower, 4999 as range
union all select 'B19001_008E' as variable, 35000 as lower, 4999 as range
union all select 'B19001_009E' as variable, 40000 as lower, 4999 as range
union all select 'B19001_010E' as variable, 45000 as lower, 4999 as range
union all select 'B19001_011E' as variable, 50000 as lower, 9999 as range
union all select 'B19001_012E' as variable, 60000 as lower, 14999 as range
union all select 'B19001_013E' as variable, 75000 as lower, 24999 as range
union all select 'B19001_014E' as variable, 100000 as lower, 24999 as range
union all select 'B19001_015E' as variable, 125000 as lower, 24999 as range
union all select 'B19001_016E' as variable, 150000 as lower, 49000 as range
union all select 'B19001_017E' as variable, 200000 as lower, null as range
),
csa_agg as (
select csa,value as total,count,
ARRAY[
(value[1]),
(value[1] + value[2]),
(value[1] + value[2] + value[3]),
(value[1] + value[2] + value[3] + value[4]),
(value[1] + value[2] + value[3] + value[4] + value[5]),
(value[1] + value[2] + value[3] + value[4] + value[5] + value[6]),
(value[1] + value[2] + value[3] + value[4] + value[5] + value[6] + value[7]),
(value[1] + value[2] + value[3] + value[4] + value[5] + value[6] + value[7] + value[8]),
(value[1] + value[2] + value[3] + value[4] + value[5] + value[6] + value[7] + value[8] + value[9]),
(value[1] + value[2] + value[3] + value[4] + value[5] + value[6] + value[7] + value[8] + value[9] + value[10]),
(value[1] + value[2] + value[3] + value[4] + value[5] + value[6] + value[7] + value[8] + value[9] + value[10] + value[11]),
(value[1] + value[2] + value[3] + value[4] + value[5] + value[6] + value[7] + value[8] + value[9] + value[10] + value[11] + value[12]),
(value[1] + value[2] + value[3] + value[4] + value[5] + value[6] + value[7] + value[8] + value[9] + value[10] + value[11] + value[12] + value[13]),
(value[1] + value[2] + value[3] + value[4] + value[5] + value[6] + value[7] + value[8] + value[9] + value[10] + value[11] + value[12] + value[13] + value[14]),
(value[1] + value[2] + value[3] + value[4] + value[5] + value[6] + value[7] + value[8] + value[9] + value[10] + value[11] + value[12] + value[13] + value[14] + value[15]),
(value[1] + value[2] + value[3] + value[4] + value[5] + value[6] + value[7] + value[8] + value[9] + value[10] + value[11] + value[12] + value[13] + value[14] + value[15] + value[16])
] as agg,
value[17] as median,
variable from tbl_csa
),
csa_agg2 as (
select csa,count,median,total,agg,variable,
agg[16]/2::numeric as midpoint from csa_agg
),
csa_agg3 as (
select csa,count,median,total,agg,variable,midpoint,
(case
when midpoint <= agg[1] then 1
when midpoint > agg[1] and midpoint <= agg[2] then 2
when midpoint > agg[2] and midpoint <= agg[3] then 3
when midpoint > agg[3] and midpoint <= agg[4] then 4
when midpoint > agg[4] and midpoint <= agg[5] then 5
when midpoint > agg[5] and midpoint <= agg[6] then 6
when midpoint > agg[6] and midpoint <= agg[7] then 7
when midpoint > agg[7] and midpoint <= agg[8] then 8
when midpoint > agg[8] and midpoint <= agg[9] then 9
when midpoint > agg[9] and midpoint <= agg[10] then 10
when midpoint > agg[10] and midpoint <= agg[11] then 11
when midpoint > agg[11] and midpoint <= agg[12] then 12
when midpoint > agg[12] and midpoint <= agg[13] then 13
when midpoint > agg[13] and midpoint <= agg[14] then 14
when midpoint > agg[14] and midpoint <= agg[15] then 15
when midpoint > agg[15] and midpoint <= agg[16] then 16
when midpoint > agg[16] then 17
end) as midpoint_idx from csa_agg2
),
csa_agg4 as (
select csa,count,median,total,agg,variable,midpoint,midpoint_idx,
total[midpoint_idx] as midpoint_total,
(case
when (midpoint_idx - 1) = 0 then 0
else total[(midpoint_idx - 1)]
end) as midpoint_upto_total,
agg[midpoint_idx] as midpoint_agg, (case when (midpoint_idx - 1) = 0 then 0 else agg[(midpoint_idx - 1)] end) as midpoint_upto_agg,
variable[midpoint_idx] as midpoint_variable
from csa_agg3
),
csa_agg5 as (
select a.*,b.lower as midpoint_lower, b.range as midpoint_range from
csa_agg4 a left join info b on a.midpoint_variable = b.variable
),
tbl as (
select (CASE
when count = 1 OR csa = 'Baltimore City'
then median
else
(midpoint_lower::numeric +
(midpoint_range::numeric * (
(midpoint - midpoint_upto_agg) / nullif(midpoint_total,0)
)
)
)
END) as result,csa
from csa_agg5
)
UPDATE vital_signs.data
set mhhi = result from tbl where data2.csa = tbl.csa and update_data_year = '2014' and data_year = '2014';
"""nilf.py
#export
#File: nilf.py
#Author: Charles Karpati
#Date: 1/17/19
#Section: Bnia
#Email: karpati1@umbc.edu
#Description:
# Uses ACS Table B23001 - SEX BY AGE BY EMPLOYMENT STATUS FOR THE POPULATION 16 YEARS AND OVER
# Universe - Population 16 years and over
# Table Creates: empl, unempl, unempr, nilf
#purpose: Produce Workforce and Economic Development - Percent Population 16-64 Not in Labor Force Indicator
#input: Year
#output:
import pandas as pd
import glob
def nilf( year ):
def getColName (df, col): return df.columns[df.columns.str.contains(pat = col)][0]
def getColByName (df, col): return df[getColName(df, col)]
def addKey(df, fi, col):
key = getColName(df, col)
val = getColByName(df, col)
fi[key] = val
return fi
def nullIfEqual(df, c1, c2):
return df.apply(lambda x:
x[getColName(df, c1)]+x[getColName(df, c2)] if x[getColName(df, c1)]+x[getColName(df, c2)] != 0 else 0, axis=1)
def sumInts(df): return df.sum(numeric_only=True)
#~~~~~~~~~~~~~~~
# Step 1)
# Fetch Tract Files w/CSA Labels by Name from the 2_cleaned folder.
#~~~~~~~~~~~~~~~
fileName = ''
for name in glob.glob('AcsDataClean/B23001*5y'+str(year)+'_est.csv'):
fileName = name
df = pd.read_csv( fileName, index_col=0 )
# Aggregate by CSA
# Group By CSA so that they may be operated on
df = df.groupby('CSA')
# Aggregate Numeric Values by Sum
df = sumInts(df)
# Add 'BALTIMORE' which is the SUM of all the CSAs
#~~~~~~~~~~~~~~~
# Step 2)
# Prepare the columns
#~~~~~~~~~~~~~~~
# Final Dataframe
fi = pd.DataFrame()
columns = ['B23001_003E', 'B23001_010E', 'B23001_017E', 'B23001_024E', 'B23001_031E', 'B23001_038E', 'B23001_045E', 'B23001_052E', 'B23001_059E', 'B23001_066E', 'B23001_089E', 'B23001_096E', 'B23001_103E', 'B23001_110E', 'B23001_117E', 'B23001_124E', 'B23001_131E', 'B23001_138E', 'B23001_145E', 'B23001_152E', 'B23001_009E', 'B23001_016E', 'B23001_023E', 'B23001_030E', 'B23001_037E', 'B23001_044E', 'B23001_051E', 'B23001_058E', 'B23001_065E', 'B23001_072E', 'B23001_095E', 'B23001_102E', 'B23001_109E', 'B23001_116E', 'B23001_123E', 'B23001_130E', 'B23001_137E', 'B23001_144E', 'B23001_151E', 'B23001_158E']
for col in columns:
fi = addKey(df, fi, col)
# Numerators
numerators = pd.DataFrame()
columns = ['B23001_009E', 'B23001_016E', 'B23001_023E', 'B23001_030E', 'B23001_037E', 'B23001_044E', 'B23001_051E', 'B23001_058E', 'B23001_065E', 'B23001_072E', 'B23001_095E', 'B23001_102E', 'B23001_109E', 'B23001_116E', 'B23001_123E', 'B23001_130E', 'B23001_137E', 'B23001_144E', 'B23001_151E', 'B23001_158E']
for col in columns:
numerators = addKey(df, numerators, col)
# Denominators
denominators = pd.DataFrame()
columns = ['B23001_003E', 'B23001_010E', 'B23001_017E', 'B23001_024E', 'B23001_031E', 'B23001_038E', 'B23001_045E', 'B23001_052E', 'B23001_059E', 'B23001_066E', 'B23001_089E', 'B23001_096E', 'B23001_103E', 'B23001_110E', 'B23001_117E', 'B23001_124E', 'B23001_131E', 'B23001_138E', 'B23001_145E', 'B23001_152E']
for col in columns:
denominators = addKey(df, denominators, col)
# The denominator is computed in Step 3 below.
#~~~~~~~~~~~~~~~
# Step 3)
# Run the Calculation
# ( ( value[21]+value[22]+value[23]+value[24]+value[25]+value[26]+value[27]+value[28]+value[29]+value[30]+value[31]+value[32]+value[33]+value[34]+value[35]+value[36]+value[37]+value[38]+value[39]+value[40]) --not in labor force 16-64
# /
# nullif( (value[1]+value[2]+value[3]+value[4]+value[5]+value[6]+value[7]+value[8]+value[9]+value[10]+value[11]+value[12]+value[13]+value[14]+value[15]+value[16]+value[17]+value[18]+value[19]+value[20]) -- population 16 to 64 ,0) )*100::numeric
#~~~~~~~~~~~~~~~
fi['numerator'] = numerators.sum(axis=1)
fi['denominator'] = denominators.sum(axis=1)
fi = fi[fi['denominator'] != 0] # Delete Rows where the 'denominator' column is 0
fi['final'] = (fi['numerator'] / fi['denominator'] ) * 100
return fi['final']
"""
/* */ --
WITH tbl AS (
select csa,
( (value[21]+value[22]+value[23]+value[24]+value[25]+value[26]+value[27]+value[28]+value[29]+value[30]+value[31]+value[32]+value[33]+value[34]+value[35]+value[36]+value[37]+value[38]+value[39]+value[40]) --not in labor force 16-64
/ nullif( (value[1]+value[2]+value[3]+value[4]+value[5]+value[6]+value[7]+value[8]+value[9]+value[10]+value[11]+value[12]+value[13]+value[14]+value[15]+value[16]+value[17]+value[18]+value[19]+value[20]) -- population 16 to 64
,0) )*100::numeric
as result
from vital_signs.get_acs_vars_csa_and_bc('2014', ARRAY['B23001_003E','B23001_010E','B23001_017E','B23001_024E','B23001_031E','B23001_038E','B23001_045E','B23001_052E','B23001_059E','B23001_066E','B23001_089E','B23001_096E','B23001_103E','B23001_110E','B23001_117E','B23001_124E','B23001_131E','B23001_138E','B23001_145E','B23001_152E','B23001_009E','B23001_016E','B23001_023E','B23001_030E','B23001_037E','B23001_044E','B23001_051E','B23001_058E','B23001_065E','B23001_072E','B23001_095E','B23001_102E','B23001_109E','B23001_116E','B23001_123E','B23001_130E','B23001_137E','B23001_144E','B23001_151E','B23001_158E'])
)
update vital_signs.data
set nilf = result from tbl where data2.csa = tbl.csa and update_data_year = '2014' and data_year = '2014';
"""nohhint.py
#export
#File: nohhint.py
#Author: Charles Karpati
#Date: 1/25/19
#Section: Bnia
#Email: karpati1@umbc.edu
#Description:
# Uses ACS Table B28011 - INTERNET SUBSCRIPTIONS IN HOUSEHOLD
# Universe: Households
#purpose: Produce Percent of Households with No Internet at Home Indicator
#input: Year
#output:
import pandas as pd
import glob
def nohhint( year ):
def getColName (df, col): return df.columns[df.columns.str.contains(pat = col)][0]
def getColByName (df, col): return df[getColName(df, col)]
def addKey(df, fi, col):
key = getColName(df, col)
val = getColByName(df, col)
fi[key] = val
return fi
def nullIfEqual(df, c1, c2):
return df.apply(lambda x:
x[getColName(df, c1)]+x[getColName(df, c2)] if x[getColName(df, c1)]+x[getColName(df, c2)] != 0 else 0, axis=1)
def sumInts(df): return df.sum(numeric_only=True)
#~~~~~~~~~~~~~~~
# Step 1)
# Fetch Tract Files w/CSA Labels by Name from the 2_cleaned folder.
#~~~~~~~~~~~~~~~
fileName = ''
for name in glob.glob('AcsDataClean/B28011*5y'+str(year)+'_est.csv'):
fileName = name
df = pd.read_csv( fileName, index_col=0 )
# Aggregate by CSA
# Group By CSA so that they may be operated on
df = df.groupby('CSA')
# Aggregate Numeric Values by Sum
df = sumInts(df)
#~~~~~~~~~~~~~~~
# Step 2)
# Prepare the columns
#~~~~~~~~~~~~~~~
# Final Dataframe
fi = pd.DataFrame()
columns = ['B28011_001E', 'B28011_008E']
for col in columns:
fi = addKey(df, fi, col)
# Numerators
numerators = pd.DataFrame()
columns = ['B28011_008E']
for col in columns:
numerators = addKey(df, numerators, col)
# Denominators
denominators = pd.DataFrame()
columns = ['B28011_001E']
for col in columns:
denominators = addKey(df, denominators, col)
# The denominator is computed in Step 3 below.
#~~~~~~~~~~~~~~~
# Step 3)
# Run the Calculation
# ( value[2] / nullif(value[1],0) )*100
#~~~~~~~~~~~~~~~
fi['numerator'] = numerators.sum(axis=1)
fi['denominator'] = denominators.sum(axis=1)
fi = fi[fi['denominator'] != 0] # Delete Rows where the 'denominator' column is 0
fi['final'] = (fi['numerator'] / fi['denominator'] ) * 100
return fi['final']
"""
/* nohhint (equivalent of the Python calculation above) */
WITH tbl AS (
select csa,
( value[2] / nullif(value[1],0) )*100::numeric as result
from vital_signs.get_acs_vars_csa_and_bc('2014',ARRAY['B28011_001E','B28011_008E'])
)
update vital_signs.data
set nohhint = result from tbl where data2.csa = tbl.csa and update_data_year = '2014' and data_year = '2014';
"""novhcl.py
#export
#File: novhcl.py
#Author: Charles Karpati
#Date: 1/17/19
#Section: Bnia
#Email: karpati1@umbc.edu
#Description:
# Uses ACS Table B08201 - HOUSEHOLD SIZE BY VEHICLES AVAILABLE
# Universe: Households
#purpose: Produce Sustainability - Percent of Households with No Vehicles Available Indicator
#input: Year
#output:
import pandas as pd
import glob
def novhcl( year ):
def getColName (df, col): return df.columns[df.columns.str.contains(pat = col)][0]
def getColByName (df, col): return df[getColName(df, col)]
def addKey(df, fi, col):
key = getColName(df, col)
val = getColByName(df, col)
fi[key] = val
return fi
def nullIfEqual(df, c1, c2):
return df.apply(lambda x:
x[getColName(df, c1)]+x[getColName(df, c2)] if x[getColName(df, c1)]+x[getColName(df, c2)] != 0 else 0, axis=1)
def sumInts(df): return df.sum(numeric_only=True)
#~~~~~~~~~~~~~~~
# Step 1)
# Fetch Tract Files w/CSA Labels by Name from the 2_cleaned folder.
#~~~~~~~~~~~~~~~
fileName = ''
for name in glob.glob('AcsDataClean/B08201*5y'+str(year)+'_est.csv'):
fileName = name
df = pd.read_csv( fileName, index_col=0 )
# Aggregate by CSA
# Group By CSA so that they may be operated on
df = df.groupby('CSA')
# Aggregate Numeric Values by Sum
df = sumInts(df)
# Add 'BALTIMORE' which is the SUM of all the CSAs
#~~~~~~~~~~~~~~~
# Step 2)
# Prepare the columns
#~~~~~~~~~~~~~~~
# Final Dataframe
fi = pd.DataFrame()
columns = ['B08201_002E','B08201_001E']
for col in columns:
fi = addKey(df, fi, col)
# Numerators
numerators = pd.DataFrame()
columns = ['B08201_002E']
for col in columns:
numerators = addKey(df, numerators, col)
# Denominators
denominators = pd.DataFrame()
columns = ['B08201_001E']
for col in columns:
denominators = addKey(df, denominators, col)
# The denominator is computed in Step 3 below.
#~~~~~~~~~~~~~~~
# Step 3)
# Run the Calculation
# ( value[1]/ nullif(value[2],0) )*100
#~~~~~~~~~~~~~~~
fi['numerator'] = numerators.sum(axis=1)
fi['denominator'] = denominators.sum(axis=1)
fi = fi[fi['denominator'] != 0] # Delete Rows where the 'denominator' column is 0
fi['final'] = (fi['numerator'] / fi['denominator'] ) * 100
return fi['final']
"""
/* */ --
WITH tbl AS (
select csa,
( value[1]/ nullif(value[2],0) )*100::numeric as result
from vital_signs.get_acs_vars_csa_and_bc('2014',ARRAY['B08201_002E','B08201_001E'])
)
update vital_signs.data
set novhcl = result from tbl where data2.csa = tbl.csa and update_data_year = '2014' and data_year = '2014';
"""othrcom.py
#export
#File: othrcom.py
#Author: Charles Karpati
#Date: 1/24/19
#Section: Bnia
#Email: karpati1@umbc.edu
#Description:
# Uses ACS Table B08101 - MEANS OF TRANSPORTATION TO WORK BY AGE
# Universe: Workers 16 years and over
# Table Creates: othrcom, drvalone, carpool, pubtran, walked
#purpose: Produce Sustainability - Percent of Population Using Other Means to Commute to Work (Taxi, Motorcycle, Bicycle, Other) Indicator
#input: Year
#output:
import pandas as pd
import glob
def othrcom( year ):
def getColName (df, col): return df.columns[df.columns.str.contains(pat = col)][0]
def getColByName (df, col): return df[getColName(df, col)]
def addKey(df, fi, col):
key = getColName(df, col)
val = getColByName(df, col)
fi[key] = val
return fi
def nullIfEqual(df, c1, c2):
return df.apply(lambda x:
x[getColName(df, c1)]+x[getColName(df, c2)] if x[getColName(df, c1)]+x[getColName(df, c2)] != 0 else 0, axis=1)
def sumInts(df): return df.sum(numeric_only=True)
#~~~~~~~~~~~~~~~
# Step 1)
# Fetch Tract Files w/CSA Labels by Name from the 2_cleaned folder.
#~~~~~~~~~~~~~~~
fileName = ''
for name in glob.glob('AcsDataClean/B08101*5y'+str(year)+'_est.csv'):
fileName = name
df = pd.read_csv( fileName, index_col=0 )
# Aggregate by CSA
# Group By CSA so that they may be operated on
df = df.groupby('CSA')
# Aggregate Numeric Values by Sum
df = sumInts(df)
# Add 'BALTIMORE' which is the SUM of all the CSAs
#~~~~~~~~~~~~~~~
# Step 2)
# Prepare the columns
#~~~~~~~~~~~~~~~
# Final Dataframe
fi = pd.DataFrame()
columns = ['B08101_001E','B08101_049E','B08101_041E']
for col in columns:
fi = addKey(df, fi, col)
# Numerators
numerators = pd.DataFrame()
columns = ['B08101_041E']
for col in columns:
numerators = addKey(df, numerators, col)
# Denominators
denominators = pd.DataFrame()
columns = ['B08101_001E','B08101_049E']
for col in columns:
denominators = addKey(df, denominators, col)
# The denominator is computed in Step 3 below.
#~~~~~~~~~~~~~~~
# Step 3)
# Run the Calculation
# ( value[3] / nullif((value[1]-value[2]),0) )*100
#~~~~~~~~~~~~~~~
fi['numerator'] = numerators.sum(axis=1)
fi['denominator'] = denominators.iloc[: ,0] - denominators.iloc[: ,1]
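# The denominator is all workers (B08101_001E) minus those who worked at home
# (B08101_049E), mirroring value[1] - value[2] in the SQL below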
fi = fi[fi['denominator'] != 0] # Delete Rows where the 'denominator' column is 0
fi['final'] = (fi['numerator'] / fi['denominator'] ) * 100
#~~~~~~~~~~~~~~~
# Step 4)
# Add Special Baltimore City Data
# 100- "6.7", "59.8", "9.2", "18.4", "3.7", = 2.2
# 100- (walked + drvalone + carpool + pubtran + workfromhome(13e))
#~~~~~~~~~~~~~~~
url = 'https://api.census.gov/data/20'+str(year)+'/acs/acs5/subject?get=NAME,S0801_C01_010E,S0801_C01_003E,S0801_C01_004E,S0801_C01_009E,S0801_C01_013E&for=county%3A510&in=state%3A24&key=829bf6f2e037372acbba32ba5731647c5127fdb0'
table = pd.read_json(url, orient='records')
walked = float(table.loc[1, table.columns[1]] )
drvalone = float(table.loc[1, table.columns[2]] )
carpool = float(table.loc[1, table.columns[3]] )
pubtran = float(table.loc[1, table.columns[4]] )
workfromhome = float(table.loc[1, table.columns[5]] )
fi['final']['Baltimore City'] = 100 - ( walked + drvalone + carpool + pubtran + workfromhome )
return fi['final']
"""
/* */ --
WITH tbl AS (
select csa,
( value[3] / nullif((value[1]-value[2]),0) )*100::numeric as result
from vital_signs.get_acs_vars_csa_and_bc('2014',ARRAY['B08101_001E','B08101_049E','B08101_041E'])
)
update vital_signs.data
set othrcom = result from tbl where data2.csa = tbl.csa and update_data_year = '2014' and data_year = '2014';
"""p2more.py
#export
#File: p2more.py
#Author: Charles Karpati
#Date: 4/16/19
#Section: Bnia
#Email: karpati1@umbc.edu
#Description:
# Uses ACS Table B03002 - HISPANIC OR LATINO ORIGIN BY RACE
# Universe: Total Population
# Table Creates: racdiv, paa, pwhite, pasi, phisp, p2more, ppac
#purpose: Produce Percent of Population (Non-Hispanic) of Two or More Races Indicator
#input: Year
#output:
import pandas as pd
import glob
def p2more( year ):
def getColName (df, col): return df.columns[df.columns.str.contains(pat = col)][0]
def getColByName (df, col): return df[getColName(df, col)]
def addKey(df, fi, col):
key = getColName(df, col)
val = getColByName(df, col)
fi[key] = val
return fi
def nullIfEqual(df, c1, c2):
return df.apply(lambda x:
x[getColName(df, c1)]+x[getColName(df, c2)] if x[getColName(df, c1)]+x[getColName(df, c2)] != 0 else 0, axis=1)
def sumInts(df): return df.sum(numeric_only=True)
#~~~~~~~~~~~~~~~
# Step 1)
# Fetch Tract Files w/CSA Labels by Name from the 2_cleaned folder.
#~~~~~~~~~~~~~~~
fileName = ''
for name in glob.glob('AcsDataClean/B03002*5y'+str(year)+'_est.csv'):
fileName = name
df = pd.read_csv( fileName, index_col=0 )
# Aggregate by CSA
# Group By CSA so that they may be operated on
df = df.groupby('CSA')
# Aggregate Numeric Values by Sum
df = df.sum(numeric_only=True)
# B03002 already includes the Hispanic-or-Latino column, so no cross-table append is needed here
df1 = pd.DataFrame()
df1['CSA'] = df.index
df1.set_index('CSA', drop = True, inplace = True)
tot = df[ 'B03002_001E_Total' ]
df1['TwoOrMore%NH'] = df['B03002_009E_Total_Not_Hispanic_or_Latino_Two_or_more_races'] / tot * 100
return df1['TwoOrMore%NH']
paa.py
#export
#File: paa.py
#Author: Charles Karpati
#Date: 4/16/19
#Section: Bnia
#Email: karpati1@umbc.edu
#Description:
# Uses ACS Table B03002 - HISPANIC OR LATINO ORIGIN BY RACE
# Universe: Total Population
# Table Creates: racdiv, paa, pwhite, pasi, phisp, p2more, ppac
#purpose: Produce Percent of Population (Non-Hispanic) Black or African-American Indicator
#input: Year
#output:
import pandas as pd
import glob
def paa( year ):
def getColName (df, col): return df.columns[df.columns.str.contains(pat = col)][0]
def getColByName (df, col): return df[getColName(df, col)]
def addKey(df, fi, col):
key = getColName(df, col)
val = getColByName(df, col)
fi[key] = val
return fi
def nullIfEqual(df, c1, c2):
return df.apply(lambda x:
x[getColName(df, c1)]+x[getColName(df, c2)] if x[getColName(df, c1)]+x[getColName(df, c2)] != 0 else 0, axis=1)
def sumInts(df): return df.sum(numeric_only=True)
#~~~~~~~~~~~~~~~
# Step 1)
# Fetch Tract Files w/CSA Labels by Name from the 2_cleaned folder.
#~~~~~~~~~~~~~~~
fileName = ''
for name in glob.glob('AcsDataClean/B03002*5y'+str(year)+'_est.csv'):
fileName = name
df = pd.read_csv( fileName, index_col=0 )
# Aggregate by CSA
# Group By CSA so that they may be operated on
df = df.groupby('CSA')
# Aggregate Numeric Values by Sum
df = df.sum(numeric_only=True)
# B03002 already includes the Hispanic-or-Latino column, so no cross-table append is needed here
df1 = pd.DataFrame()
df1['CSA'] = df.index
df1.set_index('CSA', drop = True, inplace = True)
tot = df[ 'B03002_001E_Total' ]
df1['African-American%NH'] = df[ 'B03002_004E_Total_Not_Hispanic_or_Latino_Black_or_African_American_alone' ]/ tot * 100
return df1['African-American%NH']
pasi.py
#export
#File: pasi.py
#Author: Charles Karpati
#Date: 4/16/19
#Section: Bnia
#Email: karpati1@umbc.edu
#Description:
# Uses ACS Table B03002 - HISPANIC OR LATINO ORIGIN BY RACE
# Universe: Total Population
# Table Creates: racdiv, paa, pwhite, pasi, phisp, p2more, ppac
#purpose: Produce Percent of Population (Non-Hispanic) Asian Indicator
#input: Year
#output:
import pandas as pd
import glob
def pasi( year ):
def getColName (df, col): return df.columns[df.columns.str.contains(pat = col)][0]
def getColByName (df, col): return df[getColName(df, col)]
def addKey(df, fi, col):
key = getColName(df, col)
val = getColByName(df, col)
fi[key] = val
return fi
def nullIfEqual(df, c1, c2):
return df.apply(lambda x:
x[getColName(df, c1)]+x[getColName(df, c2)] if x[getColName(df, c1)]+x[getColName(df, c2)] != 0 else 0, axis=1)
def sumInts(df): return df.sum(numeric_only=True)
#~~~~~~~~~~~~~~~
# Step 1)
# Fetch Tract Files w/CSA Labels by Name from the 2_cleaned folder.
#~~~~~~~~~~~~~~~
fileName = ''
for name in glob.glob('AcsDataClean/B03002*5y'+str(year)+'_est.csv'):
fileName = name
df = pd.read_csv( fileName, index_col=0 )
# Aggregate by CSA
# Group By CSA so that they may be operated on
df = df.groupby('CSA')
# Aggregate Numeric Values by Sum
df = df.sum(numeric_only=True)
# B03002 already includes the Hispanic-or-Latino column, so no cross-table append is needed here
df1 = pd.DataFrame()
df1['CSA'] = df.index
df1.set_index('CSA', drop = True, inplace = True)
tot = df[ 'B03002_001E_Total' ]
df1['Asian%NH'] = df[ 'B03002_006E_Total_Not_Hispanic_or_Latino_Asian_alone' ]/ tot * 100
return df1['Asian%NH']
phisp.py
#export
#File: phisp.py
#Author: Charles Karpati
#Date: 4/16/19
#Section: Bnia
#Email: karpati1@umbc.edu
#Description:
# Uses ACS Table B03002 - HISPANIC OR LATINO ORIGIN BY RACE
# Universe: Total Population
# Table Creates: racdiv, paa, pwhite, pasi, phisp, p2more, ppac
#purpose: Produce Percent of Population Hispanic Indicator
#input: Year
#output:
import pandas as pd
import glob
def phisp( year ):
def getColName (df, col): return df.columns[df.columns.str.contains(pat = col)][0]
def getColByName (df, col): return df[getColName(df, col)]
def addKey(df, fi, col):
key = getColName(df, col)
val = getColByName(df, col)
fi[key] = val
return fi
def nullIfEqual(df, c1, c2):
return df.apply(lambda x:
x[getColName(df, c1)]+x[getColName(df, c2)] if x[getColName(df, c1)]+x[getColName(df, c2)] != 0 else 0, axis=1)
def sumInts(df): return df.sum(numeric_only=True)
#~~~~~~~~~~~~~~~
# Step 1)
# Fetch Tract Files w/CSA Labels by Name from the 2_cleaned folder.
#~~~~~~~~~~~~~~~
fileName = ''
for name in glob.glob('AcsDataClean/B03002*5y'+str(year)+'_est.csv'):
fileName = name
df = pd.read_csv( fileName, index_col=0 )
# Aggregate by CSA
# Group By CSA so that they may be operated on
df = df.groupby('CSA')
# Aggregate Numeric Values by Sum
df = df.sum(numeric_only=True)
# B03002 already includes the Hispanic-or-Latino column, so no cross-table append is needed here
df1 = pd.DataFrame()
df1['CSA'] = df.index
df1.set_index('CSA', drop = True, inplace = True)
tot = df[ 'B03002_001E_Total' ]
df1['Hisp%'] = df['B03002_012E_Total_Hispanic_or_Latino']/ tot * 100
return df1['Hisp%']
ppac.py
#export
#File: ppac.py
#Author: Charles Karpati
#Date: 4/16/19
#Section: Bnia
#Email: karpati1@umbc.edu
#Description:
# Uses ACS Table B03002 - HISPANIC OR LATINO ORIGIN BY RACE
# Universe: Total Population
# Table Creates: racdiv, paa, pwhite, pasi, phisp, p2more, ppac
#purpose: Produce Percent of Population (Non-Hispanic) of All Other Races Indicator
#input: Year
#output:
import pandas as pd
import glob
def ppac( year ):
def getColName (df, col): return df.columns[df.columns.str.contains(pat = col)][0]
def getColByName (df, col): return df[getColName(df, col)]
def addKey(df, fi, col):
key = getColName(df, col)
val = getColByName(df, col)
fi[key] = val
return fi
def nullIfEqual(df, c1, c2):
return df.apply(lambda x:
x[getColName(df, c1)]+x[getColName(df, c2)] if x[getColName(df, c1)]+x[getColName(df, c2)] != 0 else 0, axis=1)
def sumInts(df): return df.sum(numeric_only=True)
#~~~~~~~~~~~~~~~
# Step 1)
# Fetch Tract Files w/CSA Labels by Name from the 2_cleaned folder.
#~~~~~~~~~~~~~~~
fileName = ''
for name in glob.glob('AcsDataClean/B03002*5y'+str(year)+'_est.csv'):
fileName = name
df = pd.read_csv( fileName, index_col=0 )
# Aggregate by CSA
# Group By CSA so that they may be operated on
df = df.groupby('CSA')
# Aggregate Numeric Values by Sum
df = df.sum(numeric_only=True)
# B03002 already includes the Hispanic-or-Latino column, so no cross-table append is needed here
df1 = pd.DataFrame()
df1['CSA'] = df.index
df1.set_index('CSA', drop = True, inplace = True)
tot = df[ 'B03002_001E_Total' ]
df1['AllOther%NH'] = (
df['B03002_008E_Total_Not_Hispanic_or_Latino_Some_other_race_alone']
+ df['B03002_005E_Total_Not_Hispanic_or_Latino_American_Indian_and_Alaska_Native_alone']
+ df['B03002_007E_Total_Not_Hispanic_or_Latino_Native_Hawaiian_and_Other_Pacific_Islander_alone']
)/ tot * 100
return df1['AllOther%NH']
pubtran.py
#export
#File: pubtran.py
#Author: Charles Karpati
#Date: 1/17/19
#Section: Bnia
#Email: karpati1@umbc.edu
#Description:
# Uses ACS Table B08101 - MEANS OF TRANSPORTATION TO WORK BY AGE
# Universe: Workers 16 Years and Over
# Table Creates: othrcom, drvalone, carpool, pubtran, walked
#purpose: Produce Sustainability - Percent of Population that Uses Public Transportation to Get to Work Indicator
#input: Year
#output:
import pandas as pd
import glob
def pubtran( year ):
def getColName (df, col): return df.columns[df.columns.str.contains(pat = col)][0]
def getColByName (df, col): return df[getColName(df, col)]
def addKey(df, fi, col):
key = getColName(df, col)
val = getColByName(df, col)
fi[key] = val
return fi
def nullIfEqual(df, c1, c2):
return df.apply(lambda x:
x[getColName(df, c1)]+x[getColName(df, c2)] if x[getColName(df, c1)]+x[getColName(df, c2)] != 0 else 0, axis=1)
def sumInts(df): return df.sum(numeric_only=True)
#~~~~~~~~~~~~~~~
# Step 1)
# Fetch Tract Files w/CSA Labels by Name from the 2_cleaned folder.
#~~~~~~~~~~~~~~~
fileName = ''
for name in glob.glob('AcsDataClean/B08101*5y'+str(year)+'_est.csv'):
fileName = name
df = pd.read_csv( fileName, index_col=0 )
# Aggregate by CSA
# Group By CSA so that they may be operated on
df = df.groupby('CSA')
# Aggregate Numeric Values by Sum
df = sumInts(df)
# Add 'BALTIMORE' which is the SUM of all the CSAs
#~~~~~~~~~~~~~~~
# Step 2)
# Prepare the columns
#~~~~~~~~~~~~~~~
# Final Dataframe
fi = pd.DataFrame()
columns = ['B08101_001E','B08101_049E','B08101_025E']
for col in columns:
fi = addKey(df, fi, col)
# Numerators
numerators = pd.DataFrame()
columns = ['B08101_025E']
for col in columns:
numerators = addKey(df, numerators, col)
# Denominators
denominators = pd.DataFrame()
columns = ['B08101_001E','B08101_049E']
for col in columns:
denominators = addKey(df, denominators, col)
# The denominator is computed in Step 3 below.
#~~~~~~~~~~~~~~~
# Step 3)
# Run the Calculation
# ( value[3] / nullif((value[1]-value[2]),0) )*100
#~~~~~~~~~~~~~~~
fi['numerator'] = numerators.sum(axis=1)
fi['denominator'] = denominators.iloc[: ,0] - denominators.iloc[: ,1]
fi = fi[fi['denominator'] != 0] # Delete Rows where the 'denominator' column is 0
fi['final'] = (fi['numerator'] / fi['denominator'] ) * 100
#~~~~~~~~~~~~~~~
# Step 4)
# Add Special Baltimore City Data
#~~~~~~~~~~~~~~~
url = 'https://api.census.gov/data/20'+str(year)+'/acs/acs5/subject?get=NAME,S0801_C01_009E&for=county%3A510&in=state%3A24&key=829bf6f2e037372acbba32ba5731647c5127fdb0'
table = pd.read_json(url, orient='records')
fi['final']['Baltimore City'] = float(table.loc[1, table.columns[1]])
return fi['final']
""" /* */ --
WITH tbl AS (
select csa,
( value[3] / nullif((value[1]-value[2]),0) )*100::numeric as result
from vital_signs.get_acs_vars_csa_and_bc('2014',ARRAY['B08101_001E','B08101_049E','B08101_025E'])
)
update vital_signs.data
set pubtran = result from tbl where data2.csa = tbl.csa and update_data_year = '2014' and data_year = '2014';
"""pwhite.py
#export
#File: pwhite.py
#Author: Charles Karpati
#Date: 4/16/19
#Section: Bnia
#Email: karpati1@umbc.edu
#Description:
# Uses ACS Table B03002 - HISPANIC OR LATINO ORIGIN BY RACE
# Universe: Total Population
# Table Creates: racdiv, paa, pwhite, pasi, phisp, p2more, ppac
#purpose: Produce Percent of Population (Non-Hispanic) White Indicator
#input: Year
#output:
import pandas as pd
import glob
def pwhite( year ):
def getColName (df, col): return df.columns[df.columns.str.contains(pat = col)][0]
def getColByName (df, col): return df[getColName(df, col)]
def addKey(df, fi, col):
key = getColName(df, col)
val = getColByName(df, col)
fi[key] = val
return fi
def nullIfEqual(df, c1, c2):
return df.apply(lambda x:
x[getColName(df, c1)]+x[getColName(df, c2)] if x[getColName(df, c1)]+x[getColName(df, c2)] != 0 else 0, axis=1)
def sumInts(df): return df.sum(numeric_only=True)
#~~~~~~~~~~~~~~~
# Step 1)
# Fetch Tract Files w/CSA Labels by Name from the 2_cleaned folder.
#~~~~~~~~~~~~~~~
fileName = ''
for name in glob.glob('AcsDataClean/B03002*5y'+str(year)+'_est.csv'):
fileName = name
df = pd.read_csv( fileName, index_col=0 )
# Aggregate by CSA
# Group By CSA so that they may be operated on
df = df.groupby('CSA')
# Aggregate Numeric Values by Sum
df = df.sum(numeric_only=True)
# B03002 already includes the Hispanic-or-Latino column, so no cross-table append is needed here
df1 = pd.DataFrame()
df1['CSA'] = df.index
df1.set_index('CSA', drop = True, inplace = True)
tot = df[ 'B03002_001E_Total' ]
df1['White%NH'] = df[ 'B03002_003E_Total_Not_Hispanic_or_Latino_White_alone' ]/ tot * 100
return df1['White%NH']
racdiv.py
#export
#File: racdiv.py
#Author: Charles Karpati
#Date: 4/16/19
#Section: Bnia
#Email: karpati1@umbc.edu
#Description:
# Uses ACS Table B02001 - Race
# Universe: Total Population
# Uses ACS Table B03002 - HISPANIC OR LATINO ORIGIN BY RACE
# Universe: Total Population
# Table Creates: racdiv, paa, pwhite, pasi, phisp, p2more, ppac
#purpose: Produce Racial Diversity Index Indicator
#input: Year
#output:
import pandas as pd
import glob
def racdiv( year ):
def getColName (df, col): return df.columns[df.columns.str.contains(pat = col)][0]
def getColByName (df, col): return df[getColName(df, col)]
def addKey(df, fi, col):
key = getColName(df, col)
val = getColByName(df, col)
fi[key] = val
return fi
def nullIfEqual(df, c1, c2):
return df.apply(lambda x:
x[getColName(df, c1)]+x[getColName(df, c2)] if x[getColName(df, c1)]+x[getColName(df, c2)] != 0 else 0, axis=1)
def sumInts(df): return df.sum(numeric_only=True)
#~~~~~~~~~~~~~~~
# Step 1)
# Fetch Tract Files w/CSA Labels by Name from the 2_cleaned folder.
#~~~~~~~~~~~~~~~
fileName = ''
for name in glob.glob('AcsDataClean/B02001*5y'+str(year)+'_est.csv'):
fileName = name
df = pd.read_csv( fileName, index_col=0 )
fileName = ''
for name in glob.glob('AcsDataClean/B03002*5y'+str(year)+'_est.csv'):
fileName = name
df_hisp = pd.read_csv( fileName, index_col=0 )
# Aggregate by CSA
# Group By CSA so that they may be operated on
df = df.groupby('CSA')
df_hisp = df_hisp.groupby('CSA')
# Aggregate Numeric Values by Sum
df = df.sum(numeric_only=True)
df_hisp = df_hisp.sum(numeric_only=True)
# Append the one column from the other ACS Table
df['B03002_012E_Total_Hispanic_or_Latino'] = df_hisp['B03002_012E_Total_Hispanic_or_Latino']
df1 = pd.DataFrame()
df1['CSA'] = df.index
df1.set_index('CSA', drop = True, inplace = True)
df1['African-American%'] = df[ 'B02001_003E_Total_Black_or_African_American_alone' ] / df[ 'B02001_001E_Total' ] * 100
df1['White%'] = df[ 'B02001_002E_Total_White_alone' ] / df[ 'B02001_001E_Total' ] * 100
df1['American Indian%'] = df[ 'B02001_004E_Total_American_Indian_and_Alaska_Native_alone' ]/ df[ 'B02001_001E_Total' ] * 100
df1['Asian%'] = df[ 'B02001_005E_Total_Asian_alone' ] / df[ 'B02001_001E_Total' ] * 100
df1['Native Hawaii/Pac Islander%'] = df[ 'B02001_006E_Total_Native_Hawaiian_and_Other_Pacific_Islander_alone'] / df[ 'B02001_001E_Total' ] * 100
df1['Hisp %'] = df['B03002_012E_Total_Hispanic_or_Latino'] / df[ 'B02001_001E_Total' ] * 100
# = 1 - (POWER(%AA/100,2)+POWER(%White/100,2)+POWER(%AmerInd/100,2)+POWER(%Asian/100,2)+POWER(%NHPI/100,2)) * (POWER(%Hispanic/100,2)+POWER(1-(%Hispanic/100),2))
df1['Diversity_index'] = ( 1- (
( df1['African-American%'] /100 )**2
+( df1['White%'] /100 )**2
+( df1['American Indian%'] /100 )**2
+( df1['Asian%'] /100 )**2
+( df1['Native Hawaii/Pac Islander%'] /100 )**2
)*(
( df1['Hisp %'] /100 )**2
+(1-( df1['Hisp %'] /100) )**2
) ) * 100
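# Sanity check (hypothetical CSA): 50% African-American, 50% White, all other
# groups 0%, and 0% Hispanic gives (1 - (0.25 + 0.25) * (0 + 1)) * 100 = 50.0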
return df1['Diversity_index']
retrieveAcsData.py
#export
#File: retrieveAcsData.py
#Author: Charles Karpati
#Date: 1/9/19
#Section: Bnia
#Email: karpati1@umbc.edu
#Description:
#This file returns ACS data given an ID
#purpose: Retrieves ACS data from the web
#input: ID
#output: Acs Data. Prints to ../../data/2_cleaned/acs/
import pandas as pd
import csv
from urllib.parse import urlencode
# This prevents timeouts
import socket
socket.setdefaulttimeout(10.0)
def retrieve_acs_data(year, tableId):
keys = []
vals = []
header = []
getTheseKeys = ''
getTheseKeys2 = ''
getTheseKeys3 = ''
getTheseKeys4 = ''
keyCount = 0
#~~~~~~~~~~~~~~~
# Step 1)
# Retrieve a Meta Data Table Describing the Content of the Table
#~~~~~~~~~~~~~~~
url = 'https://api.census.gov/data/20'+year+'/acs/acs5/groups/'+tableId+'.json'
print(url)
metaDataTable = pd.read_json(url, orient='records')
#~~~~~~~~~~~~~~~
# Step 2)
# Create a Dictionary using the Meta Data Table
#~~~~~~~~~~~~~~~
# Multiple Queries may be Required.
# Max columns returned from any given query is 50.
# For that reason, bin the columns into groups of 50.
for key in metaDataTable['variables'].keys():
if key[-1:] == 'E':
keyCount = keyCount + 1
if keyCount < 50 : getTheseKeys = getTheseKeys+','+key
elif keyCount < 99 : getTheseKeys2 = getTheseKeys2+','+key
elif keyCount < 148 : getTheseKeys3 = getTheseKeys3+','+key
else: getTheseKeys4 = getTheseKeys4+','+key
keys.append(key)
val = metaDataTable['variables'][key]['label']
val = key+'_'+val.replace('Estimate!!', '').replace('!!', '_').replace(' ', '_')
vals.append(val)
dictionary = dict(zip(keys, vals))
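# e.g. the entry for B01001_002E maps to 'B01001_002E_Total_Male', the header
# style the indicator scripts above match against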
#~~~~~~~~~~~~~~~
# Step 3)
# Get the actual data we want with all the columns (obtained using the meta data table)
#~~~~~~~~~~~~~~~
# https://api.census.gov/data/2016/acs/acs5?get=NAME,B11001_002E&for=county:005&in=state:24
urlRoot = 'https://api.census.gov/data/20'+year+'/acs/acs5?'
def getParams(keys): return {
'get': 'NAME'+keys,
'for': 'tract:*',
'in': 'state:24 county:510',
'key': '829bf6f2e037372acbba32ba5731647c5127fdb0'
}
def getBCityParams(keys): return {
'get': 'NAME'+keys,
'for': 'county:510',
'in': 'state:24',
'key': '829bf6f2e037372acbba32ba5731647c5127fdb0'
}
def readIn( url ):
tbl = pd.read_json(url, orient='records')
tbl.columns = tbl.iloc[0]
return tbl
def appendColumns( table, params):
# Get Tract and City Records For Specific Columns
table2 = readIn( urlRoot+urlencode(getParams(params)) )
table3 = readIn( urlRoot+urlencode(getBCityParams(params)) )
table3['tract'] = '010000'
# Concatenate the Records
table2 = pd.concat([table2, table3], ignore_index=True)
# Merge to Master Table
table = pd.merge(table, table2, how='left', left_on=["NAME","state","county","tract"], right_on = ["NAME","state","county","tract"])
return table
# Get Tract Data
url = urlRoot+urlencode(getParams(getTheseKeys))
table = readIn(url)
table = table.iloc[1:]
# Get Baltimore City's Data .
url = urlRoot+urlencode(getBCityParams(getTheseKeys))
table2 = readIn(url)
table2 = table2[1:]
table2['tract'] = '010000'
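# Baltimore City's county-level record is tagged with pseudo-tract '010000'
# so it can be concatenated and merged alongside the real tract rows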
#Append Baltimore to Tracts
table = pd.concat([table, table2], ignore_index=True)
if getTheseKeys2 != '' :
table = appendColumns(table, getTheseKeys2)
if getTheseKeys3 != '' :
table = appendColumns( table, getTheseKeys3 )
if getTheseKeys4 != '' :
table = appendColumns( table, getTheseKeys4 )
#~~~~~~~~~~~~~~~
# Step 4)
# Prepare Column Names using the meta data table. The raw data has column names in the first row, as well.
# Replace column ID's with labels from the dictionary where applicable (should be always)
#~~~~~~~~~~~~~~~
for column in table.columns:
if column in keys: header.append(dictionary[column])
else: header.append(column)
table.columns = header
#~~~~~~~~~~~~~~~
# Step 5) Everything Else
#~~~~~~~~~~~~~~~
# Prettify Names
table['NAME'] = table['NAME'].str.replace(', Baltimore city, Maryland', '')
table.loc[table['NAME'] == 'Baltimore city, Maryland', 'NAME'] = 'Baltimore City'
# Convert to Integers Columns from Strings where Applicable
table = table.apply(pd.to_numeric, errors='ignore')
return table
sclemp.py
#export
#File: sclemp.py
#Author: Charles Karpati
#Date: 1/17/19
#Section: Bnia
#Email: karpati1@umbc.edu
#Description:
# Uses ACS Table B14005 - SEX BY SCHOOL ENROLLMENT BY EDUCATIONAL ATTAINMENT BY EMPLOYMENT STATUS FOR THE POPULATION 16 TO 19 YEARS
# (Universe = Population 16 to 19 years)
#purpose: Produce Education and Youth - Percentage of Population aged 16-19 in School and/or Employed Indicator
#input: Year
#output:
import pandas as pd
import glob
def sclemp( year ):
def getColName (df, col): return df.columns[df.columns.str.contains(pat = col)][0]
def getColByName (df, col): return df[getColName(df, col)]
def addKey(df, fi, col):
key = getColName(df, col)
val = getColByName(df, col)
fi[key] = val
return fi
def nullIfEqual(df, c1, c2):
return df.apply(lambda x:
x[getColName(df, c1)]+x[getColName(df, c2)] if x[getColName(df, c1)]+x[getColName(df, c2)] != 0 else 0, axis=1)
def sumInts(df): return df.sum(numeric_only=True)
#~~~~~~~~~~~~~~~
# Step 1)
# Fetch Tract Files w/CSA Labels by Name from the 2_cleaned folder.
#~~~~~~~~~~~~~~~
fileName = ''
for name in glob.glob('AcsDataClean/B14005*5y'+str(year)+'_est.csv'):
fileName = name
df = pd.read_csv( fileName, index_col=0 )
# Aggregate by CSA
# Group By CSA so that they may be operated on
df = df.groupby('CSA')
# Aggregate Numeric Values by Sum
df = sumInts(df)
# Add 'BALTIMORE' which is the SUM of all the CSAs
#~~~~~~~~~~~~~~~
# Step 2)
# Prepare the columns
#~~~~~~~~~~~~~~~
# Final Dataframe
fi = pd.DataFrame()
columns = ['B14005_004E', 'B14005_005E', 'B14005_006E', 'B14005_009E', 'B14005_013E', 'B14005_018E', 'B14005_019E', 'B14005_020E', 'B14005_023E', 'B14005_027E','B14005_001E']
for col in columns:
fi = addKey(df, fi, col)
# Numerators
numerators = pd.DataFrame()
columns = ['B14005_004E', 'B14005_005E', 'B14005_006E', 'B14005_009E', 'B14005_013E', 'B14005_018E', 'B14005_019E', 'B14005_020E', 'B14005_023E', 'B14005_027E']
for col in columns:
numerators = addKey(df, numerators, col)
# Denominators
denominators = pd.DataFrame()
columns = ['B14005_001E']
for col in columns:
denominators = addKey(df, denominators, col)
# (The SQL original guarded the denominator with nullif(); here zero denominators are filtered out in Step 3.)
#~~~~~~~~~~~~~~~
# Step 3)
# Run the Calculation
# ( ( value[1] + value[2] + value[3] + value[4] + value[5] + value[6] + value[7] + value[8] + value[9] + value[10] ) / nullif(value[11],0) )*100
#~~~~~~~~~~~~~~~
fi['numerator'] = numerators.sum(axis=1)
fi['denominator'] = denominators.sum(axis=1)
fi = fi[fi['denominator'] != 0] # Delete Rows where the 'denominator' column is 0
fi['final'] = (fi['numerator'] / fi['denominator'] ) * 100
return fi['final']
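# A minimal, self-contained sketch of the groupby -> numerator/denominator ->
# percentage pattern used by sclemp() and by the trav*/unemp*/walked indicators
# below. The CSA names and counts here are invented purely for illustration:
if __name__ == '__main__':
    toy = pd.DataFrame({
        'CSA': ['A', 'A', 'B'],
        'B14005_004E_inSchool': [10, 5, 0],  # hypothetical numerator column
        'B14005_001E_Total': [20, 20, 0]})   # hypothetical denominator column
    g = toy.groupby('CSA').sum(numeric_only=True)  # aggregate tracts up to CSAs
    g = g[g['B14005_001E_Total'] != 0]             # drop zero denominators (CSA 'B')
    print((g['B14005_004E_inSchool'] / g['B14005_001E_Total']) * 100)  # CSA 'A' -> 37.5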
"""
WITH tbl AS (
select csa,
( ( value[1] + value[2] + value[3] + value[4] + value[5] + value[6] + value[7] + value[8] + value[9] + value[10] ) / nullif(value[11],0) )*100::numeric as result
from vital_signs.get_acs_vars_csa_and_bc('2014',ARRAY['B14005_004E', 'B14005_005E', 'B14005_006E', 'B14005_009E', 'B14005_013E', 'B14005_018E', 'B14005_019E', 'B14005_020E', 'B14005_023E', 'B14005_027E','B14005_001E']) )
update vital_signs.data
set sclemp = result from tbl where data.csa = tbl.csa and update_data_year = '2014' and data_year = '2014';
"""tpop.py
#export
#File: tpop.py
#Author: Charles Karpati
#Date: 4/16/19
#Section: Bnia
#Email: karpati1@umbc.edu
#Description:
# Uses ACS Table B01001 - SEX BY AGE
# Universe: Total population
# Table Creates: tpop, female, male, age5 age18 age24 age64 age65
#purpose:
#input: Year
#output:
import pandas as pd
import glob
def tpop( year ):
def getColName (df, col): return df.columns[df.columns.str.contains(pat = col)][0]
def getColByName (df, col): return df[getColName(df, col)]
def addKey(df, fi, col):
key = getColName(df, col)
val = getColByName(df, col)
fi[key] = val
return fi
def nullIfEqual(df, c1, c2):
return df.apply(lambda x:
x[getColName(df, c1)]+x[getColName(df, c2)] if x[getColName(df, c1)]+x[getColName(df, c2)] != 0 else 0, axis=1)
def sumInts(df): return df.sum(numeric_only=True)
#~~~~~~~~~~~~~~~
# Step 1)
# Fetch Tract Files w/CSA Labels by Name from the 2_cleaned folder.
#~~~~~~~~~~~~~~~
fileName = ''
for name in glob.glob('AcsDataClean/B01001*5y'+str(year)+'_est.csv'):
fileName = name
df = pd.read_csv( fileName, index_col=0 )
# Aggregate by CSA
# Group By CSA so that they may be operated on
df = df.groupby('CSA')
# Aggregate Numeric Values by Sum
df = df.sum(numeric_only=True)
# df.columns
total = df['B01001_001E_Total']
df1 = pd.DataFrame()
df1['CSA'] = df.index
df1.set_index('CSA', drop = True, inplace = True)
df1['totalPop'] = total
return df1['totalPop']
trav14.py
#export
#File: trav14.py
#Author: Charles Karpati
#Date: 1/17/19
#Section: Bnia
#Email: karpati1@umbc.edu
#Description:
# Uses ACS Table B08303 - TRAVEL TIME TO WORK,
# (Universe: Workers 16 years and over who did not work at home)
# Table Creates: trav14, trav29, trav44, trav45
#purpose: Produce Sustainability - Percent of Employed Population with Travel Time to Work of 0-14 Minutes Indicator
#input: Year
#output:
import pandas as pd
import glob
def trav14( year ):
def getColName (df, col): return df.columns[df.columns.str.contains(pat = col)][0]
def getColByName (df, col): return df[getColName(df, col)]
def addKey(df, fi, col):
key = getColName(df, col)
val = getColByName(df, col)
fi[key] = val
return fi
def nullIfEqual(df, c1, c2):
return df.apply(lambda x:
x[getColName(df, c1)]+x[getColName(df, c2)] if x[getColName(df, c1)]+x[getColName(df, c2)] != 0 else 0, axis=1)
def sumInts(df): return df.sum(numeric_only=True)
#~~~~~~~~~~~~~~~
# Step 1)
# Fetch Tract Files w/CSA Labels by Name from the 2_cleaned folder.
#~~~~~~~~~~~~~~~
fileName = ''
for name in glob.glob('AcsDataClean/B08303*5y'+str(year)+'_est.csv'):
fileName = name
df = pd.read_csv( fileName, index_col=0 )
# Aggregate by CSA
# Group By CSA so that they may be operated on
df = df.groupby('CSA')
# Aggregate Numeric Values by Sum
df = sumInts(df)
# Add 'BALTIMORE' which is the SUM of all the CSAs
#~~~~~~~~~~~~~~~
# Step 2)
# Prepare the columns
#~~~~~~~~~~~~~~~
# Final Dataframe
fi = pd.DataFrame()
columns = ['B08303_002E','B08303_003E','B08303_004E','B08303_001E']
for col in columns:
fi = addKey(df, fi, col)
# Numerators
numerators = pd.DataFrame()
columns = ['B08303_002E','B08303_003E','B08303_004E']
for col in columns:
numerators = addKey(df, numerators, col)
# Denominators
denominators = pd.DataFrame()
columns = ['B08303_001E']
for col in columns:
denominators = addKey(df, denominators, col)
# (The SQL original guarded the denominator with nullif(); here zero denominators are filtered out in Step 3.)
#~~~~~~~~~~~~~~~
# Step 3)
# Run the Calculation
# ( (value[1] + value[2] + value[3] ) / nullif(value[4],0) )*100
#~~~~~~~~~~~~~~~
fi['numerator'] = numerators.sum(axis=1)
fi['denominator'] = denominators.sum(axis=1)
fi = fi[fi['denominator'] != 0] # Delete Rows where the 'denominator' column is 0
fi['final'] = (fi['numerator'] / fi['denominator'] ) * 100
return fi['final']
"""
WITH tbl AS (
select csa,
( (value[1] + value[2] + value[3] ) / nullif(value[4],0) )*100::numeric as result
from vital_signs.get_acs_vars_csa_and_bc('2014',ARRAY['B08303_002E','B08303_003E','B08303_004E','B08303_001E'])
)
update vital_signs.data
set trav14_ = result from tbl where data.csa = tbl.csa and update_data_year = '2014' and data_year = '2014';
"""trav29.py
#export
#File: trav29.py
#Author: Charles Karpati
#Date: 1/17/19
#Section: Bnia
#Email: karpati1@umbc.edu
#Description:
# Uses ACS Table B08303 - TRAVEL TIME TO WORK,
# (Universe: Workers 16 years and over who did not work at home)
# Table Creates: trav14, trav29, trav44, trav45
#purpose: Produce Sustainability - Percent of Employed Population with Travel Time to Work of 15-29 Minutes Indicator
#input: Year
#output:
import pandas as pd
import glob
def trav29( year ):
def getColName (df, col): return df.columns[df.columns.str.contains(pat = col)][0]
def getColByName (df, col): return df[getColName(df, col)]
def addKey(df, fi, col):
key = getColName(df, col)
val = getColByName(df, col)
fi[key] = val
return fi
def nullIfEqual(df, c1, c2):
return df.apply(lambda x:
x[getColName(df, c1)]+x[getColName(df, c2)] if x[getColName(df, c1)]+x[getColName(df, c2)] != 0 else 0, axis=1)
def sumInts(df): return df.sum(numeric_only=True)
#~~~~~~~~~~~~~~~
# Step 1)
# Fetch Tract Files w/CSA Labels by Name from the 2_cleaned folder.
#~~~~~~~~~~~~~~~
fileName = ''
for name in glob.glob('AcsDataClean/B08303*5y'+str(year)+'_est.csv'):
fileName = name
df = pd.read_csv( fileName, index_col=0 )
# Aggregate by CSA
# Group By CSA so that they may be operated on
df = df.groupby('CSA')
# Aggregate Numeric Values by Sum
df = sumInts(df)
# Add 'BALTIMORE' which is the SUM of all the CSAs
#~~~~~~~~~~~~~~~
# Step 2)
# Prepare the columns
#~~~~~~~~~~~~~~~
# Final Dataframe
fi = pd.DataFrame()
columns = ['B08303_005E','B08303_006E','B08303_007E','B08303_001E']
for col in columns:
fi = addKey(df, fi, col)
# Numerators
numerators = pd.DataFrame()
columns = ['B08303_005E','B08303_006E','B08303_007E']
for col in columns:
numerators = addKey(df, numerators, col)
# Denominators
denominators = pd.DataFrame()
columns = ['B08303_001E']
for col in columns:
denominators = addKey(df, denominators, col)
# (The SQL original guarded the denominator with nullif(); here zero denominators are filtered out in Step 3.)
#~~~~~~~~~~~~~~~
# Step 3)
# Run the Calculation
# ( (value[1] + value[2] + value[3] ) / nullif(value[4],0) )*100
#~~~~~~~~~~~~~~~
fi['numerator'] = numerators.sum(axis=1)
fi['denominator'] = denominators.sum(axis=1)
fi = fi[fi['denominator'] != 0] # Delete Rows where the 'denominator' column is 0
fi['final'] = (fi['numerator'] / fi['denominator'] ) * 100
return fi['final']
"""
WITH tbl AS (
select csa,
( (value[1] + value[2] + value[3] ) / nullif(value[4],0) )*100::numeric as result
from vital_signs.get_acs_vars_csa_and_bc('2014',ARRAY['B08303_005E','B08303_006E','B08303_007E','B08303_001E'])
)
update vital_signs.data
set trav29_ = result from tbl where data.csa = tbl.csa and update_data_year = '2014' and data_year = '2014';
"""trav44.py
#export
#File: trav44.py
#Author: Charles Karpati
#Date: 1/17/19
#Section: Bnia
#Email: karpati1@umbc.edu
#Description:
# Uses ACS Table B08303 - TRAVEL TIME TO WORK,
# (Universe: Workers 16 years and over who did not work at home)
# Table Creates: trav14, trav29, trav44, trav45
#purpose: Produce Sustainability - Percent of Employed Population with Travel Time to Work of 30-44 Minutes Indicator
#input: Year
#output:
import pandas as pd
import glob
def trav44( year ):
def getColName (df, col): return df.columns[df.columns.str.contains(pat = col)][0]
def getColByName (df, col): return df[getColName(df, col)]
def addKey(df, fi, col):
key = getColName(df, col)
val = getColByName(df, col)
fi[key] = val
return fi
def nullIfEqual(df, c1, c2):
return df.apply(lambda x:
x[getColName(df, c1)]+x[getColName(df, c2)] if x[getColName(df, c1)]+x[getColName(df, c2)] != 0 else 0, axis=1)
def sumInts(df): return df.sum(numeric_only=True)
#~~~~~~~~~~~~~~~
# Step 1)
# Fetch Tract Files w/CSA Labels by Name from the 2_cleaned folder.
#~~~~~~~~~~~~~~~
fileName = ''
for name in glob.glob('AcsDataClean/B08303*5y'+str(year)+'_est.csv'):
fileName = name
df = pd.read_csv( fileName, index_col=0 )
# Aggregate by CSA
# Group By CSA so that they may be operated on
df = df.groupby('CSA')
# Aggregate Numeric Values by Sum
df = sumInts(df)
# Add 'BALTIMORE' which is the SUM of all the CSAs
#~~~~~~~~~~~~~~~
# Step 2)
# Prepare the columns
#~~~~~~~~~~~~~~~
# Final Dataframe
fi = pd.DataFrame()
columns = ['B08303_008E','B08303_009E','B08303_010E','B08303_001E']
for col in columns:
fi = addKey(df, fi, col)
# Numerators
numerators = pd.DataFrame()
columns = ['B08303_008E','B08303_009E','B08303_010E']
for col in columns:
numerators = addKey(df, numerators, col)
# Denominators
denominators = pd.DataFrame()
columns = ['B08303_001E']
for col in columns:
denominators = addKey(df, denominators, col)
# (The SQL original guarded the denominator with nullif(); here zero denominators are filtered out in Step 3.)
#~~~~~~~~~~~~~~~
# Step 3)
# Run the Calculation
# ( (value[1] + value[2] + value[3] ) / nullif(value[4],0) )*100
#~~~~~~~~~~~~~~~
fi['numerator'] = numerators.sum(axis=1)
fi['denominator'] = denominators.sum(axis=1)
fi = fi[fi['denominator'] != 0] # Delete Rows where the 'denominator' column is 0
fi['final'] = (fi['numerator'] / fi['denominator'] ) * 100
return fi['final']
"""
WITH tbl AS (
select csa,
( (value[1] + value[2] + value[3] ) / nullif(value[4],0) )*100::numeric as result
from vital_signs.get_acs_vars_csa_and_bc('2014',ARRAY['B08303_008E','B08303_009E','B08303_010E','B08303_001E']) )
update vital_signs.data
set trav44_ = result from tbl where data.csa = tbl.csa and update_data_year = '2014' and data_year = '2014';
"""trav45.py
#export
#File: trav45.py
#Author: Charles Karpati
#Date: 1/17/19
#Section: Bnia
#Email: karpati1@umbc.edu
#Description:
# Uses ACS Table B08303 - TRAVEL TIME TO WORK,
# (Universe: Workers 16 years and over who did not work at home)
# Table Creates: trav14, trav29, trav44, trav45
#purpose: Produce Sustainability - Percent of Employed Population with Travel Time to Work of 45 Minutes and Over Indicator
#input: Year
#output:
import pandas as pd
import glob
def trav45( year ):
def getColName (df, col): return df.columns[df.columns.str.contains(pat = col)][0]
def getColByName (df, col): return df[getColName(df, col)]
def addKey(df, fi, col):
key = getColName(df, col)
val = getColByName(df, col)
fi[key] = val
return fi
def nullIfEqual(df, c1, c2):
return df.apply(lambda x:
x[getColName(df, c1)]+x[getColName(df, c2)] if x[getColName(df, c1)]+x[getColName(df, c2)] != 0 else 0, axis=1)
def sumInts(df): return df.sum(numeric_only=True)
#~~~~~~~~~~~~~~~
# Step 1)
# Fetch Tract Files w/CSA Labels by Name from the 2_cleaned folder.
#~~~~~~~~~~~~~~~
fileName = ''
for name in glob.glob('AcsDataClean/B08303*5y'+str(year)+'_est.csv'):
fileName = name
df = pd.read_csv( fileName, index_col=0 )
# Aggregate by CSA
# Group By CSA so that they may be operated on
df = df.groupby('CSA')
# Aggregate Numeric Values by Sum
df = sumInts(df)
# Add 'BALTIMORE' which is the SUM of all the CSAs
#~~~~~~~~~~~~~~~
# Step 2)
# Prepare the columns
#~~~~~~~~~~~~~~~
# Final Dataframe
fi = pd.DataFrame()
columns = ['B08303_011E','B08303_012E','B08303_013E','B08303_001E']
for col in columns:
fi = addKey(df, fi, col)
# Numerators
numerators = pd.DataFrame()
columns = ['B08303_011E','B08303_012E','B08303_013E']
for col in columns:
numerators = addKey(df, numerators, col)
# Denominators
denominators = pd.DataFrame()
columns = ['B08303_001E']
for col in columns:
denominators = addKey(df, denominators, col)
# (The SQL original guarded the denominator with nullif(); here zero denominators are filtered out in Step 3.)
#~~~~~~~~~~~~~~~
# Step 3)
# Run the Calculation
# ( (value[1] + value[2] + value[3] ) / nullif(value[4],0) )*100
#~~~~~~~~~~~~~~~
fi['numerator'] = numerators.sum(axis=1)
fi['denominator'] = denominators.sum(axis=1)
fi = fi[fi['denominator'] != 0] # Delete Rows where the 'denominator' column is 0
fi['final'] = (fi['numerator'] / fi['denominator'] ) * 100
return fi['final']
"""
WITH tbl AS (
select csa,
( (value[1] + value[2] + value[3] ) / nullif(value[4],0) )*100::numeric as result
from vital_signs.get_acs_vars_csa_and_bc('2014',ARRAY['B08303_011E','B08303_012E','B08303_013E','B08303_001E'])
)
update vital_signs.data
set trav45_ = result from tbl where data.csa = tbl.csa and update_data_year = '2014' and data_year = '2014';
"""unempl.py
#export
#File: unempl.py
#Author: Charles Karpati
#Date: 1/17/19
#Section: Bnia
#Email: karpati1@umbc.edu
#Description:
# Uses ACS Table B23001 - SEX BY AGE BY EMPLOYMENT STATUS FOR THE POPULATION 16 YEARS AND OVER
# Universe - Population 16 years and over
#Table Creates: empl, unempl, unempr, nilf
#purpose: Produce Workforce and Economic Development - Percent Population 16-64 Unemployed and Looking for Work Indicator
#input: Year
#output:
import pandas as pd
import glob
def unempl( year ):
def getColName (df, col): return df.columns[df.columns.str.contains(pat = col)][0]
def getColByName (df, col): return df[getColName(df, col)]
def addKey(df, fi, col):
key = getColName(df, col)
val = getColByName(df, col)
fi[key] = val
return fi
def nullIfEqual(df, c1, c2):
return df.apply(lambda x:
x[getColName(df, c1)]+x[getColName(df, c2)] if x[getColName(df, c1)]+x[getColName(df, c2)] != 0 else 0, axis=1)
def sumInts(df): return df.sum(numeric_only=True)
#~~~~~~~~~~~~~~~
# Step 1)
# Fetch Tract Files w/CSA Labels by Name from the 2_cleaned folder.
#~~~~~~~~~~~~~~~
fileName = ''
for name in glob.glob('AcsDataClean/B23001*5y'+str(year)+'_est.csv'):
fileName = name
df = pd.read_csv( fileName, index_col=0 )
# Aggregate by CSA
# Group By CSA so that they may be operated on
df = df.groupby('CSA')
# Aggregate Numeric Values by Sum
df = sumInts(df)
# Add 'BALTIMORE' which is the SUM of all the CSAs
#~~~~~~~~~~~~~~~
# Step 2)
# Prepare the columns
#~~~~~~~~~~~~~~~
# Final Dataframe
fi = pd.DataFrame()
columns = [ 'B23001_003E','B23001_010E', 'B23001_017E', 'B23001_024E', 'B23001_031E', 'B23001_038E', 'B23001_045E', 'B23001_052E', 'B23001_059E', 'B23001_066E', 'B23001_089E', 'B23001_096E', 'B23001_103E', 'B23001_110E', 'B23001_117E', 'B23001_124E', 'B23001_131E', 'B23001_138E', 'B23001_145E', 'B23001_152E', 'B23001_008E', 'B23001_015E', 'B23001_022E', 'B23001_029E', 'B23001_036E', 'B23001_043E', 'B23001_050E', 'B23001_057E', 'B23001_064E', 'B23001_071E', 'B23001_094E', 'B23001_101E', 'B23001_108E', 'B23001_115E', 'B23001_122E', 'B23001_129E', 'B23001_136E', 'B23001_143E', 'B23001_150E', 'B23001_157E']
for col in columns:
fi = addKey(df, fi, col)
# Numerators
numerators = pd.DataFrame()
columns = ['B23001_008E', 'B23001_015E', 'B23001_022E', 'B23001_029E', 'B23001_036E', 'B23001_043E', 'B23001_050E', 'B23001_057E', 'B23001_064E', 'B23001_071E', 'B23001_094E', 'B23001_101E', 'B23001_108E', 'B23001_115E', 'B23001_122E', 'B23001_129E', 'B23001_136E', 'B23001_143E', 'B23001_150E', 'B23001_157E']
for col in columns:
numerators = addKey(df, numerators, col)
# Denominators
denominators = pd.DataFrame()
columns = ['B23001_003E', 'B23001_010E', 'B23001_017E', 'B23001_024E', 'B23001_031E', 'B23001_038E', 'B23001_045E', 'B23001_052E', 'B23001_059E', 'B23001_066E', 'B23001_089E', 'B23001_096E', 'B23001_103E', 'B23001_110E', 'B23001_117E', 'B23001_124E', 'B23001_131E', 'B23001_138E', 'B23001_145E', 'B23001_152E']
for col in columns:
denominators = addKey(df, denominators, col)
# (The SQL original guarded the denominator with nullif(); here zero denominators are filtered out in Step 3.)
#~~~~~~~~~~~~~~~
# Step 3)
# Run the Calculation
# ( (value[21]+value[22]+value[23]+value[24]+value[25]+value[26]+value[27]+value[28]+value[29]+value[30]+value[31]+value[32]+value[33]+value[34]+value[35]+value[36]+value[37]+value[38]+value[39]+value[40]) -- civilian labor force unemployed 16-64
#   / nullif( (value[1]+value[2]+value[3]+value[4]+value[5]+value[6]+value[7]+value[8]+value[9]+value[10]+value[11]+value[12]+value[13]+value[14]+value[15]+value[16]+value[17]+value[18]+value[19]+value[20]), 0) -- population 16 to 64
# ) * 100
#~~~~~~~~~~~~~~~
fi['numerator'] = numerators.sum(axis=1)
fi['denominator'] = denominators.sum(axis=1)
fi = fi[fi['denominator'] != 0] # Delete Rows where the 'denominator' column is 0
fi['final'] = (fi['numerator'] / fi['denominator'] ) * 100
return fi['final']
"""
WITH tbl AS (
select csa,
( (value[21]+value[22]+value[23]+value[24]+value[25]+value[26]+value[27]+value[28]+value[29]+value[30]+value[31]+value[32]+value[33]+value[34]+value[35]+value[36]+value[37]+value[38]+value[39]+value[40]) --civil labor force unempl 16-64
/ nullif( (value[1]+value[2]+value[3]+value[4]+value[5]+value[6]+value[7]+value[8]+value[9]+value[10]+value[11]+value[12]+value[13]+value[14]+value[15]+value[16]+value[17]+value[18]+value[19]+value[20]), 0) -- population 16 to 64
)*100::numeric
as result
from vital_signs.get_acs_vars_csa_and_bc('2014',ARRAY[ 'B23001_003E','B23001_010E','B23001_017E','B23001_024E','B23001_031E','B23001_038E','B23001_045E','B23001_052E','B23001_059E','B23001_066E','B23001_089E','B23001_096E','B23001_103E','B23001_110E','B23001_117E','B23001_124E','B23001_131E','B23001_138E','B23001_145E','B23001_152E','B23001_008E','B23001_015E','B23001_022E','B23001_029E','B23001_036E','B23001_043E','B23001_050E','B23001_057E','B23001_064E','B23001_071E','B23001_094E','B23001_101E','B23001_108E','B23001_115E','B23001_122E','B23001_129E','B23001_136E','B23001_143E','B23001_150E','B23001_157E'])
)
update vital_signs.data
set unempl = result from tbl where data.csa = tbl.csa and update_data_year = '2014' and data_year = '2014';
"""unempr.py
#export
#File: unempr.py
#Author: Charles Karpati
#Date: 1/24/19
#Section: Bnia
#Email: karpati1@umbc.edu
#Description:
# Uses ACS Table B23001 - SEX BY AGE BY EMPLOYMENT STATUS FOR THE POPULATION 16 YEARS AND OVER
# Universe: Population 16 years and over
#Table Creates: empl, unempl, unempr, nilf
#purpose: Produce Workforce and Economic Development - Unemployment Rate (Percent of Civilian Labor Force 16-64 Unemployed) Indicator
#input: Year
#output:
import pandas as pd
import glob
def unempr( year ):
def getColName (df, col): return df.columns[df.columns.str.contains(pat = col)][0]
def getColByName (df, col): return df[getColName(df, col)]
def addKey(df, fi, col):
key = getColName(df, col)
val = getColByName(df, col)
fi[key] = val
return fi
def nullIfEqual(df, c1, c2):
return df.apply(lambda x:
x[getColName(df, c1)]+x[getColName(df, c2)] if x[getColName(df, c1)]+x[getColName(df, c2)] != 0 else 0, axis=1)
def sumInts(df): return df.sum(numeric_only=True)
#~~~~~~~~~~~~~~~
# Step 1)
# Fetch Tract Files w/CSA Labels by Name from the 2_cleaned folder.
#~~~~~~~~~~~~~~~
fileName = ''
for name in glob.glob('AcsDataClean/B23001*5y'+str(year)+'_est.csv'):
fileName = name
df = pd.read_csv( fileName, index_col=0 )
# Aggregate by CSA
# Group By CSA so that they may be operated on
df = df.groupby('CSA')
# Aggregate Numeric Values by Sum
df = sumInts(df)
# Add 'BALTIMORE' which is the SUM of all the CSAs
#~~~~~~~~~~~~~~~
# Step 2)
# Prepare the columns
#~~~~~~~~~~~~~~~
# Final Dataframe
fi = pd.DataFrame()
columns = [ 'B23001_006E', 'B23001_013E', 'B23001_020E', 'B23001_027E', 'B23001_034E', 'B23001_041E', 'B23001_048E', 'B23001_055E', 'B23001_062E', 'B23001_069E', 'B23001_092E', 'B23001_099E', 'B23001_106E', 'B23001_113E', 'B23001_120E', 'B23001_127E', 'B23001_134E', 'B23001_141E', 'B23001_148E', 'B23001_155E', 'B23001_008E', 'B23001_015E', 'B23001_022E', 'B23001_029E', 'B23001_036E', 'B23001_043E', 'B23001_050E', 'B23001_057E', 'B23001_064E', 'B23001_071E', 'B23001_094E', 'B23001_101E', 'B23001_108E', 'B23001_115E', 'B23001_122E', 'B23001_129E', 'B23001_136E', 'B23001_143E', 'B23001_150E', 'B23001_157E']
for col in columns:
fi = addKey(df, fi, col)
# Numerators
numerators = pd.DataFrame()
columns = ['B23001_008E', 'B23001_015E', 'B23001_022E', 'B23001_029E', 'B23001_036E', 'B23001_043E', 'B23001_050E', 'B23001_057E', 'B23001_064E', 'B23001_071E', 'B23001_094E', 'B23001_101E', 'B23001_108E', 'B23001_115E', 'B23001_122E', 'B23001_129E', 'B23001_136E', 'B23001_143E', 'B23001_150E', 'B23001_157E']
for col in columns:
numerators = addKey(df, numerators, col)
# Denominators
denominators = pd.DataFrame()
columns = ['B23001_006E', 'B23001_013E', 'B23001_020E', 'B23001_027E', 'B23001_034E', 'B23001_041E', 'B23001_048E', 'B23001_055E', 'B23001_062E', 'B23001_069E', 'B23001_092E', 'B23001_099E', 'B23001_106E', 'B23001_113E', 'B23001_120E', 'B23001_127E', 'B23001_134E', 'B23001_141E', 'B23001_148E', 'B23001_155E']
for col in columns:
denominators = addKey(df, denominators, col)
# (The SQL original guarded the denominator with nullif(); here zero denominators are filtered out in Step 3.)
#~~~~~~~~~~~~~~~
# Step 3)
# Run the Calculation
# ( (value[21]+value[22]+value[23]+value[24]+value[25]+value[26]+value[27]+value[28]+value[29]+value[30]+value[31]+value[32]+value[33]+value[34]+value[35]+value[36]+value[37]+value[38]+value[39]+value[40]) -- civilian labor force unemployed 16-64
#   / nullif( (value[1]+value[2]+value[3]+value[4]+value[5]+value[6]+value[7]+value[8]+value[9]+value[10]+value[11]+value[12]+value[13]+value[14]+value[15]+value[16]+value[17]+value[18]+value[19]+value[20]), 0) -- civilian labor force 16-64
# ) * 100
#~~~~~~~~~~~~~~~
fi['numerator'] = numerators.sum(axis=1)
fi['denominator'] = denominators.sum(axis=1)
fi = fi[fi['denominator'] != 0] # Delete Rows where the 'denominator' column is 0
fi['final'] = (fi['numerator'] / fi['denominator'] ) * 100
return fi['final']
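# Unlike unempl() above, the denominator here is the civilian labor force 16-64
# (the B23001_006E-style columns) rather than the total population 16-64, so
# this is the conventional unemployment rate.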
"""
WITH tbl AS (
select csa,
( (value[21]+value[22]+value[23]+value[24]+value[25]+value[26]+value[27]+value[28]+value[29]+value[30]+value[31]+value[32]+value[33]+value[34]+value[35]+value[36]+value[37]+value[38]+value[39]+value[40]) --civil labor force unemployed 16-64
/ nullif( (value[1]+value[2]+value[3]+value[4]+value[5]+value[6]+value[7]+value[8]+value[9]+value[10]+value[11]+value[12]+value[13]+value[14]+value[15]+value[16]+value[17]+value[18]+value[19]+value[20]), 0) --civil labor force 16-64
)*100::numeric
as result
from vital_signs.get_acs_vars_csa_and_bc('2013',ARRAY['B23001_006E', 'B23001_013E', 'B23001_020E', 'B23001_027E', 'B23001_034E', 'B23001_041E', 'B23001_048E', 'B23001_055E', 'B23001_062E', 'B23001_069E', 'B23001_092E', 'B23001_099E', 'B23001_106E', 'B23001_113E', 'B23001_120E', 'B23001_127E', 'B23001_134E', 'B23001_141E', 'B23001_148E', 'B23001_155E', 'B23001_008E', 'B23001_015E', 'B23001_022E', 'B23001_029E', 'B23001_036E', 'B23001_043E', 'B23001_050E', 'B23001_057E', 'B23001_064E', 'B23001_071E', 'B23001_094E', 'B23001_101E', 'B23001_108E', 'B23001_115E', 'B23001_122E', 'B23001_129E', 'B23001_136E', 'B23001_143E', 'B23001_150E', 'B23001_157E'] )
)
update vital_signs.data
set unempr = result from tbl where data.csa = tbl.csa and update_data_year = '2013' and data_year = '2014';
"""walked.py
#export
#File: walked.py
#Author: Charles Karpati
#Date: 1/24/19
#Section: Bnia
#Email: karpati1@umbc.edu
#Description:
# Uses ACS Table B08101 - MEANS OF TRANSPORTATION TO WORK BY AGE
# Universe: Workers 16 years and over
# Table Creates: othrcom, drvalone, carpool, pubtran, walked
#purpose: Produce Sustainability - Percent of Population that Walks to Work Indicator
#input: Year
#output:
import pandas as pd
import glob
def walked( year ):
def getColName (df, col): return df.columns[df.columns.str.contains(pat = col)][0]
def getColByName (df, col): return df[getColName(df, col)]
def addKey(df, fi, col):
key = getColName(df, col)
val = getColByName(df, col)
fi[key] = val
return fi
def nullIfEqual(df, c1, c2):
return df.apply(lambda x:
x[getColName(df, c1)]+x[getColName(df, c2)] if x[getColName(df, c1)]+x[getColName(df, c2)] != 0 else 0, axis=1)
def sumInts(df): return df.sum(numeric_only=True)
#~~~~~~~~~~~~~~~
# Step 1)
# Fetch Tract Files w/CSA Labels by Name from the 2_cleaned folder.
#~~~~~~~~~~~~~~~
fileName = ''
for name in glob.glob('AcsDataClean/B08101*5y'+str(year)+'_est.csv'):
fileName = name
df = pd.read_csv( fileName, index_col=0 )
# Aggregate by CSA
# Group By CSA so that they may be operated on
df = df.groupby('CSA')
# Aggregate Numeric Values by Sum
df = sumInts(df)
# Add 'BALTIMORE' which is the SUM of all the CSAs
#~~~~~~~~~~~~~~~
# Step 2)
# Prepare the columns
#~~~~~~~~~~~~~~~
# Final Dataframe
fi = pd.DataFrame()
columns = ['B08101_001E','B08101_049E','B08101_033E']
for col in columns:
fi = addKey(df, fi, col)
# Numerators
numerators = pd.DataFrame()
columns = ['B08101_033E']
for col in columns:
numerators = addKey(df, numerators, col)
# Denominators
denominators = pd.DataFrame()
columns = ['B08101_001E','B08101_049E']
for col in columns:
denominators = addKey(df, denominators, col)
# (The SQL original guarded the denominator with nullif(); here zero denominators are filtered out in Step 3.)
#~~~~~~~~~~~~~~~
# Step 3)
# Run the Calculation
# value[3] / nullif((value[1]-value[2]),0)
#~~~~~~~~~~~~~~~
fi['numerator'] = numerators.sum(axis=1)
fi['denominator'] = denominators.iloc[: ,0] - denominators.iloc[: ,1]
fi = fi[fi['denominator'] != 0] # Delete Rows where the 'denominator' column is 0
fi['final'] = (fi['numerator'] / fi['denominator'] ) * 100
#~~~~~~~~~~~~~~~
# Step 4)
# Add Special Baltimore City Data
#~~~~~~~~~~~~~~~
url = 'https://api.census.gov/data/20'+str(year)+'/acs/acs5/subject?get=NAME,S0801_C01_010E&for=county%3A510&in=state%3A24&key=829bf6f2e037372acbba32ba5731647c5127fdb0'
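# S0801_C01_010E is the ACS subject-table estimate for workers who walked to
# work; the citywide figure is taken straight from it instead of being
# recomputed from the B08101 tract sums. The response has the same
# header-row-first JSON shape as the detailed-table calls above, so
# row 1, column 1 holds the value.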
table = pd.read_json(url, orient='records')
fi.loc['Baltimore City', 'final'] = float(table.loc[1, table.columns[1]])
return fi['final']
"""
WITH tbl AS (
select csa,
(
value[3]
/ nullif((value[1]-value[2]),0)
)*100::numeric as result
from vital_signs.get_acs_vars_csa_and_bc('2014',ARRAY['B08101_001E','B08101_049E','B08101_033E'])
)
update vital_signs.data
set walked = result from tbl where data.csa = tbl.csa and update_data_year = '2014' and data_year = '2014';
"""