# default_exp dhr
This colab and more can be found at https://github.com/BNIA/vitalsigns.
What's Inside?:
The Guided Walkthrough
This notebook was made to create the following Housing Vital Signs Indicators:
Indicators Used
Datasets Used
❌
# Two-digit data year; appended to input file names (e.g. "SNAP20"+year+"_CSACity.shp")
# and to output indicator/column labels (e.g. '106-tanf'+year) throughout this notebook.
year = '19'
Guided Walkthrough
SETUP Environment:
Import Modules
! pip install -U -q PyDrive
! pip install geopy
! pip install geopandas
! pip install geoplot
! pip install dataplay
! pip install matplotlib
! pip install psycopg2-binary! apt-get install build-dep python-psycopg2
! apt-get install libpq-dev
! apt-get install libspatialindex-dev!pip install rtree
!pip install dexplotfrom dataplay.geoms import workWithGeometryData%%capture
# These imports will handle everything
import os
import sys
import csv
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import geopandas as gpd
from geopandas import GeoDataFrame
import psycopg2
import pyproj
from pyproj import Proj, transform
# conda install -c conda-forge proj4
from shapely.geometry import Point
from shapely import wkb
from shapely.wkt import loads
# https://pypi.org/project/geopy/
from geopy.geocoders import Nominatim

# In case file is KML, enable support
import fiona
fiona.drvsupport.supported_drivers['kml'] = 'rw'
fiona.drvsupport.supported_drivers['KML'] = 'rw'

# The next statements were fused onto the fiona lines in the flattened source.
from IPython.display import clear_output
clear_output(wait=True)

import ipywidgets as widgets
from ipywidgets import interact, interact_manual
Configure Environment
# This will just beautify the output
pd.set_option('display.expand_frame_repr', False)
pd.set_option('display.precision', 2)

# Echo every expression in a cell, not only the last one.
from IPython.core.interactiveshell import InteractiveShell
InteractiveShell.ast_node_interactivity = "all"

# pd.set_option('display.expand_frame_repr', False)
# pd.set_option('display.precision', 2)
# pd.reset_option('max_colwidth')
# The bare 'max_colwidth' alias is deprecated in pandas; use the full option key.
pd.set_option('display.max_colwidth', 20)
# pd.reset_option('max_colwidth')
Prep Datasets
TPOP CSA and Baltimore
Get Baltimore
Click to toggle
# Get CSA: load the 2010 family-household denominators (one row per CSA),
# used later to normalize the TANF counts.
csa = pd.read_csv('Families Denominator 2010 for TANF.csv')
csa.head(3)
csa.tail(3)
SNAP
import pandas as pd
import geopandas

# Read the geocoded SNAP point file; 'CSA'/'BaltCity' hold the spatial-join labels.
original = gpd.read_file("SNAP20"+year+"_CSACity.shp", geometry='geometry')
original.columns
original.rename(columns={'CSA': 'CSA2010', 'BaltCity': 'InBaltimore'}, inplace=True)

# Keep only records that matched a CSA or fall inside Baltimore City.
# .copy() avoids chained-assignment on a view in the fillna below.
df = original[original['CSA2010'].notnull() | original['InBaltimore'].notnull()].copy()
print('After filtering records where a CSA or Baltimore geo-code match Exists')
print('All rows Before Filter: ', original.shape[0])  # rows, columns
print('# w BCity.isnull: ', df.InBaltimore.isnull().sum())
bmorow = df[df.CSA2010.isnull()].shape[0]
print('# w CSA2010.isnull: ', bmorow)
csarow = df[df.CSA2010.notnull()].shape[0]
print('# w CSA2010.notnull: ', csarow)
print('# rows After Filter: ', df.shape[0], '==', csarow, '+', bmorow, '==', csarow + bmorow)

# Records with no CSA label are in-city matches: attribute them to the city total.
df.CSA2010 = df.CSA2010.fillna('Baltimore City')

snapdf = df.copy()
snapdf = snapdf[['CSA2010', 'InBaltimore']]
snapdf.head(1)
TANF
import pandas as pd
import geopandas

# Read the geocoded TANF point file; 'CSA'/'BaltCity' hold the spatial-join labels.
original = gpd.read_file("TANF20"+year+"_CSACity.shp", geometry='geometry')
original.columns
original.rename(columns={'CSA': 'CSA2010', 'BaltCity': 'InBaltimore'}, inplace=True)

# Keep only records that matched a CSA or fall inside Baltimore City.
# .copy() avoids chained-assignment on a view in the fillna below.
df = original[original['CSA2010'].notnull() | original['InBaltimore'].notnull()].copy()
print('After filtering records where a CSA or Baltimore geo-code match Exists')
print('All rows Before Filter: ', original.shape[0])  # rows, columns
print('# w BCity.isnull: ', df.InBaltimore.isnull().sum())
bmorow = df[df.CSA2010.isnull()].shape[0]
print('# w CSA2010.isnull: ', bmorow)
csarow = df[df.CSA2010.notnull()].shape[0]
print('# w CSA2010.notnull: ', csarow)
print('# rows After Filter: ', df.shape[0], '==', csarow, '+', bmorow, '==', csarow + bmorow)

# Records with no CSA label are in-city matches: attribute them to the city total.
df.CSA2010 = df.CSA2010.fillna('Baltimore City')

tandf = df.copy()
tandf = tandf[['CSA2010', 'InBaltimore']]
tandf.head(1)
106 tanf - G
https://bniajfi.org/indicators/Children%20And%20Family%20Health/tanf
Temporary Assistance for Needy Families (TANF) is a federal assistance program. The Act provides temporary financial assistance while aiming to get people off of that assistance, primarily through employment.
Percent of Families Receiving TANF
Temporary Assistance for Needy Families (TANF) is a federal assistance program. The Act provides temporary financial assistance while aiming to get people off of that assistance, primarily through employment.
WORKS BUT NEEDS TO BE DIVIDED BY ? Normalization Source
Population, # Houses, Avg HH Size
We need the Family Households. From 2010. Census not ACS Data.
# Build the family-household denominator from Census B11005 columns.
# NOTE(review): `df` / `df1` are created in an earlier, unseen cell (presumably the
# 2010 Census pull mentioned above) — confirm their origin before relying on this.
df1['FamHHChildrenUnder18'] = df['B11005_003E_Total_Households_with_one_or_more_people_under_18_years_Family_households']
df1['FamHHChildrenOver18'] = df['B11005_012E_Total_Households_with_no_people_under_18_years_Family_households']
# Total family households = with children + without children.
df1['FamHH'] = df1['FamHHChildrenOver18'] + df1['FamHHChildrenUnder18']
FINAL NOTE ^ EVERYTHING ABOVE WAS WRITTEN PRIOR TO THIS NOTICE:
Normalization Source Location V
P:\Project Libraries\Vital Signs\Vital Signs 12\5 Chapter Health
Cheryl found this source after Seema and I were struggling.
# NOTE: It appears to be coming from the 2010 data. Not the 5 yr aggregates.
def tanf(df, csa, yr):
    """Draft of the 106-tanf indicator: TANF cases per family household, by CSA.

    df  : per-record TANF DataFrame with a 'CSA2010' column.
    csa : CSA reference table with 'CSA2010' and 'FamHH_2010' columns.
    yr  : two-digit year string used in the output column name.
    """
    # Create the numerator: one record per case, summed per CSA.
    # (The original draft skipped these lines and referenced `tanf` before
    # assignment — a NameError; reconstructed from the exported version.)
    tanf = df.copy()
    tanf['count'] = 1
    tanf = tanf.groupby('CSA2010').sum(numeric_only=True)
    # Make sure ALL CSAs and Baltimore City are included and sorted.
    tanf = csa.merge(tanf, on='CSA2010', how='outer')
    # tanf.drop( columns=['geometry', 'Shape__Length','Shape__Area'], inplace=True)
    # Baltimore has records not in the CSAs; row 55 holds the citywide total.
    tanf.at[55, 'count'] = tanf['count'].sum()
    # Perform the calculation.
    # NOTE(review): draft scales per 1,000 households; the exported version uses
    # * 100 (a percentage) — confirm which scale is intended.
    tanf['106-tanf'+yr] = tanf['count'] / tanf['FamHH_2010'] * 1000
    """
    compareYears = gpd.read_file("https://services1.arcgis.com/mVFRs7NF4iFitgbY/ArcGIS/rest/services/Tanf/FeatureServer/0/query?where=1%3D1&outFields=*&returnGeometry=true&f=pgeojson");
    prevYear = 'tanf'+ str( int(year) - 1 )
    if prevYear in compareYears.columns:
    tanf = tanf.merge( compareYears[['CSA2010', prevYear]], left_on='CSA2010', right_on='CSA2010', how='outer' )
    tanf['change'] = tanf['106-tanf'+year] - tanf[ prevYear ]
    tanf['percentChange'] = tanf['change' ] / tanf[ prevYear ] * 100
    tanf['change'] = tanf['change'].apply(lambda x: "{:.2f}".format(x) )
    """
    print('Records Matching Query: ', tanf.size / len(tanf.columns))
    return tanf

fin = tanf(tandf, csa, year)
fin.to_csv('106-tanf'+year+'.csv', index=False)
fin.head(60) #export
def tanf(df, csa, yr):
    """Compute the 106-tanf indicator: percent of family households receiving TANF.

    df  : per-record TANF DataFrame with a 'CSA2010' column.
    csa : CSA reference table with 'CSA2010' and 'FamHH_2010' columns.
    yr  : two-digit year string used in the output column name.
        (Original body read the module-level `year` global instead of `yr`,
        making the parameter dead; fixed to use `yr`.)
    Returns the merged per-CSA DataFrame with the new indicator column.
    """
    # Create the Numerator
    tanf = df.copy()
    tanf['count'] = 1
    tanf = tanf.groupby('CSA2010').sum(numeric_only=True)
    # Make sure ALL csas and BaltimoreCity are included and sorted.
    tanf = csa.merge(tanf, on='CSA2010', how='outer')
    # Baltimore may have records not in the CSA (not actually the case right now but..)
    tanf.at[55, 'count'] = tanf['count'].sum()
    # Perform the calculation
    tanf['106-tanf'+yr] = tanf['count'] / tanf['FamHH_2010'] * 100
    # Pull last year's published values for a change-over-time comparison.
    compareYears = gpd.read_file("https://services1.arcgis.com/mVFRs7NF4iFitgbY/ArcGIS/rest/services/Tanf/FeatureServer/0/query?where=1%3D1&outFields=*&returnGeometry=true&f=pgeojson")
    prevYear = 'tanf' + str(int(yr) - 1)
    if prevYear in compareYears.columns:
        tanf = tanf.merge(compareYears[['CSA2010', prevYear]], on='CSA2010', how='outer')
        tanf['change'] = tanf['106-tanf'+yr] - tanf[prevYear]
        tanf['percentChange'] = tanf['change'] / tanf[prevYear] * 100
        tanf['change'] = tanf['change'].apply(lambda x: "{:.2f}".format(x))
    print('Records Matching Query: ', tanf.size / len(tanf.columns))
    return tanf

fin = tanf(tandf, csa, year)
fin.to_csv('106-tanf'+year+'.csv', index=False)
fin.head(60)
??? SNAP - G
DESCRIPTION
def snap(df, csa, yr):
    """Draft of the 107-snap indicator: percent of population receiving SNAP.

    df  : per-record SNAP DataFrame with a 'CSA2010' column.
    csa : CSA reference GeoDataFrame with 'CSA2010' and 'tpop10' columns.
    yr  : two-digit year string used in the output column name.
        (Original body read the module-level `year` global; fixed to use `yr`.)
    """
    indicator = '107'  # renamed from `id`, which shadowed the builtin
    shortname = 'snap'
    # Create the Numerator on a copy so the caller's df is not mutated.
    numer = df.copy()
    numer['count'] = 1
    # Group by CSA
    numer = numer.groupby('CSA2010').sum(numeric_only=True)
    # Make sure ALL csas and BaltimoreCity are included and sorted.
    numer = csa.merge(numer, on='CSA2010', how='outer')
    numer.drop(columns=['geometry', 'Shape__Length', 'Shape__Area'], inplace=True)
    # Do after sortViaCsaMerge to get index right. False records would show underneath it but still get added to the sum.
    numer.at[55, 'count'] = numer['count'].sum()
    # Perform the calculation
    numer[indicator+'-'+shortname+yr] = numer['count'] / numer['tpop10'] * 100
    netyet = """
    compareYears = gpd.read_file("https://services1.arcgis.com/mVFRs7NF4iFitgbY/ArcGIS/rest/services/"+shortname+"/FeatureServer/0/query?where=1%3D1&outFields=*&returnGeometry=true&f=pgeojson");
    prevYear = shortname+ str( int(year) - 1 )
    if prevYear in compareYears.columns:
    numer = numer.merge( compareYears[['CSA2010', prevYear]], left_on='CSA2010', right_on='CSA2010', how='outer' )
    numer['change'] = numer[id+'-'+shortname+year] - numer[ prevYear ]
    numer['percentChange'] = numer['change' ] / numer[ prevYear ] * 100
    numer['change'] = numer['change'].apply(lambda x: "{:.2f}".format(x) )
    print( 'Records Matching Query: ', numer.size / len(numer.columns) )
    """
    # .drop(columns=[]) in the original was a no-op; return directly.
    return numer

fin = snap(snapdf, csa, year)
fin.to_csv('107-snap'+year+'.csv', index=False)
fin.head(6) #export
def snap(df, csa, yr):
    """Compute the (unnumbered) SNAP indicator: raw SNAP record count per CSA.

    df  : per-record SNAP DataFrame with a 'CSA2010' column.
    csa : CSA reference table with a 'CSA2010' column.
    yr  : two-digit year string used in the output column name.
        (Original body read the module-level `year` global; fixed to use `yr`.)
    Returns the merged per-CSA DataFrame with the new indicator column.
    """
    # Create the Numerator
    snap = df.copy()
    snap['count'] = 1
    snap = snap.groupby('CSA2010').sum(numeric_only=True)
    # Make sure ALL csas and BaltimoreCity are included and sorted.
    snap = csa.merge(snap, on='CSA2010', how='outer')
    # Baltimore may have records not in the CSA (not actually the case right now but..)
    snap.at[55, 'count'] = snap['count'].sum()
    # Perform the calculation.
    # TODO(review): '???' is a placeholder indicator id, and this is a raw count
    # (not normalized like the 107 draft above) — confirm the intended id/denominator.
    snap['???-snap'+yr] = snap['count']
    # Pull last year's published values for a change-over-time comparison.
    compareYears = gpd.read_file("https://services1.arcgis.com/mVFRs7NF4iFitgbY/ArcGIS/rest/services/Snap/FeatureServer/0/query?where=1%3D1&outFields=*&returnGeometry=true&f=pgeojson")
    prevYear = 'snap' + str(int(yr) - 1)
    if prevYear in compareYears.columns:
        snap = snap.merge(compareYears[['CSA2010', prevYear]], on='CSA2010', how='outer')
        snap['change'] = snap['???-snap'+yr] - snap[prevYear]
        snap['percentChange'] = snap['change'] / snap[prevYear] * 100
        snap['change'] = snap['change'].apply(lambda x: "{:.2f}".format(x))
    print('Records Matching Query: ', snap.size / len(snap.columns))
    return snap

# BUG FIX: the original passed `tandf` (TANF records) into the SNAP indicator;
# `snapdf` is the SNAP frame built in the prep cell above.
fin = snap(snapdf, csa, year)
fin.to_csv('???-snap'+year+'.csv', index=False)
fin.head(60)