This is an annual query. So I am going to keep this doc here.
It needs to import the create_acs_indicator function.
Carlos,
The Regional Planning District (RPD) to tract crosswalk file is attached.
For pulling the ACS data for the COVID map we’ll need to focus in on tracts where GEOID starts with ‘24005’ to grab tracts within Baltimore County
(this is a master file that is for the entire metro area so we just need a subset).
We’ll then need to aggregate the tract level ACS data to the RPDNAME field; think of it like aggregating tracts to the CSA name.
- Use 5-year ACS estimates for 2014-2018, same as Vital Signs,
- Save .csv with columns for the RPDNAME and the indicators (ex. ‘paa18’, ‘hh25inc18’, etc.)
-2020
Per Cheryl's request, here are the indicators on the COVID dash, along with their indicator numbers and short names! Has she spoken to you about pulling the tract data for Baltimore County and aggregating it to the Regional Planning Districts for 2015-2019? You should have the Tracts to Regional Planning Districts crosswalk for it. - 2021
Import Python Modules
You will need to run this next box first in order for any of the code after it to work
#@title Run This Cell: Install Modules
! pip install -U -q PyDrive
! pip install geopy
! pip install geopandas
! pip install geoplot
! pip install dexplot
! pip install dataplay#@title Run This Cell: Import Modules
# Install the Widgets Module.
# Colabs does not locally provide this Python Library
# The '!' is a special prefix used in colabs when talking to the terminal
!pip install -U -q ipywidgets
!pip install geopandas
# Once installed we need to import and configure the Widgets
import ipywidgets as widgets
!jupyter nbextension enable --py widgetsnbextension
from IPython.core.interactiveshell import InteractiveShell
InteractiveShell.ast_node_interactivity = 'all'
import ipywidgets as widgets
from ipywidgets import interact, interact_manual
# Used 4 Importing Data
import urllib.request as urllib
from urllib.parse import urlencode
# This Prevents Timeouts when Importing
import socket
socket.setdefaulttimeout(10.0)
# Pandas Data Manipulation Libraries
import pandas as pd
# Show entire column widths
pd.set_option('display.max_colwidth', -1)
# 4 Working with Json Data
import json
# 4 Data Processing
import numpy as np
# 4 Reading Json Data into Pandas
from pandas.io.json import json_normalize
# 4 exporting data as CSV
import csv
from dataplay.acsDownload import retrieve_acs_data
from dataplay.merge import mergeDatasets
from dataplay.geoms import readInGeometryData
from dataplay.geoms import map_points
from dataplay.geoms import workWithGeometryData
# Geo-Formatting
# Postgres-Conversion
import geopandas as gpd
from geopandas import GeoDataFrame
import psycopg2,pandas,numpy
from shapely import wkb
from shapely.wkt import loads
import os
import sys
# In case file is KML
import fiona
fiona.drvsupport.supported_drivers['kml'] = 'rw' # enable KML support which is disabled by default
fiona.drvsupport.supported_drivers['KML'] = 'rw' # enable KML support which is disabled by default
# https://www.census.gov/geographies/mapping-files/time-series/geo/tiger-line-file.2010.html
# https://www.census.gov/cgi-bin/geo/shapefiles/index.php?year=2010&layergroup=Census+Tracts
# load libraries
#from shapely.wkt import loads
#from pandas import ExcelWriter
#from pandas import ExcelFile
%matplotlib inline
import matplotlib.pyplot as plt
import glob
import imageiopd.set_option('display.expand_frame_repr', False)
pd.set_option('display.precision', 2)
from IPython.core.interactiveshell import InteractiveShell
InteractiveShell.ast_node_interactivity = "all"
pd.set_option('max_colwidth', 20)
Functions and Globals
Globals
# Geographic scope handed to the ACS download function: every tract ('*')
# in county '005' (Baltimore County) of state '24'.
# Changing these values to other geographic reference codes changes the
# area that is downloaded.
tract = '*'
county = '005' # Baltimore County
state = '24'
# Specify the download parameters the ACS download function will receive here
year = '19'
# These two assignments had been fused into `saveAcs = TrueaggMethod = 'sum'`,
# which set saveAcs to 'sum' and never defined aggMethod.
saveAcs = True
aggMethod = 'sum'
saveCrosswalked = True
saveCrosswalkedFileName = False
local_match_col = 'tract'
The Regional Planning District (RPD) to tract crosswalk file
- For pulling the ACS data... grab tracts within Baltimore County (tracts where GEOID starts with ‘24005’)
- The file given is a master file for the entire metro area (we just need a subset).
# Load the tract -> Regional Planning District workbook.
b17 = pd.read_excel('2010 Tracts to RPD and CSA.xlsx')
# Drop the last six characters of each GEOID10; what remains is compared
# against '24005' below to keep only the Baltimore County rows.
# NOTE(review): assumes GEOID10 renders as an 11-character string — confirm
# the column is not parsed as float.
b17['CleanGEOID10'] = [str(geoid)[:-6] for geoid in b17['GEOID10']]
# Keep the matching rows and only the columns used further down.
# NOTE(review): 'PRDCODE' may be a typo for 'RPDCODE' — verify against the
# workbook header.
b17 = b17.loc[
    b17['CleanGEOID10'] == '24005',
    ['RPDNAME', 'PRDCODE', 'CleanGEOID10', 'GEOID10', 'TRACT10'],
]
b17.head()
The Baltimore City tract CSA file
# Published Google Sheet (CSV export) holding the Baltimore City tract/CSA
# crosswalk.
cwUrl = 'https://docs.google.com/spreadsheets/d/e/2PACX-1vREwwa_s8Ix39OYGnnS_wA8flOoEkU7reIV4o3ZhlwYhLXhpNEvnOia_uHUDBvnFptkLLHHlaQNvsQE/pub?output=csv'
# The URL assignment and this read had been fused into one invalid line.
csaCw = pd.read_csv(cwUrl)
# Display the frame that was just loaded rather than downloading the sheet a
# second time.
csaCw
Test Merge
Try a RPDNAME Merge For Baltimore County data
# Smoke test: download table B01001 for Baltimore County ('005') and outer-join
# it to the RPD crosswalk on the tract id, showing only the first joined row.
df = retrieve_acs_data(state, '005', tract, 'B01001', year, saveAcs)
print('Table: B01001', ', Year: ', year, ' imported.', sep='')
pd.merge(df, b17, how='outer', left_on='tract', right_on='TRACT10').head(1)
Try a CSA Merge For Baltimore City data
# Smoke test: download table B01001 for Baltimore City ('510') and outer-join
# it to the CSA crosswalk on the tract id.
df = retrieve_acs_data(state, '510', tract, 'B01001', year, saveAcs)
print('Table: B01001', ', Year: ', year, ' imported.', sep='')
pd.merge(df, csaCw, how='outer', left_on='tract', right_on='TRACT2010')
# Specify the crosswalk parameters for Baltimore County
# ---- age65 indicator: Baltimore County, groupBy set to RPDNAME ----
# What to download and which indicator function to apply.
county = '005'  # Baltimore County
tableId = 'B01001'
method = age65
finalFileName = './age65_20' + year + '_Baltimore_County_16July2020.csv'
# How the ACS rows are matched to the b17 crosswalk.
cwUrl = b17  # a DataFrame here, not a URL; the call below passes b17 directly
local_match_col = 'tract'
# NOTE(review): the wanted column is the tract id while groupBy is RPDNAME —
# confirm this is what createIndicator expects.
foreign_match_col = foreign_wanted_col = 'TRACT10'
groupBy = 'RPDNAME'
columnsToInclude = [groupBy]
ind65 = createIndicator(
    state, county, tract, year, tableId, saveAcs, b17,
    local_match_col, foreign_match_col, foreign_wanted_col,
    saveCrosswalked, saveCrosswalkedFileName,
    groupBy, aggMethod, method, columnsToInclude, finalFileName,
)
ind65.columns
ind65.head()

# ---- age65 indicator: Baltimore City, groupBy set to CSA2010 ----
# Re-read the published tract/CSA crosswalk sheet.
cwUrl = 'https://docs.google.com/spreadsheets/d/e/2PACX-1vREwwa_s8Ix39OYGnnS_wA8flOoEkU7reIV4o3ZhlwYhLXhpNEvnOia_uHUDBvnFptkLLHHlaQNvsQE/pub?output=csv'
csaCw = pd.read_csv(cwUrl)
county = '510'  # Baltimore City
tableId = 'B01001'
method = age65
finalFileName = './age65_20' + year + '_Baltimore_City_16July2020.csv'
foreign_match_col = 'TRACT2010'
foreign_wanted_col = groupBy = 'CSA2010'
columnsToInclude = [groupBy]
ind65 = createIndicator(
    state, county, tract, year, tableId, saveAcs, csaCw,
    local_match_col, foreign_match_col, foreign_wanted_col,
    saveCrosswalked, saveCrosswalkedFileName,
    groupBy, aggMethod, method, columnsToInclude, finalFileName,
)
ind65.columns
ind65.head()
age18. Percent of Population Aged 5-17 [# 12] *
# ---- age18 indicator: Baltimore County, groupBy set to RPDNAME ----
# What to download and which indicator function to apply.
county = '005'  # Baltimore County
tableId = 'B01001'
method = age18
finalFileName = './age18_20' + year + '_Baltimore_County_16July2020.csv'
# How the ACS rows are matched to the b17 crosswalk.
cwUrl = b17  # a DataFrame here, not a URL; the call below passes b17 directly
local_match_col = 'tract'
# NOTE(review): the wanted column is the tract id while groupBy is RPDNAME —
# confirm this is what createIndicator expects.
foreign_match_col = foreign_wanted_col = 'TRACT10'
groupBy = 'RPDNAME'
columnsToInclude = [groupBy]
ind18 = createIndicator(
    state, county, tract, year, tableId, saveAcs, b17,
    local_match_col, foreign_match_col, foreign_wanted_col,
    saveCrosswalked, saveCrosswalkedFileName,
    groupBy, aggMethod, method, columnsToInclude, finalFileName,
)
ind18.columns
ind18.head()

# ---- age18 indicator: Baltimore City, groupBy set to CSA2010 ----
# Re-read the published tract/CSA crosswalk sheet.
cwUrl = 'https://docs.google.com/spreadsheets/d/e/2PACX-1vREwwa_s8Ix39OYGnnS_wA8flOoEkU7reIV4o3ZhlwYhLXhpNEvnOia_uHUDBvnFptkLLHHlaQNvsQE/pub?output=csv'
csaCw = pd.read_csv(cwUrl)
county = '510'  # Baltimore City
tableId = 'B01001'
method = age18
finalFileName = './age18_20' + year + '_Baltimore_City_16July2020.csv'
foreign_match_col = 'TRACT2010'
foreign_wanted_col = groupBy = 'CSA2010'
columnsToInclude = [groupBy]
ind18 = createIndicator(
    state, county, tract, year, tableId, saveAcs, csaCw,
    local_match_col, foreign_match_col, foreign_wanted_col,
    saveCrosswalked, saveCrosswalkedFileName,
    groupBy, aggMethod, method, columnsToInclude, finalFileName,
)
ind18.columns
ind18.head()
paa. Percent of Population Black/African-American (Non-Hispanic) [# 4] *
# ---- paa indicator: Baltimore County, groupBy set to RPDNAME ----
# Crosswalk settings: match ACS 'tract' to the b17 crosswalk's 'TRACT10'.
cwUrl = b17  # a DataFrame here, not a URL; the call below passes b17 directly
local_match_col = 'tract'
foreign_match_col = 'TRACT10'
# NOTE(review): the wanted column is the tract id while groupBy is RPDNAME —
# confirm this is what createIndicator expects.
foreign_wanted_col = 'TRACT10'
groupBy = 'RPDNAME'
columnsToInclude = ['RPDNAME']
# Create the paa indicator from ACS table B03002.
county = '005' # Baltimore County
tableId = 'B03002'
finalFileName = './paa_20' + year + '_Baltimore_County_16July2020.csv'
method = paa
indpaa = createIndicator(
    state, county, tract, year, tableId, saveAcs, b17,
    local_match_col, foreign_match_col, foreign_wanted_col,
    saveCrosswalked, saveCrosswalkedFileName,
    groupBy, aggMethod, method, columnsToInclude, finalFileName,
)
indpaa.columns
indpaa

# ---- paa indicator: Baltimore City, groupBy set to CSA2010 ----
# Re-read the published tract/CSA crosswalk sheet.
cwUrl = 'https://docs.google.com/spreadsheets/d/e/2PACX-1vREwwa_s8Ix39OYGnnS_wA8flOoEkU7reIV4o3ZhlwYhLXhpNEvnOia_uHUDBvnFptkLLHHlaQNvsQE/pub?output=csv'
csaCw = pd.read_csv(cwUrl)
foreign_match_col = 'TRACT2010'
foreign_wanted_col = 'CSA2010'
# Create the paa indicator from ACS table B03002.
county = '510' # Baltimore City
tableId = 'B03002'
# Use the same '<name>_20<year>' pattern as the County file; this name was
# previously missing the '_20' part.
finalFileName = './paa_20' + year + '_Baltimore_City_16July2020.csv'
method = paa
groupBy = 'CSA2010'
columnsToInclude = ['CSA2010']
indpaa = createIndicator(
    state, county, tract, year, tableId, saveAcs, csaCw,
    local_match_col, foreign_match_col, foreign_wanted_col,
    saveCrosswalked, saveCrosswalkedFileName,
    groupBy, aggMethod, method, columnsToInclude, finalFileName,
)
indpaa.columns
indpaa
hisp. Percent of Population Hispanic [# 9] *
# ---- hisp indicator: Baltimore County, groupBy set to RPDNAME ----
# What to download and which indicator function to apply.
county = '005'  # Baltimore County
tableId = 'B03002'
method = hisp
finalFileName = './hisp_20' + year + '_Baltimore_County_16July2020.csv'
# How the ACS rows are matched to the b17 crosswalk.
cwUrl = b17  # a DataFrame here, not a URL; the call below passes b17 directly
local_match_col = 'tract'
# NOTE(review): the wanted column is the tract id while groupBy is RPDNAME —
# confirm this is what createIndicator expects.
foreign_match_col = foreign_wanted_col = 'TRACT10'
groupBy = 'RPDNAME'
columnsToInclude = [groupBy]
indhisp = createIndicator(
    state, county, tract, year, tableId, saveAcs, b17,
    local_match_col, foreign_match_col, foreign_wanted_col,
    saveCrosswalked, saveCrosswalkedFileName,
    groupBy, aggMethod, method, columnsToInclude, finalFileName,
)
indhisp.columns
indhisp.head()

# ---- hisp indicator: Baltimore City, groupBy set to CSA2010 ----
# Re-read the published tract/CSA crosswalk sheet.
cwUrl = 'https://docs.google.com/spreadsheets/d/e/2PACX-1vREwwa_s8Ix39OYGnnS_wA8flOoEkU7reIV4o3ZhlwYhLXhpNEvnOia_uHUDBvnFptkLLHHlaQNvsQE/pub?output=csv'
csaCw = pd.read_csv(cwUrl)
county = '510'  # Baltimore City
tableId = 'B03002'
method = hisp
finalFileName = './hisp_20' + year + '_Baltimore_City_16July2020.csv'
foreign_match_col = 'TRACT2010'
foreign_wanted_col = groupBy = 'CSA2010'
columnsToInclude = [groupBy]
indhisp = createIndicator(
    state, county, tract, year, tableId, saveAcs, csaCw,
    local_match_col, foreign_match_col, foreign_wanted_col,
    saveCrosswalked, saveCrosswalkedFileName,
    groupBy, aggMethod, method, columnsToInclude, finalFileName,
)
indhisp.columns
indhisp.head()
pwhite. Percent of Residents - White/Caucasian (Non-Hispanic) [# 5] *
# ---- pwhite indicator: Baltimore County, groupBy set to RPDNAME ----
# Crosswalk settings: match ACS 'tract' to the b17 crosswalk's 'TRACT10'.
cwUrl = b17  # a DataFrame here, not a URL; the call below passes b17 directly
local_match_col = 'tract'
foreign_match_col = 'TRACT10'
# NOTE(review): the wanted column is the tract id while groupBy is RPDNAME —
# confirm this is what createIndicator expects.
foreign_wanted_col = 'TRACT10'
groupBy = 'RPDNAME'
columnsToInclude = ['RPDNAME']
# Create the pwhite indicator from ACS table B03002.
county = '005' # Baltimore County
tableId = 'B03002'
finalFileName = './pwhite_20' + year + '_Baltimore_County_16July2020.csv'
method = pwhite
indpwhite = createIndicator(
    state, county, tract, year, tableId, saveAcs, b17,
    local_match_col, foreign_match_col, foreign_wanted_col,
    saveCrosswalked, saveCrosswalkedFileName,
    groupBy, aggMethod, method, columnsToInclude, finalFileName,
)
indpwhite.columns
indpwhite.head()

# ---- pwhite indicator: Baltimore City, groupBy set to CSA2010 ----
# Re-read the published tract/CSA crosswalk sheet.
cwUrl = 'https://docs.google.com/spreadsheets/d/e/2PACX-1vREwwa_s8Ix39OYGnnS_wA8flOoEkU7reIV4o3ZhlwYhLXhpNEvnOia_uHUDBvnFptkLLHHlaQNvsQE/pub?output=csv'
csaCw = pd.read_csv(cwUrl)
foreign_match_col = 'TRACT2010'
foreign_wanted_col = 'CSA2010'
# Create the pwhite indicator from ACS table B03002.
county = '510' # Baltimore City
tableId = 'B03002'
# Use the same '<name>_20<year>' pattern as the County file; this name was
# previously missing the '_20' part.
finalFileName = './pwhite_20' + year + '_Baltimore_City_16July2020.csv'
method = pwhite
groupBy = 'CSA2010'
columnsToInclude = ['CSA2010']
indpwhite = createIndicator(
    state, county, tract, year, tableId, saveAcs, csaCw,
    local_match_col, foreign_match_col, foreign_wanted_col,
    saveCrosswalked, saveCrosswalkedFileName,
    groupBy, aggMethod, method, columnsToInclude, finalFileName,
)
indpwhite.columns
indpwhite.head()
affordr. Affordability Index – Rent [# 45] *
# ---- affordr indicator: Baltimore County, groupBy set to RPDNAME ----
# What to download and which indicator function to apply.
county = '005'  # Baltimore County
tableId = 'B25070'
method = affordr
finalFileName = './affordr_20' + year + '_Baltimore_County_16July2020.csv'
# How the ACS rows are matched to the b17 crosswalk.
cwUrl = b17  # a DataFrame here, not a URL; the call below passes b17 directly
local_match_col = 'tract'
# NOTE(review): the wanted column is the tract id while groupBy is RPDNAME —
# confirm this is what createIndicator expects.
foreign_match_col = foreign_wanted_col = 'TRACT10'
groupBy = 'RPDNAME'
columnsToInclude = [groupBy]
indaffordr = createIndicator(
    state, county, tract, year, tableId, saveAcs, b17,
    local_match_col, foreign_match_col, foreign_wanted_col,
    saveCrosswalked, saveCrosswalkedFileName,
    groupBy, aggMethod, method, columnsToInclude, finalFileName,
)
indaffordr.columns
indaffordr

# ---- affordr indicator: Baltimore City, groupBy set to CSA2010 ----
# Re-read the published tract/CSA crosswalk sheet.
cwUrl = 'https://docs.google.com/spreadsheets/d/e/2PACX-1vREwwa_s8Ix39OYGnnS_wA8flOoEkU7reIV4o3ZhlwYhLXhpNEvnOia_uHUDBvnFptkLLHHlaQNvsQE/pub?output=csv'
csaCw = pd.read_csv(cwUrl)
county = '510'  # Baltimore City
tableId = 'B25070'
method = affordr
finalFileName = './affordr_20' + year + '_Baltimore_City_16July2020.csv'
foreign_match_col = 'TRACT2010'
foreign_wanted_col = groupBy = 'CSA2010'
columnsToInclude = [groupBy]
indaffordr = createIndicator(
    state, county, tract, year, tableId, saveAcs, csaCw,
    local_match_col, foreign_match_col, foreign_wanted_col,
    saveCrosswalked, saveCrosswalkedFileName,
    groupBy, aggMethod, method, columnsToInclude, finalFileName,
)
indaffordr.columns
indaffordr
hh25inc. Percent of Households Earning Less than $25,000 [# 21] *
# ---- hh25inc indicator: Baltimore County, groupBy set to RPDNAME ----
# Crosswalk settings: match ACS 'tract' to the b17 crosswalk's 'TRACT10'.
cwUrl = b17  # a DataFrame here, not a URL; the call below passes b17 directly
local_match_col = 'tract'
foreign_match_col = 'TRACT10'
# NOTE(review): the wanted column is the tract id while groupBy is RPDNAME —
# confirm this is what createIndicator expects.
foreign_wanted_col = 'TRACT10'
groupBy = 'RPDNAME'
columnsToInclude = ['RPDNAME']
# Create the hh25inc indicator from ACS table B19001.
county = '005' # Baltimore County
tableId = 'B19001'
finalFileName = './hh25inc_20' + year + '_Baltimore_County_16July2020.csv'
method = hh25inc
indhh25inc = createIndicator(
    state, county, tract, year, tableId, saveAcs, b17,
    local_match_col, foreign_match_col, foreign_wanted_col,
    saveCrosswalked, saveCrosswalkedFileName,
    groupBy, aggMethod, method, columnsToInclude, finalFileName,
)
indhh25inc.columns
indhh25inc.head()

# ---- hh25inc indicator: Baltimore City, groupBy set to CSA2010 ----
# Re-read the published tract/CSA crosswalk sheet.
cwUrl = 'https://docs.google.com/spreadsheets/d/e/2PACX-1vREwwa_s8Ix39OYGnnS_wA8flOoEkU7reIV4o3ZhlwYhLXhpNEvnOia_uHUDBvnFptkLLHHlaQNvsQE/pub?output=csv'
csaCw = pd.read_csv(cwUrl)
foreign_match_col = 'TRACT2010'
foreign_wanted_col = 'CSA2010'
# Create the hh25inc indicator from ACS table B19001.
county = '510' # Baltimore City
tableId = 'B19001'
# Use the same '<name>_20<year>' pattern as the County file; this name was
# previously missing the '_20' part.
finalFileName = './hh25inc_20' + year + '_Baltimore_City_16July2020.csv'
method = hh25inc
groupBy = 'CSA2010'
columnsToInclude = ['CSA2010']
indhh25inc = createIndicator(
    state, county, tract, year, tableId, saveAcs, csaCw,
    local_match_col, foreign_match_col, foreign_wanted_col,
    saveCrosswalked, saveCrosswalkedFileName,
    groupBy, aggMethod, method, columnsToInclude, finalFileName,
)
indhh25inc.columns
indhh25inc.head()
novhcl. Percent of Households with No Vehicle [# 184] *
# ---- novhcl indicator: Baltimore County, groupBy set to RPDNAME ----
# What to download and which indicator function to apply.
county = '005'  # Baltimore County
tableId = 'B08201'
method = novhcl
finalFileName = './novhcl_20' + year + '_Baltimore_County_16July2020.csv'
# How the ACS rows are matched to the b17 crosswalk.
cwUrl = b17  # a DataFrame here, not a URL; the call below passes b17 directly
local_match_col = 'tract'
# NOTE(review): the wanted column is the tract id while groupBy is RPDNAME —
# confirm this is what createIndicator expects.
foreign_match_col = foreign_wanted_col = 'TRACT10'
groupBy = 'RPDNAME'
columnsToInclude = [groupBy]
indnovhcl = createIndicator(
    state, county, tract, year, tableId, saveAcs, b17,
    local_match_col, foreign_match_col, foreign_wanted_col,
    saveCrosswalked, saveCrosswalkedFileName,
    groupBy, aggMethod, method, columnsToInclude, finalFileName,
)
indnovhcl.columns
indnovhcl.head()

# ---- novhcl indicator: Baltimore City, groupBy set to CSA2010 ----
# Re-read the published tract/CSA crosswalk sheet.
cwUrl = 'https://docs.google.com/spreadsheets/d/e/2PACX-1vREwwa_s8Ix39OYGnnS_wA8flOoEkU7reIV4o3ZhlwYhLXhpNEvnOia_uHUDBvnFptkLLHHlaQNvsQE/pub?output=csv'
csaCw = pd.read_csv(cwUrl)
county = '510'  # Baltimore City
tableId = 'B08201'
method = novhcl
finalFileName = './novhcl_20' + year + '_Baltimore_City_16July2020.csv'
foreign_match_col = 'TRACT2010'
foreign_wanted_col = groupBy = 'CSA2010'
columnsToInclude = [groupBy]
indnovhcl = createIndicator(
    state, county, tract, year, tableId, saveAcs, csaCw,
    local_match_col, foreign_match_col, foreign_wanted_col,
    saveCrosswalked, saveCrosswalkedFileName,
    groupBy, aggMethod, method, columnsToInclude, finalFileName,
)
indnovhcl.columns
indnovhcl.head()
nohhint. Percent of Households with No Internet at Home [# 156] *
# ---- nohhint indicator: Baltimore County, groupBy set to RPDNAME ----
# What to download and which indicator function to apply.
county = '005'  # Baltimore County
tableId = 'B28011'
method = nohhint
finalFileName = './nohhint_20' + year + '_Baltimore_County_16July2020.csv'
# How the ACS rows are matched to the b17 crosswalk.
cwUrl = b17  # a DataFrame here, not a URL; the call below passes b17 directly
local_match_col = 'tract'
# NOTE(review): the wanted column is the tract id while groupBy is RPDNAME —
# confirm this is what createIndicator expects.
foreign_match_col = foreign_wanted_col = 'TRACT10'
groupBy = 'RPDNAME'
columnsToInclude = [groupBy]
indnohhint = createIndicator(
    state, county, tract, year, tableId, saveAcs, b17,
    local_match_col, foreign_match_col, foreign_wanted_col,
    saveCrosswalked, saveCrosswalkedFileName,
    groupBy, aggMethod, method, columnsToInclude, finalFileName,
)
indnohhint.columns
indnohhint.head()

# ---- nohhint indicator: Baltimore City, groupBy set to CSA2010 ----
# Re-read the published tract/CSA crosswalk sheet.
cwUrl = 'https://docs.google.com/spreadsheets/d/e/2PACX-1vREwwa_s8Ix39OYGnnS_wA8flOoEkU7reIV4o3ZhlwYhLXhpNEvnOia_uHUDBvnFptkLLHHlaQNvsQE/pub?output=csv'
csaCw = pd.read_csv(cwUrl)
county = '510'  # Baltimore City
tableId = 'B28011'
method = nohhint
finalFileName = './nohhint_20' + year + '_Baltimore_City_16July2020.csv'
foreign_match_col = 'TRACT2010'
foreign_wanted_col = groupBy = 'CSA2010'
columnsToInclude = [groupBy]
indnohhint = createIndicator(
    state, county, tract, year, tableId, saveAcs, csaCw,
    local_match_col, foreign_match_col, foreign_wanted_col,
    saveCrosswalked, saveCrosswalkedFileName,
    groupBy, aggMethod, method, columnsToInclude, finalFileName,
)
indnohhint.columns
indnohhint.head()