# default_exp citistat

This colab and more can be found at https://github.com/BNIA/vitalsigns.

What's Inside?

The Guided Walkthrough

This notebook was made to create the following CitiStat Vital Signs indicators:

Todo:

  • Wrap as Function
  • Merge with Lights out Doc

Indicators Used

  • ✅ 162 - dirtyst - (CitiStat) The rate of service requests for addressing dirty streets and alleys made through Baltimore's 311 system per 1,000 residents.
  • ✅ 163 - clogged - (CitiStat) The rate of service requests for addressing clogged storm drains made through Baltimore's 311 system per 1,000 residents.
  • ✔️ 215 - lights - (CitiStat) The rate of service requests for addressing street light outages made through Baltimore's 311 system per 1,000 residents.

Datasets Used

  • ✅ CitiStat.CitiStat_201X (162-dirtyst, 163-clogged)

Topic Area: Sustainability

Source: Baltimore City CitiStat

Years Available: 2010, 2011, 2012, 2013, 2014, 2015, 2016, 2017, 2018

The rate of service requests for addressing clogged storm drains made through Baltimore's 311 system per 1,000 residents. More than one service request may be made for the same issue but is logged as a unique request.
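Each of these indicators is the same arithmetic: a count of matching service requests in an area, scaled per 1,000 residents. A minimal sketch of the calculation, with made-up numbers purely for illustration:

# Illustration only: a per-1,000-residents service-request rate
requests = 42        # hypothetical count of clogged-drain requests in a CSA
population = 3500    # hypothetical CSA population (tpop10)

rate = requests * 1000 / population
print(round(rate, 2))  # 12.0 requests per 1,000 residents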

https://bniajfi.org/indicators/Crime%20and%20Safety/lights

  • Topic Area: Crime and Safety

  • Source: Baltimore City CitiStat

  • Years Available: 2016, 2017, 2018

  • The rate of service requests for addressing street light outages made through Baltimore's 311 system per 1,000 residents. More than one service request may be made for the same issue but is logged as a unique request.

Lights is number 215 on the indicator sheet.

Guided Walkthrough

SETUP Environment:

Import Modules

! pip install -U -q PyDrive
! pip install geopy
! pip install geopandas
! pip install geoplot
! pip install dataplay
! pip install matplotlib
! pip install psycopg2-binary
! apt-get build-dep python-psycopg2
! apt-get install libpq-dev
! apt-get install libspatialindex-dev
! pip install rtree
! pip install dexplot

from dataplay.geoms import workWithGeometryData

%%capture
# These imports will handle everything
import os
import sys
import csv
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import geopandas as gpd
from geopandas import GeoDataFrame
import psycopg2
import pyproj
from pyproj import Proj, transform  # conda install -c conda-forge proj4
from shapely.geometry import Point
from shapely import wkb
from shapely.wkt import loads
# https://pypi.org/project/geopy/
from geopy.geocoders import Nominatim

# In case a file is KML, enable support
import fiona
fiona.drvsupport.supported_drivers['kml'] = 'rw'
fiona.drvsupport.supported_drivers['KML'] = 'rw'

from IPython.display import clear_output
clear_output(wait=True)

import ipywidgets as widgets
from ipywidgets import interact, interact_manual

Configure Environment

# This will just beautify the output
pd.set_option('display.expand_frame_repr', False)
pd.set_option('display.precision', 2)

from IPython.core.interactiveshell import InteractiveShell
InteractiveShell.ast_node_interactivity = "all"

# pd.set_option('display.expand_frame_repr', False)
# pd.set_option('display.precision', 2)
# pd.reset_option('max_colwidth')
pd.set_option('max_colwidth', 20)
# pd.reset_option('max_colwidth')

Prep Datasets

TPOP CSA and Baltimore

Get CSA

csa = "https://services1.arcgis.com/mVFRs7NF4iFitgbY/ArcGIS/rest/services/Tpop/FeatureServer/0/query?where=1%3D1&outFields=*&returnGeometry=true&f=pgeojson"
csa = gpd.read_file(csa)
csa.head(1)

Get Baltimore

url2 = "https://services1.arcgis.com/mVFRs7NF4iFitgbY/ArcGIS/rest/services/Tpop/FeatureServer/1/query?where=1%3D1&outFields=*&returnGeometry=true&f=pgeojson"
csa2 = gpd.read_file(url2)
csa2['CSA2010'] = csa2['City_1']
csa2['OBJECTID'] = 56
csa2 = csa2.drop(columns=['City_1'])
csa2.head()
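Both requests hit the same Tpop FeatureServer and differ only in the layer index (0 returns the CSA polygons, 1 the citywide polygon). A small helper along these lines (hypothetical, not part of the original notebook) makes that explicit:

# Hypothetical helper: build a Tpop FeatureServer GeoJSON query URL for a layer
BASE = "https://services1.arcgis.com/mVFRs7NF4iFitgbY/ArcGIS/rest/services/Tpop/FeatureServer"

def tpop_url(layer):
    return BASE + "/" + str(layer) + "/query?where=1%3D1&outFields=*&returnGeometry=true&f=pgeojson"

# csa  = gpd.read_file(tpop_url(0))  # CSA polygons
# city = gpd.read_file(tpop_url(1))  # citywide polygon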

Append Baltimore City to the bottom of the CSA dataframe. It goes at the bottom because the point-in-polygon (ponp) match returns only the last matching CSA label, so the citywide record must come last.

# DataFrame.append is deprecated in newer pandas; pd.concat is the equivalent
csa = pd.concat([csa, csa2], ignore_index=True)
# csa = csa.append(csa2).reset_index(drop=True)
csa.head(3)
csa.tail(3)
csa.drop(columns=['Shape__Area', 'Shape__Length', 'OBJECTID'], axis=1).to_file("BCity_and_CSA.geojson", driver='GeoJSON')
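As a quick sanity check (an assumed invariant, not in the original notebook): the combined frame should hold the 55 CSAs plus the citywide record, with Baltimore City last, since a later cell addresses it at index 55:

# Hypothetical check: Baltimore City should be the final row (index 55)
print(len(csa))                 # expect 56: 55 CSAs + 1 citywide record
print(csa['CSA2010'].iloc[-1])  # expect 'Baltimore City'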

Dirty Streets

import pandas as pd
import geopandas as gpd

year = '19'
original = gpd.read_file("DirtyStreetsAlleys_20"+year+"_CSACity.shp", geometry='geometry')
original.columns
original.rename(columns={'CSA': 'CSA2010', 'BaltCity': 'InBaltimore'}, inplace=True)

# Keep only records geocoded to a CSA or to Baltimore City
df = original[ original['CSA2010'].notnull() | original['InBaltimore'].notnull() ]

print('After filtering records where a CSA or Baltimore geo-code match Exists')
print( 'All rows Before Filter: ', original.shape[0] )  # rows, columns
print( '# w BCity.isnull: ', df.InBaltimore.isnull().sum() )
bmorow = df[ df.CSA2010.isnull() ].shape[0]
print( '# w CSA2010.isnull: ', bmorow )
csarow = df[ df.CSA2010.notnull() ].shape[0]
print( '# w CSA2010.notnull: ', csarow )
print( '# rows After Filter: ', df.shape[0], '==', csarow, '+', bmorow, '==', csarow + bmorow )

# add baltimore city
df.CSA2010 = df.CSA2010.fillna('Baltimore City')

dirtydf = df.copy()
dirtydf.head(1)

Clogged

import pandas as pd
import geopandas as gpd

year = '19'
original = gpd.read_file("Clogged_20"+year+"_CSACity.shp", geometry='geometry')
original.columns
original.rename(columns={'CSA': 'CSA2010', 'BaltCity': 'InBaltimore'}, inplace=True)

# Keep only records geocoded to a CSA or to Baltimore City
df = original[ original['CSA2010'].notnull() | original['InBaltimore'].notnull() ]

print('After filtering records where a CSA or Baltimore geo-code match Exists')
print( 'All rows Before Filter: ', original.shape[0] )  # rows, columns
print( '# w BCity.isnull: ', df.InBaltimore.isnull().sum() )
bmorow = df[ df.CSA2010.isnull() ].shape[0]
print( '# w CSA2010.isnull: ', bmorow )
csarow = df[ df.CSA2010.notnull() ].shape[0]
print( '# w CSA2010.notnull: ', csarow )
print( '# rows After Filter: ', df.shape[0], '==', csarow, '+', bmorow, '==', csarow + bmorow )

# add baltimore city
df.CSA2010 = df.CSA2010.fillna('Baltimore City')

cloggedf = df.copy()
cloggedf.head(1)
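The Dirty Streets and Clogged cells above repeat the same load, rename, filter, and fill pattern, and the Lights section below uses it again. A hedged refactor into a single helper (hypothetical name prep_ponp; not part of the original notebook) could look like this:

def prep_ponp(path):
    """Load a *_CSACity shapefile and keep rows geocoded to a CSA or the city.
    Hypothetical helper summarizing the pattern used in this notebook."""
    gdf = gpd.read_file(path)
    gdf = gdf.rename(columns={'CSA': 'CSA2010', 'BaltCity': 'InBaltimore'})
    gdf = gdf[ gdf['CSA2010'].notnull() | gdf['InBaltimore'].notnull() ].copy()
    # Records inside the city but outside any CSA count toward 'Baltimore City'
    gdf['CSA2010'] = gdf['CSA2010'].fillna('Baltimore City')
    return gdf

# dirtydf  = prep_ponp("DirtyStreetsAlleys_20"+year+"_CSACity.shp")
# cloggedf = prep_ponp("Clogged_20"+year+"_CSACity.shp")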

Lights

original = gpd.read_file("LightsOut_20"+year+"_CSACity.shp")
original.rename(columns={'CSA': 'CSA2010', 'BaltCity': 'InBaltimore'}, inplace=True)
original.head()

# Convert to EPSG:4326
original = original.to_crs(epsg=4326)

Original Dataset

original.plot()

Remove these records because they match neither a CSA nor Baltimore City.

removeThese = original[ original['CSA2010'].isnull() & original['InBaltimore'].isnull() ]
removeThese.plot()

Keep These

df = original[ original['CSA2010'].notnull() | original['InBaltimore'].notnull() ]
df.plot()

print('After filtering records where a CSA or Baltimore geo-code match Exists')
print( 'All rows Before Filter: ', original.shape[0] )  # rows, columns
print( '# w BCity.isnull: ', df.InBaltimore.isnull().sum() )
bmorow = df[ df.CSA2010.isnull() ].shape[0]
print( '# w CSA2010.isnull: ', bmorow )
csarow = df[ df.CSA2010.notnull() ].shape[0]
print( '# w CSA2010.notnull: ', csarow )
print( '# rows After Filter: ', df.shape[0], '==', csarow, '+', bmorow, '==', csarow + bmorow )

# add baltimore city
df.CSA2010 = df.CSA2010.fillna('Baltimore City')

df.head(1)
df.to_csv('citistat_ponp_gdf'+year+'.csv', index=False)
print(df.columns.values)

Create Indicators

162 dirtyst - G

bad2017SQL = """
WITH tbl AS (
    SELECT (Count(DISTINCT srrecordid)::real)*(1000/the_pop::real) AS result, csa
    FROM vital_signs.Match_csas_and_bc_by_geom('citistat.citistat_2017', 'gid', 'the_geom') a
    LEFT JOIN citistat.citistat_2017 b ON a.gid = b.gid
    WHERE srtype LIKE 'WW-St%'
    GROUP BY csa, the_pop
)
UPDATE vital_signs.data SET clogged = result
FROM tbl
WHERE data.csa = tbl.csa AND data_year = '2017';
"""
# ^ BAD QUERY ^
# WORKS v
new17SQL = """
with tbl AS (
    select (count(distinct srrecordid)::real)*(1000/the_pop::real) as result, csa
    from vital_signs.match_csas_and_bc_by_geom('citistat.citistat_2016', 'gid', 'the_geom') a
    left join citistat.citistat_2016 b on a.gid = b.gid
    WHERE srtype LIKE 'SW-Dirty Alley' OR srtype LIKE 'SW-Dirty Street'
    group by csa, the_pop
)
select * from tbl where 1 = 1 ORDER BY csa ASC;
"""
pseudocodo = """
Numerator = WHERE srtype LIKE 'SW-Dirty Alley' OR srtype LIKE 'SW-Dirty Street'
"""

dirtydf.columns
dirtydf.SRType.unique()

#export
# Copy the Data
dirtyst = dirtydf.copy()

# Query the Data
# dirtyst = dirtyst[ dirtyst['SRType'].str.contains('SW-Dirty Alley|SW-Dirty Street', regex=True) ]

# *Special*: UPDATE HERE AND THEN GROUP
dirtyst['162-dirtyst'+year] = 1
# DataFrame.append is deprecated in newer pandas; pd.concat is the equivalent
dirtyst = pd.concat([dirtyst, pd.DataFrame([{
    'CSA2010': 'Baltimore City',
    '162-dirtyst'+year: dirtyst['162-dirtyst'+year].sum()
}])], ignore_index=True)
dirtyst = dirtyst.groupby('CSA2010').sum(numeric_only=True)

# Make sure ALL CSAs and Baltimore City are included and sorted.
dirtyst = csa.merge( dirtyst, left_on='CSA2010', right_on='CSA2010', how='outer' )
dirtyst.drop(columns=['Shape__Length', 'Shape__Area', 'geometry'], inplace=True)
dirtyst = dirtyst[['CSA2010', '162-dirtyst'+year, 'tpop10']]

# Create the Indicator: requests per 1,000 residents
dirtyst['162-dirtyst'+year] = dirtyst['162-dirtyst'+year] * 1000 / dirtyst['tpop10']

display( dirtyst.head(2) )
dirtyst.tail(2)
dirtyst.to_csv('162-dirtyst'+year+'.csv')

163 clogged - G

Note: 2016 uses 'WW St%', while 2017 uses 'WW-St%'.

with tbl AS (
    select (count(distinct srrecordid)::real)*(1000/the_pop::real) as result, csa
    from vital_signs.match_csas_and_bc_by_geom('citistat.citistat_2017', 'gid', 'the_geom') a
    left join citistat.citistat_2017 b on a.gid = b.gid
    WHERE srtype LIKE 'WW-St%'
    group by csa, the_pop
)
select * from tbl where 1 = 1 ORDER BY csa ASC;
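A pandas equivalent of that WHERE clause would have to pick the prefix by year, since the SRType spelling changed between 2016 and 2017. A sketch under that assumption (using the SRType column seen in the cells above):

# Hypothetical: replicate the SQL's year-dependent LIKE 'WW-St%' filter in pandas
prefix = 'WW St' if year == '16' else 'WW-St'
filtered = cloggedf[ cloggedf['SRType'].str.startswith(prefix, na=False) ]
print(len(filtered), 'clogged-drain requests matching', repr(prefix))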

cloggedf.columns
cloggedf.SRType

#export
# Copy the Data
clogged = cloggedf.copy()

# Query the Data
# clogged = clogged[ clogged['SRType'].str.contains('WW-St', regex=False) ]

# *Special*: UPDATE HERE AND THEN GROUP
clogged['163-clogged'+year] = 1
# DataFrame.append is deprecated in newer pandas; pd.concat is the equivalent
clogged = pd.concat([clogged, pd.DataFrame([{
    'CSA2010': 'Baltimore City',
    '163-clogged'+year: clogged['163-clogged'+year].sum()
}])], ignore_index=True)
clogged = clogged.groupby('CSA2010').sum(numeric_only=True)

# Make sure ALL CSAs and Baltimore City are included and sorted.
clogged = csa.merge( clogged, left_on='CSA2010', right_on='CSA2010', how='outer' )
clogged.drop(columns=['Shape__Length', 'Shape__Area', 'geometry'], inplace=True)
clogged = clogged[['CSA2010', '163-clogged'+year, 'tpop10']]

# Create the Indicator: requests per 1,000 residents
clogged['163-clogged'+year] = clogged['163-clogged'+year] * 1000 / clogged['tpop10']

display( clogged.head(2) )
clogged.tail(2)
clogged.to_csv('163-clogged'+year+'.csv')

215 - Lights

The rate of service requests for addressing street light outages made through Baltimore's 311 system per 1,000 residents.

More than one service request may be made for the same issue but is logged as a unique request.

originalSQL = """
WITH tbl AS (
    SELECT ( Sum( CASE WHEN csa_present THEN 1 ELSE 0 END )::numeric(10,2) * 1000 )/the_pop AS result, csa
    FROM vital_signs.match_csas_and_bc_by_geom('crime.lightsout_2017', 'gid', 'the_geom') a
    LEFT JOIN crime.lightsout_2017 b ON a.gid = b.gid
    GROUP BY csa, the_pop
)
UPDATE vital_signs.data SET lights = result
FROM tbl
WHERE data.csa = tbl.csa AND data_year = '2017';
"""

compareYears

compareYears = gpd.read_file("https://services1.arcgis.com/mVFRs7NF4iFitgbY/ArcGIS/rest/services/Lights/FeatureServer/0/query?where=1%3D1&outFields=*&returnGeometry=true&f=pgeojson")
compareYears.head()

#export
def lights(df, csa, yr):
    # The rate of service requests for addressing street light outages made
    # through Baltimore's 311 system per 1,000 residents. More than one service
    # request may be made for the same issue but is logged as a unique request.

    # Create the Numerator
    lights = df.copy()
    lights['count'] = 1
    lights = lights.groupby('CSA2010').sum(numeric_only=True)

    # Make sure ALL CSAs and Baltimore City are included and sorted.
    lights = csa.merge( lights, left_on='CSA2010', right_on='CSA2010', how='outer' )
    lights.drop( columns=['geometry', 'Shape__Length', 'CouncilDis', 'Latitude',
                          'Longitude', 'Shape__Area', 'OBJECTID_y', 'OBJECTID_x'], inplace=True )

    # Baltimore City has records that fall outside every CSA; store the
    # citywide total in the Baltimore City row (index 55).
    lights.at[55, 'count'] = lights['count'].sum()

    # Perform the calculation
    lights['215-lights'+yr] = lights['count'] / lights['tpop10'] * 1000

    # Pull last year's published values and compute the change
    compareYears = gpd.read_file("https://services1.arcgis.com/mVFRs7NF4iFitgbY/ArcGIS/rest/services/Lights/FeatureServer/0/query?where=1%3D1&outFields=*&returnGeometry=true&f=pgeojson")
    prevYear = 'lights' + str( int(yr) - 1 )
    if prevYear in compareYears.columns:
        lights = lights.merge( compareYears[['CSA2010', prevYear]], left_on='CSA2010', right_on='CSA2010', how='outer' )
        lights['change'] = lights['215-lights'+yr] - lights[ prevYear ]
        lights['percentChange'] = lights['change'] / lights[ prevYear ] * 100
        lights['change'] = lights['change'].apply(lambda x: "{:.2f}".format(x))

    print( 'Records Matching Query: ', lights.size / len(lights.columns) )
    return lights

fin = lights(df, csa, year)
fin.to_csv('215-lights'+year+'.csv', index=False)
fin.head(60)
