# default_exp bidbaltimore
This colab and more can be found at https://github.com/BNIA/VitalSigns.
What's Inside?:
Indicators Used
- ✔️ 40 - taxlien - (MdProp, TaxSale) Percentage of residential tax lien sales
Datasets Used
- ✔️ housing.taxsales_201X (40-taxlien) From BidBaltimore
year = "19"
Import Modules
! pip install -U -q PyDrive
! pip install geopy
! pip install geopandas
! pip install geoplot
! pip install dataplay
! pip install matplotlib
! pip install psycopg2-binary
! apt-get build-dep python-psycopg2
! apt-get install libpq-dev
! apt-get install libspatialindex-dev
! pip install rtree
! pip install dexplot
from dataplay.geoms import workWithGeometryData
%%capture
# These imports will handle everything
import os
import sys
import csv
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import geopandas as gpd
from geopandas import GeoDataFrame
import psycopg2
import pyproj
from pyproj import Proj, transform
# conda install -c conda-forge proj4
from shapely.geometry import Point
from shapely import wkb
from shapely.wkt import loads
# https://pypi.org/project/geopy/
from geopy.geocoders import Nominatim
# In case file is KML, enable support
import fiona
fiona.drvsupport.supported_drivers['kml'] = 'rw'
fiona.drvsupport.supported_drivers['KML'] = 'rw'
from IPython.display import clear_output
clear_output(wait=True)
import ipywidgets as widgets
from ipywidgets import interact, interact_manual
Configure Environment
# This will just beautify the output
pd.set_option('display.expand_frame_repr', False)
pd.set_option('display.precision', 2)
from IPython.core.interactiveshell import InteractiveShell
InteractiveShell.ast_node_interactivity = "all"
# pd.set_option('display.expand_frame_repr', False)
# pd.set_option('display.precision', 2)
# pd.reset_option('max_colwidth')
pd.set_option('max_colwidth', 20)
# pd.reset_option('max_colwidth')
totalres = pd.read_csv("37-totalres-19.csv")
# totalres.set_index('CSA2010', inplace=True)
totalres.tail(1)
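A quick sanity check on the denominator file (a minimal sketch; it assumes the CSV carries a CSA2010 column and a totalres19 column, as the indicator code below expects):
# Sanity check: the denominator table should have 'CSA2010' and 'totalres'+year columns (assumption based on how they are used below)
expected = {'CSA2010', 'totalres' + year}
missing = expected - set(totalres.columns)
print('Missing denominator columns:', missing if missing else 'none')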
Taxsales_2019 - BidBaltimore - OLD
# Read the 2019 tax sale shapefile (path mirrors the 2018 cell below)
taxSales = gpd.read_file("./TaxSales_2019.shp")
# Convert to EPSG:4326
taxSales = taxSales.to_crs(epsg=4326)
# Convert Geom to Coords
taxSales['x'] = taxSales.geometry.x
taxSales['y'] = taxSales.geometry.y
# taxSales.head(5)
taxSales = taxSales[ taxSales.geometry.y > 38 ]
taxSales = taxSales[ taxSales.geometry.x < -70 ]
taxSales = taxSales[ taxSales.geometry.x > -80 ]
# Reference: All Points
base = csa.plot(color='white', edgecolor='black')
taxSales.plot(ax=base, marker='o', color='green', markersize=5);
# Get CSA Labels for all Points.
taxSalesCsa = workWithGeometryData(
method='ponp', df=taxSales, polys=csa, ptsCoordCol='geometry',
polygonsCoordCol='geometry', polygonsLabel='CSA2010'
)
taxSalesCsa = taxSalesCsa.drop('geometry',axis=1)
taxSalesCsa.to_csv('ponp_taxSales_19.csv', index=False)
taxSalesCsa.head(10)
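For reference, the same point-in-polygon labeling can be approximated with a plain GeoPandas spatial join. This is only an illustrative sketch of the idea behind dataplay's ponp method, and it assumes csa is a GeoDataFrame in EPSG:4326 with a CSA2010 column:
# Illustrative alternative to workWithGeometryData(method='ponp', ...):
# label each tax sale point with the CSA polygon it falls within.
# Assumes `csa` is a GeoDataFrame in EPSG:4326 with a 'CSA2010' column.
# (Older GeoPandas versions use op='within' instead of predicate='within'.)
sjoined = gpd.sjoin(taxSales, csa[['CSA2010', 'geometry']], how='left', predicate='within')
sjoined[['CSA2010']].head()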
Taxsales_2018 - BidBaltimore - OLD
ls
taxSales = gpd.read_file("./TaxSales_2018.shp");
# Convert to EPSG:4326
taxSales = taxSales.to_crs(epsg=4326)
# Convert Geom to Coords
taxSales['x'] = taxSales.geometry.x
taxSales['y'] = taxSales.geometry.y
# taxSales.head(5)
taxSales = taxSales[ taxSales.geometry.y > 38 ]
taxSales = taxSales[ taxSales.geometry.x < -70 ]
taxSales = taxSales[ taxSales.geometry.x > -80 ]
# Reference: All Points
base = csa.plot(color='white', edgecolor='black')
taxSales.plot(ax=base, marker='o', color='green', markersize=5);
# Get CSA Labels for all Points.
taxSalesCsa18 = workWithGeometryData(
method='ponp', df=taxSales, polys=csa, ptsCoordCol='geometry',
polygonsCoordCol='geometry', polygonsLabel='CSA2010'
)
taxSalesCsa18 = taxSalesCsa18.drop('geometry',axis=1)
taxSalesCsa18.to_csv('ponp_taxSales_18.csv', index=False)
taxSalesCsa18.head(10)
Taxsales_2019 - BidBaltimore - NEW
ls
original = gpd.read_file("TaxSales_20"+year+"_CSACity.shp");
original.rename(columns={ 'CSA':'CSA2010', 'BaltCity':'InBaltimore'}, inplace=True)
df = original[ original['CSA2010'].notnull() | original['InBaltimore'].notnull() ]
print('After filtering records where a CSA or Baltimore geo-code match exists')
print( 'All rows Before Filter: ', original.shape[0] ) # rows, columns
print( '# w BCity.isnull: ', df.InBaltimore.isnull().sum() ); bmorow = df[ df.CSA2010.isnull() ].shape[0]
print( '# w CSA2010.isnull: ', bmorow ); csarow = df[ df.CSA2010.notnull() ].shape[0]
print( '# w CSA2010.notnull: ', csarow );
print( '# rows After Filter: ', df.shape[0],'==',csarow,'+',bmorow,'==', csarow + bmorow); # add baltimore city
df.CSA2010 = df.CSA2010.fillna('Baltimore City')
taxSales = df.copy()
taxSales.head(1)
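A quick check that the fill worked (a minimal sketch): every record should now carry either a CSA name or the 'Baltimore City' catch-all.
# After fillna, no CSA2010 values should remain null; Baltimore-only records are labeled 'Baltimore City'.
print('Null CSA2010 after fill:', taxSales.CSA2010.isnull().sum())
taxSales.CSA2010.value_counts().head()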
Taxlien 40 - (MdProp, TaxSale)
# 40- taxlien - Percentage of residential tax lien sales
# https://services1.arcgis.com/mVFRs7NF4iFitgbY/arcgis/rest/services/taxlien/FeatureServer/layers
# https://bniajfi.org/indicators/Housing%20And%20Community%20Development/taxlien
# Numerator: housing.taxsales_201X
# Denominator: housing.mdprop_201X
long_Description = """
The percentage of residential properties with city liens sold as tax certificates at the annual tax lien certificate sale held in May.
Tax sales are used to collect delinquent real property taxes and other unpaid charges to the city, which are liens against the real property.
The tax certificate sale is a public online auction of City lien interests on properties that occurs annually in May.
"""
taxlien_SQL = """
2017 Query
WITH numerator AS (
select (sum( case
when csa_present
then 1 else 0 end)::numeric) as result, csa
from vital_signs.match_csas_and_bc_by_geom('housing.taxsales_2017', 'gid', 'the_geom') a
left join housing.taxsales_2017 b on a.gid = b.gid
group by csa ),
denominator AS (
select (sum( case
when (address != $$NULL$$) AND (desclu = $$Apartments$$ OR desclu = $$Residential$$ OR desclu = $$Residential Commercial$$ OR desclu = $$Residential Condominium$$) then 1 else NULL end)::numeric ) as result, csa
from vital_signs.match_csas_and_bc_by_geom('housing.mdprop_2017', 'gid', 'the_geom') a
left join housing.mdprop_2017 b on a.gid = b.gid
group by csa, the_pop ),
tbl AS (
select denominator.csa,(numerator.result / denominator.result)*(100::numeric) as result
from numerator left join denominator on numerator.csa = denominator.csa )
select * from tbl where 1 = 1 ORDER BY csa ASC;
2016 query
WITH numerator AS ( select (sum( case
when csa_present then 1 else 0 end)::numeric) as result, csa
from vital_signs.match_csas_and_bc_by_geom('housing.taxsales_2016', 'gid', 'the_geom') a
left join housing.taxsales_2016 b on a.gid = b.gid
group by csa ),
denominator AS (
select (sum( case
when (address != $$NULL$$) AND (desclu = $$Apartments$$ OR desclu = $$Residential$$ OR desclu = $$Residential Commercial$$ OR desclu = $$Residential Condominium$$) then 1 else NULL end)::numeric ) as result, csa
from vital_signs.match_csas_and_bc_by_geom('housing.mdprop_2017', 'gid', 'the_geom') a
left join housing.mdprop_2017 b on a.gid = b.gid
group by csa, the_pop ),
tbl AS (
select denominator.csa,(numerator.result / denominator.result)*(100::numeric) as result
from numerator left join denominator on numerator.csa = denominator.csa )
update vital_signs.data
set taxlien = result from tbl where data.csa = tbl.csa and data_year = '2016';
"""
taxlien_translation = " (sum taxsales_2017 when csa_present / mdprop.totalres )* 100 "
oldpy = """
taxlien18 = df18.drop(['X', 'Y', 'x', 'y'], axis=1).copy()
taxlien18.head(2)
taxlien19 = df19.drop(['X', 'Y', 'x', 'y'], axis=1).copy()
taxlien19.head(2)
taxlien18['taxlien18Count'] = 1
taxlien19['taxlien19Count'] = 1
taxlien = taxlien19.groupby('CSA2010').sum(numeric_only=True)
taxlien['taxlien18Count'] = taxlien18.groupby('CSA2010').sum(numeric_only=True)['taxlien18Count']
taxlien = taxlien[['taxlien18Count', 'taxlien19Count']]
taxlien['totalres18'] = totalres['totalres18']
taxlien.head(1)
# DOES 2019 use the same denominator as 2018 as 2017?
taxlien['totalres19'] = taxlien['totalres18']
"""totalres.columnstotalres = pd.read_csv("37-totalres-"+year+".csv")#export
def taxlien(df, totalres, year):
    taxlien = df.copy()
    # Aggregate Numeric Values by Sum
    taxlien['taxlienCount'] = 1
    taxlien = taxlien.groupby('CSA2010').sum(numeric_only=True)
    # Make sure ALL CSAs and Baltimore City are included, among other things
    taxlien = totalres[ ['CSA2010','totalres'+year] ].merge( taxlien, on='CSA2010', how='outer' )
    # Update the Baltimore City record with the citywide total.
    taxlien.at[55,'taxlienCount'] = taxlien['taxlienCount'].sum()
    # Create the Indicator
    taxlien['40-taxlien'+year] = taxlien['taxlienCount'] / taxlien['totalres'+year] * 100
    taxlien = taxlien[['CSA2010', 'taxlienCount', '40-taxlien'+year, 'totalres'+year ]]
    # Pull the published indicator for the previous year and compute the change
    compareYears = gpd.read_file("https://services1.arcgis.com/mVFRs7NF4iFitgbY/ArcGIS/rest/services/Taxlien/FeatureServer/0/query?where=1%3D1&outFields=*&returnGeometry=true&f=pgeojson");
    prevYear = 'taxlien'+ str( int(year) - 1 )
    if prevYear in compareYears.columns:
        taxlien = taxlien.merge( compareYears[['CSA2010', prevYear]], on='CSA2010', how='outer' )
        taxlien['change'] = taxlien['40-taxlien'+year] - taxlien[ prevYear ]
        taxlien['percentChange'] = taxlien['change'] / taxlien[ prevYear ] * 100
        taxlien['change'] = taxlien['change'].apply(lambda x: "{:.2f}".format(x) )
        taxlien = taxlien[['CSA2010', 'taxlienCount', 'totalres'+year, prevYear, '40-taxlien'+year, 'percentChange', 'change']]
    return taxlien
taxlien = taxlien( taxSales.drop(['X', 'Y'], axis=1), totalres, year)
taxlien.head(60)
taxlien.to_csv('40-taxlien'+year+'.csv', index=False)
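Optionally, the finished table can be joined back onto the CSA boundaries for a quick look at the indicator on a map. This is only a sketch and assumes the csa GeoDataFrame used earlier is still in memory with a CSA2010 column:
# Optional sketch: quick choropleth of the new indicator (assumes the `csa` GeoDataFrame is available)
mapped = csa.merge(taxlien[['CSA2010', '40-taxlien'+year]], on='CSA2010', how='left')
mapped.plot(column='40-taxlien'+year, legend=True);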