# default_exp mdprop
43 baltvac - O
NO Query Found
Waiting on City Housing
Completed by Cheryl
Todo:
Columns Used
-
✅ 32 - ownroc - (MdProp) Owner Occupied
-
✅ 34 - vacants - (MdProp[totalres], Vacants) percentage of residential properties that are vacant and abandoned
-
✅ xxx - baltvac - (Vacants)
-
✅ 35 - vio - (Violations, MdProp[totalres]) Percentage of residential properties with open housing-code violation notices
-
✅ 37 - Totalres - (Mdprop) The total number of residential properties located within an area as identified by Maryland Property View.
-
✅ 36 - resrehab - (MdProp[totalres], Permits) Percentage of properties with rehabilitation permits exceeding $5k
-
✅ 33 - fore - ( MdProp[totalres], Close_Crawl )
-
✅ 41 - demper - (MdProp[totalres], Permits) number of demolition permits per 1000 residential properties
-
✅ 42 - constper - (MdProp[totalres], Permits) Number of new construction permits per 1000 residential properties
-
✅ 141 - comprop - (MdProp) Percentage of properties with rehabilitation permits exceeding $5k
-
✅ 142 - crehab - (MdProp[comprop], Permits) Percentage of properties with rehabilitation permits exceeding $5k
Datasets Used
-
✅ foreclosures.foreclosures_201X (33-fore-> 2018/ 2019 Close-Crawler)
-
✅✔️ housing.vacants_201X (34-vacant -> datenotice, dateabate, datecancel)
-
✅ housing.permits_201X (36-resrehab, 41-demper, 42-constper -> field22, casetype, propuse, existingus, cost)
-
✅✔️ housing.mdprop_201X - totalres __( 33-fore, 34-vacant, 35-vio, 36-resrehab, 40-taxlien, 41-demper, 42-constper, __ ownroc )
❌ = no data for
✔️ = Geocoded
✅ = Processed (starting from 2019)
This colab and more can be found at https://github.com/BNIA/vitalSigns.
General Outline
- Import and merge CSA/ BCity Esri boundary files
-
year = '20'  # two-digit data year; selects '20XX' source files and column suffixes throughout
About this Tutorial:
What's Inside?
The Tutorial
This lab is split into two sections.
Guided Walkthrough
SETUP:
Import Modules
# Environment setup: install geospatial dependencies (Colab shell magics).
! pip install -U -q PyDrive
! pip install geopy
! pip install geopandas
# NOTE(review): the next two lines are multiple notebook cells fused by the
# flat export (pip/apt magics and %%capture run together) — split before running.
! pip install geoplot!apt install libspatialindex-dev
!pip install rtree!pip install dataplay%%capture
# These imports will handle everything
import os
import sys
import csv
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import geopandas as gpd
from geopandas import GeoDataFrame
import psycopg2
import pyproj
from pyproj import Proj, transform
# conda install -c conda-forge proj4
from shapely.geometry import Point
from shapely import wkb
from shapely.wkt import loads
# https://pypi.org/project/geopy/
from geopy.geocoders import Nominatim
# In case file is KML, enable support
import fiona
fiona.drvsupport.supported_drivers['kml'] = 'rw'
# NOTE(review): the two lines below are fused cells from the export — the
# IPython/ipywidgets imports were originally separate statements.
fiona.drvsupport.supported_drivers['KML'] = 'rw'from IPython.display import clear_output
clear_output(wait=True)import ipywidgets as widgets
from ipywidgets import interact, interact_manual
Configure Environment
# This will just beautify the output (display-only settings; no data effects).
pd.set_option('display.expand_frame_repr', False)
pd.set_option('display.precision', 2)
from IPython.core.interactiveshell import InteractiveShell
# Echo every expression in a cell, not only the last one.
InteractiveShell.ast_node_interactivity = "all"
# Use the fully-qualified option name: the bare 'max_colwidth' only works via
# pandas' partial option-name matching, which is discouraged/ambiguity-prone.
pd.set_option('display.max_colwidth', 20)
# Restore the default with: pd.reset_option('display.max_colwidth')
TPOP CSA and Baltimore
Get Baltimore
Click to toggle
# Tpop FeatureServer layer 0: per-CSA boundary polygons.
csa = "https://services1.arcgis.com/mVFRs7NF4iFitgbY/ArcGIS/rest/services/Tpop/FeatureServer/0/query?where=1%3D1&outFields=*&returnGeometry=true&f=pgeojson"
csa = gpd.read_file(csa)
csa.head(1)
# Tpop FeatureServer layer 1: its City_1 label becomes the CSA2010 value and
# it is assigned OBJECTID 56 — the city-wide (Baltimore City) record.
url2 = "https://services1.arcgis.com/mVFRs7NF4iFitgbY/ArcGIS/rest/services/Tpop/FeatureServer/1/query?where=1%3D1&outFields=*&returnGeometry=true&f=pgeojson"
csa2 = gpd.read_file(url2)
csa2['CSA2010'] = csa2['City_1']
csa2['OBJECTID'] = 56
csa2 = csa2.drop(columns=['City_1'])
csa2.head()
# Append the Baltimore City record at the BOTTOM of the frame: the
# point-in-polygon (ponp) step keeps only the last matching polygon's
# CSA label, so the city-wide polygon must come last (row index 55).
# DataFrame.append was removed in pandas 2.0 — use pd.concat instead.
csa = pd.concat([csa, csa2], ignore_index=True)
csa.head(3)
csa.tail(3)
csa.head()
csa.drop(columns=['Shape__Area', 'Shape__Length', 'OBJECTID'], axis=1).to_file("BCity_and_CSA.geojson", driver='GeoJSON')
MDProp
import pandas as pd
import geopandas
# Load Maryland PropertyView parcels, pre-tagged with CSA / Baltimore City flags.
original = gpd.read_file('MDPropertyView_2021_CSACity.shp')
original.columns
pd.set_option('display.max_columns', None)
original.rename(columns={ 'CSA':'CSA2010', 'BaltCity':'InBaltimore'}, inplace=True)
# Keep only records geocoded to a CSA or to Baltimore City. .copy() so the
# fillna below is a plain column assignment, not chained assignment on a view
# (avoids pandas' SettingWithCopy trap).
df = original[ original['CSA2010'].notnull() | original['InBaltimore'].notnull() ].copy()
print('After filtering records where a CSA or Baltimore geo-code match Exists')
print( 'All rows Before Filter: ', original.shape[0] ) # rows, columns
print( '# w BCity.isnull: ', df.InBaltimore.isnull().sum() )
bmorow = df[ df.CSA2010.isnull() ].shape[0]
print( '# w CSA2010.isnull: ', bmorow )
csarow = df[ df.CSA2010.notnull() ].shape[0]
print( '# w CSA2010.notnull: ', csarow )
print( '# rows After Filter: ', df.shape[0],'==',csarow,'+',bmorow,'==', csarow + bmorow)
# Records inside the city but outside any CSA roll up to 'Baltimore City'.
df['CSA2010'] = df['CSA2010'].fillna('Baltimore City')
mdprop = df.copy()
mdprop.head(1)
Permits - OLD PointsInPolygons
Import
# OLD workflow: read raw 2019 permits and label points with CSAs via
# point-in-polygon (dataplay.geoms). Superseded by the pre-tagged shapefile
# load further below. Several lines here are fused notebook cells from the
# export (e.g. 'permits.columnscsa.tail()') — split before running.
permits = gpd.read_file("Permits_2019_CSACity.shp");
permits.head()permits.columns
permits.crs
permits.head(5)# Convert to EPSG:4326
permits = permits.to_crs(epsg=4326)
permits.crs# Convert Geom to Coords
permits['x'] = permits.geometry.x
permits['y'] = permits.geometry.y
# Drop badly-geocoded points: valid Baltimore latitudes are above 38.
permits.head(5)permits = permits[ permits.geometry.y > 38 ]# Reference: All Points
base = csa.plot(color='white', edgecolor='black')
permits.plot(ax=base, marker='o', color='green', markersize=5);permits.columnscsa.tail()from dataplay import geoms
# Get CSA Labels for all Points.
# permitsCsa = geoms.getPolygonOnPoints(permits, csa, 'geometry', 'geometry', 'CSA2010' )
permitsCsa = geoms.workWithGeometryData(method='ponp', df=permits, polys=csa, ptsCoordCol='geometry', polygonsCoordCol='geometry', polyColorCol=False, polygonsLabel='CSA2010', pntsClr='red', polysClr='white')
# permitsCsa = permitsCsa.drop('geometry',axis=1)permitsCsa.head(10)
Processing
All
# Diagnostics: bucket points by latitude to inspect geocoding quality.
permitsAll = permits# Reference: All Points
base = csa.plot(color='white', edgecolor='black')
permitsAll.plot(ax=base, marker='o', color='green', markersize=5);permits = permitsAll# y < 0
permitsLessThanZero = permits[ permits.geometry.y < 0 ]
print('Y<0: ', permitsLessThanZero.size, '\n')# y > 0
permitsGreaterThanZero = permits[ permits.geometry.y > 0 ]
print('Y>0: ', permitsGreaterThanZero.size, '\n')
permitsGreaterThanZero.plot();# 0 < y < 38
permitsOver38 = permits[ permits.geometry.y < 38 ]
permitsOver38 = permitsOver38[ permitsOver38.geometry.y > 0 ]
print('0 < y < 38: ', permitsOver38.size, '\n') # y > 38
permitsUnder38 = permits[ permits.geometry.y > 38 ]
print('Y>38 Less than Zero: ', permitsUnder38.size, '\n')
permitsUnder38.plot();
Permits
import pandas as pd
import geopandas
# Load the year's building permits, pre-tagged with CSA / Baltimore City flags.
original = gpd.read_file("Permits_20"+year+"_CSACity.shp", geometry='geometry')
original.columns
original.rename(columns={ 'CSA':'CSA2010', 'BaltCity':'InBaltimore'}, inplace=True)
# Keep only records geocoded to a CSA or to Baltimore City. .copy() avoids
# chained assignment on a view when CSA2010 is filled below.
df = original[ original['CSA2010'].notnull() | original['InBaltimore'].notnull() ].copy()
print('After filtering records where a CSA or Baltimore geo-code match Exists')
print( 'All rows Before Filter: ', original.shape[0] ) # rows, columns
print( '# w BCity.isnull: ', df.InBaltimore.isnull().sum() )
bmorow = df[ df.CSA2010.isnull() ].shape[0]
print( '# w CSA2010.isnull: ', bmorow )
csarow = df[ df.CSA2010.notnull() ].shape[0]
print( '# w CSA2010.notnull: ', csarow )
print( '# rows After Filter: ', df.shape[0],'==',csarow,'+',bmorow,'==', csarow + bmorow)
# Records inside the city but outside any CSA roll up to 'Baltimore City'.
df['CSA2010'] = df['CSA2010'].fillna('Baltimore City')
permitsCsa = df.copy()
permitsCsa.head(1)
Vacants
import pandas as pd
import geopandas
# Load the year's vacancy notices, pre-tagged with CSA / Baltimore City flags.
original = gpd.read_file("Vacants_20"+year+"_CSACity.shp", geometry='geometry')
original.columns
original.rename(columns={ 'CSA':'CSA2010', 'BaltCity':'InBaltimore'}, inplace=True)
# Keep only records geocoded to a CSA or to Baltimore City. .copy() avoids
# chained assignment on a view when CSA2010 is filled below.
df = original[ original['CSA2010'].notnull() | original['InBaltimore'].notnull() ].copy()
print('After filtering records where a CSA or Baltimore geo-code match Exists')
print( 'All rows Before Filter: ', original.shape[0] ) # rows, columns
print( '# w BCity.isnull: ', df.InBaltimore.isnull().sum() )
bmorow = df[ df.CSA2010.isnull() ].shape[0]
print( '# w CSA2010.isnull: ', bmorow )
csarow = df[ df.CSA2010.notnull() ].shape[0]
print( '# w CSA2010.notnull: ', csarow )
print( '# rows After Filter: ', df.shape[0],'==',csarow,'+',bmorow,'==', csarow + bmorow)
# Records inside the city but outside any CSA roll up to 'Baltimore City'.
df['CSA2010'] = df['CSA2010'].fillna('Baltimore City')
vacants = df.copy()
vacants.head(1)
Vio
import pandas as pd
import geopandas
# Load the year's housing-code violations, pre-tagged with CSA / BCity flags.
original = gpd.read_file("Violations_20"+year+"_CSACity.shp", geometry='geometry')
original.columns
original.rename(columns={ 'CSA':'CSA2010', 'BaltCity':'InBaltimore'}, inplace=True)
# Keep only records geocoded to a CSA or to Baltimore City. .copy() avoids
# chained assignment on a view when CSA2010 is filled below.
df = original[ original['CSA2010'].notnull() | original['InBaltimore'].notnull() ].copy()
print('After filtering records where a CSA or Baltimore geo-code match Exists')
print( 'All rows Before Filter: ', original.shape[0] ) # rows, columns
print( '# w BCity.isnull: ', df.InBaltimore.isnull().sum() )
bmorow = df[ df.CSA2010.isnull() ].shape[0]
print( '# w CSA2010.isnull: ', bmorow )
csarow = df[ df.CSA2010.notnull() ].shape[0]
print( '# w CSA2010.notnull: ', csarow )
print( '# rows After Filter: ', df.shape[0],'==',csarow,'+',bmorow,'==', csarow + bmorow)
# Records inside the city but outside any CSA roll up to 'Baltimore City'.
df['CSA2010'] = df['CSA2010'].fillna('Baltimore City')
violations = df.copy()
violations.head(1)
Foreclosures_2019_CSACity
import pandas as pd
import geopandas
# Load the combined 2019+2020 Close-Crawl foreclosure filings, pre-tagged
# with CSA / Baltimore City flags.
original = gpd.read_file("Foreclosures_20192020_CSACity.shp", geometry='geometry')
original.columns
original.rename(columns={ 'CSA':'CSA2010', 'BaltCity':'InBaltimore'}, inplace=True)
# Keep only records geocoded to a CSA or to Baltimore City. .copy() avoids
# chained assignment on a view when CSA2010 is filled below.
df = original[ original['CSA2010'].notnull() | original['InBaltimore'].notnull() ].copy()
print('After filtering records where a CSA or Baltimore geo-code match Exists')
print( 'All rows Before Filter: ', original.shape[0] ) # rows, columns
print( '# w BCity.isnull: ', df.InBaltimore.isnull().sum() )
bmorow = df[ df.CSA2010.isnull() ].shape[0]
print( '# w CSA2010.isnull: ', bmorow )
csarow = df[ df.CSA2010.notnull() ].shape[0]
print( '# w CSA2010.notnull: ', csarow )
print( '# rows After Filter: ', df.shape[0],'==',csarow,'+',bmorow,'==', csarow + bmorow)
# Records inside the city but outside any CSA roll up to 'Baltimore City'.
df['CSA2010'] = df['CSA2010'].fillna('Baltimore City')
foreclosures = df.copy()
foreclosures.head(1)
# Case numbers embed the filing year: '24O19…' = 2019, '24O20…' = 2020.
# na=False keeps rows with a missing case number out instead of erroring.
foreclosures19 = foreclosures[foreclosures['Case_Numbe'].str.contains("24O19", na=False)]
foreclosures20 = foreclosures[foreclosures['Case_Numbe'].str.contains("24O20", na=False)]
# Keep the intentionally shared (misspelled) name 'forclosure' — the fore-33
# indicator cell below reads it.
if (year=='19'):
    forclosure = foreclosures19.copy()
else:
    forclosure = foreclosures20.copy()
forclosure.head()
Create Indicators
mdprop.head(1)
#export
# Indicator 37 (totalres): count of residential parcels per CSA, per the
# Maryland PropertyView land-use description (DESCLU).
totalres = mdprop.copy()
totalres['totalres'+year] = 1
residential_uses = ['Apartments', 'Residential', 'Residential Commercial', 'Residential Condominium']
is_residential = totalres['DESCLU'].isin(residential_uses)
totalres = totalres[is_residential]
totalres = totalres[ totalres['ADDRESS'].notna() ]
print(totalres.ADDRESS.unique() )
totalres = totalres[['CSA2010','totalres'+year]].groupby('CSA2010').sum(numeric_only=True)
# Merge onto the full CSA list so every CSA (and Baltimore City) appears.
totalres = csa[ ['CSA2010','tpop10'] ].merge( totalres, left_on='CSA2010', right_on='CSA2010', how='outer' )
# Row 55 is the city-wide record: its total is the sum over all CSAs.
totalres.at[55,'totalres'+year] = totalres['totalres'+year].sum()
totalres.to_csv('37-totalres-'+year+'.csv', index=False)
totalres.head(58)
# Provenance notes for the historical mdprop source tables (reference only).
old_notes="""
2016 uses dataset housing.mdprop_2017
2017 uses dataset housing.mdprop_2017v2
2016s dataset was collected in January of 2017
2017s dataset was collected in Novermber of 2017
"""
OwnRoc 32 - ( MdProp[totalres] ) - Complete
# Original production SQL for indicator 32 (ownroc), kept verbatim for
# reference; the pandas port below replicates its numerator/denominator.
original = """ with numerator AS (
select (sum(
case
when (ooi like 'H') AND (address != $$NULL$$) AND (desclu = $$Apartments$$ OR desclu = $$Residential$$ OR desclu = $$Residential Commercial$$ OR desclu = $$Residential Condominium$$)
then 1
else 0
end)::numeric) as result, csa
from vital_signs.match_csas_and_bc_by_geom('housing.mdprop_2017', 'gid', 'the_geom') a
left join housing.mdprop_2017 b on a.gid = b.gid
group by csa
),
denominator AS (
select (sum(
case
when (address != $$NULL$$) AND (desclu = $$Apartments$$ OR desclu = $$Residential$$ OR desclu = $$Residential Commercial$$ OR desclu = $$Residential Condominium$$)
then 1
else NULL
end)::numeric
) as result, csa
from vital_signs.match_csas_and_bc_by_geom('housing.mdprop_2017', 'gid', 'the_geom') a
left join housing.mdprop_2017 b on a.gid = b.gid
group by csa, the_pop
),
tbl AS (
select denominator.csa,(numerator.result / denominator.result)*(100::numeric) as result
from numerator left join denominator on numerator.csa = denominator.csa
)
update vital_signs.data
set ownroc = result from tbl where data.csa = tbl.csa and data_year = '2016';
"""
# Plain-English translation of the SQL above. NOTE(review): the closing quote
# of this string is fused with the oldNotes assignment (flat-export artifact).
ownroc_translation = """
Numerator = sum vacants_2017 when
(ooi like 'H') AND
(address != $$NULL$$) AND
(desclu = $$Apartments$$ OR
desclu = $$Residential$$ OR
desclu = $$Residential Commercial$$ OR
desclu = $$Residential Condominium$$
Denominator = mdprop_2017.totalres
return = (numerator / denominator )* 100
"""oldNotes = """ 2016 uses dataset housing.mdprop_2017
2017 uses dataset housing.mdprop_2017v2
2016s dataset was collected in January of 2017
2017s dataset was collected in Novermber of 2017
"""#export
import datetime
# Indicator 32 (ownroc): owner-occupied residential properties as a percent
# of all residential properties (pandas port of the SQL above).
ownroc = mdprop.copy()
# Mirrors the SQL predicate (ooi like 'H') — the owner-occupancy flag.
ownroc = ownroc[ ownroc['OOI']=='H']
ownroc = ownroc.dropna( subset=['ADDRESS'] )
ownroc = ownroc[ ownroc['DESCLU'].isin(['Apartments', 'Residential', 'Residential Commercial', 'Residential Condominium']) ]
ownroc.to_csv('ownroc'+str(year)+'_Filtered_Records.csv', index=False)
print( 'Records Matching Query: ', ownroc.size / len(ownroc.columns) )
# Aggregate Numeric Values by Sum
ownroc['ownrocCount'] = 1
ownroc = ownroc.groupby('CSA2010').sum(numeric_only=True)
# NOTE(review): the first merge is immediately superseded by the full-csa
# merge on the next line — it looks redundant; confirm before removing.
ownroc = csa[ ['CSA2010'] ].merge( ownroc, left_on='CSA2010', right_on='CSA2010', how='outer' )
ownroc = csa.merge( ownroc, left_on='CSA2010', right_on='CSA2010', how='outer' )
# Create the Indicator. The division below aligns ownroc and totalres by row
# position — assumes both frames share the same CSA row order (TODO confirm);
# row 55 is the Baltimore City record.
ownroc['ownroc'] = ownroc['ownrocCount'] * 100 / totalres['totalres'+year]
ownroc.at[55,'ownrocCount'] = ownroc['ownrocCount'].sum()
ownroc.at[55,'ownroc'] = ownroc['ownrocCount'].sum() * 100/ totalres['totalres'+year].sum()
ownroc = ownroc[ ['CSA2010', 'ownrocCount', 'ownroc'] ]
ownroc.to_csv('32-ownroc'+year+'.csv', index=False)
ownroc.tail(60)
Vacant 34 - (MdProp[totalres], Vacants) - Complete
# Reload the CSA-level residential totals produced by the totalres cell.
totalres = pd.read_csv('37-totalres-'+year+'.csv')# 34- vacant - percentage of residential properties that are vacant and abandoned
# https://services1.arcgis.com/mVFRs7NF4iFitgbY/arcgis/rest/services/vacant/FeatureServer/layers
# Numerator: housing.vacants_201X
# Denominator: housing.mdprop_201X
# Long-form description of indicator 34 (annotation-style string, kept as-is).
long_Description: """
The percentage of residential properties that have been classified as being vacant and abandoned by the Baltimore City Department
of Housing out of all properties. Properties are classified as being vacant and abandoned if: the property is not habitable and
appears boarded up or open to the elements; the property was designated as being vacant prior to the current year and still
remains vacant; and the property is a multi-family structure where all units are considered to be vacant.
"""
# Historical production SQL for indicator 34 (reference only; includes the
# error message seen when a column was missing).
vacant_SQL = """
2016
with numerator AS (
select (sum( case
when (datenotice between '2004-01-01' and '2016-12-31') AND (dateabate is NULL OR dateabate >= '2016-12-31') AND
(datecancel is NULL OR datecancel > '2016-12-31') then 1 else NULL end)::numeric) as result, csa
from vital_signs.match_csas_and_bc_by_geom('housing.vacants_2016', 'gid', 'the_geom') a
left join housing.vacants_2016 b on a.gid = b.gid
group by csa ),
denominator AS (
select (sum(
case
when (address != $$NULL$$) AND (desclu = $$Apartments$$ OR desclu = $$Residential$$ OR desclu = $$Residential Commercial$$ OR desclu = $$Residential Condominium$$)
then 1
else NULL
end)::numeric
) as result, csa
from vital_signs.match_csas_and_bc_by_geom('housing.mdprop_2017', 'gid', 'the_geom') a
left join housing.mdprop_2017 b on a.gid = b.gid
group by csa, the_pop
),
tbl AS (
select denominator.csa,(numerator.result / denominator.result)*(100::numeric) as result
from numerator left join denominator on numerator.csa = denominator.csa
)
update vital_signs.data
set vacant = result from tbl where data.csa = tbl.csa and data_year = '2016';"
2017
with numerator AS (
select (sum( case
when (datenotice between '2004-01-01' and '2017-12-31') AND (dateabate is NULL OR dateabate >= '2017-12-31') AND (datecancel is NULL OR datecancel > '2017-12-31') then 1 else NULL end)::numeric) as result, csa
from vital_signs.match_csas_and_bc_by_geom('housing.vacants_2017', 'gid', 'the_geom') a
left join housing.vacants_2017 b on a.gid = b.gid
group by csa ),
denominator AS (
select (sum( case
when (address != $$NULL$$) AND (desclu = $$Apartments$$ OR desclu = $$Residential$$ OR desclu = $$Residential Commercial$$ OR desclu = $$Residential Condominium$$)
then 1 else NULL end)::numeric ) as result, csa
from vital_signs.match_csas_and_bc_by_geom('housing.mdprop_2017vs', 'gid', 'the_geom') a
left join housing.mdprop_2017vs b on a.gid = b.gid
group by csa, the_pop ),
tbl AS (
select denominator.csa,(numerator.result / denominator.result)*(100::numeric) as result
from numerator left join denominator on numerator.csa = denominator.csa
)
select * from tbl order by csa asc"
column "dateabate" does not exist
"""
# Plain-English translation of the numerator/denominator. NOTE(review): the
# closing quote of the 'te' string below is fused with code (export artifact).
vacant_translation = """
Numerator = sum vacants_2017 when
(datenotice between '2004-01-01' and '2017-12-31')
AND (dateabate is NULL OR dateabate >= '2017-12-31')
AND (datecancel is NULL OR datecancel > '2017-12-31')
Denominator = mdprop_2017.totalres
return = (numerator / denominator )* 100
"""te = """
with numerator AS (
select (sum(
case
when (datenotice between '2004-01-01' and '2017-12-31') AND (dateabate is NULL OR dateabate >= '2017-12-31') AND (datecancel is NULL OR datecancel > '2017-12-31')
then 1
else NULL
end)::numeric) as result, csa
from vital_signs.match_csas_and_bc_by_geom('housing.vacants_2017', 'gid', 'the_geom') a
left join housing.vacants_2017 b on a.gid = b.gid
group by csa
),
tbl AS (
select denominator.csa,(numerator.result / denominator.result)*(100::numeric) as result
from numerator left join denominator on numerator.csa = denominator.csa
)
select * from tbl order by csa asc
"""vacants.head(1)#export
import datetime
# Indicator 34 (vacants): residential properties with an open vacancy notice
# issued inside the trailing 14-year window, as a percent of totalres.
vacantsCsa = vacants.copy()
vacantsCsa['DateNotice2'] = pd.to_datetime(vacantsCsa['DateNotice'],infer_datetime_format=True)
# Window bounds replicate the SQL: notice issued between (year-13)-01-01
# and (year)-12-31, both ends inclusive.
window_start = pd.Timestamp(2000 + int(year) - 13, 1, 1)
window_end = pd.Timestamp(2000 + int(year), 12, 31)
vacantsCsa = vacantsCsa[vacantsCsa['DateNotice2'].between(window_start, window_end)]
vacantsCsa.to_csv('vacants_Filtered_Records.csv', index=False)
print( 'Records Matching Query: ', vacantsCsa.size / len(vacantsCsa.columns) )
# One count per record, rolled up to CSA level.
vacantsCsa['vacantsCount'] = 1
vacantsCsa = vacantsCsa.groupby('CSA2010').sum(numeric_only=True)
# Merge onto totalres so every CSA (and Baltimore City) appears.
vacantsCsa = totalres[ ['CSA2010', 'totalres'+year] ].merge( vacantsCsa, left_on='CSA2010', right_on='CSA2010', how='outer' )
vacantsCsa['vacants'+year] = vacantsCsa['vacantsCount'] * 100 / totalres['totalres'+year]
# Row 55 is the city-wide record.
vacantsCsa.at[55,'vacantsCount'] = vacantsCsa['vacantsCount'].sum()
vacantsCsa.at[55,'vacants'+year] = vacantsCsa['vacantsCount'].sum() * 100 / totalres['totalres'+year].sum()
vacantsCsa = vacantsCsa[ ['CSA2010', 'vacantsCount', 'vacants'+year, 'totalres'+year ] ]
vacantsCsa.to_csv('34-vacants'+year+'.csv', index=False)
vacantsCsa.tail(60)
BaltVac 43 - (Vacants) - Complete
vacants.OwnerAbbr.unique()
#export
import datetime
# Indicator 43 (baltvac): open vacancy notices on publicly owned properties
# (owner abbreviations DHCD/HABC/HUD/MCC/USA) as a percent of all open notices.
public_owner = vacants['OwnerAbbr'].str.contains('DHCD|HABC|HUD|MCC|USA', regex=True, na=False)
baltvac = vacants[public_owner].copy()
baltvac['DateNotice2'] = pd.to_datetime(baltvac['DateNotice'],infer_datetime_format=True)
# Same inclusive 14-year notice window used for indicator 34.
lo = pd.Timestamp(2000 + int(year) - 13, 1, 1)
hi = pd.Timestamp(2000 + int(year), 12, 31)
baltvac = baltvac[baltvac['DateNotice2'].between(lo, hi)]
baltvac.to_csv('baltvac_Filtered_Records.csv', index=False)
print( 'Records Matching Query: ', baltvac.size / len(baltvac.columns) )
# One count per record, rolled up to CSA level.
baltvac['baltvacCount'] = 1
baltvac = baltvac.groupby('CSA2010').sum(numeric_only=True)
# Merge onto the vacants frame so every CSA (and Baltimore City) appears.
baltvac = vacantsCsa[ ['CSA2010', 'vacants'+year] ].merge( baltvac, left_on='CSA2010', right_on='CSA2010', how='outer' )
baltvac['baltvac'+year] = baltvac['baltvacCount'] / vacantsCsa['vacantsCount'] * 100
# Row 55 is the city-wide record.
baltvac.at[55,'baltvacCount'] = baltvac['baltvacCount'].sum()
baltvac.at[55,'baltvac'+year] = baltvac['baltvacCount'].sum() * 100 / vacantsCsa['vacantsCount'].sum()
baltvac = baltvac[ ['CSA2010', 'baltvacCount', 'baltvac'+year, 'vacants'+year ] ]
baltvac.to_csv('43-baltvac'+year+'.csv', index=False)
baltvac.tail(60)
Vio 35 - (MdProp[totalres], Violations) - Complete
# Historical production SQL for indicator 35 (vio), 2016 edition — reference
# only; the pandas port lives in the cell below.
a2016_query = """
with numerator AS (
select (sum(
case
when (datenotice between '2016-01-01' and '2016-12-31') AND (dateabate is NULL OR dateabate >= '2016-12-31') AND (datecancel is NULL OR datecancel > '2016-12-31')
then 1
else NULL
end)::numeric) as result, csa
from vital_signs.match_csas_and_bc_by_geom('housing.violations_thru2016', 'gid', 'the_geom') a
left join housing.violations_thru2016 b on a.gid = b.gid
group by csa
),
denominator AS (
select (sum(
case
when (address != $$NULL$$) AND (desclu = $$Apartments$$ OR desclu = $$Residential$$ OR desclu = $$Residential Commercial$$ OR desclu = $$Residential Condominium$$)
then 1
else NULL
end)::numeric
) as result, csa
from vital_signs.match_csas_and_bc_by_geom('housing.mdprop_2017', 'gid', 'the_geom') a
left join housing.mdprop_2017 b on a.gid = b.gid
group by csa, the_pop
),
tbl AS (
select denominator.csa,(numerator.result / denominator.result)*(100::numeric) as result
from numerator left join denominator on numerator.csa = denominator.csa
)
update vital_signs.data
set vio = result from tbl where data.csa = tbl.csa and data_year = '2016';
(datenotice between '2016-01-01' and '2016-12-31') AND (dateabate is NULL OR dateabate >= '2016-12-31') AND (datecancel is NULL OR datecancel > '2016-12-31')
"""
# 2017 edition of the same query (different source tables; SELECT instead of
# UPDATE at the end).
a2017_query = """
with numerator AS (
select (sum(
case
when (datenotice between '2017-01-01' and '2017-12-31') AND (dateabate is NULL OR dateabate >= '2017-12-31') AND (datecancel is NULL OR datecancel > '2017-12-31')
then 1
else NULL
end)::numeric) as result, csa
from vital_signs.match_csas_and_bc_by_geom('housing.violations_2017', 'gid', 'the_geom') a
left join housing.violations_2017 b on a.gid = b.gid
group by csa
),
denominator AS (
select (sum(
case
when (address != $$NULL$$) AND (desclu = $$Apartments$$ OR desclu = $$Residential$$ OR desclu = $$Residential Commercial$$ OR desclu = $$Residential Condominium$$)
then 1
else NULL
end)::numeric
) as result, csa
from vital_signs.match_csas_and_bc_by_geom('housing.mdprop_2017v2', 'gid', 'the_geom') a
left join housing.mdprop_2017v2 b on a.gid = b.gid
group by csa, the_pop
),
tbl AS (
select denominator.csa,(numerator.result / denominator.result)*(100::numeric) as result
from numerator left join denominator on numerator.csa = denominator.csa
)
select * from tbl where 1 = 1 ORDER BY csa ASC;
"""
#export
# Indicator 35 (vio): violation notices issued this year and still open at
# year end, as a percent of residential properties (pandas port of the SQL).
vio = violations.copy()
vio['DateCancel'] = pd.to_datetime(vio['DateCancel'])
vio['DateAbate'] = pd.to_datetime(vio['DateAbate'])
vio['DateNotice'] = pd.to_datetime(vio['DateNotice'], errors='coerce')
vio = vio[['DateNotice', 'DateAbate', 'DateCancel','CSA2010']]
vio.head(1)
start_date = '20'+year+'-01-01'
end_date = '20'+year+'-12-31'
# SQL uses BETWEEN, which is inclusive on BOTH ends — use >= for the start
# date so notices issued on Jan 1 are not dropped (was a strict >).
mask = vio[ ( vio['DateNotice'] >= start_date ) & ( vio['DateNotice'] <= end_date) ]
# Keep notices not yet abated and not cancelled as of year end.
mask1 = mask[ ( pd.isnull( mask['DateAbate'] ) ) | ( mask['DateAbate'] >= end_date ) ]
mask2 = mask1[ pd.isnull( mask1['DateCancel'] ) | ( mask1['DateCancel'] > end_date ) ]
vio = mask2.copy()
vio.to_csv('vio_Filtered_Records.csv', index=False)
#export
# Aggregate Numeric Values by Sum
vio['vioCount'] = 1
vio = vio.groupby('CSA2010').sum(numeric_only=True)
# Merge onto totalres so every CSA (and Baltimore City) appears.
vio = totalres[ ['CSA2010', 'totalres'+year] ].merge( vio, left_on='CSA2010', right_on='CSA2010', how='outer' )
# Create the Indicator (row-position alignment with totalres, as elsewhere).
vio['vio'] = vio['vioCount'] * 100 / totalres['totalres'+year]
# Create Baltimore's Record (row 55).
vio.at[55,'vioCount'] = vio['vioCount'].sum()
vio.at[55,'vio'] = vio['vioCount'].sum() * 100 / totalres['totalres'+year].sum()
vio.to_csv('35-violations'+year+'.csv', index=False)
vio.tail()
Fore 33 - (MdProp[totalres], Close Crawl) - Complete
- Close Crawl counts should drop sharply in 2020: no foreclosures were permitted after March 2020 because of COVID.
# 33 - fore - percent of properties under mortgage foreclosure
# https://services1.arcgis.com/mVFRs7NF4iFitgbY/arcgis/rest/services/Constper/FeatureServer/layers
# https://bniajfi.org/indicators/Housing%20And%20Community%20Development/fore
# Numerator: foreclosures.foreclosures_201X
# Denominator: housing.mdprop_201X
# run the 2018 and 2019 crawler first!
long_Description: """
The percentage of properties where the lending company or loan servicer has filed a foreclosure proceeding with the
Baltimore City Circuit Court out of all residential properties within an area. This is not a measure of actual foreclosures
since not every property that receives a filing results in a property dispossession.
"""
# Historical production SQL for indicator 33 (fore), kept verbatim for
# reference (includes scratch query fragments at the bottom).
fore_SQL = """
2016
with numerator AS (
select (sum( case when csa_present then 1 else NULL end)::numeric) as result, csa
from vital_signs.match_csas_and_bc_by_geom('foreclosures.foreclosures_2016', 'gid', 'the_geom') a
left join foreclosures.foreclosures_2016 b on a.gid = b.gid
group by csa ),
denominator AS (
select (sum( case when (address != $$NULL$$)
AND (desclu = $$Apartments$$
OR desclu = $$Residential$$
OR desclu = $$Residential Commercial$$
OR desclu = $$Residential Condominium$$) then 1 else NULL end)::numeric ) as result, csa
from vital_signs.match_csas_and_bc_by_geom('housing.mdprop_2017', 'gid', 'the_geom') a
left join housing.mdprop_2017 b on a.gid = b.gid
group by csa, the_pop ),
tbl AS (
select denominator.csa,(numerator.result / denominator.result)*(100::numeric) as result
from numerator left join denominator on numerator.csa = denominator.csa )
update vital_signs.data
set fore = result from tbl where data.csa = tbl.csa and data_year = '2016';
--/*
*/
select(sum( case when csa_present then 1 else NULL end)::numeric) as result, csa
from vital_signs.match_csas_and_bc_by_geom('foreclosures.foreclosures_2016', 'gid', 'the_geom') a
left join foreclosures.foreclosures_2016 b on a.gid = b.gid
group by csa order by csa = 'Baltimore City', csa
WHERE (ooi like 'H')
AND (address != $$NULL$$)
AND (desclu = $$Apartments$$
OR desclu = $$Residential$$
OR desclu = $$Residential Commercial$$
OR desclu = $$Residential Condominium$$
"""
fore_translation = "( count of closecrawl records per CSA / mdprop_2017.totalres )* 100"#export
# Indicator 33 (fore): foreclosure filings as a percent of residential
# properties. 'forclosure' (sic) is the year-filtered frame built in the
# foreclosures load cell above.
# Aggregate Numeric Values by Sum
forclosure['foreCount'] = 1
fore = forclosure.groupby('CSA2010').sum(numeric_only=True)
# Make sure ALL csas and BaltimoreCity are included. among other things
fore = totalres[ ['CSA2010', 'totalres'+year] ].merge( fore, left_on='CSA2010', right_on='CSA2010', how='outer' )
# Create the Indicator (row 55 is the city-wide record).
fore['fore'] = fore['foreCount'] * 100 / fore['totalres'+year]
fore.at[55,'foreCount'] = fore['foreCount'].sum()
fore.at[55,'fore'] = fore['foreCount'].sum() * 100 / fore['totalres'+year].sum()
fore = fore[['CSA2010', 'foreCount', 'fore', 'totalres'+year ]]
fore.to_csv('33-fore'+year+'.csv', index=False)
# NOTE(review): the line below is fused with the next section's markdown
# header by the flat export — split before running.
fore.tail(60)Resrehab 36 - (MdProp[totalres], Permits) - Complete
# 36- resrehab - Percentage of properties with rehabilitation permits exceeding $5k
# https://services1.arcgis.com/mVFRs7NF4iFitgbY/arcgis/rest/services/resrehab/FeatureServer/layers
# Numerator: housing.permits_201X
# Denominator: housing.mdprop_201X
# Long-form description of indicator 36 (annotation-style string, kept as-is).
long_Description: """
The percent of residential properties that have applied for and received a permit to renovate the interior and/or exterior
of a property where the cost of renovation will exceed $5,000. The threshold of $5,000 is used to differentiate a minor
and more significant renovation project.
"""
# Historical production SQL for indicator 36 (reference only). Note the
# column-name differences between the 2016 and 2017 permit tables.
resrehab_SQL = """
2016
with numerator AS (
select sum( case
when (exis = $$SF$$ OR exis = $$MF$$ ) AND (type1 = $$AA$$ OR type1 = $$ADD$$ OR type1 = $$ALT$$) AND (costts >=5000)
then 1 else 0 end)::numeric as result, csa from vital_signs.match_csas_and_bc_by_geom('housing.permits_2016', 'gid', 'the_geom') a
left join housing.permits_2016 b on a.gid = b.gid
group by csa ),
denominator AS (
select (sum( case
when (address != $$NULL$$) AND (desclu = $$Apartments$$ OR desclu = $$Residential$$ OR desclu = $$Residential Commercial$$ OR desclu = $$Residential Condominium$$)
then 1 else NULL end)::numeric ) as result, csa
from vital_signs.match_csas_and_bc_by_geom('housing.mdprop_2017', 'gid', 'the_geom') a
left join housing.mdprop_2017 b on a.gid = b.gid
group by csa, the_pop ),
tbl AS (
select denominator.csa,(numerator.result / denominator.result)*(100::numeric) as result
from numerator left join denominator on numerator.csa = denominator.csa )
update vital_signs.data
set resrehab = result from tbl where data.csa = tbl.csa and data_year = '2016';"
2017
with numerator AS (
select sum(case
when (existingus = $$SF$$
OR propuse = $$SF$$
OR existingus = $$MF$$
OR propuse = $$MF$$
OR existingus = $$DFAM$$
OR propuse = $$DFAM$$
OR existingus like '%1-%'
OR propuse like '%1-%'
)
AND casetype LIKE any (ARRAY['COM'])
AND (field22 = $$AA$$ OR field22 = $$ADD$$ OR field22 = $$ALT$$)
AND (cost >=5000) then 1 else 0 end )::numeric as result, csa
from vital_signs.match_csas_and_bc_by_geom('housing.permits_2017', 'gid', 'the_geom') a
left join housing.permits_2017 b on a.gid = b.gid
group by csa ),
denominator AS (
select (sum( case
when (address != $$NULL$$) AND (desclu = $$Apartments$$ OR desclu = $$Residential$$ OR desclu = $$Residential Commercial$$ OR desclu = $$Residential Condominium$$) then 1 else NULL end )::numeric ) as result, csa
from vital_signs.match_csas_and_bc_by_geom('housing.mdprop_2017v2', 'gid', 'the_geom') a
left join housing.mdprop_2017v2 b on a.gid = b.gid
group by csa, the_pop ),
tbl AS (
select denominator.csa,(numerator.result / denominator.result)*(100::numeric) as result
from numerator left join denominator on numerator.csa = denominator.csa )
select * from tbl where 1 = 1 ORDER BY csa ASC;"
INVALID COLUMN NAMES
"/* FIELD22: NEW ALT OTH DEM AA ADD */
2016 - > exis, prop, type1, cossts
2017 -> existingus, propuse, field22, cost
"""
# Plain-English translation of the 2017 numerator/denominator.
resrehab_translation = """
Numerator = sum permits_2017 when (
existingus = $$SF$$ OR existingus = $$MF$$ OR existingus = $$DFAM$$ OR existingus like '%1-%'
OR propuse = $$SF$$ OR propuse = $$MF$$ OR propuse = $$DFAM$$ OR propuse like '%1-%'
)
AND casetype LIKE any (ARRAY['COM'])
AND (field22 = $$AA$$ OR field22 = $$ADD$$ OR field22 = $$ALT$$)
AND (cost >=5000) then 1 else 0 end )
Denominator = mdprop.totalres
return = (numerator / denominator )* 100
"""INVALID COLUMN NAMES
2016 - > exis, prop, type1, cossts
2017 -> existingus, propuse, field22, cost
#export
# Indicator 36 (resrehab), pandas port of the SQL above.
# NOTE(review): no .copy() here — the Field22 assignment on the next line also
# mutates the shared permitsCsa frame; later cells may rely on that column,
# so confirm before changing this to permitsCsa.copy().
resrehab = permitsCsa
resrehab['Field22'] = resrehab['typework']
# Regex of residential use codes (SF / MF / DFAM / '1-…' variants).
use = ".SF.|.MF.|.DFAM.|.1-.|SF|MF|DFAM|1-.|.1-"
# NOTE(review): the SQL translation above joins the existingus and propuse
# tests with OR, but this filter combines them with & (both must match) —
# confirm which is intended before relying on the counts.
resrehab = resrehab[
( permitsCsa['existingus'].str.contains(use, regex=True, na=False) ) &
( permitsCsa['propuse'].str.contains(use, regex=True, na=False) ) &
( permitsCsa['casetype'].str.contains('.COM.|COM', regex=True, na=False) ) &
( permitsCsa['Field22'].str.contains('.AA.|.ADD.|.ALT.|AA|ADD|ALT|ADD', regex=True, na=False) ) &
( permitsCsa['cost'] >=5000 )
]
resrehab.to_csv('resrehab'+year+'_Filtered_Records.csv', index=False)
print( 'Records Matching Query: ', resrehab.size / len(resrehab.columns) )
# Aggregate Numeric Values by Sum
resrehab['resrehabCount'] = 1
resrehab = resrehab.groupby('CSA2010').sum(numeric_only=True)
# Make sure ALL csas and BaltimoreCity are included. among other things
resrehab = totalres[ ['CSA2010','totalres'+year] ].merge( resrehab, left_on='CSA2010', right_on='CSA2010', how='outer' )
# Update the baltimore CSA (row 55) before computing the percentage so the
# city-wide rate uses the summed count.
resrehab.at[55,'resrehabCount'] = resrehab['resrehabCount'].sum()
# Create the Indicator
resrehab['resrehab'+year] = resrehab['resrehabCount'] * 100 / totalres['totalres'+year]
resrehab = resrehab[ ['CSA2010', 'resrehabCount', 'resrehab'+year, 'totalres'+year ] ]
resrehab.to_csv('36-resrehab'+year+'.csv', index=False)
resrehab.head()
# NOTE(review): the line below is fused with the next section's markdown
# header by the flat export — split before running.
resrehab.tail()Demper 41 - (MdProp[totalres], Permits) - Complete
INVALID COLUMN NAMES 2016 - > exis, prop, type1, cossts 2017 -> existingus, propuse, field22, cost
# 41- demper - number of demolition permits per 1000 residential properties
# https://services1.arcgis.com/mVFRs7NF4iFitgbY/arcgis/rest/services/demper/FeatureServer/layers
# Numerator: housing.dempermits_201X
# Denominator: housing.mdprop_201X
long_Description: """
The number of permits issued for the demolition of residential buildings per 1,000 existing residential properties.
The permits are analyzed by date of issue and not date of actual demolition.
"""
demper_SQL = """
2016
with numerator AS (
select (sum( case
when csa_present then 1 else 0 end)::numeric) as result, csa
from vital_signs.match_csas_and_bc_by_geom('housing.dempermits_2016', 'gid', 'the_geom') a
left join housing.dempermits_2016 b on a.gid = b.gid
group by csa ),
denominator AS (
select (sum(
case
when (address != $$NULL$$) AND (desclu = $$Apartments$$ OR desclu = $$Residential$$ OR desclu = $$Residential Commercial$$ OR desclu = $$Residential Condominium$$)
then 1
else NULL
end)::numeric
) as result, csa
from vital_signs.match_csas_and_bc_by_geom('housing.mdprop_2017', 'gid', 'the_geom') a
left join housing.mdprop_2017 b on a.gid = b.gid
group by csa, the_pop ),
tbl AS (
select denominator.csa,(numerator.result / denominator.result)*(1000::numeric) as result
from numerator left join denominator on numerator.csa = denominator.csa )
update vital_signs.data
set demper = result from tbl where data.csa = tbl.csa and data_year = '2016'; "
2017
with numerator AS (
select (sum(
case
when csa_present AND casetype LIKE any ( ARRAY['DEM'] ) AND planaddres != ''
then 1 else 0
end
)::numeric) as result, csa
from vital_signs.match_csas_and_bc_by_geom('housing.permits_2017', 'gid', 'the_geom') a
left join housing.permits_2017 b on a.gid = b.gid
group by csa ),
denominator AS (
select (sum( case
when (address != $$NULL$$) AND
(desclu = $$Apartments$$
OR desclu = $$Residential$$
OR desclu = $$Residential Commercial$$
OR desclu = $$Residential Condominium$$
) then 1 else NULL end )::numeric ) as result, csa
from vital_signs.match_csas_and_bc_by_geom('housing.mdprop_2017v2', 'gid', 'the_geom') a
left join housing.mdprop_2017v2 b on a.gid = b.gid
group by csa, the_pop ),
tbl AS (
select denominator.csa,(numerator.result / denominator.result)*(1000::numeric) as result
from numerator left join denominator on numerator.csa = denominator.csa
select * from tbl where 1 = 1 ORDER BY csa ASC;"
INVALID COLUMN NAMES
2016 - > exis, prop, type1, cossts
2017 -> existingus, propuse, field22, cost
"""
demper_translation = """ ( sum permits_2017 when csa_present AND casetype LIKE any ( ARRAY['DEM'] ) AND planaddres != '' / mdprop.totalres )* 1000 """#export
# 41 - demper: demolition permits per 1,000 residential properties per CSA.
# Copy so the column additions below never touch the shared permitsCsa frame.
demper = permitsCsa[
    ( permitsCsa['casetype'].str.contains('DEM|.DEM.|DEM.|.DEM', regex=True, na=False) )
].copy()
# Keep only permits that carry a planning address (mirrors planaddres != ''
# in the 2017 SQL). Renamed from 'filter' to stop shadowing the builtin.
has_address = demper["PLANADDRES"] != ""
demper = demper[has_address]
demper.to_csv('demper'+year+'_Filtered_Records.csv', index=False)
print( 'Records Matching Query: ', demper.size / len(demper.columns) )
# Aggregate: one count per matching permit, summed per CSA.
demper['demperCount'] = 1
demper = demper.groupby('CSA2010').sum(numeric_only=True)
# Merge onto the full CSA list so areas with zero permits are kept.
demper = totalres[ ['CSA2010','totalres'+year] ].merge( demper, left_on='CSA2010', right_on='CSA2010', how='outer' )
# Row 55 is the Baltimore City aggregate.
demper.at[55,'demperCount'] = demper['demperCount'].sum()
# Indicator: permits per 1,000 residential properties. (A leftover duplicate
# of this line using *100 was removed; *1000 matches the SQL and the
# indicator's definition.)
demper['demper'+year] = demper['demperCount'] * 1000 / totalres['totalres'+year]
demper = demper[['CSA2010', 'demperCount', 'demper'+year, 'totalres'+year ]]
demper.to_csv('41-demper'+year+'.csv', index=False)
demper.head(60)Constper 42 - (MdProp[totalres], Permits) - Complete
INVALID COLUMN NAMES 2016 - > exis, prop, type1, cossts 2017 -> existingus, propuse, field22, cost
# 42- constper - Number of new construction permits per 1000 residential properties
# https://services1.arcgis.com/mVFRs7NF4iFitgbY/arcgis/rest/services/constper/FeatureServer/layers
# Numerator: housing.permits_201X
# Denominator: housing.mdprop_201X
long_Description: """
The number of permits issued for new residential buildings per 1,000 existing residential properties within a community.
The permits are analyzed by date of issue and not date of completion.
"""
constper_SQL = """
2016
with numerator as (
select sum( case
when (prop = $$SF$$ OR prop = $$MF$$) AND (type1 = $$NEW$$) then 1 else 0 end)::numeric as result, csa
from vital_signs.match_csas_and_bc_by_geom('housing.permits_2016', 'gid', 'the_geom') a
left join housing.permits_2016 b on a.gid = b.gid
group by csa ),
denominator AS (
select (sum( case
when (address != $$NULL$$) AND (desclu = $$Apartments$$ OR desclu = $$Residential$$ OR desclu = $$Residential Commercial$$ OR desclu = $$Residential Condominium$$)
then 1 else NULL end)::numeric ) as result, csa
from vital_signs.match_csas_and_bc_by_geom('housing.mdprop_2017', 'gid', 'the_geom') a
left join housing.mdprop_2017 b on a.gid = b.gid
group by csa, the_pop ),
tbl AS (
select denominator.csa,(numerator.result / denominator.result)*(1000::numeric) as result
from numerator left join denominator on numerator.csa = denominator.csa )
update vital_signs.data
set constper = result from tbl where data.csa = tbl.csa and data_year = '2016'; "
2017
with numerator as (
select sum(
case
when (existingus = $$SF$$
OR propuse = $$SF$$
OR existingus = $$MF$$
OR propuse = $$MF$$
OR existingus = $$DFAM$$
OR propuse = $$DFAM$$
OR existingus like '%1-%'
OR propuse like '%1-%'
) AND (field22 = $$NEW$$)
AND casetype LIKE any ( ARRAY['COM'] )
then 1 else 0 end )::numeric as result, csa
from vital_signs.match_csas_and_bc_by_geom('housing.permits_2017', 'gid', 'the_geom') a
left join housing.permits_2017 b on a.gid = b.gid
group by csa ),
denominator AS (
select (sum( case
when (address != $$NULL$$)
AND (desclu = $$Apartments$$
OR desclu = $$Residential$$
OR desclu = $$Residential Commercial$$
OR desclu = $$Residential Condominium$$
) then 1 else NULL end )::numeric ) as result, csa
from vital_signs.match_csas_and_bc_by_geom('housing.mdprop_2017v2', 'gid', 'the_geom') a
left join housing.mdprop_2017v2 b on a.gid = b.gid
group by csa, the_pop ),
tbl AS (
select denominator.csa,(numerator.result / denominator.result)*(1000::numeric) as result
from numerator left join denominator on numerator.csa = denominator.csa )
select * from tbl where 1 = 1 ORDER BY csa ASC;"
INVALID COLUMN NAMES
2016 - > exis, prop, type1, cossts
2017 -> existingus, propuse, field22, cost
"""
constper_translation = """
Numerator = sum permits_2017 when (existingus = $$SF$$
OR existingus = $$DFAM$$ OR existingus = $$MF$$ OR existingus like '%1-%'
OR propuse = $$SF$$ OR propuse = $$MF$$ OR propuse = $$DFAM$$ OR propuse like '%1-%'
)
AND (field22 = $$NEW$$)
AND casetype LIKE any ( ARRAY['COM'] )
Denominator = mdprop.totalres
return = (numerator / Denominator )* 1000
"""permitsCsa.casetype.unique()#export
# 42 - constper: new-construction permits per 1,000 residential properties.
# 2018 and 2017 is not working with the new datasets given (CSA LABELS)
use = "SF|MF|.SF.|.MF.|.SF|.MF|SF.|MF."
# Copy so adding 'Field22' does not mutate the shared permitsCsa frame
# (the original aliased it, leaking the new column into other indicators).
constper = permitsCsa.copy()
constper['Field22'] = constper['typework']
constper = constper[
    # The use/casetype filters from the SQL are intentionally disabled here:
    #(
    # constper['existingus'].str.contains(use, regex=True, na=False) |
    # constper['propuse'].str.contains(use, regex=True, na=False)
    #) &
    #( constper['casetype'].str.contains('COM|.COM.|COM.|.COM', regex=True, na=False) ) &
    ( constper['Field22'].str.contains('NEW|.NEW.|NEW.|.NEW', regex=True, na=False) )
]
# Keep only permits with a planning address (planaddres != '' in the SQL).
constper = constper[constper["PLANADDRES"] != ""]
constper = constper[['CSA2010','existingus','propuse','casetype','Field22','PLANADDRES' ]]
constper.to_csv('constper'+year+'_Filtered_Records.csv', index=False)
print( 'Records Matching Query: ', constper.size / len(constper.columns) )
# Aggregate: one count per matching permit, summed per CSA.
constper['constperCount'] = 1
constper = constper.groupby('CSA2010').sum(numeric_only=True)
# Merge onto the full CSA list so areas with zero permits are kept.
constper = totalres[ ['CSA2010','totalres'+year] ].merge( constper, left_on='CSA2010', right_on='CSA2010', how='outer' )
# Row 55 is the Baltimore City aggregate.
constper.at[55,'constperCount'] = constper['constperCount'].sum()
# Indicator: permits per 1,000 residential properties.
# NOTE(review): column is named '42-constper<yy>' unlike the other indicators
# ('demper<yy>', 'resrehab<yy>') — confirm downstream consumers expect this.
constper['42-constper'+year] = constper['constperCount'] * 1000 / totalres['totalres'+year]
constper.to_csv('42-constper'+year+'.csv', index=False)
constper.head(80)Comprop 141 - (MdProp) - Complete
"""
2016 uses dataset housing.mdprop_2017
2017 uses dataset housing.mdprop_2017v2
2016s dataset was collected in January of 2017
2017s dataset was collected in November of 2017
""""""
* Indicator Number 141/
with tbl AS (
select (sum(
case
when (lu like 'C' OR lu LIKE 'EC' OR lu LIKE 'I')
then 1
else 0
end)::numeric
) as result, csa
from vital_signs.match_csas_and_bc_by_geom('housing.mdprop_2017', 'gid', 'the_geom') a
left join housing.mdprop_2017 b on a.gid = b.gid
group by csa, the_pop
)
update vital_signs.data
set comprop = result from tbl where data.csa = tbl.csa and data_year = '2016';
"""#export
# 141 - comprop: count of commercial properties per CSA.
# SQL equivalent: sum(case when lu in ('C', 'EC', 'I') then 1 else 0 end).
col = 'comprop' + year
comprop = mdprop.copy()
# Unit column that the per-CSA groupby below sums into a count.
comprop[col] = 1
# Keep only the LU codes C, EC and I, then collapse to one row per CSA.
is_commercial = comprop['LU'].isin(['C', 'EC', 'I'])
comprop = comprop.loc[is_commercial].groupby('CSA2010').sum(numeric_only=True)
# Re-attach every CSA (plus Baltimore City) so empty areas are not dropped.
comprop = csa[['CSA2010', 'tpop10']].merge(comprop, on='CSA2010', how='outer')
# Row 55 is the Baltimore City aggregate: citywide total of the counts.
comprop.at[55, col] = comprop[col].sum()
comprop = comprop[[col, 'CSA2010']]
comprop.head(58)
# Create the Indicator
comprop.to_csv('141-comprop'+year+'.csv', index=False) Crehab 142 - (Permits, MdProp[comprop]) - Complete
This is the original SQL query
It Uses Comprop 141.
2016 - > exis, prop, type1, cossts 2017 -> existingus, propuse, field22, cost Column Errors NO COLUMN exis NO COLUMN prop
originalQuery = """ * Indicator Number 142/
with numerator AS (
select sum(
case
when (
exis LIKE any (ARRAY['COM','IND','BUS','AIR','ANIM','BAR','BEAU','DELI','FAC','ASM','ALV%','DOTH','DWC','EDU','FOOD','HCF','HIH','HOS','MIXC','INS','MER','LIB','MNTL','MOB','PUB','STO','UT','VAC','VAL','DFAM'])
AND
prop LIKE any (ARRAY['COM','IND','BUS','AIR','ANIM','BAR','BEAU','DELI','FAC','ASM','ALV%','DOTH','DWC','EDU','FOOD','HCF','HIH','HOS','MIXC','INS','MER','LIB','MNTL','MOB','PUB','STO','UT','DFAM'])
AND
type1 = ANY (ARRAY['AA','ALT','ADD','NEW'])
and costts >=5000
)
then 1
else 0
end)::numeric as result, csa
from vital_signs.match_csas_and_bc_by_geom('housing.permits_2016', 'gid', 'the_geom') a
left join housing.permits_2016 b on a.gid = b.gid
group by csa
),
denominator AS (
select (sum(
case
when (lu like 'C' OR lu LIKE 'EC' OR lu LIKE 'I')
then 1
else 0
end)::numeric
) as result, csa
from vital_signs.match_csas_and_bc_by_geom('housing.mdprop_2017', 'gid', 'the_geom') a
left join housing.mdprop_2017 b on a.gid = b.gid
group by csa
),
tbl AS (
select denominator.csa,(numerator.result / denominator.result)*(100::numeric) as result
from numerator left join denominator on numerator.csa = denominator.csa
)
update vital_signs.data
set crehab = result from tbl where data.csa = tbl.csa and data_year = '2016';"""
original17Query = """
NEW SQL 17 QUERY Version 1
/* FIELD22: NEW ALT OTH DEM AA ADD */
with numerator AS (
select sum(
case
when (
(
existingus LIKE any (ARRAY['2-%','3-%','4-%','5-%','6-%','7-%', 'COM','IND','BUS','AIR','ANIM','BAR','BEAU','DELI','FAC','ASM','ALV%','DOTH','DWC','EDU','FOOD','HCF','HIH','HOS','MIXC','INS','MER','LIB','MNTL','MOB','PUB','STO','UT','VAC','VAL','DFAM'])
)
and casetype LIKE any (ARRAY['COM'])
AND field22 = ANY (ARRAY['AA','ALT','ADD', 'NEW'])
AND cost >=5000
)
then 1 else 0 end
)::numeric as result, csa
from vital_signs.match_csas_and_bc_by_geom('housing.permits_2017', 'gid', 'the_geom') a
left join housing.permits_2017 b on a.gid = b.gid
group by csa
),
denominator AS (
select (sum(
case
when (lu like 'C' OR lu LIKE 'EC' OR lu LIKE 'I')
then 1 else 0
end
)::numeric ) as result, csa
from vital_signs.match_csas_and_bc_by_geom('housing.mdprop_2017v2', 'gid', 'the_geom') a
left join housing.mdprop_2017v2 b on a.gid = b.gid
group by csa
),
tbl AS (
select denominator.csa,(numerator.result / denominator.result)*(100::numeric) as result
from numerator
left join denominator
on numerator.csa = denominator.csa
)
select * from tbl where 1 = 1 ORDER BY csa ASC;
"""The above query is outdated because we shouldnt filter for new in the type1 column.
Also. NO FILTERING ON PROP!
permitsCsa.head(1)# '2-%','3-%','4-%','5-%','6-%','7-%', 'COM','IND','BUS','AIR','ANIM','BAR','BEAU','DELI','FAC','ASM','ALV%','DOTH','DWC','EDU','FOOD','HCF','HIH','HOS','MIXC','INS','MER','LIB','MNTL','MOB','PUB','STO','UT','VAC','VAL','DFAM'
# crehab.loc[crehab['existingus'].str.contains('2-|3-|4-|5-|6-|7-|COM|IND|BUS|AIR|ANIM|BAR|BEAU|DELI|FAC|ASM|ALV|DOTH|DWC|EDU|FOOD|HCF|HIH|HOS|MIXC|INS|MER|LIB|MNTL|MOB|PUB|STO|UT|VAC|VAL|DFAM') == True]#export
# 142 - crehab (numerator): permits on commercial properties with
# rehabilitation work costing $5,000 or more.
crehab = permitsCsa.copy()
# Unit column that the later per-CSA groupby will sum into a count.
crehab['crehab'+year] = 1
# Commercial/industrial existing-use codes, taken from the 2017 SQL query;
# the propuse variant (disabled below) omits the multi-unit and VAC/VAL codes.
exis_pat = '2-|3-|4-|5-|6-|7-|COM|IND|BUS|AIR|ANIM|BAR|BEAU|DELI|FAC|ASM|ALV|DOTH|DWC|EDU|FOOD|HCF|HIH|HOS|MIXC|INS|MER|LIB|MNTL|MOB|PUB|STO|UT|VAC|VAL|DFAM'
prop_pat = 'COM|IND|BUS|AIR|ANIM|BAR|BEAU|DELI|FAC|ASM|ALV|DOTH|DWC|EDU|FOOD|HCF|HIH|HOS|MIXC|INS|MER|LIB|MNTL|MOB|PUB|STO|UT|DFAM'
# Diagnostics: how many records each filter keeps on its own.
print('No Filter:', crehab.shape[0])
print('Filter Cost:', crehab[crehab['cost'] >= 5000].shape[0])
print('Filter ExistingUse:', crehab.loc[crehab['existingus'].str.contains(exis_pat, na=False)].shape[0])
# print('Filter Propuse:', crehab.loc[crehab['propuse'].str.contains(prop_pat, na=False)].shape[0])
print('Filter typework:', crehab[crehab['typework'].isin(['AA', 'ALT', 'ADD'])].shape[0])
# Inspect which propuse codes would match (value intentionally discarded;
# the propuse filter itself is disabled below).
crehab.loc[crehab['propuse'].str.contains(prop_pat, na=False)].propuse.unique()
# Apply the active filters in sequence.
crehab = crehab.loc[crehab['existingus'].str.contains(exis_pat, na=False)]
# crehab = crehab.loc[crehab['propuse'].str.contains(prop_pat, na=False)]
crehab = crehab[crehab['cost'] >= 5000]
crehab = crehab[crehab['typework'].isin(['AA', 'ALT', 'ADD'])]
crehab.head(1)#export
# 142 - crehab: percentage of commercial properties (comprop, indicator 141)
# that have qualifying rehabilitation permits.
key = 'crehab' + year
# Collapse the filtered permits to a per-CSA count.
crehab = crehab.groupby('CSA2010').sum(numeric_only=True)
# Re-attach every CSA (plus Baltimore City) so empty areas are kept.
crehab = csa[['CSA2010', 'tpop10']].merge(crehab, on='CSA2010', how='outer')
# Row 55 is the Baltimore City aggregate: citywide total of the counts.
crehab.at[55, key] = crehab[key].sum()
crehab = crehab[[key, 'CSA2010']]
# Indicator: permit count per 100 commercial properties.
# NOTE(review): the division relies on comprop sharing the row order/index
# produced by the identical csa merge — confirm before reordering either frame.
crehab[key] = crehab[key] * 100 / comprop['comprop' + year]
crehab.head(58)
# Write the final indicator file.
crehab.to_csv('142-crehab' + year + '.csv', index=False)