# default_exp fares

This colab and more can be found at https://github.com/BNIA/vitalsigns.

What's Inside?:

The Guided Walkthrough

This notebook was made to create the following Housing Vital Signs Indicators:

Indicators Used

  • ✅ 29 - salepr - (Fares) Median Price of Homes Sold
  • ✅ 31 - shomes - (Fares) Number of Homes Sold

Datasets Used

  • ✅ foreclosures.fares_201X (29-salepr, 31-shomes -> saledate primcatcod landusecod saleamount)

year = '19'

Guided Walkthrough

SETUP Environment:

Import Modules

! pip install -U -q PyDrive ! pip install geopy ! pip install geopandas ! pip install geoplot ! pip install dataplay ! pip install matplotlib ! pip install psycopg2-binary! apt-get install build-dep python-psycopg2 ! apt-get install libpq-dev ! apt-get install libspatialindex-dev!pip install rtree !pip install dexplotfrom dataplay.geoms import workWithGeometryData%%capture # These imports will handle everything import os import sys import csv import matplotlib.pyplot as plt import numpy as np import pandas as pd import geopandas as gpd from geopandas import GeoDataFrame import psycopg2 import pyproj from pyproj import Proj, transform # conda install -c conda-forge proj4 from shapely.geometry import Point from shapely import wkb from shapely.wkt import loads # https://pypi.org/project/geopy/ from geopy.geocoders import Nominatim # In case file is KML, enable support import fiona fiona.drvsupport.supported_drivers['kml'] = 'rw' fiona.drvsupport.supported_drivers['KML'] = 'rw'from IPython.display import clear_output clear_output(wait=True)import ipywidgets as widgets from ipywidgets import interact, interact_manual

Configure Environment

# Beautify notebook output: wide frames, two-decimal floats,
# echo every expression in a cell, and clip long cell text.
pd.set_option('display.expand_frame_repr', False)
pd.set_option('display.precision', 2)

from IPython.core.interactiveshell import InteractiveShell
InteractiveShell.ast_node_interactivity = "all"

pd.set_option('max_colwidth', 20)

Prep Datasets

TPOP CSA and Baltimore

Get Baltimore

# Pull the Tpop CSA boundaries (FeatureServer layer 0) as GeoJSON.
csa = "https://services1.arcgis.com/mVFRs7NF4iFitgbY/ArcGIS/rest/services/Tpop/FeatureServer/0/query?where=1%3D1&outFields=*&returnGeometry=true&f=pgeojson"
csa = gpd.read_file(csa)
csa.head(1)

Get CSA

# Pull the city-wide Tpop record (FeatureServer layer 1) and normalize its
# columns so it lines up with the CSA frame: the City_1 label becomes
# CSA2010 and it gets OBJECTID 56 (one past the 55 CSAs).
url2 = "https://services1.arcgis.com/mVFRs7NF4iFitgbY/ArcGIS/rest/services/Tpop/FeatureServer/1/query?where=1%3D1&outFields=*&returnGeometry=true&f=pgeojson"
csa2 = gpd.read_file(url2)
csa2['CSA2010'] = csa2['City_1']
csa2['OBJECTID'] = 56
csa2 = csa2.drop(columns=['City_1'])
csa2.head()

Append — but do not append BCity more than once. We put it at the bottom of the df because when performing the ponp it returns only the last matching polygon's CSA label.

# Put the city-wide record AFTER the CSAs: per the note above this cell,
# ponp keeps only the last matching polygon's label, so Baltimore City must
# sit at the bottom of the frame.
# DataFrame.append was deprecated in pandas 1.4 and removed in 2.0 —
# pd.concat with ignore_index=True is the drop-in replacement.
csa = pd.concat([csa, csa2], ignore_index=True)
csa.head(3)
csa.tail(3)
csa.head()
# Persist the combined boundaries without the ArcGIS bookkeeping columns.
csa.drop(columns=['Shape__Area', 'Shape__Length', 'OBJECTID'], axis=1).to_file("BCity_and_CSA.geojson", driver='GeoJSON')

Fares

import pandas as pd
import geopandas as gpd

# Load the geocoded FARES point shapefile for the configured year.
original = gpd.read_file("FARES_20"+year+"_CSACity.shp", geometry='geometry')
original.columns
original.rename(columns={'CSA': 'CSA2010', 'BaltCity': 'InBaltimore'}, inplace=True)

# Keep only records that geocoded to a CSA or to Baltimore City.
# .copy() so the fillna below writes to a real frame rather than a view of
# `original` (avoids SettingWithCopyWarning / a silently lost assignment).
df = original[original['CSA2010'].notnull() | original['InBaltimore'].notnull()].copy()

print('After filtering records where a CSA or Baltimore geo-code match Exists')
print('All rows Before Filter: ', original.shape[0])  # rows, columns
print('# w BCity.isnull: ', df.InBaltimore.isnull().sum())
bmorow = df[df.CSA2010.isnull()].shape[0]
print('# w CSA2010.isnull: ', bmorow)
csarow = df[df.CSA2010.notnull()].shape[0]
print('# w CSA2010.notnull: ', csarow)
print('# rows After Filter: ', df.shape[0], '==', csarow, '+', bmorow, '==', csarow + bmorow)

# Points inside the city but outside every CSA get the city-wide label.
df['CSA2010'] = df['CSA2010'].fillna('Baltimore City')

fares = df.copy()
fares.head(1)

Not Used

# ("Not Used") Pull x/y out of the point geometry and drop anything that
# plainly falls outside the Baltimore area, then sanity-plot and run the
# point-in-polygon labeling.
fares['x'] = fares.geometry.x
fares['y'] = fares.geometry.y
fares = fares[fares.geometry.y > 38]
fares = fares[fares.geometry.x < -70]
fares = fares[fares.geometry.x > -80]

# Reference: all points over the CSA boundaries.
base = csa.plot(color='white', edgecolor='black')
fares.plot(ax=base, marker='o', color='green', markersize=5);

fares = fares[['SALEDATE', 'SALEAMOUNT', 'LANDUSECOD', 'geometry']]
fares.head()

# Label every point with the CSA polygon it falls in, then save.
faresCsa = workWithGeometryData(
    method='ponp', df=fares, polys=csa,
    ptsCoordCol='geometry', polygonsCoordCol='geometry',
    polygonsLabel='CSA2010')
faresCsa = faresCsa.drop('geometry', axis=1)
faresCsa.to_csv('ponp_fares.csv', index=False)
faresCsa.head(1)

Indicators

# Work on a fresh copy and eyeball the dtypes and land-use codes.
faresCsa = fares.copy()
faresCsa.dtypes
faresCsa.LANDUSECOD.unique()

29 - salepr (Fares)

The landusecod column was discontinued a few years ago.

Q: The original sql queries say we need to filter on for ['HIGH RISE CONDO', 'CONDOMINIUM', 'RESIDENTIAL (NEC)', 'MID RISE CONDO', 'MULTI FAMILY DWELLING', 'MULTI FAMILY 10 UNITS LESS', 'SFR', 'TOWNHOUSE/ROWHOUSE'] but the column is missing

A: As of 3/30/2021 I (Carlos) officially declare that we will not be using landusecod in our query and are going to be using PROPERTYIN on the values 'CONDOMINIUM' and 'SINGLE FAMILY'.

# Legacy landusecod regex filter from the original SQL queries.
# Kept for reference only — the indicators below filter on PROPERTYIN.
oldquerilandusecodfilter = "|".join([
    "HIGH RISE CONDO",
    "CONDOMINIUM",
    "RESIDENTIAL (NEC)",
    "MID RISE CONDO",
    "MULTI FAMILY DWELLING",
    "MULTI FAMILY 10 UNITS LESS",
    "SFR",
    "TOWNHOUSE/ROWHOUSE",
])

# salepr - Median Price of Homes Sold
# https://services1.arcgis.com/mVFRs7NF4iFitgbY/arcgis/rest/services/salepr/FeatureServer/layers
# Numerator: Fares
# Denominator: None
long_Description: """ The median home sales price is the middle value of the prices for which homes are sold (both market and private transactions) within a calendar year. The median value is used as opposed to the average so that both extremely high and extremely low prices do not distort the prices for which homes are sold. This measure does not take into account the assessed value of a property. """
salepr_SQL = """ SELECT fullbounds.csa, sQuery.Result FROM boundaries.csa2010 as fullbounds LEFT JOIN (SELECT bounds.csa AS Boundary, median(Tables.saleamount::numeric(10,2))::numeric(10,2) as Result FROM housing.fares_2016 AS Tables JOIN boundaries.csa2010 AS bounds ON ST_Contains(bounds.the_geom, Tables.the_geom) where (landusecod LIKE 'HIGH RISE CONDO' OR landusecod LIKE 'CONDOMINIUM' OR landusecod LIKE 'RESIDENTIAL (NEC)' OR landusecod LIKE 'MID RISE CONDO' OR landusecod LIKE 'MULTI FAMILY DWELLING' OR landusecod LIKE 'MULTI FAMILY 10 UNITS LESS' OR landusecod LIKE 'SFR' OR landusecod LIKE 'TOWNHOUSE/ROWHOUSE' ) AND ( primcatcod LIKE 'ARMS LENGTH') AND ( saledate between '20160101' and '20161231') AND ( saleamount > 1000 ) GROUP BY Boundary ORDER BY Boundary) as sQuery ON fullbounds.csa = sQuery.Boundary ORDER BY fullbounds.csa """
# landusecod No longer being used
salepr_translation = """ Select median saleamount from CSA Where ( ( landusecod LIKE ['HIGH RISE CONDO', 'CONDOMINIUM', 'RESIDENTIAL (NEC)', 'MID RISE CONDO', 'MULTI FAMILY DWELLING', 'MULTI FAMILY 10 UNITS LESS', 'SFR', 'TOWNHOUSE/ROWHOUSE'] ) & ( primcatcod LIKE 'ARMS LENGTH') & ( '20160101' < saledate > '20161231') & ( saleamount > 1000) ) """

#export
# Copy the Data
faresCsa = fares.copy()
faresCsa['SALEDATE'] = pd.to_datetime(faresCsa['SALEDATE'], format='%Y%m%d')

# Query the Data: arm's-length sales of condos / single-family homes over
# $1000 within the indicator year.
# .copy() so the in-place rename/drop below operate on a real frame,
# not a view of faresCsa (SettingWithCopyWarning).
salepr = faresCsa[
    (faresCsa['PROPERTYIN'].str.contains('CONDOMINIUM|SINGLE FAMILY', regex=True))
    & (faresCsa['SALEDATE'] >= '20'+year+'-01-01')
    & (faresCsa['SALEDATE'] <= '20'+year+'-12-31')
    & (faresCsa['SALEAMOUNT'] > 1000)
    & (faresCsa['PRIMARYCAT'] == 'ARMS LENGTH')
].copy()

# Prep and Save the Filtered Records
salepr.rename(columns={'SALEAMOUNT': '29-salepr'+year}, inplace=True)
salepr.drop(columns=['geometry', 'PRIMARYCAT', 'PROPERTYIN', 'LANDUSECOD', 'SALEDATE'], inplace=True)
salepr.to_csv('fares_filtered_'+year+'.csv')

# *Special*: append the city-wide median BEFORE grouping so it survives the
# groupby as its own 'Baltimore City' row.
# (DataFrame.append is removed in pandas 2.0 -> pd.concat equivalent.)
baltimore_row = pd.DataFrame([{'CSA2010': 'Baltimore City',
                               '29-salepr'+year: salepr['29-salepr'+year].median()}])
salepr = pd.concat([salepr, baltimore_row], ignore_index=True)
salepr = salepr.groupby('CSA2010').median(numeric_only=True)

# Make sure ALL csas and BaltimoreCity are included and sorted.
salepr = csa.merge(salepr, left_on='CSA2010', right_on='CSA2010', how='outer')
salepr.drop(columns=['OBJECTID', 'Shape__Length', 'Shape__Area', 'geometry'], inplace=True)
salepr = salepr[['CSA2010', '29-salepr'+year]]
display(salepr.head(2))
salepr.tail(2)
salepr.to_csv('29-salepr'+year+'.csv')

# --- Spot checks on Greater Roland Park/Poplar Hill -----------------------
faresCsa = faresCsa.astype({'SALEAMOUNT': 'int32'})
pd.options.display.float_format = '{:.2f}'.format
t = faresCsa[
    (faresCsa.CSA2010 == 'Greater Roland Park/Poplar Hill')
    & (faresCsa['PROPERTYIN'].str.contains('CONDOMINIUM|SINGLE FAMILY', regex=True))
    & (faresCsa['SALEDATE'] >= '20'+year+'-01-01')
    & (faresCsa['SALEDATE'] <= '20'+year+'-12-31')
    & (faresCsa['SALEAMOUNT'] > 1000)
    & (faresCsa['PRIMARYCAT'] == 'ARMS LENGTH')
][['CSA2010', 'SALEAMOUNT', 'PROPERTYIN']].sort_values(by='SALEAMOUNT')
t.to_csv('GRP salepr records.csv')
t.plot.bar(x='CSA2010', y='SALEAMOUNT', rot=0)
t.head(20)

faresCsa.PROPERTYIN.unique()

pd.options.display.float_format = '{:.2f}'.format
faresCsa[
    (faresCsa.CSA2010 == 'Greater Roland Park/Poplar Hill')
    & (faresCsa.PRIMARYCAT == 'ARMS LENGTH')
].plot.bar(x='CSA2010', y='SALEAMOUNT', rot=0)

pd.options.display.float_format = '{:.2f}'.format
faresCsa[
    (faresCsa.CSA2010 == 'Greater Roland Park/Poplar Hill')
    & (faresCsa['PROPERTYIN'].str.contains('CONDOMINIUM|SINGLE FAMILY', regex=True))
].head(10).plot.bar(x='CSA2010', y='SALEAMOUNT', rot=0)

faresCsa.PRIMARYCAT.unique()
faresCsa[faresCsa['PRIMARYCAT'] == 'ARMS LENGTH']['CSA2010'].value_counts()
salepr.head(22)

31 - shomes - (Fares)

# shomes - Number of Homes Sold
# https://services1.arcgis.com/mVFRs7NF4iFitgbY/arcgis/rest/services/shomes/FeatureServer/layers
# Numerator: housing.fares_201X
# Denominator: None
long_Description: """The total number of residential properties sold in a calendar year."""
_SQL = """ with tbl AS ( select ( sum( case when ( landusecod = 'HIGH RISE CONDO' OR landusecod = 'CONDOMINIUM' OR landusecod = 'RESIDENTIAL (NEC)' OR landusecod = 'MID RISE CONDO' OR landusecod = 'MULTI FAMILY DWELLING' OR landusecod = 'MULTI FAMILY 10 UNITS LESS' OR landusecod = 'SFR' OR landusecod = 'TOWNHOUSE/ROWHOUSE' ) AND (primcatcod = 'ARMS LENGTH') AND (saledate between '20160101' and '20161231') AND (saleamount > 1000) then 1 else 0 end)::numeric ) as result, csa from vital_signs.match_csas_and_bc_by_geom('housing.fares_2016', 'gid', 'the_geom') a left join housing.fares_2016 b on a.gid = b.gid group by csa ) update vital_signs.data set shomes = result from tbl where data.csa = tbl.csa and data_year = '2016'; """
_translation = """ Sum records where landusecod in ['HIGH RISE CONDO', 'CONDOMINIUM', 'RESIDENTIAL (NEC)', 'MID RISE CONDO', 'MULTI FAMILY DWELLING', 'MULTI FAMILY 10 UNITS LESS', 'SFR', 'TOWNHOUSE/ROWHOUSE'] AND (primcatcod = 'ARMS LENGTH') AND (saledate between '20160101' and '20161231') AND (saleamount > 1000) """

#export
# Copy the Data
faresCsa = fares.copy()
faresCsa['SALEDATE'] = pd.to_datetime(faresCsa['SALEDATE'], format='%Y%m%d')

# Query the Data: same filter as 29-salepr.
# .copy() so the column assignment below writes to a real frame, not a view
# of faresCsa (SettingWithCopyWarning).
shomes = faresCsa[
    (faresCsa['PROPERTYIN'].str.contains('CONDOMINIUM|SINGLE FAMILY', regex=True))
    & (faresCsa['SALEDATE'] >= '20'+year+'-01-01')
    & (faresCsa['SALEDATE'] <= '20'+year+'-12-31')
    & (faresCsa['SALEAMOUNT'] > 1000)
    & (faresCsa['PRIMARYCAT'] == 'ARMS LENGTH')
].copy()

# Prep and Save the Filtered Records: each qualifying sale counts as 1.
shomes['31-shomes'+year] = 1
shomes = shomes[['CSA2010', '31-shomes'+year]]
shomes.to_csv('fares_filtered_'+year+'.csv')

shomes = shomes.groupby('CSA2010').sum(numeric_only=True)

# Make sure ALL csas and BaltimoreCity are included and sorted.
shomes = csa.merge(shomes, left_on='CSA2010', right_on='CSA2010', how='outer')
shomes.drop(columns=['OBJECTID', 'Shape__Length', 'Shape__Area', 'geometry'], inplace=True)

# *SPECIAL* Update the Baltimore City row with the city-wide total.
# Index 55 assumes BCity is the 56th row appended after the 55 CSAs — TODO confirm.
# .loc (not .at) for the whole-row write: .at is a scalar accessor and
# requires a (row, column) pair.
shomes.loc[55] = shomes.sum(numeric_only=True)
shomes.at[55, 'CSA2010'] = 'Baltimore City'

display(shomes.head(2))
shomes.tail(2)
shomes.to_csv('31-shomes'+year+'.csv')

Merge and Save Both

SEARCH

CONNECT WITH US

DONATE

Help us keep this resource free and available to the public. Donate now!

Donate to BNIA-JFI

CONTACT US

Baltimore Neighborhood Indicators Alliance
The Jacob France Institute
1420 N. Charles Street, Baltimore, MD 21201
410-837-4377 | bnia-jfi@ubalt.edu