# default_exp enoch
Todo:
- Refactor Queries for no Point In Polygons
- Wrap as Function
What's Inside?:
Indicators Used
- ❌ 128 - libcard - (EnochPratt) The rate of persons per 1,000 residents that possess a valid public library system card. This includes all library card types (first card, juvenile, young adult, adult).
# Legacy SQL for the `libcard` indicator, kept as a string for reference.
# Per CSA it counts the rows flagged `csa_present`, multiplies by 1000 and
# divides by `the_pop`, then writes the result into vital_signs.data.libcard
# for data_year '2016'.
# NOTE(review): nothing in the visible part of this file executes this string.
originalSql = """
with tbl AS (
select (sum(
case
when csa_present
then 1
else 0
end)::numeric
* 1000 )/the_pop as result, csa
from vital_signs.match_csas_and_bc_by_geom('enochpratt.enochpratt_2016', 'gid', 'the_geom') a
left join enochpratt.enochpratt_2016 b on a.gid = b.gid
group by csa, the_pop
)
update vital_signs.data
set libcard = result from tbl where data.csa = tbl.csa and data_year = '2016';
"""
SETUP:
Import Modules
# @title Run: Install Modules! pip install geopy
! pip install geopandas
! pip install geoplot
! pip install dataplay!pip install dexplot# export
# @title Run: Import Modules
# These imports will handle everything
import os
import sys
import csv
from IPython.display import clear_output
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import geopandas as gpd
from geopandas import GeoDataFrame
import psycopg2
import pyproj
from pyproj import Proj, transform
# conda install -c conda-forge proj4
from shapely.geometry import Point
from shapely.geometry import LineString
from shapely import wkb
from shapely.wkt import loads
# https://pypi.org/project/geopy/
from geopy.geocoders import Nominatim
import folium
from folium import plugins
from dataplay.merge import mergeDatasets
import dexplot as dxp
# Enable KML support in fiona: register both spellings of the driver name
# with mode 'rw' in a single call.
import fiona
fiona.drvsupport.supported_drivers.update({'kml': 'rw', 'KML': 'rw'})
Configure Environment
# Cosmetic notebook settings only; none of this affects the computed data.
# pandas display options are applied from a single table.
for _option, _value in (
    ('display.expand_frame_repr', False),
    ('display.precision', 2),
    ('max_colwidth', 50),
):
    pd.set_option(_option, _value)

# Configure IPython's ast_node_interactivity to "all".
from IPython.core.interactiveshell import InteractiveShell
InteractiveShell.ast_node_interactivity = "all"

# To undo the column-width setting: pd.reset_option('max_colwidth')
#hide
# %matplotlib inline
# !jupyter nbextension enable --py widgetsnbextension
Read in df
# Load the "Tpop" feature layer (all fields plus geometry) as a GeoDataFrame.
csa_gdf = gpd.read_file("https://services1.arcgis.com/mVFRs7NF4iFitgbY/ArcGIS/rest/services/Tpop/FeatureServer/0/query?where=1%3D1&outFields=*&returnGeometry=true&f=pgeojson")
# Plot the layer coloured by the tpop10 column, then preview the first rows.
# These were two notebook cells fused onto one line, which is a syntax error.
csa_gdf.plot(column='tpop10')
csa_gdf.head()
(Optional) GoogleDrive Access
# (Optional) Run this cell to gain access to Google Drive (Colabs only)
from google.colab import drive
# Colabs operates in a virtualized enviornment
# Colabs default directory is at ~/content.
# We mount Drive into a temporary folder at '~/content/drive'
drive.mount('/content/drive')cd drive/'MyDrive'/vitalSignslscd enochlscrs = {'init' :'epsg:2248'}
gdf = gpd.read_file("EnochPratt_2019.shp", crs=crs)
gdf = gdf.to_crs(epsg=4326)
gdf.head()from dataplay import geoms help(geoms)fdf.head()# export
# Point-in-polygon pass: geoms.workWithGeometryData is called with
# method='pinp' on the points in `gdf` and the polygons in `csa_gdf`; the
# 'pointsinpolygon' column of its result is used as the per-polygon count.
fdf = geoms.workWithGeometryData(method='pinp', df=gdf, polys=csa_gdf, ptsCoordCol='geometry', polygonsCoordCol='geometry', polyColorCol=False, polygonsLabel='RecordsInCsa', pntsClr='red', polysClr='white')
findf = fdf.copy()
findf = findf[['CSA2010', 'tpop10', 'pointsinpolygon', 'geometry']]
# libcard19 = records per 1,000 residents (tpop10) for each CSA.
findf['libcard19'] = findf['pointsinpolygon'] / findf['tpop10'] * 1000
findf.tail(1)
findf.dtypes
#export
# Append a citywide summary row built from the column totals.
# DataFrame.append was removed in pandas 2.0, so the row is added with
# pd.concat; the totals are taken before concatenating, so the values match
# what the old append call produced.
total_pop = findf['tpop10'].sum()
total_points = findf['pointsinpolygon'].sum()
city_row = pd.DataFrame([{
    'CSA2010': 'Baltimore City',
    'tpop10': total_pop,
    'pointsinpolygon': total_points,
    'geometry': 'na',
    'libcard19': (total_points / total_pop * 1000),
}])
findf = pd.concat([findf, city_row], ignore_index=True)
findf.tail()
findf.to_csv('enoch_pratt_libcard19.csv')
The rate of persons per 1,000 residents that possess a valid public library system card. This includes all library card types (first card, juvenile, young adult, adult).
OLD 18 content below
import pandas as pd!git clone https://github.com/BNIA/tidyaddr-js.git! npm install tidyaddr-js/cd tidyaddr-jspd.read_excel(open('EnochPratt_FY2020_VS19.xlsx', 'rb'), sheet_name='AllCardTypes2020').to_csv('lib.csv')ls!node tidyaddr.js clean-csv lib.csv tidyaddrd.csvdf = pd.read_csv("tidyaddrd.csv");df.head(1)df.to_excel('output1.xlsx', engine='xlsxwriter') ;
df.head(1)! pip install geopy
! pip install geopandas
! pip install geoplot
! pip install dataplayls# export
# @title Run: Import Modules
# These imports will handle everything
import os
import sys
import csv
from IPython.display import clear_output
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import geopandas as gpd
from geopandas import GeoDataFrame
import psycopg2
import pyproj
from pyproj import Proj, transform
# conda install -c conda-forge proj4
from shapely.geometry import Point
from shapely.geometry import LineString
from shapely import wkb
from shapely.wkt import loads
# https://pypi.org/project/geopy/
from geopy.geocoders import Nominatim
import folium
from folium import plugins
from dataplay.merge import mergeDatasets
import dexplot as dxp
# Enable KML support in fiona: register both spellings of the driver name
# with mode 'rw' in a single call.
import fiona
fiona.drvsupport.supported_drivers.update({'kml': 'rw', 'KML': 'rw'})
# Geocode every address in `df` with Nominatim and store the resulting points
# in a new 'geometry' column on a copy of the frame.
address_df = df.copy()
addrCol = 'address'
# An example of loading in an internal BNIA file
crs = {'init' :'epsg:2248'}
# For more information visit: https://geopy.readthedocs.io/en/stable/#module-geopy.geocoders
# The dataset has no lat/lng columns; the address text is read from the
# column named by `addrCol`.
geometry = []
geolocator = Nominatim(user_agent="my-application")
for address in address_df[addrCol]:
    # Only the network call sits inside the try. `except Exception` (rather
    # than a bare except) still treats any geocoder failure as "not found"
    # but lets KeyboardInterrupt stop this long-running loop.
    try:
        geol = geolocator.geocode(address, timeout=None)
    except Exception:
        geol = None
    if geol is None:
        # No match (geocode returned None) or the lookup failed: keep the
        # row aligned with a (0, 0) placeholder point.
        geometry.append(Point(0, 0))
    else:
        # Build a mappable point from the response's longitude/latitude.
        geometry.append(Point(geol.longitude, geol.latitude))
# Finally, we stuff the geometry data we created back into the dataframe
address_df['geometry'] = geometry
# save after every 100th geocode