Major Findings
- Column names are different (desc_ vs descriptio)
- The description columns share the same unique values, though 2019 has fewer columns.
- No arson, shooting, or common assault.
- Points within a CSA returned these results
| Description | 18 Values | 19 Values |
| --- | --- | --- |
| Total Points | 48143.0 | 37166 |
| Total Points in Polygons | 47748 | 37018 |
| Prcnt Points in Polygons | 0.9917952765718796 | 0.9960178657913147 |
- No firearm description, but we do have a shooting column in the 19 dataset, which means that gunhom can't be calculated.
What's Inside?:
- Retrieve everything done in the previous notebook.
- Compare aggregates and indicators.
SETUP Environment:
Import Modules
! pip install -U -q PyDrive
! pip install geopy
! pip install geopandas
! pip install geoplot
! pip install dataplay
! pip install matplotlib
! pip install psycopg2-binary! apt-get install build-dep python-psycopg2
! apt-get install libpq-dev
! apt-get install libspatialindex-dev!pip install rtree
!pip install dexplot%%capture
# These imports will handle everything
import os
import sys
import csv
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import geopandas as gpd
from geopandas import GeoDataFrame
import psycopg2
import pyproj
from pyproj import Proj, transform
# conda install -c conda-forge proj4
from shapely.geometry import Point
from shapely import wkb
from shapely.wkt import loads
# https://pypi.org/project/geopy/
from geopy.geocoders import Nominatim
# In case file is KML, enable support
import fiona
fiona.drvsupport.supported_drivers['kml'] = 'rw'
fiona.drvsupport.supported_drivers['KML'] = 'rw'
# Clear the cell output once the imports have finished.
from IPython.display import clear_output
clear_output(wait=True)
import ipywidgets as widgets
from ipywidgets import interact, interact_manual
Configure Environment
# Cosmetic settings only: how DataFrames are printed and which expressions a
# cell echoes.
from IPython.core.interactiveshell import InteractiveShell

# Applied in order; call pd.reset_option('max_colwidth') to restore the default
# column width.
for option_name, option_value in (
    ('display.expand_frame_repr', False),
    ('display.precision', 2),
    ('max_colwidth', 20),
):
    pd.set_option(option_name, option_value)

# "all" is the value assigned to IPython's ast_node_interactivity setting.
InteractiveShell.ast_node_interactivity = "all"
(Optional) GoogleDrive Access
# (Optional) Run this cell to gain access to Google Drive (Colabs only)
from google.colab import drive
# Colabs operates in a virtualized enviornment
# Colabs default directory is at ~/content.
# We mount Drive into a temporary folder at '~/content/drive'
drive.mount('/content/drive')cd drive/'MyDrive'/vitalSignslscd p1crimels
Razzle Dazzle
df2 = pd.read_csv('p1crime_18_19_indicators_and_desc_aggregates.csv')df2[['19_CSA2010', '18_Desc__SHOOTING', '18_crime18', '18_viol18', '18_prop18',
'18_gunhom18', '19_Desc__AGG. ASSAULT',
'19_Desc__AUTO THEFT', '19_Desc__BURGLARY', '19_Desc__HOMICIDE',
'19_Desc__LARCENY', '19_Desc__LARCENY FROM AUTO', '19_Desc__RAPE',
'19_Desc__ROBBERY - CARJACKING', '19_Desc__ROBBERY - COMMERCIAL',
'19_Desc__ROBBERY - RESIDENCE', '19_Desc__ROBBERY - STREET',
'19_crime19', '19_viol19', '19_prop19', '18_Unnamed: 0',
'18_Desc__AGG. ASSAULT', '18_Desc__ARSON', '18_Desc__AUTO THEFT',
'18_Desc__BURGLARY', '18_Desc__COMMON ASSAULT', '18_Desc__HOMICIDE',
'18_Desc__LARCENY', '18_Desc__LARCENY FROM AUTO', '18_Desc__RAPE',
'18_Desc__ROBBERY - CARJACKING', '18_Desc__ROBBERY - COMMERCIAL',
'18_Desc__ROBBERY - RESIDENCE', '18_Desc__ROBBERY - STREET',
]].to_csv('vizthis.csv')df = pd.read_csv('vizthis.csv')!pip install sweetviz
!pip install autoviz# https://medium.com/python-in-plain-english/how-to-use-pandas-profiling-on-google-colab-e34f34ff1c9f
! pip install https://github.com/pandas-profiling/pandas-profiling/archive/master.zipimport numpy as np
import pandas as pd
from pandas_profiling import ProfileReportdesign_report = ProfileReport(
df,
title="Pandas Profiling Report One",
html= {'style': { 'full_width': True } }
)
design_report.to_file(output_file='pandas-profiling.html')
#from IPython.display import display,HTML,IFrame
#display(HTML(open('pandas-profiling.html').read()))
design_report.to_notebook_iframe()display(HTML(open('pandas-profiling.html').read()))# https://pypi.org/project/sweetviz/
import sweetviz as sv
sweet_report = sv.analyze(df)
sweet_report.show_html('sweet_report.html')
from IPython.display import display,HTML,IFrame
display(HTML(open('sweet_report.html').read()))ls!pip install pivottablejsdf.columns# https://github.com/AutoViML/AutoViz
# Run AutoViz over the CSV written earlier ('vizthis.csv').
from autoviz.AutoViz_Class import AutoViz_Class
AV = AutoViz_Class()
# NOTE: this rebinds `df` to whatever AutoViz returns, so later cells that use
# `df` operate on that object rather than on the earlier pd.read_csv result.
df = AV.AutoViz('vizthis.csv') # , depVar='CSA2010'
https://towardsdatascience.com/exploratory-data-analysis-dataprep-eda-vs-pandas-profiling-7137683fe47f
!pip install dataprepfrom dataprep.eda import plot, plot_correlation, plot_missing, create_report
plot(df)plot(df, "18_Desc__ARSON") #, bins=2)plot(df, "18_crime18", "19_crime19") #, bins=2)plot(df, "18_viol18", "19_viol19") #, bins=2)plot(df, "18_prop18", "19_prop19") #, bins=2)plot(df, "18_gunhom18", "19_gunhom19") #, bins=2)plot_correlation(df, "18_prop18", value_range=[-1, 1])plot_correlation(df, x="Desc__ARSON", y="Desc__AUTO THEFT", k=5) # top k attributesmissing = plot_missing(df) #, "Desc__ARSON")missing.save('My missing Report')missingreport = create_report(df)report.save('My Fantastic Report')reportls
uhhhm
dft18 = dft18.merge( csa[ ['CSA2010', 'geometry'] ], left_on='CSA2010', right_on='CSA2010' ) dft18 = gpd.GeoDataFrame( dft18, geometry='geometry')dft18.head(1)dft18.plot(column='Desc__AGG. ASSAULT')dft18.head(1)!pip install mpld3df.head()df2.head()data!pip install mpld3