# default_exp BCPSS
This colab and more can be found at https://github.com/BNIA/vitalsigns.
"""
SchoolSwitcherFlag
Frequency
Percent
Valid Percent
Cumulative Percent
0 = Duplicate
1 = ??
2 = School Switcher
3 = Grade Switcher (same school--so effectively a duplicate)
4 = Primary Record
"""
Whats Inside?:
The Guided Walkthrough
This notebook was made to create the following Vital Signs Indicators:
Indicators Used
- ✅ 66 eattend - (BCPSS)
- ✅ 67 mattend - (BCPSS)
- ✅ 68 hsattend - (BCPSS)
- ✅ 70 aastud - (BCPSS)
- ✅ 71 wstud - (BCPSS)
- ✅ 72 hstud - (BCPSS)
- ✅ 73 abse - (BCPSS)
- ✅ 74 absmd - (BCPSS)
- ✅ 75 abshs - (BCPSS)
- ✅ 91 drop - (BCPSS)
- ✅ 92 compl - (BCPSS)
- ~❌ 94 sclsw - (BCPSS)
- ✅ 195 eenrol - (BCPSS)
- ✅ 196 menrol - (BCPSS)
- ✅ 197 hsenrol - (BCPSS)
- ❌ 206 pmath3 - (BCPSS)
- ❌ 207 pread3 - (BCPSS)
- ❌ 208 pmath5 - (BCPSS)
- ❌ 209 pread5 - (BCPSS)
- ❌ 210 pmath8 - (BCPSS)
- ❌ 211 pread8 - (BCPSS)
- ✅ 212 palg1 - (BCPSS)
- ✅ 213 palg2 - (BCPSS)
- ~❌ 214 kra - (BCPSS)
Datasets Used
- ✅ Schools.BCPSS_201X (66-eattend, 67-mattend -> grade )
# Two-digit data year; concatenated into the input file name ("BCPSS_20" + year)
# and into the indicator column / output file names further down.
year = '19'
Guided Walkthrough
SETUP Environment:
Import Modules
! pip install -U -q PyDrive
! pip install geopy
! pip install geopandas
! pip install geoplot
! pip install dataplay
! pip install matplotlib
! pip install psycopg2-binary
! apt-get install build-dep python-psycopg2
! apt-get install libpq-dev
! apt-get install libspatialindex-dev
!pip install rtree
!pip install dexplot
from dataplay.geoms import workWithGeometryData
%%capture
# These imports will handle everything
import os
import sys
import csv
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import geopandas as gpd
from geopandas import GeoDataFrame
import psycopg2
import pyproj
from pyproj import Proj, transform
# conda install -c conda-forge proj4
from shapely.geometry import Point
from shapely import wkb
from shapely.wkt import loads
# https://pypi.org/project/geopy/
from geopy.geocoders import Nominatim
# In case file is KML, enable support
import fiona
# Register KML under both spellings of the driver name for read and write.
fiona.drvsupport.supported_drivers['kml'] = 'rw'
fiona.drvsupport.supported_drivers['KML'] = 'rw'

# Clear the noisy install/import output from the cell.
from IPython.display import clear_output
clear_output(wait=True)

import ipywidgets as widgets
from ipywidgets import interact, interact_manual
Configure Environment
# Cosmetic notebook settings only; nothing here affects the computed values.
for _option, _value in (
    ('display.expand_frame_repr', False),
    ('display.precision', 2),
    ('max_colwidth', 20),
):
    pd.set_option(_option, _value)

# Ask IPython to display every top-level expression of a cell.
from IPython.core.interactiveshell import InteractiveShell
InteractiveShell.ast_node_interactivity = "all"
Prep Datasets
TPOP CSA and Baltimore
Get Baltimore
Click to toggle
# Layer 0 of the Tpop feature service, loaded as a GeoDataFrame.
csa = "https://services1.arcgis.com/mVFRs7NF4iFitgbY/ArcGIS/rest/services/Tpop/FeatureServer/0/query?where=1%3D1&outFields=*&returnGeometry=true&f=pgeojson"
csa = gpd.read_file(csa)
csa.head(1)

# Get CSA
# Layer 1 of the same service; its 'City_1' column is renamed to 'CSA2010'
# so the row can be stacked under the layer-0 rows.
url2 = "https://services1.arcgis.com/mVFRs7NF4iFitgbY/ArcGIS/rest/services/Tpop/FeatureServer/1/query?where=1%3D1&outFields=*&returnGeometry=true&f=pgeojson"
csa2 = gpd.read_file(url2)
csa2['CSA2010'] = csa2['City_1']
csa2['OBJECTID'] = 56
csa2 = csa2.drop(columns=['City_1'])
csa2.head()

# Append the city row at the BOTTOM of the frame (do not put it first): per the
# original note, the point-in-polygon step returns only the last matching
# CSA label. The indicator functions below also expect the city row to be last.
# DataFrame.append was removed in pandas 2.0; pd.concat is the replacement.
csa = pd.concat([csa, csa2], ignore_index=True)
csa.head(3)
csa.tail(3)
csa.drop(columns=['Shape__Area', 'Shape__Length', 'OBJECTID']).to_file("BCity_and_CSA.geojson", driver='GeoJSON')
csa.head()
BCPSS
import pandas as pd
import geopandas
# original = gpd.read_file("BCPSS_20"+year+"_CSACity.shp", geometry='geometry');
original = pd.read_csv("BCPSS_20"+year+"_CSACity_dbf_to_csv.csv")
original.columns
original.head()
original.rename(columns={'csa': 'CSA2010', 'BaltCity': 'InBaltimore'}, inplace=True)

# Keep only records that matched a CSA or the city boundary.
df = original[original['CSA2010'].notnull() | original['InBaltimore'].notnull()]
df.head(1)

print('After filtering records where a CSA or Baltimore geo-code match Exists')
print( 'All rows Before Filter: ', original.shape[0] ) # rows, columns
print( '# w BCity.isnull: ', df.InBaltimore.isnull().sum() )
bmorow = df[df.CSA2010.isnull()].shape[0]
print( '# w CSA2010.isnull: ', bmorow )
csarow = df[df.CSA2010.notnull()].shape[0]
print( '# w CSA2010.notnull: ', csarow )
print( '# rows After Filter: ', df.shape[0],'==',csarow,'+',bmorow,'==', csarow + bmorow)

# Records whose CSA label is a single blank are inspected, then removed.
df[df['CSA2010'] == ' ']['InBaltimore'].unique()
# .copy() so the fillna below writes to an independent frame, not a slice.
df = df[df['CSA2010'] != ' '].copy()

# add baltimore city: records with no CSA label are labelled 'Baltimore City'.
df['CSA2010'] = df['CSA2010'].fillna('Baltimore City')
df['CSA2010'].head()

bcpssTemp = df.copy()
bcpssTemp.head(1)
bcpssTemp = bcpssTemp[['CSA2010','SchoolSwitcherFlag', 'EnrollmentFlag', 'daysattend', 'daysabsent', 'grade', 'wleavecode', 'hispani_la', 'race_AIAN', 'race_asian', 'race_AA', 'race_NHPI', 'race_white', 'std_number', 'enterdate']].copy()
# Grades are compared as strings (regex match) by the attendance indicators.
bcpssTemp['grade'] = bcpssTemp['grade'].map(str)
bcpssTemp.head(1)

# Record counts per SchoolSwitcherFlag value.
bcpssTemp[bcpssTemp['SchoolSwitcherFlag'] == 0].shape[0] # 0 = Duplicate
bcpssTemp[bcpssTemp['SchoolSwitcherFlag'] == 1].shape[0]
bcpssTemp[bcpssTemp['SchoolSwitcherFlag'] == 2].shape[0]
bcpssTemp[bcpssTemp['SchoolSwitcherFlag'] == 3].shape[0]
bcpssTemp[bcpssTemp['SchoolSwitcherFlag'] == 4].shape[0]
School switcher records 0 1 3 and 4 all together make a difference here because we do not dedup on enterdate for each student like we do on the other queries.
We want to know if students come in and out of enrollment multiple times
Create the School Switchers Dataset
schoolSwitchersDF = bcpssTemp[bcpssTemp['SchoolSwitcherFlag'] == 2] # 2 = School Switcher

# Create the Enrollment Dataset
# Flags 0, 1, 3 and 4 are every value except 2 (School Switcher).
_non_switcher = bcpssTemp['SchoolSwitcherFlag'].isin([0, 1, 3, 4])
enrollment = bcpssTemp[(bcpssTemp['EnrollmentFlag'] == 1) & _non_switcher]

# Base dataset handed to the indicator functions; copied so it is an
# independent frame rather than a slice of bcpssTemp.
bcpss = bcpssTemp[_non_switcher].copy()

schoolSwitchersDF.shape[0]
enrollment.shape[0]
bcpss.shape[0]
Indicators:
66 eattend - G - No Drop Dupes
Who was in the classroom at any point in the year
# Indicator 66 (eattend): display label followed by earlier SQL versions.
# NOTE(review): none of these strings is executed in the visible part of this
# file; they appear to be kept as reference for the pandas function below.
lbl = """ BCPSS - Education and Youth - Number of Students Ever Attended 1st - 5th Grade """
# 2016 version: writes the per-CSA count into vital_signs.data.eattend.
query16 = """with tbl AS (
select (sum(
case
when (daysattend > 1) AND (grade = '1' OR grade = '2' OR grade = '3' OR grade = '4' OR grade = '5')
then 1
else 0
end)::numeric
) as result, csa
from vital_signs.match_csas_and_bc_by_geom('schools.afile_2016', 'gid', 'the_geom') a
left join schools.afile_2016 b on a.gid = b.gid
group by csa
)
update vital_signs.data
set eattend = result from tbl where data.csa = tbl.csa and data_year = '2016';
"""
# 2016-17 version with the same sum(case ...) count, returned as a result set.
OLD17Query = """with tbl AS (
select (sum(
case
when (daysattend > 1) AND (grade = '1' OR grade = '2' OR grade = '3' OR grade = '4' OR grade = '5')
then 1
else 0
end)::numeric
) as result, csa
from vital_signs.match_csas_and_bc_by_geom('schools.afile_1617', 'gid', 'the_geom') a
left join schools.afile_1617 b on a.gid = b.gid
group by csa
)
select * from tbl where 1 = 1 ORDER BY csa ASC;
"""
# 2016-17 rewrite: count(*) limited to each student's max(enterdate) record.
NEW17Query = """with tbl AS (
-- What we want
select count(*) as result, csa
-- CSA
from vital_signs.match_csas_and_bc_by_geom('schools.afile_1617', 'gid', 'the_geom') a
left join schools.afile_1617 b on a.gid = b.gid
-- RESULT
where daysattend > 1 AND grade in ('1', '2', '3', '4', '5') AND (std_number, enterdate) in
( select std_number, max(enterdate) enterdate from schools.afile_1617 group by std_number )
group by csa
)
select * from tbl where 1 = 1 ORDER BY csa ASC;
"""
# Only the filter conditions of NEW17Query.
simplifiedQuery = """
daysattend > 1 AND
grade in ('1', '2', '3', '4', '5') AND
(std_number, enterdate) in ( select std_number, max(enterdate) enterdate from schools.afile_1617 group by std_number )
"""
# As written, for every record the script takes the entire dataset, groups it by std_number,
# returns std_number and max(enterdate) for each group, and compares each pair to the current record's std_number and enterdate.
# If any pair matches, this part of the filter succeeds for the current record. It then does the same for the next record.
hard = """(std_number, enterdate) in ( select std_number, max(enterdate) enterdate from schools.afile_1617 group by std_number )"""
# A simpler way is to sort by enterdate and then drop duplicates on std_number, leaving only the most recent enterdate for each unique std_number.
easy = """df.sort_values('enterdate', ascending=False).drop_duplicates(['std_number'])"""
#export
def eattend(df, csa, yr):
    """Count records for grades 1-5 with more than one day attended, per CSA.

    Args:
        df: Student records; reads 'CSA2010', 'grade' (string) and
            'daysattend'. Not modified.
        csa: Boundary frame keyed by 'CSA2010' that also carries 'geometry',
            'Shape__Length' and 'Shape__Area'. Its LAST row is treated as
            the citywide row.
        yr: Two-digit data year (e.g. '19'); used in the output column name
            and to pick the comparison year.

    Returns:
        DataFrame with one row per 'CSA2010' value containing, among the
        remaining summed columns, 'count' and '66-eattend<yr>'. When the
        published Eattend layer has the comparison year, that column plus
        'change' (string, two decimals) and 'percentChange' are added.
    """
    yr = str(yr)
    column = '66-eattend' + yr

    # Create the numerator: one counted row per qualifying record.
    in_grades = df['grade'].str.contains('^1$|^2$|^3$|^4$|^5$', regex=True)
    records = df[in_grades & (df['daysattend'] > 1)].copy()
    records['count'] = 1
    totals = records.groupby('CSA2010').sum(numeric_only=True)

    # Make sure ALL CSAs and Baltimore City are included.
    out = csa.merge(totals, on='CSA2010', how='outer')
    out.drop(columns=['geometry', 'Shape__Length', 'Shape__Area'], inplace=True)

    # The citywide value is the sum over every row (records matched only to
    # the city are included). The row is located by label instead of a fixed
    # position because an outer merge does not guarantee the left row order.
    city_row = out['CSA2010'] == csa['CSA2010'].iloc[-1]
    out.loc[city_row, 'count'] = out['count'].sum()

    # Perform the calculation
    out[column] = out['count']

    # Compare against the previously published year, when available.
    compareYears = gpd.read_file("https://services1.arcgis.com/mVFRs7NF4iFitgbY/ArcGIS/rest/services/Eattend/FeatureServer/0/query?where=1%3D1&outFields=*&returnGeometry=true&f=pgeojson")
    goback = 2 if yr == '19' else 3
    prevYear = 'eattend' + str(int(yr) - goback)
    if prevYear in compareYears.columns:
        out = out.merge(compareYears[['CSA2010', prevYear]], on='CSA2010', how='outer')
        out['change'] = out[column] - out[prevYear]
        out['percentChange'] = out['change'] / out[prevYear] * 100
        # Formatted last so percentChange is computed from the numeric value.
        out['change'] = out['change'].apply(lambda x: "{:.2f}".format(x))

    print( 'Records Matching Query: ', out.size / len(out.columns) )
    # errors='ignore': numeric_only=True may already have removed some of
    # these columns (any that are not numeric in the input).
    return out.drop(columns=['daysattend', 'SchoolSwitcherFlag', 'wleavecode', 'daysabsent', 'race_AIAN', 'race_asian', 'race_AA', 'race_NHPI', 'race_white', 'std_number', 'enterdate'], errors='ignore')


fin = eattend(bcpss, csa, year)
fin.to_csv('66-eattend'+year+'.csv', index=False)
fin.tail(6)
67 mattend - G - No Drop Dupes
# Indicator 67 (mattend): display label followed by earlier SQL versions.
# NOTE(review): these strings are not executed in the visible part of this
# file; they appear to be kept as reference for the pandas function below.
lbl = """BCPSS - Education and Youth - Number of Students Ever Attended 6th - 8th Grade """
# 2016 version: writes the per-CSA count into vital_signs.data.mattend.
# Note that grade is compared to unquoted numbers here, unlike the 2016-17 text.
query16 = """with tbl AS (
select (sum(
case
when (daysattend > 1) AND ( grade =6 OR grade = 7 OR grade = 8)
then 1
else 0
end)::numeric
) as result, csa
from vital_signs.match_csas_and_bc_by_geom('schools.afile_2016', 'gid', 'the_geom') a
left join schools.afile_2016 b on a.gid = b.gid
group by csa
)
update vital_signs.data
set mattend = result from tbl where data.csa = tbl.csa and data_year = '2016';
"""
# 2016-17 version with the same sum(case ...) count, returned as a result set.
OLD17Query = """ with tbl AS (
select (sum(
case
when (daysattend > 1) AND (grade = '6' OR grade = '7' OR grade = '8')
then 1
else 0
end)::numeric
) as result, csa
from vital_signs.match_csas_and_bc_by_geom('schools.afile_1617', 'gid', 'the_geom') a
left join schools.afile_1617 b on a.gid = b.gid
group by csa
)
select * from tbl where 1 = 1 ORDER BY csa ASC;
"""
# 2016-17 rewrite: count(*) limited to each student's max(enterdate) record.
NEW17Query = """ select count(*) as result, csa
from vital_signs.match_csas_and_bc_by_geom('schools.afile_1617', 'gid', 'the_geom') a
left join schools.afile_1617 b on a.gid = b.gid
where daysattend > 1 AND grade in ('6', '7', '8') AND (std_number, enterdate) in
( select std_number, max(enterdate) enterdate from schools.afile_1617 group by std_number )
group by csa ORDER BY csa ASC;
"""#export
def mattend(df, csa, yr):
    """Count records for grades 6-8 with more than one day attended, per CSA.

    Args:
        df: Student records; reads 'CSA2010', 'grade' (string) and
            'daysattend'. Not modified.
        csa: Boundary frame keyed by 'CSA2010' that also carries 'geometry',
            'Shape__Length' and 'Shape__Area'. Its LAST row is treated as
            the citywide row.
        yr: Two-digit data year (e.g. '19'); used in the output column name
            and to pick the comparison year.

    Returns:
        DataFrame with one row per 'CSA2010' value containing, among the
        remaining summed columns, 'count' and '67-mattend<yr>'. When the
        published Mattend layer has the comparison year, that column plus
        'change' (string, two decimals) and 'percentChange' are added.
    """
    yr = str(yr)
    column = '67-mattend' + yr

    # Create the numerator: one counted row per qualifying record.
    in_grades = df['grade'].str.contains('^6$|^7$|^8$', regex=True)
    records = df[in_grades & (df['daysattend'] > 1)].copy()
    records['count'] = 1
    totals = records.groupby('CSA2010').sum(numeric_only=True)

    # Make sure ALL CSAs and Baltimore City are included.
    out = csa.merge(totals, on='CSA2010', how='outer')
    out.drop(columns=['geometry', 'Shape__Length', 'Shape__Area'], inplace=True)

    # The citywide value is the sum over every row (records matched only to
    # the city are included). The row is located by label instead of a fixed
    # position because an outer merge does not guarantee the left row order.
    city_row = out['CSA2010'] == csa['CSA2010'].iloc[-1]
    out.loc[city_row, 'count'] = out['count'].sum()

    # Perform the calculation
    out[column] = out['count']

    # Compare against the previously published year, when available.
    compareYears = gpd.read_file("https://services1.arcgis.com/mVFRs7NF4iFitgbY/ArcGIS/rest/services/Mattend/FeatureServer/0/query?where=1%3D1&outFields=*&returnGeometry=true&f=pgeojson")
    goback = 2 if yr == '19' else 3
    prevYear = 'mattend' + str(int(yr) - goback)
    if prevYear in compareYears.columns:
        out = out.merge(compareYears[['CSA2010', prevYear]], on='CSA2010', how='outer')
        out['change'] = out[column] - out[prevYear]
        out['percentChange'] = out['change'] / out[prevYear] * 100
        # Formatted last so percentChange is computed from the numeric value.
        out['change'] = out['change'].apply(lambda x: "{:.2f}".format(x))

    print( 'Records Matching Query: ', out.size / len(out.columns) )
    # errors='ignore': numeric_only=True may already have removed some of
    # these columns (any that are not numeric in the input).
    return out.drop(columns=['daysattend', 'SchoolSwitcherFlag', 'wleavecode', 'daysabsent', 'race_AIAN', 'race_asian', 'race_AA', 'race_NHPI', 'race_white', 'std_number', 'enterdate'], errors='ignore')


fin = mattend(bcpss, csa, year)
fin.to_csv('67-mattend'+year+'.csv', index=False)
fin.tail(6)
68 hsattend - G - No Drop Dupes
""" daysattend > 1 AND grade in ('9', '10', '11', '12') AND (std_number, enterdate) in ( select std_number, max(enterdate) enterdate from schools.afile_1617 group by std_number )"""#export
def hsattend(df, csa, yr):
    """Count records for grades 9-12 with more than one day attended, per CSA.

    Args:
        df: Student records; reads 'CSA2010', 'grade' (string) and
            'daysattend'. Not modified.
        csa: Boundary frame keyed by 'CSA2010' that also carries 'geometry',
            'Shape__Length' and 'Shape__Area'. Its LAST row is treated as
            the citywide row.
        yr: Two-digit data year (e.g. '19'); used in the output column name
            and to pick the comparison year.

    Returns:
        DataFrame with one row per 'CSA2010' value containing, among the
        remaining summed columns, 'count' and '68-hsattend<yr>'. When the
        published Hsattend layer has the comparison year, that column plus
        'change' (string, two decimals) and 'percentChange' are added.
    """
    yr = str(yr)
    column = '68-hsattend' + yr

    # Create the numerator: one counted row per qualifying record.
    in_grades = df['grade'].str.contains('^9$|^10$|^11$|^12$', regex=True)
    records = df[in_grades & (df['daysattend'] > 1)].copy()
    records['count'] = 1
    totals = records.groupby('CSA2010').sum(numeric_only=True)

    # Make sure ALL CSAs and Baltimore City are included.
    out = csa.merge(totals, on='CSA2010', how='outer')
    out.drop(columns=['geometry', 'Shape__Length', 'Shape__Area'], inplace=True)

    # The citywide value is the sum over every row (records matched only to
    # the city are included). The row is located by label instead of a fixed
    # position because an outer merge does not guarantee the left row order.
    city_row = out['CSA2010'] == csa['CSA2010'].iloc[-1]
    out.loc[city_row, 'count'] = out['count'].sum()

    # Perform the calculation
    out[column] = out['count']

    # Compare against the previously published year, when available.
    compareYears = gpd.read_file("https://services1.arcgis.com/mVFRs7NF4iFitgbY/ArcGIS/rest/services/Hsattend/FeatureServer/0/query?where=1%3D1&outFields=*&returnGeometry=true&f=pgeojson")
    goback = 2 if yr == '19' else 3
    prevYear = 'hsattend' + str(int(yr) - goback)
    if prevYear in compareYears.columns:
        out = out.merge(compareYears[['CSA2010', prevYear]], on='CSA2010', how='outer')
        out['change'] = out[column] - out[prevYear]
        out['percentChange'] = out['change'] / out[prevYear] * 100
        # Formatted last so percentChange is computed from the numeric value.
        out['change'] = out['change'].apply(lambda x: "{:.2f}".format(x))

    print( 'Records Matching Query: ', out.size / len(out.columns) )
    # errors='ignore': numeric_only=True may already have removed some of
    # these columns (any that are not numeric in the input).
    return out.drop(columns=['daysattend', 'SchoolSwitcherFlag', 'wleavecode',
                             'daysabsent', 'race_AIAN', 'race_asian', 'race_AA',
                             'race_NHPI', 'race_white', 'std_number', 'enterdate'],
                    errors='ignore')


fin = hsattend(bcpss, csa, year)
fin.to_csv('68-hsattend'+year+'.csv', index=False)
fin.head(6)
70 aastud - G- No Drop Dupes
# Indicator 70 (aastud): display label followed by earlier SQL versions.
# NOTE(review): these strings are not executed in the visible part of this
# file; they appear to be kept as reference for the pandas function below.
lbl = """ BCPSS - Education and Youth - Percent of Students that are African American (non-Hispanic)"""
# 2016 version: numerator / denominator * 100 written to vital_signs.data.aastud.
query2016 = """with numerator as (
select sum(
case
when (daysattend::numeric > 1) AND
(future_rac::text ='300' OR future_rac::text ='305' OR future_rac::text ='340' OR future_rac::text ='345') AND
(hipani_lat LIKE 'N') AND
(grade::numeric <13 )
then 1
else 0
end)::numeric as result, csa
from vital_signs.match_csas_and_bc_by_geom('schools.afile_2016', 'gid', 'the_geom') a
left join schools.afile_2016 b on a.gid = b.gid
group by csa
),
denominator AS (
select (sum(
case
when (daysattend::numeric > 1) AND (grade::numeric <13 )
then 1
else 0
end)::numeric
) as result, csa
from vital_signs.match_csas_and_bc_by_geom('schools.afile_2016', 'gid', 'the_geom') a
left join schools.afile_2016 b on a.gid = b.gid
group by csa
),
tbl AS (
select denominator.csa,(numerator.result / denominator.result)*(100::numeric) as result
from numerator left join denominator on numerator.csa = denominator.csa
)
update vital_signs.data
set aastud = result from tbl where data.csa = tbl.csa and data_year = '2016';
"""
# 2016-17 sum(case ...) version; the division is guarded with NULLIF.
OLD17Query = """with numerator as (
select sum(
case
when (daysattend::numeric > 1) AND (future_rac = 300 OR future_rac = 305 OR future_rac = 340 OR future_rac = 345) AND (hispani_la LIKE 'N') AND (grade <13 )
then 1 else 0 end
)::numeric as result, csa
from vital_signs.match_csas_and_bc_by_geom('schools.afile_1617', 'gid', 'the_geom') a
left join schools.afile_1617 b on a.gid = b.gid
group by csa
),
denominator AS (
select (sum(
case
when (daysattend > 1) AND (grade <13 )
then 1 else 0 end
)::numeric ) as result, csa
from vital_signs.match_csas_and_bc_by_geom('schools.afile_1617', 'gid', 'the_geom') a
left join schools.afile_1617 b on a.gid = b.gid
group by csa
),
tbl AS (
select denominator.csa,(numerator.result / NULLIF(denominator.result, 0) )*(100::numeric) as result
from numerator left join denominator on numerator.csa = denominator.csa
)
select * from tbl where 1 = 1 ORDER BY csa ASC;
"""
# 2016-17 rewrite: count(*) limited to each student's max(enterdate) record.
New17Query = """with numerator as (
select count(*)::numeric as result, csa
from vital_signs.match_csas_and_bc_by_geom('schools.afile_1617', 'gid', 'the_geom') a
left join schools.afile_1617 b on a.gid = b.gid
where daysattend > 1 AND future_rac in ('300', '305', '340', '345') AND (hispani_la LIKE 'N') AND (grade <13 ) AND (std_number, enterdate) in
( select std_number, max(enterdate) enterdate from schools.afile_1617 group by std_number )
group by csa
),
denominator AS (
select count(*)::numeric as result, csa
from vital_signs.match_csas_and_bc_by_geom('schools.afile_1617', 'gid', 'the_geom') a
left join schools.afile_1617 b on a.gid = b.gid
where daysattend > 1 AND grade <13 AND (std_number, enterdate) in
( select std_number, max(enterdate) enterdate from schools.afile_1617 group by std_number )
group by csa
),
tbl AS (
select denominator.csa,(numerator.result / NULLIF(denominator.result,0) )*(100::numeric) as result
from numerator left join denominator on numerator.csa = denominator.csa
)
select * from tbl where 1 = 1 ORDER BY csa ASC;
"""
# Free-form notes: the Hispanic column name differs between the two years.
# The literal that opens on the same line as the closing quotes is adjacent,
# so Python concatenates it onto miscNotes.
miscNotes = """afile_1617
17 hispani_la
16 hispani_lat
NULLIF(denominator.result,0)
""""""
where daysattend > 1 AND future_rac in ('300', '305', '340', '345') AND (hispani_la LIKE 'N') AND (grade <13 ) AND (std_number, enterdate) in
( select std_number, max(enterdate) enterdate from schools.afile_1617 group by std_number )
/
where daysattend > 1 AND grade <13 AND (std_number, enterdate) in
( select std_number, max(enterdate) enterdate from schools.afile_1617 group by std_number )
"""#export
def aastud(df, csa, yr):
    """Percent of students per CSA with race_AA > 0 and hispani_la == 'N'.

    The denominator is every record with more than one day attended and a
    numeric grade below 13; the numerator is the subset of those records with
    race_AA > 0 and hispani_la == 'N'.

    Args:
        df: Student records; reads 'CSA2010', 'grade', 'daysattend',
            'race_AA' and 'hispani_la'. Not modified.
        csa: Boundary frame keyed by 'CSA2010' that also carries 'geometry',
            'Shape__Length' and 'Shape__Area'. Its LAST row is treated as
            the citywide row.
        yr: Two-digit data year (e.g. '19'); used in the output column name
            and to pick the comparison year.

    Returns:
        DataFrame with one row per 'CSA2010' value containing, among the
        remaining summed columns, 'count', 'denomCount' and '70-aastud<yr>'.
        When the published layer has the comparison year, that column plus
        'change' (string, two decimals) and 'percentChange' are added.
    """
    yr = str(yr)
    indicator_id = '70'
    shortname = 'aastud'
    column = indicator_id + '-' + shortname + yr

    # Create the Denominator on a copy so the caller's frame is left untouched.
    denom = df.copy()
    denom['count'] = 1
    # Non-numeric grades become NaN and therefore fail the < 13 test.
    denom['grade'] = pd.to_numeric(denom['grade'], errors='coerce')
    denom = denom[(denom['daysattend'] > 1) & (denom['grade'] < 13)]

    # Create the Numerator
    numer = denom[(denom['race_AA'] > 0) & (denom['hispani_la'] == 'N')]

    # Group by CSA
    numer = numer.groupby('CSA2010').sum(numeric_only=True)
    denom_counts = denom.groupby('CSA2010')['count'].sum()

    # Make sure ALL CSAs and Baltimore City are included.
    numer = csa.merge(numer, on='CSA2010', how='outer')
    numer.drop(columns=['geometry', 'Shape__Length', 'Shape__Area'], inplace=True)
    # Attach the denominator by CSA label rather than by row position, so the
    # two counts cannot be paired with the wrong area.
    numer['denomCount'] = numer['CSA2010'].map(denom_counts)

    # Citywide numerator and denominator are the sums over every row
    # (records matched only to the city are included). The row is located by
    # label because an outer merge does not guarantee the left row order.
    city_row = numer['CSA2010'] == csa['CSA2010'].iloc[-1]
    numer.loc[city_row, 'count'] = numer['count'].sum()
    numer.loc[city_row, 'denomCount'] = numer['denomCount'].sum()

    # Perform the calculation
    numer[column] = numer['count'] / numer['denomCount'] * 100

    # Compare against the previously published year, when available.
    compareYears = gpd.read_file("https://services1.arcgis.com/mVFRs7NF4iFitgbY/ArcGIS/rest/services/"+shortname.capitalize()+"/FeatureServer/0/query?where=1%3D1&outFields=*&returnGeometry=true&f=pgeojson")
    goback = 2 if yr == '19' else 3
    prevYear = shortname + str(int(yr) - goback)
    if prevYear in compareYears.columns:
        numer = numer.merge(compareYears[['CSA2010', prevYear]], on='CSA2010', how='outer')
        numer['change'] = numer[column] - numer[prevYear]
        numer['percentChange'] = numer['change'] / numer[prevYear] * 100
        # Formatted last so percentChange is computed from the numeric value.
        numer['change'] = numer['change'].apply(lambda x: "{:.2f}".format(x))

    print( 'Records Matching Query: ', numer.size / len(numer.columns) )
    # errors='ignore': numeric_only=True may already have removed some of
    # these columns (any that are not numeric in the input).
    return numer.drop(columns=['daysattend', 'SchoolSwitcherFlag', 'wleavecode',
                               'daysabsent', 'race_AIAN', 'race_asian', 'race_AA',
                               'race_NHPI', 'race_white', 'std_number', 'enterdate'],
                      errors='ignore')


fin = aastud(bcpss, csa, year)
fin.to_csv('70-aastud'+year+'.csv', index=False)
fin.head(6)
71 wstud - G - No Drop Dupes
# Indicator 71 (wstud): display label followed by earlier SQL versions.
# NOTE(review): these strings are not executed in the visible part of this
# file; they appear to be kept as reference for the pandas function below.
lbl = """BCPSS - Education and Youth - Percent of Students that are White (non-Hispanic)"""
# 2016 version: numerator / denominator * 100 written to vital_signs.data.wstud.
Query16 = """with numerator as (
select sum(
case
when (daysattend::numeric > 1) AND (future_rac::text ='5') AND (hipani_lat LIKE 'N') AND (grade::numeric <13 )
then 1
else 0
end)::numeric as result, csa
from vital_signs.match_csas_and_bc_by_geom('schools.afile_2016', 'gid', 'the_geom') a
left join schools.afile_2016 b on a.gid = b.gid
group by csa
),
denominator AS (
select (sum(
case
when (daysattend::numeric > 1) AND (grade::numeric <13 )
then 1
else 0
end)::numeric
) as result, csa
from vital_signs.match_csas_and_bc_by_geom('schools.afile_2016', 'gid', 'the_geom') a
left join schools.afile_2016 b on a.gid = b.gid
group by csa
),
tbl AS (
select denominator.csa,(numerator.result / denominator.result)*(100::numeric) as result
from numerator left join denominator on numerator.csa = denominator.csa
)
update vital_signs.data
set wstud = result from tbl where data.csa = tbl.csa and data_year = '2016';
"""
# 2016-17 sum(case ...) version; the division is guarded with NULLIF.
OLD17Query = """with numerator as (
select sum(
case
when (daysattend::numeric > 1) AND (future_rac::text ='5') AND (hispani_la LIKE 'N') AND (grade::numeric <13 )
then 1
else 0
end)::numeric as result, csa
from vital_signs.match_csas_and_bc_by_geom('schools.afile_1617', 'gid', 'the_geom') a
left join schools.afile_1617 b on a.gid = b.gid
group by csa
),
denominator AS (
select (sum(
case
when (daysattend::numeric > 1) AND (grade::numeric <13 )
then 1
else 0
end)::numeric
) as result, csa
from vital_signs.match_csas_and_bc_by_geom('schools.afile_1617', 'gid', 'the_geom') a
left join schools.afile_1617 b on a.gid = b.gid
group by csa
),
tbl AS (
select denominator.csa,(numerator.result / NULLIF(denominator.result,0) )*(100::numeric) as result
from numerator left join denominator on numerator.csa = denominator.csa
)
select * from tbl where 1 = 1 ORDER BY csa ASC;
"""
# 2016-17 rewrite: count(*) limited to each student's max(enterdate) record.
NEW17Query = """with numerator as (
select count(*)::numeric as result, csa
from vital_signs.match_csas_and_bc_by_geom('schools.afile_1617', 'gid', 'the_geom') a
left join schools.afile_1617 b on a.gid = b.gid
where daysattend > 1 AND future_rac ='5' AND hispani_la LIKE 'N' AND grade <13 AND (std_number, enterdate) in
( select std_number, max(enterdate) enterdate from schools.afile_1617 group by std_number )
group by csa
),
denominator AS (
select count(*)::numeric as result, csa
from vital_signs.match_csas_and_bc_by_geom('schools.afile_1617', 'gid', 'the_geom') a
left join schools.afile_1617 b on a.gid = b.gid
where daysattend > 1 AND grade <13 AND (std_number, enterdate) in
( select std_number, max(enterdate) enterdate from schools.afile_1617 group by std_number )
group by csa
),
tbl AS (
select denominator.csa,(numerator.result / NULLIF(denominator.result,0) )*(100::numeric) as result
from numerator left join denominator on numerator.csa = denominator.csa
)
select * from tbl where 1 = 1 ORDER BY csa ASC;
"""
# Free-form notes: the Hispanic column name differs between the two years.
# The literal that opens on the same line as the closing quotes is adjacent,
# so Python concatenates it onto misc.
misc = """
afile_1617
17 hispani_la
16 hispani_lat
NULLIF(denominator.result,0)
""""""
where daysattend > 1 AND future_rac ='5' AND hispani_la LIKE 'N' AND grade <13 AND (std_number, enterdate) in
( select std_number, max(enterdate) enterdate from schools.afile_1617 group by std_number )
/
where daysattend > 1 AND grade <13 AND (std_number, enterdate) in
( select std_number, max(enterdate) enterdate from schools.afile_1617 group by std_number )
"""#export
def wstud(df, csa, yr):
    """Percent of students per CSA with race_white > 0 and hispani_la == 'N'.

    The denominator is every record with more than one day attended and a
    numeric grade below 13; the numerator is the subset of those records with
    race_white > 0 and hispani_la == 'N'.

    Args:
        df: Student records; reads 'CSA2010', 'grade', 'daysattend',
            'race_white' and 'hispani_la'. Not modified.
        csa: Boundary frame keyed by 'CSA2010' that also carries 'geometry',
            'Shape__Length' and 'Shape__Area'. Its LAST row is treated as
            the citywide row.
        yr: Two-digit data year (e.g. '19'); used in the output column name
            and to pick the comparison year.

    Returns:
        DataFrame with one row per 'CSA2010' value containing, among the
        remaining summed columns, 'count', 'denomCount' and '71-wstud<yr>'.
        When the published layer has the comparison year, that column plus
        'change' (string, two decimals) and 'percentChange' are added.
    """
    yr = str(yr)
    indicator_id = '71'
    shortname = 'wstud'
    column = indicator_id + '-' + shortname + yr

    # Create the Denominator on a copy so the caller's frame is left untouched.
    denom = df.copy()
    denom['count'] = 1
    # Non-numeric grades become NaN and therefore fail the < 13 test.
    denom['grade'] = pd.to_numeric(denom['grade'], errors='coerce')
    denom = denom[(denom['daysattend'] > 1) & (denom['grade'] < 13)]

    # Create the Numerator
    numer = denom[(denom['race_white'] > 0) & (denom['hispani_la'] == 'N')]

    # Group by CSA
    numer = numer.groupby('CSA2010').sum(numeric_only=True)
    denom_counts = denom.groupby('CSA2010')['count'].sum()

    # Make sure ALL CSAs and Baltimore City are included.
    numer = csa.merge(numer, on='CSA2010', how='outer')
    numer.drop(columns=['geometry', 'Shape__Length', 'Shape__Area'], inplace=True)
    # Attach the denominator by CSA label rather than by row position, so the
    # two counts cannot be paired with the wrong area.
    numer['denomCount'] = numer['CSA2010'].map(denom_counts)

    # Citywide numerator and denominator are the sums over every row
    # (records matched only to the city are included). The row is located by
    # label because an outer merge does not guarantee the left row order.
    city_row = numer['CSA2010'] == csa['CSA2010'].iloc[-1]
    numer.loc[city_row, 'count'] = numer['count'].sum()
    numer.loc[city_row, 'denomCount'] = numer['denomCount'].sum()

    # Perform the calculation
    numer[column] = numer['count'] / numer['denomCount'] * 100

    # Compare against the previously published year, when available.
    compareYears = gpd.read_file("https://services1.arcgis.com/mVFRs7NF4iFitgbY/ArcGIS/rest/services/"+shortname.capitalize()+"/FeatureServer/0/query?where=1%3D1&outFields=*&returnGeometry=true&f=pgeojson")
    goback = 2 if yr == '19' else 3
    prevYear = shortname + str(int(yr) - goback)
    if prevYear in compareYears.columns:
        numer = numer.merge(compareYears[['CSA2010', prevYear]], on='CSA2010', how='outer')
        numer['change'] = numer[column] - numer[prevYear]
        numer['percentChange'] = numer['change'] / numer[prevYear] * 100
        # Formatted last so percentChange is computed from the numeric value.
        numer['change'] = numer['change'].apply(lambda x: "{:.2f}".format(x))

    print( 'Records Matching Query: ', numer.size / len(numer.columns) )
    # errors='ignore': numeric_only=True may already have removed some of
    # these columns (any that are not numeric in the input).
    return numer.drop(columns=['daysattend', 'SchoolSwitcherFlag', 'wleavecode',
                               'daysabsent', 'race_AIAN', 'race_asian', 'race_AA',
                               'race_NHPI', 'race_white', 'std_number', 'enterdate'],
                      errors='ignore')


fin = wstud(bcpss, csa, year)
fin.to_csv('71-wstud'+year+'.csv', index=False)
fin.head(6)
72 hstud - G - No Drop Dupes
# Indicator 72 (hstud): display label followed by earlier SQL versions.
# NOTE(review): these strings are not executed in the visible part of this
# file; they appear to be kept as reference for the pandas function below.
lbl = """ BCPSS - Education and Youth """
# 2016 version: numerator / denominator * 100 written to vital_signs.data.hstud.
Query16 = """ with numerator as (
select sum(
case
when (daysattend::numeric > 1) AND (hipani_lat LIKE 'Y') AND (grade::numeric <13 )
then 1
else 0
end)::numeric as result, csa
from vital_signs.match_csas_and_bc_by_geom('schools.afile_2016', 'gid', 'the_geom') a
left join schools.afile_2016 b on a.gid = b.gid
group by csa
),
denominator AS (
select (sum(
case
when (daysattend::numeric > 1) AND (grade::numeric <13 )
then 1
else 0
end)::numeric
) as result, csa
from vital_signs.match_csas_and_bc_by_geom('schools.afile_2016', 'gid', 'the_geom') a
left join schools.afile_2016 b on a.gid = b.gid
group by csa
),
tbl AS (
select denominator.csa,(numerator.result / denominator.result)*(100::numeric) as result
from numerator left join denominator on numerator.csa = denominator.csa
)
update vital_signs.data
set hstud = result from tbl where data.csa = tbl.csa and data_year = '2016';
"""
# 2016-17 rewrite: count(*) limited to each student's max(enterdate) record.
NEW17Query = """with numerator as (
select count(*)::numeric as result, csa
from vital_signs.match_csas_and_bc_by_geom('schools.afile_1617', 'gid', 'the_geom') a
left join schools.afile_1617 b on a.gid = b.gid
where daysattend > 1 AND hispani_la LIKE 'Y' AND grade <13 AND (std_number, enterdate) in
( select std_number, max(enterdate) enterdate from schools.afile_1617 group by std_number )
group by csa
),
denominator AS (
select count(*)::numeric as result, csa
from vital_signs.match_csas_and_bc_by_geom('schools.afile_1617', 'gid', 'the_geom') a
left join schools.afile_1617 b on a.gid = b.gid
where daysattend > 1 AND grade <13 AND (std_number, enterdate) in
( select std_number, max(enterdate) enterdate from schools.afile_1617 group by std_number )
group by csa
),
tbl AS (
select denominator.csa,(numerator.result / NULLIF(denominator.result,0) )*(100::numeric) as result
from numerator left join denominator on numerator.csa = denominator.csa
)
select * from tbl where 1 = 1 ORDER BY csa ASC;
"""
# 2016-17 sum(case ...) version of the hstud query, kept for reference.
# The literal that opens on the same line as the closing quotes is adjacent,
# so Python concatenates the filter notes onto OLD17Query.
OLD17Query = """with numerator as (
select sum(
case
when (daysattend::numeric > 1) AND (hispani_la LIKE 'Y') AND (grade::numeric <13 )
then 1
else 0
end)::numeric as result, csa
from vital_signs.match_csas_and_bc_by_geom('schools.afile_1617', 'gid', 'the_geom') a
left join schools.afile_1617 b on a.gid = b.gid
group by csa
),
denominator AS (
select (sum(
case
when (daysattend::numeric > 1) AND (grade::numeric <13 )
then 1
else NULL
end)::numeric
) as result, csa
from vital_signs.match_csas_and_bc_by_geom('schools.afile_1617', 'gid', 'the_geom') a
left join schools.afile_1617 b on a.gid = b.gid
group by csa
),
tbl AS (
select denominator.csa,(numerator.result / NULLIF(denominator.result,0) )*(100::numeric) as result
from numerator left join denominator on numerator.csa = denominator.csa
)
select * from tbl order by csa asc
""""""
where daysattend > 1 AND hispani_la LIKE 'Y' AND grade <13 AND (std_number, enterdate) in
( select std_number, max(enterdate) enterdate from schools.afile_1617 group by std_number )
/
where daysattend > 1 AND grade <13 AND (std_number, enterdate) in
( select std_number, max(enterdate) enterdate from schools.afile_1617 group by std_number )
"""

# Exploratory check of the denominator and numerator before wrapping the
# logic in a function. One statement per line; these were fused together.
denom = bcpss.copy()
denom['grade'] = denom['grade'].apply(pd.to_numeric, errors='coerce')
denom = denom[ (denom['daysattend'] > 1) & (denom['grade'] < 13) ]
denom.groupby('CSA2010').sum(numeric_only=True).head()
# NOTE(review): this chained assignment also rebinds `denom` to the
# hispani_la == 'Y' subset; kept as in the original cell.
numer = denom = denom[ (denom['hispani_la'] == 'Y') ]
numer.groupby('CSA2010').sum(numeric_only=True).head()
bcpss[ (bcpss['CSA2010'] == 'Allendale/Irvington/S. Hilton') & (bcpss['hispani_la'] == 'Y') ].head(1)
#export
def hstud(df, csa, yr):
    """Percent of students per CSA with hispani_la == 'Y'.

    The denominator is every record with more than one day attended and a
    numeric grade below 13; the numerator is the subset of those records with
    hispani_la == 'Y'.

    Args:
        df: Student records; reads 'CSA2010', 'grade', 'daysattend' and
            'hispani_la'. Not modified.
        csa: Boundary frame keyed by 'CSA2010' that also carries 'geometry',
            'Shape__Length' and 'Shape__Area'. Its LAST row is treated as
            the citywide row.
        yr: Two-digit data year (e.g. '19'); used in the output column name
            and to pick the comparison year.

    Returns:
        DataFrame with one row per 'CSA2010' value containing, among the
        remaining summed columns, 'count', 'denomCount' and '72-hstud<yr>'.
        When the published layer has the comparison year, that column plus
        'change' (string, two decimals) and 'percentChange' are added.
    """
    yr = str(yr)
    indicator_id = '72'
    shortname = 'hstud'
    column = indicator_id + '-' + shortname + yr

    # Create the Denominator on a copy so the caller's frame is left untouched.
    denom = df.copy()
    denom['count'] = 1
    # Non-numeric grades become NaN and therefore fail the < 13 test.
    denom['grade'] = pd.to_numeric(denom['grade'], errors='coerce')
    denom = denom[(denom['daysattend'] > 1) & (denom['grade'] < 13)]

    # Create the Numerator
    numer = denom[denom['hispani_la'] == 'Y']

    # Group by CSA
    numer = numer.groupby('CSA2010').sum(numeric_only=True)
    denom_counts = denom.groupby('CSA2010')['count'].sum()

    # Make sure ALL CSAs and Baltimore City are included.
    numer = csa.merge(numer, on='CSA2010', how='outer')
    numer.drop(columns=['geometry', 'Shape__Length', 'Shape__Area'], inplace=True)
    # Attach the denominator by CSA label rather than by row position, so the
    # two counts cannot be paired with the wrong area.
    numer['denomCount'] = numer['CSA2010'].map(denom_counts)

    # Citywide numerator and denominator are the sums over every row
    # (records matched only to the city are included). The row is located by
    # label because an outer merge does not guarantee the left row order.
    city_row = numer['CSA2010'] == csa['CSA2010'].iloc[-1]
    numer.loc[city_row, 'count'] = numer['count'].sum()
    numer.loc[city_row, 'denomCount'] = numer['denomCount'].sum()

    # Perform the calculation
    numer[column] = numer['count'] / numer['denomCount'] * 100

    # Compare against the previously published year, when available.
    compareYears = gpd.read_file("https://services1.arcgis.com/mVFRs7NF4iFitgbY/ArcGIS/rest/services/"+shortname.capitalize()+"/FeatureServer/0/query?where=1%3D1&outFields=*&returnGeometry=true&f=pgeojson")
    goback = 2 if yr == '19' else 3
    prevYear = shortname + str(int(yr) - goback)
    if prevYear in compareYears.columns:
        numer = numer.merge(compareYears[['CSA2010', prevYear]], on='CSA2010', how='outer')
        numer['change'] = numer[column] - numer[prevYear]
        numer['percentChange'] = numer['change'] / numer[prevYear] * 100
        # Formatted last so percentChange is computed from the numeric value.
        numer['change'] = numer['change'].apply(lambda x: "{:.2f}".format(x))

    print( 'Records Matching Query: ', numer.size / len(numer.columns) )
    # errors='ignore': numeric_only=True may already have removed some of
    # these columns (any that are not numeric in the input).
    return numer.drop(columns=['daysattend', 'SchoolSwitcherFlag', 'wleavecode',
                               'daysabsent', 'race_AIAN', 'race_asian', 'race_AA',
                               'race_NHPI', 'race_white', 'std_number', 'enterdate'],
                      errors='ignore')


fin = hstud(bcpss, csa, year)
fin.to_csv('72-hstud'+year+'.csv', index=False)
fin.head(6)
73 abse - G - No Drop Dupes
lbl = """ BCPSS - Education and Youth - Percent of 1st-5th Grade Students that are Chronically Absent (Missing at least 20 days) """
Query16 = """with numerator as (
select sum(
case
when (daysattend > 1) AND (grade = '1' OR grade = '2' OR grade = '3' OR grade = '4'OR grade = '5') AND (daysabsent > 200)
then 1
else 0
end)::numeric as result, csa
from vital_signs.match_csas_and_bc_by_geom('schools.afile_2016', 'gid', 'the_geom') a
left join schools.afile_2016 b on a.gid = b.gid
group by csa
),
denominator AS (
select (sum(
case
when (daysattend::numeric > 1) AND (grade = '1' OR grade = '2' OR grade = '3' OR grade = '4'OR grade = '5')
then 1
else 0
end)::numeric
) as result, csa
from vital_signs.match_csas_and_bc_by_geom('schools.afile_2016', 'gid', 'the_geom') a
left join schools.afile_2016 b on a.gid = b.gid
group by csa
),
tbl AS (
select denominator.csa,(numerator.result / denominator.result)*(100::numeric) as result
from numerator left join denominator on numerator.csa = denominator.csa
)
update vital_signs.data
set abse = result from tbl where data.csa = tbl.csa and data_year = '2016';
"""
New17Query = """with numerator as (
select count(*)::numeric as result, csa
from vital_signs.match_csas_and_bc_by_geom('schools.afile_1617', 'gid', 'the_geom') a
left join schools.afile_1617 b on a.gid = b.gid
where daysattend > 1 AND daysabsent > 200 AND grade in ('1','2','3','4','5') AND (std_number, enterdate) in
( select std_number, max(enterdate) enterdate from schools.afile_1617 group by std_number )
group by csa
),
denominator AS (
select count(*)::numeric as result, csa
from vital_signs.match_csas_and_bc_by_geom('schools.afile_1617', 'gid', 'the_geom') a
left join schools.afile_1617 b on a.gid = b.gid
where daysattend > 1 AND grade in ('1','2','3','4','5') AND (std_number, enterdate) in
( select std_number, max(enterdate) enterdate from schools.afile_1617 group by std_number )
group by csa
),
tbl AS (
select denominator.csa,(numerator.result / NULLIF(denominator.result,0) )*(100::numeric) as result
from numerator left join denominator on numerator.csa = denominator.csa
)
select * from tbl where 1 = 1 ORDER BY csa ASC;
"""
OLD17Query = """with numerator as (
select sum(
case
when (daysattend > 1) AND (grade = 1 OR grade = 2 OR grade = 3 OR grade = 4 OR grade = 5 ) AND (daysabsent > 200)
then 1
else 0
end)::numeric as result, csa
from vital_signs.match_csas_and_bc_by_geom('schools.afile_1617', 'gid', 'the_geom') a
left join schools.afile_1617 b on a.gid = b.gid
group by csa
),
denominator AS (
select (sum(
case
when (daysattend::numeric > 1) AND (grade = '1' OR grade = '2' OR grade = '3' OR grade = '4'OR grade = '5')
then 1
else 0
end)::numeric
) as result, csa
from vital_signs.match_csas_and_bc_by_geom('schools.afile_1617', 'gid', 'the_geom') a
left join schools.afile_1617 b on a.gid = b.gid
group by csa
),
tbl AS (
select denominator.csa,(numerator.result / NULLIF(denominator.result,0) )*(100::numeric) as result
from numerator left join denominator on numerator.csa = denominator.csa
)
select * from tbl where 1 = 1 ORDER BY csa ASC;
""
afile_1617
17 hispani_la
16 hispani_lat
NULLIF(denominator.result,0)
""""""
where daysattend > 1 AND daysabsent > 200 AND grade in ('1','2','3','4','5') AND (std_number, enterdate) in
( select std_number, max(enterdate) enterdate from schools.afile_1617 group by std_number )
/
where daysattend > 1 AND grade in ('1','2','3','4','5') AND (std_number, enterdate) in
( select std_number, max(enterdate) enterdate from schools.afile_1617 group by std_number )
"""
The numerator differs from the denominator only in that it also requires days absent > 200. Note: the query filters on 200, but the indicator description says 20.
denom[ denom['daysabsent'] > 200]#export
def abse(df, csa, yr):
    """Build indicator 73 (abse): percent of 1st-5th graders chronically absent, by CSA.

    Parameters
    ----------
    df : DataFrame of student records. The 'grade', 'daysattend',
        'daysabsent' and 'CSA2010' columns are read; the frame itself is
        left unmodified (the helper 'count' column is added to a copy).
    csa : frame of CSA rows keyed by 'CSA2010' that also carries
        'geometry', 'Shape__Length' and 'Shape__Area' columns (these are
        dropped from the output).
    yr : two-digit Vital Signs year such as '19'. Used for the output
        column / file names and to choose the comparison year.

    Returns
    -------
    DataFrame with 'count' (numerator), 'denomCount' (denominator) and the
    '73-abse<yr>' percentage. When the previously published year's column
    exists in the ArcGIS layer, that column plus 'change' and
    'percentChange' are included as well.

    Side effects
    ------------
    Writes '73-abse<yr>_numer.csv' and '73-abse<yr>_denom.csv', downloads
    the published 'Abse' feature layer, and prints the result row count.
    """
    yr = str(yr)
    indicator_id = '73'
    shortname = 'abse'
    column = indicator_id + '-' + shortname + yr

    # Denominator: 1st-5th graders who attended more than one day.
    in_grades = df['grade'].str.contains('^1$|^2$|^3$|^4$|^5$', regex=True, na=False)
    denom = df[in_grades & (df['daysattend'] > 1)].assign(count=1)
    # Numerator: the subset of those students above the absence threshold.
    # NOTE(review): the indicator description says "at least 20 days" while
    # the filter is > 200 -- confirm the unit of 'daysabsent'.
    numer = denom[denom['daysabsent'] > 200]

    # Group by CSA
    numer = numer.groupby('CSA2010').sum(numeric_only=True)
    denom = denom.groupby('CSA2010').sum(numeric_only=True)

    # Make sure ALL csas and BaltimoreCity are included and sorted.
    numer = csa.merge(numer, left_on='CSA2010', right_on='CSA2010', how='outer')
    denom = csa.merge(denom, left_on='CSA2010', right_on='CSA2010', how='outer')
    shape_columns = ['geometry', 'Shape__Length', 'Shape__Area']
    numer.drop(columns=shape_columns, inplace=True)
    denom.drop(columns=shape_columns, inplace=True)

    # Export the raw numerator / denominator counts without the summed
    # per-student detail columns.
    export_excluded = ['tpop10', 'EnrollmentFlag', 'daysattend', 'SchoolSwitcherFlag', 'wleavecode',
                       'daysabsent', 'race_AIAN', 'race_asian', 'race_AA',
                       'race_NHPI', 'race_white', 'std_number', 'enterdate']
    numer.drop(columns=export_excluded).to_csv(column + '_numer.csv', index=False)
    denom.drop(columns=export_excluded).to_csv(column + '_denom.csv', index=False)

    numer['denomCount'] = denom['count']
    # Row 55 is assumed to be the Baltimore City total row of the merged
    # frame -- TODO confirm against the csa layer. Unmatched ("False")
    # records land below it but are still included in the city-wide sum.
    city_row = 55
    numer.at[city_row, 'count'] = numer['count'].sum()
    numer.at[city_row, 'denomCount'] = numer['denomCount'].sum()

    # Perform the calculation
    numer[column] = numer['count'] / numer['denomCount'] * 100

    # Compare against the previously published year, when it is available.
    compare_years = gpd.read_file("https://services1.arcgis.com/mVFRs7NF4iFitgbY/ArcGIS/rest/services/"+shortname.capitalize()+"/FeatureServer/0/query?where=1%3D1&outFields=*&returnGeometry=true&f=pgeojson")
    goback = 2 if yr == '19' else 3
    prev_year = shortname + str(int(yr) - goback)
    if prev_year in compare_years.columns:
        numer = numer.merge(compare_years[['CSA2010', prev_year]], left_on='CSA2010', right_on='CSA2010', how='outer')
        numer['change'] = numer[column] - numer[prev_year]
        numer['percentChange'] = numer['change'] / numer[prev_year] * 100
        numer['change'] = numer['change'].apply(lambda x: "{:.2f}".format(x))

    print('Records Matching Query: ', numer.size / len(numer.columns))
    return numer.drop(columns=['daysattend', 'SchoolSwitcherFlag', 'wleavecode',
                               'daysabsent', 'race_AIAN', 'race_asian', 'race_AA',
                               'race_NHPI', 'race_white', 'std_number', 'enterdate'])


fin = abse(bcpss, csa, year)
fin.to_csv('73-abse'+year+'.csv', index=False)
fin.head(6)
74 absmd - G - No Drop Dupes
lbl = """ BCPSS - Education and Youth - Percent of 6th-8th Grade Students that are Chronically Absent (Missing at least 20 days) """
query16 = """ with numerator as (
select sum(
case
when (daysattend > 1) AND (grade = '6' OR grade = '7' OR grade = '8') AND (daysabsent > 200)
then 1
else 0
end)::numeric as result, csa
from vital_signs.match_csas_and_bc_by_geom('schools.afile_2016', 'gid', 'the_geom') a
left join schools.afile_2016 b on a.gid = b.gid
group by csa
),
denominator AS (
select (sum(
case
when (daysattend::numeric > 1) AND (grade = '6' OR grade = '7' OR grade = '8')
then 1
else 0
end)::numeric
) as result, csa
from vital_signs.match_csas_and_bc_by_geom('schools.afile_2016', 'gid', 'the_geom') a
left join schools.afile_2016 b on a.gid = b.gid
group by csa
),
tbl AS (
select denominator.csa,(numerator.result / denominator.result)*(100::numeric) as result
from numerator left join denominator on numerator.csa = denominator.csa
)
update vital_signs.data
set absmd = result from tbl where data.csa = tbl.csa and data_year = '2016';
"""
NEW17Query = """with numerator as (
select count(*)::numeric as result, csa
from vital_signs.match_csas_and_bc_by_geom('schools.afile_1617', 'gid', 'the_geom') a
left join schools.afile_1617 b on a.gid = b.gid
where daysattend > 1 AND daysabsent > 200 AND grade in ('6','7','8') AND (std_number, enterdate) in
( select std_number, max(enterdate) enterdate from schools.afile_1617 group by std_number )
group by csa
),
denominator AS (
select count(*)::numeric as result, csa
from vital_signs.match_csas_and_bc_by_geom('schools.afile_1617', 'gid', 'the_geom') a
left join schools.afile_1617 b on a.gid = b.gid
where daysattend > 1 AND grade in ('6','7','8') AND (std_number, enterdate) in
( select std_number, max(enterdate) enterdate from schools.afile_1617 group by std_number )
group by csa
),
tbl AS (
select denominator.csa,(numerator.result / NULLIF(denominator.result,0) )*(100::numeric) as result
from numerator left join denominator on numerator.csa = denominator.csa
)
select * from tbl where 1 = 1 ORDER BY csa ASC;
"""
OLD17Query = """with numerator as (
select sum(
case
when (daysattend > 1) AND (grade = '6' OR grade = '7' OR grade = '8') AND (daysabsent > 200)
then 1
else 0
end)::numeric as result, csa
from vital_signs.match_csas_and_bc_by_geom('schools.afile_1617', 'gid', 'the_geom') a
left join schools.afile_1617 b on a.gid = b.gid
group by csa
),
denominator AS (
select (sum(
case
when (daysattend::numeric > 1) AND (grade = '6' OR grade = '7' OR grade = '8')
then 1
else 0
end)::numeric
) as result, csa
from vital_signs.match_csas_and_bc_by_geom('schools.afile_1617', 'gid', 'the_geom') a
left join schools.afile_1617 b on a.gid = b.gid
group by csa
),
tbl AS (
select denominator.csa,(numerator.result / NULLIF(denominator.result,0) )*(100::numeric) as result
from numerator left join denominator on numerator.csa = denominator.csa
)
select * from tbl where 1 = 1 ORDER BY csa ASC;
"""
""" afile_1617 NULLIF(denominator.result,0) """"""
where daysattend > 1 AND daysabsent > 200 AND grade in ('6','7','8') AND (std_number, enterdate) in
( select std_number, max(enterdate) enterdate from schools.afile_1617 group by std_number )
/
where daysattend > 1 AND grade in ('6','7','8') AND (std_number, enterdate) in
( select std_number, max(enterdate) enterdate from schools.afile_1617 group by std_number )
"""#export
def absmd(df, csa, yr):
    """Build indicator 74 (absmd): percent of 6th-8th graders chronically absent, by CSA.

    Parameters
    ----------
    df : DataFrame of student records. The 'grade', 'daysattend',
        'daysabsent' and 'CSA2010' columns are read; the frame itself is
        left unmodified (the helper 'count' column is added to a copy).
    csa : frame of CSA rows keyed by 'CSA2010' that also carries
        'geometry', 'Shape__Length' and 'Shape__Area' columns (these are
        dropped from the output).
    yr : two-digit Vital Signs year such as '19'. Used for the output
        column / file names and to choose the comparison year.

    Returns
    -------
    DataFrame with 'count' (numerator), 'denomCount' (denominator) and the
    '74-absmd<yr>' percentage. When the previously published year's column
    exists in the ArcGIS layer, that column plus 'change' and
    'percentChange' are included as well.

    Side effects
    ------------
    Writes '74-absmd<yr>_numer.csv' and '74-absmd<yr>_denom.csv', downloads
    the published 'Absmd' feature layer, and prints the result row count.
    """
    yr = str(yr)
    indicator_id = '74'
    shortname = 'absmd'
    column = indicator_id + '-' + shortname + yr

    # Denominator: 6th-8th graders who attended more than one day.
    in_grades = df['grade'].str.contains('^6$|^7$|^8$', regex=True, na=False)
    denom = df[in_grades & (df['daysattend'] > 1)].assign(count=1)
    # Numerator: the subset of those students above the absence threshold.
    # NOTE(review): the indicator description says "at least 20 days" while
    # the filter is > 200 -- confirm the unit of 'daysabsent'.
    numer = denom[denom['daysabsent'] > 200]

    # Group by CSA
    numer = numer.groupby('CSA2010').sum(numeric_only=True)
    denom = denom.groupby('CSA2010').sum(numeric_only=True)

    # Make sure ALL csas and BaltimoreCity are included and sorted.
    numer = csa.merge(numer, left_on='CSA2010', right_on='CSA2010', how='outer')
    denom = csa.merge(denom, left_on='CSA2010', right_on='CSA2010', how='outer')
    shape_columns = ['geometry', 'Shape__Length', 'Shape__Area']
    numer.drop(columns=shape_columns, inplace=True)
    denom.drop(columns=shape_columns, inplace=True)

    # Export the raw numerator / denominator counts without the summed
    # per-student detail columns.
    export_excluded = ['tpop10', 'EnrollmentFlag', 'daysattend', 'SchoolSwitcherFlag', 'wleavecode',
                       'daysabsent', 'race_AIAN', 'race_asian', 'race_AA',
                       'race_NHPI', 'race_white', 'std_number', 'enterdate']
    numer.drop(columns=export_excluded).to_csv(column + '_numer.csv', index=False)
    denom.drop(columns=export_excluded).to_csv(column + '_denom.csv', index=False)

    numer['denomCount'] = denom['count']
    # Row 55 is assumed to be the Baltimore City total row of the merged
    # frame -- TODO confirm against the csa layer. Unmatched ("False")
    # records land below it but are still included in the city-wide sum.
    city_row = 55
    numer.at[city_row, 'count'] = numer['count'].sum()
    numer.at[city_row, 'denomCount'] = numer['denomCount'].sum()

    # Perform the calculation
    numer[column] = numer['count'] / numer['denomCount'] * 100

    # Compare against the previously published year, when it is available.
    compare_years = gpd.read_file("https://services1.arcgis.com/mVFRs7NF4iFitgbY/ArcGIS/rest/services/"+shortname.capitalize()+"/FeatureServer/0/query?where=1%3D1&outFields=*&returnGeometry=true&f=pgeojson")
    goback = 2 if yr == '19' else 3
    prev_year = shortname + str(int(yr) - goback)
    if prev_year in compare_years.columns:
        numer = numer.merge(compare_years[['CSA2010', prev_year]], left_on='CSA2010', right_on='CSA2010', how='outer')
        numer['change'] = numer[column] - numer[prev_year]
        numer['percentChange'] = numer['change'] / numer[prev_year] * 100
        numer['change'] = numer['change'].apply(lambda x: "{:.2f}".format(x))

    print('Records Matching Query: ', numer.size / len(numer.columns))
    return numer.drop(columns=['daysattend', 'SchoolSwitcherFlag', 'wleavecode',
                               'daysabsent', 'race_AIAN', 'race_asian', 'race_AA',
                               'race_NHPI', 'race_white', 'std_number', 'enterdate'])


fin = absmd(bcpss, csa, year)
fin.to_csv('74-absmd'+year+'.csv', index=False)
fin.head(6)
75 abshs - G - No Drop Dupes
lbl = """ BCPSS - Education and Youth - Percent of 9th-12th Grade Students that are Chronically Absent (Missing at least 20 days) """
Query16 = """with numerator as (
select sum(
case
when (daysattend > 1) AND (grade = '9' OR grade = '10' OR grade = '11' OR grade = '12') AND (daysabsent > 200)
then 1
else 0
end)::numeric as result, csa
from vital_signs.match_csas_and_bc_by_geom('schools.afile_2016', 'gid', 'the_geom') a
left join schools.afile_2016 b on a.gid = b.gid
group by csa
),
denominator AS (
select (sum(
case
when (daysattend::numeric > 1) AND (grade = '9' OR grade = '10' OR grade = '11' OR grade = '12')
then 1
else 0
end)::numeric
) as result, csa
from vital_signs.match_csas_and_bc_by_geom('schools.afile_2016', 'gid', 'the_geom') a
left join schools.afile_2016 b on a.gid = b.gid
group by csa
),
tbl AS (
select denominator.csa,(numerator.result / denominator.result)*(100::numeric) as result
from numerator left join denominator on numerator.csa = denominator.csa
)
update vital_signs.data
set abshs = result from tbl where data.csa = tbl.csa and data_year = '2016';
"""
New17Query = """with numerator as (
select count(*)::numeric as result, csa
from vital_signs.match_csas_and_bc_by_geom('schools.afile_1617', 'gid', 'the_geom') a
left join schools.afile_1617 b on a.gid = b.gid
where daysattend > 1 AND daysabsent > 200 AND grade in ('9','10','11','12') AND (std_number, enterdate) in
( select std_number, max(enterdate) enterdate from schools.afile_1617 group by std_number )
group by csa
),
denominator AS (
select count(*)::numeric as result, csa
from vital_signs.match_csas_and_bc_by_geom('schools.afile_1617', 'gid', 'the_geom') a
left join schools.afile_1617 b on a.gid = b.gid
where daysattend > 1 AND grade in ('9','10','11','12') AND (std_number, enterdate) in
( select std_number, max(enterdate) enterdate from schools.afile_1617 group by std_number )
group by csa
),
tbl AS (
select denominator.csa,(numerator.result / NULLIF(denominator.result,0) )*(100::numeric) as result
from numerator left join denominator on numerator.csa = denominator.csa
)
select * from tbl where 1 = 1 ORDER BY csa ASC;
"""
"""
afile_1617
NULLIF(denominator.result,0)
""""""
where daysattend > 1 AND daysabsent > 200 AND grade in ('9','10','11','12') AND (std_number, enterdate) in
( select std_number, max(enterdate) enterdate from schools.afile_1617 group by std_number )
/
where daysattend > 1 AND grade in ('9','10','11','12') AND (std_number, enterdate) in
( select std_number, max(enterdate) enterdate from schools.afile_1617 group by std_number )
"""#export
def abshd(df, csa, yr):
    """Build indicator 75 (abshs): percent of 9th-12th graders chronically absent, by CSA.

    NOTE: the function is named ``abshd`` although the indicator short name
    is ``abshs``; the name is kept so existing callers keep working.

    Parameters
    ----------
    df : DataFrame of student records. The 'grade', 'daysattend',
        'daysabsent' and 'CSA2010' columns are read; the frame itself is
        left unmodified (the helper 'count' column is added to a copy).
    csa : frame of CSA rows keyed by 'CSA2010' that also carries
        'geometry', 'Shape__Length' and 'Shape__Area' columns (these are
        dropped from the output).
    yr : two-digit Vital Signs year such as '19'. Used for the output
        column / file names and to choose the comparison year.

    Returns
    -------
    DataFrame with 'count' (numerator), 'denomCount' (denominator) and the
    '75-abshs<yr>' percentage. When the previously published year's column
    exists in the ArcGIS layer, that column plus 'change' and
    'percentChange' are included as well.

    Side effects
    ------------
    Writes '75-abshs<yr>_numer.csv' and '75-abshs<yr>_denom.csv', downloads
    the published 'Abshs' feature layer, and prints the result row count.
    """
    yr = str(yr)
    indicator_id = '75'
    shortname = 'abshs'
    column = indicator_id + '-' + shortname + yr

    # Denominator: 9th-12th graders who attended more than one day.
    in_grades = df['grade'].str.contains('^9$|^10$|^11$|^12$', regex=True, na=False)
    denom = df[in_grades & (df['daysattend'] > 1)].assign(count=1)
    # Numerator: the subset of those students above the absence threshold.
    # NOTE(review): the indicator description says "at least 20 days" while
    # the filter is > 200 -- confirm the unit of 'daysabsent'.
    numer = denom[denom['daysabsent'] > 200]

    # Group by CSA
    numer = numer.groupby('CSA2010').sum(numeric_only=True)
    denom = denom.groupby('CSA2010').sum(numeric_only=True)

    # Make sure ALL csas and BaltimoreCity are included and sorted.
    numer = csa.merge(numer, left_on='CSA2010', right_on='CSA2010', how='outer')
    denom = csa.merge(denom, left_on='CSA2010', right_on='CSA2010', how='outer')
    shape_columns = ['geometry', 'Shape__Length', 'Shape__Area']
    numer.drop(columns=shape_columns, inplace=True)
    denom.drop(columns=shape_columns, inplace=True)

    # Export the raw numerator / denominator counts without the summed
    # per-student detail columns.
    export_excluded = ['tpop10', 'EnrollmentFlag', 'daysattend', 'SchoolSwitcherFlag', 'wleavecode',
                       'daysabsent', 'race_AIAN', 'race_asian', 'race_AA',
                       'race_NHPI', 'race_white', 'std_number', 'enterdate']
    numer.drop(columns=export_excluded).to_csv(column + '_numer.csv', index=False)
    denom.drop(columns=export_excluded).to_csv(column + '_denom.csv', index=False)

    numer['denomCount'] = denom['count']
    # Row 55 is assumed to be the Baltimore City total row of the merged
    # frame -- TODO confirm against the csa layer. Unmatched ("False")
    # records land below it but are still included in the city-wide sum.
    city_row = 55
    numer.at[city_row, 'count'] = numer['count'].sum()
    numer.at[city_row, 'denomCount'] = numer['denomCount'].sum()

    # Perform the calculation
    numer[column] = numer['count'] / numer['denomCount'] * 100

    # Compare against the previously published year, when it is available.
    compare_years = gpd.read_file("https://services1.arcgis.com/mVFRs7NF4iFitgbY/ArcGIS/rest/services/"+shortname.capitalize()+"/FeatureServer/0/query?where=1%3D1&outFields=*&returnGeometry=true&f=pgeojson")
    goback = 2 if yr == '19' else 3
    prev_year = shortname + str(int(yr) - goback)
    if prev_year in compare_years.columns:
        numer = numer.merge(compare_years[['CSA2010', prev_year]], left_on='CSA2010', right_on='CSA2010', how='outer')
        numer['change'] = numer[column] - numer[prev_year]
        numer['percentChange'] = numer['change'] / numer[prev_year] * 100
        numer['change'] = numer['change'].apply(lambda x: "{:.2f}".format(x))

    print('Records Matching Query: ', numer.size / len(numer.columns))
    return numer.drop(columns=['daysattend', 'SchoolSwitcherFlag', 'wleavecode',
                               'daysabsent', 'race_AIAN', 'race_asian', 'race_AA',
                               'race_NHPI', 'race_white', 'std_number', 'enterdate'])


fin = abshd(bcpss, csa, year)
fin.to_csv('75-abshs'+year+'.csv', index=False)
fin.head(6)
91 drop - G - No Drop Dupes
lbl = """BCPSS - Education and Youth - High School Dropout/Withdrawl Rate"""
query16 = """with numerator as (
select sum(
case
when daysattend::numeric > 1 AND (wleavecode LIKE 'W%') AND (wleavecode != 'W43') AND (grade = '9' OR grade = '10' OR grade = '11' OR grade = '12')
then 1
else 0
end)::numeric as result, csa
from vital_signs.match_csas_and_bc_by_geom('schools.afile_2016', 'gid', 'the_geom') a
left join schools.afile_2016 b on a.gid = b.gid
group by csa
),
denominator AS (
select (sum(
case
when (daysattend::numeric > 1) AND (grade = '9' OR grade = '10' OR grade = '11' OR grade = '12')
then 1
else 0
end)::numeric
) as result, csa
from vital_signs.match_csas_and_bc_by_geom('schools.afile_2016', 'gid', 'the_geom') a
left join schools.afile_2016 b on a.gid = b.gid
group by csa
),
tbl AS (
select denominator.csa,(numerator.result / NULLIF(denominator.result,0) )*(100::numeric) as result
from numerator left join denominator on numerator.csa = denominator.csa
)
select * from tbl where 1 = 1 ORDER BY csa ASC;
"""
NEW17Query = """with numerator as (
select count(*)::numeric as result, csa
from vital_signs.match_csas_and_bc_by_geom('schools.afile_1617', 'gid', 'the_geom') a
left join schools.afile_1617 b on a.gid = b.gid
where daysattend > 1 AND grade in ('9','10','11','12') AND wleavecode IN ('30','31','32','33','34','35','36','37','38','39','41','42','44','46','50','71','85')
AND (std_number, enterdate) in ( select std_number, max(enterdate) enterdate from schools.afile_1617 group by std_number )
group by csa
),
denominator AS (
select count(*)::numeric as result, csa
from vital_signs.match_csas_and_bc_by_geom('schools.afile_1617', 'gid', 'the_geom') a
left join schools.afile_1617 b on a.gid = b.gid
where daysattend > 1 AND grade in ('9','10','11','12') AND (std_number, enterdate) in
( select std_number, max(enterdate) enterdate from schools.afile_1617 group by std_number )
group by csa
),
tbl AS (
select denominator.csa,(numerator.result / NULLIF(denominator.result,0) )*(100::numeric) as result
from numerator left join denominator on numerator.csa = denominator.csa
)
select * from tbl where 1 = 1 ORDER BY csa ASC;
"""
OLD17 = """ with numerator as (
select sum(
case
when daysattend::numeric > 1
AND (grade = '9' OR grade = '10' OR grade = '11' OR grade = '12')
AND wleavecode IN ('30','31','32','33','34','35','36','37','38','39','41','42','44','46','50','71','85')
then 1
else 0
end)::numeric as result, csa
from vital_signs.match_csas_and_bc_by_geom('schools.afile_1617', 'gid', 'the_geom') a
left join schools.afile_1617 b on a.gid = b.gid
group by csa
),
denominator AS (
select (sum(
case
when (daysattend::numeric > 1) AND (grade = '9' OR grade = '10' OR grade = '11' OR grade = '12')
then 1
else 0
end)::numeric
) as result, csa
from vital_signs.match_csas_and_bc_by_geom('schools.afile_1617', 'gid', 'the_geom') a
left join schools.afile_1617 b on a.gid = b.gid
group by csa
),
tbl AS (
select denominator.csa,(numerator.result / NULLIF(denominator.result,0) )*(100::numeric) as result
from numerator left join denominator on numerator.csa = denominator.csa
)
select * from tbl where 1 = 1 ORDER BY csa ASC;
"""
miscnotes = """
afile_1617
NULLIF(denominator.result,0)
ERROR: wleavecode
43 - 'W43 - Death'
LEAVE CODES
WLEAVECODE SY15-16 LEAVECODE SY16-17 Code
0 00 0 'None'
C60 60 'C60 - Graduated'
'C61 - Graduated Certificate of Merit'
C62 62 'C62 - Graduated HS certificate'
'C63 - Graduated Special Ed'
C70 70 'C70 - Early college admission'
T10 10 'T10 - To other BCPS'
T13 13 'T13 - Transfer to MD Public School'
T14 14 'T14 - to US public school'
T15 15 'T15 - to non-public school within city'
16 'T16 - to non-public school outside Baltimore'
17 'T17 - to Us Non-public school'
18'T18 - to any school outside the US' T21 T21 - to evening high school'
22 'T22 - To state institution w/epdgm'
24 'T24 - To parent teaching'
26 T27
30 'W30 - Illness- no education program'
31 'W31 - Over 16, lack of academic success'
'W32 - Over 16, discpl (not expelled) '
33 'W33 - Over 16, lack of motivation'
34 'W34 employment'
'W35 - marriage'
'W36 - military service'
38 'W38 - Court action to institution'
W39 'W39 - Over 16, economic reasons'
'W40 - Under 16 expulsion'
41 'W41 - Under 5 immaturity'
42 'W42 - Under 16 special case'
43 'W43 - Death'
'W44 - Over 16 parenting'
'W46 - Over 16 expulsion required'
50 'W50 - Whereabouts unknown'
W71 'W71 - Pregnancy'.
W85 85
"""#export
def drop(df, csa, yr):
    """Build indicator 91 (drop): high school dropout/withdrawal rate, by CSA.

    Parameters
    ----------
    df : DataFrame of student records. The 'grade', 'daysattend',
        'wleavecode' and 'CSA2010' columns are read; the frame itself is
        left unmodified (helper columns are set on a copy).
    csa : frame of CSA rows keyed by 'CSA2010' that also carries
        'geometry', 'Shape__Length' and 'Shape__Area' columns (these are
        dropped from the output).
    yr : two-digit Vital Signs year such as '19'. Used for the output
        column name and to choose the comparison year.

    Returns
    -------
    DataFrame with 'count' (numerator), 'denomCount' (denominator) and the
    '91-drop<yr>' percentage. When the previously published year's column
    exists in the ArcGIS layer, that column plus 'change' and
    'percentChange' are included as well.

    Side effects
    ------------
    Downloads the published 'Drop' feature layer and prints the result row
    count.
    """
    yr = str(yr)
    indicator_id = '91'
    shortname = 'drop'
    column = indicator_id + '-' + shortname + yr

    # Denominator: 9th-12th graders who attended more than one day.
    in_grades = df['grade'].str.contains('^9$|^10$|^11$|^12$', regex=True, na=False)
    denom = df[in_grades & (df['daysattend'] > 1)].assign(count=1)
    # Leave codes are compared as text; as a string column 'wleavecode' is
    # also excluded from the numeric sums below.
    denom['wleavecode'] = denom['wleavecode'].astype(str)
    # Numerator: students whose leave code is one of the withdrawal codes.
    # NOTE(review): this is an unanchored substring match, so any code text
    # that merely contains one of these numbers also matches.
    withdrawal_codes = '30|31|32|33|34|35|36|37|38|39|41|42|44|46|50|71|85'
    numer = denom[denom['wleavecode'].str.contains(withdrawal_codes, regex=True)]

    # Group by CSA
    numer = numer.groupby('CSA2010').sum(numeric_only=True)
    denom = denom.groupby('CSA2010').sum(numeric_only=True)

    # Make sure ALL csas and BaltimoreCity are included and sorted.
    numer = csa.merge(numer, left_on='CSA2010', right_on='CSA2010', how='outer')
    denom = csa.merge(denom, left_on='CSA2010', right_on='CSA2010', how='outer')
    numer.drop(columns=['geometry', 'Shape__Length', 'Shape__Area'], inplace=True)

    numer['denomCount'] = denom['count']
    # Row 55 is assumed to be the Baltimore City total row of the merged
    # frame -- TODO confirm against the csa layer. Unmatched ("False")
    # records land below it but are still included in the city-wide sum.
    city_row = 55
    numer.at[city_row, 'count'] = numer['count'].sum()
    numer.at[city_row, 'denomCount'] = numer['denomCount'].sum()

    # Perform the calculation
    numer[column] = numer['count'] / numer['denomCount'] * 100

    # Compare against the previously published year, when it is available.
    compare_years = gpd.read_file("https://services1.arcgis.com/mVFRs7NF4iFitgbY/ArcGIS/rest/services/"+shortname.capitalize()+"/FeatureServer/0/query?where=1%3D1&outFields=*&returnGeometry=true&f=pgeojson")
    goback = 2 if yr == '19' else 3
    prev_year = shortname + str(int(yr) - goback)
    if prev_year in compare_years.columns:
        numer = numer.merge(compare_years[['CSA2010', prev_year]], left_on='CSA2010', right_on='CSA2010', how='outer')
        numer['change'] = numer[column] - numer[prev_year]
        numer['percentChange'] = numer['change'] / numer[prev_year] * 100
        numer['change'] = numer['change'].apply(lambda x: "{:.2f}".format(x))

    print('Records Matching Query: ', numer.size / len(numer.columns))
    return numer.drop(columns=['daysattend', 'SchoolSwitcherFlag',
                               'daysabsent', 'race_AIAN', 'race_asian', 'race_AA',
                               'race_NHPI', 'race_white', 'std_number', 'enterdate'])


fin = drop(bcpss, csa, year)
fin.to_csv('91-drop'+year+'.csv', index=False)
fin.head(6)
92 compl - G - No Drop Dupes
lbl = """BCPSS - Education and Youth - High School Completion Rate"""
query16 = """with numerator as (
select sum(
case
when (daysattend::numeric > 1) AND (wleavecode LIKE 'C60') OR (wleavecode LIKE 'C62') AND (grade = '12')
then 1
else 0
end)::numeric as result, csa
from vital_signs.match_csas_and_bc_by_geom('schools.afile_2016', 'gid', 'the_geom') a
left join schools.afile_2016 b on a.gid = b.gid
group by csa
),
denominator AS (
select (sum(
case
when (daysattend::numeric > 1) AND (grade = '12')
then 1
else 0
end)::numeric
) as result, csa
from vital_signs.match_csas_and_bc_by_geom('schools.afile_2016', 'gid', 'the_geom') a
left join schools.afile_2016 b on a.gid = b.gid
group by csa
),
tbl AS (
select vital_signs.div_zero (numerator.result, denominator.result)*(100::numeric) as result, numerator.csa
from numerator left join denominator on numerator.csa = denominator.csa
)
update vital_signs.data
set compl = result from tbl where data.csa = tbl.csa and data_year = '2016';
"""
OLD17 = """with numerator as (
select sum(
case
when (daysattend::numeric > 1) AND (wleavecode = '60') OR (wleavecode = '62') AND (grade = '12')
then 1
else 0
end)::numeric as result, csa
from vital_signs.match_csas_and_bc_by_geom('schools.afile_1617', 'gid', 'the_geom') a
left join schools.afile_1617 b on a.gid = b.gid
group by csa
),
denominator AS (
select (sum(
case
when (daysattend::numeric > 1) AND (grade = '12')
then 1
else 0
end)::numeric
) as result, csa
from vital_signs.match_csas_and_bc_by_geom('schools.afile_1617', 'gid', 'the_geom') a
left join schools.afile_1617 b on a.gid = b.gid
group by csa
),
tbl AS (
select vital_signs.div_zero (numerator.result, denominator.result)*(100::numeric) as result, numerator.csa
from numerator left join denominator on numerator.csa = denominator.csa
)
select * from tbl where 1 = 1 ORDER BY csa ASC;
"""
misc = """
New 17:
afile_1617
NULLIF(denominator.result,0)
ERROR: wleavecode
2016 Leave Codes: W50 T10 C60 T22
2017 Leave Codes: 85 62 10 10 60
"""#export
def compl(df, csa, yr):
    """Build indicator 92 (compl): high school completion rate, by CSA.

    Parameters
    ----------
    df : DataFrame of student records. The 'grade', 'daysattend',
        'wleavecode' and 'CSA2010' columns are read; the frame itself is
        left unmodified (helper columns are set on a copy).
    csa : frame of CSA rows keyed by 'CSA2010' that also carries
        'geometry', 'Shape__Length' and 'Shape__Area' columns (these are
        dropped from the output).
    yr : two-digit Vital Signs year such as '19'. Used for the output
        column name and to choose the comparison year.

    Returns
    -------
    DataFrame with 'count' (numerator), 'denomCount' (denominator) and the
    '92-compl<yr>' percentage. When the previously published year's column
    exists in the ArcGIS layer, that column plus 'change' and
    'percentChange' are included as well.

    Side effects
    ------------
    Downloads the published 'Compl' feature layer and prints the result row
    count.
    """
    yr = str(yr)
    indicator_id = '92'
    shortname = 'compl'
    column = indicator_id + '-' + shortname + yr

    # Denominator: 12th graders who attended more than one day.
    in_grade = df['grade'].str.contains('^12$', regex=True, na=False)
    denom = df[in_grade & (df['daysattend'] > 1)].assign(count=1)
    # Leave codes are compared as text; as a string column 'wleavecode' is
    # also excluded from the numeric sums below.
    denom['wleavecode'] = denom['wleavecode'].astype(str)
    # Numerator: students whose leave code matches one of these codes.
    # NOTE(review): the archived SQL for this indicator counts only codes
    # 60 and 62; 85 and 10 are additionally matched here (unanchored
    # substring match) -- confirm that this is intended.
    completion_codes = '85|62|10|60'
    numer = denom[denom['wleavecode'].str.contains(completion_codes, regex=True)]

    # Group by CSA
    numer = numer.groupby('CSA2010').sum(numeric_only=True)
    denom = denom.groupby('CSA2010').sum(numeric_only=True)

    # Make sure ALL csas and BaltimoreCity are included and sorted.
    numer = csa.merge(numer, left_on='CSA2010', right_on='CSA2010', how='outer')
    denom = csa.merge(denom, left_on='CSA2010', right_on='CSA2010', how='outer')
    numer.drop(columns=['geometry', 'Shape__Length', 'Shape__Area'], inplace=True)

    numer['denomCount'] = denom['count']
    # Row 55 is assumed to be the Baltimore City total row of the merged
    # frame -- TODO confirm against the csa layer. Unmatched ("False")
    # records land below it but are still included in the city-wide sum.
    city_row = 55
    numer.at[city_row, 'count'] = numer['count'].sum()
    numer.at[city_row, 'denomCount'] = numer['denomCount'].sum()

    # Perform the calculation
    numer[column] = numer['count'] / numer['denomCount'] * 100

    # Compare against the previously published year, when it is available.
    compare_years = gpd.read_file("https://services1.arcgis.com/mVFRs7NF4iFitgbY/ArcGIS/rest/services/"+shortname.capitalize()+"/FeatureServer/0/query?where=1%3D1&outFields=*&returnGeometry=true&f=pgeojson")
    goback = 2 if yr == '19' else 3
    prev_year = shortname + str(int(yr) - goback)
    if prev_year in compare_years.columns:
        numer = numer.merge(compare_years[['CSA2010', prev_year]], left_on='CSA2010', right_on='CSA2010', how='outer')
        numer['change'] = numer[column] - numer[prev_year]
        numer['percentChange'] = numer['change'] / numer[prev_year] * 100
        numer['change'] = numer['change'].apply(lambda x: "{:.2f}".format(x))

    print('Records Matching Query: ', numer.size / len(numer.columns))
    return numer.drop(columns=['daysattend', 'SchoolSwitcherFlag',
                               'daysabsent', 'race_AIAN', 'race_asian', 'race_AA',
                               'race_NHPI', 'race_white', 'std_number', 'enterdate'])


fin = compl(bcpss, csa, year)
fin.to_csv('92-compl'+year+'.csv', index=False)
fin.head(6)
94 sclsw - B - No Filter Duplicates
daysattend
lbl = """BCPSS - Education and Youth - Percent of Students Switching Schools within School Year"""
""" Leave Code = 10 13 14 15 16 17 18 212 22 24"""
q16 = """ with numerator as (
select sum(
case
when (daysattend > 1)
then 1
else 0
end)::numeric as result, csa
from vital_signs.match_csas_and_bc_by_geom('schools.schoolswitchers_2016', 'gid', 'the_geom') a
left join schools.schoolswitchers_2016 b on a.gid = b.gid
group by csa
),
denominator AS (
select (sum(
case
when (daysattend::numeric > 1)
then 1
else 0
end)::numeric
) as result, csa
from vital_signs.match_csas_and_bc_by_geom('schools.afile_2016', 'gid', 'the_geom') a
left join schools.afile_2016 b on a.gid = b.gid
group by csa
),
tbl AS (
select vital_signs.div_zero (numerator.result, denominator.result)*(100::numeric) as result, numerator.csa
from numerator left join denominator on numerator.csa = denominator.csa
)
update vital_signs.data
set sclsw = result from tbl where data.csa = tbl.csa and data_year = '2016';
""" """
when (daysattend > 1)
then 1
else 0
/
when (daysattend::numeric > 1)
then 1
else 0
"""#export
def sclsw(df, denomdf, csa, yr):
    """Build indicator 94 (sclsw): percent of students switching schools, by CSA.

    Parameters
    ----------
    df : DataFrame of school-switcher records (numerator source). The
        'daysattend' and 'CSA2010' columns are read.
    denomdf : DataFrame of all student records (denominator source). The
        'daysattend' and 'CSA2010' columns are read.
    csa : frame of CSA rows keyed by 'CSA2010' that also carries
        'geometry', 'Shape__Length' and 'Shape__Area' columns (these are
        dropped from the output).
    yr : two-digit Vital Signs year such as '19'. Used for the output
        column name and to choose the comparison year.

    Neither input frame is modified; the helper 'count' column is added to
    filtered copies.

    Returns
    -------
    DataFrame with 'count' (numerator), 'denomCount' (denominator) and the
    '94-sclsw<yr>' percentage. When the previously published year's column
    exists in the ArcGIS layer, that column plus 'change' and
    'percentChange' are included as well.

    Side effects
    ------------
    Downloads the published 'Sclsw' feature layer and prints the result row
    count.
    """
    yr = str(yr)
    indicator_id = '94'
    shortname = 'sclsw'
    column = indicator_id + '-' + shortname + yr

    # Denominator: every student record with more than one day attended.
    # Duplicate student records are deliberately not filtered out here.
    denom = denomdf[denomdf['daysattend'] > 1].assign(count=1)
    # Numerator: switcher records with more than one day attended.
    numer = df[df['daysattend'] > 1].assign(count=1)

    # Group by CSA
    numer = numer.groupby('CSA2010').sum(numeric_only=True)
    denom = denom.groupby('CSA2010').sum(numeric_only=True)

    # Make sure ALL csas and BaltimoreCity are included and sorted.
    numer = csa.merge(numer, left_on='CSA2010', right_on='CSA2010', how='outer')
    denom = csa.merge(denom, left_on='CSA2010', right_on='CSA2010', how='outer')
    numer.drop(columns=['geometry', 'Shape__Length', 'Shape__Area'], inplace=True)

    numer['denomCount'] = denom['count']
    # Row 55 is assumed to be the Baltimore City total row of the merged
    # frame -- TODO confirm against the csa layer. Unmatched ("False")
    # records land below it but are still included in the city-wide sum.
    city_row = 55
    numer.at[city_row, 'count'] = numer['count'].sum()
    numer.at[city_row, 'denomCount'] = numer['denomCount'].sum()

    # Perform the calculation
    numer[column] = numer['count'] / numer['denomCount'] * 100

    # Compare against the previously published year, when it is available.
    compare_years = gpd.read_file("https://services1.arcgis.com/mVFRs7NF4iFitgbY/ArcGIS/rest/services/"+shortname.capitalize()+"/FeatureServer/0/query?where=1%3D1&outFields=*&returnGeometry=true&f=pgeojson")
    goback = 2 if yr == '19' else 3
    prev_year = shortname + str(int(yr) - goback)
    if prev_year in compare_years.columns:
        numer = numer.merge(compare_years[['CSA2010', prev_year]], left_on='CSA2010', right_on='CSA2010', how='outer')
        numer['change'] = numer[column] - numer[prev_year]
        numer['percentChange'] = numer['change'] / numer[prev_year] * 100
        numer['change'] = numer['change'].apply(lambda x: "{:.2f}".format(x))

    print('Records Matching Query: ', numer.size / len(numer.columns))
    return numer.drop(columns=['daysattend', 'SchoolSwitcherFlag',
                               'daysabsent', 'race_AIAN', 'race_asian', 'race_AA',
                               'race_NHPI', 'race_white', 'std_number', 'enterdate'])


fin = sclsw(schoolSwitchersDF, bcpss, csa, year)
fin.to_csv('94-sclsw'+year+'.csv', index=False)
fin.head(6)
195 eenrol - G - Drop Dupes - enrollment DF
grade
lbl = """BCPSS - Education and Youth - Number of Students Officially Enrolled in 1st - 5th Grade"""
q16 = """ with tbl AS (
select (sum(
case
when grade = '1' OR grade = '2' OR grade = '3' OR grade = '4' OR grade = '5'
then 1
else 0
end)::numeric
) as result, csa
from vital_signs.match_csas_and_bc_by_geom('schools.enrollment_2016', 'gid', 'the_geom') a
left join schools.enrollment_2016 b on a.gid = b.gid
group by csa
)
update vital_signs.data
set eenrol = result from tbl where data.csa = tbl.csa and data_year = '2016';
"""
q17 = """with tbl AS (
select (sum( case
when grade = '1' OR grade = '2' OR grade = '3' OR grade = '4' OR grade = '5'
then 1 else 0
end)::numeric
) as result, csa
from vital_signs.match_csas_and_bc_by_geom('schools.enrollment_1617', 'gid', 'the_geom') a
left join schools.enrollment_1617 b on a.gid = b.gid
group by csa
)
select * from tbl where 1 = 1 ORDER BY csa ASC;
"""bcpssTemp.shape[0]bcpss.shape[0]enrollment.shape[0]
Whatever the Vital Signs (VS) year is, the school year is the year before. So for VS20, the school year to filter on (we need to know the date each student entered) is September 2019.
enrollment.dtypes#export
def eenrol(df, csa, yr):
    """Compute indicator 195 (eenrol): students enrolled in grades 1-5 per CSA.

    Args:
        df: Enrollment records with a string ``grade`` column and a
            ``CSA2010`` column. A ``count`` column set to 1 is written onto
            this frame in place.
        csa: CSA table with ``CSA2010``, ``geometry``, ``Shape__Length`` and
            ``Shape__Area`` columns; it defines which rows appear in the result.
        yr: Two-digit Vital Signs year as a string, e.g. ``'19'``.

    Returns:
        The per-CSA table with ``count`` and the indicator column
        ``'195-eenrol' + yr``. When the published Eenrol layer has a column
        for the comparison year, that column plus ``change`` (formatted as a
        two-decimal string) and ``percentChange`` are included as well.
    """
    indicator_id = '195'
    shortname = 'eenrol'
    fincol = indicator_id + '-' + shortname + yr

    # Each record counts as one enrolled student.
    df['count'] = 1
    numer = df.copy()
    # na=False: a record with a missing grade is simply not a match, instead
    # of putting NaN into the boolean mask and making the selection fail.
    numer = numer[numer['grade'].str.contains('^1$|^2$|^3$|^4$|^5$', regex=True, na=False)]

    # Group by CSA.
    numer = numer.groupby('CSA2010').sum(numeric_only=True)
    # Make sure ALL CSAs and Baltimore City are included.
    numer = csa.merge(numer, left_on='CSA2010', right_on='CSA2010', how='outer')
    numer.drop(columns=['geometry', 'Shape__Length', 'Shape__Area'], inplace=True)
    # The city-wide total is the sum of every row, including unmatched records
    # that the outer merge appends below the CSA rows.
    # NOTE(review): assumes the Baltimore City row sits at index 55 after the merge - confirm.
    numer.at[55, 'count'] = numer['count'].sum()

    # The indicator is a plain count.
    numer[fincol] = numer['count']

    # Compare against the previously published year, when the layer has it.
    compare_years = gpd.read_file("https://services1.arcgis.com/mVFRs7NF4iFitgbY/ArcGIS/rest/services/"+shortname.capitalize()+"/FeatureServer/0/query?where=1%3D1&outFields=*&returnGeometry=true&f=pgeojson")
    goback = 2 if yr == '19' else 3
    prev_year = shortname + str(int(yr) - goback)
    if prev_year in compare_years.columns:
        numer = numer.merge(compare_years[['CSA2010', prev_year]], left_on='CSA2010', right_on='CSA2010', how='outer')
        numer['change'] = numer[fincol] - numer[prev_year]
        numer['percentChange'] = numer['change'] / numer[prev_year] * 100
        numer['change'] = numer['change'].apply(lambda x: "{:.2f}".format(x))

    print('Records Matching Query: ', numer.size / len(numer.columns))
    # errors='ignore': only numeric columns survive the grouped sum, so some of
    # these helper columns may already be gone.
    return numer.drop(columns=['daysattend', 'SchoolSwitcherFlag',
                               'daysabsent', 'race_AIAN', 'race_asian', 'race_AA',
                               'race_NHPI', 'race_white', 'std_number', 'enterdate'],
                      errors='ignore')


fin = eenrol(enrollment, csa, year)
fin.to_csv('195-eenrol'+year+'.csv', index=False)
fin.tail(3)
196 menrol - G - Drop Dupes
grade
""" BCPSS - Education and Youth - Number of Students Officially Enrolled in 6th - 8th Grade"""
q16 = """with tbl AS (
select (sum(
case
when grade = '6' OR grade = '7' OR grade = '8'
then 1
else 0
end)::numeric
) as result, csa
from vital_signs.match_csas_and_bc_by_geom('schools.enrollment_2016', 'gid', 'the_geom') a
left join schools.enrollment_2016 b on a.gid = b.gid
group by csa
)
update vital_signs.data
set menrol = result from tbl where data.csa = tbl.csa and data_year = '2016';
"""
q17 = """with tbl AS (
select (sum( case
when grade = '6' OR grade = '7' OR grade = '8'
then 1 else 0 end)::numeric
) as result, csa
from vital_signs.match_csas_and_bc_by_geom('schools.enrollment_1617', 'gid', 'the_geom') a
left join schools.enrollment_1617 b on a.gid = b.gid
group by csa
)
select * from tbl where 1 = 1 ORDER BY csa ASC;
""""""
when grade = '6' OR grade = '7' OR grade = '8'
then 1 else 0 end)::numeric
"""#export
def menrol(df, csa, yr):
    """Compute indicator 196 (menrol): students enrolled in grades 6-8 per CSA.

    Args:
        df: Enrollment records with a string ``grade`` column and a
            ``CSA2010`` column. A ``count`` column set to 1 is written onto
            this frame in place.
        csa: CSA table with ``CSA2010``, ``geometry``, ``Shape__Length`` and
            ``Shape__Area`` columns; it defines which rows appear in the result.
        yr: Two-digit Vital Signs year as a string, e.g. ``'19'``.

    Returns:
        The per-CSA table with ``count`` and the indicator column
        ``'196-menrol' + yr``. When the published Menrol layer has a column
        for the comparison year, that column plus ``change`` (formatted as a
        two-decimal string) and ``percentChange`` are included as well.
    """
    indicator_id = '196'
    shortname = 'menrol'
    fincol = indicator_id + '-' + shortname + yr

    # Each record counts as one enrolled student.
    df['count'] = 1
    # Create the numerator (this indicator is a count, so there is no denominator).
    numer = df.copy()
    # na=False: a record with a missing grade is simply not a match, instead
    # of putting NaN into the boolean mask and making the selection fail.
    numer = numer[numer['grade'].str.contains('^6$|^7$|^8$', regex=True, na=False)]

    # Group by CSA.
    numer = numer.groupby('CSA2010').sum(numeric_only=True)
    # Make sure ALL CSAs and Baltimore City are included.
    numer = csa.merge(numer, left_on='CSA2010', right_on='CSA2010', how='outer')
    numer.drop(columns=['geometry', 'Shape__Length', 'Shape__Area'], inplace=True)
    # The city-wide total is the sum of every row, including unmatched records
    # that the outer merge appends below the CSA rows.
    # NOTE(review): assumes the Baltimore City row sits at index 55 after the merge - confirm.
    numer.at[55, 'count'] = numer['count'].sum()

    # The indicator is a plain count.
    numer[fincol] = numer['count']

    # Compare against the previously published year, when the layer has it.
    compare_years = gpd.read_file("https://services1.arcgis.com/mVFRs7NF4iFitgbY/ArcGIS/rest/services/"+shortname.capitalize()+"/FeatureServer/0/query?where=1%3D1&outFields=*&returnGeometry=true&f=pgeojson")
    goback = 2 if yr == '19' else 3
    prev_year = shortname + str(int(yr) - goback)
    if prev_year in compare_years.columns:
        numer = numer.merge(compare_years[['CSA2010', prev_year]], left_on='CSA2010', right_on='CSA2010', how='outer')
        numer['change'] = numer[fincol] - numer[prev_year]
        numer['percentChange'] = numer['change'] / numer[prev_year] * 100
        numer['change'] = numer['change'].apply(lambda x: "{:.2f}".format(x))

    print('Records Matching Query: ', numer.size / len(numer.columns))
    # errors='ignore': only numeric columns survive the grouped sum, so some of
    # these helper columns may already be gone.
    return numer.drop(columns=['daysattend', 'SchoolSwitcherFlag', 'wleavecode',
                               'daysabsent', 'race_AIAN', 'race_asian', 'race_AA',
                               'race_NHPI', 'race_white', 'std_number', 'enterdate'],
                      errors='ignore')


fin = menrol(enrollment, csa, year)
fin.to_csv('196-menrol'+year+'.csv', index=False)
fin.tail(6)
197 hsenrol - G - Drop Dupes
grade
lbl = """BCPSS - Education and Youth - Number of Students Officially Enrolled in 9th - 12th Grade """
q16 = """with tbl AS (
select (sum(
case
when grade = '9' OR grade = '10' OR grade = '11' OR grade = '12'
then 1
else 0
end)::numeric
) as result, csa
from vital_signs.match_csas_and_bc_by_geom('schools.enrollment_2016', 'gid', 'the_geom') a
left join schools.enrollment_2016 b on a.gid = b.gid
group by csa
)
update vital_signs.data
set hsenrol = result from tbl where data.csa = tbl.csa and data_year = '2016';
"""
q17 = """ with tbl AS (
select (sum( case
when grade = '9' OR grade = '10' OR grade = '11' OR grade = '12'
then 1 else 0 end)::numeric
) as result, csa
from vital_signs.match_csas_and_bc_by_geom('schools.enrollment_1617', 'gid', 'the_geom') a
left join schools.enrollment_1617 b on a.gid = b.gid
group by csa
)
select * from tbl where 1 = 1 ORDER BY csa ASC;
""""""
when grade = '9' OR grade = '10' OR grade = '11' OR grade = '12'
then 1 else 0 end)::numeric
"""#export
def hsenrol(df, csa, yr):
    """Compute indicator 197 (hsenrol): students enrolled in grades 9-12 per CSA.

    Args:
        df: Enrollment records with a string ``grade`` column and a
            ``CSA2010`` column. A ``count`` column set to 1 is written onto
            this frame in place.
        csa: CSA table with ``CSA2010``, ``geometry``, ``Shape__Length`` and
            ``Shape__Area`` columns; it defines which rows appear in the result.
        yr: Two-digit Vital Signs year as a string, e.g. ``'19'``.

    Returns:
        The per-CSA table with ``count`` and the indicator column
        ``'197-hsenrol' + yr``. When the published Hsenrol layer has a column
        for the comparison year, that column plus ``change`` (formatted as a
        two-decimal string) and ``percentChange`` are included as well.
    """
    indicator_id = '197'
    shortname = 'hsenrol'
    fincol = indicator_id + '-' + shortname + yr

    # Each record counts as one enrolled student.
    df['count'] = 1
    # Create the numerator (this indicator is a count, so there is no denominator).
    numer = df.copy()
    # na=False: a record with a missing grade is simply not a match, instead
    # of putting NaN into the boolean mask and making the selection fail.
    numer = numer[numer['grade'].str.contains('^9$|^10$|^11$|^12$', regex=True, na=False)]
    # Show which grade values were actually selected.
    print(numer['grade'].unique())

    # Group by CSA.
    numer = numer.groupby('CSA2010').sum(numeric_only=True)
    # Make sure ALL CSAs and Baltimore City are included.
    numer = csa.merge(numer, left_on='CSA2010', right_on='CSA2010', how='outer')
    numer.drop(columns=['geometry', 'Shape__Length', 'Shape__Area'], inplace=True)
    # Baltimore City is the sum of the community sums; any unmatched row the
    # grouping keeps below the CSA rows is included in that sum too.
    # NOTE(review): assumes the Baltimore City row sits at index 55 after the merge - confirm.
    numer.at[55, 'count'] = numer['count'].sum()

    # The indicator is a plain count.
    numer[fincol] = numer['count']

    # Compare against the previously published year, when the layer has it.
    compare_years = gpd.read_file("https://services1.arcgis.com/mVFRs7NF4iFitgbY/ArcGIS/rest/services/"+shortname.capitalize()+"/FeatureServer/0/query?where=1%3D1&outFields=*&returnGeometry=true&f=pgeojson")
    goback = 2 if yr == '19' else 3
    prev_year = shortname + str(int(yr) - goback)
    if prev_year in compare_years.columns:
        numer = numer.merge(compare_years[['CSA2010', prev_year]], left_on='CSA2010', right_on='CSA2010', how='outer')
        numer['change'] = numer[fincol] - numer[prev_year]
        numer['percentChange'] = numer['change'] / numer[prev_year] * 100
        numer['change'] = numer['change'].apply(lambda x: "{:.2f}".format(x))

    print('Records Matching Query: ', numer.size / len(numer.columns))
    # errors='ignore': only numeric columns survive the grouped sum, so some of
    # these helper columns may already be gone.
    return numer.drop(columns=['daysattend', 'daysabsent', 'race_AIAN', 'race_asian', 'race_AA',
                               'race_NHPI', 'race_white', 'std_number', 'enterdate'],
                      errors='ignore')


fin = hsenrol(enrollment, csa, year)
fin.to_csv('197-hsenrol'+year+'.csv', index=False)
fin.tail(6)
206 pmath3 - G ❌
grade parcc_perf subject
lbl = """BCPSS - Education and Youth - Percentage of 3rd Grade Students who Met or Exceeded PARCC Math"""
q16 = """with numerator as (
select sum( case
when (grade = '3') AND (parcc_perf =4 OR parcc_perf = 5)
then 1 else 0 end
)::numeric as math3_proficiency, csa
from vital_signs.match_csas_and_bc_by_geom('schools.math_2016', 'gid', 'the_geom') a
left join schools.math_2016 b on a.gid = b.gid
group by csa
order by csa
),
denominator AS (
select sum( case
when (grade = '3')
then 1 else 0 end
)::numeric as math3_testtakers, csa
from vital_signs.match_csas_and_bc_by_geom('schools.math_2016', 'gid', 'the_geom') a
left join schools.math_2016 b on a.gid = b.gid
group by csa
order by csa
),
tbl AS (
select vital_signs.div_zero (numerator.math3_proficiency, denominator.math3_testtakers)*(100::numeric) as result, numerator.csa
from numerator left join denominator on numerator.csa = denominator.csa
)
select * from tbl
"""
q17 = """with numerator as (
select sum( case
when (grade = '3') AND (parcc_perf =4 OR parcc_perf = 5) AND subject = 'Mathematics'
then 1 else 0 end
)::numeric as math3_proficiency, csa
from vital_signs.match_csas_and_bc_by_geom('schools.parcc_1617', 'gid', 'the_geom') a
left join schools.parcc_1617 b on a.gid = b.gid
group by csa
order by csa
),
denominator AS (
select sum( case
when (grade = '3') AND subject = 'Mathematics'
then 1 else 0 end
)::numeric as math3_testtakers, csa
from vital_signs.match_csas_and_bc_by_geom('schools.parcc_1617', 'gid', 'the_geom') a
left join schools.parcc_1617 b on a.gid = b.gid
group by csa
order by csa
),
tbl AS (
select vital_signs.div_zero (numerator.math3_proficiency, denominator.math3_testtakers)*(100::numeric) as result, numerator.csa
from numerator left join denominator on numerator.csa = denominator.csa
)
select * from tbl where 1 = 1 ORDER BY csa ASC;""" when (grade = '3') AND (parcc_perf =4 OR parcc_perf = 5) AND subject = 'Mathematics'
then 1 else 0 end
/
when (grade = '3') AND subject = 'Mathematics'
then 1 else 0 end
207 pread3 - G ❌
grade parcc_perf subject
BCPSS
Education and Youth
Percentage of 3rd Grade Students who Met or Exceeded PARCC Reading
q16 = """with numerator as (
select sum( case
when (grade = '3') AND (parcc_perf =4 OR parcc_perf = 5)
then 1 else 0 end
)::numeric as eng_prof, csa
from vital_signs.match_csas_and_bc_by_geom('schools.english_2016', 'gid', 'the_geom') a
left join schools.english_2016 b on a.gid = b.gid
group by csa
order by csa
),
denominator AS (
select sum( case
when (grade = '3')
then 1 else 0 end
)::numeric as eng_prof, csa
from vital_signs.match_csas_and_bc_by_geom('schools.english_2016', 'gid', 'the_geom') a
left join schools.english_2016 b on a.gid = b.gid
group by csa
order by csa
),
tbl AS (
select vital_signs.div_zero (numerator.eng_prof, denominator.eng_prof)*(100::numeric) as result, numerator.csa
from numerator
left join denominator on numerator.csa = denominator.csa
)
select * from tbl where 1 = 1 ORDER BY csa ASC;
"""
q17 = """ with numerator as (
select sum( case
when (grade = '3') AND (parcc_perf =4 OR parcc_perf = 5) AND subject = 'English Language Arts/Literacy'
then 1 else 0 end
)::numeric as eng_prof, csa
from vital_signs.match_csas_and_bc_by_geom('schools.parcc_1617', 'gid', 'the_geom') a
left join schools.parcc_1617 b on a.gid = b.gid
group by csa
order by csa
),
denominator AS (
select sum( case
when (grade = '3') AND subject = 'English Language Arts/Literacy'
then 1 else 0 end
)::numeric as eng_prof, csa
from vital_signs.match_csas_and_bc_by_geom('schools.parcc_1617', 'gid', 'the_geom') a
left join schools.parcc_1617 b on a.gid = b.gid
group by csa
order by csa
),
tbl AS (
select vital_signs.div_zero (numerator.eng_prof, denominator.eng_prof)*(100::numeric) as result, numerator.csa
from numerator
left join denominator on numerator.csa = denominator.csa
)
select * from tbl where 1 = 1 ORDER BY csa ASC;
""" when (grade = '3') AND (parcc_perf =4 OR parcc_perf = 5) AND subject = 'English Language Arts/Literacy'
then 1 else 0 end
/
when (grade = '3') AND subject = 'English Language Arts/Literacy'
then 1 else 0 end
208 pmath5 - G ❌
grade parcc_perf subject
lbl = """BCPSS - Education and Youth - Percentage of 5th Grade Students who Met or Exceeded PARCC Math"""
q16 = """ with numerator as (
select sum( case
when (grade = '5') AND (parcc_perf =4 OR parcc_perf = 5)
then 1 else 0 end
)::numeric as math3_proficiency, csa
from vital_signs.match_csas_and_bc_by_geom('schools.math_2016', 'gid', 'the_geom') a
left join schools.math_2016 b on a.gid = b.gid
group by csa
order by csa
),
denominator AS (
select sum( case
when (grade = '5')
then 1 else 0 end
)::numeric as math3_testtakers, csa
from vital_signs.match_csas_and_bc_by_geom('schools.math_2016', 'gid', 'the_geom') a
left join schools.math_2016 b on a.gid = b.gid
group by csa
order by csa
),
tbl AS (
select vital_signs.div_zero (numerator.math3_proficiency, denominator.math3_testtakers)*(100::numeric) as result, numerator.csa
from numerator
left join denominator on numerator.csa = denominator.csa
)
select * from tbl
"""
q17 = """
with numerator as (
select sum( case
when (grade = '5') AND (parcc_perf =4 OR parcc_perf = 5) AND subject = 'Mathematics'
then 1 else 0 end
)::numeric as math3_proficiency, csa
from vital_signs.match_csas_and_bc_by_geom('schools.parcc_1617', 'gid', 'the_geom') a
left join schools.parcc_1617 b on a.gid = b.gid
group by csa
order by csa
),
denominator AS (
select sum( case
when (grade = '5') AND subject = 'Mathematics'
then 1 else 0 end
)::numeric as math3_testtakers, csa
from vital_signs.match_csas_and_bc_by_geom('schools.parcc_1617', 'gid', 'the_geom') a
left join schools.parcc_1617 b on a.gid = b.gid
group by csa
order by csa
),
tbl AS (
select vital_signs.div_zero (numerator.math3_proficiency, denominator.math3_testtakers)*(100::numeric) as result, numerator.csa
from numerator
left join denominator on numerator.csa = denominator.csa
)
select * from tbl
""" when (grade = '5') AND (parcc_perf =4 OR parcc_perf = 5) AND subject = 'Mathematics'
then 1 else 0 end
/
when (grade = '5') AND subject = 'Mathematics'
then 1 else 0 end
209 pread5 - G ❌
grade parcc_perf subject
lbl = """BCPSS - Education and Youth - Percentage of 5th Grade Students who Met or Exceeded PARCC Reading"""
queries16 = """select sum(
case
when (grade = '5') AND (parcc_perf =4 OR parcc_perf = 5)
then 1
else 0
end)::numeric as eng_prof, csa
from vital_signs.match_csas_and_bc_by_geom('schools.english_2016', 'gid', 'the_geom') a
left join schools.english_2016 b on a.gid = b.gid
group by csa
order by csa;
select sum(
case
when (grade_code = '5')
then 1
else 0
end)::numeric as eng_prof, csa
from vital_signs.match_csas_and_bc_by_geom('schools.english_2016', 'gid', 'the_geom') a
left join schools.english_2016 b on a.gid = b.gid
group by csa
order by csa;
"""
# Legacy 2016-17 reference query for indicator 209 (pread5): share of 5th grade
# English Language Arts/Literacy test takers with a PARCC performance level of 4 or 5.
q17 = """with numerator as (
select sum( case
when (grade = '5') AND (parcc_perf =4 OR parcc_perf = 5) AND subject = 'English Language Arts/Literacy'
then 1 else 0 end
)::numeric as eng_prof, csa
from vital_signs.match_csas_and_bc_by_geom('schools.parcc_1617', 'gid', 'the_geom') a
left join schools.parcc_1617 b on a.gid = b.gid
group by csa
order by csa
),
denominator AS (
select sum( case
when (grade = '5') AND subject = 'English Language Arts/Literacy'
then 1 else 0 end
)::numeric as eng5_testtakers, csa
from vital_signs.match_csas_and_bc_by_geom('schools.parcc_1617', 'gid', 'the_geom') a
left join schools.parcc_1617 b on a.gid = b.gid
group by csa
order by csa
),
tbl AS (
select vital_signs.div_zero (numerator.eng_prof, denominator.eng5_testtakers)*(100::numeric) as result, numerator.csa
from numerator left join denominator on numerator.csa = denominator.csa
)
select * from tbl
"""
# Numerator / denominator conditions, in short:
#   when (grade = '5') AND (parcc_perf =4 OR parcc_perf = 5) AND subject = 'English Language Arts/Literacy'
#   then 1 else 0 end
#   /
#   when (grade = '5') AND subject = 'English Language Arts/Literacy'
#   then 1 else 0 end
210 pmath8 - G ❌
grade parcc_perf subject
lbl = """BCPSS - Education and Youth - Percentage of 8th Grade Students who Met or Exceeded PARCC Math"""
q16 = """with numerator as (
select sum( case
when (grade = '8') AND (parcc_perf =4 OR parcc_perf = 5)
then 1 else 0 end
)::numeric as math3_proficiency, csa
from vital_signs.match_csas_and_bc_by_geom('schools.math_2016', 'gid', 'the_geom') a
left join schools.math_2016 b on a.gid = b.gid
group by csa
order by csa
),
denominator AS (
select sum( case
when (grade = '8')
then 1 else 0 end
)::numeric as math3_testtakers, csa
from vital_signs.match_csas_and_bc_by_geom('schools.math_2016', 'gid', 'the_geom') a
left join schools.math_2016 b on a.gid = b.gid
group by csa
order by csa
),
tbl AS (
select vital_signs.div_zero (numerator.math3_proficiency, denominator.math3_testtakers)*(100::numeric) as result, numerator.csa
from numerator
left join denominator on numerator.csa = denominator.csa
)
select * from tbl
"""
q17 = """with numerator as (
select sum( case
when (grade = '8') AND (parcc_perf =4 OR parcc_perf = 5) AND subject = 'Mathematics'
then 1 else 0 end
)::numeric as math3_proficiency, csa
from vital_signs.match_csas_and_bc_by_geom('schools.parcc_1617', 'gid', 'the_geom') a
left join schools.parcc_1617 b on a.gid = b.gid
group by csa
order by csa
),
denominator AS (
select sum( case
when (grade = '8') AND subject = 'Mathematics'
then 1 else 0 end
)::numeric as math3_testtakers, csa
from vital_signs.match_csas_and_bc_by_geom('schools.parcc_1617', 'gid', 'the_geom') a
left join schools.parcc_1617 b on a.gid = b.gid
group by csa
order by csa
),
tbl AS (
select vital_signs.div_zero (numerator.math3_proficiency, denominator.math3_testtakers)*(100::numeric) as result, numerator.csa
from numerator
left join denominator on numerator.csa = denominator.csa
)
select * from tbl
""" when (grade = '8') AND (parcc_perf =4 OR parcc_perf = 5) AND subject = 'Mathematics'
then 1 else 0 end
/
when (grade = '8') AND subject = 'Mathematics'
then 1 else 0 end
211 pread8 - G ❌
grade parcc_perf subject eng_prof
lbl = """BCPSS - Education and Youth - Percentage of 8th Grade Students who Met or Exceeded PARCC Reading"""
q16 = """with numerator as (
select sum( case
when (grade = '8') AND (parcc_perf =4 OR parcc_perf = 5)
then 1 else 0 end
)::numeric as eng_prof, csa
from vital_signs.match_csas_and_bc_by_geom('schools.english_2016', 'gid', 'the_geom') a
left join schools.english_2016 b on a.gid = b.gid
group by csa
order by csa
),
denominator AS (
select sum( case
when (grade = '8')
then 1 else 0
end
)::numeric as eng8_testtakers, csa
from vital_signs.match_csas_and_bc_by_geom('schools.english_2016', 'gid', 'the_geom') a
left join schools.english_2016 b on a.gid = b.gid
group by csa
order by csa
),
tbl AS (
select vital_signs.div_zero (numerator.eng_prof, denominator.eng8_testtakers)*(100::numeric) as result, numerator.csa
from numerator left join denominator on numerator.csa = denominator.csa
)
select * from tbl
"""
q17 = """with numerator as (
select sum( case
when (grade = '8') AND (parcc_perf =4 OR parcc_perf = 5) AND subject = 'English Language Arts/Literacy'
then 1 else 0 end
)::numeric as eng_prof, csa
from vital_signs.match_csas_and_bc_by_geom('schools.parcc_1617', 'gid', 'the_geom') a
left join schools.parcc_1617 b on a.gid = b.gid
group by csa
order by csa
),
denominator AS (
select sum( case
when (grade = '8') AND subject = 'English Language Arts/Literacy'
then 1 else 0 end
)::numeric as eng_prof, csa
from vital_signs.match_csas_and_bc_by_geom('schools.parcc_1617', 'gid', 'the_geom') a
left join schools.parcc_1617 b on a.gid = b.gid
group by csa
order by csa
),
tbl AS (
select vital_signs.div_zero (numerator.eng_prof, denominator.eng_prof)*(100::numeric) as result, numerator.csa
from numerator
left join denominator on numerator.csa = denominator.csa
)
select * from tbl where 1 = 1 ORDER BY csa ASC;
"""
when (grade = '8') AND (parcc_perf =4 OR parcc_perf = 5) AND subject = 'English Language Arts/Literacy'
then 1 else 0 end
/
when (grade = '8') AND subject = 'English Language Arts/Literacy'
then 1 else 0 end
212 palg1 - G ? - ❌
grade parcc_perf subject
lbl = """BCPSS - Education and Youth - Percentage of Students who Met or Exceeded PARCC Algebra I"""
q16 = """with numerator as (
select sum( case
when (grade = '9' or grade = '10' or grade = '11' or grade = '12')
AND (parcc_perf =4 OR parcc_perf = 5)
then 1 else 0
end
)::numeric as algebra1_met, csa
from vital_signs.match_csas_and_bc_by_geom('schools.algebra1_2016', 'gid', 'the_geom') a
left join schools.algebra1_2016 b on a.gid = b.gid
group by csa
order by csa
),
denominator AS (
select sum( case
when (grade = '9' or grade = '10' or grade = '11' or grade = '12')
then 1 else 0
end
)::numeric as algebra1_testtakers, csa
from vital_signs.match_csas_and_bc_by_geom('schools.algebra1_2016', 'gid', 'the_geom') a
left join schools.algebra1_2016 b on a.gid = b.gid
group by csa
order by csa
),
tbl AS (
select vital_signs.div_zero (numerator.algebra1_met, denominator.algebra1_testtakers)*(100::numeric) as result, numerator.csa
from numerator left join denominator on numerator.csa = denominator.csa
)
select * from tbl
"""
q17 = """with numerator as (
select sum( case
when (grade = '9' or grade = '10' or grade = '11' or grade = '12') AND subject = 'Algebra I'
AND (parcc_perf =4 OR parcc_perf = 5)
then 1 else 0
end
)::numeric as algebra1_met, csa
from vital_signs.match_csas_and_bc_by_geom('schools.parcc_1617', 'gid', 'the_geom') a
left join schools.parcc_1617 b on a.gid = b.gid
group by csa
order by csa
),
denominator AS (
select sum( case
when (grade = '9' or grade = '10' or grade = '11' or grade = '12') AND subject = 'Algebra I'
then 1 else 0
end
)::numeric as algebra1_testtakers, csa
from vital_signs.match_csas_and_bc_by_geom('schools.parcc_1617', 'gid', 'the_geom') a
left join schools.parcc_1617 b on a.gid = b.gid
group by csa
order by csa
),
tbl AS (
select vital_signs.div_zero (numerator.algebra1_met, denominator.algebra1_testtakers)*(100::numeric) as result, numerator.csa
from numerator left join denominator on numerator.csa = denominator.csa
)
select * from tbl
""""""
when (grade = '9' or grade = '10' or grade = '11' or grade = '12') AND subject = 'Algebra I'
AND (parcc_perf =4 OR parcc_perf = 5)
/
when (grade = '9' or grade = '10' or grade = '11' or grade = '12') AND subject = 'Algebra I'
then 1 else 0
"""def palg1(df, csa, yr):
df['count'] = 1
# Create the Denominator
denom = df.copy().sort_values('enterdate', ascending=False).drop_duplicates(['std_number'])
denom = denom[ denom['grade'].str.contains('9|10|11|12', regex=True) & (denom['subject'] == 'Algebra I') ]
# Create the Numerator
denom['parcc_perf'] = denom['parcc_perf'].astype(str)
numer = denom[ ( denom['parcc_perf'].str.contains('4|5', regex=True) ) ]
id = '212'
shortname = 'palg1'
# Group by CSA
numer = numer.groupby('CSA2010').sum(numeric_only=True)
denom = denom.groupby('CSA2010').sum(numeric_only=True)
# Make sure ALL csas and BaltimoreCity are included and sorted.
numer = csa.merge( numer, left_on='CSA2010', right_on='CSA2010', how='outer' )
denom = csa.merge( denom, left_on='CSA2010', right_on='CSA2010', how='outer' )
numer.drop( columns=['geometry', 'Shape__Length','Shape__Area'], inplace=True)
numer['denomCount'] = denom['count']
# Do after sortViaCsaMerge to get index right. False records would show underneath it but still get added to the sum.
numer.at[55,'count']=numer['count'].sum()
numer.at[55,'denomCount']=numer['denomCount'].sum()
# Perform the calculation
numer[id+'-'+shortname+year] = numer['count'] / numer['denomCount'] * 100
compareYears = gpd.read_file("https://services1.arcgis.com/mVFRs7NF4iFitgbY/ArcGIS/rest/services/"+shortname+"/FeatureServer/0/query?where=1%3D1&outFields=*&returnGeometry=true&f=pgeojson");
prevYear = shortname+ str( int(year) - 1 )
if prevYear in compareYears.columns:
numer = numer.merge( compareYears[['CSA2010', prevYear]], left_on='CSA2010', right_on='CSA2010', how='outer' )
numer['change'] = numer[id+'-'+shortname+year] - numer[ prevYear ]
numer['percentChange'] = numer['change' ] / numer[ prevYear ] * 100
numer['change'] = numer['change'].apply(lambda x: "{:.2f}".format(x) )
print( 'Records Matching Query: ', numer.size / len(numer.columns) )
return numer.drop(columns=['daysattend', 'daysabsent', 'GENDER', 'race_AIAN', 'race_asian', 'race_AA', 'race_NHPI', 'race_white', 'std_number', 'enterdate'])
fin = palg1(bcpss, csa, year)
fin.to_csv('212-palg1'+year+'.csv', index=False)
fin.head(60)
213 palg2 - G ? - ❌
grade parcc_perf subject
lbl = """Percentage of Students who Met or Exceeded PARCC Algebra II - Education and Youth - BCPSS"""
query16 = """with numerator as (
select sum( case
when (grade = '9' or grade = '10' or grade = '11' or grade = '12')
AND (parcc_perf =4 OR parcc_perf = 5)
then 1 else 0
end
)::numeric as algebra1_met, csa
from vital_signs.match_csas_and_bc_by_geom('schools.algebra2_2016', 'gid', 'the_geom') a
left join schools.algebra2_2016 b on a.gid = b.gid
group by csa
order by csa
),
denominator AS (
select sum( case
when (grade = '9' or grade = '10' or grade = '11' or grade = '12')
then 1 else 0
end
)::numeric as algebra1_testtakers, csa
from vital_signs.match_csas_and_bc_by_geom('schools.algebra2_2016', 'gid', 'the_geom') a
left join schools.algebra2_2016 b on a.gid = b.gid
group by csa
order by csa
),
tbl AS (
select vital_signs.div_zero (numerator.algebra1_met, denominator.algebra1_testtakers)*(100::numeric) as result, numerator.csa
from numerator left join denominator on numerator.csa = denominator.csa
)
select * from tbl
"""
query17 = """ with numerator as (
select sum( case
when (grade = '9' or grade = '10' or grade = '11' or grade = '12') AND subject = 'Algebra II'
AND (parcc_perf =4 OR parcc_perf = 5)
then 1 else 0
end
)::numeric as algebra1_met, csa
from vital_signs.match_csas_and_bc_by_geom('schools.parcc_1617', 'gid', 'the_geom') a
left join schools.parcc_1617 b on a.gid = b.gid
group by csa
order by csa
),
denominator AS (
select sum( case
when (grade = '9' or grade = '10' or grade = '11' or grade = '12') AND subject = 'Algebra II'
then 1 else 0
end
)::numeric as algebra1_testtakers, csa
from vital_signs.match_csas_and_bc_by_geom('schools.parcc_1617', 'gid', 'the_geom') a
left join schools.parcc_1617 b on a.gid = b.gid
group by csa
order by csa
),
tbl AS (
select vital_signs.div_zero (numerator.algebra1_met, denominator.algebra1_testtakers)*(100::numeric) as result, numerator.csa
from numerator left join denominator on numerator.csa = denominator.csa
)
select * from tbl
""""""
when (grade = '9' or grade = '10' or grade = '11' or grade = '12') AND subject = 'Algebra II'
AND (parcc_perf =4 OR parcc_perf = 5)
/
when (grade = '9' or grade = '10' or grade = '11' or grade = '12') AND subject = 'Algebra II'
"""def palg2(df, csa, yr):
df['count'] = 1
# Create the Denominator
denom = df.copy().sort_values('enterdate', ascending=False).drop_duplicates(['std_number'])
denom = denom[ denom['grade'].str.contains('9|10|11|12', regex=True) & (denom['subject'] == 'Algebra I') ]
# Create the Numerator
denom['parcc_perf'] = denom['parcc_perf'].astype(str)
numer = denom[ ( denom['parcc_perf'].str.contains('4|5', regex=True) ) ]
id = '213'
shortname = 'palg2'
# Group by CSA
numer = numer.groupby('CSA2010').sum(numeric_only=True)
denom = denom.groupby('CSA2010').sum(numeric_only=True)
# Make sure ALL csas and BaltimoreCity are included and sorted.
numer = csa.merge( numer, left_on='CSA2010', right_on='CSA2010', how='outer' )
denom = csa.merge( denom, left_on='CSA2010', right_on='CSA2010', how='outer' )
numer.drop( columns=['geometry', 'Shape__Length','Shape__Area'], inplace=True)
numer['denomCount'] = denom['count']
# Do after sortViaCsaMerge to get index right. False records would show underneath it but still get added to the sum.
numer.at[55,'count']=numer['count'].sum()
numer.at[55,'denomCount']=numer['denomCount'].sum()
# Perform the calculation
numer[id+'-'+shortname+year] = numer['count'] / numer['denomCount'] * 100
compareYears = gpd.read_file("https://services1.arcgis.com/mVFRs7NF4iFitgbY/ArcGIS/rest/services/"+shortname+"/FeatureServer/0/query?where=1%3D1&outFields=*&returnGeometry=true&f=pgeojson");
prevYear = shortname+ str( int(year) - 1 )
if prevYear in compareYears.columns:
numer = numer.merge( compareYears[['CSA2010', prevYear]], left_on='CSA2010', right_on='CSA2010', how='outer' )
numer['change'] = numer[id+'-'+shortname+year] - numer[ prevYear ]
numer['percentChange'] = numer['change' ] / numer[ prevYear ] * 100
numer['change'] = numer['change'].apply(lambda x: "{:.2f}".format(x) )
print( 'Records Matching Query: ', numer.size / len(numer.columns) )
return numer.drop(columns=['daysattend', 'daysabsent', 'GENDER', 'race_AIAN', 'race_asian', 'race_AA', 'race_NHPI', 'race_white', 'std_number', 'enterdate'])
fin = palg2(bcpss, csa, year)
fin.to_csv('213-palg2'+year+'.csv', index=False)
fin.head(60)
214 kra - O ❌
std_number Demonstrating comp
lbl = """ Kindergarten Readiness - Education and Youth - BCPSS"""
desc = """The Kindergarten Readiness Assessment (KRA) measure children’s readiness to do kindergarten work.
The KRA is a developmentally appropriate assessment tool that measures of incoming public-school
kindergarteners across four learning domains. A child is considered to be ready for kindergarten
if assessed as “demonstrating” the foundational skills and behaviors that prepare him/her for
curriculum based on the kindergarten standards. """
query16 = """with numerator as (
select csa, count ( DISTINCT std_numb_1 ) as test_met
from vital_signs.match_csas_and_bc_by_geom('schools.kra_2016', 'gid', 'the_geom') as a
left join schools.kra_2016 as b
on a.gid = b.gid AND comp = 'Demonstrating'
group by csa
order by csa
),
denominator AS (
select csa, count ( DISTINCT std_number ) test_takers
from vital_signs.match_csas_and_bc_by_geom('schools.kra_2016', 'gid', 'the_geom') as a
left join schools.kra_2016 as b
on a.gid = b.gid AND comp NOT IN ('Other', 'Incomplete')
group by csa
order by csa
),
tbl AS (
select vital_signs.div_zero (numerator.test_met, denominator.test_takers)*(100::numeric)
as result, numerator.csa
from numerator
left join denominator
on numerator.csa = denominator.csa
)
select * from tbl
"""
# SQL text for the KRA indicator computed from schools.kra_1617.
#   numerator   - per csa, count of distinct std_number joined on
#                 comp = 'Demonstrating'            -> test_met
#   denominator - per csa, count of distinct std_number joined on
#                 comp NOT IN ('Other', 'Incomplete') -> test_takers
#   tbl         - vital_signs.div_zero(test_met, test_takers) * 100 as
#                 result, one row per numerator.csa
# The comp filters are part of the LEFT JOIN condition rather than a WHERE
# clause, so rows from match_csas_and_bc_by_geom without a qualifying match
# are still kept on the left side of the join.
# NOTE(review): vital_signs.div_zero is presumably a divide-by-zero-safe
# division helper - confirm against the database definition.
query17 = """with numerator as (
select csa, count ( DISTINCT std_number ) as test_met
from vital_signs.match_csas_and_bc_by_geom('schools.kra_1617', 'gid', 'the_geom') as a
left join schools.kra_1617 as b
on a.gid = b.gid AND comp = 'Demonstrating'
group by csa
order by csa
),
denominator AS (
select csa, count ( DISTINCT std_number ) test_takers
from vital_signs.match_csas_and_bc_by_geom('schools.kra_1617', 'gid', 'the_geom') as a
left join schools.kra_1617 as b
on a.gid = b.gid AND comp NOT IN ('Other', 'Incomplete')
group by csa
order by csa
),
tbl AS (
select vital_signs.div_zero (numerator.test_met, denominator.test_takers)*(100::numeric)
as result, numerator.csa
from numerator
left join denominator
on numerator.csa = denominator.csa
)
select * from tbl
"""
# Free-text description of a school-readiness indicator based on the Maryland
# Model for School Readiness (MMSR), as the text itself states.
# NOTE(review): presumably kept as background for the pre-KRA indicator that
# the mmsr_2013 query refers to; no use of this variable is visible in this
# section - confirm before removing or renaming.
uhmm = """
The percentage of children whose composite score indicates full school readiness out of all
kindergarten school children tested within an area in a school year. The Maryland Model for
School Readiness (MMSR) is an assessment and instructional system that was designed to provide
parents, teachers, and early childhood providers with a common understanding of what children
know and are able to do upon entering school. Under the MMSR system, all children entering
kindergarten are assessed for level of mastery across several learning domains. These domains
include: social and personal development; language and literacy; mathematical thinking;
scientific thinking; social studies; the arts; and physical development and health.
Kindergarten teachers must evaluate students during the first few months of the kindergarten
year using selected Work Sampling System (WSS) indicators and report their ratings by the
end of November of each year to the state. """
# Legacy SQL for the 2013 MMSR indicator, held as two ';'-terminated
# statements in one string:
#   1. mmsr13nbr_bcf       - per boundaries.bcf_areas name, distinct sid count
#                            from schools.mmsr_2013 rows with compscore = 3
#   2. mmsr_total13nbr_bcf - the same count without the compscore filter
# Each statement left-joins the per-boundary counts back onto the full
# boundary list, so every bcf_areas name appears in the output.
#
# Two defects of the original text are fixed here:
#   * a stray '"' in front of the first SQL comment, which opened an
#     unterminated quoted identifier;
#   * the first subquery referenced bounds.name while aliasing the boundary
#     table as "a"; the alias is now "bounds", as in the second statement.
# NOTE(review): the second statement is labelled mmsr_total13nbr_bcf but still
# names its output column mmsr13nbr; left unchanged because downstream
# consumers of the column name are not visible here.
query2013 = """
-- mmsr13nbr_bcf
--mmsr13 indicator number 80
SELECT bAll.name AS Bound, sum(bQuery.mmsr13nbr) as mmsr13nbr
FROM boundaries.bcf_areas bAll
LEFT JOIN (
SELECT bounds.name as Boundary, (count(DISTINCT b.sid::numeric(20,4))) as mmsr13nbr
FROM schools.mmsr_2013 as b
LEFT JOIN boundaries.bcf_areas as bounds
ON ST_Contains(bounds.the_geom, b.the_geom)
WHERE compscore = 3
GROUP BY bounds.name
ORDER BY bounds.name
) bQuery
ON bAll.name = bQuery.Boundary
GROUP BY Bound
ORDER BY Bound;
-- mmsr_total13nbr_bcf
--mmsr13 indicator number 80
SELECT bAll.name AS Bound, sum(bQuery.mmsr13nbr) as mmsr13nbr
FROM boundaries.bcf_areas bAll
LEFT JOIN (
SELECT bounds.name as Boundary, (count(DISTINCT Tables.sid::numeric(20,4))) as mmsr13nbr
FROM schools.mmsr_2013 as Tables
LEFT JOIN boundaries.bcf_areas as bounds
ON ST_Contains(bounds.the_geom, Tables.the_geom)
GROUP BY bounds.name
ORDER BY bounds.name
) bQuery
ON bAll.name = bQuery.Boundary
GROUP BY Bound
ORDER BY Bound;
"""
specialnote = """2014 and 2015 does not exist for either kindergarden Indicator. These two years the tests were in the process of changing."""query17 = """with numerator = on a.gid = b.gid AND comp = 'Demonstrating'
denominator AS on a.gid = b.gid AND comp NOT IN ('Other', 'Incomplete')