# Source code for sensortoolkit.model._sensor_ols

# -*- coding: utf-8 -*-
"""This module contains a method for computing the ordinary least-squares (OLS)
regression between collocated sensor and FRM/FEM reference measurements.

================================================================================

@Author:
  | Samuel Frederick, NSSC Contractor (ORAU)
  | U.S. EPA / ORD / CEMM / AMCD / SFSB

Created:
  Mon Jul 27 09:21:14 2020
Last Updated:
  Mon Jul 27 10:47:00 2020
"""
import statsmodels.formula.api as smf


def sensor_ols(df, ref_df, y_var=None, x_vars=None):
    """Fit and print an OLS regression for selected sensor data variables.

    The ``y_var`` column of ``ref_df`` is joined onto ``df``, rows containing
    a missing value are dropped, and the formula
    ``'<y_var> ~ <x_vars[0]> + <x_vars[1]> + ...'`` is fit with
    ``statsmodels.formula.api.ols``. The regression summary is printed to
    stdout before the fitted results are returned.

    Args:
        df (Pandas Dataframe):
            Sensor data.
        ref_df (Pandas Dataframe):
            Reference data. Must contain a column named ``y_var``.
        y_var (str):
            Name of dependent variable (often sensor concentration values).
            Used both as the ``ref_df`` column to join and as the left-hand
            side of the regression formula.
        x_vars (list):
            Column names for independent variables (reference conc., met
            params, etc.). ``None`` (the default) is treated as an empty
            list.

    Returns:
        results:
            statsmodels.regression.linear_model.RegressionResults instance.
            See the link below for more info on attributes:
            https://www.statsmodels.org/stable/generated/statsmodels.regression.linear_model.RegressionResults.html

    """
    # Avoid a shared mutable default; None stands in for "no regressors".
    if x_vars is None:
        x_vars = []

    # Append the ref_df column named by y_var to the sensor dataframe. Note
    # that dropna() is applied to the whole joined frame, so a missing value
    # in any column (not only the model variables) removes that row.
    df = df.join(ref_df[y_var]).dropna()

    # Right-hand side of the OLS equation, e.g. 'Temp + RH'
    x_str = ' + '.join(x_vars)

    # OLS regression
    ols_eqn = y_var + ' ~ ' + x_str
    results = smf.ols(ols_eqn, data=df).fit()
    print(results.summary())

    return results