Source code for sensortoolkit.lib_utils._flatten_datasets

# -*- coding: utf-8 -*-
"""
This module contains a function for combining the datasets of AirSensor and
ReferenceMonitor objects, corresponding to instruments that have been
collocated at an ambient monitoring site, into a single pandas DataFrame object
for each sampling or averaging interval present in the sensor datasets, and
(optionally) saving each DataFrame as a comma-separated value file.

================================================================================

@Author:
  | Samuel Frederick, NSSC Contractor (ORAU)
  | U.S. EPA / ORD / CEMM / AMCD / SFSB


Created:
  Mon Jan 31 11:06:57 2022
Last Updated:
  Mon Jan 31 11:06:57 2022
"""
import pandas as pd
from sensortoolkit.datetime_utils import get_todays_date
from sensortoolkit.lib_utils import flatten_list

def flatten_datasets(AirSensor, ReferenceMonitor, write_to_file=False):
    """Merge sensor and reference datasets into one DataFrame per interval.

    For every interval key in ``AirSensor.data``, the DataFrames of each
    sensor unit are outer-joined on their index after the unit key is appended
    to the column names (``<column>_<sensor_key>``). Reference DataFrames
    stored under the same interval key are then outer-joined with ``_Ref``
    appended to their column names, and the reference site information columns
    are attached without a suffix.

    Args:
        AirSensor (sensortoolkit.AirSensor): Object whose ``data`` attribute
            is a nested mapping, ``data[interval][sensor_key]``, of pandas
            DataFrames.
        ReferenceMonitor (sensortoolkit.ReferenceMonitor): Object whose
            ``data`` attribute is a nested mapping,
            ``data[classifier][interval]``, of pandas DataFrames. Each
            DataFrame at an interval shared with the sensor data must contain
            every column named in ``site_info_cols``.
        write_to_file (bool, optional): If True, each flattened DataFrame is
            also written to ``flatten_data_export_<interval>_<date>.csv``
            (a relative path), where ``<interval>`` has hyphens replaced by
            underscores and ``<date>`` is the value returned by
            ``get_todays_date()``. Defaults to False.

    Returns:
        flat_dict (dict): Flattened pandas DataFrames keyed by the interval
        keys of ``AirSensor.data``.

    Raises:
        KeyError: If a reference DataFrame at a matching interval lacks any of
            the site information columns.

    """
    site_info_cols = ['Agency', 'Site_Name', 'Site_AQS', 'Site_Lat',
                      'Site_Lon', 'Data_Source', 'Data_Acquisition_Date_Time']

    flat_dict = {}
    for interval, sensor_data in AirSensor.data.items():
        print(f'Flattening {interval} datasets')

        # Start from an empty frame so every sensor unit is merged the same
        # way; the outer join keeps timestamps reported by any unit.
        flat_df = pd.DataFrame()
        for sensor_key, sensor_df in sensor_data.items():
            flat_df = flat_df.join(sensor_df.add_suffix(f'_{sensor_key}'),
                                   how='outer')

        # Reset per interval so site information from a previous interval can
        # never leak into one that has no reference data.
        site_info = None
        for ref_data in ReferenceMonitor.data.values():
            if interval not in ref_data:
                continue

            ref_df = ref_data[interval]
            # Site metadata is kept aside (unsuffixed); when several
            # classifiers share this interval the last one is retained.
            site_info = ref_df[site_info_cols]
            ref_df = ref_df.drop(columns=site_info_cols).add_suffix('_Ref')
            flat_df = flat_df.join(ref_df, how='outer')

        if site_info is not None:
            flat_df = flat_df.join(site_info)

        if write_to_file:
            print('..writing flattened dataset to .csv')
            today = get_todays_date()
            interv = interval.replace('-', '_')
            # TODO: allow customization for where this file will be saved
            flat_df.to_csv(f'flatten_data_export_{interv}_{today}.csv')

        flat_dict[interval] = flat_df

    return flat_dict