# Source code for sensortoolkit.calculate._uptime

# -*- coding: utf-8 -*-
"""
This module computes the uptime for sensor or reference datasets.

U.S. EPA's Performance Targets Reports define uptime in the following way:

    A measure of the amount of valid data obtained by all tested sensors
    relative to the amount of data that was expected to be obtained under
    correct, normal operation for the entire length of a test. For example, if
    valid data is collected by all three sensors for 29 days of a 30-day base
    test field deployment the uptime for the deployment can be expressed as
    96.7% (i.e., 29 days/30 days * 100%). Operation may be interrupted by
    sensor failure, connectivity issues, equipment maintenance, extreme weather
    events, etc. No matter the reason for missing data, all downtime should be
    included in the uptime calculation. However, tests may report more
    information such as specifying the percent of downtime attributed to
    various types of interruptions.

@Author:
  | Samuel Frederick, NSSC Contractor (ORAU)
  | U.S. EPA / ORD / CEMM / AMCD / SFSB

Created:
  Fri Apr 24 13:03:32 2020
Last Updated:
  Tue Nov 24 16:58:00 2020
"""
import pandas as pd
from sensortoolkit.datetime_utils import get_timestamp_interval

def uptime(dataframe_object, key=None):
    """Compute uptime for either sensor or reference data.

    Uptime is calculated as the number of non-null data points recorded
    within the deployment period divided by the number of timestamps expected
    between the first and last index value at the dataset's sampling
    interval, expressed as a percentage.

    Args:
        dataframe_object (pandas dataframe or a list of dataframes):
            Sensor dataframe or list of sensor dataframes. Each dataframe is
            passed to ``pd.date_range`` via its index min/max, so a
            datetime-like index is presumably required.
        key (str):
            A unique identifier corresponding to the dataframe passed (either
            a serial ID, number, or other string). If ``None`` (default),
            each dataframe is labeled with its 1-based position in the list
            (``'1'``, ``'2'``, ...). If a key is given together with several
            dataframes, every dataframe is written to that same key and only
            the last result is retained.

    Returns:
        uptime_dict (dict):
            A dictionary with one entry per key. Each entry is a
            sub-dictionary containing:

            - ``'Uptime'``: uptime percentage, rounded to three decimals.
            - ``'Meets Threshold'``: the most common per-column count of
              non-null values.
            - ``'Below Threshold'``: the most common per-column count of
              null values.
            - ``'Total Hours'``: the expected number of timestamps between
              the first and last index value at the detected sampling
              interval (this equals hours only for hourly data).

    """
    print('Calculating uptime')

    # Normalize the input so that a single dataframe and a list of
    # dataframes follow the same code path.
    if isinstance(dataframe_object, pd.DataFrame):
        df_list = [dataframe_object]
    else:
        df_list = dataframe_object

    uptime_dict = {}

    # Compute uptime ratio for each sensor dataframe
    for n, df in enumerate(df_list, 1):
        # Resolve the label per dataframe. Rebinding ``key`` itself would pin
        # the label to '1' after the first pass and make every subsequent
        # dataframe overwrite the first entry.
        df_key = str(n) if key is None else key

        # Columns may differ in how many values are missing; the mode picks
        # the count shared by the most columns as representative.
        meets_thres = df.count().mode()[0]
        below_thres = df.isna().sum().mode()[0]

        # Number of timestamps a gap-free record would contain over the
        # observed span at the dataset's sampling interval.
        dt = get_timestamp_interval(df, as_timedelta=True)
        expected = pd.date_range(df.index.min(),
                                 df.index.max(),
                                 freq=dt).shape[0]

        uptime_value = (float(meets_thres) / expected) * 100

        print(f'..non-null count: {meets_thres}, expected count: {expected}, '
              f'uptime: {uptime_value:0.2f}%')

        uptime_dict[df_key] = {
            'Uptime': float("{0:.3f}".format(uptime_value)),
            'Meets Threshold': meets_thres,
            'Below Threshold': below_thres,
            'Total Hours': expected,
        }

    return uptime_dict