Source code for sensortoolkit.deploy._create_deploy_dict

# -*- coding: utf-8 -*-
"""
This module constructs and populates the deployment dictionary data
structure ``deploy_dict``. Below is an overview of the deployment dictionary:

* Testing organization
    * Organization name, contact information
* Testing Location
    * Site name, address, coordinates, and AQS site identifier
* Deployment Information and Statistics
    * Unique deployment groups
        * Description of sensor uptime for each sensor unit
    * Evaluation parameter statistics
        * Precision
        * Error
        * Description of reference monitor, measured range during
          deployment period at 1-hour and 24-hour averages
    * Meteorological conditions
        * Description of temperature instrument, measured range during
          deployment period at 1-hour and 24-hour averages
        * Description of relative humidity instrument, measured range during
          deployment period at 1-hour and 24-hour averages

================================================================================

@Author:
  | Samuel Frederick, NSSC Contractor (ORAU)
  | U.S. EPA / ORD / CEMM / AMCD / SFSB

Created:
  Mon Nov  9 10:47:56 2020
Last Updated:
  Tue Jul 12 13:38:00 2021
"""
import pandas as pd
import numpy as np
from datetime import datetime
from sensortoolkit.calculate import uptime
from sensortoolkit.lib_utils import _get_version
from sensortoolkit.param import Parameter
from sensortoolkit.datetime_utils import (deploy_timestamp_index,
                                          get_timestamp_interval)

def construct_deploy_dict(deploy_df, full_df_list, hourly_df_list,
                          daily_df_list, sensor_name, testing_loc,
                          testing_org, **kwargs):
    """Create the deployment dictionary and populate it with group info.

    The dictionary is initialized with the library version, the time at which
    the dictionary was constructed, the sensor name and firmware version, and
    the testing organization and location.

    Sensors are then partitioned into `deployment groups`: on each pass, every
    remaining sensor whose ``Begin`` timestamp lies within one day of the
    ``Begin`` timestamp of the first remaining sensor is placed in the same
    group. Within a group, a sensor whose end date (``YYYY-MM-DD``) differs
    from the most common end date of the group is assigned ``'True'`` for the
    ``deploy_dict`` sensor unit entry ``deploy_issues``.

    Args:
        deploy_df (pandas dataframe):
            A data frame containing the start time (`Begin`), end time
            (`End`), and total duration of evaluation period for each sensor
            in a deployment group. The columns ``Sensor_Number``,
            ``Sensor_Serial``, ``Begin`` and ``End`` are read.
        full_df_list (list):
            List of sensor data frames of length N (where N is the number of
            sensor units in a testing group). Data frames indexed by DateTime
            at recorded sampling frequency.
        hourly_df_list (list):
            List of sensor data frames of length N (where N is the number of
            sensor units in a testing group). Data frames indexed by DateTime
            at 1-hour averaged sampling frequency.
        daily_df_list (list):
            List of sensor data frames of length N (where N is the number of
            sensor units in a testing group). Data frames indexed by DateTime
            at 24-hour averaged sampling frequency.
        sensor_name (str):
            The make and model of the sensor being evaluated.
        testing_loc (dict):
            A dictionary containing information about the testing site. If the
            site is part of U.S. EPA's Air Quality System (AQS), the AQS Site
            ID should be specified.
        testing_org (dict):
            A dictionary containing the information about the testing
            organization.

    Keyword Args:
        sensor_firmware (str):
            Firmware version of the sensor. Defaults to ``'Unspecified'``.

    Returns:
        deploy_dict (dict):
            Dictionary containing, for each deployment group, the evaluation
            start (earliest ``Begin`` in the group), evaluation end (latest
            ``End`` in the group), the duration between the two, and for each
            sensor its serial ID, deployment-issue flag, recording interval
            and 1-hour / 24-hour uptime.

    """
    current_time = datetime.now().strftime('%Y-%m-%d %H:%M:%S %p')

    deploy_dict = {'sensortoolkit Version': _get_version(),
                   'Date of Analysis': current_time,
                   'Sensor Name': sensor_name,
                   'Sensor Firmware Version':
                       kwargs.get('sensor_firmware', 'Unspecified'),
                   'Deployment Groups': {},
                   'Testing Organization': testing_org,
                   'Testing Location': testing_loc}

    deployments = deploy_dict['Deployment Groups']
    deploy_grp_n = 1

    while not deploy_df.empty:
        first_idx = deploy_df.index[0]
        begin_offset = abs(deploy_df.loc[first_idx, 'Begin']
                           - deploy_df.loc[:, 'Begin'])
        match_begin = begin_offset < pd.Timedelta('1 day')

        # Work on an explicit copy so that adding the 'Issues' column below
        # never writes to (or warns about) a slice of deploy_df.
        deploy = deploy_df[match_begin].copy()

        # Date (YYYY-MM-DD) of deployment group end, calculate mode
        end_date = deploy.loc[:, "End"].dt.strftime("%Y-%m-%d")
        end_date_mode = end_date.mode()[0]

        # Sensors whose end date differs from that of the majority of group
        deploy['Issues'] = end_date != end_date_mode

        # Key both lookups on the stringified sensor number so the issue flag
        # is found regardless of whether Sensor_Number holds ints or strings.
        # setdefault keeps the first row for a given sensor number.
        sensor_info = {}
        issue_flags = {}
        for number, serial, flag in zip(deploy.Sensor_Number,
                                        deploy.Sensor_Serial,
                                        deploy.Issues):
            sensor_info[str(number)] = {'serial_id': serial}
            issue_flags.setdefault(str(number), bool(flag))

        group_begin = deploy.Begin.min()
        group_end = deploy.End.max()

        group_info = {
            'sensors': sensor_info,
            'eval_start': group_begin.strftime("%Y-%m-%dT%H:%M:%S%z"),
            'eval_end': group_end.strftime("%Y-%m-%dT%H:%M:%S%z"),
            'eval_duration': str(abs(group_begin - group_end)),
            }
        deployments['Group ' + str(deploy_grp_n)] = group_info

        # Slice bounds are derived from the formatted strings (second
        # resolution): round start down and end up to the nearest hour
        start = pd.to_datetime(group_info['eval_start']).floor(freq='H')
        end = pd.to_datetime(group_info['eval_end']).ceil(freq='H')

        for sensor_n, unit_info in sensor_info.items():
            # Sensor numbers are 1-based; the data frame lists are 0-based
            list_idx = int(sensor_n) - 1
            full_df = full_df_list[list_idx]
            hourly_df = hourly_df_list[list_idx]
            daily_df = daily_df_list[list_idx]

            # Record whether sensor encountered issues during deployment,
            # ended deployment early
            unit_info['deploy_issues'] = str(issue_flags[sensor_n])

            # Compute recording interval for data
            unit_info['recording_interval'] = get_timestamp_interval(full_df)

            # 1-hr uptime
            sensor_h_uptime = uptime(hourly_df.loc[start:end, :],
                                     key=sensor_n)
            unit_info['uptime_1-hour'] = sensor_h_uptime[sensor_n]['Uptime']

            # 24-hr uptime
            sensor_d_uptime = uptime(daily_df.loc[start:end, :],
                                     key=sensor_n)
            unit_info['uptime_24-hour'] = sensor_d_uptime[sensor_n]['Uptime']

        # Remove the sensors just grouped; remaining rows form later groups
        deploy_df = deploy_df.drop(deploy.index, axis=0)
        deploy_grp_n += 1

    return deploy_dict

def deploy_ref_stats(deploy_dict, ref_df, cal_check_dict=None, param=None,
                     ref_name=None):
    """Add reference monitor statistics to the parameter statistics subfield
    in the deployment dictionary.

    Details added for each deployment group (under
    ``deploy_dict['Deployment Groups'][group][<param name>]['Reference']``):

    * The FRM/FEM monitor name
    * The minimum, maximum and mean concentration recorded at the specified
      interval averaging (rounded to three decimal places).
    * The number of intervals during which the FRM/FEM exceeds the goal
      concentration for elevated concentrations. ``None`` if no goal
      concentration is defined for the parameter.
    * For O3, NO2 and CO, the minimum, maximum and mean of the 8-interval
      rolling mean of the reference concentration.

    If a group has no non-null reference data, the minimum, maximum, mean and
    exceedance count at the specified interval averaging are set to ``None``.

    Note:
        For O3, NO2 and CO a column named
        ``<param name>_rolling_8-hour_Value`` is added to ``ref_df`` in place.

    Args:
        deploy_dict (dict):
            Dictionary containing separate deployment group start and end
            times, deployment duration, and sensor serial IDs for devices
            within each deployment group.
        ref_df (pandas dataframe):
            Dataframe for reference concentrations at either 1-hour or 24-hour
            averaging depending on the performance targets recommended
            averaging interval.
        cal_check_dict (dict):
            [Future feature] Dictionary for housing dates and descriptions of
            QC calibration checks as part of regularly scheduled and cataloged
            QC procedures. Currently unused.
        param (str):
            The evaluation parameter.
        ref_name (str):
            The name of the FRM/FEM monitor (make and model).

    Returns:
        deploy_dict:
            The deployment dictionary passed in, updated in place with the
            reference statistics described above.

    """
    param_obj = Parameter(param)
    param_name = param_obj.name

    _, avg_suffix = deploy_timestamp_index(ref_df, averaging_suffix=True)

    # Concentration goals for at least one day
    conc_goals = {'PM25': 25,   # 25 ug/m^3
                  'PM10': 40,   # 40 ug/m^3
                  'O3': 60,     # 60 ppbv
                  'NO2': 30,    # 30 ppbv
                  'CO': 0.5}    # 0.5 ppmv
    conc_goal = conc_goals.get(param_name)

    value_col = f'{param_name}_Value'

    # The rolling suffix is kept in its own variable so that it never
    # replaces avg_suffix for deployment groups processed later in the loop.
    rolling_suffix = '_rolling_8-hour'
    rolling_col = f'{param_name}{rolling_suffix}_Value'
    has_rolling = param_name in ('O3', 'NO2', 'CO')
    if has_rolling:
        ref_df[rolling_col] = ref_df[value_col].rolling(window=8).mean()

    def _round3(value):
        """Round a statistic to three decimal places as a python float."""
        return float("{0:.3f}".format(value))

    for deploy in deploy_dict['Deployment Groups'].values():
        start = deploy['eval_start']
        end = deploy['eval_end']

        ref_data = ref_df.loc[start:end, value_col]

        stats_loc = deploy.setdefault(param_name, {}).setdefault(
            'Reference', {})

        stats_loc['reference_name'] = ref_name

        if ref_data.dropna().empty:
            stats_loc['conc_min' + avg_suffix] = None
            stats_loc['conc_max' + avg_suffix] = None
            stats_loc['conc_mean' + avg_suffix] = None
            stats_loc['n_exceed_conc_goal' + avg_suffix] = None
        else:
            stats_loc['conc_min' + avg_suffix] = _round3(ref_data.min())
            stats_loc['conc_max' + avg_suffix] = _round3(ref_data.max())
            stats_loc['conc_mean' + avg_suffix] = _round3(ref_data.mean())
            if conc_goal is None:
                # No goal defined for this parameter; comparing against None
                # would raise, so report that no count is available.
                n_exceed = None
            else:
                n_exceed = int(ref_data.where(ref_data > conc_goal).count())
            stats_loc['n_exceed_conc_goal' + avg_suffix] = n_exceed

        # add 8-hr rolling statistics
        if has_rolling:
            rolling_data = ref_df.loc[start:end, rolling_col]
            stats_loc['conc_min' + rolling_suffix] = \
                _round3(rolling_data.min())
            stats_loc['conc_max' + rolling_suffix] = \
                _round3(rolling_data.max())
            stats_loc['conc_mean' + rolling_suffix] = \
                _round3(rolling_data.mean())

    return deploy_dict

def deploy_met_stats(deploy_dict, df_list, met_ref_df, operational_range):
    """Add meteorological instrument statistics to the parameter statistics
    subfield in the deployment dictionary.

    For each of temperature and relative humidity, and for each deployment
    group, the following details are added (under
    ``deploy_dict['Deployment Groups'][group]['Meteorological Conditions']``):

    * The name of the instrument, read from the ``<param name>_Method``
      column of ``met_ref_df`` (``'Unknown Reference'`` if that column holds
      no non-null entries).
    * The minimum value recorded at the specified interval averaging.
    * The maximum value recorded at the specified interval averaging.
    * The number of intervals during which the measured value is strictly
      below the lower bound or strictly above the upper bound given in
      ``operational_range``. ``None`` if either bound is ``None``.
    * The mean, across sensors in the group, of the number of non-null
      reference values falling within each sensor's data time span.

    If ``met_ref_df`` has no ``<param name>_Method`` column, all of the above
    entries are set to empty strings for that parameter.

    Args:
        deploy_dict (dict):
            Dictionary containing separate deployment group start and end
            times, deployment duration, and sensor serial IDs for devices
            within each deployment group.
        df_list (list):
            List of pandas dataframes for sensor measurements at either 1-hr
            or 24-hr averaging intervals.
        met_ref_df (pandas dataframe):
            A dataframe containing meteorological parameters recorded at the
            testing site during the evaluation period (either 1-hr or 24-hr
            averaging intervals).
        operational_range (dict):
            Dictionary for listing the operational range indicated by the
            sensor manufacturer for meteorological parameters, such as temp
            and RH. Each entry is indexed as ``(lower bound, upper bound)``.

    Returns:
        deploy_dict:
            The deployment dictionary passed in, updated in place with the
            meteorological statistics described above.

    """
    met_str = 'Meteorological Conditions'

    _, avg_suffix = deploy_timestamp_index(met_ref_df,
                                           averaging_suffix=True)

    for name in ['Temp', 'RH']:
        param_obj = Parameter(name)
        param_name = param_obj.name
        fmt_param = param_obj.format_name
        value_col = param_name + '_Value'

        no_data = False
        ref_name = 'Unknown Reference'
        try:
            ref_name = met_ref_df.loc[:, param_name + '_Method'].dropna(
                ).apply(str).unique()[0]
        except IndexError:
            # Method column present but holds no non-null entries
            ref_name = 'Unknown Reference'
        except KeyError:
            # No met parameter data in passed reference dataframe
            no_data = True

        min_criterion = operational_range[param_name][0]
        max_criterion = operational_range[param_name][1]
        # Test against None explicitly: a bound of 0 (e.g. 0 C or 0 % RH) is
        # a valid limit and must not disable the exceedance count.
        has_range = min_criterion is not None and max_criterion is not None

        for deploy in deploy_dict['Deployment Groups'].values():
            start = deploy['eval_start']
            end = deploy['eval_end']

            stats_loc = deploy.setdefault(met_str, {}).setdefault(
                fmt_param, {})

            if no_data:
                stats_loc['instrument_name'] = ''
                stats_loc['min' + avg_suffix] = ''
                stats_loc['max' + avg_suffix] = ''
                stats_loc['n_exceed_target_criteria' + avg_suffix] = ''
                stats_loc['n_measurement_pairs' + avg_suffix] = ''
                continue

            ref_data = met_ref_df.loc[start:end, value_col]

            # Number of non-null reference values within each sensor's own
            # data time span (sensor numbers are 1-based, df_list is 0-based)
            data_pairs = []
            for sensor_n in deploy['sensors']:
                sensor_df = df_list[int(sensor_n) - 1]
                sensor_start = sensor_df.index.min()
                sensor_end = sensor_df.index.max()
                data_pairs.append(
                    met_ref_df.loc[sensor_start:sensor_end,
                                   value_col].dropna().size)

            stats_loc['instrument_name'] = ref_name
            stats_loc['min' + avg_suffix] = \
                float("{0:.3f}".format(ref_data.min()))
            stats_loc['max' + avg_suffix] = \
                float("{0:.3f}".format(ref_data.max()))

            if has_range:
                out_of_range = ((ref_data > max_criterion) |
                                (ref_data < min_criterion))
                value = int(ref_data.where(out_of_range).count())
            else:
                value = None

            stats_loc['n_exceed_target_criteria' + avg_suffix] = value
            stats_loc['n_measurement_pairs' + avg_suffix] = \
                np.mean(data_pairs)

    return deploy_dict