diff --git a/quidel_covidtest/delphi_quidel_covidtest/constants.py b/quidel_covidtest/delphi_quidel_covidtest/constants.py index 71c403cb5..8e35fff30 100644 --- a/quidel_covidtest/delphi_quidel_covidtest/constants.py +++ b/quidel_covidtest/delphi_quidel_covidtest/constants.py @@ -1,4 +1,4 @@ -"""Registry for constants""" +"""Registry for constants.""" # global constants MIN_OBS = 50 # minimum number of observations in order to compute a proportion. POOL_DAYS = 7 # number of days in the past (including today) to pool over diff --git a/quidel_covidtest/delphi_quidel_covidtest/export.py b/quidel_covidtest/delphi_quidel_covidtest/export.py index 41ed35e95..4507d5e43 100644 --- a/quidel_covidtest/delphi_quidel_covidtest/export.py +++ b/quidel_covidtest/delphi_quidel_covidtest/export.py @@ -1,11 +1,11 @@ # -*- coding: utf-8 -*- -"""Function to export the dataset in the format expected of the API. -""" +"""Function to export the dataset in the format expected of the API.""" import numpy as np import pandas as pd def export_csv(df, geo_name, sensor, receiving_dir, start_date, end_date): - """Export data set in format expected for injestion by the API + """Export data set in format expected for ingestion by the API. + Parameters ---------- df: pd.DataFrame @@ -23,7 +23,6 @@ def export_csv(df, geo_name, sensor, receiving_dir, start_date, end_date): end_date: datetime.datetime The last date to report """ - df = df.copy() df = df[np.logical_and(df["timestamp"] >= start_date, df["timestamp"] <= end_date)] diff --git a/quidel_covidtest/delphi_quidel_covidtest/generate_sensor.py b/quidel_covidtest/delphi_quidel_covidtest/generate_sensor.py index caa8deece..f48b361d1 100644 --- a/quidel_covidtest/delphi_quidel_covidtest/generate_sensor.py +++ b/quidel_covidtest/delphi_quidel_covidtest/generate_sensor.py @@ -1,7 +1,5 @@ # -*- coding: utf-8 -*- -""" -Functions to help generate sensor for different geographical levels -""" +"""Functions to help generate sensor for different geographical levels.""" import pandas as pd from .data_tools import (fill_dates, raw_positive_prop, smoothed_positive_prop, @@ -13,7 +11,8 @@ def generate_sensor_for_states(state_groups, smooth, device, first_date, last_date): """ - fit over states + Fit over states. + Args: state_groups: pd.groupby.generic.DataFrameGroupBy state_key: "state_id" @@ -70,7 +69,8 @@ def generate_sensor_for_states(state_groups, smooth, device, first_date, last_da def generate_sensor_for_other_geores(state_groups, data, res_key, smooth, device, first_date, last_date): """ - fit over counties/HRRs/MSAs + Fit over counties/HRRs/MSAs. + Args: data: pd.DataFrame res_key: "fips", "cbsa_id" or "hrrnum" diff --git a/quidel_covidtest/delphi_quidel_covidtest/handle_wip_sensor.py b/quidel_covidtest/delphi_quidel_covidtest/handle_wip_sensor.py index fa3e5540b..73ab71ba0 100644 --- a/quidel_covidtest/delphi_quidel_covidtest/handle_wip_sensor.py +++ b/quidel_covidtest/delphi_quidel_covidtest/handle_wip_sensor.py @@ -1,8 +1,9 @@ -"""This file checks the wip status of signals""" +"""This file checks the wip status of signals.""" import covidcast def add_prefix(signal_names, wip_signal, prefix): - """Adds prefix to signal if there is a WIP signal + """Add prefix to signal if there is a WIP signal. + Parameters ---------- signal_names: List[str] @@ -18,7 +19,6 @@ def add_prefix(signal_names, wip_signal, prefix): List of signal names wip/non wip signals for further computation """ - if wip_signal is True: return [prefix + signal for signal in signal_names] if isinstance(wip_signal, list): @@ -37,7 +37,8 @@ def add_prefix(signal_names, wip_signal, prefix): def public_signal(signal_): - """Checks if the signal name is already public using COVIDcast + """Check if the signal name is already public using COVIDcast. + Parameters ---------- signal_ : str diff --git a/quidel_covidtest/delphi_quidel_covidtest/pull.py b/quidel_covidtest/delphi_quidel_covidtest/pull.py index c19ddabb5..8d7b023f1 100644 --- a/quidel_covidtest/delphi_quidel_covidtest/pull.py +++ b/quidel_covidtest/delphi_quidel_covidtest/pull.py @@ -1,5 +1,6 @@ # -*- coding: utf-8 -*- """Simply downloads email attachments. + Uses this handy package: https://pypi.org/project/imap-tools/ """ import io @@ -15,7 +16,8 @@ def get_from_email(start_date, end_date, mail_server, account, sender, password): """ - Get raw data from email account + Get raw data from email account. + Args: start_date: datetime.datetime pull data from email received from the start date @@ -56,9 +58,7 @@ def get_from_email(start_date, end_date, mail_server, return df, time_flag def fix_zipcode(df): - """ - Fix zipcode that is 9 digit instead of 5 digit - """ + """Fix zipcode that is 9 digit instead of 5 digit.""" zipcode5 = [] fixnum = 0 for zipcode in df['Zip'].values: @@ -74,6 +74,8 @@ def fix_zipcode(df): def fix_date(df): """ + Adjust tests based on their test and storage dates. + Quidel Covid Test are labeled with Test Date and Storage Date. In principle, the TestDate should reflect when the test was performed and the StorageDate when the test was logged in the MyVirena cloud storage device. We expect @@ -101,6 +103,7 @@ def preprocess_new_data(start_date, end_date, mail_server, account, sender, password, test_mode): """ Pull and pre-process Quidel Covid Test data from datadrop email. + Drop unnecessary columns. Temporarily consider the positive rate sensor only which is related to number of total tests and number of positive tests. @@ -173,9 +176,7 @@ def preprocess_new_data(start_date, end_date, mail_server, account, return df_merged, time_flag def check_intermediate_file(cache_dir, pull_start_date): - """ - Check whether there is a cache file containing historical data already - """ + """Check whether there is a cache file containing historical data already.""" for filename in os.listdir(cache_dir): if ".csv" in filename: pull_start_date = datetime.strptime(filename.split("_")[2].split(".")[0], @@ -187,8 +188,9 @@ def check_intermediate_file(cache_dir, pull_start_date): def pull_quidel_covidtest(params): """ - Pull the quidel covid test data. Decide whether to combine the newly - received data with stored historical records in ./cache + Pull the quidel covid test data. + + Decide whether to combine the newly received data with stored historical records in ./cache Parameters: params: dict @@ -240,7 +242,8 @@ def pull_quidel_covidtest(params): def check_export_end_date(input_export_end_date, _end_date, END_FROM_TODAY_MINUS): """ - Update the export_end_date according to the data received + Update the export_end_date according to the data received. + By default, set the export end date to be the last pulling date - 5 days (END_FROM_TODAY_MINUS = 5). Otherwise, use the required date if it is earlier than the default one. @@ -267,6 +270,8 @@ def check_export_end_date(input_export_end_date, _end_date, def check_export_start_date(export_start_date, export_end_date, EXPORT_DAY_RANGE): """ + Update the export_start_date according to the export_end_date and date range. + Update the export_start_date according to the export_end_date so that it could be export_end_date - EXPORT_DAY_RANGE @@ -296,7 +301,7 @@ def check_export_start_date(export_start_date, export_end_date, def update_cache_file(df, _end_date, cache_dir): """ - Update cache file. Remove the old one, export the new one + Update cache file. Remove the old one, export the new one. Parameter: df: pd.DataFrame diff --git a/quidel_covidtest/delphi_quidel_covidtest/run.py b/quidel_covidtest/delphi_quidel_covidtest/run.py index f46a55f7f..41218069b 100644 --- a/quidel_covidtest/delphi_quidel_covidtest/run.py +++ b/quidel_covidtest/delphi_quidel_covidtest/run.py @@ -26,6 +26,7 @@ def run_module(): + """Run module for processing Quidel COVID test data.""" params = read_params() cache_dir = params["cache_dir"] export_dir = params["export_dir"]