Skip to content

Get quidel covidtest to pass pydocstyle #571

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 3 commits into from
Dec 2, 2020
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 2 additions & 1 deletion quidel_covidtest/Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,8 @@ install: venv

lint:
. env/bin/activate; \
pylint $(dir)
pylint $(dir); \
pydocstyle $(dir)

test:
. env/bin/activate ;\
Expand Down
2 changes: 1 addition & 1 deletion quidel_covidtest/delphi_quidel_covidtest/constants.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
"""Registry for constants"""
"""Registry for constants."""
# global constants
MIN_OBS = 50 # minimum number of observations in order to compute a proportion.
POOL_DAYS = 7 # number of days in the past (including today) to pool over
Expand Down
46 changes: 22 additions & 24 deletions quidel_covidtest/delphi_quidel_covidtest/data_tools.py
Original file line number Diff line number Diff line change
@@ -1,23 +1,26 @@
"""
Functions to calculate the quidel sensor statistic.
"""
"""Functions to calculate the quidel sensor statistic."""

import numpy as np
import pandas as pd


def remove_null_samples(df):
"""Removes entries in a data frame whose sample sizes are null."""
"""Remove entries in a data frame whose sample sizes are null."""
return df[df["sample_size"].notnull()]


def _prop_var(p, n):
"""var(X/n) = 1/(n^2)var(X) = (npq)/(n^2) = pq/n"""
"""
Calculate variance of proportion.

var(X/n) = 1/(n^2)var(X) = (npq)/(n^2) = pq/n
"""
return p * (1 - p) / n

def fill_dates(y_data, first_date, last_date):
"""
Ensure all dates are listed in the data, otherwise, add days with 0 counts.

Args:
y_data: dataframe with datetime index
first_date: datetime.datetime
Expand All @@ -42,8 +45,9 @@ def fill_dates(y_data, first_date, last_date):

def _slide_window_sum(arr, k):
"""
Sliding window sum, with fixed window size k. For indices 0:k, we
DO compute a sum, using whatever points are available.
Sliding window sum, with fixed window size k.

For indices 0:k, we DO compute a sum, using whatever points are available.

Reference: https://stackoverflow.com/a/38507725

Expand All @@ -57,7 +61,6 @@ def _slide_window_sum(arr, k):
sarr: np.ndarray
Array of same length of arr, holding the sliding window sum.
"""

if not isinstance(k, int):
raise ValueError('k must be int.')
temp = np.append(np.zeros(k - 1), arr)
Expand All @@ -67,12 +70,11 @@ def _slide_window_sum(arr, k):

def _geographical_pooling(tpooled_tests, tpooled_ptests, min_obs):
"""
Calculates the proportion of parent samples (tests) that must be "borrowed"
in order to properly compute the statistic. If there are no samples
available in the parent, the borrow_prop is 0. If the parent does not
Calculate proportion of parent samples (tests) that must be "borrowed" in order to compute the statistic.

If there are no samples available in the parent, the borrow_prop is 0. If the parent does not
have enough samples, we return a borrow_prop of 1, and the fact that the
pooled samples are insufficient is handled in the statistic fitting step.

Args:
tpooled_tests: np.ndarray[float]
Expand Down Expand Up @@ -115,8 +117,7 @@ def _geographical_pooling(tpooled_tests, tpooled_ptests, min_obs):

def raw_positive_prop(positives, tests, min_obs):
"""
Calculates the proportion of positive tests for a single geographic
location, without any temporal smoothing.
Calculate the proportion of positive tests for a single geographic location, without any temporal smoothing.

If on any day t, tests[t] < min_obs, then we report np.nan.

Expand Down Expand Up @@ -169,8 +170,7 @@ def raw_positive_prop(positives, tests, min_obs):
def smoothed_positive_prop(positives, tests, min_obs, pool_days,
parent_positives=None, parent_tests=None):
"""
Calculates the proportion of negative tests for a single geographic
location, with temporal smoothing.
Calculate the proportion of negative tests for a single geographic location, with temporal smoothing.

For a given day t, if sum(tests[(t-pool_days+1):(t+1)]) < min_obs, then we
'borrow' min_obs - sum(tests[(t-pool_days+1):(t+1)]) observations from the
Expand Down Expand Up @@ -215,7 +215,6 @@ def smoothed_positive_prop(positives, tests, min_obs, pool_days,
np.ndarray
Effective sample size (after temporal and geographic pooling).
"""

positives = positives.astype(float)
tests = tests.astype(float)
if (parent_positives is None) or (parent_tests is None):
Expand Down Expand Up @@ -259,9 +258,8 @@ def smoothed_positive_prop(positives, tests, min_obs, pool_days,


def raw_tests_per_device(devices, tests, min_obs):
'''
Calculates the tests per device for a single geographic
location, without any temporal smoothing.
"""
Calculate the tests per device for a single geographic location, without any temporal smoothing.

If on any day t, tests[t] < min_obs, then we report np.nan.
The second and third returned np.ndarray are the standard errors,
Expand All @@ -284,7 +282,7 @@ def raw_tests_per_device(devices, tests, min_obs):
Placeholder for standard errors
np.ndarray
Sample size used to compute estimates.
'''
"""
devices = devices.astype(float)
tests = tests.astype(float)
if (np.any(np.isnan(devices)) or np.any(np.isnan(tests))):
Expand All @@ -304,8 +302,8 @@ def raw_tests_per_device(devices, tests, min_obs):
def smoothed_tests_per_device(devices, tests, min_obs, pool_days,
parent_devices=None, parent_tests=None):
"""
Calculates the ratio of tests per device for a single geographic
location, with temporal smoothing.
Calculate the ratio of tests per device for a single geographic location, with temporal smoothing.

For a given day t, if sum(tests[(t-pool_days+1):(t+1)]) < min_obs, then we
'borrow' min_obs - sum(tests[(t-pool_days+1):(t+1)]) observations from the
parents over the same timespan. Importantly, it will make sure NOT to
Expand Down
10 changes: 5 additions & 5 deletions quidel_covidtest/delphi_quidel_covidtest/generate_sensor.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,5 @@
# -*- coding: utf-8 -*-
"""
Functions to help generate sensor for different geographical levels
"""
"""Functions to help generate sensor for different geographical levels."""
import pandas as pd
from .data_tools import (fill_dates, raw_positive_prop,
smoothed_positive_prop,
Expand All @@ -13,7 +11,8 @@

def generate_sensor_for_states(state_groups, smooth, device, first_date, last_date):
"""
fit over states
Fit over states.

Args:
state_groups: pd.groupby.generic.DataFrameGroupBy
state_key: "state_id"
Expand Down Expand Up @@ -70,7 +69,8 @@ def generate_sensor_for_states(state_groups, smooth, device, first_date, last_da
def generate_sensor_for_other_geores(state_groups, data, res_key, smooth,
device, first_date, last_date):
"""
fit over counties/HRRs/MSAs
Fit over counties/HRRs/MSAs.

Args:
data: pd.DataFrame
res_key: "fips", "cbsa_id" or "hrrnum"
Expand Down
6 changes: 3 additions & 3 deletions quidel_covidtest/delphi_quidel_covidtest/geo_maps.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,9 +13,7 @@


def geo_map(geo_res, df):
"""
Map a geocode to a new value.
"""
"""Map a geocode to a new value."""
data = df.copy()
geo_key = GEO_KEY_DICT[geo_res]
# Add population for each zipcode
Expand All @@ -32,6 +30,8 @@ def geo_map(geo_res, df):

def add_parent_state(data, geo_res, geo_key):
"""
Add parent state column to DataFrame.

- map from msa/hrr to state, going by the state with the largest
population (since a msa/hrr may span multiple states)
- map from county to the corresponding state
Expand Down
26 changes: 13 additions & 13 deletions quidel_covidtest/delphi_quidel_covidtest/pull.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
# -*- coding: utf-8 -*-
"""Simply downloads email attachments.

Uses this handy package: https://pypi.org/project/imap-tools/
"""
import io
Expand All @@ -15,7 +16,8 @@
def get_from_email(start_date, end_date, mail_server,
account, sender, password):
"""
Get raw data from email account
Get raw data from email account.

Args:
start_date: datetime.datetime
pull data from email received from the start date
Expand Down Expand Up @@ -56,9 +58,7 @@ def get_from_email(start_date, end_date, mail_server,
return df, time_flag

def fix_zipcode(df):
"""
Fix zipcode that is 9 digit instead of 5 digit
"""
"""Fix zipcodes that are 9 digits instead of 5 digits."""
zipcode5 = []
fixnum = 0
for zipcode in df['Zip'].values:
Expand All @@ -74,6 +74,8 @@ def fix_zipcode(df):

def fix_date(df):
"""
Remove invalid dates and select correct test date to use.

Quidel Covid Test are labeled with Test Date and Storage Date. In principle,
the TestDate should reflect when the test was performed and the StorageDate
when the test was logged in the MyVirena cloud storage device. We expect
Expand Down Expand Up @@ -101,6 +103,7 @@ def preprocess_new_data(start_date, end_date, mail_server, account,
sender, password, test_mode):
"""
Pull and pre-process Quidel Covid Test data from datadrop email.

Drop unnecessary columns. Temporarily consider the positive rate
sensor only which is related to number of total tests and number
of positive tests.
Expand Down Expand Up @@ -173,9 +176,7 @@ def preprocess_new_data(start_date, end_date, mail_server, account,
return df_merged, time_flag

def check_intermediate_file(cache_dir, pull_start_date):
"""
Check whether there is a cache file containing historical data already
"""
"""Check whether there is a cache file containing historical data already."""
for filename in os.listdir(cache_dir):
if ".csv" in filename:
pull_start_date = datetime.strptime(filename.split("_")[2].split(".")[0],
Expand All @@ -187,8 +188,7 @@ def check_intermediate_file(cache_dir, pull_start_date):

def pull_quidel_covidtest(params):
"""
Pull the quidel covid test data. Decide whether to combine the newly
received data with stored historical records in ./cache
Pull the quidel covid test data and decide whether to combine the new data with stored historical records in ./cache.

Parameters:
params: dict
Expand Down Expand Up @@ -240,7 +240,8 @@ def pull_quidel_covidtest(params):
def check_export_end_date(input_export_end_date, _end_date,
end_from_today_minus):
"""
Update the export_end_date according to the data received
Update the export_end_date according to the data received.

By default, set the export end date to be the last pulling date - 5 days
(end_from_today_minus = 5).
Otherwise, use the required date if it is earlier than the default one.
Expand All @@ -267,8 +268,7 @@ def check_export_end_date(input_export_end_date, _end_date,
def check_export_start_date(export_start_date, export_end_date,
export_day_range):
"""
Update the export_start_date according to the export_end_date so that it
could be export_end_date - export_day_range
Update export_start_date according to the export_end_date so that it could be export_end_date - export_day_range.

Parameters:
export_start_date: str
Expand Down Expand Up @@ -296,7 +296,7 @@ def check_export_start_date(export_start_date, export_end_date,

def update_cache_file(df, _end_date, cache_dir):
"""
Update cache file. Remove the old one, export the new one
Update cache file. Remove the old one, export the new one.

Parameter:
df: pd.DataFrame
Expand Down