Skip to content

Get quidel to pass pydocstyle #572

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 1 commit into from
Dec 2, 2020
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 2 additions & 1 deletion quidel/Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,8 @@ install: venv

lint:
. env/bin/activate; \
pylint $(dir)
pylint $(dir); \
pydocstyle $(dir)

test:
. env/bin/activate ;\
Expand Down
2 changes: 1 addition & 1 deletion quidel/delphi_quidel/constants.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
"""Registry for constants"""
"""Registry for constants."""
# global constants
MIN_OBS = 50 # minimum number of observations in order to compute a proportion.
MAX_BORROW_OBS = 20 # maximum number of observations can be borrowed in geographical pooling
Expand Down
44 changes: 21 additions & 23 deletions quidel/delphi_quidel/data_tools.py
Original file line number Diff line number Diff line change
@@ -1,17 +1,20 @@
"""
Functions to calculate the quidel sensor statistic.
"""
"""Functions to calculate the quidel sensor statistic."""

import numpy as np
import pandas as pd

def _prop_var(p, n):
"""var(X/n) = 1/(n^2)var(X) = (npq)/(n^2) = pq/n"""
"""
Calculate variance of proportion.

var(X/n) = 1/(n^2)var(X) = (npq)/(n^2) = pq/n
"""
return p * (1 - p) / n

def fill_dates(y_data, first_date, last_date):
"""
Ensure all dates are listed in the data, otherwise, add days with 0 counts.

Args:
y_data: dataframe with datetime index
first_date: datetime.datetime
Expand All @@ -36,8 +39,9 @@ def fill_dates(y_data, first_date, last_date):

def _slide_window_sum(arr, k):
"""
Sliding window sum, with fixed window size k. For indices 0:k, we
DO compute a sum, using whatever points are available.
Sliding window sum, with fixed window size k.

For indices 0:k, we DO compute a sum, using whatever points are available.

Reference: https://stackoverflow.com/a/38507725

Expand All @@ -51,7 +55,6 @@ def _slide_window_sum(arr, k):
sarr: np.ndarray
Array of same length of arr, holding the sliding window sum.
"""

if not isinstance(k, int):
raise ValueError('k must be int.')
temp = np.append(np.zeros(k - 1), arr)
Expand All @@ -61,12 +64,11 @@ def _slide_window_sum(arr, k):

def _geographical_pooling(tpooled_tests, tpooled_ptests, min_obs, max_borrow_obs):
"""
Calculates the proportion of parent samples (tests) that must be "borrowed"
in order to properly compute the statistic. If there are no samples
available in the parent, the borrow_prop is 0. If the parent does not
Calculate proportion of parent samples (tests) that must be "borrowed" in order to compute the statistic.

If there are no samples available in the parent, the borrow_prop is 0. If the parent does not
have enough samples, we return a borrow_prop of 1, and the fact that the
pooled samples are insufficient are handled in the statistic fitting
step.
pooled samples are insufficient are handled in the statistic fitting step.

Args:
tpooled_tests: np.ndarray[float]
Expand Down Expand Up @@ -117,8 +119,7 @@ def _geographical_pooling(tpooled_tests, tpooled_ptests, min_obs, max_borrow_obs

def raw_positive_prop(positives, tests, min_obs):
"""
Calculates the proportion of positive tests for a single geographic
location, without any temporal smoothing.
Calculate the proportion of positive tests for a single geographic location, without any temporal smoothing.

If on any day t, tests[t] < min_obs, then we report np.nan.

Expand Down Expand Up @@ -171,8 +172,7 @@ def raw_positive_prop(positives, tests, min_obs):
def smoothed_positive_prop(positives, tests, min_obs, max_borrow_obs, pool_days,
parent_positives=None, parent_tests=None):
"""
Calculates the proportion of negative tests for a single geographic
location, with temporal smoothing.
Calculate the proportion of negative tests for a single geographic location, with temporal smoothing.

For a given day t, if sum(tests[(t-pool_days+1):(t+1)]) < min_obs, then we
'borrow' min_obs - sum(tests[(t-pool_days+1):(t+1)]) observations from the
Expand Down Expand Up @@ -219,7 +219,6 @@ def smoothed_positive_prop(positives, tests, min_obs, max_borrow_obs, pool_days,
np.ndarray
Effective sample size (after temporal and geographic pooling).
"""

positives = positives.astype(float)
tests = tests.astype(float)
if (parent_positives is None) or (parent_tests is None):
Expand Down Expand Up @@ -264,9 +263,8 @@ def smoothed_positive_prop(positives, tests, min_obs, max_borrow_obs, pool_days,


def raw_tests_per_device(devices, tests, min_obs):
'''
Calculates the tests per device for a single geographic
location, without any temporal smoothing.
"""
Calculate the tests per device for a single geographic location, without any temporal smoothing.

If on any day t, tests[t] < min_obs, then we report np.nan.
The second and third returned np.ndarray are the standard errors,
Expand All @@ -289,7 +287,7 @@ def raw_tests_per_device(devices, tests, min_obs):
Placeholder for standard errors
np.ndarray
Sample size used to compute estimates.
'''
"""
devices = devices.astype(float)
tests = tests.astype(float)
if (np.any(np.isnan(devices)) or np.any(np.isnan(tests))):
Expand All @@ -309,8 +307,8 @@ def raw_tests_per_device(devices, tests, min_obs):
def smoothed_tests_per_device(devices, tests, min_obs, max_borrow_obs, pool_days,
parent_devices=None, parent_tests=None):
"""
Calculates the ratio of tests per device for a single geographic
location, with temporal smoothing.
Calculate the ratio of tests per device for a single geographic location, with temporal smoothing.

For a given day t, if sum(tests[(t-pool_days+1):(t+1)]) < min_obs, then we
'borrow' min_obs - sum(tests[(t-pool_days+1):(t+1)]) observations from the
parents over the same timespan. Importantly, it will make sure NOT to
Expand Down
10 changes: 5 additions & 5 deletions quidel/delphi_quidel/generate_sensor.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,5 @@
# -*- coding: utf-8 -*-
"""
Functions to help generate sensor for different geographical levels
"""
"""Functions to help generate sensor for different geographical levels."""
import pandas as pd
from .data_tools import (fill_dates, raw_positive_prop,
smoothed_positive_prop,
Expand All @@ -11,7 +9,8 @@

def generate_sensor_for_states(state_groups, smooth, device, first_date, last_date):
"""
fit over states
Fit over states.

Args:
state_groups: pd.groupby.generic.DataFrameGroupBy
state_key: "state_id"
Expand Down Expand Up @@ -70,7 +69,8 @@ def generate_sensor_for_states(state_groups, smooth, device, first_date, last_da
def generate_sensor_for_other_geores(state_groups, data, res_key, smooth,
device, first_date, last_date):
"""
fit over counties/HRRs/MSAs
Fit over counties/HRRs/MSAs.

Args:
data: pd.DataFrame
res_key: "fips", "cbsa_id" or "hrrnum"
Expand Down
5 changes: 5 additions & 0 deletions quidel/delphi_quidel/geo_maps.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
"""Contains geographic mapping tools."""

def geo_map(geo_res, data, map_df):
"""Call appropriate mapping function based on desired geo resolution."""
if geo_res == "county":
Expand All @@ -11,6 +12,7 @@ def geo_map(geo_res, data, map_df):

def zip_to_msa(data, map_df):
"""Map from zipcode to MSA (along with parent state).

Args:
data: dataframe at the day-zip resolution.
Returns:
Expand All @@ -35,6 +37,7 @@ def zip_to_msa(data, map_df):

def zip_to_hrr(data, map_df):
"""Map from zipcode to HRR (along with parent state).

Args:
data: dataframe at the day-zip resolution.
Returns:
Expand All @@ -59,6 +62,7 @@ def zip_to_hrr(data, map_df):

def zip_to_county(data, map_df):
"""Aggregate zip codes to the county resolution, along with its parent state.

Args:
data: dataframe aggregated to the day-zip resolution
Returns:
Expand All @@ -74,6 +78,7 @@ def zip_to_county(data, map_df):

def zip_to_state(data, map_df):
"""Aggregate zip codes to the state resolution.

Args:
data: dataframe aggregated to the day-zip resolution
Returns:
Expand Down
39 changes: 18 additions & 21 deletions quidel/delphi_quidel/pull.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
# -*- coding: utf-8 -*-
"""Simply downloads email attachments.

Uses this handy package: https://pypi.org/project/imap-tools/
"""
import io
Expand All @@ -26,6 +27,7 @@
def compare_dates(date1, date2, flag):
"""
Compare two dates.

If op == "l" return the larger date
If op == "s" return the smaller date
"""
Expand All @@ -38,20 +40,15 @@ def compare_dates(date1, date2, flag):
return date1

def check_whether_date_in_range(search_date, start_date, end_date):
    """
    Check whether the search date is in a valid time range.

    Args:
        search_date: date to test
        start_date: earliest acceptable date (inclusive)
        end_date: latest acceptable date (inclusive)

    Returns:
        True if start_date <= search_date <= end_date, False otherwise.
    """
    # Chained comparison is equivalent to the two separate boundary checks.
    return start_date <= search_date <= end_date

def read_historical_data():
"""
Read historical flu antigen test data stored in
midas /common/quidel-historical-raw
"""
"""Read historical flu antigen test data stored in midas /common/quidel-historical-raw."""
pull_dir = "/common/quidel-historical-raw"
columns = ['SofiaSerNum', 'TestDate', 'Facility', 'ZipCode',
'FluA', 'FluB', 'StorageDate']
Expand All @@ -65,9 +62,9 @@ def read_historical_data():

def regulate_column_names(df, test_type):
"""
Regulate column names for flu_ag test data since Quidel changed their
column names multiple times. We want to finalize the column name list
to be:
Regulate column names for flu_ag test data since Quidel changed their column names multiple times.

We want to finalize the column name list to be:
['SofiaSerNum', 'TestDate', 'Facility',
'Zip', 'FluA', 'FluB', 'StorageDate']
"""
Expand All @@ -87,7 +84,7 @@ def regulate_column_names(df, test_type):
def get_from_email(column_names, start_dates, end_dates, mail_server,
account, sender, password):
"""
Get raw data from email account
Get raw data from email account.

Parameters:
start_date: datetime.datetime
Expand Down Expand Up @@ -145,9 +142,7 @@ def get_from_email(column_names, start_dates, end_dates, mail_server,
return dfs, time_flag

def fix_zipcode(df):
"""
Fix zipcode that is 9 digit instead of 5 digit
"""
"""Fix zipcode that is 9 digit instead of 5 digit."""
zipcode5 = []
fixnum = 0
for zipcode in df['Zip'].values:
Expand All @@ -163,6 +158,8 @@ def fix_zipcode(df):

def fix_date(df):
"""
Remove invalid dates and select correct test date to use.

Quidel antigen tests are labeled with Test Date and Storage Date. In principle,
the TestDate should reflect when the test was performed and the StorageDate
when the test was logged in the MyVirena cloud storage device. We expect
Expand Down Expand Up @@ -190,6 +187,7 @@ def preprocess_new_data(start_dates, end_dates, mail_server, account,
sender, password, test_mode):
"""
Pull and pre-process Quidel Antigen Test data from datadrop email.

Drop unnecessary columns. Temporarily consider the positive rate
sensor only which is related to number of total tests and number
of positive tests.
Expand Down Expand Up @@ -285,7 +283,7 @@ def preprocess_new_data(start_dates, end_dates, mail_server, account,

def check_intermediate_file(cache_dir, pull_start_dates):
"""
Check whether there is a cache file containing historical data already
Check whether there is a cache file containing historical data already.

Parameters:
cache_dir: str
Expand Down Expand Up @@ -313,8 +311,7 @@ def check_intermediate_file(cache_dir, pull_start_dates):

def pull_quidel_data(params):
"""
Pull the quidel test data. Decide whether to combine the newly
received data with stored historical records in ./cache
Pull the quidel test data and decide whether to combine the new data with stored historical records in ./cache.

Parameters:
params: dict
Expand Down Expand Up @@ -371,7 +368,8 @@ def pull_quidel_data(params):
def check_export_end_date(input_export_end_dates, _end_date,
end_from_today_minus):
"""
Update the export_end_date according to the data received
Update the export_end_date according to the data received.

By default, set the export end date to be the last pulling date - 5 days
(END_FROM_TODAY_MINUS = 5).
Otherwise, use the required date if it is earlier than the default one.
Expand Down Expand Up @@ -404,8 +402,7 @@ def check_export_end_date(input_export_end_dates, _end_date,
def check_export_start_date(export_start_dates, export_end_dates,
export_day_range):
"""
Update the export_start_date according to the export_end_date so that it
could be export_end_date - EXPORT_DAY_RANGE
Update export_start_date according to the export_end_date so that it could be export_end_date - EXPORT_DAY_RANGE.

Parameters:
export_start_date: dict
Expand Down Expand Up @@ -438,7 +435,7 @@ def check_export_start_date(export_start_dates, export_end_dates,

def update_cache_file(dfs, _end_date, cache_dir):
"""
Update cache file. Remove the old one, export the new one
Update cache file. Remove the old one, export the new one.

Parameter:
df: pd.DataFrame
Expand Down