Skip to content

Commit 8a734ff

Browse files
authored
Merge pull request #571 from cmu-delphi/quidel-pydocs
Get quidel covidtest to pass pydocstyle
2 parents d2e5a29 + 44b4754 commit 8a734ff

File tree

6 files changed

+46
-47
lines changed

6 files changed

+46
-47
lines changed

quidel_covidtest/Makefile

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -13,7 +13,8 @@ install: venv
1313

1414
lint:
1515
. env/bin/activate; \
16-
pylint $(dir)
16+
pylint $(dir); \
17+
pydocstyle $(dir)
1718

1819
test:
1920
. env/bin/activate ;\

quidel_covidtest/delphi_quidel_covidtest/constants.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
"""Registry for constants"""
1+
"""Registry for constants."""
22
# global constants
33
MIN_OBS = 50 # minimum number of observations in order to compute a proportion.
44
POOL_DAYS = 7 # number of days in the past (including today) to pool over

quidel_covidtest/delphi_quidel_covidtest/data_tools.py

Lines changed: 22 additions & 24 deletions
Original file line numberDiff line numberDiff line change
@@ -1,23 +1,26 @@
1-
"""
2-
Functions to calculate the quidel sensor statistic.
3-
"""
1+
"""Functions to calculate the quidel sensor statistic."""
42

53
import numpy as np
64
import pandas as pd
75

86

97
def remove_null_samples(df):
10-
"""Removes entries in a data frame whose sample sizes are null."""
8+
"""Remove entries in a data frame whose sample sizes are null."""
119
return df[df["sample_size"].notnull()]
1210

1311

1412
def _prop_var(p, n):
15-
"""var(X/n) = 1/(n^2)var(X) = (npq)/(n^2) = pq/n"""
13+
"""
14+
Calculate variance of proportion.
15+
16+
var(X/n) = 1/(n^2)var(X) = (npq)/(n^2) = pq/n
17+
"""
1618
return p * (1 - p) / n
1719

1820
def fill_dates(y_data, first_date, last_date):
1921
"""
2022
Ensure all dates are listed in the data, otherwise, add days with 0 counts.
23+
2124
Args:
2225
y_data: dataframe with datetime index
2326
first_date: datetime.datetime
@@ -42,8 +45,9 @@ def fill_dates(y_data, first_date, last_date):
4245

4346
def _slide_window_sum(arr, k):
4447
"""
45-
Sliding window sum, with fixed window size k. For indices 0:k, we
46-
DO compute a sum, using whatever points are available.
48+
Sliding window sum, with fixed window size k.
49+
50+
For indices 0:k, we DO compute a sum, using whatever points are available.
4751
4852
Reference: https://stackoverflow.com/a/38507725
4953
@@ -57,7 +61,6 @@ def _slide_window_sum(arr, k):
5761
sarr: np.ndarray
5862
Array of same length of arr, holding the sliding window sum.
5963
"""
60-
6164
if not isinstance(k, int):
6265
raise ValueError('k must be int.')
6366
temp = np.append(np.zeros(k - 1), arr)
@@ -67,12 +70,11 @@ def _slide_window_sum(arr, k):
6770

6871
def _geographical_pooling(tpooled_tests, tpooled_ptests, min_obs):
6972
"""
70-
Calculates the proportion of parent samples (tests) that must be "borrowed"
71-
in order to properly compute the statistic. If there are no samples
72-
available in the parent, the borrow_prop is 0. If the parent does not
73+
Calculate proportion of parent samples (tests) that must be "borrowed" in order to compute the statistic.
74+
75+
If there are no samples available in the parent, the borrow_prop is 0. If the parent does not
7376
have enough samples, we return a borrow_prop of 1, and the fact that the
74-
pooled samples are insufficient are handled in the statistic fitting
75-
step.
77+
pooled samples are insufficient is handled in the statistic fitting step.
7678
7779
Args:
7880
tpooled_tests: np.ndarray[float]
@@ -115,8 +117,7 @@ def _geographical_pooling(tpooled_tests, tpooled_ptests, min_obs):
115117

116118
def raw_positive_prop(positives, tests, min_obs):
117119
"""
118-
Calculates the proportion of positive tests for a single geographic
119-
location, without any temporal smoothing.
120+
Calculate the proportion of positive tests for a single geographic location, without any temporal smoothing.
120121
121122
If on any day t, tests[t] < min_obs, then we report np.nan.
122123
@@ -169,8 +170,7 @@ def raw_positive_prop(positives, tests, min_obs):
169170
def smoothed_positive_prop(positives, tests, min_obs, pool_days,
170171
parent_positives=None, parent_tests=None):
171172
"""
172-
Calculates the proportion of negative tests for a single geographic
173-
location, with temporal smoothing.
173+
Calculate the proportion of negative tests for a single geographic location, with temporal smoothing.
174174
175175
For a given day t, if sum(tests[(t-pool_days+1):(t+1)]) < min_obs, then we
176176
'borrow' min_obs - sum(tests[(t-pool_days+1):(t+1)]) observations from the
@@ -215,7 +215,6 @@ def smoothed_positive_prop(positives, tests, min_obs, pool_days,
215215
np.ndarray
216216
Effective sample size (after temporal and geographic pooling).
217217
"""
218-
219218
positives = positives.astype(float)
220219
tests = tests.astype(float)
221220
if (parent_positives is None) or (parent_tests is None):
@@ -259,9 +258,8 @@ def smoothed_positive_prop(positives, tests, min_obs, pool_days,
259258

260259

261260
def raw_tests_per_device(devices, tests, min_obs):
262-
'''
263-
Calculates the tests per device for a single geographic
264-
location, without any temporal smoothing.
261+
"""
262+
Calculate the tests per device for a single geographic location, without any temporal smoothing.
265263
266264
If on any day t, tests[t] < min_obs, then we report np.nan.
267265
The second and third returned np.ndarray are the standard errors,
@@ -284,7 +282,7 @@ def raw_tests_per_device(devices, tests, min_obs):
284282
Placeholder for standard errors
285283
np.ndarray
286284
Sample size used to compute estimates.
287-
'''
285+
"""
288286
devices = devices.astype(float)
289287
tests = tests.astype(float)
290288
if (np.any(np.isnan(devices)) or np.any(np.isnan(tests))):
@@ -304,8 +302,8 @@ def raw_tests_per_device(devices, tests, min_obs):
304302
def smoothed_tests_per_device(devices, tests, min_obs, pool_days,
305303
parent_devices=None, parent_tests=None):
306304
"""
307-
Calculates the ratio of tests per device for a single geographic
308-
location, with temporal smoothing.
305+
Calculate the ratio of tests per device for a single geographic location, with temporal smoothing.
306+
309307
For a given day t, if sum(tests[(t-pool_days+1):(t+1)]) < min_obs, then we
310308
'borrow' min_obs - sum(tests[(t-pool_days+1):(t+1)]) observations from the
311309
parents over the same timespan. Importantly, it will make sure NOT to

quidel_covidtest/delphi_quidel_covidtest/generate_sensor.py

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,5 @@
11
# -*- coding: utf-8 -*-
2-
"""
3-
Functions to help generate sensor for different geographical levels
4-
"""
2+
"""Functions to help generate sensor for different geographical levels."""
53
import pandas as pd
64
from .data_tools import (fill_dates, raw_positive_prop,
75
smoothed_positive_prop,
@@ -13,7 +11,8 @@
1311

1412
def generate_sensor_for_states(state_groups, smooth, device, first_date, last_date):
1513
"""
16-
fit over states
14+
Fit over states.
15+
1716
Args:
1817
state_groups: pd.groupby.generic.DataFrameGroupBy
1918
state_key: "state_id"
@@ -70,7 +69,8 @@ def generate_sensor_for_states(state_groups, smooth, device, first_date, last_da
7069
def generate_sensor_for_other_geores(state_groups, data, res_key, smooth,
7170
device, first_date, last_date):
7271
"""
73-
fit over counties/HRRs/MSAs
72+
Fit over counties/HRRs/MSAs.
73+
7474
Args:
7575
data: pd.DataFrame
7676
res_key: "fips", "cbsa_id" or "hrrnum"

quidel_covidtest/delphi_quidel_covidtest/geo_maps.py

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -13,9 +13,7 @@
1313

1414

1515
def geo_map(geo_res, df):
16-
"""
17-
Map a geocode to a new value.
18-
"""
16+
"""Map a geocode to a new value."""
1917
data = df.copy()
2018
geo_key = GEO_KEY_DICT[geo_res]
2119
# Add population for each zipcode
@@ -32,6 +30,8 @@ def geo_map(geo_res, df):
3230

3331
def add_parent_state(data, geo_res, geo_key):
3432
"""
33+
Add parent state column to DataFrame.
34+
3535
- map from msa/hrr to state, going by the state with the largest
3636
population (since a msa/hrr may span multiple states)
3737
- map from county to the corresponding state

quidel_covidtest/delphi_quidel_covidtest/pull.py

Lines changed: 13 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,6 @@
11
# -*- coding: utf-8 -*-
22
"""Simply downloads email attachments.
3+
34
Uses this handy package: https://pypi.org/project/imap-tools/
45
"""
56
import io
@@ -15,7 +16,8 @@
1516
def get_from_email(start_date, end_date, mail_server,
1617
account, sender, password):
1718
"""
18-
Get raw data from email account
19+
Get raw data from email account.
20+
1921
Args:
2022
start_date: datetime.datetime
2123
pull data from email received from the start date
@@ -56,9 +58,7 @@ def get_from_email(start_date, end_date, mail_server,
5658
return df, time_flag
5759

5860
def fix_zipcode(df):
59-
"""
60-
Fix zipcode that is 9 digit instead of 5 digit
61-
"""
61+
"""Fix zipcode that is 9 digit instead of 5 digit."""
6262
zipcode5 = []
6363
fixnum = 0
6464
for zipcode in df['Zip'].values:
@@ -74,6 +74,8 @@ def fix_zipcode(df):
7474

7575
def fix_date(df):
7676
"""
77+
Remove invalid dates and select correct test date to use.
78+
7779
Quidel Covid Test are labeled with Test Date and Storage Date. In principle,
7880
the TestDate should reflect when the test was performed and the StorageDate
7981
when the test was logged in the MyVirena cloud storage device. We expect
@@ -101,6 +103,7 @@ def preprocess_new_data(start_date, end_date, mail_server, account,
101103
sender, password, test_mode):
102104
"""
103105
Pull and pre-process Quidel Covid Test data from datadrop email.
106+
104107
Drop unnecessary columns. Temporarily consider the positive rate
105108
sensor only which is related to number of total tests and number
106109
of positive tests.
@@ -173,9 +176,7 @@ def preprocess_new_data(start_date, end_date, mail_server, account,
173176
return df_merged, time_flag
174177

175178
def check_intermediate_file(cache_dir, pull_start_date):
176-
"""
177-
Check whether there is a cache file containing historical data already
178-
"""
179+
"""Check whether there is a cache file containing historical data already."""
179180
for filename in os.listdir(cache_dir):
180181
if ".csv" in filename:
181182
pull_start_date = datetime.strptime(filename.split("_")[2].split(".")[0],
@@ -187,8 +188,7 @@ def check_intermediate_file(cache_dir, pull_start_date):
187188

188189
def pull_quidel_covidtest(params):
189190
"""
190-
Pull the quidel covid test data. Decide whether to combine the newly
191-
received data with stored historical records in ./cache
191+
Pull the quidel covid test data and decide whether to combine the new data with stored historical records in ./cache.
192192
193193
Parameters:
194194
params: dict
@@ -240,7 +240,8 @@ def pull_quidel_covidtest(params):
240240
def check_export_end_date(input_export_end_date, _end_date,
241241
end_from_today_minus):
242242
"""
243-
Update the export_end_date according to the data received
243+
Update the export_end_date according to the data received.
244+
244245
By default, set the export end date to be the last pulling date - 5 days
245246
(end_from_today_minus = 5).
246247
Otherwise, use the required date if it is earlier than the default one.
@@ -267,8 +268,7 @@ def check_export_end_date(input_export_end_date, _end_date,
267268
def check_export_start_date(export_start_date, export_end_date,
268269
export_day_range):
269270
"""
270-
Update the export_start_date according to the export_end_date so that it
271-
could be export_end_date - export_day_range
271+
Update export_start_date according to the export_end_date so that it could be export_end_date - export_day_range.
272272
273273
Parameters:
274274
export_start_date: str
@@ -296,7 +296,7 @@ def check_export_start_date(export_start_date, export_end_date,
296296

297297
def update_cache_file(df, _end_date, cache_dir):
298298
"""
299-
Update cache file. Remove the old one, export the new one
299+
Update cache file. Remove the old one, export the new one.
300300
301301
Parameter:
302302
df: pd.DataFrame

0 commit comments

Comments
 (0)