1
- """
2
- Functions to calculate the quidel sensor statistic.
3
- """
1
+ """Functions to calculate the quidel sensor statistic."""
4
2
5
3
import numpy as np
6
4
import pandas as pd
7
5
8
6
9
7
def remove_null_samples(df):
    """Remove entries in a data frame whose sample sizes are null.

    Args:
        df: pandas.DataFrame
            Data frame containing a "sample_size" column.

    Returns:
        pandas.DataFrame
            The rows of `df` whose "sample_size" value is not null.
    """
    return df[df["sample_size"].notnull()]
12
10
13
11
14
12
def _prop_var (p , n ):
15
- """var(X/n) = 1/(n^2)var(X) = (npq)/(n^2) = pq/n"""
13
+ """
14
+ Calculate variance of proportion.
15
+
16
+ var(X/n) = 1/(n^2)var(X) = (npq)/(n^2) = pq/n
17
+ """
16
18
return p * (1 - p ) / n
17
19
18
20
def fill_dates (y_data , first_date , last_date ):
19
21
"""
20
22
Ensure all dates are listed in the data, otherwise, add days with 0 counts.
23
+
21
24
Args:
22
25
y_data: dataframe with datetime index
23
26
first_date: datetime.datetime
@@ -42,8 +45,9 @@ def fill_dates(y_data, first_date, last_date):
42
45
43
46
def _slide_window_sum (arr , k ):
44
47
"""
45
- Sliding window sum, with fixed window size k. For indices 0:k, we
46
- DO compute a sum, using whatever points are available.
48
+ Sliding window sum, with fixed window size k.
49
+
50
+ For indices 0:k, we DO compute a sum, using whatever points are available.
47
51
48
52
Reference: https://stackoverflow.com/a/38507725
49
53
@@ -57,7 +61,6 @@ def _slide_window_sum(arr, k):
57
61
sarr: np.ndarray
58
62
Array of the same length as arr, holding the sliding window sum.
59
63
"""
60
-
61
64
if not isinstance (k , int ):
62
65
raise ValueError ('k must be int.' )
63
66
temp = np .append (np .zeros (k - 1 ), arr )
@@ -67,12 +70,11 @@ def _slide_window_sum(arr, k):
67
70
68
71
def _geographical_pooling (tpooled_tests , tpooled_ptests , min_obs ):
69
72
"""
70
- Calculates the proportion of parent samples (tests) that must be "borrowed"
71
- in order to properly compute the statistic. If there are no samples
72
- available in the parent, the borrow_prop is 0. If the parent does not
73
+ Calculate proportion of parent samples (tests) that must be "borrowed" in order to compute the statistic.
74
+
75
+ If there are no samples available in the parent, the borrow_prop is 0. If the parent does not
73
76
have enough samples, we return a borrow_prop of 1, and the fact that the
74
- pooled samples are insufficient are handled in the statistic fitting
75
- step.
77
+ pooled samples are insufficient is handled in the statistic fitting step.
76
78
77
79
Args:
78
80
tpooled_tests: np.ndarray[float]
@@ -115,8 +117,7 @@ def _geographical_pooling(tpooled_tests, tpooled_ptests, min_obs):
115
117
116
118
def raw_positive_prop (positives , tests , min_obs ):
117
119
"""
118
- Calculates the proportion of positive tests for a single geographic
119
- location, without any temporal smoothing.
120
+ Calculate the proportion of positive tests for a single geographic location, without any temporal smoothing.
120
121
121
122
If on any day t, tests[t] < min_obs, then we report np.nan.
122
123
@@ -169,8 +170,7 @@ def raw_positive_prop(positives, tests, min_obs):
169
170
def smoothed_positive_prop (positives , tests , min_obs , pool_days ,
170
171
parent_positives = None , parent_tests = None ):
171
172
"""
172
- Calculates the proportion of negative tests for a single geographic
173
- location, with temporal smoothing.
173
+ Calculate the proportion of positive tests for a single geographic location, with temporal smoothing.
174
174
175
175
For a given day t, if sum(tests[(t-pool_days+1):(t+1)]) < min_obs, then we
176
176
'borrow' min_obs - sum(tests[(t-pool_days+1):(t+1)]) observations from the
@@ -215,7 +215,6 @@ def smoothed_positive_prop(positives, tests, min_obs, pool_days,
215
215
np.ndarray
216
216
Effective sample size (after temporal and geographic pooling).
217
217
"""
218
-
219
218
positives = positives .astype (float )
220
219
tests = tests .astype (float )
221
220
if (parent_positives is None ) or (parent_tests is None ):
@@ -259,9 +258,8 @@ def smoothed_positive_prop(positives, tests, min_obs, pool_days,
259
258
260
259
261
260
def raw_tests_per_device (devices , tests , min_obs ):
262
- '''
263
- Calculates the tests per device for a single geographic
264
- location, without any temporal smoothing.
261
+ """
262
+ Calculate the tests per device for a single geographic location, without any temporal smoothing.
265
263
266
264
If on any day t, tests[t] < min_obs, then we report np.nan.
267
265
The second and third returned np.ndarray are the standard errors,
@@ -284,7 +282,7 @@ def raw_tests_per_device(devices, tests, min_obs):
284
282
Placeholder for standard errors
285
283
np.ndarray
286
284
Sample size used to compute estimates.
287
- '''
285
+ """
288
286
devices = devices .astype (float )
289
287
tests = tests .astype (float )
290
288
if (np .any (np .isnan (devices )) or np .any (np .isnan (tests ))):
@@ -304,8 +302,8 @@ def raw_tests_per_device(devices, tests, min_obs):
304
302
def smoothed_tests_per_device (devices , tests , min_obs , pool_days ,
305
303
parent_devices = None , parent_tests = None ):
306
304
"""
307
- Calculates the ratio of tests per device for a single geographic
308
- location, with temporal smoothing.
305
+ Calculate the ratio of tests per device for a single geographic location, with temporal smoothing.
306
+
309
307
For a given day t, if sum(tests[(t-pool_days+1):(t+1)]) < min_obs, then we
310
308
'borrow' min_obs - sum(tests[(t-pool_days+1):(t+1)]) observations from the
311
309
parents over the same timespan. Importantly, it will make sure NOT to
0 commit comments