@@ -25,7 +25,7 @@ class DoctorVisitsSensor:

    @staticmethod
    def transform(
-        sig, h=Config.SMOOTHER_BANDWIDTH, smoother=left_gauss_linear, base=None
+        sig, h=Config.SMOOTHER_BANDWIDTH, smoother=left_gauss_linear, base=None
    ):
        """Transform signal by applying a smoother, and/or adjusting by a base.

@@ -80,12 +80,12 @@ def fill_dates(y_data, dates):

    @staticmethod
    def backfill(
-        num,
-        den,
-        k=Config.MAX_BACKFILL_WINDOW,
-        min_visits_to_fill=Config.MIN_CUM_VISITS,
-        min_visits_to_include=Config.MIN_RECENT_VISITS,
-        min_recent_obs_to_include=Config.MIN_RECENT_OBS,
+        num,
+        den,
+        k=Config.MAX_BACKFILL_WINDOW,
+        min_visits_to_fill=Config.MIN_CUM_VISITS,
+        min_visits_to_include=Config.MIN_RECENT_VISITS,
+        min_recent_obs_to_include=Config.MIN_RECENT_OBS,
    ):
        """
        Adjust for backfill (retroactively added observations) by using a
@@ -129,17 +129,17 @@ def backfill(
                for j in range(p):
                    new_num[i, j] = revnum[i, j]
            else:
-                den_bin = revden[i:(i + closest_fill_day + 1)]
+                den_bin = revden[i:(i + closest_fill_day + 1)]
                new_den[i] = den_bin.sum()

                for j in range(p):
-                    num_bin = revnum[i:(i + closest_fill_day + 1), j]
+                    num_bin = revnum[i:(i + closest_fill_day + 1), j]
                    new_num[i, j] = num_bin.sum()

            # if we do not observe at least min_visits_to_include in the denominator or
            # if we observe 0 counts for min_recent_obs window, don't show.
            if (new_den[i] < min_visits_to_include) or (
-                revden[i:][:min_recent_obs_to_include].sum() == 0
+                revden[i:][:min_recent_obs_to_include].sum() == 0
            ):
                include[i] = False

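The else branch above pools a day's numerator and denominator with the days that follow it until enough cumulative visits accumulate. A minimal, self-contained sketch of that binning step with invented numbers; the threshold search mirrors logic earlier in backfill that is not part of this hunk, and revden, min_visits_to_fill, and k here are illustrative, not the configured defaults:

import numpy as np

revden = np.array([5., 0., 30., 100., 80.])  # toy visit counts, most recent day first
min_visits_to_fill = 20                      # hypothetical threshold
k = 3                                        # hypothetical cap on the backfill window

i = 0
visit_cumsum = revden[i:].cumsum()           # 5, 5, 35, 135, 215
fill_days = np.where(visit_cumsum >= min_visits_to_fill)[0]
closest_fill_day = min(k, fill_days[0]) if len(fill_days) > 0 else k

den_bin = revden[i:(i + closest_fill_day + 1)]  # days 0..2 pooled together
print(closest_fill_day, den_bin.sum())          # 2 35.0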
@@ -156,7 +156,13 @@ def backfill(
        return new_num, new_den, include

    @staticmethod
-    def fit(y_data, fit_dates, sensor_dates, geo_id, recent_min_visits, min_recent_obs):
+    def fit(y_data,
+            fit_dates,
+            sensor_dates,
+            geo_id,
+            recent_min_visits,
+            min_recent_obs,
+            jeffreys):
        """Fitting routine.

        Args:
@@ -168,6 +174,9 @@ def fit(y_data, fit_dates, sensor_dates, geo_id, recent_min_visits, min_recent_o
                <RECENT_LENGTH> days
            min_recent_obs: location is sparse also if it has 0 observations in the
                last min_recent_obs days
+            jeffreys: boolean for whether to use the Jeffreys estimate for the binomial
+                proportion; this is currently only applied if we are writing SEs out.
+                The estimate is p_hat = (x + 0.5)/(n + 1).

        Returns: dictionary of results
        """
@@ -176,28 +185,43 @@ def fit(y_data, fit_dates, sensor_dates, geo_id, recent_min_visits, min_recent_o
        sensor_idxs = np.where(y_data.index >= sensor_dates[0])[0]
        n_dates = y_data.shape[0]

+        # combine Flu_like and Mixed columns
+        y_data["Flu_like_Mixed"] = y_data["Flu_like"] + y_data["Mixed"]
+        NEW_CLI_COLS = list(set(Config.CLI_COLS) - {"Flu_like", "Mixed"}) + [
+            "Flu_like_Mixed"]
+
+        # small backfill correction
        total_visits = y_data["Denominator"]
-        total_counts = y_data[Config.CLI_COLS + Config.FLU1_COL]
+        total_counts = y_data[NEW_CLI_COLS + Config.FLU1_COL]
        total_counts, total_visits, include = DoctorVisitsSensor.backfill(
            total_counts,
            total_visits,
            min_visits_to_include=recent_min_visits,
-            min_recent_obs_to_include=min_recent_obs,
+            min_recent_obs_to_include=min_recent_obs
        )
-        total_rates = total_counts.div(total_visits, axis=0)
+
+        # jeffreys inflation
+        if jeffreys:
+            total_counts[NEW_CLI_COLS] = total_counts[NEW_CLI_COLS] + 0.5
+            total_rates = total_counts.div(total_visits + 1, axis=0)
+        else:
+            total_rates = total_counts.div(total_visits, axis=0)
+
        total_rates.fillna(0, inplace=True)
        flu1 = total_rates[Config.FLU1_COL]
        new_rates = []
-        for code in Config.CLI_COLS:
+        for code in NEW_CLI_COLS:
            code_vals = total_rates[code]

            # if all rates are zero, don't bother
            if code_vals.sum() == 0:
+                if jeffreys:
+                    logging.error("p is 0 even though we used Jeffreys estimate")
                new_rates.append(np.zeros((n_dates,)))
                continue

            # include adjustment for flu like codes
-            base = flu1 if code in ["Flu_like", "Mixed"] else None
+            base = flu1 if code in ["Flu_like_Mixed"] else None
            fitted_codes = DoctorVisitsSensor.transform(
                code_vals.values.reshape(-1, 1), base=base
            )
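A rough illustration of the column regrouping introduced at the top of this hunk, assuming Config.CLI_COLS is a list along the lines of ["Covid_like", "Flu_like", "Mixed"] (the real configuration may differ): Flu_like and Mixed are merged into one Flu_like_Mixed column before backfill and rate computation, so the flu-like adjustment is applied once to the combined column.

CLI_COLS = ["Covid_like", "Flu_like", "Mixed"]  # hypothetical stand-in for Config.CLI_COLS
NEW_CLI_COLS = list(set(CLI_COLS) - {"Flu_like", "Mixed"}) + ["Flu_like_Mixed"]
print(NEW_CLI_COLS)  # ['Covid_like', 'Flu_like_Mixed']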
@@ -211,9 +235,9 @@ def fit(y_data, fit_dates, sensor_dates, geo_id, recent_min_visits, min_recent_o
        den = total_visits[sensor_idxs].values

        # calculate standard error
-        mask = den < 1
-        se = np.sqrt(np.divide((new_rates * (1 - new_rates)), den, where=den != 0))
-        se[mask] = np.nan  # handle case where we observe no visits
+        se = np.full_like(new_rates, np.nan)
+        se[include] = np.sqrt(
+            np.divide((new_rates[include] * (1 - new_rates[include])), den[include]))

        logging.debug(f"{geo_id}: {new_rates[-1]:.3f},[{se[-1]:.3f}]")
        return {"geo_id": geo_id, "rate": new_rates, "se": se, "incl": include}