@@ -280,7 +280,7 @@ def retain_header(header):
280
280
header .find (" age" ) < 0 ,
281
281
# exclude "Confirmed COVID-19 admissions per 100 inpatient beds - last 7 days"
282
282
header .find (" beds" ) < 0 ,
283
- ])) or all ([
283
+ ])) or ( all ([
284
284
# include "People who are fully vaccinated"
285
285
# include "People who have received a booster dose since August 13, 2021"
286
286
header .startswith ("People who" ),
@@ -292,7 +292,13 @@ def retain_header(header):
292
292
header .find (" age" ) < 0 ,
293
293
# exclude "People who are fully vaccinated - 12-17" ...
294
294
header .find ("-" ) < 0 ,
295
- ]))
295
+
296
+ ]) or all ([
297
+ # include "People with full course administered"
298
+ header .startswith ("People with full course" ),
299
+ # exclude "People with full course administered as % of adult population"
300
+ header .find ("%" ) < 0 ,
301
+ ])))
296
302
def _parse_sheet (self , sheet ):
297
303
"""Extract data frame for this sheet."""
298
304
df = pd .read_excel (
@@ -338,8 +344,8 @@ def select_fn(h):
338
344
is_fully_vax_msa_before_apr11 = (sheet .level == "msa" or sheet .level == "county" ) \
339
345
and self .publish_date <= datetime .date (2021 , 4 , 11 ) \
340
346
and sig == "fully vaccinated"
341
- # People fully vaccinated not available before March 08 , 2021 at any geo level.
342
- is_fully_vax_before_mar8 = self .publish_date <= datetime .date (2021 , 3 , 8 ) \
347
+ # People fully vaccinated not available before Jan 15, 2021 at any geo level (the flag below keeps its older "mar8" name from the previous cutoff).
348
+ is_fully_vax_before_mar8 = self .publish_date <= datetime .date (2021 , 1 , 14 ) \
343
349
and sig == "fully vaccinated"
344
350
345
351
if any ([is_hosp_adm_before_jan8 ,
@@ -356,7 +362,10 @@ def select_fn(h):
356
362
continue
357
363
358
364
sig_select = [s for s in select if s [- 1 ].find (sig ) >= 0 ]
359
-
365
+ # The header name for cumulative full vaccinations was changed after 03/09/2021
366
+ # when J&J vaccines were added; if no header matches "fully vaccinated", fall back to "people with full course".
367
+ if (sig == "fully vaccinated" ) and (len (sig_select )== 0 ):
368
+ sig_select = [s for s in select if s [- 1 ].find ("people with full course" ) >= 0 ]
360
369
# Since "doses administered" is a substring of another desired header,
361
370
# "booster doses administered", we need to more strictly check if "doses administered"
362
371
# occurs at the beginning of a header to find the correct match.
@@ -404,7 +413,6 @@ def fetch_listing(params):
404
413
)
405
414
for el in listing if el ['filename' ].endswith ("xlsx" )
406
415
]
407
-
408
416
if params ['indicator' ]['reports' ] == 'new' :
409
417
# drop files we already have in the input cache
410
418
listing = [el for el in listing if not os .path .exists (el ['cached_filename' ])]
@@ -485,6 +493,7 @@ def fetch_new_reports(params, logger=None):
485
493
486
494
if len (latest_sig_df .index ) > 0 :
487
495
latest_sig_df = latest_sig_df .reset_index (drop = True )
496
+ latest_sig_df .to_csv ("problem.csv" )
488
497
assert all (latest_sig_df .groupby (
489
498
["timestamp" , "geo_id" ]
490
499
).size (
0 commit comments