Skip to content

Commit 83cca2d

Browse files
committed
added original vaccination signal (pre JJ) and lint
1 parent 8e3b7f8 commit 83cca2d

File tree

1 file changed

+15
-6
lines changed
  • dsew_community_profile/delphi_dsew_community_profile

1 file changed

+15
-6
lines changed

dsew_community_profile/delphi_dsew_community_profile/pull.py

Lines changed: 15 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -280,7 +280,7 @@ def retain_header(header):
280280
header.find(" age") < 0,
281281
# exclude "Confirmed COVID-19 admissions per 100 inpatient beds - last 7 days"
282282
header.find(" beds") < 0,
283-
])) or all([
283+
])) or (all([
284284
# include "People who are fully vaccinated"
285285
# include "People who have received a booster dose since August 13, 2021"
286286
header.startswith("People who"),
@@ -292,7 +292,13 @@ def retain_header(header):
292292
header.find(" age") < 0,
293293
# exclude "People who are fully vaccinated - 12-17" ...
294294
header.find("-") < 0,
295-
]))
295+
296+
]) or all([
297+
# include "People with full course administered"
298+
header.startswith("People with full course"),
299+
# exclude "People with full course administered as % of adult population"
300+
header.find("%") < 0,
301+
])))
296302
def _parse_sheet(self, sheet):
297303
"""Extract data frame for this sheet."""
298304
df = pd.read_excel(
@@ -338,8 +344,8 @@ def select_fn(h):
338344
is_fully_vax_msa_before_apr11 = (sheet.level == "msa" or sheet.level == "county") \
339345
and self.publish_date <= datetime.date(2021, 4, 11) \
340346
and sig == "fully vaccinated"
341-
# People fully vaccinated not available before March 08, 2021 at any geo level.
342-
is_fully_vax_before_mar8 = self.publish_date <= datetime.date(2021, 3, 8) \
347+
# "People fully vaccinated" is not available before Jan 15, 2021 at any geo level.
348+
is_fully_vax_before_mar8 = self.publish_date <= datetime.date(2021, 1, 14) \
343349
and sig == "fully vaccinated"
344350

345351
if any([is_hosp_adm_before_jan8,
@@ -356,7 +362,10 @@ def select_fn(h):
356362
continue
357363

358364
sig_select = [s for s in select if s[-1].find(sig) >= 0]
359-
365+
# The header of the cumulative vaccination signal was renamed after 03/09/2021
366+
# when J&J vaccines were added; older reports use "People with full course".
367+
if (sig == "fully vaccinated") and (len(sig_select)==0):
368+
sig_select = [s for s in select if s[-1].find("people with full course") >= 0]
360369
# Since "doses administered" is a substring of another desired header,
361370
# "booster doses administered", we need to more strictly check if "doses administered"
362371
# occurs at the beginning of a header to find the correct match.
@@ -404,7 +413,6 @@ def fetch_listing(params):
404413
)
405414
for el in listing if el['filename'].endswith("xlsx")
406415
]
407-
408416
if params['indicator']['reports'] == 'new':
409417
# drop files we already have in the input cache
410418
listing = [el for el in listing if not os.path.exists(el['cached_filename'])]
@@ -485,6 +493,7 @@ def fetch_new_reports(params, logger=None):
485493

486494
if len(latest_sig_df.index) > 0:
487495
latest_sig_df = latest_sig_df.reset_index(drop=True)
496+
latest_sig_df.to_csv("problem.csv")
488497
assert all(latest_sig_df.groupby(
489498
["timestamp", "geo_id"]
490499
).size(

0 commit comments

Comments
 (0)