Skip to content

Small fixes to CPR vax signals to handle cases when fields are not available #1524

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 7 commits into from
Feb 15, 2022
Merged
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
68 changes: 36 additions & 32 deletions dsew_community_profile/delphi_dsew_community_profile/pull.py
Original file line number Diff line number Diff line change
Expand Up @@ -240,8 +240,14 @@ def _parse_times_for_sheet(self, sheet):
self.times[dt.column][sig] = dt[sig]
else:
self.times[dt.column] = dt
assert len(self.times) == 3, \
f"No times extracted from overheaders:\n{NEWLINE.join(str(s) for s in overheaders)}"

if self.publish_date <= datetime.date(2021, 1, 11):
# No vaccination data available, so we only have hospitalization and testing overheaders
assert len(self.times) == 2, \
f"No times extracted from overheaders:\n{NEWLINE.join(str(s) for s in overheaders)}"
else:
assert len(self.times) == 3, \
f"No times extracted from overheaders:\n{NEWLINE.join(str(s) for s in overheaders)}"

@staticmethod
def retain_header(header):
Expand All @@ -251,7 +257,7 @@ def retain_header(header):
# include "Total RT-PCR diagnostic tests - [last|previous] 7 days ..."
# include "NAAT positivity rate - [last|previous] 7 days ..."
# include "Viral (RT-PCR) lab test positivity rate - [last|previous] 7 days ..."
# include "Booster doses administerd - [last|previous] 7 days ..."
# include "Booster doses administered - [last|previous] 7 days ..."
# include "Doses administered - [last|previous] 7 days ..."
(header.startswith("Total NAATs") or
header.startswith("NAAT positivity rate") or
Expand Down Expand Up @@ -284,6 +290,8 @@ def retain_header(header):
# exclude "People who are fully vaccinated - ages 5-11" ...
# exclude "People who have received a booster dose - ages 65+" ...
header.find(" age") < 0,
# exclude "People who are fully vaccinated - 12-17" ...
header.find("-") < 0,
]))
def _parse_sheet(self, sheet):
"""Extract data frame for this sheet."""
Expand Down Expand Up @@ -311,40 +319,36 @@ def select_fn(h):
]

for sig in SIGNALS:
## Check if field is known to be missing
# Hospital admissions not available at the county or CBSA level prior to Jan 8, 2021.
if (sheet.level == "msa" or sheet.level == "county") \
is_hosp_adm_before_jan8 = (sheet.level == "msa" or sheet.level == "county") \
and self.publish_date < datetime.date(2021, 1, 8) \
and sig == "confirmed covid-19 admissions":
self.dfs[(sheet.level, sig, NOT_PROP)] = pd.DataFrame(
columns = ["geo_id", "timestamp", "val", \
"se", "sample_size", "publish_date"]
)
continue


# Vaccine data not available before May 2021.
if self.publish_date < datetime.date(2021, 5, 1) \
and (sig in ["fully vaccinated", "doses administered"]) :
self.dfs[(sheet.level, sig, NOT_PROP)] = pd.DataFrame(
columns = ["geo_id", "timestamp", "val", \
"se", "sample_size", "publish_date"]
)
continue


and sig == "confirmed covid-19 admissions"
# Booster data not available before November 2021.
if self.publish_date < datetime.date(2021, 11, 1) \
and (sig in ["booster dose since", "booster doses administered"]) :
self.dfs[(sheet.level, sig, NOT_PROP)] = pd.DataFrame(
columns = ["geo_id", "timestamp", "val", \
"se", "sample_size", "publish_date"]
)
continue

is_booster_before_nov1 = self.publish_date < datetime.date(2021, 11, 1) \
and (sig in ["booster dose since", "booster doses administered"])
# Booster and weekly doses administered not available below the state level.
if ((sheet.level != "hhs" and sheet.level != "state") \
is_booster_below_state = ((sheet.level != "hhs" and sheet.level != "state") \
and (sig in ["doses administered", \
"booster doses administered", "booster dose since"])):
"booster doses administered", "booster dose since"]))
# Weekly doses administered not available before Apr 29, 2021.
is_dose_admin_apr29 = self.publish_date <= datetime.date(2021, 4, 29) \
and sig == "doses administered"
# People fully vaccinated not available before Apr 11, 2021 at the CBSA level.
is_fully_vax_msa_before_apr11 = (sheet.level == "msa" or sheet.level == "county") \
and self.publish_date <= datetime.date(2021, 4, 11) \
and sig == "fully vaccinated"
# People fully vaccinated not available before March 08, 2021 at any geo level.
is_fully_vax_before_mar8 = self.publish_date <= datetime.date(2021, 3, 8) \
and sig == "fully vaccinated"

if any([is_hosp_adm_before_jan8,
is_booster_before_nov1,
is_booster_below_state,
is_dose_admin_apr29,
is_fully_vax_msa_before_apr11,
is_fully_vax_before_mar8
]):
self.dfs[(sheet.level, sig, NOT_PROP)] = pd.DataFrame(
columns = ["geo_id", "timestamp", "val", \
"se", "sample_size", "publish_date"]
Expand Down