Minor clean up of changehc sensor updating #1266

Merged
merged 5 commits into from
Oct 18, 2021
Changes from 2 commits
4 changes: 2 additions & 2 deletions _delphi_utils_python/tests/validator/test_dynamic.py
@@ -49,7 +49,7 @@ def test_half_padding(self):
         assert new_ref_df.time_value.max() == datetime.strptime("2021-01-11",
                                                                  "%Y-%m-%d").date()
         assert new_ref_df.shape[0] == 11
-        assert new_ref_df.loc[:, "val"].iloc[5] == 2
+        assert new_ref_df["val"].iloc[5] == 2

     def test_full_padding(self):
         validator = DynamicValidator(self.params)
@@ -71,7 +71,7 @@ def test_full_padding(self):
         assert new_ref_df.time_value.max() == datetime.strptime("2021-01-15",
                                                                  "%Y-%m-%d").date()
         assert new_ref_df.shape[0] == 15
-        assert new_ref_df.loc[:, "val"].iloc[5] == 2
+        assert new_ref_df["val"].iloc[5] == 2

 class TestCheckRapidChange:
     params = {
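The change in both tests is the same pandas simplification that recurs throughout this PR: for a plain label-based read, `df["val"]` and `df.loc[:, "val"]` return the same column. A minimal illustrative sketch (not code from the PR):

```python
# Minimal sketch: bracket indexing and .loc column selection are
# interchangeable when reading a single column by label.
import pandas as pd

df = pd.DataFrame({"val": [0, 1, 2, 2, 2, 2], "se": [0.1] * 6})

assert df["val"].equals(df.loc[:, "val"])  # identical Series either way
assert df["val"].iloc[5] == 2              # the shorter form the tests now use
```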
2 changes: 1 addition & 1 deletion cdc_covidnet/delphi_cdc_covidnet/update_sensor.py
@@ -77,7 +77,7 @@ def update_sensor(
                             right_on=["year", "weeknumber"])

     # Select relevant columns and standardize naming
-    hosp_df = hosp_df.loc[:, APIConfig.HOSP_RENAME_COLS.keys()]\
+    hosp_df = hosp_df[APIConfig.HOSP_RENAME_COLS.keys()]\
         .rename(columns=APIConfig.HOSP_RENAME_COLS)

     # Restrict to start and end date
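The select-then-rename chain itself is unchanged here; only the explicit `.loc[:, ...]` indexer is dropped. A rough sketch of the pattern with invented column names (`APIConfig.HOSP_RENAME_COLS` is a mapping from source to output column names; the PR passes its `.keys()` view directly, while the sketch uses `list(...)` for clarity):

```python
# Sketch of subsetting a frame to a rename map's keys, then standardizing
# the names. The columns and rename map below are made up for illustration.
import pandas as pd

rename_cols = {"catchment": "geo_id", "cumulative-rate": "val"}

hosp_df = pd.DataFrame({
    "catchment": ["CA", "NY"],
    "cumulative-rate": [1.2, 3.4],
    "unused": [0, 0],
})

hosp_df = hosp_df[list(rename_cols)].rename(columns=rename_cols)
print(hosp_df.columns.tolist())  # ['geo_id', 'val']
```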
45 changes: 20 additions & 25 deletions changehc/delphi_changehc/update_sensor.py
@@ -37,10 +37,6 @@ def write_to_csv(df, geo_level, write_se, day_shift, out_name, output_path=".",

     # shift dates forward for labeling
     df["timestamp"] += day_shift
-    if start_date is None:
-        start_date = min(df["timestamp"])
-    if end_date is None:
-        end_date = max(df["timestamp"])

     # suspicious value warnings
     suspicious_se_mask = df["se"].gt(5)
@@ -49,7 +45,7 @@ def write_to_csv(df, geo_level, write_se, day_shift, out_name, output_path=".",
     if write_se:
         logging.info("========= WARNING: WRITING SEs TO {0} =========".format(out_name))
     else:
-        df.loc[:, "se"] = np.nan
+        df["se"] = np.nan

     assert not df["val"].isna().any(), " val contains nan values"
     suspicious_val_mask = df["val"].gt(90)
@@ -113,13 +109,13 @@ def __init__(self,

         # output file naming
         if self.numtype == "covid":
-            signals = [SMOOTHED_ADJ if self.weekday else SMOOTHED]
+            signal_name = SMOOTHED_ADJ if self.weekday else SMOOTHED
         elif self.numtype == "cli":
-            signals = [SMOOTHED_ADJ_CLI if self.weekday else SMOOTHED_CLI]
-        signal_names = add_prefix(
-            signals,
-            wip_signal=wip_signal)
-        self.updated_signal_names = signal_names
+            signal_name = SMOOTHED_ADJ_CLI if self.weekday else SMOOTHED_CLI
+        else:
+            raise ValueError(f'Unsupported numtype received "{numtype}",'
+                             f' must be one of ["covid", "cli"]')
+        self.signal_name = add_prefix([signal_name], wip_signal=wip_signal)[0]

         # initialize members set in shift_dates().
         self.burnindate = None
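This hunk replaces the one-element `signals` list with a single `signal_name` string and adds an explicit `else` branch so an unsupported `numtype` now fails with a clear error message. A standalone sketch of the same control flow (the signal constants and `add_prefix` below are stand-ins, not the real changehc definitions):

```python
# Stand-ins for the real changehc signal constants and the add_prefix
# helper, included only to make the sketch self-contained and runnable.
SMOOTHED, SMOOTHED_ADJ = "smoothed_covid", "smoothed_adj_covid"
SMOOTHED_CLI, SMOOTHED_ADJ_CLI = "smoothed_cli", "smoothed_adj_cli"

def add_prefix(names, wip_signal):
    # stand-in: optionally mark signals as work-in-progress
    return ["wip_" + n for n in names] if wip_signal else names

def pick_signal_name(numtype, weekday, wip_signal=False):
    if numtype == "covid":
        signal_name = SMOOTHED_ADJ if weekday else SMOOTHED
    elif numtype == "cli":
        signal_name = SMOOTHED_ADJ_CLI if weekday else SMOOTHED_CLI
    else:
        raise ValueError(f'Unsupported numtype received "{numtype}",'
                         f' must be one of ["covid", "cli"]')
    # add_prefix works on a list, so wrap the single name and unwrap the result
    return add_prefix([signal_name], wip_signal=wip_signal)[0]

print(pick_signal_name("cli", weekday=True))  # smoothed_adj_cli
```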
@@ -231,21 +227,20 @@ def update_sensor(self,
         # conform to naming expected by create_export_csv()
         df = df.reset_index().rename(columns={"date": "timestamp", "rate": "val"})
         # df.loc[~df['incl'], ["val", "se"]] = np.nan # update to this line after nancodes get merged in
-        df = df[df['incl']]
+        df = df[df["incl"]]

         # write out results
+        dates = write_to_csv(
+            df,
+            geo_level=self.geo,
+            start_date=min(self.sensor_dates),
+            end_date=max(self.sensor_dates),
+            write_se=self.se,
+            day_shift=Config.DAY_SHIFT,
+            out_name=self.signal_name,
+            output_path=output_path
+        )
         stats = []
-        for signal in self.updated_signal_names:
-            dates = write_to_csv(
-                df,
-                geo_level=self.geo,
-                start_date=min(self.sensor_dates),
-                end_date=max(self.sensor_dates),
-                write_se=self.se,
-                day_shift=Config.DAY_SHIFT,
-                out_name=signal,
-                output_path=output_path
-            )
-            if len(dates) > 0:
-                stats.append((max(dates), len(dates)))
+        if len(dates) > 0:
+            stats = [(max(dates), len(dates))]
         return stats
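With a single `signal_name` per run, the loop over `updated_signal_names` collapses into one `write_to_csv` call, and `stats` becomes either empty or a one-element `(max(dates), len(dates))` list. A trimmed sketch of the resulting flow, with `write_to_csv` stubbed to return the dates it exported (mirroring how the real return value is used above):

```python
# Trimmed sketch of the simplified export flow; write_to_csv here is a stub
# that just returns the dates it would have exported.
import pandas as pd

def write_to_csv(df, out_name):
    return sorted(df["timestamp"].unique())

def export_and_summarize(df, signal_name):
    dates = write_to_csv(df, out_name=signal_name)
    stats = []
    if len(dates) > 0:
        # a single (latest_date, number_of_dates) tuple, not one per signal
        stats = [(max(dates), len(dates))]
    return stats

df = pd.DataFrame({"timestamp": pd.to_datetime(["2021-10-01", "2021-10-02"]),
                   "val": [1.0, 2.0]})
print(export_and_summarize(df, "some_signal"))  # one summary tuple: (latest date, 2)
```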
2 changes: 1 addition & 1 deletion nchs_mortality/delphi_nchs_mortality/pull.py
@@ -98,7 +98,7 @@ def pull_nchs_mortality_data(token: str, test_file: Optional[str]=None):

     # Drop rows for locations outside US
     df = df[df["state"] != "United States"]
-    df = df.loc[:, keep_columns + ["timestamp", "state"]].set_index("timestamp")
+    df = df[keep_columns + ["timestamp", "state"]].set_index("timestamp")

     # NCHS considers NYC as an individual state, however, we want it included
     # in NY. If values are nan for both NYC and NY, the aggreagtion should