
Commit ef13336

Merge pull request #1266 from cmu-delphi/clean-up
Minor clean up of changehc sensor updating
2 parents 410b791 + c4a21ee commit ef13336

File tree: 7 files changed, +35 -40 lines

_delphi_utils_python/tests/validator/test_dynamic.py

Lines changed: 2 additions & 2 deletions
@@ -49,7 +49,7 @@ def test_half_padding(self):
         assert new_ref_df.time_value.max() == datetime.strptime("2021-01-11",
                                                                  "%Y-%m-%d").date()
         assert new_ref_df.shape[0] == 11
-        assert new_ref_df.loc[:, "val"].iloc[5] == 2
+        assert new_ref_df["val"].iloc[5] == 2

     def test_full_padding(self):
         validator = DynamicValidator(self.params)
@@ -71,7 +71,7 @@ def test_full_padding(self):
         assert new_ref_df.time_value.max() == datetime.strptime("2021-01-15",
                                                                  "%Y-%m-%d").date()
         assert new_ref_df.shape[0] == 15
-        assert new_ref_df.loc[:, "val"].iloc[5] == 2
+        assert new_ref_df["val"].iloc[5] == 2

 class TestCheckRapidChange:
     params = {
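The change above is purely cosmetic: for a single column label, df.loc[:, "val"] and df["val"] return the same Series, so the assertion's behavior is unchanged. A minimal standalone sketch (the DataFrame below is illustrative, not the validator's padded reference frame):

import pandas as pd

# Illustrative frame, not the actual padded reference data built by DynamicValidator.
df = pd.DataFrame({"val": [0, 1, 2, 3, 4, 2, 6]})

# Label-based .loc selection and plain bracket selection of one column
# yield the same Series, so .iloc[5] picks the same element either way.
assert df.loc[:, "val"].iloc[5] == df["val"].iloc[5] == 2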

cdc_covidnet/delphi_cdc_covidnet/update_sensor.py

Lines changed: 1 addition & 1 deletion
@@ -77,7 +77,7 @@ def update_sensor(
                        right_on=["year", "weeknumber"])

     # Select relevant columns and standardize naming
-    hosp_df = hosp_df.loc[:, APIConfig.HOSP_RENAME_COLS.keys()]\
+    hosp_df = hosp_df[APIConfig.HOSP_RENAME_COLS.keys()]\
         .rename(columns=APIConfig.HOSP_RENAME_COLS)

     # Restrict to start and end date
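Here too the .loc[:, ...] is dropped: indexing with the rename map's keys keeps exactly the columns that are about to be renamed. A small sketch using a made-up rename map (a stand-in for APIConfig.HOSP_RENAME_COLS, whose real contents live in the indicator's config):

import pandas as pd

# Hypothetical stand-in for APIConfig.HOSP_RENAME_COLS.
RENAME_COLS = {"catchment": "geo", "weekly-rate": "weeklyrate"}

hosp_df = pd.DataFrame({"catchment": ["Entire Network"], "weekly-rate": [1.2], "unused": [0]})

# Keep only the mapped columns, then standardize their names.
hosp_df = hosp_df[RENAME_COLS.keys()].rename(columns=RENAME_COLS)
print(list(hosp_df.columns))  # ['geo', 'weeklyrate']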

changehc/README.md

Lines changed: 1 addition & 1 deletion
@@ -73,7 +73,7 @@ should not include critical sub-routines.

 ## Code tour

-- update_sensor.py: CHCSensorUpdator: reads the data, makes transformations, writes results to file
+- update_sensor.py: CHCSensorUpdater: reads the data, makes transformations, writes results to file
 - sensor.py: CHCSensor: methods for transforming data, including backfill and smoothing
 - smooth.py: implements local linear left Gaussian filter
 - load_data.py: methods for loading denominator and covid data

changehc/delphi_changehc/run.py

Lines changed: 2 additions & 2 deletions
@@ -16,7 +16,7 @@
 # first party
 from .download_ftp_files import download_covid, download_cli
 from .load_data import load_combined_data, load_cli_data
-from .update_sensor import CHCSensorUpdator
+from .update_sensor import CHCSensorUpdater


 def retrieve_files(params, filedate, logger):
@@ -164,7 +164,7 @@ def run_module(params: Dict[str, Dict[str, Any]]):
                logger.info("starting weekday adj", geo = geo, numtype = numtype)
            else:
                logger.info("starting no adj", geo = geo, numtype = numtype)
-           su_inst = CHCSensorUpdator(
+           su_inst = CHCSensorUpdater(
                startdate,
                enddate,
                dropdate,

changehc/delphi_changehc/update_sensor.py

Lines changed: 23 additions & 28 deletions
@@ -37,10 +37,6 @@ def write_to_csv(df, geo_level, write_se, day_shift, out_name, logger, output_pa

     # shift dates forward for labeling
     df["timestamp"] += day_shift
-    if start_date is None:
-        start_date = min(df["timestamp"])
-    if end_date is None:
-        end_date = max(df["timestamp"])

     # suspicious value warnings
     suspicious_se_mask = df["se"].gt(5)
@@ -49,7 +45,7 @@ def write_to_csv(df, geo_level, write_se, day_shift, out_name, logger, output_pa
     if write_se:
         logger.info("========= WARNING: WRITING SEs TO {0} =========".format(out_name))
     else:
-        df.loc[:, "se"] = np.nan
+        df["se"] = np.nan

     assert not df["val"].isna().any(), " val contains nan values"
     suspicious_val_mask = df["val"].gt(90)
@@ -75,7 +71,7 @@ def write_to_csv(df, geo_level, write_se, day_shift, out_name, logger, output_pa
     return dates


-class CHCSensorUpdator:  # pylint: disable=too-many-instance-attributes
+class CHCSensorUpdater:  # pylint: disable=too-many-instance-attributes
     """Contains methods to update sensor and write results to csv."""

     def __init__(self,
@@ -89,7 +85,7 @@ def __init__(self,
                  se,
                  wip_signal,
                  logger):
-        """Init Sensor Updator.
+        """Init Sensor Updater.

         Args:
             startdate: first sensor date (YYYY-mm-dd)
@@ -116,13 +112,13 @@ def __init__(self,

         # output file naming
         if self.numtype == "covid":
-            signals = [SMOOTHED_ADJ if self.weekday else SMOOTHED]
+            signal_name = SMOOTHED_ADJ if self.weekday else SMOOTHED
         elif self.numtype == "cli":
-            signals = [SMOOTHED_ADJ_CLI if self.weekday else SMOOTHED_CLI]
-        signal_names = add_prefix(
-            signals,
-            wip_signal=wip_signal)
-        self.updated_signal_names = signal_names
+            signal_name = SMOOTHED_ADJ_CLI if self.weekday else SMOOTHED_CLI
+        else:
+            raise ValueError(f'Unsupported numtype received "{numtype}",'
+                             f' must be one of ["covid", "cli"]')
+        self.signal_name = add_prefix([signal_name], wip_signal=wip_signal)[0]

         # initialize members set in shift_dates().
         self.burnindate = None
@@ -234,22 +230,21 @@ def update_sensor(self,
         # conform to naming expected by create_export_csv()
         df = df.reset_index().rename(columns={"rate": "val"})
         # df.loc[~df['incl'], ["val", "se"]] = np.nan # update to this line after nancodes get merged in
-        df = df[df['incl']]
+        df = df[df["incl"]]

         # write out results
+        dates = write_to_csv(
+            df,
+            geo_level=self.geo,
+            start_date=min(self.sensor_dates),
+            end_date=max(self.sensor_dates),
+            write_se=self.se,
+            day_shift=Config.DAY_SHIFT,
+            out_name=self.signal_name,
+            output_path=output_path,
+            logger=self.logger
+        )
         stats = []
-        for signal in self.updated_signal_names:
-            dates = write_to_csv(
-                df,
-                geo_level=self.geo,
-                start_date=min(self.sensor_dates),
-                end_date=max(self.sensor_dates),
-                write_se=self.se,
-                day_shift=Config.DAY_SHIFT,
-                out_name=signal,
-                output_path=output_path,
-                logger=self.logger
-            )
-            if len(dates) > 0:
-                stats.append((max(dates), len(dates)))
+        if len(dates) > 0:
+            stats = [(max(dates), len(dates))]
         return stats
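Since the signals list built in __init__ always held exactly one name, the updater now stores a single self.signal_name and calls write_to_csv once; stats keeps its old shape (a possibly empty list of (latest date, count) tuples), just without the loop. A minimal sketch of that stats logic, with a stub standing in for write_to_csv (the real function also writes the CSV output):

from datetime import date

def write_to_csv_stub(export_dates):
    """Hypothetical stand-in for write_to_csv(); it only returns the dates it would export."""
    return export_dates

dates = write_to_csv_stub([date(2020, 3, 1), date(2020, 3, 2), date(2020, 3, 3)])

# Same result shape as before the refactor: an empty list if nothing was written,
# otherwise a single (most recent date, number of dates) tuple.
stats = []
if len(dates) > 0:
    stats = [(max(dates), len(dates))]
print(stats)  # [(datetime.date(2020, 3, 3), 3)]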

changehc/tests/test_update_sensor.py

Lines changed: 5 additions & 5 deletions
@@ -14,7 +14,7 @@

 # first party
 from delphi_changehc.config import Config
-from delphi_changehc.update_sensor import write_to_csv, CHCSensorUpdator
+from delphi_changehc.update_sensor import write_to_csv, CHCSensorUpdater

 CONFIG = Config()
 PARAMS = {
@@ -30,7 +30,7 @@
 OUTPATH="test_data/"
 TEST_LOGGER = logging.getLogger()

-class TestCHCSensorUpdator:
+class TestCHCSensorUpdater:
     """Tests for updating the sensors."""
     geo = "county"
     parallel = False
@@ -46,7 +46,7 @@ class TestCHCSensorUpdator:

     def test_shift_dates(self):
         """Tests that dates in the data are shifted according to the burn-in and lag."""
-        su_inst = CHCSensorUpdator(
+        su_inst = CHCSensorUpdater(
             "02-01-2020",
             "06-01-2020",
             "06-12-2020",
@@ -71,7 +71,7 @@ def test_shift_dates(self):
     def test_geo_reindex(self):
         """Tests that the geo reindexer changes the geographic resolution."""
         for geo, multiple in [("nation", 1), ("county", 2), ("hhs", 2)]:
-            su_inst = CHCSensorUpdator(
+            su_inst = CHCSensorUpdater(
                 "02-01-2020",
                 "06-01-2020",
                 "06-12-2020",
@@ -98,7 +98,7 @@ def test_update_sensor(self):
         outputs = {}
         for geo in ["county", "state", "hhs", "nation"]:
             td = TemporaryDirectory()
-            su_inst = CHCSensorUpdator(
+            su_inst = CHCSensorUpdater(
                 "03-01-2020",
                 "03-22-2020",
                 "03-27-2020",

nchs_mortality/delphi_nchs_mortality/pull.py

Lines changed: 1 addition & 1 deletion
@@ -98,7 +98,7 @@ def pull_nchs_mortality_data(token: str, test_file: Optional[str]=None):

     # Drop rows for locations outside US
     df = df[df["state"] != "United States"]
-    df = df.loc[:, keep_columns + ["timestamp", "state"]].set_index("timestamp")
+    df = df[keep_columns + ["timestamp", "state"]].set_index("timestamp")

     # NCHS considers NYC as an individual state, however, we want it included
     # in NY. If values are nan for both NYC and NY, the aggreagtion should
