Skip to content

Commit 16cd368

Browse files
authored
Merge pull request #456 from cmu-delphi/nchs_mortality
Update nchs_mortality pipeline with new signal names
2 parents c2c0c5e + f81fd45 commit 16cd368

File tree

3 files changed

+22
-11
lines changed

3 files changed

+22
-11
lines changed

nchs_mortality/DETAILS.md

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
# NCHS Mortality Data
22

3-
We import the NCHS Mortality Data from CDC website and export
3+
We import the mortality data from the NCHS website and export
44
the state-level data as-is in a weekly format.
55

66
In order to avoid confusing public consumers of the data, we maintain
@@ -57,6 +57,6 @@ as the corresponding epiweek of date(D + 1).
5757
### Data Versioning
5858
Data versions are tracked on both a daily and weekly level.
5959
On a daily level, we check for updates to the NCHS mortality data every weekday, as it is reported by
60-
CDC and stash these daily updates on S3, but not our API.
60+
NCHS, and stash these daily updates on S3, but not on our API.
6161
On a weekly level (on Mondays), we additionally upload the changes to the data
6262
made over the past week (due to backfill) to our API.

nchs_mortality/delphi_nchs_mortality/run.py

Lines changed: 11 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -22,6 +22,15 @@
2222
'pneumonia_deaths', 'pneumonia_and_covid_deaths', 'influenza_deaths',
2323
'pneumonia_influenza_or_covid_19_deaths'
2424
]
25+
SENSOR_NAME_MAP = {
26+
"covid_deaths": "deaths_covid_incidence",
27+
"total_deaths": "deaths_allcause_incidence",
28+
"percent_of_expected_deaths": "deaths_percent_of_expected",
29+
"pneumonia_deaths": "deaths_pneumonia_notflu_incidence",
30+
"pneumonia_and_covid_deaths": "deaths_covid_and_pneumonia_notflu_incidence",
31+
"influenza_deaths": "deaths_flu_incidence",
32+
"pneumonia_influenza_or_covid_19_deaths": "deaths_pneumonia_or_flu_or_covid_incidence"
33+
}
2534
SENSORS = [
2635
"num",
2736
"prop"
@@ -62,7 +71,7 @@ def run_module(): # pylint: disable=too-many-branches,too-many-statements
6271
df["val"] = df[metric]
6372
df["se"] = np.nan
6473
df["sample_size"] = np.nan
65-
sensor_name = "_".join(["wip", metric])
74+
sensor_name = "_".join(["wip", SENSOR_NAME_MAP[metric]])
6675
export_csv(
6776
df,
6877
geo_name=GEO_RES,
@@ -79,7 +88,7 @@ def run_module(): # pylint: disable=too-many-branches,too-many-statements
7988
df["val"] = df[metric] / df["population"] * INCIDENCE_BASE
8089
df["se"] = np.nan
8190
df["sample_size"] = np.nan
82-
sensor_name = "_".join(["wip", metric, sensor])
91+
sensor_name = "_".join(["wip", SENSOR_NAME_MAP[metric], sensor])
8392
export_csv(
8493
df,
8594
geo_name=GEO_RES,

nchs_mortality/tests/test_run.py

Lines changed: 9 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -32,17 +32,19 @@ def test_output_files_exist(self, run_as_module, date):
3232
"202025",
3333
"202026",
3434
]
35-
metrics = [
36-
'covid_deaths', 'total_deaths', 'pneumonia_deaths',
37-
'pneumonia_and_covid_deaths', 'influenza_deaths',
38-
'pneumonia_influenza_or_covid_19_deaths'
39-
]
35+
metrics = ['deaths_covid_incidence',
36+
'deaths_allcause_incidence',
37+
'deaths_percent_of_expected',
38+
'deaths_pneumonia_notflu_incidence',
39+
'deaths_covid_and_pneumonia_notflu_incidence',
40+
'deaths_flu_incidence',
41+
'deaths_pneumonia_or_flu_or_covid_incidence']
4042
sensors = ["num", "prop"]
4143

4244
expected_files = []
4345
for date in dates:
4446
for metric in metrics:
45-
if metric == "percent_of_expected_deaths":
47+
if metric == "deaths_percent_of_expected":
4648
expected_files += ["weekly_" + date + "_state_wip_" \
4749
+ metric + ".csv"]
4850
else:
@@ -61,6 +63,6 @@ def test_output_file_format(self, run_as_module, date):
6163

6264
for output_folder in folders:
6365
df = pd.read_csv(
64-
join(output_folder, "weekly_202026_state_wip_covid_deaths_prop.csv")
66+
join(output_folder, "weekly_202026_state_wip_deaths_covid_incidence_prop.csv")
6567
)
6668
assert (df.columns.values == ["geo_id", "val", "se", "sample_size"]).all()

0 commit comments

Comments
 (0)