Skip to content

Commit 300d41c

Browse files
authored
Merge pull request #1016 from dshemetov/nans_nchs
Add NAN code support to NCHS Mortality
2 parents c9843c7 + 95741af commit 300d41c

File tree

3 files changed

+25
-4
lines changed

3 files changed

+25
-4
lines changed

nchs_mortality/.gitignore

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,9 @@ params.json
55

66
# Do not commit output files
77
receiving/*.csv
8+
daily_receiving/*.csv
9+
cache/*.csv
10+
daily_cache/*.csv
811

912
# Do not commit test files
1013
tests/receiving/*.csv

nchs_mortality/delphi_nchs_mortality/run.py

Lines changed: 17 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -9,14 +9,26 @@
99
from typing import Dict, Any
1010

1111
import numpy as np
12-
from delphi_utils import S3ArchiveDiffer, get_structured_logger, create_export_csv
12+
from delphi_utils import S3ArchiveDiffer, get_structured_logger, create_export_csv, Nans
1313

1414
from .archive_diffs import arch_diffs
1515
from .constants import (METRICS, SENSOR_NAME_MAP,
1616
SENSORS, INCIDENCE_BASE, GEO_RES)
1717
from .pull import pull_nchs_mortality_data
1818

1919

20+
def add_nancodes(df):
    """Attach missingness-code columns to ``df`` and return it.

    Three columns are written onto the frame that was passed in (the
    frame is modified in place, and the same object is returned):

    * ``missing_val`` -- ``Nans.NOT_MISSING`` for every row, except rows
      whose ``val`` is null, which get ``Nans.OTHER``.
    * ``missing_se`` -- ``Nans.NOT_APPLICABLE`` for every row.
    * ``missing_sample_size`` -- ``Nans.NOT_APPLICABLE`` for every row.

    Parameters
    ----------
    df
        Frame with a ``val`` column.

    Returns
    -------
    The same frame, now carrying the three ``missing_*`` columns.
    """
    # Find the null values up front; the mask depends only on "val".
    val_is_null = df["val"].isnull()

    # Blanket defaults, written in the order the columns should appear.
    default_codes = {
        "missing_val": Nans.NOT_MISSING,
        "missing_se": Nans.NOT_APPLICABLE,
        "missing_sample_size": Nans.NOT_APPLICABLE,
    }
    for column, code in default_codes.items():
        df[column] = code

    # A null value with no more specific explanation is coded as OTHER.
    df.loc[val_is_null, "missing_val"] = Nans.OTHER
    return df
31+
2032
def run_module(params: Dict[str, Any]):
2133
"""Run module for processing NCHS mortality data.
2234
@@ -67,7 +79,8 @@ def run_module(params: Dict[str, Any]):
6779
df["val"] = df[metric]
6880
df["se"] = np.nan
6981
df["sample_size"] = np.nan
70-
df = df[~df["val"].isnull()]
82+
df = add_nancodes(df)
83+
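# Rows with a null "val" are no longer dropped here (previously: df = df[~df["val"].isnull()]); add_nancodes flags them in "missing_val" instead.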
7184
sensor_name = "_".join([SENSOR_NAME_MAP[metric]])
7285
dates = create_export_csv(
7386
df,
@@ -91,7 +104,8 @@ def run_module(params: Dict[str, Any]):
91104
df["val"] = df[metric] / df["population"] * INCIDENCE_BASE
92105
df["se"] = np.nan
93106
df["sample_size"] = np.nan
94-
df = df[~df["val"].isnull()]
107+
df = add_nancodes(df)
108+
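# Rows with a null "val" are no longer dropped here (previously: df = df[~df["val"].isnull()]); add_nancodes flags them in "missing_val" instead.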
95109
sensor_name = "_".join([SENSOR_NAME_MAP[metric], sensor])
96110
dates = create_export_csv(
97111
df,

nchs_mortality/tests/test_run.py

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -60,4 +60,8 @@ def test_output_file_format(self, run_as_module, date):
6060
df = pd.read_csv(
6161
join(output_folder, "weekly_202026_state_deaths_covid_incidence_prop.csv")
6262
)
63-
assert (df.columns.values == ["geo_id", "val", "se", "sample_size"]).all()
63+
expected_columns = [
64+
"geo_id", "val", "se", "sample_size",
65+
"missing_val", "missing_se", "missing_sample_size"
66+
]
67+
assert (df.columns.values == expected_columns).all()

0 commit comments

Comments
 (0)