Skip to content

Commit 80134af

Browse files
committed
Add nancodes to cdc_covidnet
1 parent 801f04c commit 80134af

File tree

3 files changed

+28
-3
lines changed

3 files changed

+28
-3
lines changed

cdc_covidnet/delphi_cdc_covidnet/update_sensor.py

Lines changed: 16 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -11,7 +11,7 @@
1111

1212
import numpy as np
1313
import pandas as pd
14-
from delphi_utils import GeoMapper, add_prefix
14+
from delphi_utils import GeoMapper, add_prefix, Nans
1515

1616
from .api_config import APIConfig
1717
from .constants import SIGNALS
@@ -47,6 +47,19 @@ def write_to_csv(data: pd.DataFrame, out_name: str, output_path: str):
4747
sub_df.drop("epiweek", axis=1).to_csv(filename, na_rep="NA")
4848

4949

50+
def add_nancodes(df: pd.DataFrame) -> pd.DataFrame:
    """Add the missingness-code columns to a sensor dataframe.

    Three columns are written: ``missing_val``, ``missing_se`` and
    ``missing_sample_size``.

    Parameters
    ----------
    df: pd.DataFrame
        Dataframe with at least a ``val`` column. It is modified in place.

    Returns
    -------
    pd.DataFrame
        The same dataframe object, with the three ``missing_*`` columns set.
    """
    # Default missing code: assume every value is present.
    df["missing_val"] = Nans.NOT_MISSING

    # Flag only the rows whose value is actually null. The mask must select
    # the null rows; negating it would label every valid value as UNKNOWN
    # and leave the real gaps marked NOT_MISSING.
    missing_mask = df["val"].isnull()
    df.loc[missing_mask, "missing_val"] = Nans.UNKNOWN

    # Fill in remaining expected columns; both get one fixed code for all rows.
    df["missing_se"] = Nans.NOT_APPLICABLE
    df["missing_sample_size"] = Nans.NOT_APPLICABLE

    return df
61+
62+
5063
def update_sensor(
5164
state_files: List[str],
5265
mmwr_info: pd.DataFrame,
@@ -99,10 +112,11 @@ def update_sensor(
99112
assert not hosp_df.duplicated(["date", "geo_id"]).any(), "Non-unique (date, geo_id) pairs"
100113
hosp_df.set_index(["date", "geo_id"], inplace=True)
101114

102-
# Fill in remaining expected columns
103115
hosp_df["se"] = np.nan
104116
hosp_df["sample_size"] = np.nan
105117

118+
hosp_df = add_nancodes(hosp_df)
119+
106120
# Write results
107121
signals = add_prefix(SIGNALS,
108122
wip_signal=wip_signal,

cdc_covidnet/tests/test_run.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,7 @@
55
from pandas.testing import assert_frame_equal
66

77
from delphi_cdc_covidnet.run import run_module
8+
from delphi_cdc_covidnet.update_sensor import add_nancodes
89

910

1011
class TestRun:
@@ -55,5 +56,6 @@ def test_match_old_to_new_output(self):
5556

5657
# Contents match
5758
expected_df = pd.read_csv(join("receiving_test", fname))
59+
expected_df = add_nancodes(expected_df)
5860
actual_df = pd.read_csv(join("receiving", fname))
5961
assert_frame_equal(expected_df, actual_df, check_less_precise=5)

cdc_covidnet/tests/test_update_sensor.py

Lines changed: 10 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -95,7 +95,16 @@ def test_syn_update_sensor(self):
9595

9696
for i, exp_file in enumerate(expected_files):
9797
data = pd.read_csv(exp_file)
98-
assert (data.columns == ["geo_id", "val", "se", "sample_size"]).all()
98+
expected_columns = [
99+
"geo_id",
100+
"val",
101+
"se",
102+
"sample_size",
103+
"missing_val",
104+
"missing_se",
105+
"missing_sample_size"
106+
]
107+
assert (data.columns == expected_columns).all()
99108

100109
# Check data for NA
101110
assert (~pd.isna(data["geo_id"])).all()

0 commit comments

Comments
 (0)