Skip to content

Commit 3dda978

Browse files
committed
NANS for HHS:
* add missing columns
1 parent 0b7103a commit 3dda978

File tree

2 files changed

+48
-24
lines changed

2 files changed

+48
-24
lines changed

hhs_hosp/delphi_hhs/run.py

Lines changed: 17 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -9,9 +9,7 @@
99

1010
import time
1111
from delphi_epidata import Epidata
12-
from delphi_utils.export import create_export_csv
13-
from delphi_utils.geomap import GeoMapper
14-
from delphi_utils import get_structured_logger
12+
from delphi_utils import create_export_csv, get_structured_logger, Nans, GeoMapper
1513
import numpy as np
1614
import pandas as pd
1715

@@ -63,6 +61,17 @@ def generate_date_ranges(start, end):
6361
output.append(Epidata.range(_date_to_int(start), _date_to_int(end)))
6462
return output
6563

64+
def add_nancodes(df):
    """Attach missingness-code columns to a signal dataframe.

    Parameters
    ----------
    df: pd.DataFrame
        Signal dataframe; must contain a "val" column.

    Returns
    -------
    pd.DataFrame
        The same dataframe object (modified in place) with the columns
        "missing_val", "missing_se" and "missing_sample_size" set.
    """
    # Baseline codes applied to every row before any per-row override.
    default_codes = (
        ("missing_val", Nans.NOT_MISSING),
        ("missing_se", Nans.NOT_APPLICABLE),
        ("missing_sample_size", Nans.NOT_APPLICABLE),
    )
    for column, code in default_codes:
        df[column] = code

    # Rows whose value is still null get the catch-all "unknown" code.
    df.loc[df["val"].isna(), "missing_val"] = Nans.UNKNOWN
    return df
6675

6776
def run_module(params):
6877
"""
@@ -114,9 +123,12 @@ def run_module(params):
114123
"state_code",
115124
from_col="state")
116125
if sensor.endswith("_prop"):
117-
df=pop_proportion(df, geo_mapper)
126+
df = pop_proportion(df, geo_mapper)
118127
df = make_geo(df, geo, geo_mapper)
128+
df["se"] = np.nan
129+
df["sample_size"] = np.nan
119130
df = smooth_values(df, smoother[0])
131+
df = add_nancodes(df)
120132
if df.empty:
121133
continue
122134
sensor_name = sensor + smoother[1]
@@ -162,12 +174,9 @@ def make_geo(state, geo, geo_mapper):
162174
if geo == "state":
163175
exported = state.rename(columns={"state": "geo_id"})
164176
else:
165-
exported = geo_mapper.replace_geocode(state, "state_code", geo, new_col="geo_id")
166-
exported["se"] = np.nan
167-
exported["sample_size"] = np.nan
177+
exported = geo_mapper.replace_geocode(state, "state_code", geo, new_col="geo_id", date_col="timestamp")
168178
return exported
169179

170-
171180
def make_signal(all_columns, sig):
172181
"""Generate column sums according to signal name."""
173182
assert sig in SIGNALS, f"Unexpected signal name '{sig}';" + \

hhs_hosp/tests/test_run.py

Lines changed: 31 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -4,11 +4,11 @@
44
import tempfile
55
import os
66

7-
from delphi_hhs.run import _date_to_int, int_date_to_previous_day_datetime, generate_date_ranges, \
7+
from delphi_hhs.run import _date_to_int, add_nancodes, int_date_to_previous_day_datetime, generate_date_ranges, \
88
make_signal, make_geo, run_module, pop_proportion
99
from delphi_hhs.constants import SMOOTHERS, GEOS, SIGNALS, \
1010
CONFIRMED, SUM_CONF_SUSP, CONFIRMED_FLU, CONFIRMED_PROP, SUM_CONF_SUSP_PROP, CONFIRMED_FLU_PROP
11-
from delphi_utils.geomap import GeoMapper
11+
from delphi_utils import GeoMapper, Nans
1212
from freezegun import freeze_time
1313
import numpy as np
1414
import pandas as pd
@@ -85,15 +85,15 @@ def test_make_signal():
8585
})
8686
pd.testing.assert_frame_equal(expected_flu, make_signal(data, CONFIRMED_FLU))
8787
pd.testing.assert_frame_equal(expected_flu, make_signal(data, CONFIRMED_FLU_PROP))
88-
88+
8989
with pytest.raises(Exception):
9090
make_signal(data, "zig")
9191

9292
def test_pop_proportion():
9393
geo_mapper = GeoMapper()
9494
state_pop = geo_mapper.get_crosswalk("state_code", "pop")
9595

96-
test_df = pd.DataFrame({
96+
test_df = pd.DataFrame({
9797
'state': ['PA'],
9898
'state_code': [42],
9999
'timestamp': [datetime(year=2020, month=1, day=1)],
@@ -109,7 +109,7 @@ def test_pop_proportion():
109109
'val': [15/pa_pop*100000],})
110110
)
111111

112-
test_df= pd.DataFrame({
112+
test_df= pd.DataFrame({
113113
'state': ['WV'],
114114
'state_code': [54],
115115
'timestamp': [datetime(year=2020, month=1, day=1)],
@@ -137,30 +137,23 @@ def test_make_geo():
137137
'val': [1., 2., 4.],
138138
})
139139

140-
template = {
141-
'se': np.nan,
142-
'sample_size': np.nan,
143-
}
144140
expecteds = {
145141
"state": pd.DataFrame(
146-
dict(template,
147-
geo_id=data.state,
142+
dict(geo_id=data.state,
148143
timestamp=data.timestamp,
149144
val=data.val)),
150145
"hhs": pd.DataFrame(
151-
dict(template,
152-
geo_id=['3', '5'],
146+
dict(geo_id=['3', '5'],
153147
timestamp=[test_timestamp] * 2,
154148
val=[3., 4.])),
155149
"nation": pd.DataFrame(
156-
dict(template,
157-
geo_id=['us'],
150+
dict(geo_id=['us'],
158151
timestamp=[test_timestamp],
159152
val=[7.]))
160153
}
161154
for geo, expected in expecteds.items():
162155
result = make_geo(data, geo, geo_mapper)
163-
for series in ["geo_id", "timestamp", "val", "se", "sample_size"]:
156+
for series in ["geo_id", "timestamp", "val"]:
164157
pd.testing.assert_series_equal(expected[series], result[series], obj=f"{geo}:{series}")
165158

166159

@@ -207,3 +200,25 @@ def test_ignore_last_range_no_results(mock_covid_hosp, mock_export):
207200
}
208201
}
209202
assert not run_module(params) # function should not raise value error and has no return value
203+
204+
def test_add_nancode():
    """Check that add_nancodes sets default codes and flags null values as unknown."""
    n_rows = 3
    # Columns shared by the input frame and the expected output frame.
    base_columns = {
        'state': ['PA','WV','OH'],
        'state_code': [42, 54, 39],
        'timestamp': [pd.to_datetime("20200601")]*n_rows,
        'val': [1, 2, np.nan],
        'se': [np.nan] * n_rows,
        'sample_size': [np.nan] * n_rows,
    }
    data = pd.DataFrame(base_columns)
    # Only the last row has a null "val", so only it is marked UNKNOWN.
    expected = pd.DataFrame({
        **base_columns,
        'missing_val': [Nans.NOT_MISSING] * 2 + [Nans.UNKNOWN],
        'missing_se': [Nans.NOT_APPLICABLE] * n_rows,
        'missing_sample_size': [Nans.NOT_APPLICABLE] * n_rows,
    })

    actual = add_nancodes(data)
    pd.testing.assert_frame_equal(expected, actual)

0 commit comments

Comments
 (0)