Skip to content

Commit af1fee0

Browse files
authored
Merge pull request #1505 from cmu-delphi/ndefries/add-hosp-prop
Add hospital admissions prop signal to CPR
2 parents dbea6ad + 043e485 commit af1fee0

File tree

7 files changed

+152
-19
lines changed

7 files changed

+152
-19
lines changed

ansible/templates/dsew_community_profile-params-prod.json.j2

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -26,7 +26,9 @@
2626
"ref_window_size": 7,
2727
"smoothed_signals": [
2828
"naats_total_7dav",
29-
"naats_positivity_7dav"
29+
"naats_positivity_7dav",
30+
"confirmed_admissions_covid_1d_prop_7dav",
31+
"confirmed_admissions_covid_1d_7dav"
3032
]
3133
}
3234
}

dsew_community_profile/delphi_dsew_community_profile/constants.py

Lines changed: 18 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -50,22 +50,34 @@ class Transform:
5050
# Registry of the signals this indicator handles, keyed by signal key.
# Fields of each entry:
#   is_rate       -- entries with a false value are collected into
#                    COUNTS_7D_SIGNALS below.
#   api_name      -- name of the base signal in the API.
#   make_prop     -- whether a population-proportion ("prop") variant is
#                    generated in addition to the base signal.
#   api_prop_name -- API name of the prop variant; only present on entries
#                    whose make_prop is True.
SIGNALS = {
    "total": {
        "is_rate": False,
        "api_name": "naats_total_7dav",
        "make_prop": False,
    },
    "positivity": {
        "is_rate": True,
        "api_name": "naats_positivity_7dav",
        "make_prop": False,
    },
    "confirmed covid-19 admissions": {
        "is_rate": False,
        "api_name": "confirmed_admissions_covid_1d_7dav",
        "make_prop": True,
        "api_prop_name": "confirmed_admissions_covid_1d_prop_7dav",
    },
}

# Keys of every non-rate signal.
COUNTS_7D_SIGNALS = {key for key, value in SIGNALS.items() if not value["is_rate"]}


def make_signal_name(key, is_prop=False):
    """Convert a signal key to the corresponding signal name for the API.

    Note, this function gets called twice with the same `key` for signals that
    support population-proportion ("prop") variants: once for the base signal
    and once for the prop variant.

    Parameters
    ----------
    key: str
        A key of `SIGNALS`.
    is_prop: bool
        When true, return the API name of the prop variant instead of the
        base signal name.

    Returns
    -------
    str
        The API signal name.

    Raises
    ------
    KeyError
        If `key` is not in `SIGNALS`, or if `is_prop` is true and the signal
        does not define a prop variant.
    """
    signal = SIGNALS[key]
    if not is_prop:
        return signal["api_name"]
    try:
        return signal["api_prop_name"]
    except KeyError:
        # Same exception type as a plain lookup, but say which signal is at
        # fault instead of surfacing a bare KeyError('api_prop_name').
        raise KeyError(
            f"signal {key!r} has no population-proportion variant"
        ) from None


NEWLINE = "\n"
# Readable names for the boolean flag that distinguishes a prop variant from
# its base signal.
IS_PROP = True
NOT_PROP = False

dsew_community_profile/delphi_dsew_community_profile/pull.py

Lines changed: 41 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -11,8 +11,9 @@
1111

1212
from delphi_utils.geomap import GeoMapper
1313

14-
from .constants import TRANSFORMS, SIGNALS, COUNTS_7D_SIGNALS, NEWLINE
15-
from .constants import DOWNLOAD_ATTACHMENT, DOWNLOAD_LISTING
14+
from .constants import (TRANSFORMS, SIGNALS, COUNTS_7D_SIGNALS, NEWLINE,
15+
IS_PROP, NOT_PROP,
16+
DOWNLOAD_ATTACHMENT, DOWNLOAD_LISTING)
1617

1718
# YYYYMMDD
1819
# example: "Community Profile Report 20211104.xlsx"
@@ -248,7 +249,7 @@ def _parse_sheet(self, sheet):
248249
if (sheet.level == "msa" or sheet.level == "county") \
249250
and self.publish_date < datetime.date(2021, 1, 8) \
250251
and sig == "confirmed covid-19 admissions":
251-
self.dfs[(sheet.level, sig)] = pd.DataFrame(
252+
self.dfs[(sheet.level, sig, NOT_PROP)] = pd.DataFrame(
252253
columns = ["geo_id", "timestamp", "val", \
253254
"se", "sample_size", "publish_date"]
254255
)
@@ -258,7 +259,7 @@ def _parse_sheet(self, sheet):
258259
assert len(sig_select) > 0, \
259260
f"No {sig} in any of {select}\n\nAll headers:\n{NEWLINE.join(list(df.columns))}"
260261

261-
self.dfs[(sheet.level, sig)] = pd.concat([
262+
self.dfs[(sheet.level, sig, NOT_PROP)] = pd.concat([
262263
pd.DataFrame({
263264
"geo_id": sheet.geo_id_select(df).apply(sheet.geo_id_apply),
264265
"timestamp": pd.to_datetime(self.times[si[0]][sig]),
@@ -271,7 +272,7 @@ def _parse_sheet(self, sheet):
271272
])
272273

273274
for sig in COUNTS_7D_SIGNALS:
274-
self.dfs[(sheet.level, sig)]["val"] /= 7 # 7-day total -> 7-day average
275+
self.dfs[(sheet.level, sig, NOT_PROP)]["val"] /= 7 # 7-day total -> 7-day average
275276

276277

277278
def as_cached_filename(params, config):
@@ -390,13 +391,46 @@ def fetch_new_reports(params, logger=None):
390391
# add nation from state
391392
geomapper = GeoMapper()
392393
for sig in SIGNALS:
393-
state_key = ("state", sig)
394+
state_key = ("state", sig, NOT_PROP)
394395
if state_key not in ret:
395396
continue
396-
ret[("nation", sig)] = nation_from_state(
397+
ret[("nation", sig, NOT_PROP)] = nation_from_state(
397398
ret[state_key].rename(columns={"geo_id": "state_id"}),
398399
sig,
399400
geomapper
400401
)
401402

403+
for key, df in ret.copy().items():
404+
(geo, sig, _) = key
405+
if SIGNALS[sig]["make_prop"]:
406+
ret[(geo, sig, IS_PROP)] = generate_prop_signal(df, geo, geomapper)
407+
402408
return ret
409+
410+
def generate_prop_signal(df, geo, geo_mapper):
    """Transform base df into a proportion (per 100k population).

    Parameters
    ----------
    df: pd.DataFrame
        Base signal frame. Must contain a `geo_id` column holding the location
        code and a `val` column holding the value to rescale.
    geo: str
        Geo level of `df`. "state" and "county" are translated to the
        "state_id" and "fips" geocodes; any other value is used as the geocode
        name unchanged.
    geo_mapper: GeoMapper
        Source of the population figures (and, for "msa", of the fips-to-msa
        crosswalk).

    Returns
    -------
    pd.DataFrame
        A frame with `val` replaced by `val / population * 100000` and the
        helper population and geocode columns removed. For "msa" the population
        join is an inner join, so rows whose `geo_id` has no population entry
        are dropped.
    """
    # Geo level -> geocode name; levels not listed map to themselves.
    geocode = {"state": "state_id", "county": "fips"}.get(geo, geo)

    # Add population data
    if geocode == "msa":
        # MSA population is built here by summing the population of every
        # county that the crosswalk assigns to the MSA.
        county_to_msa = geo_mapper.get_crosswalk("fips", geocode)
        msa_population = (
            geo_mapper.add_population_column(county_to_msa, "fips")
            .drop(columns="fips")
            .groupby(geocode)
            .sum()
            .reset_index()
        )
        with_pop = df.merge(
            msa_population, left_on="geo_id", right_on=geocode, how="inner"
        )
    else:
        with_pop = geo_mapper.add_population_column(
            df, geocode, geocode_col="geo_id"
        )

    with_pop["val"] = with_pop["val"] / with_pop["population"] * 100000

    # The population and geocode columns only existed to do the scaling.
    return with_pop.drop(columns=["population", geocode])

dsew_community_profile/delphi_dsew_community_profile/run.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -58,14 +58,14 @@ def replace_date_param(p):
5858
run_stats = []
5959
dfs = fetch_new_reports(params, logger)
6060
for key, df in dfs.items():
61-
(geo, sig) = key
61+
(geo, sig, is_prop) = key
6262
if sig not in params["indicator"]["export_signals"]:
6363
continue
6464
dates = create_export_csv(
6565
df,
6666
params['common']['export_dir'],
6767
geo,
68-
make_signal_name(sig),
68+
make_signal_name(sig, is_prop),
6969
**export_params
7070
)
7171
if len(dates)>0:

dsew_community_profile/params.json.template

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -32,7 +32,9 @@
3232
"ref_window_size": 7,
3333
"smoothed_signals": [
3434
"naats_total_7dav",
35-
"naats_positivity_7dav"
35+
"naats_positivity_7dav",
36+
"confirmed_admissions_covid_1d_prop_7dav",
37+
"confirmed_admissions_covid_1d_7dav"
3638
]
3739
}
3840
}

dsew_community_profile/tests/params.json.template

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -25,7 +25,9 @@
2525
"ref_window_size": 7,
2626
"smoothed_signals": [
2727
"naats_total_7dav",
28-
"naats_positivity_7dav"
28+
"naats_positivity_7dav",
29+
"confirmed_admissions_covid_1d_prop_7dav",
30+
"confirmed_admissions_covid_1d_7dav"
2931
]
3032
}
3133
}

dsew_community_profile/tests/test_pull.py

Lines changed: 82 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -9,7 +9,7 @@
99

1010
from delphi_dsew_community_profile.pull import DatasetTimes
1111
from delphi_dsew_community_profile.pull import Dataset
12-
from delphi_dsew_community_profile.pull import fetch_listing, nation_from_state
12+
from delphi_dsew_community_profile.pull import fetch_listing, nation_from_state, generate_prop_signal
1313

1414
example = namedtuple("example", "given expected")
1515

@@ -213,3 +213,84 @@ def test_nation_from_state(self):
213213
'sample_size': [None],}),
214214
check_like=True
215215
)
216+
217+
def test_generate_prop_signal_msa(self):
    """Check that MSA values are scaled by the summed county population."""
    geomapper = GeoMapper()
    county_pop = geomapper.get_crosswalk("fips", "pop")
    county_msa = geomapper.get_crosswalk("fips", "msa")
    # Aggregate only the population column; a frame-wide sum would also try
    # to aggregate the fips codes.
    msa_pop = (
        county_pop.merge(county_msa, on="fips", how="inner")
        .groupby("msa")["pop"]
        .sum()
        .reset_index()
    )

    test_df = pd.DataFrame({
        'geo_id': ['35620', '31080'],
        'timestamp': [datetime(year=2020, month=1, day=1)]*2,
        'val': [15., 150.],
        'se': [None, None],
        'sample_size': [None, None],})

    # .item() extracts the single matching value (and fails loudly if the
    # selection is not exactly one row); int() on a Series is deprecated.
    nyc_pop = int(msa_pop.loc[msa_pop.msa == "35620", "pop"].item())
    la_pop = int(msa_pop.loc[msa_pop.msa == "31080", "pop"].item())

    expected_df = pd.DataFrame({
        'geo_id': ['35620', '31080'],
        'timestamp': [datetime(year=2020, month=1, day=1)]*2,
        'val': [15. / nyc_pop * 100000, 150. / la_pop * 100000],
        'se': [None, None],
        'sample_size': [None, None],})

    pd.testing.assert_frame_equal(
        generate_prop_signal(
            test_df.copy(),
            "msa",
            geomapper
        ),
        expected_df,
        check_like=True
    )
249+
def test_generate_prop_signal_non_msa(self):
    """Check per-100k scaling for geo levels that use a direct population lookup."""
    geomapper = GeoMapper()

    # For each geo level: the geocode name used by the population crosswalk
    # and two sample location codes.
    geos = {
        "state": {
            "code_name": "state_id",
            "geo_names": ['pa', 'wv']
        },
        "county": {
            "code_name": "fips",
            "geo_names": ['36061', '06037']
        },
        # nation uses the same logic path so no need to test separately
        "hhs": {
            "code_name": "hhs",
            "geo_names": ["1", "4"]
        }
    }

    for geo, settings in geos.items():
        code_name = settings["code_name"]
        geo_names = settings["geo_names"]
        geo_pop = geomapper.get_crosswalk(code_name, "pop")

        test_df = pd.DataFrame({
            'geo_id': geo_names,
            'timestamp': [datetime(year=2020, month=1, day=1)]*2,
            'val': [15., 150.],
            'se': [None, None],
            'sample_size': [None, None],})

        # .item() extracts the single matching value (and fails loudly if
        # the selection is not exactly one row); int() on a Series is
        # deprecated.
        pop1 = int(geo_pop.loc[geo_pop[code_name] == geo_names[0], "pop"].item())
        pop2 = int(geo_pop.loc[geo_pop[code_name] == geo_names[1], "pop"].item())

        expected_df = pd.DataFrame({
            'geo_id': geo_names,
            'timestamp': [datetime(year=2020, month=1, day=1)]*2,
            'val': [15. / pop1 * 100000, 150. / pop2 * 100000],
            'se': [None, None],
            'sample_size': [None, None],})

        pd.testing.assert_frame_equal(
            generate_prop_signal(
                test_df.copy(),
                geo,
                geomapper
            ),
            expected_df,
            check_like=True
        )

0 commit comments

Comments
 (0)