Commit 43e2151

Merge branch 'main' into zips-fips-crosswalk

2 parents b46ebb8 + 784cfd8

11 files changed (+163, -26 lines)

.bumpversion.cfg

Lines changed: 1 addition & 1 deletion

@@ -1,5 +1,5 @@
 [bumpversion]
-current_version = 0.2.25
+current_version = 0.3.1
 commit = True
 message = chore: bump covidcast-indicators to {new_version}
 tag = False

_delphi_utils_python/.bumpversion.cfg

Lines changed: 1 addition & 1 deletion

@@ -1,5 +1,5 @@
 [bumpversion]
-current_version = 0.2.11
+current_version = 0.3.0
 commit = True
 message = chore: bump delphi_utils to {new_version}
 tag = False

_delphi_utils_python/delphi_utils/__init__.py

Lines changed: 1 addition & 1 deletion

@@ -15,4 +15,4 @@
 from .nancodes import Nans
 from .weekday import Weekday
 
-__version__ = "0.2.11"
+__version__ = "0.3.0"

_delphi_utils_python/setup.py

Lines changed: 1 addition & 1 deletion

@@ -26,7 +26,7 @@
 
 setup(
     name="delphi_utils",
-    version="0.2.11",
+    version="0.3.0",
     description="Shared Utility Functions for Indicators",
     long_description=long_description,
     long_description_content_type="text/markdown",

ansible/templates/dsew_community_profile-params-prod.json.j2

Lines changed: 4 additions & 2 deletions

@@ -12,7 +12,7 @@
   "common": {
     "data_source": "dsew-cpr",
     "span_length": 14,
-    "min_expected_lag": {"all": "5"},
+    "min_expected_lag": {"all": "3"},
     "max_expected_lag": {"all": "9"},
     "dry_run": true,
     "suppressed_errors": []
@@ -26,7 +26,9 @@
     "ref_window_size": 7,
     "smoothed_signals": [
       "naats_total_7dav",
-      "naats_positivity_7dav"
+      "naats_positivity_7dav",
+      "confirmed_admissions_covid_1d_prop_7dav",
+      "confirmed_admissions_covid_1d_7dav"
     ]
   }
 }

dsew_community_profile/delphi_dsew_community_profile/constants.py

Lines changed: 18 additions & 6 deletions

@@ -50,22 +50,34 @@ class Transform:
 SIGNALS = {
     "total": {
         "is_rate" : False,
-        "api_name": "naats_total_7dav"
+        "api_name": "naats_total_7dav",
+        "make_prop": False
     },
     "positivity": {
         "is_rate" : True,
-        "api_name": "naats_positivity_7dav"
+        "api_name": "naats_positivity_7dav",
+        "make_prop": False
     },
     "confirmed covid-19 admissions": {
         "is_rate" : False,
-        "api_name": "confirmed_admissions_covid_1d_7dav"
+        "api_name": "confirmed_admissions_covid_1d_7dav",
+        "make_prop": True,
+        "api_prop_name": "confirmed_admissions_covid_1d_prop_7dav"
     }
 }
 
 COUNTS_7D_SIGNALS = {key for key, value in SIGNALS.items() if not value["is_rate"]}
 
-def make_signal_name(key):
-    """Convert a signal key to the corresponding signal name for the API."""
+def make_signal_name(key, is_prop=False):
+    """Convert a signal key to the corresponding signal name for the API.
+
+    Note, this function gets called twice with the same `key` for signals that support
+    population-proportion ("prop") variants.
+    """
+    if is_prop:
+        return SIGNALS[key]["api_prop_name"]
     return SIGNALS[key]["api_name"]
 
-NEWLINE="\n"
+NEWLINE = "\n"
+IS_PROP = True
+NOT_PROP = False
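
With the new make_prop metadata, make_signal_name resolves either the base API name or the population-proportion variant depending on the flag it is passed. A minimal illustration of the lookup, using the entries from the SIGNALS dict above:

from delphi_dsew_community_profile.constants import make_signal_name, IS_PROP, NOT_PROP

# Count variant and per-100k proportion variant of the same signal key.
print(make_signal_name("confirmed covid-19 admissions", NOT_PROP))
# -> confirmed_admissions_covid_1d_7dav
print(make_signal_name("confirmed covid-19 admissions", IS_PROP))
# -> confirmed_admissions_covid_1d_prop_7dav

# Signals without a prop variant keep using the default is_prop=False path.
print(make_signal_name("total"))
# -> naats_total_7dav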

dsew_community_profile/delphi_dsew_community_profile/pull.py

Lines changed: 46 additions & 8 deletions

@@ -11,8 +11,9 @@
 
 from delphi_utils.geomap import GeoMapper
 
-from .constants import TRANSFORMS, SIGNALS, COUNTS_7D_SIGNALS, NEWLINE
-from .constants import DOWNLOAD_ATTACHMENT, DOWNLOAD_LISTING
+from .constants import (TRANSFORMS, SIGNALS, COUNTS_7D_SIGNALS, NEWLINE,
+                        IS_PROP, NOT_PROP,
+                        DOWNLOAD_ATTACHMENT, DOWNLOAD_LISTING)
 
 # YYYYMMDD
 # example: "Community Profile Report 20211104.xlsx"
@@ -248,7 +249,7 @@ def _parse_sheet(self, sheet):
         if (sheet.level == "msa" or sheet.level == "county") \
                 and self.publish_date < datetime.date(2021, 1, 8) \
                 and sig == "confirmed covid-19 admissions":
-            self.dfs[(sheet.level, sig)] = pd.DataFrame(
+            self.dfs[(sheet.level, sig, NOT_PROP)] = pd.DataFrame(
                 columns = ["geo_id", "timestamp", "val", \
                     "se", "sample_size", "publish_date"]
             )
@@ -258,7 +259,7 @@ def _parse_sheet(self, sheet):
         assert len(sig_select) > 0, \
             f"No {sig} in any of {select}\n\nAll headers:\n{NEWLINE.join(list(df.columns))}"
 
-        self.dfs[(sheet.level, sig)] = pd.concat([
+        self.dfs[(sheet.level, sig, NOT_PROP)] = pd.concat([
             pd.DataFrame({
                 "geo_id": sheet.geo_id_select(df).apply(sheet.geo_id_apply),
                 "timestamp": pd.to_datetime(self.times[si[0]][sig]),
@@ -271,14 +272,18 @@ def _parse_sheet(self, sheet):
         ])
 
         for sig in COUNTS_7D_SIGNALS:
-            self.dfs[(sheet.level, sig)]["val"] /= 7 # 7-day total -> 7-day average
+            self.dfs[(sheet.level, sig, NOT_PROP)]["val"] /= 7 # 7-day total -> 7-day average
 
 
 def as_cached_filename(params, config):
     """Formulate a filename to uniquely identify this report in the input cache."""
+    # eg "Community Profile Report 20220128.xlsx"
+    # but delimiters vary; don't get tripped up if they do something wacky like
+    # Community.Profile.Report.20220128.xlsx
+    name, _, ext = config['filename'].rpartition(".")
     return os.path.join(
         params['indicator']['input_cache'],
-        f"{config['assetId']}--{config['filename']}"
+        f"{name}--{config['assetId']}.{ext}"
     )
 
 def fetch_listing(params):
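
The as_cached_filename change above splits the report filename from its extension at the last dot (so unusual delimiters inside the report name are harmless) and places the asset id between name and extension. A quick sketch of the new cache name against the old one, using a made-up assetId:

config = {"filename": "Community Profile Report 20220128.xlsx", "assetId": "abc-123"}

# New format: report name, then asset id, then the original extension.
name, _, ext = config["filename"].rpartition(".")
print(f"{name}--{config['assetId']}.{ext}")
# Community Profile Report 20220128--abc-123.xlsx

# Old format for comparison: f"{config['assetId']}--{config['filename']}"
# abc-123--Community Profile Report 20220128.xlsx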
@@ -390,13 +395,46 @@ def fetch_new_reports(params, logger=None):
     # add nation from state
     geomapper = GeoMapper()
     for sig in SIGNALS:
-        state_key = ("state", sig)
+        state_key = ("state", sig, NOT_PROP)
         if state_key not in ret:
             continue
-        ret[("nation", sig)] = nation_from_state(
+        ret[("nation", sig, NOT_PROP)] = nation_from_state(
             ret[state_key].rename(columns={"geo_id": "state_id"}),
             sig,
             geomapper
         )
 
+    for key, df in ret.copy().items():
+        (geo, sig, _) = key
+        if SIGNALS[sig]["make_prop"]:
+            ret[(geo, sig, IS_PROP)] = generate_prop_signal(df, geo, geomapper)
+
     return ret
+
+def generate_prop_signal(df, geo, geo_mapper):
+    """Transform base df into a proportion (per 100k population)."""
+    if geo == "state":
+        geo = "state_id"
+    if geo == "county":
+        geo = "fips"
+
+    # Add population data
+    if geo == "msa":
+        map_df = geo_mapper.get_crosswalk("fips", geo)
+        map_df = geo_mapper.add_population_column(
+            map_df, "fips"
+        ).drop(
+            "fips", axis=1
+        ).groupby(
+            geo
+        ).sum(
+        ).reset_index(
+        )
+        df = pd.merge(df, map_df, left_on="geo_id", right_on=geo, how="inner")
+    else:
+        df = geo_mapper.add_population_column(df, geo, geocode_col="geo_id")
+
+    df["val"] = df["val"] / df["population"] * 100000
+    df.drop(["population", geo], axis=1, inplace=True)
+
+    return df
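
generate_prop_signal rescales a count signal to a rate per 100,000 residents using population totals looked up through GeoMapper (with MSA populations aggregated from counties). The arithmetic itself is just the last two steps; a minimal sketch on a toy frame, with invented population figures purely for illustration:

import pandas as pd

# Toy state-level counts; the populations are made up, not GeoMapper values.
df = pd.DataFrame({
    "geo_id": ["pa", "wv"],
    "val": [150.0, 15.0],
    "population": [13_000_000, 1_800_000],
})

# The per-100k transform applied after the population merge:
df["val"] = df["val"] / df["population"] * 100000
print(df[["geo_id", "val"]])
#   geo_id       val
# 0     pa  1.153846
# 1     wv  0.833333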

dsew_community_profile/delphi_dsew_community_profile/run.py

Lines changed: 2 additions & 2 deletions

@@ -58,14 +58,14 @@ def replace_date_param(p):
     run_stats = []
     dfs = fetch_new_reports(params, logger)
     for key, df in dfs.items():
-        (geo, sig) = key
+        (geo, sig, is_prop) = key
         if sig not in params["indicator"]["export_signals"]:
             continue
         dates = create_export_csv(
             df,
             params['common']['export_dir'],
             geo,
-            make_signal_name(sig),
+            make_signal_name(sig, is_prop),
             **export_params
         )
         if len(dates)>0:

dsew_community_profile/params.json.template

Lines changed: 4 additions & 2 deletions

@@ -18,7 +18,7 @@
   "common": {
     "data_source": "dsew_cpr",
     "span_length": 14,
-    "min_expected_lag": {"all": "5"},
+    "min_expected_lag": {"all": "3"},
     "max_expected_lag": {"all": "9"},
     "dry_run": true,
     "suppressed_errors": []
@@ -32,7 +32,9 @@
     "ref_window_size": 7,
     "smoothed_signals": [
       "naats_total_7dav",
-      "naats_positivity_7dav"
+      "naats_positivity_7dav",
+      "confirmed_admissions_covid_1d_prop_7dav",
+      "confirmed_admissions_covid_1d_7dav"
     ]
   }
 }

dsew_community_profile/tests/params.json.template

Lines changed: 3 additions & 1 deletion

@@ -25,7 +25,9 @@
     "ref_window_size": 7,
     "smoothed_signals": [
       "naats_total_7dav",
-      "naats_positivity_7dav"
+      "naats_positivity_7dav",
+      "confirmed_admissions_covid_1d_prop_7dav",
+      "confirmed_admissions_covid_1d_7dav"
     ]
   }
 }

dsew_community_profile/tests/test_pull.py

Lines changed: 82 additions & 1 deletion

@@ -9,7 +9,7 @@
 
 from delphi_dsew_community_profile.pull import DatasetTimes
 from delphi_dsew_community_profile.pull import Dataset
-from delphi_dsew_community_profile.pull import fetch_listing, nation_from_state
+from delphi_dsew_community_profile.pull import fetch_listing, nation_from_state, generate_prop_signal
 
 example = namedtuple("example", "given expected")
 
@@ -213,3 +213,84 @@ def test_nation_from_state(self):
                 'sample_size': [None],}),
             check_like=True
         )
+
+    def test_generate_prop_signal_msa(self):
+        geomapper = GeoMapper()
+        county_pop = geomapper.get_crosswalk("fips", "pop")
+        county_msa = geomapper.get_crosswalk("fips", "msa")
+        msa_pop = county_pop.merge(county_msa, on="fips", how="inner").groupby("msa").sum().reset_index()
+
+        test_df = pd.DataFrame({
+            'geo_id': ['35620', '31080'],
+            'timestamp': [datetime(year=2020, month=1, day=1)]*2,
+            'val': [15., 150.],
+            'se': [None, None],
+            'sample_size': [None, None],})
+
+        nyc_pop = int(msa_pop.loc[msa_pop.msa == "35620", "pop"])
+        la_pop = int(msa_pop.loc[msa_pop.msa == "31080", "pop"])
+
+        expected_df = pd.DataFrame({
+            'geo_id': ['35620', '31080'],
+            'timestamp': [datetime(year=2020, month=1, day=1)]*2,
+            'val': [15. / nyc_pop * 100000, 150. / la_pop * 100000],
+            'se': [None, None],
+            'sample_size': [None, None],})
+
+        pd.testing.assert_frame_equal(
+            generate_prop_signal(
+                test_df.copy(),
+                "msa",
+                geomapper
+            ),
+            expected_df,
+            check_like=True
+        )
+    def test_generate_prop_signal_non_msa(self):
+        geomapper = GeoMapper()
+
+        geos = {
+            "state": {
+                "code_name": "state_id",
+                "geo_names": ['pa', 'wv']
+            },
+            "county": {
+                "code_name": "fips",
+                "geo_names": ['36061', '06037']
+            },
+            # nation uses the same logic path so no need to test separately
+            "hhs": {
+                "code_name": "hhs",
+                "geo_names": ["1", "4"]
+            }
+        }
+
+        for geo, settings in geos.items():
+            geo_pop = geomapper.get_crosswalk(settings["code_name"], "pop")
+
+            test_df = pd.DataFrame({
+                'geo_id': settings["geo_names"],
+                'timestamp': [datetime(year=2020, month=1, day=1)]*2,
+                'val': [15., 150.],
+                'se': [None, None],
+                'sample_size': [None, None],})
+
+            pop1 = int(geo_pop.loc[geo_pop[settings["code_name"]] == settings["geo_names"][0], "pop"])
+            pop2 = int(geo_pop.loc[geo_pop[settings["code_name"]] == settings["geo_names"][1], "pop"])
+
+            expected_df = pd.DataFrame({
+                'geo_id': settings["geo_names"],
+                'timestamp': [datetime(year=2020, month=1, day=1)]*2,
+                'val': [15. / pop1 * 100000, 150. / pop2 * 100000],
+                'se': [None, None],
+                'sample_size': [None, None],})
+
+            pd.testing.assert_frame_equal(
+                generate_prop_signal(
+                    test_df.copy(),
+                    geo,
+                    geomapper
+                ),
+                expected_df,
+                check_like=True
+            )
