Skip to content

Commit b9df5a4

Browse files
authored
Merge pull request #1424 from cmu-delphi/release/indicators_v0.2.15_utils_v0.2.7
Release covidcast-indicators 0.2.15
2 parents 3a7db25 + b422ba8 commit b9df5a4

File tree

4 files changed

+105
-14
lines changed

4 files changed

+105
-14
lines changed

.bumpversion.cfg

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
[bumpversion]
2-
current_version = 0.2.14
2+
current_version = 0.2.15
33
commit = True
44
message = chore: bump covidcast-indicators to {new_version}
55
tag = False

google_symptoms/delphi_google_symptoms/geo.py

Lines changed: 18 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -23,20 +23,29 @@ def generate_transition_matrix(geo_res):
2323
The first is a data frame for HRR regions and the second are MSA
2424
regions.
2525
"""
26-
map_df = gmpr.get_crosswalk("fips", geo_res)
27-
# Add population as weights
28-
map_df = gmpr.add_population_column(map_df, "fips")
26+
if geo_res in ["hrr", "msa"]:
27+
mapping_flag = "fips"
28+
map_df = gmpr.get_crosswalk("fips", geo_res)
29+
# Add population as weights
30+
map_df = gmpr.add_population_column(map_df, "fips")
31+
else:
32+
mapping_flag = "state_id"
33+
map_df = gmpr.get_crosswalk("state", "state")
34+
map_df = gmpr.add_geocode(map_df, "state_code", geo_res)
35+
map_df = gmpr.add_population_column(map_df, "state_code")
36+
2937
if geo_res == "hrr":
3038
map_df["population"] = map_df["population"] * map_df["weight"]
31-
msa_pop = map_df.groupby(geo_res).sum().reset_index()
39+
40+
aggregated_pop = map_df.groupby(geo_res).sum().reset_index()
3241
map_df = map_df.merge(
33-
msa_pop, on=geo_res, how="inner", suffixes=["_raw", "_groupsum"]
42+
aggregated_pop, on=geo_res, how="inner", suffixes=["_raw", "_groupsum"]
3443
)
3544
map_df["weight"] = map_df["population_raw"] / map_df["population_groupsum"]
3645

3746
map_df = pd.pivot_table(
38-
map_df, values='weight', index=["fips"], columns=[geo_res]
39-
).fillna(0).reset_index().rename({"fips": "geo_id"}, axis = 1)
47+
map_df, values='weight', index=[mapping_flag], columns=[geo_res]
48+
).fillna(0).reset_index().rename({mapping_flag: "geo_id"}, axis = 1)
4049
return map_df
4150

4251
def geo_map(df, geo_res):
@@ -49,7 +58,7 @@ def geo_map(df, geo_res):
4958
a data frame with columns "geo_id", "timestamp",
5059
and columns for signal vals
5160
geo_res: str
52-
"msa" or "hrr"
61+
"msa", "hrr", "hhs" or "nation"
5362
5463
Returns
5564
-------
@@ -58,7 +67,7 @@ def geo_map(df, geo_res):
5867
and columns for signal vals.
5968
The geo_id has been converted from fips to HRRs/MSAs
6069
"""
61-
if geo_res in {"county", "state"}:
70+
if geo_res == "county":
6271
return df
6372

6473
map_df = generate_transition_matrix(geo_res)

google_symptoms/delphi_google_symptoms/run.py

Lines changed: 1 addition & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -13,7 +13,6 @@
1313
from pandas import to_datetime
1414
from delphi_utils import (
1515
create_export_csv,
16-
geomap,
1716
get_structured_logger
1817
)
1918
from delphi_utils.validator.utils import lag_converter
@@ -84,14 +83,12 @@ def run_module(params):
8483
export_start_date,
8584
export_end_date,
8685
num_export_days)
87-
gmpr = geomap.GeoMapper()
8886

8987
for geo_res in GEO_RESOLUTIONS:
9088
if geo_res == "state":
9189
df_pull = dfs["state"]
9290
elif geo_res in ["hhs", "nation"]:
93-
df_pull = gmpr.replace_geocode(dfs["county"], "fips", geo_res, from_col="geo_id")
94-
df_pull.rename(columns={geo_res: "geo_id"}, inplace=True)
91+
df_pull = geo_map(dfs["state"], geo_res)
9592
else:
9693
df_pull = geo_map(dfs["county"], geo_res)
9794

google_symptoms/tests/test_geo.py

Lines changed: 85 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -110,3 +110,88 @@ def test_msa(self):
110110
assert new_df[METRICS[0]].values == pytest.approx(df_plus[METRICS[0]].tolist())
111111
assert new_df[METRICS[1]].values == pytest.approx(df_plus[METRICS[1]].tolist())
112112
assert new_df[COMBINED_METRIC].values == pytest.approx(df_plus[COMBINED_METRIC].tolist())
113+
114+
def test_hhs(self):
    """Check geo_map's "hhs" output against a hand-computed population-weighted aggregate."""
    mapper = GeoMapper()
    metric_a, metric_b = METRICS[0], METRICS[1]

    df = pd.DataFrame(
        {
            "geo_id": ["al", "fl", "tx"],
            "timestamp": ["2020-02-15", "2020-02-15", "2020-02-15"],
            metric_a: [10, 15, 2],
            metric_b: [100, 20, 45],
            COMBINED_METRIC: [110, 35, 47],
        }
    )

    # Build the expected values without calling geo_map: attach population
    # and the HHS code to every state, then total the population per region.
    crosswalk = mapper.get_crosswalk("state", "state")
    crosswalk = mapper.add_population_column(crosswalk, "state_code")
    crosswalk = mapper.add_geocode(crosswalk, "state_code", "hhs")

    region_totals = crosswalk.groupby("hhs").sum().reset_index()
    region_totals = region_totals.rename(columns={"population": "hhs_pop"})

    # Weight each input state by its share of its region's total population.
    joined = df.merge(crosswalk, left_on="geo_id", right_on="state_id", how="left")
    joined = joined.merge(region_totals, on="hhs", how="left")
    weighted = joined.assign(
        fractional_pop=lambda x: x.population / x.hhs_pop,
        metric_0=lambda x: x.fractional_pop * x[metric_a],
        metric_1=lambda x: x.fractional_pop * x[metric_b],
        combined_metric=lambda x: x.metric_0 + x.metric_1,
    )

    # Sum per region, then swap the unweighted metric columns for the
    # weighted ones so the frame is keyed by the real metric names.
    expected = weighted.groupby("hhs").sum()
    expected = expected.drop(columns=[metric_a, metric_b, COMBINED_METRIC])
    expected = expected.rename(
        columns={
            "metric_0": metric_a,
            "metric_1": metric_b,
            "combined_metric": COMBINED_METRIC,
        }
    )

    new_df = geo_map(df, "hhs").dropna()

    assert set(new_df.keys()) == set(df.keys())
    assert set(new_df["geo_id"]) == {"4", "6"}
    for column in (metric_a, metric_b, COMBINED_METRIC):
        assert new_df[column].values == pytest.approx(expected[column].tolist())
155+
156+
def test_nation(self):
    """Check geo_map's "nation" output against a hand-computed population-weighted aggregate."""
    mapper = GeoMapper()
    metric_a, metric_b = METRICS[0], METRICS[1]

    df = pd.DataFrame(
        {
            "geo_id": ["al", "il", "tx"],
            "timestamp": ["2020-02-15", "2020-02-15", "2020-02-15"],
            metric_a: [10, 15, 2],
            metric_b: [100, 20, 45],
            COMBINED_METRIC: [110, 35, 47],
        }
    )

    # Build the expected values without calling geo_map: attach population
    # and the nation code to every state, then total the population.
    crosswalk = mapper.get_crosswalk("state", "state")
    crosswalk = mapper.add_population_column(crosswalk, "state_code")
    crosswalk = mapper.add_geocode(crosswalk, "state_code", "nation")

    nation_totals = crosswalk.groupby("nation").sum().reset_index()
    nation_totals = nation_totals.rename(columns={"population": "nation_pop"})

    # Weight each input state by its share of the national population.
    joined = df.merge(crosswalk, left_on="geo_id", right_on="state_id", how="left")
    joined = joined.merge(nation_totals, on="nation", how="left")
    weighted = joined.assign(
        fractional_pop=lambda x: x.population / x.nation_pop,
        metric_0=lambda x: x.fractional_pop * x[metric_a],
        metric_1=lambda x: x.fractional_pop * x[metric_b],
        combined_metric=lambda x: x.metric_0 + x.metric_1,
    )

    # Sum per nation, then swap the unweighted metric columns for the
    # weighted ones so the frame is keyed by the real metric names.
    expected = weighted.groupby("nation").sum()
    expected = expected.drop(columns=[metric_a, metric_b, COMBINED_METRIC])
    expected = expected.rename(
        columns={
            "metric_0": metric_a,
            "metric_1": metric_b,
            "combined_metric": COMBINED_METRIC,
        }
    )

    new_df = geo_map(df, "nation").dropna()

    assert set(new_df.keys()) == set(df.keys())
    assert set(new_df["geo_id"]) == {"us"}
    for column in (metric_a, metric_b, COMBINED_METRIC):
        assert new_df[column].values == pytest.approx(expected[column].tolist())
197+

0 commit comments

Comments
 (0)