Skip to content

Commit 3b13267

Browse files
Jingjing Tang
Jingjing Tang
authored and
Jingjing Tang
committed
Update code and unit tests for mapping from state to HHS and nation
1 parent fc5938a commit 3b13267

File tree

3 files changed

+27
-19
lines changed

3 files changed

+27
-19
lines changed

google_symptoms/delphi_google_symptoms/geo.py

Lines changed: 13 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -23,24 +23,30 @@ def generate_transition_matrix(geo_res):
2323
The first is a data frame for HRR regions and the second is for MSA
2424
regions.
2525
"""
26-
if geo_res != "nation":
26+
if geo_res in ["hrr", "msa"]:
27+
mapping_flag = "fips"
2728
map_df = gmpr.get_crosswalk("fips", geo_res)
29+
# Add population as weights
30+
map_df = gmpr.add_population_column(map_df, "fips")
2831
else:
29-
map_df = gmpr.get_crosswalk("fips", "hhs")
30-
map_df[geo_res] = "nation"
31-
# Add population as weights
32-
map_df = gmpr.add_population_column(map_df, "fips")
32+
mapping_flag = "state_id"
33+
map_df = gmpr.get_crosswalk("state", "state")
34+
map_df = gmpr.add_geocode(map_df, "state_code", "hhs")
35+
map_df = gmpr.add_geocode(map_df, "state_code", "nation")
36+
map_df = gmpr.add_population_column(map_df, "state_code")
37+
3338
if geo_res == "hrr":
3439
map_df["population"] = map_df["population"] * map_df["weight"]
40+
3541
aggregated_pop = map_df.groupby(geo_res).sum().reset_index()
3642
map_df = map_df.merge(
3743
aggregated_pop, on=geo_res, how="inner", suffixes=["_raw", "_groupsum"]
3844
)
3945
map_df["weight"] = map_df["population_raw"] / map_df["population_groupsum"]
4046

4147
map_df = pd.pivot_table(
42-
map_df, values='weight', index=["fips"], columns=[geo_res]
43-
).fillna(0).reset_index().rename({"fips": "geo_id"}, axis = 1)
48+
map_df, values='weight', index=[mapping_flag], columns=[geo_res]
49+
).fillna(0).reset_index().rename({mapping_flag: "geo_id"}, axis = 1)
4450
return map_df
4551

4652
def geo_map(df, geo_res):

google_symptoms/delphi_google_symptoms/run.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -87,6 +87,8 @@ def run_module(params):
8787
for geo_res in GEO_RESOLUTIONS:
8888
if geo_res == "state":
8989
df_pull = dfs["state"]
90+
elif geo_res in ["hhs", "nation"]:
91+
df_pull = geo_map(dfs["state"], geo_res)
9092
else:
9193
df_pull = geo_map(dfs["county"], geo_res)
9294

google_symptoms/tests/test_geo.py

Lines changed: 12 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -115,20 +115,21 @@ def test_hhs(self):
115115
gmpr = GeoMapper()
116116
df = pd.DataFrame(
117117
{
118-
"geo_id": ["01001", "01009", "01007"],
118+
"geo_id": ["al", "fl", "tx"],
119119
"timestamp": ["2020-02-15", "2020-02-15", "2020-02-15"],
120120
METRICS[0]: [10, 15, 2],
121121
METRICS[1]: [100, 20, 45],
122122
COMBINED_METRIC: [110, 35, 47],
123123
}
124124
)
125125

126-
fips2hhs = gmpr.add_population_column(gmpr.get_crosswalk("fips", "hhs"), "fips")
127-
hhs_pop = fips2hhs.groupby("hhs"
126+
state2hhs = gmpr.add_population_column(gmpr.get_crosswalk("state", "state"), "state_code")
127+
state2hhs = gmpr.add_geocode(state2hhs, "state_code", "hhs")
128+
hhs_pop = state2hhs.groupby("hhs"
128129
).sum(
129130
).reset_index(
130131
).rename(columns={"population": "hhs_pop"})
131-
df_plus = df.merge(fips2hhs, left_on="geo_id", right_on="fips", how="left"
132+
df_plus = df.merge(state2hhs, left_on="geo_id", right_on="state_id", how="left"
132133
).merge(hhs_pop, on="hhs", how="left"
133134
).assign(
134135
fractional_pop = lambda x: x.population / x.hhs_pop,
@@ -147,7 +148,7 @@ def test_hhs(self):
147148
new_df = geo_map(df, "hhs").dropna()
148149

149150
assert set(new_df.keys()) == set(df.keys())
150-
assert set(new_df["geo_id"]) == set(["4"])
151+
assert set(new_df["geo_id"]) == set(["4", "6"])
151152
assert new_df[METRICS[0]].values == pytest.approx(df_plus[METRICS[0]].tolist())
152153
assert new_df[METRICS[1]].values == pytest.approx(df_plus[METRICS[1]].tolist())
153154
assert new_df[COMBINED_METRIC].values == pytest.approx(df_plus[COMBINED_METRIC].tolist())
@@ -156,22 +157,21 @@ def test_nation(self):
156157
gmpr = GeoMapper()
157158
df = pd.DataFrame(
158159
{
159-
"geo_id": ["01001", "01009", "01007"],
160+
"geo_id": ["al", "il", "tx"],
160161
"timestamp": ["2020-02-15", "2020-02-15", "2020-02-15"],
161162
METRICS[0]: [10, 15, 2],
162163
METRICS[1]: [100, 20, 45],
163164
COMBINED_METRIC: [110, 35, 47],
164165
}
165166
)
166167

167-
fips2nation = gmpr.add_population_column(gmpr.get_crosswalk("fips", "hhs"), "fips")
168-
fips2nation.rename({"hhs": "nation"}, axis=1, inplace=True)
169-
fips2nation["nation"] = "nation"
170-
nation_pop = fips2nation.groupby("nation"
168+
state2nation = gmpr.add_population_column(gmpr.get_crosswalk("state", "state"), "state_code")
169+
state2nation = gmpr.add_geocode(state2nation, "state_code", "nation")
170+
nation_pop = state2nation.groupby("nation"
171171
).sum(
172172
).reset_index(
173173
).rename(columns={"population": "nation_pop"})
174-
df_plus = df.merge(fips2nation, left_on="geo_id", right_on="fips", how="left"
174+
df_plus = df.merge(state2nation, left_on="geo_id", right_on="state_id", how="left"
175175
).merge(nation_pop, on="nation", how="left"
176176
).assign(
177177
fractional_pop = lambda x: x.population / x.nation_pop,
@@ -190,7 +190,7 @@ def test_nation(self):
190190
new_df = geo_map(df, "nation").dropna()
191191

192192
assert set(new_df.keys()) == set(df.keys())
193-
assert set(new_df["geo_id"]) == set(["nation"])
193+
assert set(new_df["geo_id"]) == set(["us"])
194194
assert new_df[METRICS[0]].values == pytest.approx(df_plus[METRICS[0]].tolist())
195195
assert new_df[METRICS[1]].values == pytest.approx(df_plus[METRICS[1]].tolist())
196196
assert new_df[COMBINED_METRIC].values == pytest.approx(df_plus[COMBINED_METRIC].tolist())

0 commit comments

Comments
 (0)