Skip to content

Commit 1357f24

Browse files
committed
Add hhs to utils
1 parent 3bac59a commit 1357f24

File tree

2 files changed

+44
-9
lines changed

2 files changed

+44
-9
lines changed

_delphi_utils_python/delphi_utils/geomap.py

Lines changed: 13 additions & 9 deletions
Original file line number | Diff line number | Diff line change
@@ -22,13 +22,15 @@
2222
"msa": join(DATA_PATH, "zip_msa_table.csv"),
2323
"pop": join(DATA_PATH, "zip_pop.csv"),
2424
"state": join(DATA_PATH, "zip_state_code_table.csv"),
25+
"hhs_region_number": join(DATA_PATH, "zip_hhs_table.csv")
2526
},
2627
"fips": {
2728
"zip": join(DATA_PATH, "fips_zip_table.csv"),
2829
"hrr": join(DATA_PATH, "fips_hrr_table.csv"),
2930
"msa": join(DATA_PATH, "fips_msa_table.csv"),
3031
"pop": join(DATA_PATH, "fips_pop.csv"),
3132
"state": join(DATA_PATH, "fips_state_table.csv"),
33+
"hhs_region_number": join(DATA_PATH, "fips_hhs_table.csv"),
3234
},
3335
"state": {"state": join(DATA_PATH, "state_codes_table.csv")},
3436
"state_code": {
@@ -55,12 +57,14 @@ class GeoMapper: # pylint: disable=too-many-public-methods
5557
- [x] zip -> hrr : unweighted
5658
- [x] zip -> msa : unweighted
5759
- [x] zip -> state
60+
- [x] zip -> hhs_region_number
5861
- [x] zip -> population
5962
- [x] state code -> hhs_region_number
6063
- [x] fips -> state : unweighted
6164
- [x] fips -> msa : unweighted
6265
- [x] fips -> megacounty
6366
- [x] fips -> hrr
67+
- [x] fips -> hhs_region_number
6468
- [x] nation
6569
- [ ] zip -> dma (postponed)
6670
@@ -102,8 +106,10 @@ def __init__(self):
102106
"""
103107
self.crosswalk_filepaths = CROSSWALK_FILEPATHS
104108
self.crosswalks = {
105-
"zip": {"fips": None, "hrr": None, "msa": None, "pop": None, "state": None},
106-
"fips": {"zip": None, "hrr": None, "msa": None, "pop": None, "state": None},
109+
"zip": {"fips": None, "hrr": None, "msa": None,
110+
"pop": None, "state": None, "hhs_region_number": None},
111+
"fips": {"zip": None, "hrr": None, "msa": None,
112+
"pop": None, "state": None, "hhs_region_number": None},
107113
"state": {"state": None},
108114
"state_code": {"hhs_region_number": None},
109115
"jhu_uid": {"fips": None},
@@ -123,6 +129,7 @@ def _load_crosswalk(self, from_code, to_code):
123129
("jhu_uid", "fips"),
124130
("zip", "msa"),
125131
("fips", "hrr"),
132+
("zip", "hhs_region_number")
126133
]:
127134
self.crosswalks[from_code][to_code] = pd.read_csv(
128135
stream,
@@ -136,6 +143,8 @@ def _load_crosswalk(self, from_code, to_code):
136143
elif (from_code, to_code) in [
137144
("zip", "hrr"),
138145
("fips", "msa"),
146+
("fips", "hhs_region_number"),
147+
("state_code", "hhs_region_number")
139148
]:
140149
self.crosswalks[from_code][to_code] = pd.read_csv(
141150
stream,
@@ -151,11 +160,6 @@ def _load_crosswalk(self, from_code, to_code):
151160
"state_name": str,
152161
},
153162
)
154-
elif (from_code, to_code) == ("state_code", "hhs_region_number"):
155-
self.crosswalks[from_code][to_code] = pd.read_csv(
156-
stream,
157-
dtype={"state_code": str, "hhs_region_number": str},
158-
)
159163
elif (from_code, to_code) == ("zip", "state"):
160164
self.crosswalks[from_code][to_code] = pd.read_csv(
161165
stream,
@@ -255,8 +259,8 @@ def add_geocode(
255259
"""Add a new geocode column to a dataframe.
256260
257261
Currently supported conversions:
258-
- fips -> state_code, state_id, state_name, zip, msa, hrr, nation
259-
- zip -> state_code, state_id, state_name, fips, msa, hrr, nation
262+
- fips -> state_code, state_id, state_name, zip, msa, hrr, nation, hhs_region_number
263+
- zip -> state_code, state_id, state_name, fips, msa, hrr, nation, hhs_region_number
260264
- jhu_uid -> fips
261265
- state_x -> state_y, where x and y are in {code, id, name}
262266
- state_code -> hhs_region_number

_delphi_utils_python/tests/test_geomap.py

Lines changed: 31 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -137,6 +137,9 @@ def test_crosswalks(self):
137137
# assert cw.groupby("zip")["weight"].sum().round(5).eq(1.0).all()
138138
cw = gmpr._load_crosswalk(from_code="zip", to_code="state")
139139
assert cw.groupby("zip")["weight"].sum().round(5).eq(1.0).all()
140+
cw = gmpr._load_crosswalk(from_code="zip", to_code="hhs_region_number")
141+
assert cw.groupby("zip")["weight"].sum().round(5).eq(1.0).all()
142+
140143

141144
def test_load_zip_fips_table(self):
142145
gmpr = GeoMapper()
@@ -261,3 +264,31 @@ def test_add_geocode(self):
261264
}
262265
)
263266
)
267+
268+
# fips -> hhs
269+
new_data = gmpr.replace_geocode(self.fips_data_3.drop(columns=["date"]),
270+
"fips", "hhs_region_number", date_col=None)
271+
assert new_data.equals(
272+
pd.DataFrame().from_dict(
273+
{
274+
"hhs_region_number": {0: "2", 1: "6"},
275+
"count": {0: 12, 1: 6},
276+
"total": {0: 111, 1: 13}
277+
}
278+
)
279+
)
280+
281+
# zip -> hhs
282+
new_data = gmpr.replace_geocode(self.zip_data, "zip", "hhs_region_number")
283+
new_data = new_data.round(10) # get rid of a floating point error with 99.00000000000001
284+
assert new_data.equals(
285+
pd.DataFrame().from_dict(
286+
{
287+
"date": {0: pd.Timestamp("2018-01-01"), 1: pd.Timestamp("2018-01-01"),
288+
2: pd.Timestamp("2018-01-03"), 3: pd.Timestamp("2018-01-03")},
289+
"hhs_region_number": {0: "5", 1: "9", 2: "5", 3: "9"},
290+
"count": {0: 99.0, 1: 801.0, 2: 100.0, 3: 786.0},
291+
"total": {0: 198.0, 1: 1602.0, 2: 200.0, 3: 1572.0}
292+
}
293+
)
294+
)

0 commit comments

Comments
 (0)