cmu-delphi · krivard · Dec 9, 2020 · Dec 8, 2020 · Dec 8, 2020 · Dec 8, 2020
diff --git a/_delphi_utils_python/delphi_utils/geomap.py b/_delphi_utils_python/delphi_utils/geomap.py
@@ -22,13 +22,15 @@
         "msa": join(DATA_PATH, "zip_msa_table.csv"),
         "pop": join(DATA_PATH, "zip_pop.csv"),
         "state": join(DATA_PATH, "zip_state_code_table.csv"),
+        "hhs_region_number": join(DATA_PATH, "zip_hhs_table.csv")
     },
     "fips": {
         "zip": join(DATA_PATH, "fips_zip_table.csv"),
         "hrr": join(DATA_PATH, "fips_hrr_table.csv"),
         "msa": join(DATA_PATH, "fips_msa_table.csv"),
         "pop": join(DATA_PATH, "fips_pop.csv"),
         "state": join(DATA_PATH, "fips_state_table.csv"),
+        "hhs_region_number": join(DATA_PATH, "fips_hhs_table.csv"),
     },
     "state": {"state": join(DATA_PATH, "state_codes_table.csv")},
     "state_code": {
@@ -55,12 +57,14 @@ class GeoMapper:  # pylint: disable=too-many-public-methods
     - [x] zip -> hrr : unweighted
     - [x] zip -> msa : unweighted
     - [x] zip -> state
+    - [x] zip -> hhs_region_number
     - [x] zip -> population
     - [x] state code -> hhs_region_number
     - [x] fips -> state : unweighted
     - [x] fips -> msa : unweighted
     - [x] fips -> megacounty
     - [x] fips -> hrr
+    - [x] fips -> hhs_region_number
     - [x] nation
     - [ ] zip -> dma (postponed)
 
@@ -102,8 +106,10 @@ def __init__(self):
         """
         self.crosswalk_filepaths = CROSSWALK_FILEPATHS
         self.crosswalks = {
-            "zip": {"fips": None, "hrr": None, "msa": None, "pop": None, "state": None},
-            "fips": {"zip": None, "hrr": None, "msa": None, "pop": None, "state": None},
+            "zip": {"fips": None, "hrr": None, "msa": None,
+                    "pop": None, "state": None, "hhs_region_number": None},
+            "fips": {"zip": None, "hrr": None, "msa": None,
+                     "pop": None, "state": None, "hhs_region_number": None},
             "state": {"state": None},
             "state_code": {"hhs_region_number": None},
             "jhu_uid": {"fips": None},
@@ -123,6 +129,7 @@ def _load_crosswalk(self, from_code, to_code):
                 ("jhu_uid", "fips"),
                 ("zip", "msa"),
                 ("fips", "hrr"),
+                ("zip", "hhs_region_number")
             ]:
                 self.crosswalks[from_code][to_code] = pd.read_csv(
                     stream,
@@ -136,6 +143,8 @@ def _load_crosswalk(self, from_code, to_code):
             elif (from_code, to_code) in [
                 ("zip", "hrr"),
                 ("fips", "msa"),
+                ("fips", "hhs_region_number"),
+                ("state_code", "hhs_region_number")
             ]:
                 self.crosswalks[from_code][to_code] = pd.read_csv(
                     stream,
@@ -151,11 +160,6 @@ def _load_crosswalk(self, from_code, to_code):
                         "state_name": str,
                     },
                 )
-            elif (from_code, to_code) == ("state_code", "hhs_region_number"):
-                self.crosswalks[from_code][to_code] = pd.read_csv(
-                    stream,
-                    dtype={"state_code": str, "hhs_region_number": str},
-                )
             elif (from_code, to_code) == ("zip", "state"):
                 self.crosswalks[from_code][to_code] = pd.read_csv(
                     stream,
@@ -255,8 +259,8 @@ def add_geocode(
         """Add a new geocode column to a dataframe.
 
         Currently supported conversions:
-        - fips -> state_code, state_id, state_name, zip, msa, hrr, nation
-        - zip -> state_code, state_id, state_name, fips, msa, hrr, nation
+        - fips -> state_code, state_id, state_name, zip, msa, hrr, nation, hhs_region_number
+        - zip -> state_code, state_id, state_name, fips, msa, hrr, nation, hhs_region_number
         - jhu_uid -> fips
         - state_x -> state_y, where x and y are in {code, id, name}
         - state_code -> hhs_region_number

diff --git a/_delphi_utils_python/tests/test_geomap.py b/_delphi_utils_python/tests/test_geomap.py
@@ -137,6 +137,9 @@ def test_crosswalks(self):
         # assert cw.groupby("zip")["weight"].sum().round(5).eq(1.0).all()
         cw = gmpr._load_crosswalk(from_code="zip", to_code="state")
         assert cw.groupby("zip")["weight"].sum().round(5).eq(1.0).all()
+        cw = gmpr._load_crosswalk(from_code="zip", to_code="hhs_region_number")
+        assert cw.groupby("zip")["weight"].sum().round(5).eq(1.0).all()
+
 
     def test_load_zip_fips_table(self):
         gmpr = GeoMapper()
@@ -261,3 +264,31 @@ def test_add_geocode(self):
                 }
             )
         )
+
+        # fips -> hhs
+        new_data = gmpr.replace_geocode(self.fips_data_3.drop(columns=["date"]),
+                                        "fips", "hhs_region_number", date_col=None)
+        assert new_data.equals(
+            pd.DataFrame().from_dict(
+                {
+                    "hhs_region_number": {0: "2", 1: "6"},
+                    "count": {0: 12, 1: 6},
+                    "total": {0: 111, 1: 13}
+                }
+            )
+        )
+
+        # zip -> hhs
+        new_data = gmpr.replace_geocode(self.zip_data, "zip", "hhs_region_number")
+        new_data = new_data.round(10)  # round off floating-point noise such as 99.00000000000001
+        assert new_data.equals(
+            pd.DataFrame().from_dict(
+                {
+                    "date": {0: pd.Timestamp("2018-01-01"), 1: pd.Timestamp("2018-01-01"),
+                             2: pd.Timestamp("2018-01-03"), 3: pd.Timestamp("2018-01-03")},
+                    "hhs_region_number": {0: "5", 1: "9", 2: "5", 3: "9"},
+                    "count": {0: 99.0, 1: 801.0, 2: 100.0, 3: 786.0},
+                    "total": {0: 198.0, 1: 1602.0, 2: 200.0, 3: 1572.0}
+                }
+            )
+        )