Skip to content

Add HHS to geomapper util #610

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 5 commits into from
Dec 9, 2020
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 4 additions & 4 deletions _delphi_utils_python/data_proc/geomap/geo_data_proc.py
Original file line number Diff line number Diff line change
Expand Up @@ -45,7 +45,7 @@
ZIP_STATE_CODE_OUT_FILENAME = "zip_state_code_table.csv"
ZIP_HHS_FILENAME = "zip_hhs_table.csv"
STATE_OUT_FILENAME = "state_codes_table.csv"
STATE_HHS_OUT_FILENAME = "state_code_hhs_region_number_table.csv"
STATE_HHS_OUT_FILENAME = "state_code_hhs_table.csv"
JHU_FIPS_OUT_FILENAME = "jhu_uid_fips_table.csv"


Expand Down Expand Up @@ -334,12 +334,12 @@ def create_state_hhs_crosswalk():
hhs_state_pairs.append((9, "Northern Mariana Islands"))

# Make dataframe
hhs_df = pd.DataFrame(hhs_state_pairs, columns=["hhs_region_number", "state_name"])
hhs_df["hhs_region_number"] = hhs_df["hhs_region_number"].astype(str)
hhs_df = pd.DataFrame(hhs_state_pairs, columns=["hhs", "state_name"])
hhs_df["hhs"] = hhs_df["hhs"].astype(str)

(
ss_df.merge(hhs_df, on="state_name", how="left")
.dropna()[["state_code", "hhs_region_number"]]
.dropna()[["state_code", "hhs"]]
.to_csv(join(OUTPUT_DIR, STATE_HHS_OUT_FILENAME), index=False)
)

Expand Down
2 changes: 1 addition & 1 deletion _delphi_utils_python/delphi_utils/data/fips_hhs_table.csv
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
fips,hhs_region_number
fips,hhs
01000,4
01001,4
01003,4
Expand Down
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
state_code,hhs_region_number
state_code,hhs
01,4
02,10
04,9
Expand Down
3 changes: 2 additions & 1 deletion _delphi_utils_python/delphi_utils/data/zip_hhs_table.csv
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
zip,weight,hhs_region_number
zip,weight,hhs
00601,0.994345718901454,2
00601,0.005654281098546043,2
00602,1.0,2
Expand Down Expand Up @@ -2780,6 +2780,7 @@ zip,weight,hhs_region_number
08901,1.0,2
08902,1.0,2
08904,1.0,2
10001,1.0,2
10002,1.0,2
10003,1.0,2
Expand Down
38 changes: 22 additions & 16 deletions _delphi_utils_python/delphi_utils/geomap.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,17 +22,19 @@
"msa": join(DATA_PATH, "zip_msa_table.csv"),
"pop": join(DATA_PATH, "zip_pop.csv"),
"state": join(DATA_PATH, "zip_state_code_table.csv"),
"hhs": join(DATA_PATH, "zip_hhs_table.csv")
},
"fips": {
"zip": join(DATA_PATH, "fips_zip_table.csv"),
"hrr": join(DATA_PATH, "fips_hrr_table.csv"),
"msa": join(DATA_PATH, "fips_msa_table.csv"),
"pop": join(DATA_PATH, "fips_pop.csv"),
"state": join(DATA_PATH, "fips_state_table.csv"),
"hhs": join(DATA_PATH, "fips_hhs_table.csv"),
},
"state": {"state": join(DATA_PATH, "state_codes_table.csv")},
"state_code": {
"hhs_region_number": join(DATA_PATH, "state_code_hhs_region_number_table.csv")
"hhs": join(DATA_PATH, "state_code_hhs_table.csv")
},
"jhu_uid": {"fips": join(DATA_PATH, "jhu_uid_fips_table.csv")},
}
Expand All @@ -55,12 +57,14 @@ class GeoMapper: # pylint: disable=too-many-public-methods
- [x] zip -> hrr : unweighted
- [x] zip -> msa : unweighted
- [x] zip -> state
- [x] zip -> hhs
- [x] zip -> population
- [x] state code -> hhs_region_number
- [x] state code -> hhs
- [x] fips -> state : unweighted
- [x] fips -> msa : unweighted
- [x] fips -> megacounty
- [x] fips -> hrr
- [x] fips -> hhs
- [x] nation
- [ ] zip -> dma (postponed)

Expand Down Expand Up @@ -102,10 +106,14 @@ def __init__(self):
"""
self.crosswalk_filepaths = CROSSWALK_FILEPATHS
self.crosswalks = {
"zip": {"fips": None, "hrr": None, "msa": None, "pop": None, "state": None},
"fips": {"zip": None, "hrr": None, "msa": None, "pop": None, "state": None},
"zip": {
geo: None for geo in ["fips", "hrr", "msa", "pop", "state", "hhs"]
},
"fips": {
geo: None for geo in ["zip", "hrr", "msa", "pop", "state", "hhs"]
},
"state": {"state": None},
"state_code": {"hhs_region_number": None},
"state_code": {"hhs": None},
"jhu_uid": {"fips": None},
}

Expand All @@ -123,6 +131,7 @@ def _load_crosswalk(self, from_code, to_code):
("jhu_uid", "fips"),
("zip", "msa"),
("fips", "hrr"),
("zip", "hhs")
]:
self.crosswalks[from_code][to_code] = pd.read_csv(
stream,
Expand All @@ -136,6 +145,8 @@ def _load_crosswalk(self, from_code, to_code):
elif (from_code, to_code) in [
("zip", "hrr"),
("fips", "msa"),
("fips", "hhs"),
("state_code", "hhs")
]:
self.crosswalks[from_code][to_code] = pd.read_csv(
stream,
Expand All @@ -151,11 +162,6 @@ def _load_crosswalk(self, from_code, to_code):
"state_name": str,
},
)
elif (from_code, to_code) == ("state_code", "hhs_region_number"):
self.crosswalks[from_code][to_code] = pd.read_csv(
stream,
dtype={"state_code": str, "hhs_region_number": str},
)
elif (from_code, to_code) == ("zip", "state"):
self.crosswalks[from_code][to_code] = pd.read_csv(
stream,
Expand Down Expand Up @@ -255,11 +261,11 @@ def add_geocode(
"""Add a new geocode column to a dataframe.

Currently supported conversions:
- fips -> state_code, state_id, state_name, zip, msa, hrr, nation
- zip -> state_code, state_id, state_name, fips, msa, hrr, nation
- fips -> state_code, state_id, state_name, zip, msa, hrr, nation, hhs
- zip -> state_code, state_id, state_name, fips, msa, hrr, nation, hhs
- jhu_uid -> fips
- state_x -> state_y, where x and y are in {code, id, name}
- state_code -> hhs_region_number
- state_code -> hhs

Parameters
---------
Expand All @@ -268,7 +274,7 @@ def add_geocode(
from_code: {'fips', 'zip', 'jhu_uid', 'state_code', 'state_id', 'state_name'}
Specifies the geocode type of the data in from_col.
new_code: {'fips', 'zip', 'state_code', 'state_id', 'state_name', 'hrr', 'msa',
'hhs_region_number'}
'hhs'}
Specifies the geocode type in new_col.
from_col: str, default None
Name of the column in dataframe containing from_code. If None, then the name
Expand Down Expand Up @@ -358,7 +364,7 @@ def replace_geocode(
- zip -> state_code, state_id, state_name, fips, msa, hrr, nation
- jhu_uid -> fips
- state_x -> state_y, where x and y are in {code, id, name}
- state_code -> hhs_region_number
- state_code -> hhs

Parameters
---------
Expand All @@ -371,7 +377,7 @@ def replace_geocode(
new_col: str
Name of the new column to add to data.
new_code: {'fips', 'zip', 'state_code', 'state_id', 'state_name', 'hrr', 'msa',
'hhs_region_number'}
'hhs'}
Specifies the geocode type of the data in new_col.
date_col: str or None, default "date"
Specify which column contains the date values. Used for value aggregation.
Expand Down
35 changes: 33 additions & 2 deletions _delphi_utils_python/tests/test_geomap.py
Original file line number Diff line number Diff line change
Expand Up @@ -137,6 +137,9 @@ def test_crosswalks(self):
# assert cw.groupby("zip")["weight"].sum().round(5).eq(1.0).all()
cw = gmpr._load_crosswalk(from_code="zip", to_code="state")
assert cw.groupby("zip")["weight"].sum().round(5).eq(1.0).all()
cw = gmpr._load_crosswalk(from_code="zip", to_code="hhs")
assert cw.groupby("zip")["weight"].sum().round(5).eq(1.0).all()


def test_load_zip_fips_table(self):
gmpr = GeoMapper()
Expand Down Expand Up @@ -202,8 +205,8 @@ def test_add_geocode(self):

# state_code -> hhs
new_data = gmpr.add_geocode(self.zip_data, "zip", "state_code")
new_data2 = gmpr.add_geocode(new_data, "state_code", "hhs_region_number")
assert new_data2["hhs_region_number"].unique().size == 2
new_data2 = gmpr.add_geocode(new_data, "state_code", "hhs")
assert new_data2["hhs"].unique().size == 2

# state_name -> state_id
new_data = gmpr.replace_geocode(self.zip_data, "zip", "state_name")
Expand Down Expand Up @@ -261,3 +264,31 @@ def test_add_geocode(self):
}
)
)

# fips -> hhs
new_data = gmpr.replace_geocode(self.fips_data_3.drop(columns=["date"]),
"fips", "hhs", date_col=None)
assert new_data.equals(
pd.DataFrame().from_dict(
{
"hhs": {0: "2", 1: "6"},
"count": {0: 12, 1: 6},
"total": {0: 111, 1: 13}
}
)
)

# zip -> hhs
new_data = gmpr.replace_geocode(self.zip_data, "zip", "hhs")
new_data = new_data.round(10) # get rid of a floating point error with 99.00000000000001
assert new_data.equals(
pd.DataFrame().from_dict(
{
"date": {0: pd.Timestamp("2018-01-01"), 1: pd.Timestamp("2018-01-01"),
2: pd.Timestamp("2018-01-03"), 3: pd.Timestamp("2018-01-03")},
"hhs": {0: "5", 1: "9", 2: "5", 3: "9"},
"count": {0: 99.0, 1: 801.0, 2: 100.0, 3: 786.0},
"total": {0: 198.0, 1: 1602.0, 2: 200.0, 3: 1572.0}
}
)
)