
Commit 90aafdd

rename to hhs
1 parent d8b6ab6 commit 90aafdd

6 files changed: +32, -32 lines changed

_delphi_utils_python/data_proc/geomap/geo_data_proc.py

Lines changed: 4 additions & 4 deletions

@@ -45,7 +45,7 @@
 ZIP_STATE_CODE_OUT_FILENAME = "zip_state_code_table.csv"
 ZIP_HHS_FILENAME = "zip_hhs_table.csv"
 STATE_OUT_FILENAME = "state_codes_table.csv"
-STATE_HHS_OUT_FILENAME = "state_code_hhs_region_number_table.csv"
+STATE_HHS_OUT_FILENAME = "state_code_hhs_table.csv"
 JHU_FIPS_OUT_FILENAME = "jhu_uid_fips_table.csv"

@@ -334,12 +334,12 @@ def create_state_hhs_crosswalk():
     hhs_state_pairs.append((9, "Northern Mariana Islands"))

     # Make dataframe
-    hhs_df = pd.DataFrame(hhs_state_pairs, columns=["hhs_region_number", "state_name"])
-    hhs_df["hhs_region_number"] = hhs_df["hhs_region_number"].astype(str)
+    hhs_df = pd.DataFrame(hhs_state_pairs, columns=["hhs", "state_name"])
+    hhs_df["hhs"] = hhs_df["hhs"].astype(str)

     (
         ss_df.merge(hhs_df, on="state_name", how="left")
-        .dropna()[["state_code", "hhs_region_number"]]
+        .dropna()[["state_code", "hhs"]]
         .to_csv(join(OUTPUT_DIR, STATE_HHS_OUT_FILENAME), index=False)
     )
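
The rename touches only the column label; the crosswalk construction itself is unchanged. Below is a minimal standalone sketch of the renamed step, using pandas and illustrative stand-ins for the script's own ss_df, OUTPUT_DIR, and full hhs_state_pairs list:

from os.path import join
import pandas as pd

# Hypothetical stand-ins for the script's real inputs (values are illustrative).
OUTPUT_DIR = "."
STATE_HHS_OUT_FILENAME = "state_code_hhs_table.csv"
hhs_state_pairs = [(4, "Alabama"), (10, "Alaska"), (9, "Arizona")]
ss_df = pd.DataFrame({"state_code": ["01", "02", "04"],
                      "state_name": ["Alabama", "Alaska", "Arizona"]})

# Same pattern as create_state_hhs_crosswalk() after the rename:
# the HHS region column is now simply "hhs".
hhs_df = pd.DataFrame(hhs_state_pairs, columns=["hhs", "state_name"])
hhs_df["hhs"] = hhs_df["hhs"].astype(str)

(
    ss_df.merge(hhs_df, on="state_name", how="left")
    .dropna()[["state_code", "hhs"]]
    .to_csv(join(OUTPUT_DIR, STATE_HHS_OUT_FILENAME), index=False)
)

The written file then starts with the header state_code,hhs, matching the renamed table further down.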

_delphi_utils_python/delphi_utils/data/fips_hhs_table.csv

Lines changed: 1 addition & 1 deletion

@@ -1,4 +1,4 @@
-fips,hhs_region_number
+fips,hhs
 01000,4
 01001,4
 01003,4

_delphi_utils_python/delphi_utils/data/state_code_hhs_region_number_table.csv renamed to _delphi_utils_python/delphi_utils/data/state_code_hhs_table.csv

Lines changed: 1 addition & 1 deletion

@@ -1,4 +1,4 @@
-state_code,hhs_region_number
+state_code,hhs
 01,4
 02,10
 04,9

_delphi_utils_python/delphi_utils/data/zip_hhs_table.csv

Lines changed: 1 addition & 1 deletion

@@ -1,4 +1,4 @@
-zip,weight,hhs_region_number
+zip,weight,hhs
 601,0.994345718901454,2
 601,0.005654281098546043,2
 602,1.0,2
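
Anything that reads these packaged tables directly now sees an hhs column in place of hhs_region_number. A minimal sketch, assuming pandas and the repository-relative path shown above (the dtype choices are illustrative, not the package's own):

import pandas as pd

# The renamed ZIP -> HHS crosswalk: "hhs" replaces "hhs_region_number",
# while the "zip" and "weight" columns are unchanged.
zip_hhs = pd.read_csv(
    "_delphi_utils_python/delphi_utils/data/zip_hhs_table.csv",
    dtype={"zip": str, "hhs": str, "weight": float},
)
print(zip_hhs.columns.tolist())  # ['zip', 'weight', 'hhs']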

_delphi_utils_python/delphi_utils/geomap.py

Lines changed: 18 additions & 18 deletions

@@ -22,19 +22,19 @@
         "msa": join(DATA_PATH, "zip_msa_table.csv"),
         "pop": join(DATA_PATH, "zip_pop.csv"),
         "state": join(DATA_PATH, "zip_state_code_table.csv"),
-        "hhs_region_number": join(DATA_PATH, "zip_hhs_table.csv")
+        "hhs": join(DATA_PATH, "zip_hhs_table.csv")
     },
     "fips": {
         "zip": join(DATA_PATH, "fips_zip_table.csv"),
         "hrr": join(DATA_PATH, "fips_hrr_table.csv"),
         "msa": join(DATA_PATH, "fips_msa_table.csv"),
         "pop": join(DATA_PATH, "fips_pop.csv"),
         "state": join(DATA_PATH, "fips_state_table.csv"),
-        "hhs_region_number": join(DATA_PATH, "fips_hhs_table.csv"),
+        "hhs": join(DATA_PATH, "fips_hhs_table.csv"),
     },
     "state": {"state": join(DATA_PATH, "state_codes_table.csv")},
     "state_code": {
-        "hhs_region_number": join(DATA_PATH, "state_code_hhs_region_number_table.csv")
+        "hhs": join(DATA_PATH, "state_code_hhs_table.csv")
     },
     "jhu_uid": {"fips": join(DATA_PATH, "jhu_uid_fips_table.csv")},
 }

@@ -57,14 +57,14 @@ class GeoMapper: # pylint: disable=too-many-public-methods
     - [x] zip -> hrr : unweighted
     - [x] zip -> msa : unweighted
     - [x] zip -> state
-    - [x] zip -> hhs_region_number
+    - [x] zip -> hhs
     - [x] zip -> population
-    - [x] state code -> hhs_region_number
+    - [x] state code -> hhs
     - [x] fips -> state : unweighted
     - [x] fips -> msa : unweighted
     - [x] fips -> megacounty
     - [x] fips -> hrr
-    - [x] fips -> hhs_region_number
+    - [x] fips -> hhs
     - [x] nation
     - [ ] zip -> dma (postponed)

@@ -107,13 +107,13 @@ def __init__(self):
         self.crosswalk_filepaths = CROSSWALK_FILEPATHS
         self.crosswalks = {
             "zip": {
-                geo: None for geo in ["fips", "hrr", "msa", "pop", "state", "hhs_region_number"]
+                geo: None for geo in ["fips", "hrr", "msa", "pop", "state", "hhs"]
             },
             "fips": {
-                geo: None for geo in ["zip", "hrr", "msa", "pop", "state", "hhs_region_number"]
+                geo: None for geo in ["zip", "hrr", "msa", "pop", "state", "hhs"]
             },
             "state": {"state": None},
-            "state_code": {"hhs_region_number": None},
+            "state_code": {"hhs": None},
             "jhu_uid": {"fips": None},
         }

@@ -131,7 +131,7 @@ def _load_crosswalk(self, from_code, to_code):
                 ("jhu_uid", "fips"),
                 ("zip", "msa"),
                 ("fips", "hrr"),
-                ("zip", "hhs_region_number")
+                ("zip", "hhs")
             ]:
                 self.crosswalks[from_code][to_code] = pd.read_csv(
                     stream,

@@ -145,8 +145,8 @@ def _load_crosswalk(self, from_code, to_code):
             elif (from_code, to_code) in [
                 ("zip", "hrr"),
                 ("fips", "msa"),
-                ("fips", "hhs_region_number"),
-                ("state_code", "hhs_region_number")
+                ("fips", "hhs"),
+                ("state_code", "hhs")
             ]:
                 self.crosswalks[from_code][to_code] = pd.read_csv(
                     stream,

@@ -261,11 +261,11 @@ def add_geocode(
         """Add a new geocode column to a dataframe.

         Currently supported conversions:
-        - fips -> state_code, state_id, state_name, zip, msa, hrr, nation, hhs_region_number
-        - zip -> state_code, state_id, state_name, fips, msa, hrr, nation, hhs_region_number
+        - fips -> state_code, state_id, state_name, zip, msa, hrr, nation, hhs
+        - zip -> state_code, state_id, state_name, fips, msa, hrr, nation, hhs
         - jhu_uid -> fips
         - state_x -> state_y, where x and y are in {code, id, name}
-        - state_code -> hhs_region_number
+        - state_code -> hhs

         Parameters
         ---------

@@ -274,7 +274,7 @@ def add_geocode(
         from_code: {'fips', 'zip', 'jhu_uid', 'state_code', 'state_id', 'state_name'}
             Specifies the geocode type of the data in from_col.
         new_code: {'fips', 'zip', 'state_code', 'state_id', 'state_name', 'hrr', 'msa',
-            'hhs_region_number'}
+            'hhs'}
             Specifies the geocode type in new_col.
         from_col: str, default None
             Name of the column in dataframe containing from_code. If None, then the name

@@ -364,7 +364,7 @@ def replace_geocode(
         - zip -> state_code, state_id, state_name, fips, msa, hrr, nation
         - jhu_uid -> fips
         - state_x -> state_y, where x and y are in {code, id, name}
-        - state_code -> hhs_region_number
+        - state_code -> hhs

         Parameters
         ---------

@@ -377,7 +377,7 @@ def replace_geocode(
         new_col: str
             Name of the new column to add to data.
         new_code: {'fips', 'zip', 'state_code', 'state_id', 'state_name', 'hrr', 'msa',
-            'hhs_region_number'}
+            'hhs'}
             Specifies the geocode type of the data in new_col.
         date_col: str or None, default "date"
             Specify which column contains the date values. Used for value aggregation.
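
With the rename, callers pass "hhs" as the geocode name wherever "hhs_region_number" was accepted before. A short usage sketch, mirroring the conversions documented above; the input dataframe and its values are illustrative:

import pandas as pd
from delphi_utils.geomap import GeoMapper

gmpr = GeoMapper()

# Illustrative county-level input with dummy counts.
df = pd.DataFrame({"fips": ["01001", "01003"], "count": [1, 2], "total": [10, 20]})

# fips -> hhs: attach the HHS region as a new column ...
with_hhs = gmpr.add_geocode(df, "fips", "hhs")

# ... or aggregate the value columns to HHS regions outright
# (date_col=None because this frame has no date column).
by_hhs = gmpr.replace_geocode(df, "fips", "hhs", date_col=None)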

_delphi_utils_python/tests/test_geomap.py

Lines changed: 7 additions & 7 deletions

@@ -137,7 +137,7 @@ def test_crosswalks(self):
         # assert cw.groupby("zip")["weight"].sum().round(5).eq(1.0).all()
         cw = gmpr._load_crosswalk(from_code="zip", to_code="state")
         assert cw.groupby("zip")["weight"].sum().round(5).eq(1.0).all()
-        cw = gmpr._load_crosswalk(from_code="zip", to_code="hhs_region_number")
+        cw = gmpr._load_crosswalk(from_code="zip", to_code="hhs")
         assert cw.groupby("zip")["weight"].sum().round(5).eq(1.0).all()

@@ -205,8 +205,8 @@ def test_add_geocode(self):

         # state_code -> hhs
         new_data = gmpr.add_geocode(self.zip_data, "zip", "state_code")
-        new_data2 = gmpr.add_geocode(new_data, "state_code", "hhs_region_number")
-        assert new_data2["hhs_region_number"].unique().size == 2
+        new_data2 = gmpr.add_geocode(new_data, "state_code", "hhs")
+        assert new_data2["hhs"].unique().size == 2

         # state_name -> state_id
         new_data = gmpr.replace_geocode(self.zip_data, "zip", "state_name")

@@ -267,26 +267,26 @@ def test_add_geocode(self):

         # fips -> hhs
         new_data = gmpr.replace_geocode(self.fips_data_3.drop(columns=["date"]),
-                                        "fips", "hhs_region_number", date_col=None)
+                                        "fips", "hhs", date_col=None)
         assert new_data.equals(
             pd.DataFrame().from_dict(
                 {
-                    "hhs_region_number": {0: "2", 1: "6"},
+                    "hhs": {0: "2", 1: "6"},
                     "count": {0: 12, 1: 6},
                     "total": {0: 111, 1: 13}
                 }
             )
         )

         # zip -> hhs
-        new_data = gmpr.replace_geocode(self.zip_data, "zip", "hhs_region_number")
+        new_data = gmpr.replace_geocode(self.zip_data, "zip", "hhs")
         new_data = new_data.round(10)  # get rid of a floating point error with 99.00000000000001
         assert new_data.equals(
             pd.DataFrame().from_dict(
                 {
                     "date": {0: pd.Timestamp("2018-01-01"), 1: pd.Timestamp("2018-01-01"),
                              2: pd.Timestamp("2018-01-03"), 3: pd.Timestamp("2018-01-03")},
-                    "hhs_region_number": {0: "5", 1: "9", 2: "5", 3: "9"},
+                    "hhs": {0: "5", 1: "9", 2: "5", 3: "9"},
                     "count": {0: 99.0, 1: 801.0, 2: 100.0, 3: 786.0},
                     "total": {0: 198.0, 1: 1602.0, 2: 200.0, 3: 1572.0}
                 }
