diff --git a/_delphi_utils_python/delphi_utils/geomap.py b/_delphi_utils_python/delphi_utils/geomap.py index 65885ffea..b3fe682a1 100644 --- a/_delphi_utils_python/delphi_utils/geomap.py +++ b/_delphi_utils_python/delphi_utils/geomap.py @@ -392,17 +392,17 @@ def replace_geocode( df = df.groupby([date_col, new_col]).sum().reset_index() return df - def add_population_column(self, geocode_type, data=None, geocode_col=None, dropna=True): + def add_population_column(self, data, geocode_type, geocode_col=None, dropna=True): """ Appends a population column to a dataframe, based on the FIPS or ZIP code. If no dataframe is provided, the full crosswalk from geocode to population is returned. Parameters --------- - geocode_type: {"fips", "zip"} - The type of the geocode contained in geocode_col. data: pd.DataFrame The dataframe with a FIPS code column. + geocode_type: {"fips", "zip"} + The type of the geocode contained in geocode_col. geocode_col: str, default None The name of the column containing the geocodes. If None, uses the geocode_type as the name. @@ -413,6 +413,7 @@ def add_population_column(self, geocode_type, data=None, geocode_col=None, dropn A dataframe with a population column appended. """ geocode_col = geocode_type if geocode_col is None else geocode_col + data = data.copy() if geocode_type not in ["fips", "zip"]: raise ValueError( @@ -422,15 +423,12 @@ def add_population_column(self, geocode_type, data=None, geocode_col=None, dropn pop_df = self._load_crosswalk(from_code=geocode_type, to_code="pop") - if data is None: - return pop_df.rename(columns={"pop": "population"}) - if not is_string_dtype(data[geocode_col]): data[geocode_col] = data[geocode_col].astype(str).str.zfill(5) - merge_type = "left" if dropna else "inner" + merge_type = "inner" if dropna else "left" data_with_pop = ( - data.copy() + data .merge(pop_df, left_on=geocode_col, right_on=geocode_type, how=merge_type) .rename(columns={"pop": "population"}) ) diff --git a/_delphi_utils_python/tests/test_geomap.py b/_delphi_utils_python/tests/test_geomap.py index b756d1618..fca26b8ea 100644 --- a/_delphi_utils_python/tests/test_geomap.py +++ b/_delphi_utils_python/tests/test_geomap.py @@ -277,14 +277,12 @@ def test_zip_to_state_id(self): def test_add_population_column(self): gmpr = GeoMapper() - new_data = gmpr.add_population_column("fips", self.fips_data_3) + new_data = gmpr.add_population_column(self.fips_data_3, "fips") assert new_data["population"].sum() == 274963 - new_data = gmpr.add_population_column("zip", self.zip_data) + new_data = gmpr.add_population_column(self.zip_data, "zip") assert new_data["population"].sum() == 274902 with pytest.raises(ValueError): - new_data = gmpr.add_population_column("hrr", self.zip_data) - pop_df = gmpr.add_population_column("fips") - assert pop_df.shape == (3274, 2) + new_data = gmpr.add_population_column(self.zip_data, "hrr") def test_add_geocode(self): gmpr = GeoMapper() diff --git a/jhu/delphi_jhu/pull.py b/jhu/delphi_jhu/pull.py index 0330db62b..849c1f98a 100644 --- a/jhu/delphi_jhu/pull.py +++ b/jhu/delphi_jhu/pull.py @@ -101,7 +101,7 @@ def pull_jhu_data(base_url: str, metric: str, gmpr: GeoMapper) -> pd.DataFrame: ) # Merge in population, set population as NAN for fake fips - df = gmpr.add_population_column("fips", df) + df = gmpr.add_population_column(df, "fips") df = create_diffs_column(df)