Skip to content

Commit 7ebc144

Browse files
authored
Merge pull request #301 from cmu-delphi/jhu_nyc_population_fix
Fix JHU's fips-population source file
2 parents 93b5ba2 + c8605ad commit 7ebc144

File tree

7 files changed: 68 additions (+68) and 3232 deletions (-3232).

_delphi_utils_python/delphi_utils/geomap.py

Lines changed: 10 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -392,16 +392,17 @@ def replace_geocode(
392392
df = df.groupby([date_col, new_col]).sum().reset_index()
393393
return df
394394

395-
def add_population_column(self, data, geocode_type, geocode_col=None):
395+
def add_population_column(self, geocode_type, data=None, geocode_col=None):
396396
"""
397-
Appends a population column to a dateframe, based on the FIPS or ZIP code.
397+
Appends a population column to a dataframe, based on the FIPS or ZIP code. If no
398+
dataframe is provided, the full crosswalk from geocode to population is returned.
398399
399400
Parameters
400401
---------
401-
data: pd.DataFrame
402-
The dataframe with a FIPS code column.
403402
geocode_type: {"fips", "zip"}
404403
The type of the geocode contained in geocode_col.
404+
data: pd.DataFrame
405+
The dataframe with a FIPS code column.
405406
geocode_col: str, default None
406407
The name of the column containing the geocodes. If None, uses the geocode_type
407408
as the name.
@@ -419,11 +420,14 @@ def add_population_column(self, data, geocode_type, geocode_col=None):
419420
For other codes, aggregate those."
420421
)
421422

423+
pop_df = self._load_crosswalk(from_code=geocode_type, to_code="pop")
424+
425+
if data is None:
426+
return pop_df.rename(columns={"pop": "population"})
427+
422428
if not is_string_dtype(data[geocode_col]):
423429
data[geocode_col] = data[geocode_col].astype(str).str.zfill(5)
424430

425-
pop_df = self._load_crosswalk(from_code=geocode_type, to_code="pop")
426-
427431
data_with_pop = (
428432
data.copy()
429433
.merge(pop_df, left_on=geocode_col, right_on=geocode_type, how="inner")

_delphi_utils_python/tests/test_geomap.py

Lines changed: 5 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -277,12 +277,14 @@ def test_zip_to_state_id(self):
277277

278278
def test_add_population_column(self):
279279
gmpr = GeoMapper()
280-
new_data = gmpr.add_population_column(self.fips_data_3, "fips")
280+
new_data = gmpr.add_population_column("fips", self.fips_data_3)
281281
assert new_data["population"].sum() == 274963
282-
new_data = gmpr.add_population_column(self.zip_data, "zip")
282+
new_data = gmpr.add_population_column("zip", self.zip_data)
283283
assert new_data["population"].sum() == 274902
284284
with pytest.raises(ValueError):
285-
new_data = gmpr.add_population_column(self.zip_data, "hrr")
285+
new_data = gmpr.add_population_column("hrr", self.zip_data)
286+
pop_df = gmpr.add_population_column("fips")
287+
assert pop_df.shape == (3274, 2)
286288

287289
def test_add_geocode(self):
288290
gmpr = GeoMapper()

jhu/delphi_jhu/pull.py

Lines changed: 1 addition & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -69,12 +69,7 @@ def pull_jhu_data(base_url: str, metric: str, pop_df: pd.DataFrame) -> pd.DataFr
6969

7070
gmpr = GeoMapper()
7171
df = gmpr.replace_geocode(df, "jhu_uid", "fips", from_col="UID", date_col="timestamp")
72-
73-
# Merge in population LOWERCASE, consistent across confirmed and deaths
74-
# Set population as NAN for fake fips
75-
pop_df.rename(columns={'FIPS':'fips'}, inplace=True)
76-
pop_df['fips'] = pop_df['fips'].astype(int).\
77-
astype(str).str.zfill(5)
72+
# Merge in population, set population as NAN for fake fips
7873
df = pd.merge(df, pop_df, on="fips", how='left')
7974

8075
# Add a dummy first row here on day before first day

jhu/delphi_jhu/run.py

Lines changed: 2 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -17,6 +17,7 @@
1717
S3ArchiveDiffer,
1818
)
1919

20+
from delphi_utils import GeoMapper
2021
from .geo import geo_map
2122
from .pull import pull_jhu_data
2223
from .smooth import (
@@ -84,10 +85,7 @@ def run_module():
8485
else:
8586
arch_diff = None
8687

87-
pop_df = pd.read_csv(
88-
join(static_file_dir, "fips_population.csv"),
89-
dtype={"fips": float, "population": float},
90-
)
88+
pop_df = GeoMapper().add_population_column("fips")
9189

9290
dfs = {metric: pull_jhu_data(base_url, metric, pop_df) for metric in METRICS}
9391
for metric, geo_res, sensor, smoother in product(

0 commit comments

Comments (0)