Skip to content

Commit 3b6d139

Browse files
committed
Geocode updates:
* update the replace_geocode documentation to be clear about data columns
* add test cases for renaming columns in replace_geocode
* fix the issue where state-to-state conversions dropped columns
1 parent 3d8c331 commit 3b6d139

File tree

2 files changed

+37
-7
lines changed

2 files changed

+37
-7
lines changed

_delphi_utils_python/delphi_utils/geomap.py

Lines changed: 10 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -77,6 +77,11 @@ class GeoMapper:
7777
==========
7878
The main GeoMapper object loads and stores crosswalk dataframes on-demand.
7979
80+
When replacing geocodes with a new one, an aggregation step is performed on the data columns
81+
to merge entries (i.e., in the case of a many-to-one mapping or a weighted mapping). This
82+
requires a specification of the data columns, which are assumed to be all the columns that
83+
are not the geocodes or the date column specified in date_col.
84+
8085
Example 1: to add a new column with a new geocode, possibly with weights:
8186
> gmpr = GeoMapper()
8287
> df = gmpr.add_geocode(df, "fips", "zip", from_col="fips", new_col="geo_id",
@@ -327,8 +332,12 @@ def add_geocode(
327332
df = df.merge(crosswalk, left_on=from_col, right_on=from_col, how="left")
328333

329334
# Drop extra state columns
330-
if new_code in state_codes:
335+
if new_code in state_codes and not from_code in state_codes:
336+
state_codes.remove(new_code)
337+
df.drop(columns=state_codes, inplace=True)
338+
elif new_code in state_codes and from_code in state_codes:
331339
state_codes.remove(new_code)
340+
state_codes.remove(from_code)
332341
df.drop(columns=state_codes, inplace=True)
333342

334343
return df

_delphi_utils_python/tests/test_geomap.py

Lines changed: 27 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -278,11 +278,13 @@ def test_zip_to_state_id(self):
278278
def test_add_population_column(self):
279279
gmpr = GeoMapper()
280280
new_data = gmpr.add_population_column(self.fips_data_3, "fips")
281-
assert new_data["population"].sum() == 274963
281+
assert new_data.shape == (5, 5)
282282
new_data = gmpr.add_population_column(self.zip_data, "zip")
283-
assert new_data["population"].sum() == 274902
283+
assert new_data.shape == (6, 5)
284284
with pytest.raises(ValueError):
285285
new_data = gmpr.add_population_column(self.zip_data, "hrr")
286+
new_data = gmpr.add_population_column(self.fips_data_5, "fips")
287+
assert new_data.shape == (4, 5)
286288

287289
def test_add_geocode(self):
288290
gmpr = GeoMapper()
@@ -383,17 +385,19 @@ def test_add_geocode(self):
383385
assert new_data2["hhs_region_number"].unique().size == 2
384386

385387
# state_name -> state_id
386-
new_data = gmpr.add_geocode(self.zip_data, "zip", "state_name")
388+
new_data = gmpr.replace_geocode(self.zip_data, "zip", "state_name")
387389
new_data2 = gmpr.add_geocode(new_data, "state_name", "state_id")
388-
assert new_data2.shape == (12, 6)
390+
assert new_data2.shape == (4, 5)
391+
new_data2 = gmpr.replace_geocode(new_data, "state_name", "state_id", new_col="abbr")
392+
assert "abbr" in new_data2.columns
389393

390394
# fips -> nation
391-
new_data = gmpr.replace_geocode(self.fips_data_5, "fips", "nation")
395+
new_data = gmpr.replace_geocode(self.fips_data_5, "fips", "nation", new_col="NATION")
392396
assert new_data.equals(
393397
pd.DataFrame().from_dict(
394398
{
395399
"date": {0: pd.Timestamp("2018-01-01 00:00:00")},
396-
"nation": {0: "us"},
400+
"NATION": {0: "us"},
397401
"count": {0: 10024.0},
398402
"total": {0: 100006.0},
399403
}
@@ -416,6 +420,23 @@ def test_add_geocode(self):
416420
)
417421
)
418422

423+
# hrr -> nation
424+
with pytest.raises(ValueError):
425+
new_data = gmpr.replace_geocode(self.zip_data, "zip", "hrr")
426+
new_data2 = gmpr.replace_geocode(new_data, "hrr", "nation")
427+
428+
# hrr -> nation
429+
with pytest.raises(ValueError):
430+
new_data = gmpr.replace_geocode(self.zip_data, "zip", "hrr")
431+
new_data2 = gmpr.replace_geocode(new_data, "hrr", "nation")
432+
433+
# hrr -> nation
434+
with pytest.raises(ValueError):
435+
new_data = gmpr.replace_geocode(self.zip_data, "zip", "hrr")
436+
new_data2 = gmpr.replace_geocode(new_data, "hrr", "nation")
437+
419438
# fips -> hrr (dropna=True/False check)
420439
assert not gmpr.add_geocode(self.fips_data_3, "fips", "hrr").isna().any().any()
421440
assert gmpr.add_geocode(self.fips_data_3, "fips", "hrr", dropna=False).isna().any().any()
441+
442+
TestGeoMapper().test_add_geocode()

0 commit comments

Comments
 (0)