Skip to content

Commit 445b583

Browse files
authored
Reformat NSSP county FIPS code (#1976)
* Format county FIPS codes to all be 5 digits with leading zeros; add test
* Update test_pull.py wording
* Fix linter issues
1 parent ae6f011 commit 445b583

File tree

4 files changed

+26
-1
lines changed

4 files changed

+26
-1
lines changed

nssp/delphi_nssp/constants.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -37,6 +37,6 @@
3737
"timestamp": "datetime64[ns]",
3838
"geography": str,
3939
"county": str,
40-
"fips": int,
40+
"fips": str,
4141
}
4242
)

nssp/delphi_nssp/pull.py

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -67,5 +67,8 @@ def pull_nssp_data(socrata_token: str):
6767
except KeyError as exc:
6868
raise ValueError(warn_string(df_ervisits, TYPE_DICT)) from exc
6969

70+
# Format county fips to all be 5 digits with leading zeros
71+
df_ervisits["fips"] = df_ervisits["fips"].apply(lambda x: str(x).zfill(5) if str(x) != "0" else "0")
72+
7073
keep_columns = ["timestamp", "geography", "county", "fips"]
7174
return df_ervisits[SIGNALS + keep_columns]

nssp/tests/test_data/page.txt

Lines changed: 21 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -61,5 +61,26 @@
6161
"hsa_nci_id": "All",
6262
"fips": "0",
6363
"trend_source": "United States"
64+
},
65+
{
66+
"week_end": "2023-05-13T00:00:00.000",
67+
"geography": "Colorado",
68+
"county": "Jefferson",
69+
"percent_visits_combined": "0.84",
70+
"percent_visits_covid": "0.59",
71+
"percent_visits_influenza": "0.23",
72+
"percent_visits_rsv": "0.03",
73+
"percent_visits_smoothed": "0.83",
74+
"percent_visits_smoothed_covid": "0.62",
75+
"percent_visits_smoothed_1": "0.18",
76+
"percent_visits_smoothed_rsv": "0.02",
77+
"ed_trends_covid": "Decreasing",
78+
"ed_trends_influenza": "No Change",
79+
"ed_trends_rsv": "Decreasing",
80+
"hsa": "Denver (Denver), CO - Jefferson, CO",
81+
"hsa_counties": "Adams, Arapahoe, Clear Creek, Denver, Douglas, Elbert, Gilpin, Grand, Jefferson, Park, Summit",
82+
"hsa_nci_id": "688",
83+
"fips": "8059",
84+
"trend_source": "HSA"
6485
}
6586
]

nssp/tests/test_pull.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -49,6 +49,7 @@ def test_pull_nssp_data(self, mock_socrata):
4949
assert result["geography"].notnull().all(), "geography has rogue NaN"
5050
assert result["county"].notnull().all(), "county has rogue NaN"
5151
assert result["fips"].notnull().all(), "fips has rogue NaN"
52+
assert result["fips"].apply(lambda x: isinstance(x, str) and len(x) != 4).all(), "fips formatting should always be 5 digits; include leading zeros if aplicable"
5253

5354
# Check for each signal in SIGNALS
5455
for signal in SIGNALS:

0 commit comments

Comments
 (0)