Skip to content

Commit aa52e44

Browse files
aysim319 and minhkhul authored
2129 fix hrr for nssp (#2131)
* first implementation * tests and clean up * fixing copy pasta * name change for host * adding test data * suggested commits * re adding content * moving to proper place * lint * fixed test * fix import error * adding fixture for hrr run * comment on test for hrr * Apply suggestions from code review Co-authored-by: minhkhul <[email protected]> --------- Co-authored-by: minhkhul <[email protected]>
1 parent 454ac56 commit aa52e44

File tree

12 files changed

+608
-215
lines changed

12 files changed

+608
-215
lines changed

nssp/delphi_nssp/constants.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,7 @@
11
"""Registry for variations."""
22

3+
DATASET_ID = "rdmq-nq56"
4+
35
GEOS = [
46
"hrr",
57
"msa",

nssp/delphi_nssp/pull.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -13,7 +13,7 @@
1313
from delphi_utils import create_backup_csv
1414
from sodapy import Socrata
1515

16-
from .constants import NEWLINE, SIGNALS, SIGNALS_MAP, TYPE_DICT
16+
from .constants import DATASET_ID, NEWLINE, SIGNALS, SIGNALS_MAP, TYPE_DICT
1717

1818

1919
def print_callback(remote_file_name, logger, bytes_so_far, bytes_total, progress_chunks):
@@ -148,7 +148,7 @@ def pull_nssp_data(
148148
Dataframe as described above.
149149
"""
150150
if not custom_run:
151-
socrata_results = pull_with_socrata_api(socrata_token, "rdmq-nq56")
151+
socrata_results = pull_with_socrata_api(socrata_token, DATASET_ID)
152152
df_ervisits = pd.DataFrame.from_records(socrata_results)
153153
create_backup_csv(df_ervisits, backup_dir, custom_run, logger=logger)
154154
logger.info("Number of records grabbed", num_records=len(df_ervisits), source="Socrata API")

nssp/delphi_nssp/run.py

Lines changed: 4 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -118,9 +118,10 @@ def run_module(params, logger=None):
118118
)
119119
elif geo == "hrr":
120120
df = df[["fips", "val", "timestamp"]]
121-
# fips -> hrr has a weighted version
122-
df = geo_mapper.replace_geocode(df, "fips", "hrr")
123-
df = df.rename(columns={"hrr": "geo_id"})
121+
df = geo_mapper.add_population_column(df, geocode_type="fips", geocode_col="fips")
122+
df = geo_mapper.add_geocode(df, "fips", "hrr", from_col="fips", new_col="geo_id")
123+
df = geo_mapper.aggregate_by_weighted_sum(df, "geo_id", "val", "timestamp", "population")
124+
df = df.rename(columns={"weighted_val": "val"})
124125
elif geo == "msa":
125126
df = df[["fips", "val", "timestamp"]]
126127
# fips -> msa doesn't have a weighted version, so we need to add columns and sum ourselves

nssp/tests/conftest.py

Lines changed: 101 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,101 @@
1+
import copy
2+
import json
3+
import time
4+
from unittest.mock import patch, MagicMock
5+
6+
import pytest
7+
from pathlib import Path
8+
9+
from delphi_nssp.run import run_module
10+
from delphi_nssp.constants import DATASET_ID
11+
12+
TEST_DIR = Path(__file__).parent
13+
14+
# test data generated with following url with socrata:
15+
# https://data.cdc.gov/resource/rdmq-nq56.json?$where=week_end >= '2022-10-01T00:00:00.000' AND week_end <= '2022-10-20T00:00:00.000'
16+
17+
with open(f"{TEST_DIR}/test_data/page.json", "r") as f:
18+
TEST_DATA = json.load(f)
19+
20+
with open(f"{TEST_DIR}/test_data/page_100_hrr.json", "r") as f:
21+
HRR_TEST_DATA = json.load(f)
22+
23+
@pytest.fixture(scope="session")
24+
def params():
25+
params = {
26+
"common": {
27+
"export_dir": f"{TEST_DIR}/receiving",
28+
"log_filename": f"{TEST_DIR}/test.log",
29+
"backup_dir": f"{TEST_DIR}/test_raw_data_backups",
30+
"custom_run": False
31+
},
32+
"indicator": {
33+
"wip_signal": True,
34+
"export_start_date": "2020-08-01",
35+
"static_file_dir": "./static",
36+
"socrata_token": "test_token"
37+
},
38+
"validation": {
39+
"common": {
40+
"span_length": 14,
41+
"min_expected_lag": {"all": "3"},
42+
"max_expected_lag": {"all": "4"},
43+
}
44+
}
45+
}
46+
return copy.deepcopy(params)
47+
48+
@pytest.fixture
49+
def params_w_patch(params):
50+
params_copy = copy.deepcopy(params)
51+
params_copy["common"]["custom_run"] = True
52+
params_copy["patch"] = {
53+
"patch_dir": f"{TEST_DIR}/patch_dir",
54+
"source_dir": "test_source_dir",
55+
"source_host": "host",
56+
"user": "test_user",
57+
"start_issue": "2023-01-01",
58+
"end_issue": "2023-01-03",
59+
}
60+
61+
return params_copy
62+
63+
@pytest.fixture(scope="function")
64+
def run_as_module(params):
65+
"""
66+
Fixture to use TEST_DATA when testing run_module.
67+
68+
This fixture patches Socrata to return the predefined test data.
69+
"""
70+
71+
with patch('sodapy.Socrata.get') as mock_get:
72+
def side_effect(*args, **kwargs):
73+
if kwargs['offset'] == 0:
74+
if DATASET_ID in args[0]:
75+
return TEST_DATA
76+
else:
77+
return []
78+
mock_get.side_effect = side_effect
79+
run_module(params)
80+
81+
82+
83+
@pytest.fixture(scope="function")
84+
def run_as_module_hrr(params):
85+
"""
86+
Fixture to use HRR_TEST_DATA when testing run_module.
87+
88+
This fixture patches Socrata to return the predefined test data for the HRR region.
89+
"""
90+
91+
with patch('sodapy.Socrata.get') as mock_get, \
92+
patch('delphi_nssp.run.GEOS', ["hrr"]):
93+
def side_effect(*args, **kwargs):
94+
if kwargs['offset'] == 0:
95+
if DATASET_ID in args[0]:
96+
return HRR_TEST_DATA
97+
else:
98+
return []
99+
mock_get.side_effect = side_effect
100+
run_module(params)
101+

nssp/tests/page_secondary_1.txt

Lines changed: 0 additions & 1 deletion
This file was deleted.

nssp/tests/receiving/.gitignore

Whitespace-only changes.

nssp/tests/test_data/page.json

Lines changed: 200 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,200 @@
1+
[
2+
{
3+
"week_end": "2022-10-01T00:00:00.000",
4+
"geography": "United States",
5+
"county": "All",
6+
"percent_visits_combined": "2.84",
7+
"percent_visits_covid": "1.84",
8+
"percent_visits_influenza": "0.48",
9+
"percent_visits_rsv": "0.55",
10+
"percent_visits_smoothed": "2.83",
11+
"percent_visits_smoothed_covid": "2.07",
12+
"percent_visits_smoothed_1": "0.34",
13+
"percent_visits_smoothed_rsv": "0.44",
14+
"ed_trends_covid": "Decreasing",
15+
"ed_trends_influenza": "Increasing",
16+
"ed_trends_rsv": "Increasing",
17+
"hsa": "All",
18+
"hsa_counties": "All",
19+
"hsa_nci_id": "All",
20+
"fips": "0",
21+
"trend_source": "United States",
22+
"buildnumber": "2025-02-28"
23+
},
24+
{
25+
"week_end": "2022-10-01T00:00:00.000",
26+
"geography": "Colorado",
27+
"county": "All",
28+
"percent_visits_combined": "1.86",
29+
"percent_visits_covid": "1.61",
30+
"percent_visits_influenza": "0.1",
31+
"percent_visits_rsv": "0.16",
32+
"percent_visits_smoothed": "1.83",
33+
"percent_visits_smoothed_covid": "1.61",
34+
"percent_visits_smoothed_1": "0.1",
35+
"percent_visits_smoothed_rsv": "0.12",
36+
"ed_trends_covid": "Decreasing",
37+
"ed_trends_influenza": "Increasing",
38+
"ed_trends_rsv": "Increasing",
39+
"hsa": "All",
40+
"hsa_counties": "All",
41+
"hsa_nci_id": "All",
42+
"fips": "8000",
43+
"trend_source": "State",
44+
"buildnumber": "2025-02-28"
45+
},
46+
{
47+
"week_end": "2022-10-01T00:00:00.000",
48+
"geography": "Colorado",
49+
"county": "Broomfield",
50+
"percent_visits_combined": "2.29",
51+
"percent_visits_covid": "1.78",
52+
"percent_visits_influenza": "0.11",
53+
"percent_visits_rsv": "0.4",
54+
"percent_visits_smoothed": "2.06",
55+
"percent_visits_smoothed_covid": "1.7",
56+
"percent_visits_smoothed_1": "0.07",
57+
"percent_visits_smoothed_rsv": "0.32",
58+
"ed_trends_covid": "No Change",
59+
"ed_trends_influenza": "Increasing",
60+
"ed_trends_rsv": "Increasing",
61+
"hsa": "Boulder, CO - Broomfield, CO",
62+
"hsa_counties": "Boulder, Broomfield",
63+
"hsa_nci_id": "795",
64+
"fips": "8014",
65+
"trend_source": "HSA",
66+
"buildnumber": "2025-02-28"
67+
},
68+
{
69+
"week_end": "2022-10-08T00:00:00.000",
70+
"geography": "United States",
71+
"county": "All",
72+
"percent_visits_combined": "2.93",
73+
"percent_visits_covid": "1.68",
74+
"percent_visits_influenza": "0.68",
75+
"percent_visits_rsv": "0.6",
76+
"percent_visits_smoothed": "2.85",
77+
"percent_visits_smoothed_covid": "1.85",
78+
"percent_visits_smoothed_1": "0.49",
79+
"percent_visits_smoothed_rsv": "0.53",
80+
"ed_trends_covid": "Decreasing",
81+
"ed_trends_influenza": "Increasing",
82+
"ed_trends_rsv": "Increasing",
83+
"hsa": "All",
84+
"hsa_counties": "All",
85+
"hsa_nci_id": "All",
86+
"fips": "0",
87+
"trend_source": "United States",
88+
"buildnumber": "2025-02-28"
89+
},
90+
{
91+
"week_end": "2022-10-08T00:00:00.000",
92+
"geography": "Colorado",
93+
"county": "All",
94+
"percent_visits_combined": "1.82",
95+
"percent_visits_covid": "1.54",
96+
"percent_visits_influenza": "0.09",
97+
"percent_visits_rsv": "0.2",
98+
"percent_visits_smoothed": "1.83",
99+
"percent_visits_smoothed_covid": "1.58",
100+
"percent_visits_smoothed_1": "0.1",
101+
"percent_visits_smoothed_rsv": "0.16",
102+
"ed_trends_covid": "Decreasing",
103+
"ed_trends_influenza": "No Change",
104+
"ed_trends_rsv": "Increasing",
105+
"hsa": "All",
106+
"hsa_counties": "All",
107+
"hsa_nci_id": "All",
108+
"fips": "8000",
109+
"trend_source": "State",
110+
"buildnumber": "2025-02-28"
111+
},
112+
{
113+
"week_end": "2022-10-08T00:00:00.000",
114+
"geography": "Colorado",
115+
"county": "Arapahoe",
116+
"percent_visits_combined": "1.78",
117+
"percent_visits_covid": "1.43",
118+
"percent_visits_influenza": "0.12",
119+
"percent_visits_rsv": "0.23",
120+
"percent_visits_smoothed": "1.74",
121+
"percent_visits_smoothed_covid": "1.4",
122+
"percent_visits_smoothed_1": "0.12",
123+
"percent_visits_smoothed_rsv": "0.25",
124+
"ed_trends_covid": "Increasing",
125+
"ed_trends_influenza": "Increasing",
126+
"ed_trends_rsv": "Increasing",
127+
"hsa": "Denver (Denver), CO - Jefferson, CO",
128+
"hsa_counties": "Adams, Arapahoe, Clear Creek, Denver, Douglas, Elbert, Gilpin, Grand, Jefferson, Park, Summit",
129+
"hsa_nci_id": "688",
130+
"fips": "8005",
131+
"trend_source": "HSA",
132+
"buildnumber": "2025-02-28"
133+
},
134+
{
135+
"week_end": "2022-10-15T00:00:00.000",
136+
"geography": "United States",
137+
"county": "All",
138+
"percent_visits_combined": "3.25",
139+
"percent_visits_covid": "1.64",
140+
"percent_visits_influenza": "0.9",
141+
"percent_visits_rsv": "0.74",
142+
"percent_visits_smoothed": "3.01",
143+
"percent_visits_smoothed_covid": "1.72",
144+
"percent_visits_smoothed_1": "0.69",
145+
"percent_visits_smoothed_rsv": "0.63",
146+
"ed_trends_covid": "Decreasing",
147+
"ed_trends_influenza": "Increasing",
148+
"ed_trends_rsv": "Increasing",
149+
"hsa": "All",
150+
"hsa_counties": "All",
151+
"hsa_nci_id": "All",
152+
"fips": "0",
153+
"trend_source": "United States",
154+
"buildnumber": "2025-02-28"
155+
},
156+
{
157+
"week_end": "2022-10-15T00:00:00.000",
158+
"geography": "Colorado",
159+
"county": "All",
160+
"percent_visits_combined": "2.22",
161+
"percent_visits_covid": "1.7",
162+
"percent_visits_influenza": "0.14",
163+
"percent_visits_rsv": "0.39",
164+
"percent_visits_smoothed": "1.97",
165+
"percent_visits_smoothed_covid": "1.62",
166+
"percent_visits_smoothed_1": "0.11",
167+
"percent_visits_smoothed_rsv": "0.25",
168+
"ed_trends_covid": "No Change",
169+
"ed_trends_influenza": "No Change",
170+
"ed_trends_rsv": "Increasing",
171+
"hsa": "All",
172+
"hsa_counties": "All",
173+
"hsa_nci_id": "All",
174+
"fips": "8000",
175+
"trend_source": "State",
176+
"buildnumber": "2025-02-28"
177+
},
178+
{
179+
"week_end": "2022-10-15T00:00:00.000",
180+
"geography": "Colorado",
181+
"county": "Pueblo",
182+
"percent_visits_combined": "3.24",
183+
"percent_visits_covid": "2.97",
184+
"percent_visits_influenza": "0.14",
185+
"percent_visits_rsv": "0.14",
186+
"percent_visits_smoothed": "2.99",
187+
"percent_visits_smoothed_covid": "2.82",
188+
"percent_visits_smoothed_1": "0.07",
189+
"percent_visits_smoothed_rsv": "0.1",
190+
"ed_trends_covid": "Increasing",
191+
"ed_trends_influenza": "Increasing",
192+
"ed_trends_rsv": "Increasing",
193+
"hsa": "Pueblo (Pueblo), CO - Las Animas, CO",
194+
"hsa_counties": "Huerfano, Las Animas, Pueblo",
195+
"hsa_nci_id": "704",
196+
"fips": "8101",
197+
"trend_source": "HSA",
198+
"buildnumber": "2025-02-28"
199+
}
200+
]

0 commit comments

Comments
 (0)