Skip to content

Commit 41de06b

Browse files
authored
Merge pull request #314 from chinandrew/hhs-support
Add HHS support and tests
2 parents 38ff1f5 + 68db661 commit 41de06b

File tree

2 files changed: 11 additions (+11) and 3 deletions (-3).

src/acquisition/covidcast/csv_importer.py

Lines changed: 6 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -27,7 +27,7 @@ class CsvImporter:
2727
PATTERN_ISSUE_DIR = re.compile(r'^.*/([^/]*)/issue_(\d{8})$')
2828

2929
# set of allowed resolutions (aka "geo_type")
30-
GEOGRAPHIC_RESOLUTIONS = {'county', 'hrr', 'msa', 'dma', 'state', 'nation'}
30+
GEOGRAPHIC_RESOLUTIONS = {'county', 'hrr', 'msa', 'dma', 'state', 'hhs', 'nation'}
3131

3232
# set of required CSV columns
3333
REQUIRED_COLUMNS = {'geo_id', 'val', 'se', 'sample_size'}
@@ -209,7 +209,7 @@ def extract_and_check_row(row, geo_type):
209209
# geo_id was `None`
210210
return (None, 'geo_id')
211211

212-
if geo_type in ('hrr', 'msa', 'dma'):
212+
if geo_type in ('hrr', 'msa', 'dma', 'hhs'):
213213
# these particular ids are prone to be written as ints -- and floats
214214
try:
215215
geo_id = str(CsvImporter.floaty_int(geo_id))
@@ -239,6 +239,10 @@ def extract_and_check_row(row, geo_type):
239239
if len(geo_id) != 2 or not 'aa' <= geo_id <= 'zz':
240240
return (None, 'geo_id')
241241

242+
elif geo_type == 'hhs':
243+
if not 1 <= int(geo_id) <= 10:
244+
return (None, 'geo_id')
245+
242246
elif geo_type == 'nation':
243247
# geo_id is lowercase
244248
if len(geo_id) != 2 or not 'aa' <= geo_id <= 'zz':

tests/acquisition/covidcast/test_csv_importer.py

Lines changed: 5 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -45,6 +45,8 @@ def test_find_csv_files(self):
4545
path_prefix + 'ght/20200408_state_rawsearch.csv',
4646
# valid national
4747
path_prefix + 'valid/20200408_nation_sig.csv',
48+
# valid hhs
49+
path_prefix + 'valid/20200408_hhs_sig.csv',
4850
# invalid
4951
path_prefix + 'invalid/hello_world.csv',
5052
# invalid day
@@ -68,10 +70,11 @@ def test_find_csv_files(self):
6870
(glob_paths[0], ('fb_survey', 'cli', 'week', 'county', 202015, expected_issue_week, delta_epiweeks(202015, expected_issue_week))),
6971
(glob_paths[1], ('ght', 'rawsearch', 'day', 'state', time_value_day, expected_issue_day, (date.today() - date(year=time_value_day // 10000, month=(time_value_day // 100) % 100, day=time_value_day % 100)).days)),
7072
(glob_paths[2], ('valid', 'sig', 'day', 'nation', time_value_day, expected_issue_day, (date.today() - date(year=time_value_day // 10000, month=(time_value_day // 100) % 100, day=time_value_day % 100)).days)),
71-
(glob_paths[3], None),
73+
(glob_paths[3], ('valid', 'sig', 'day', 'hhs', time_value_day, expected_issue_day, (date.today() - date(year=time_value_day // 10000, month=(time_value_day // 100) % 100, day=time_value_day % 100)).days)),
7274
(glob_paths[4], None),
7375
(glob_paths[5], None),
7476
(glob_paths[6], None),
77+
(glob_paths[7], None),
7578
])
7679
self.assertEqual(found, expected)
7780

@@ -137,6 +140,7 @@ def make_row(
137140
(make_row(geo_type='state', geo_id='48'), 'geo_id'),
138141
(make_row(geo_type='state', geo_id='iowa'), 'geo_id'),
139142
(make_row(geo_type='nation', geo_id='0000'), 'geo_id'),
143+
(make_row(geo_type='hhs', geo_id='0'), 'geo_id'),
140144
(make_row(geo_type='province', geo_id='ab'), 'geo_type'),
141145
(make_row(se='-1'), 'se'),
142146
(make_row(geo_type=None), 'geo_type'),

0 commit comments

Comments
 (0)