Skip to content

Commit 57aa137

Browse files
committed
Acquisition: change PathDetails order to path order
* update covidcast_nowcast
* update tests
1 parent 51722fa commit 57aa137

File tree

5 files changed

+31
-29
lines changed

5 files changed

+31
-29
lines changed

src/acquisition/covidcast/csv_importer.py

+2-2
Original file line numberDiff line numberDiff line change
@@ -19,7 +19,7 @@
1919
from delphi.epidata.acquisition.covidcast.logger import get_structured_logger
2020

2121
DFRow = NamedTuple('DFRow', [('geo_id', str), ('value', float), ('stderr', float), ('sample_size', float), ('missing_value', int), ('missing_stderr', int), ('missing_sample_size', int)])
22-
PathDetails = NamedTuple('PathDetails', [('source', str), ('signal', str), ('time_type', str), ('geo_type', str), ('time_value', int), ('issue', int), ('lag', int)])
22+
PathDetails = NamedTuple('PathDetails', [('issue', int), ('lag', int), ('source', str), ('signal', str), ('time_type', str), ('time_value', int), ('geo_type', str)])
2323

2424

2525
@dataclass
@@ -185,7 +185,7 @@ def find_csv_files(scan_dir, issue=(date.today(), epi.Week.fromdate(date.today()
185185
yield (path, None)
186186
continue
187187

188-
yield (path, PathDetails(source, signal, time_type, geo_type, time_value, issue_value, lag_value))
188+
yield (path, PathDetails(issue_value, lag_value, source, signal, time_type, time_value, geo_type))
189189

190190

191191
@staticmethod

src/acquisition/covidcast_nowcast/load_sensors.py

+9-10
Original file line numberDiff line numberDiff line change
@@ -6,7 +6,7 @@
66
import sqlalchemy
77

88
import delphi.operations.secrets as secrets
9-
from delphi.epidata.acquisition.covidcast.csv_importer import CsvImporter
9+
from delphi.epidata.acquisition.covidcast.csv_importer import CsvImporter, PathDetails
1010

1111
SENSOR_CSV_PATH = "/common/covidcast_nowcast/receiving/"
1212
SUCCESS_DIR = "archive/successful"
@@ -52,7 +52,7 @@ def main(csv_path: str = SENSOR_CSV_PATH) -> None:
5252
_move_after_processing(filepath, success=True)
5353

5454

55-
def load_and_prepare_file(filepath: str, attributes: tuple) -> pd.DataFrame:
55+
def load_and_prepare_file(filepath: str, attributes: PathDetails) -> pd.DataFrame:
5656
"""
5757
Read CSV file into a DataFrame and add relevant attributes as new columns to match DB table.
5858
@@ -68,15 +68,14 @@ def load_and_prepare_file(filepath: str, attributes: tuple) -> pd.DataFrame:
6868
-------
6969
DataFrame with additional attributes added as columns based on filename and current date.
7070
"""
71-
source, signal, time_type, geo_type, time_value, issue_value, lag_value = attributes
7271
data = pd.read_csv(filepath, dtype=CSV_DTYPES)
73-
data["source"] = source
74-
data["signal"] = signal
75-
data["time_type"] = time_type
76-
data["geo_type"] = geo_type
77-
data["time_value"] = time_value
78-
data["issue"] = issue_value
79-
data["lag"] = lag_value
72+
data["source"] = attributes.source
73+
data["signal"] = attributes.signal
74+
data["time_type"] = attributes.time_type
75+
data["geo_type"] = attributes.geo_type
76+
data["time_value"] = attributes.time_value
77+
data["issue"] = attributes.issue
78+
data["lag"] = attributes.lag
8079
data["value_updated_timestamp"] = int(time.time())
8180
return data
8281

tests/acquisition/covidcast/test_csv_importer.py

+7-7
Original file line numberDiff line numberDiff line change
@@ -105,10 +105,10 @@ def test_find_csv_files(self, mock_glob: MagicMock):
105105
expected_issue_week=int(str(epi.Week.fromdate(date.today())))
106106
time_value_day = 20200408
107107
expected = set([
108-
(glob_paths[0], ('fb_survey', 'cli', 'week', 'county', 202015, expected_issue_week, delta_epiweeks(202015, expected_issue_week))),
109-
(glob_paths[1], ('ght', 'rawsearch', 'day', 'state', time_value_day, expected_issue_day, (date.today() - date(year=time_value_day // 10000, month=(time_value_day // 100) % 100, day=time_value_day % 100)).days)),
110-
(glob_paths[2], ('valid', 'sig', 'day', 'nation', time_value_day, expected_issue_day, (date.today() - date(year=time_value_day // 10000, month=(time_value_day // 100) % 100, day=time_value_day % 100)).days)),
111-
(glob_paths[3], ('valid', 'sig', 'day', 'hhs', time_value_day, expected_issue_day, (date.today() - date(year=time_value_day // 10000, month=(time_value_day // 100) % 100, day=time_value_day % 100)).days)),
108+
(glob_paths[0], PathDetails(expected_issue_week, delta_epiweeks(202015, expected_issue_week), 'fb_survey', 'cli', 'week', 202015, 'county')),
109+
(glob_paths[1], PathDetails(expected_issue_day, (date.today() - date(year=time_value_day // 10000, month=(time_value_day // 100) % 100, day=time_value_day % 100)).days, 'ght', 'rawsearch', 'day', time_value_day, 'state')),
110+
(glob_paths[2], PathDetails(expected_issue_day, (date.today() - date(year=time_value_day // 10000, month=(time_value_day // 100) % 100, day=time_value_day % 100)).days, 'valid', 'sig', 'day', time_value_day, 'nation')),
111+
(glob_paths[3], PathDetails(expected_issue_day, (date.today() - date(year=time_value_day // 10000, month=(time_value_day // 100) % 100, day=time_value_day % 100)).days, 'valid', 'sig', 'day', time_value_day, 'hhs')),
112112
(glob_paths[4], None),
113113
(glob_paths[5], None),
114114
(glob_paths[6], None),
@@ -233,7 +233,7 @@ def test_load_csv_with_invalid_header(self, mock_read_csv):
233233

234234
data = {'foo': [1, 2, 3]}
235235
filepath = 'path/name.csv'
236-
details = PathDetails("src", "name", "day", "state", 20200101, 20200101, 0)
236+
details = PathDetails(20200101, 0, "src", "name", "day", 20200101, "state")
237237

238238
mock_read_csv.return_value = pd.DataFrame(data)
239239
rows = list(CsvImporter.load_csv(filepath, details))
@@ -255,7 +255,7 @@ def test_load_csv_with_valid_header(self, mock_read_csv):
255255
'sample_size': ['301', '302', '303', '304'],
256256
}
257257
filepath = 'path/name.csv'
258-
details = PathDetails("src", "name", "day", "state", 20200101, 20200101, 0)
258+
details = PathDetails(20200101, 0, "src", "name", "day", 20200101, "state")
259259

260260
mock_read_csv.return_value = pd.DataFrame(data=data)
261261
rows = list(CsvImporter.load_csv(filepath, details))
@@ -292,7 +292,7 @@ def test_load_csv_with_valid_header(self, mock_read_csv):
292292
'missing_sample_size': [Nans.NOT_MISSING] * 2 + [Nans.REGION_EXCEPTION] * 2 + [None]
293293
}
294294
filepath = 'path/name.csv'
295-
details = PathDetails("src", "name", "day", "state", 20200101, 20200101, 0)
295+
details = PathDetails(20200101, 0, "src", "name", "day", 20200101, "state")
296296

297297
mock_read_csv.return_value = pd.DataFrame(data)
298298
rows = list(CsvImporter.load_csv(filepath, details))

tests/acquisition/covidcast/test_csv_to_database.py

+3-3
Original file line numberDiff line numberDiff line change
@@ -17,9 +17,9 @@ class UnitTests(unittest.TestCase):
1717
"""Basic unit tests."""
1818
_path_details = [
1919
# a good file
20-
('path/a.csv', PathDetails('src_a', 'sig_a', 'day', 'hrr', 20200419, 20200420, 1)),
20+
('path/a.csv', PathDetails(20200420, 1, 'src_a', 'sig_a', 'day', 20200419, 'hrr')),
2121
# a file with a data error
22-
('path/b.csv', PathDetails('src_b', 'sig_b', 'week', 'msa', 202016, 202017, 1)),
22+
('path/b.csv', PathDetails(202017, 1, 'src_b', 'sig_b', 'week', 202016, 'msa')),
2323
# emulate a file that's named incorrectly
2424
('path/c.csv', None)
2525
]
@@ -194,7 +194,7 @@ def test_database_exception_is_handled(self, mock_file_archiver: MagicMock, mock
194194
data_dir = 'data_dir'
195195
mock_database.insert_or_update_bulk.side_effect = Exception('testing')
196196
mock_csv_importer.find_csv_files.return_value = [
197-
('path/file.csv', PathDetails('src', 'sig', 'day', 'hrr', 20200423, 20200424, 1)),
197+
('path/file.csv', PathDetails(20200424, 1, 'src', 'sig', 'day', 20200423, 'hrr')),
198198
]
199199
mock_csv_importer.load_csv.return_value = [
200200
MagicMock(geo_value='geo', value=1, stderr=1, sample_size=1),

tests/acquisition/covidcast_nowcast/test_load_sensors.py

+10-7
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,7 @@
99
import pandas as pd
1010

1111
# first party
12+
from delphi.epidata.acquisition.covidcast.csv_importer import PathDetails
1213
from delphi.epidata.acquisition.covidcast_nowcast.load_sensors import main, load_and_prepare_file
1314

1415
# py3tester coverage target
@@ -20,13 +21,15 @@ class UpdateTests(unittest.TestCase):
2021
@mock.patch('time.time', mock.MagicMock(return_value=12345))
2122
def test_load_and_prepare_file(self):
2223

23-
test_attributes = ("test_source",
24-
"test_signal",
25-
"test_time_type",
26-
"test_geo_type",
27-
20201231,
28-
20210102,
29-
3)
24+
test_attributes = PathDetails(
25+
20210102,
26+
3,
27+
"test_source",
28+
"test_signal",
29+
"test_time_type",
30+
20201231,
31+
"test_geo_type",
32+
)
3033

3134
test_df = load_and_prepare_file(StringIO("sensor_name,geo_value,value\ntestname,01001,1.5"), test_attributes)
3235
pd.testing.assert_frame_equal(test_df,

0 commit comments

Comments
 (0)