Skip to content

Commit 68af3b6

Browse files
committed
catching up w/ versioning by adding issue and lag fields
1 parent 46d6a69 commit 68af3b6

File tree

3 files changed

+21
-12
lines changed

3 files changed

+21
-12
lines changed

src/acquisition/covidcast/csv_to_database.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -78,7 +78,7 @@ def archive_as_successful(path_src, filename, source):
7878

7979
all_rows_valid = False
8080
try:
81-
cc_rows = CovidcastRow.fromCsvRows(csv_rows, source, signal, time_type, geo_type, time_value)
81+
cc_rows = CovidcastRow.fromCsvRows(csv_rows, source, signal, time_type, geo_type, time_value, issue, lag)
8282
rows_list = list(cc_rows)
8383
if not rows_list:
8484
raise ValueError("No data")

src/acquisition/covidcast/database.py

+18-10
Original file line numberDiff line numberDiff line change
@@ -17,19 +17,21 @@ class CovidcastRow():
1717
"""A container for all the values of a single covidcast row."""
1818

1919
@staticmethod
20-
def fromCsvRowValue(row_value, source, signal, time_type, geo_type, time_value):
20+
def fromCsvRowValue(row_value, source, signal, time_type, geo_type, time_value, issue, lag):
2121
return CovidcastRow(source, signal, time_type, geo_type, time_value,
22-
row_value.geo_value,
23-
row_value.value,
24-
row_value.stderr,
25-
row_value.sample_size)
22+
row_value.geo_value,
23+
row_value.value,
24+
row_value.stderr,
25+
row_value.sample_size,
26+
issue, lag)
2627

2728
@staticmethod
28-
def fromCsvRows(row_values, source, signal, time_type, geo_type, time_value):
29+
def fromCsvRows(row_values, source, signal, time_type, geo_type, time_value, issue, lag):
2930
# NOTE: returns a generator, as row_values is expected to be a generator
30-
return (CovidcastRow.fromCsvRowValue(row_value, source, signal, time_type, geo_type, time_value) for row_value in row_values)
31+
return (CovidcastRow.fromCsvRowValue(row_value, source, signal, time_type, geo_type, time_value, issue, lag)
32+
for row_value in row_values)
3133

32-
def __init__(self, source, signal, time_type, geo_type, time_value, geo_value, value, stderr, sample_size):
34+
def __init__(self, source, signal, time_type, geo_type, time_value, geo_value, value, stderr, sample_size, issue, lag):
3335
self.id = None
3436
self.source = source
3537
self.signal = signal
@@ -42,6 +44,8 @@ def __init__(self, source, signal, time_type, geo_type, time_value, geo_value, v
4244
self.sample_size = sample_size # from CSV row
4345
self.timestamp2 = 0
4446
self.direction = None
47+
self.issue = issue
48+
self.lag = lag
4549

4650

4751
class Database:
@@ -98,11 +102,13 @@ def insert_or_update_batch(self, cc_rows, batch_size=0, commit_partial=False):
98102
INSERT INTO `covidcast`
99103
(`id`, `source`, `signal`, `time_type`, `geo_type`, `time_value`, `geo_value`,
100104
`timestamp1`, `value`, `stderr`, `sample_size`,
101-
`timestamp2`, `direction`)
105+
`timestamp2`, `direction`,
106+
`issue`, `lag`)
102107
VALUES
103108
(0, %s, %s, %s, %s, %s, %s,
104109
UNIX_TIMESTAMP(NOW()), %s, %s, %s,
105-
0, NULL)
110+
0, NULL,
111+
%s, %s)
106112
ON DUPLICATE KEY UPDATE
107113
`timestamp1` = VALUES(`timestamp1`),
108114
`value` = VALUES(`value`),
@@ -132,6 +138,8 @@ def insert_or_update_batch(self, cc_rows, batch_size=0, commit_partial=False):
132138
row.value,
133139
row.stderr,
134140
row.sample_size,
141+
row.issue,
142+
row.lag
135143
) for row in cc_rows[start:end]]
136144

137145
result = self._cursor.executemany(sql, args)

tests/acquisition/covidcast/test_csv_to_database.py

+2-1
Original file line numberDiff line numberDiff line change
@@ -71,7 +71,8 @@ def load_csv_impl(path, *args):
7171
self.assertEqual(mock_database.insert_or_update_bulk.call_count, 2)
7272
call_args_list = mock_database.insert_or_update_bulk.call_args_list
7373
actual_args = [[(a.source, a.signal, a.time_type, a.geo_type, a.time_value,
74-
a.geo_value, a.value, a.stderr, a.sample_size) for a in call.args[0]] for call in call_args_list]
74+
a.geo_value, a.value, a.stderr, a.sample_size, a.issue, a.lag)
75+
for a in call.args[0]] for call in call_args_list]
7576
expected_args = [
7677
[('src_a', 'sig_a', 'day', 'hrr', 20200419, 'a1', 'a1', 'a1', 'a1', 20200420, 1),
7778
('src_a', 'sig_a', 'day', 'hrr', 20200419, 'a2', 'a2', 'a2', 'a2', 20200420, 1),

0 commit comments

Comments
 (0)