Skip to content

Commit 766959b

Browse files
authored
Merge pull request #1044 from cmu-delphi/ds/covidcast-row
Add `CovidcastRow` testing util and a few other changes
2 parents f949439 + b3cba4c commit 766959b

File tree

16 files changed

+636
-330
lines changed

16 files changed

+636
-330
lines changed
Original file line number | Diff line number | Diff line change
@@ -1,4 +1,4 @@
11
geo_id,value,stderr,sample_size,issue,time_value,geo_type,signal,source
2-
d_nonlatest,0,0,0,1,0,geo,sig,src
3-
d_latest, 0,0,0,3,0,geo,sig,src
4-
d_justone, 0,0,0,1,0,geo,sig,src
2+
d_nonlatest,0,0,0,1,0,county,sig,src
3+
d_latest, 0,0,0,3,0,county,sig,src
4+
d_justone, 0,0,0,1,0,county,sig,src

integrations/acquisition/covidcast/test_csv_uploading.py

+2-2
Original file line number | Diff line number | Diff line change
@@ -213,8 +213,8 @@ def test_uploading(self):
213213
"time_value": [20200419],
214214
"signal": [signal_name],
215215
"direction": [None]})], axis=1).rename(columns=uploader_column_rename)
216-
expected_values_df["missing_value"].iloc[0] = Nans.OTHER
217-
expected_values_df["missing_sample_size"].iloc[0] = Nans.NOT_MISSING
216+
expected_values_df.loc[0, "missing_value"] = Nans.OTHER
217+
expected_values_df.loc[0, "missing_sample_size"] = Nans.NOT_MISSING
218218
expected_values = expected_values_df.to_dict(orient="records")
219219
expected_response = {'result': 1, 'epidata': self.apply_lag(expected_values), 'message': 'success'}
220220

integrations/acquisition/covidcast/test_db.py

+9-10
Original file line number | Diff line number | Diff line change
@@ -1,17 +1,16 @@
1-
import unittest
2-
31
from delphi_utils import Nans
4-
from delphi.epidata.acquisition.covidcast.database import Database, CovidcastRow, DBLoadStateException
5-
from delphi.epidata.acquisition.covidcast.test_utils import CovidcastBase
6-
import delphi.operations.secrets as secrets
2+
3+
from delphi.epidata.acquisition.covidcast.database import DBLoadStateException
4+
from delphi.epidata.acquisition.covidcast.test_utils import CovidcastBase, CovidcastTestRow
5+
76

87
# all the Nans we use here are just one value, so this is a shortcut to it:
98
nmv = Nans.NOT_MISSING.value
109

1110
class TestTest(CovidcastBase):
1211

1312
def _find_matches_for_row(self, row):
14-
# finds (if existing) row from both history and latest views that matches long-key of provided CovidcastRow
13+
# finds (if existing) row from both history and latest views that matches long-key of provided CovidcastTestRow
1514
cols = "source signal time_type time_value geo_type geo_value issue".split()
1615
results = {}
1716
cur = self._db._cursor
@@ -31,8 +30,8 @@ def _find_matches_for_row(self, row):
3130

3231
def test_insert_or_update_with_nonempty_load_table(self):
3332
# make rows
34-
a_row = self._make_placeholder_row()[0]
35-
another_row = self._make_placeholder_row(time_value=self.DEFAULT_TIME_VALUE+1, issue=self.DEFAULT_ISSUE+1)[0]
33+
a_row = CovidcastTestRow.make_default_row(time_value=2020_02_02)
34+
another_row = CovidcastTestRow.make_default_row(time_value=2020_02_03, issue=2020_02_03)
3635
# insert one
3736
self._db.insert_or_update_bulk([a_row])
3837
# put something into the load table
@@ -61,7 +60,7 @@ def test_id_sync(self):
6160
latest_view = 'epimetric_latest_v'
6261

6362
# add a data point
64-
base_row, _ = self._make_placeholder_row()
63+
base_row = CovidcastTestRow.make_default_row()
6564
self._insert_rows([base_row])
6665
# ensure the primary keys match in the latest and history tables
6766
matches = self._find_matches_for_row(base_row)
@@ -71,7 +70,7 @@ def test_id_sync(self):
7170
old_pk_id = matches[latest_view][pk_column]
7271

7372
# add a reissue for said data point
74-
next_row, _ = self._make_placeholder_row()
73+
next_row = CovidcastTestRow.make_default_row()
7574
next_row.issue += 1
7675
self._insert_rows([next_row])
7776
# ensure the new keys also match

integrations/acquisition/covidcast/test_delete_batch.py

+9-16
Original file line number | Diff line number | Diff line change
@@ -5,13 +5,10 @@
55
import unittest
66
from os import path
77

8-
# third party
9-
import mysql.connector
10-
118
# first party
12-
from delphi_utils import Nans
13-
from delphi.epidata.acquisition.covidcast.database import Database, CovidcastRow
149
import delphi.operations.secrets as secrets
10+
from delphi.epidata.acquisition.covidcast.database import Database
11+
from delphi.epidata.acquisition.covidcast.test_utils import covidcast_rows_from_args
1512

1613
# py3tester coverage target (equivalent to `import *`)
1714
__test_target__ = 'delphi.epidata.acquisition.covidcast.database'
@@ -56,17 +53,13 @@ def test_delete_from_tuples(self):
5653

5754
def _test_delete_batch(self, cc_deletions):
5855
# load sample data
59-
rows = []
60-
for time_value in [0, 1]:
61-
rows += [
62-
# varying numeric column here (2nd to last) is `issue`
63-
CovidcastRow('src', 'sig', 'day', 'geo', time_value, "d_nonlatest", 0,0,0,0,0,0, 1, 0),
64-
CovidcastRow('src', 'sig', 'day', 'geo', time_value, "d_nonlatest", 0,0,0,0,0,0, 2, 0),
65-
CovidcastRow('src', 'sig', 'day', 'geo', time_value, "d_latest", 0,0,0,0,0,0, 1, 0),
66-
CovidcastRow('src', 'sig', 'day', 'geo', time_value, "d_latest", 0,0,0,0,0,0, 2, 0),
67-
CovidcastRow('src', 'sig', 'day', 'geo', time_value, "d_latest", 0,0,0,0,0,0, 3, 0)
68-
]
69-
rows.append(CovidcastRow('src', 'sig', 'day', 'geo', 0, "d_justone", 0,0,0,0,0,0, 1, 0))
56+
rows = covidcast_rows_from_args(
57+
time_value = [0] * 5 + [1] * 5 + [0],
58+
geo_value = ["d_nonlatest"] * 2 + ["d_latest"] * 3 + ["d_nonlatest"] * 2 + ["d_latest"] * 3 + ["d_justone"],
59+
issue = [1, 2] + [1, 2, 3] + [1, 2] + [1, 2, 3] + [1],
60+
sanitize_fields = True
61+
)
62+
7063
self._db.insert_or_update_bulk(rows)
7164

7265
# delete entries

integrations/client/test_delphi_epidata.py

+61-58
Original file line number | Diff line number | Diff line change
@@ -1,26 +1,26 @@
11
"""Integration tests for delphi_epidata.py."""
22

33
# standard library
4-
import unittest
54
import time
6-
from unittest.mock import patch, MagicMock
75
from json import JSONDecodeError
6+
from unittest.mock import MagicMock, patch
87

9-
# third party
10-
from aiohttp.client_exceptions import ClientResponseError
11-
import mysql.connector
8+
# third party
129
import pytest
10+
from aiohttp.client_exceptions import ClientResponseError
1311

14-
# first party
15-
from delphi_utils import Nans
16-
from delphi.epidata.client.delphi_epidata import Epidata
17-
from delphi.epidata.acquisition.covidcast.database import Database, CovidcastRow
18-
from delphi.epidata.acquisition.covidcast.covidcast_meta_cache_updater import main as update_covidcast_meta_cache
19-
from delphi.epidata.acquisition.covidcast.test_utils import CovidcastBase
12+
# first party
2013
import delphi.operations.secrets as secrets
14+
from delphi.epidata.acquisition.covidcast.covidcast_meta_cache_updater import main as update_covidcast_meta_cache
15+
from delphi.epidata.acquisition.covidcast.test_utils import CovidcastBase, CovidcastTestRow
16+
from delphi.epidata.client.delphi_epidata import Epidata
17+
from delphi_utils import Nans
18+
2119

2220
# py3tester coverage target
2321
__test_target__ = 'delphi.epidata.client.delphi_epidata'
22+
# all the Nans we use here are just one value, so this is a shortcut to it:
23+
nmv = Nans.NOT_MISSING.value
2424

2525
def fake_epidata_endpoint(func):
2626
"""This can be used as a decorator to enable a bogus Epidata endpoint to return 404 responses."""
@@ -30,9 +30,6 @@ def wrapper(*args):
3030
Epidata.BASE_URL = 'http://delphi_web_epidata/epidata/api.php'
3131
return wrapper
3232

33-
# all the Nans we use here are just one value, so this is a shortcut to it:
34-
nmv = Nans.NOT_MISSING.value
35-
3633
class DelphiEpidataPythonClientTests(CovidcastBase):
3734
"""Tests the Python client."""
3835

@@ -54,13 +51,11 @@ def test_covidcast(self):
5451

5552
# insert placeholder data: three issues of one signal, one issue of another
5653
rows = [
57-
self._make_placeholder_row(issue=self.DEFAULT_ISSUE + i, value=i, lag=i)[0]
54+
CovidcastTestRow.make_default_row(issue=2020_02_02 + i, value=i, lag=i)
5855
for i in range(3)
5956
]
6057
row_latest_issue = rows[-1]
61-
rows.append(
62-
self._make_placeholder_row(signal="sig2")[0]
63-
)
58+
rows.append(CovidcastTestRow.make_default_row(signal="sig2"))
6459
self._insert_rows(rows)
6560

6661
with self.subTest(name='request two signals'):
@@ -70,10 +65,11 @@ def test_covidcast(self):
7065
)
7166

7267
expected = [
73-
self.expected_from_row(row_latest_issue),
74-
self.expected_from_row(rows[-1])
68+
row_latest_issue.as_api_compatibility_row_dict(),
69+
rows[-1].as_api_compatibility_row_dict()
7570
]
7671

72+
self.assertEqual(response['epidata'], expected)
7773
# check result
7874
self.assertEqual(response, {
7975
'result': 1,
@@ -89,10 +85,10 @@ def test_covidcast(self):
8985

9086
expected = [{
9187
rows[0].signal: [
92-
self.expected_from_row(row_latest_issue, self.DEFAULT_MINUS + ['signal']),
88+
row_latest_issue.as_api_compatibility_row_dict(ignore_fields=['signal']),
9389
],
9490
rows[-1].signal: [
95-
self.expected_from_row(rows[-1], self.DEFAULT_MINUS + ['signal']),
91+
rows[-1].as_api_compatibility_row_dict(ignore_fields=['signal']),
9692
],
9793
}]
9894

@@ -109,12 +105,12 @@ def test_covidcast(self):
109105
**self.params_from_row(rows[0])
110106
)
111107

112-
expected = self.expected_from_row(row_latest_issue)
108+
expected = [row_latest_issue.as_api_compatibility_row_dict()]
113109

114110
# check result
115111
self.assertEqual(response_1, {
116112
'result': 1,
117-
'epidata': [expected],
113+
'epidata': expected,
118114
'message': 'success',
119115
})
120116

@@ -124,13 +120,13 @@ def test_covidcast(self):
124120
**self.params_from_row(rows[0], as_of=rows[1].issue)
125121
)
126122

127-
expected = self.expected_from_row(rows[1])
123+
expected = [rows[1].as_api_compatibility_row_dict()]
128124

129125
# check result
130126
self.maxDiff=None
131127
self.assertEqual(response_1a, {
132128
'result': 1,
133-
'epidata': [expected],
129+
'epidata': expected,
134130
'message': 'success',
135131
})
136132

@@ -141,8 +137,8 @@ def test_covidcast(self):
141137
)
142138

143139
expected = [
144-
self.expected_from_row(rows[0]),
145-
self.expected_from_row(rows[1])
140+
rows[0].as_api_compatibility_row_dict(),
141+
rows[1].as_api_compatibility_row_dict()
146142
]
147143

148144
# check result
@@ -158,12 +154,12 @@ def test_covidcast(self):
158154
**self.params_from_row(rows[0], lag=2)
159155
)
160156

161-
expected = self.expected_from_row(row_latest_issue)
157+
expected = [row_latest_issue.as_api_compatibility_row_dict()]
162158

163159
# check result
164160
self.assertDictEqual(response_3, {
165161
'result': 1,
166-
'epidata': [expected],
162+
'epidata': expected,
167163
'message': 'success',
168164
})
169165
with self.subTest(name='long request'):
@@ -223,16 +219,16 @@ def test_geo_value(self):
223219
# insert placeholder data: three counties, three MSAs
224220
N = 3
225221
rows = [
226-
self._make_placeholder_row(geo_type="county", geo_value=str(i)*5, value=i)[0]
222+
CovidcastTestRow.make_default_row(geo_type="county", geo_value=str(i)*5, value=i)
227223
for i in range(N)
228224
] + [
229-
self._make_placeholder_row(geo_type="msa", geo_value=str(i)*5, value=i*10)[0]
225+
CovidcastTestRow.make_default_row(geo_type="msa", geo_value=str(i)*5, value=i*10)
230226
for i in range(N)
231227
]
232228
self._insert_rows(rows)
233229

234230
counties = [
235-
self.expected_from_row(rows[i]) for i in range(N)
231+
rows[i].as_api_compatibility_row_dict() for i in range(N)
236232
]
237233

238234
def fetch(geo):
@@ -241,41 +237,48 @@ def fetch(geo):
241237
)
242238

243239
# test fetch all
244-
r = fetch('*')
245-
self.assertEqual(r['message'], 'success')
246-
self.assertEqual(r['epidata'], counties)
240+
request = fetch('*')
241+
self.assertEqual(request['message'], 'success')
242+
self.assertEqual(request['epidata'], counties)
247243
# test fetch a specific region
248-
r = fetch('11111')
249-
self.assertEqual(r['message'], 'success')
250-
self.assertEqual(r['epidata'], [counties[1]])
244+
request = fetch('11111')
245+
self.assertEqual(request['message'], 'success')
246+
self.assertEqual(request['epidata'], [counties[1]])
251247
# test fetch a specific but nonexistent region
252-
r = fetch('55555')
253-
self.assertEqual(r['message'], 'no results')
248+
request = fetch('55555')
249+
self.assertEqual(request['message'], 'no results')
254250
# test fetch multiple regions
255-
r = fetch(['11111', '22222'])
256-
self.assertEqual(r['message'], 'success')
257-
self.assertEqual(r['epidata'], [counties[1], counties[2]])
251+
request = fetch(['11111', '22222'])
252+
self.assertEqual(request['message'], 'success')
253+
self.assertEqual(request['epidata'], [counties[1], counties[2]])
258254
# test fetch multiple regions in another variant
259-
r = fetch(['00000', '22222'])
260-
self.assertEqual(r['message'], 'success')
261-
self.assertEqual(r['epidata'], [counties[0], counties[2]])
255+
request = fetch(['00000', '22222'])
256+
self.assertEqual(request['message'], 'success')
257+
self.assertEqual(request['epidata'], [counties[0], counties[2]])
262258
# test fetch multiple regions where one does not exist
263-
r = fetch(['11111', '55555'])
264-
self.assertEqual(r['message'], 'success')
265-
self.assertEqual(r['epidata'], [counties[1]])
259+
request = fetch(['11111', '55555'])
260+
self.assertEqual(request['message'], 'success')
261+
self.assertEqual(request['epidata'], [counties[1]])
266262
# test fetch multiple regions but specify no region
267-
r = fetch([])
268-
self.assertEqual(r['message'], 'no results')
263+
request = fetch([])
264+
self.assertEqual(request['message'], 'no results')
269265

270266
def test_covidcast_meta(self):
271267
"""Test that the covidcast_meta endpoint returns expected data."""
272268

269+
DEFAULT_TIME_VALUE = 2020_02_02
270+
DEFAULT_ISSUE = 2020_02_02
271+
273272
# insert placeholder data: three dates, three issues. values are:
274273
# 1st issue: 0 10 20
275274
# 2nd issue: 1 11 21
276275
# 3rd issue: 2 12 22
277276
rows = [
278-
self._make_placeholder_row(time_value=self.DEFAULT_TIME_VALUE + t, issue=self.DEFAULT_ISSUE + i, value=t*10 + i)[0]
277+
CovidcastTestRow.make_default_row(
278+
time_value=DEFAULT_TIME_VALUE + t,
279+
issue=DEFAULT_ISSUE + i,
280+
value=t*10 + i
281+
)
279282
for i in range(3) for t in range(3)
280283
]
281284
self._insert_rows(rows)
@@ -299,14 +302,14 @@ def test_covidcast_meta(self):
299302
signal=rows[0].signal,
300303
time_type=rows[0].time_type,
301304
geo_type=rows[0].geo_type,
302-
min_time=self.DEFAULT_TIME_VALUE,
303-
max_time=self.DEFAULT_TIME_VALUE + 2,
305+
min_time=DEFAULT_TIME_VALUE,
306+
max_time=DEFAULT_TIME_VALUE + 2,
304307
num_locations=1,
305308
min_value=2.,
306309
mean_value=12.,
307310
max_value=22.,
308311
stdev_value=8.1649658, # population stdev, not sample, which is 10.
309-
max_issue=self.DEFAULT_ISSUE + 2,
312+
max_issue=DEFAULT_ISSUE + 2,
310313
min_lag=0,
311314
max_lag=0, # we didn't set lag when inputting data
312315
)
@@ -322,10 +325,10 @@ def test_async_epidata(self):
322325
# insert placeholder data: three counties, three MSAs
323326
N = 3
324327
rows = [
325-
self._make_placeholder_row(geo_type="county", geo_value=str(i)*5, value=i)[0]
328+
CovidcastTestRow.make_default_row(geo_type="county", geo_value=str(i)*5, value=i)
326329
for i in range(N)
327330
] + [
328-
self._make_placeholder_row(geo_type="msa", geo_value=str(i)*5, value=i*10)[0]
331+
CovidcastTestRow.make_default_row(geo_type="msa", geo_value=str(i)*5, value=i*10)
329332
for i in range(N)
330333
]
331334
self._insert_rows(rows)

0 commit comments

Comments
 (0)