Skip to content

Commit abb4f83

Browse files
authored
Merge pull request #196 from sgratzl/sgratzl/filterfields
filter fields and meta
2 parents 5183f9d + 203c924 commit abb4f83

File tree

4 files changed

+257
-0
lines changed

4 files changed

+257
-0
lines changed

integrations/server/test_covidcast.py

Lines changed: 89 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -79,6 +79,95 @@ def test_round_trip(self):
7979
'message': 'success',
8080
})
8181

82+
def test_fields(self):
    """Test that the `fields` parameter limits the columns of each row.

    A single row is inserted and then queried three times: without
    `fields` (every column is expected back), with two valid field names
    (only those two are expected back), and with an additional unknown
    field name (which is expected to be ignored rather than rejected).
    """

    # insert dummy data
    self.cur.execute('''
      insert into covidcast values
        (0, 'src', 'sig', 'day', 'county', 20200414, '01234',
          123, 1.5, 2.5, 3.5, 456, 4, 20200414, 0, 1, False)
    ''')
    self.cnx.commit()

    def fetch(**kwargs):
        # Query the dummy row; keyword arguments are appended as extra
        # request parameters (e.g. `fields`).
        params = {
            'source': 'covidcast',
            'data_source': 'src',
            'signal': 'sig',
            'time_type': 'day',
            'geo_type': 'county',
            'time_values': 20200414,
            'geo_value': '01234',
        }
        params.update(kwargs)
        response = requests.get(BASE_URL, params=params)
        response.raise_for_status()
        return response.json()

    # without `fields`, the complete row comes back
    self.assertEqual(fetch(), {
        'result': 1,
        'epidata': [{
            'time_value': 20200414,
            'geo_value': '01234',
            'value': 1.5,
            'stderr': 2.5,
            'sample_size': 3.5,
            'direction': 4,
            'issue': 20200414,
            'lag': 0,
            'signal': 'sig',
        }],
        'message': 'success',
    })

    # both limited requests below are expected to yield the same payload
    limited = {
        'result': 1,
        'epidata': [{
            'time_value': 20200414,
            'geo_value': '01234',
        }],
        'message': 'success',
    }

    # limit fields
    self.assertEqual(fetch(fields='time_value,geo_value'), limited)

    # an unknown field name ('dummy') does not change the result
    self.assertEqual(fetch(fields='time_value,geo_value,dummy'), limited)
170+
82171
def test_location_wildcard(self):
83172
"""Select all locations with a wildcard query."""
84173

integrations/server/test_covidcast_meta.py

Lines changed: 108 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -89,6 +89,114 @@ def test_round_trip(self):
8989
'message': 'success',
9090
})
9191

92+
93+
def test_filter(self):
    """Test the row filters and the `fields` option on some sample data.

    Populates the table with every combination of two sources, two
    signals, two time types and two geo types, refreshes the metadata
    cache, and then checks how many metadata rows come back for various
    `time_types`, `geo_types`, `signals` and `fields` parameters.
    """

    # insert dummy data and accumulate expected results (in sort order)
    template = '''
      insert into covidcast values
        (0, "%s", "%s", "%s", "%s", %d, "%s", 123, %d, 0, 0, 456, 0, %d, 0, 1, %d)
    '''
    expected = []
    for src in ('src1', 'src2'):
        for sig in ('sig1', 'sig2'):
            for tt in ('day', 'week'):
                for gt in ('hrr', 'msa'):
                    expected.append({
                        'data_source': src,
                        'signal': sig,
                        'time_type': tt,
                        'geo_type': gt,
                        'min_time': 1,
                        'max_time': 2,
                        'num_locations': 2,
                        'min_value': 10,
                        'max_value': 20,
                        'mean_value': 15,
                        'stdev_value': 5,
                        'last_update': 123,
                        'max_issue': 2,
                        'min_lag': 0,
                        'max_lag': 0,
                    })
                    for tv in (1, 2):
                        for gv, v in zip(('geo1', 'geo2'), (10, 20)):
                            self.cur.execute(
                                template % (src, sig, tt, gt, tv, gv, v, tv, False))
    self.cnx.commit()
    update_cache(args=None)

    def fetch(**kwargs):
        # make the request; keyword arguments become query parameters
        params = kwargs.copy()
        params['source'] = 'covidcast_meta'
        response = requests.get(BASE_URL, params=params)
        response.raise_for_status()
        return response.json()

    def count_expected(**criteria):
        # number of expected rows whose entries equal all given criteria
        return sum(
            1 for row in expected
            if all(row[key] == value for key, value in criteria.items())
        )

    # no filter at all: one row per combination
    res = fetch()
    self.assertEqual(res['result'], 1)
    self.assertEqual(len(res['epidata']), len(expected))

    # time types
    res = fetch(time_types='day')
    self.assertEqual(res['result'], 1)
    self.assertEqual(len(res['epidata']), count_expected(time_type='day'))

    res = fetch(time_types='day,week')
    self.assertEqual(res['result'], 1)
    self.assertEqual(len(res['epidata']), len(expected))

    # 'sec' matches no inserted row; the test expects result code -2
    res = fetch(time_types='sec')
    self.assertEqual(res['result'], -2)

    # geo types
    res = fetch(geo_types='hrr')
    self.assertEqual(res['result'], 1)
    self.assertEqual(len(res['epidata']), count_expected(geo_type='hrr'))

    res = fetch(geo_types='hrr,msa')
    self.assertEqual(res['result'], 1)
    self.assertEqual(len(res['epidata']), len(expected))

    # 'state' matches no inserted row
    res = fetch(geo_types='state')
    self.assertEqual(res['result'], -2)

    # signals, given as "source:signal", "source" or "source:*"
    res = fetch(signals='src1:sig1')
    self.assertEqual(res['result'], 1)
    self.assertEqual(
        len(res['epidata']),
        count_expected(data_source='src1', signal='sig1'))

    res = fetch(signals='src1')
    self.assertEqual(res['result'], 1)
    self.assertEqual(len(res['epidata']), count_expected(data_source='src1'))

    res = fetch(signals='src1:*')
    self.assertEqual(res['result'], 1)
    self.assertEqual(len(res['epidata']), count_expected(data_source='src1'))

    # no signal named 'src4' was inserted for 'src1'
    res = fetch(signals='src1:src4')
    self.assertEqual(res['result'], -2)

    res = fetch(signals='src1:*,src2:*')
    self.assertEqual(res['result'], 1)
    self.assertEqual(len(res['epidata']), len(expected))

    # filter fields: only the requested keys remain in each row
    res = fetch(fields='data_source,min_time')
    self.assertEqual(res['result'], 1)
    self.assertEqual(len(res['epidata']), len(expected))
    first_row = res['epidata'][0]
    self.assertIn('data_source', first_row)
    self.assertIn('min_time', first_row)
    self.assertNotIn('max_time', first_row)
    self.assertNotIn('signal', first_row)

    # an unknown field keeps the row count; the test expects each row to
    # come back empty, decoded as an empty list
    res = fetch(fields='xx')
    self.assertEqual(res['result'], 1)
    self.assertEqual(len(res['epidata']), len(expected))
    self.assertEqual(res['epidata'][0], [])
198+
199+
92200
def test_suppress_work_in_progress(self):
93201
"""Don't surface signals that are a work-in-progress."""
94202

src/server/api.php

Lines changed: 45 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1019,6 +1019,51 @@ function get_covidcast_meta() {
10191019
}
10201020
}
10211021

1022+
if ($epidata !== null) {
1023+
// filter rows
1024+
$time_types = extract_values($_REQUEST['time_types'], 'str');
1025+
$signals = isset($_REQUEST['signals']) ? array_map(function($signal) {
1026+
return explode(':', $signal, 2);
1027+
}, extract_values($_REQUEST['signals'], 'str')) : null;
1028+
$geo_types = extract_values($_REQUEST['geo_types'], 'str');
1029+
1030+
if ($time_types !== null || $signals !== null || $geo_types !== null) {
1031+
$epidata = array_filter($epidata, function($row) use(&$time_types, &$signals, &$geo_types) {
1032+
if ($time_types !== null && !in_array($row['time_type'], $time_types)) {
1033+
return false;
1034+
}
1035+
if ($geo_types !== null && !in_array($row['geo_type'], $geo_types)) {
1036+
return false;
1037+
}
1038+
if ($signals === null || count($signals) === 0) {
1039+
return true;
1040+
}
1041+
// filter by signal
1042+
foreach($signals as $signal) {
1043+
// match source and (signal or no signal or signal = *)
1044+
if ($row['data_source'] === $signal[0] && (count($signal) === 1 || $row['signal'] === $signal[1] || $signal[1] === '*')) {
1045+
return true;
1046+
}
1047+
}
1048+
return false;
1049+
});
1050+
}
1051+
// filter fields
1052+
if (isset($_REQUEST['fields'])) {
1053+
$fields = extract_values($_REQUEST['fields'], 'str');
1054+
1055+
$epidata = array_map(function($row) use(&$fields) {
1056+
$filtered_row = [];
1057+
foreach($fields as $field) {
1058+
if (isset($row[$field])) {
1059+
$filtered_row[$field] = $row[$field];
1060+
}
1061+
}
1062+
return $filtered_row;
1063+
}, $epidata);
1064+
}
1065+
}
1066+
10221067
// return the data
10231068
$has_values = $epidata !== null && count($epidata) > 0;
10241069
return $has_values ? $epidata : null;

src/server/api_helpers.php

Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -163,6 +163,21 @@ function execute_query($query, &$epidata, $fields_string, $fields_int, $fields_f
163163
error_log(mysqli_error($dbh));
164164
return;
165165
}
166+
167+
if (isset($_REQUEST['fields'])) {
168+
$fields = extract_values($_REQUEST['fields'], 'str');
169+
// limit fields to the selection
170+
if($fields_string !== null) {
171+
$fields_string = array_intersect($fields_string, $fields);
172+
}
173+
if($fields_int !== null) {
174+
$fields_int = array_intersect($fields_int, $fields);
175+
}
176+
if($fields_float !== null) {
177+
$fields_float = array_intersect($fields_float, $fields);
178+
}
179+
}
180+
166181
while($row = mysqli_fetch_array($result)) {
167182
if(count($epidata) < $MAX_RESULTS) {
168183
$values = array();

0 commit comments

Comments
 (0)