filter fields and meta #196

Merged · 7 commits · Sep 8, 2020
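
Summary, as inferred from the diff below: API queries gain a `fields` parameter that restricts which fields are returned in each row, and the `covidcast_meta` endpoint gains row filters `time_types`, `geo_types`, and `signals` plus the same `fields` filter. The sketch below mirrors the integration tests added in this PR; the `BASE_URL` value is a placeholder assumption, since the real value lives in the test harness rather than in this diff.

import requests

# Placeholder endpoint (assumption): the integration tests import BASE_URL from their own setup.
BASE_URL = 'http://localhost/epidata/api.php'

# Restrict a covidcast query to two fields per returned row.
response = requests.get(BASE_URL, params={
    'source': 'covidcast',
    'data_source': 'src',
    'signal': 'sig',
    'time_type': 'day',
    'geo_type': 'county',
    'time_values': 20200414,
    'geo_value': '01234',
    'fields': 'time_value,geo_value',
})
response.raise_for_status()
# Each row in response.json()['epidata'] now contains only time_value and geo_value;
# unknown names in `fields` are silently ignored.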
89 changes: 89 additions & 0 deletions integrations/server/test_covidcast.py
@@ -79,6 +79,95 @@ def test_round_trip(self):
'message': 'success',
})

def test_fields(self):
"""Test to limit fields field"""

# insert dummy data
self.cur.execute('''
insert into covidcast values
(0, 'src', 'sig', 'day', 'county', 20200414, '01234',
123, 1.5, 2.5, 3.5, 456, 4, 20200414, 0, 1, False)
''')
self.cnx.commit()

# make the request
response = requests.get(BASE_URL, params={
'source': 'covidcast',
'data_source': 'src',
'signal': 'sig',
'time_type': 'day',
'geo_type': 'county',
'time_values': 20200414,
'geo_value': '01234',
})
response.raise_for_status()
response = response.json()

# assert that the right data came back
self.assertEqual(response, {
'result': 1,
'epidata': [{
'time_value': 20200414,
'geo_value': '01234',
'value': 1.5,
'stderr': 2.5,
'sample_size': 3.5,
'direction': 4,
'issue': 20200414,
'lag': 0,
'signal': 'sig'
}],
'message': 'success',
})

# limit fields
response = requests.get(BASE_URL, params={
'source': 'covidcast',
'data_source': 'src',
'signal': 'sig',
'time_type': 'day',
'geo_type': 'county',
'time_values': 20200414,
'geo_value': '01234',
'fields': 'time_value,geo_value'
})
response.raise_for_status()
response = response.json()

# assert that the right data came back
self.assertEqual(response, {
'result': 1,
'epidata': [{
'time_value': 20200414,
'geo_value': '01234'
}],
'message': 'success',
})

# limit fields, including an invalid field name
response = requests.get(BASE_URL, params={
'source': 'covidcast',
'data_source': 'src',
'signal': 'sig',
'time_type': 'day',
'geo_type': 'county',
'time_values': 20200414,
'geo_value': '01234',
'fields': 'time_value,geo_value,dummy'
})
response.raise_for_status()
response = response.json()

# assert that the right data came back
self.assertEqual(response, {
'result': 1,
'epidata': [{
'time_value': 20200414,
'geo_value': '01234'
}],
'message': 'success',
})

def test_location_wildcard(self):
"""Select all locations with a wildcard query."""

108 changes: 108 additions & 0 deletions integrations/server/test_covidcast_meta.py
@@ -89,6 +89,114 @@ def test_round_trip(self):
'message': 'success',
})


def test_filter(self):
"""Test filtering options some sample data."""

# insert dummy data and accumulate expected results (in sort order)
template = '''
insert into covidcast values
(0, "%s", "%s", "%s", "%s", %d, "%s", 123, %d, 0, 0, 456, 0, %d, 0, 1, %d)
'''
expected = []
for src in ('src1', 'src2'):
for sig in ('sig1', 'sig2'):
for tt in ('day', 'week'):
for gt in ('hrr', 'msa'):
expected.append({
'data_source': src,
'signal': sig,
'time_type': tt,
'geo_type': gt,
'min_time': 1,
'max_time': 2,
'num_locations': 2,
'min_value': 10,
'max_value': 20,
'mean_value': 15,
'stdev_value': 5,
'last_update': 123,
'max_issue': 2,
'min_lag': 0,
'max_lag': 0,
})
for tv in (1, 2):
for gv, v in zip(('geo1', 'geo2'), (10, 20)):
self.cur.execute(template % (src, sig, tt, gt, tv, gv, v, tv, False))
self.cnx.commit()
update_cache(args=None)

def fetch(**kwargs):
# make the request
params = kwargs.copy()
params['source'] = 'covidcast_meta'
response = requests.get(BASE_URL, params=params)
response.raise_for_status()
return response.json()

res = fetch()
self.assertEqual(res['result'], 1)
self.assertEqual(len(res['epidata']), len(expected))

# time types
res = fetch(time_types='day')
self.assertEqual(res['result'], 1)
self.assertEqual(len(res['epidata']), sum([1 for s in expected if s['time_type'] == 'day']))

res = fetch(time_types='day,week')
self.assertEqual(res['result'], 1)
self.assertEqual(len(res['epidata']), len(expected))

res = fetch(time_types='sec')
self.assertEqual(res['result'], -2)

# geo types
res = fetch(geo_types='hrr')
self.assertEqual(res['result'], 1)
self.assertEqual(len(res['epidata']), sum([1 for s in expected if s['geo_type'] == 'hrr']))

res = fetch(geo_types='hrr,msa')
self.assertEqual(res['result'], 1)
self.assertEqual(len(res['epidata']), len(expected))

res = fetch(geo_types='state')
self.assertEqual(res['result'], -2)

# signals
res = fetch(signals='src1:sig1')
self.assertEqual(res['result'], 1)
self.assertEqual(len(res['epidata']), sum([1 for s in expected if s['data_source'] == 'src1' and s['signal'] == 'sig1']))

res = fetch(signals='src1')
self.assertEqual(res['result'], 1)
self.assertEqual(len(res['epidata']), sum([1 for s in expected if s['data_source'] == 'src1']))

res = fetch(signals='src1:*')
self.assertEqual(res['result'], 1)
self.assertEqual(len(res['epidata']), sum([1 for s in expected if s['data_source'] == 'src1']))

res = fetch(signals='src1:src4')
self.assertEqual(res['result'], -2)

res = fetch(signals='src1:*,src2:*')
self.assertEqual(res['result'], 1)
self.assertEqual(len(res['epidata']), len(expected))

# filter fields
res = fetch(fields='data_source,min_time')
self.assertEqual(res['result'], 1)
self.assertEqual(len(res['epidata']), len(expected))
self.assertTrue('data_source' in res['epidata'][0])
self.assertTrue('min_time' in res['epidata'][0])
self.assertFalse('max_time' in res['epidata'][0])
self.assertFalse('signal' in res['epidata'][0])

res = fetch(fields='xx')
self.assertEqual(res['result'], 1)
self.assertEqual(len(res['epidata']), len(expected))
self.assertEqual(res['epidata'][0], [])


def test_suppress_work_in_progress(self):
"""Don't surface signals that are a work-in-progress."""

47 changes: 46 additions & 1 deletion src/server/api.php
@@ -958,7 +958,7 @@ function get_covidcast($source, $signals, $time_type, $geo_type, $time_values, $
$condition_time_type = "t.`time_type` = '{$time_type}'";
$condition_geo_type = "t.`geo_type` = '{$geo_type}'";
$condition_time_value = filter_integers('t.`time_value`', $time_values);

if ($geo_value === '*') {
// the wildcard query should return data for all locations in `geo_type`
$condition_geo_value = 'TRUE';
@@ -1019,6 +1019,51 @@ function get_covidcast_meta() {
}
}

if ($epidata !== null) {
// filter rows
$time_types = extract_values($_REQUEST['time_types'], 'str');
$signals = isset($_REQUEST['signals']) ? array_map(function($signal) {
return explode(':', $signal, 2);
}, extract_values($_REQUEST['signals'], 'str')) : null;
$geo_types = extract_values($_REQUEST['geo_types'], 'str');

if ($time_types !== null || $signals !== null || $geo_types !== null) {
$epidata = array_filter($epidata, function($row) use(&$time_types, &$signals, &$geo_types) {
if ($time_types !== null && !in_array($row['time_type'], $time_types)) {
return false;
}
if ($geo_types !== null && !in_array($row['geo_type'], $geo_types)) {
return false;
}
if ($signals === null || count($signals) === 0) {
return true;
}
// filter by signal
foreach($signals as $signal) {
// match source and (signal or no signal or signal = *)
if ($row['data_source'] === $signal[0] && (count($signal) === 1 || $row['signal'] === $signal[1] || $signal[1] === '*')) {
return true;
}
}
return false;
});
}
// filter fields
if (isset($_REQUEST['fields'])) {
$fields = extract_values($_REQUEST['fields'], 'str');

$epidata = array_map(function($row) use(&$fields) {
$filtered_row = [];
foreach($fields as $field) {
if (isset($row[$field])) {
$filtered_row[$field] = $row[$field];
}
}
return $filtered_row;
}, $epidata);
}
}

// return the data
$has_values = $epidata !== null && count($epidata) > 0;
return $has_values ? $epidata : null;
15 changes: 15 additions & 0 deletions src/server/api_helpers.php
@@ -163,6 +163,21 @@ function execute_query($query, &$epidata, $fields_string, $fields_int, $fields_f
error_log(mysqli_error($dbh));
return;
}

if (isset($_REQUEST['fields'])) {
$fields = extract_values($_REQUEST['fields'], 'str');
// limit fields to the selection
if($fields_string !== null) {
$fields_string = array_intersect($fields_string, $fields);
}
if($fields_int !== null) {
$fields_int = array_intersect($fields_int, $fields);
}
if($fields_float !== null) {
$fields_float = array_intersect($fields_float, $fields);
}
}

while($row = mysqli_fetch_array($result)) {
if(count($epidata) < $MAX_RESULTS) {
$values = array();
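
For the covidcast_meta endpoint, the new parameters accept comma-separated lists, and each `signals` entry may be a bare source, `source:signal`, or `source:*` (matching the behavior exercised by test_filter above). A minimal usage sketch under the same placeholder-URL assumption:

import requests

BASE_URL = 'http://localhost/epidata/api.php'  # placeholder assumption, not part of this diff

# Keep only metadata rows for daily signals of src1, and only two fields per row.
response = requests.get(BASE_URL, params={
    'source': 'covidcast_meta',
    'time_types': 'day',               # comma-separated, e.g. 'day,week'
    'geo_types': 'hrr,msa',
    'signals': 'src1:*',               # bare source, 'source:signal', or 'source:*'
    'fields': 'data_source,min_time',
})
response.raise_for_status()
meta = response.json()
# meta['result'] is 1 when rows match, and -2 when the filters match nothing
# (as the integration tests above show for time_types='sec' and geo_types='state').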