Skip to content

Commit c48af8f

Browse files
authored
Merge pull request #46 from dfarrow0/master
covidcast: split name into source, signal
2 parents 139daf9 + d3b7fa3 commit c48af8f

File tree

4 files changed

+118
-58
lines changed

4 files changed

+118
-58
lines changed

integrations/test_covidcast.py

Lines changed: 25 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -44,14 +44,16 @@ def test_round_trip(self):
4444
# insert dummy data
4545
self.cur.execute('''
4646
insert into covidcast values
47-
(0, 'sensor', 'county', '2020-04-14', '01234', 1.5, 2.5, 3.5, 4, 5.5)
47+
(0, 'src', 'sig', 'county', '2020-04-14', '01234',
48+
1.5, 2.5, 3.5, 4, 5.5)
4849
''')
4950
self.cnx.commit()
5051

5152
# make the request
5253
response = requests.get(BASE_URL, params={
5354
'source': 'covidcast',
54-
'name': 'sensor',
55+
'data_source': 'src',
56+
'signal': 'sig',
5557
'geo_type': 'county',
5658
'dates': 20200414,
5759
'geo_id': '01234',
@@ -80,19 +82,20 @@ def test_location_wildcard(self):
8082
# insert dummy data
8183
self.cur.execute('''
8284
insert into covidcast values
83-
(0, 'sensor', 'county', '2020-04-14', '11111', 10, 11, 12, 13, 14),
84-
(0, 'sensor', 'county', '2020-04-14', '22222', 20, 21, 22, 23, 24),
85-
(0, 'sensor', 'county', '2020-04-14', '33333', 30, 31, 32, 33, 34),
86-
(0, 'sensor', 'msa', '2020-04-14', '11111', 40, 41, 42, 43, 44),
87-
(0, 'sensor', 'msa', '2020-04-14', '22222', 50, 51, 52, 53, 54),
88-
(0, 'sensor', 'msa', '2020-04-14', '33333', 60, 61, 62, 63, 64)
85+
(0, 'src', 'sig', 'county', '2020-04-14', '11111', 10, 11, 12, 13, 14),
86+
(0, 'src', 'sig', 'county', '2020-04-14', '22222', 20, 21, 22, 23, 24),
87+
(0, 'src', 'sig', 'county', '2020-04-14', '33333', 30, 31, 32, 33, 34),
88+
(0, 'src', 'sig', 'msa', '2020-04-14', '11111', 40, 41, 42, 43, 44),
89+
(0, 'src', 'sig', 'msa', '2020-04-14', '22222', 50, 51, 52, 53, 54),
90+
(0, 'src', 'sig', 'msa', '2020-04-14', '33333', 60, 61, 62, 63, 64)
8991
''')
9092
self.cnx.commit()
9193

9294
# make the request
9395
response = requests.get(BASE_URL, params={
9496
'source': 'covidcast',
95-
'name': 'sensor',
97+
'data_source': 'src',
98+
'signal': 'sig',
9699
'geo_type': 'county',
97100
'dates': 20200414,
98101
'geo_id': '*',
@@ -139,19 +142,20 @@ def test_location_timeline(self):
139142
# insert dummy data
140143
self.cur.execute('''
141144
insert into covidcast values
142-
(0, 'sensor', 'county', '2020-04-11', '01234', 10, 11, 12, 13, 14),
143-
(0, 'sensor', 'county', '2020-04-12', '01234', 20, 21, 22, 23, 24),
144-
(0, 'sensor', 'county', '2020-04-13', '01234', 30, 31, 32, 33, 34),
145-
(0, 'sensor', 'county', '2020-04-11', '11111', 40, 41, 42, 43, 44),
146-
(0, 'sensor', 'county', '2020-04-12', '22222', 50, 51, 52, 53, 54),
147-
(0, 'sensor', 'county', '2020-04-13', '33333', 60, 61, 62, 63, 64)
145+
(0, 'src', 'sig', 'county', '2020-04-11', '01234', 10, 11, 12, 13, 14),
146+
(0, 'src', 'sig', 'county', '2020-04-12', '01234', 20, 21, 22, 23, 24),
147+
(0, 'src', 'sig', 'county', '2020-04-13', '01234', 30, 31, 32, 33, 34),
148+
(0, 'src', 'sig', 'county', '2020-04-11', '11111', 40, 41, 42, 43, 44),
149+
(0, 'src', 'sig', 'county', '2020-04-12', '22222', 50, 51, 52, 53, 54),
150+
(0, 'src', 'sig', 'county', '2020-04-13', '33333', 60, 61, 62, 63, 64)
148151
''')
149152
self.cnx.commit()
150153

151154
# make the request
152155
response = requests.get(BASE_URL, params={
153156
'source': 'covidcast',
154-
'name': 'sensor',
157+
'data_source': 'src',
158+
'signal': 'sig',
155159
'geo_type': 'county',
156160
'dates': '20200411-20200413',
157161
'geo_id': '01234',
@@ -198,15 +202,15 @@ def test_unique_key_constraint(self):
198202
# insert dummy data
199203
self.cur.execute('''
200204
insert into covidcast values
201-
(0, 'sensor', 'county', '2020-04-14', '01234', 0, 0, 0, 0, 0)
205+
(0, 'src', 'sig', 'county', '2020-04-14', '01234', 0, 0, 0, 0, 0)
202206
''')
203207
self.cnx.commit()
204208

205209
# fail to insert different dummy data under the same key
206210
with self.assertRaises(mysql.connector.errors.IntegrityError):
207211
self.cur.execute('''
208212
insert into covidcast values
209-
(0, 'sensor', 'county', '2020-04-14', '01234', 1, 1, 1, 1, 1)
213+
(0, 'src', 'sig', 'county', '2020-04-14', '01234', 1, 1, 1, 1, 1)
210214
''')
211215

212216
def test_nullable_columns(self):
@@ -215,15 +219,16 @@ def test_nullable_columns(self):
215219
# insert dummy data
216220
self.cur.execute('''
217221
insert into covidcast values
218-
(0, 'sensor', 'county', '2020-04-14', '01234', 0.123,
222+
(0, 'src', 'sig', 'county', '2020-04-14', '01234', 0.123,
219223
NULL, NULL, NULL, NULL)
220224
''')
221225
self.cnx.commit()
222226

223227
# make the request
224228
response = requests.get(BASE_URL, params={
225229
'source': 'covidcast',
226-
'name': 'sensor',
230+
'data_source': 'src',
231+
'signal': 'sig',
227232
'geo_type': 'county',
228233
'dates': 20200414,
229234
'geo_id': '01234',

integrations/test_covidcast_meta.py

Lines changed: 68 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -44,22 +44,38 @@ def test_round_trip(self):
4444
# insert dummy data
4545
self.cur.execute('''
4646
insert into covidcast values
47-
(0, 'sensor1', 'msa', '2020-04-01', 'a', 0, 0, 0, 0, 0),
48-
(0, 'sensor1', 'msa', '2020-04-01', 'b', 0, 0, 0, 0, 0),
49-
(0, 'sensor1', 'msa', '2020-04-02', 'c', 0, 0, 0, 0, 0),
50-
(0, 'sensor1', 'msa', '2020-04-02', 'd', 0, 0, 0, 0, 0),
51-
(0, 'sensor1', 'hrr', '2020-04-01', 'd', 0, 0, 0, 0, 0),
52-
(0, 'sensor1', 'hrr', '2020-04-03', 'e', 0, 0, 0, 0, 0),
53-
(0, 'sensor1', 'hrr', '2020-04-03', 'd', 0, 0, 0, 0, 0),
54-
(0, 'sensor1', 'hrr', '2020-04-02', 'e', 0, 0, 0, 0, 0),
55-
(0, 'sensor2', 'msa', '2020-04-01', 'a', 0, 0, 0, 0, 0),
56-
(0, 'sensor2', 'msa', '2020-04-01', 'b', 0, 0, 0, 0, 0),
57-
(0, 'sensor2', 'msa', '2020-04-02', 'c', 0, 0, 0, 0, 0),
58-
(0, 'sensor2', 'msa', '2020-04-04', 'd', 0, 0, 0, 0, 0),
59-
(0, 'sensor2', 'hrr', '2020-04-11', 'e', 0, 0, 0, 0, 0),
60-
(0, 'sensor2', 'hrr', '2020-04-12', 'e', 0, 0, 0, 0, 0),
61-
(0, 'sensor2', 'hrr', '2020-04-13', 'e', 0, 0, 0, 0, 0),
62-
(0, 'sensor2', 'hrr', '2020-04-14', 'e', 0, 0, 0, 0, 0)
47+
(0, 'src1', 'sig1', 'msa', '2020-04-01', 'a', 0, 0, 0, 0, 0),
48+
(0, 'src1', 'sig1', 'msa', '2020-04-01', 'b', 0, 0, 0, 0, 0),
49+
(0, 'src1', 'sig1', 'msa', '2020-04-02', 'c', 0, 0, 0, 0, 0),
50+
(0, 'src1', 'sig1', 'msa', '2020-04-02', 'd', 0, 0, 0, 0, 0),
51+
(0, 'src1', 'sig1', 'hrr', '2020-04-01', 'd', 0, 0, 0, 0, 0),
52+
(0, 'src1', 'sig1', 'hrr', '2020-04-03', 'e', 0, 0, 0, 0, 0),
53+
(0, 'src1', 'sig1', 'hrr', '2020-04-03', 'd', 0, 0, 0, 0, 0),
54+
(0, 'src1', 'sig1', 'hrr', '2020-04-02', 'e', 0, 0, 0, 0, 0),
55+
(0, 'src1', 'sig2', 'msa', '2020-04-01', 'a', 0, 0, 0, 0, 0),
56+
(0, 'src1', 'sig2', 'msa', '2020-04-01', 'b', 0, 0, 0, 0, 0),
57+
(0, 'src1', 'sig2', 'msa', '2020-04-02', 'c', 0, 0, 0, 0, 0),
58+
(0, 'src1', 'sig2', 'msa', '2020-04-04', 'd', 0, 0, 0, 0, 0),
59+
(0, 'src1', 'sig2', 'hrr', '2020-04-11', 'e', 0, 0, 0, 0, 0),
60+
(0, 'src1', 'sig2', 'hrr', '2020-04-12', 'e', 0, 0, 0, 0, 0),
61+
(0, 'src1', 'sig2', 'hrr', '2020-04-13', 'e', 0, 0, 0, 0, 0),
62+
(0, 'src1', 'sig2', 'hrr', '2020-04-14', 'e', 0, 0, 0, 0, 0),
63+
(0, 'src2', 'sig1', 'msa', '2020-04-01', 'a', 0, 0, 0, 0, 0),
64+
(0, 'src2', 'sig1', 'msa', '2020-04-01', 'b', 0, 0, 0, 0, 0),
65+
(0, 'src2', 'sig1', 'msa', '2020-04-02', 'c', 0, 0, 0, 0, 0),
66+
(0, 'src2', 'sig1', 'msa', '2020-04-02', 'd', 0, 0, 0, 0, 0),
67+
(0, 'src2', 'sig1', 'hrr', '2020-04-01', 'd', 0, 0, 0, 0, 0),
68+
(0, 'src2', 'sig1', 'hrr', '2020-04-03', 'e', 0, 0, 0, 0, 0),
69+
(0, 'src2', 'sig1', 'hrr', '2020-04-03', 'd', 0, 0, 0, 0, 0),
70+
(0, 'src2', 'sig1', 'hrr', '2020-04-02', 'e', 0, 0, 0, 0, 0),
71+
(0, 'src2', 'sig2', 'msa', '2020-04-01', 'a', 0, 0, 0, 0, 0),
72+
(0, 'src2', 'sig2', 'msa', '2020-04-01', 'b', 0, 0, 0, 0, 0),
73+
(0, 'src2', 'sig2', 'msa', '2020-04-02', 'c', 0, 0, 0, 0, 0),
74+
(0, 'src2', 'sig2', 'msa', '2020-04-04', 'd', 0, 0, 0, 0, 0),
75+
(0, 'src2', 'sig2', 'hrr', '2020-04-11', 'e', 0, 0, 0, 0, 0),
76+
(0, 'src2', 'sig2', 'hrr', '2020-04-12', 'e', 0, 0, 0, 0, 0),
77+
(0, 'src2', 'sig2', 'hrr', '2020-04-13', 'e', 0, 0, 0, 0, 0),
78+
(0, 'src2', 'sig2', 'hrr', '2020-04-14', 'e', 0, 0, 0, 0, 0)
6379
''')
6480
self.cnx.commit()
6581

@@ -73,25 +89,57 @@ def test_round_trip(self):
7389
'result': 1,
7490
'epidata': [
7591
{
76-
'name': 'sensor1',
92+
'source': 'src1',
93+
'signal': 'sig1',
7794
'geo_type': 'hrr',
7895
'min_date': '2020-04-01',
7996
'max_date': '2020-04-03',
8097
'num_locations': 2,
8198
}, {
82-
'name': 'sensor1',
99+
'source': 'src1',
100+
'signal': 'sig1',
83101
'geo_type': 'msa',
84102
'min_date': '2020-04-01',
85103
'max_date': '2020-04-02',
86104
'num_locations': 4,
87105
}, {
88-
'name': 'sensor2',
106+
'source': 'src1',
107+
'signal': 'sig2',
89108
'geo_type': 'hrr',
90109
'min_date': '2020-04-11',
91110
'max_date': '2020-04-14',
92111
'num_locations': 1,
93112
}, {
94-
'name': 'sensor2',
113+
'source': 'src1',
114+
'signal': 'sig2',
115+
'geo_type': 'msa',
116+
'min_date': '2020-04-01',
117+
'max_date': '2020-04-04',
118+
'num_locations': 4,
119+
}, {
120+
'source': 'src2',
121+
'signal': 'sig1',
122+
'geo_type': 'hrr',
123+
'min_date': '2020-04-01',
124+
'max_date': '2020-04-03',
125+
'num_locations': 2,
126+
}, {
127+
'source': 'src2',
128+
'signal': 'sig1',
129+
'geo_type': 'msa',
130+
'min_date': '2020-04-01',
131+
'max_date': '2020-04-02',
132+
'num_locations': 4,
133+
}, {
134+
'source': 'src2',
135+
'signal': 'sig2',
136+
'geo_type': 'hrr',
137+
'min_date': '2020-04-11',
138+
'max_date': '2020-04-14',
139+
'num_locations': 1,
140+
}, {
141+
'source': 'src2',
142+
'signal': 'sig2',
95143
'geo_type': 'msa',
96144
'min_date': '2020-04-01',
97145
'max_date': '2020-04-04',

src/ddl/covidcast.sql

Lines changed: 10 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -12,7 +12,8 @@ Data is public.
1212
| Field | Type | Null | Key | Default | Extra |
1313
+-------------+-------------+------+-----+---------+----------------+
1414
| id | int(11) | NO | PRI | NULL | auto_increment |
15-
| name | varchar(32) | NO | MUL | NULL | |
15+
| source | varchar(32) | NO | MUL | NULL | |
16+
| signal | varchar(32) | NO | | NULL | |
1617
| geo_type | varchar(12) | NO | | NULL | |
1718
| date | date | NO | | NULL | |
1819
| geo_id | varchar(12) | NO | | NULL | |
@@ -25,8 +26,10 @@ Data is public.
2526
2627
- `id`
2728
unique identifier for each record
28-
- `name`
29-
data source, and subtype if applicable (e.g. fb_survey_cli, fb_survey_ili)
29+
- `source`
30+
name of upstream data source
31+
- `signal`
32+
name of signal derived from upstream data
3033
- `geo_type`
3134
geographic resolution (e.g. county, HRR, MSA, DMA, state)
3235
- `date`
@@ -55,7 +58,8 @@ Data is public.
5558

5659
CREATE TABLE `covidcast` (
5760
`id` int(11) NOT NULL AUTO_INCREMENT,
58-
`name` varchar(32) NOT NULL,
61+
`source` varchar(32) NOT NULL,
62+
`signal` varchar(32) NOT NULL,
5963
`geo_type` varchar(12) NOT NULL,
6064
`date` date NOT NULL,
6165
`geo_id` varchar(12) NOT NULL,
@@ -66,7 +70,7 @@ CREATE TABLE `covidcast` (
6670
`prob` double,
6771
PRIMARY KEY (`id`),
6872
-- for uniqueness, and also fast lookup of all locations on a given date
69-
UNIQUE KEY (`name`, `geo_type`, `date`, `geo_id`),
73+
UNIQUE KEY (`source`, `signal`, `geo_type`, `date`, `geo_id`),
7074
-- for fast lookup of a time-series for a given location
71-
KEY (`name`, `geo_type`, `geo_id`)
75+
KEY (`source`, `signal`, `geo_type`, `geo_id`)
7276
) ENGINE=InnoDB DEFAULT CHARSET=utf8;

src/server/api.php

Lines changed: 15 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -924,15 +924,17 @@ function get_dengue_nowcast($locations, $epiweeks) {
924924
}
925925

926926
// queries the `covidcast` table.
927-
// $name (required): name of sensor, including subtype (e.g. sensor-type)
927+
// $source (required): name of upstream data source
928+
// $signal (required): name of signal derived from upstream data
928929
// $geo_type (required): geographic resolution (e.g. county, MSA, HRR)
929930
// $geo_id (required): location identifier or `*` as a wildcard for all
930931
// locations (specific to `$geo_type`)
931932
// $dates (required): array of date values/ranges
932-
function get_covidcast($name, $geo_type, $geo_id, $dates) {
933+
function get_covidcast($source, $signal, $geo_type, $geo_id, $dates) {
933934
// required for `mysqli_real_escape_string`
934935
global $dbh;
935-
$name = mysqli_real_escape_string($dbh, $name);
936+
$source = mysqli_real_escape_string($dbh, $source);
937+
$signal = mysqli_real_escape_string($dbh, $signal);
936938
$geo_type = mysqli_real_escape_string($dbh, $geo_type);
937939
$geo_id = mysqli_real_escape_string($dbh, $geo_id);
938940
// basic query info
@@ -943,8 +945,9 @@ function get_covidcast($name, $geo_type, $geo_id, $dates) {
943945
$fields_string = array('date', 'geo_id');
944946
$fields_int = array('direction');
945947
$fields_float = array('value', 'stderr', 'sample_size', 'prob');
946-
// build the name, date, and location (type and id) filters
947-
$condition_name = "t.`name` = '{$name}'";
948+
// build the source, signal, date, and location (type and id) filters
949+
$condition_source = "t.`source` = '{$source}'";
950+
$condition_signal = "t.`signal` = '{$signal}'";
948951
$condition_date = filter_dates('t.`date`', $dates);
949952
$condition_geo_type = "t.`geo_type` = '{$geo_type}'";
950953
if ($geo_id === '*') {
@@ -955,7 +958,7 @@ function get_covidcast($name, $geo_type, $geo_id, $dates) {
955958
$condition_geo_id = "t.`geo_id` = '{$geo_id}'";
956959
}
957960
// the query
958-
$query = "SELECT {$fields} FROM {$table} WHERE ({$condition_name}) AND ({$condition_date}) AND ({$condition_geo_type}) AND ({$condition_geo_id}) ORDER BY {$order}";
961+
$query = "SELECT {$fields} FROM {$table} WHERE ({$condition_source}) AND ({$condition_signal}) AND ({$condition_date}) AND ({$condition_geo_type}) AND ({$condition_geo_id}) ORDER BY {$order}";
959962
// get the data from the database
960963
$epidata = array();
961964
execute_query($query, $epidata, $fields_string, $fields_int, $fields_float);
@@ -967,11 +970,11 @@ function get_covidcast($name, $geo_type, $geo_id, $dates) {
967970
function get_covidcast_meta() {
968971
// basic query info
969972
$table = '`covidcast` t';
970-
$fields = "t.`name`, t.`geo_type`, MIN(t.`date`) AS `min_date`, MAX(t.`date`) AS `max_date`, COUNT(DISTINCT `geo_id`) AS `num_locations`";
971-
$group = "t.`name`, t.`geo_type`";
972-
$order = "t.`name` ASC, t.`geo_type` ASC";
973+
$fields = "t.`source`, t.`signal`, t.`geo_type`, MIN(t.`date`) AS `min_date`, MAX(t.`date`) AS `max_date`, COUNT(DISTINCT `geo_id`) AS `num_locations`";
974+
$group = "t.`source`, t.`signal`, t.`geo_type`";
975+
$order = "t.`source` ASC, t.`signal` ASC, t.`geo_type` ASC";
973976
// data type of each field
974-
$fields_string = array('name', 'geo_type', 'min_date', 'max_date');
977+
$fields_string = array('source', 'signal', 'geo_type', 'min_date', 'max_date');
975978
$fields_int = array('num_locations');
976979
$fields_float = null;
977980
// the query
@@ -1428,11 +1431,11 @@ function meta_delphi() {
14281431
}
14291432
}
14301433
} else if($source === 'covidcast') {
1431-
if(require_all($data, array('name', 'geo_type', 'dates', 'geo_id'))) {
1434+
if(require_all($data, array('data_source', 'signal', 'geo_type', 'dates', 'geo_id'))) {
14321435
// parse the request
14331436
$dates = extract_values($_REQUEST['dates'], 'int');
14341437
// get the data
1435-
$epidata = get_covidcast($_REQUEST['name'], $_REQUEST['geo_type'], $_REQUEST['geo_id'], $dates);
1438+
$epidata = get_covidcast($_REQUEST['data_source'], $_REQUEST['signal'], $_REQUEST['geo_type'], $_REQUEST['geo_id'], $dates);
14361439
store_result($data, $epidata);
14371440
}
14381441
} else if($source === 'covidcast_meta') {

0 commit comments

Comments
 (0)