Skip to content

Commit e24d5f1

Browse files
authored
Merge pull request #167 from melange396/meta_cache_breakdown
Meta cache breakdown
2 parents 611d200 + 5ca9d6a commit e24d5f1

13 files changed

+152
-106
lines changed

integrations/acquisition/covidcast/test_covidcast_meta_caching.py

+3-3
Original file line numberDiff line numberDiff line change
@@ -66,14 +66,14 @@ def test_caching(self):
6666
self.cur.execute('''
6767
insert into covidcast values
6868
(0, 'src', 'sig', 'day', 'state', 20200422, 'pa',
69-
123, 1, 2, 3, 456, 1, 20200422, 0, 1),
69+
123, 1, 2, 3, 456, 1, 20200422, 0, 1, False),
7070
(0, 'src', 'sig', 'day', 'state', 20200422, 'wa',
71-
789, 1, 2, 3, 456, 1, 20200423, 1, 1)
71+
789, 1, 2, 3, 456, 1, 20200423, 1, 1, False)
7272
''')
7373
self.cur.execute('''
7474
insert into covidcast values
7575
(100, 'src', 'wip_sig', 'day', 'state', 20200422, 'pa',
76-
456, 4, 5, 6, 789, -1, 20200422, 0, 1)
76+
456, 4, 5, 6, 789, -1, 20200422, 0, 1, True)
7777
''')
7878

7979
self.cnx.commit()

integrations/acquisition/covidcast/test_direction_updating.py

+12-12
Original file line numberDiff line numberDiff line change
@@ -63,29 +63,29 @@ def test_uploading(self):
6363
self.cur.execute('''
6464
insert into covidcast values
6565
(0, 'src', 'sig1', 'day', 'state', 20201028, '1111',
66-
123, 2, 0, 0, 0, -1, 20201028, 0, 1),
66+
123, 2, 0, 0, 0, -1, 20201028, 0, 1, False),
6767
(0, 'src', 'sig1', 'day', 'state', 20201029, '1111',
68-
123, 6, 0, 0, 0, 0, 20201029, 0, 1),
68+
123, 6, 0, 0, 0, 0, 20201029, 0, 1, False),
6969
(0, 'src', 'sig1', 'day', 'state', 20201030, '1111',
70-
123, 5, 0, 0, 0, 1, 20201030, 0, 1),
70+
123, 5, 0, 0, 0, 1, 20201030, 0, 1, False),
7171
(0, 'src', 'sig', 'day', 'state', 20200228, 'ca',
72-
123, 2, 0, 0, 0, NULL, 20200228, 0, 1),
72+
123, 2, 0, 0, 0, NULL, 20200228, 0, 1, False),
7373
(0, 'src', 'sig', 'day', 'state', 20200229, 'ca',
74-
123, 6, 0, 0, 0, NULL, 20200229, 0, 1),
74+
123, 6, 0, 0, 0, NULL, 20200229, 0, 1, False),
7575
(0, 'src', 'sig', 'day', 'state', 20200301, 'ca',
76-
123, 5, 0, 0, 0, NULL, 20200301, 0, 1),
76+
123, 5, 0, 0, 0, NULL, 20200301, 0, 1, False),
7777
(0, 'src', 'sig', 'day', 'state', 20200511, 'fl',
78-
123, 1, 0, 0, 0, NULL, 20200511, 0, 1),
78+
123, 1, 0, 0, 0, NULL, 20200511, 0, 1, False),
7979
(0, 'src', 'sig', 'day', 'state', 20200512, 'fl',
80-
123, 2, 0, 0, 0, NULL, 20200512, 0, 1),
80+
123, 2, 0, 0, 0, NULL, 20200512, 0, 1, False),
8181
(0, 'src', 'sig', 'day', 'state', 20200517, 'fl',
82-
123, 2, 0, 0, 0, NULL, 20200517, 0, 1),
82+
123, 2, 0, 0, 0, NULL, 20200517, 0, 1, False),
8383
(0, 'src', 'sig', 'day', 'state', 20200615, 'tx',
84-
123, 9, 0, 0, 456, NULL, 20200615, 0, 1),
84+
123, 9, 0, 0, 456, NULL, 20200615, 0, 1, False),
8585
(0, 'src', 'sig', 'day', 'state', 20200616, 'tx',
86-
123, 5, 0, 0, 456, NULL, 20200616, 0, 1),
86+
123, 5, 0, 0, 456, NULL, 20200616, 0, 1, False),
8787
(0, 'src', 'sig', 'day', 'state', 20200617, 'tx',
88-
123, 1, 0, 0, 456, 1, 20200617, 0, 1)
88+
123, 1, 0, 0, 456, 1, 20200617, 0, 1, False)
8989
''')
9090
self.cnx.commit()
9191

integrations/acquisition/covidcast/test_fill_is_latest_issue.py

+14-14
Original file line numberDiff line numberDiff line change
@@ -53,19 +53,19 @@ def test_fill_is_latest_issue(self):
5353
self.cur.execute('''
5454
insert into covidcast values
5555
(0, 'src', 'sig', 'day', 'state', 20200228, 'ca',
56-
123, 2, 5, 5, 5, NULL, 20200228, 0, 1),
56+
123, 2, 5, 5, 5, NULL, 20200228, 0, 1, False),
5757
(0, 'src', 'sig', 'day', 'state', 20200228, 'ca',
58-
123, 2, 0, 0, 0, NULL, 20200229, 1, 1),
58+
123, 2, 0, 0, 0, NULL, 20200229, 1, 1, False),
5959
(0, 'src', 'sig', 'day', 'state', 20200229, 'ca',
60-
123, 6, 0, 0, 0, NULL, 20200301, 1, 1),
60+
123, 6, 0, 0, 0, NULL, 20200301, 1, 1, False),
6161
(0, 'src', 'sig', 'day', 'state', 20200229, 'ca',
62-
123, 6, 9, 9, 9, NULL, 20200229, 0, 1),
62+
123, 6, 9, 9, 9, NULL, 20200229, 0, 1, False),
6363
(0, 'src', 'sig', 'day', 'state', 20200301, 'ca',
64-
123, 5, 0, 0, 0, NULL, 20200303, 2, 1),
64+
123, 5, 0, 0, 0, NULL, 20200303, 2, 1, False),
6565
(0, 'src', 'sig', 'day', 'state', 20200301, 'ca',
66-
123, 5, 5, 5, 5, NULL, 20200302, 1, 1),
66+
123, 5, 5, 5, 5, NULL, 20200302, 1, 1, False),
6767
(0, 'src', 'sig', 'day', 'state', 20200301, 'ca',
68-
123, 5, 9, 8, 7, NULL, 20200301, 0, 1)
68+
123, 5, 9, 8, 7, NULL, 20200301, 0, 1, False)
6969
''')
7070
self.cnx.commit()
7171

@@ -76,19 +76,19 @@ def test_fill_is_latest_issue(self):
7676
result = list(self.cur)
7777
expected = [
7878
(1, 'src', 'sig', 'day', 'state', 20200228, 'ca',
79-
123, 2, 5, 5, 5, None, 20200228, 0, bytearray(b'0')),
79+
123, 2, 5, 5, 5, None, 20200228, 0, bytearray(b'0'), bytearray(b'0')),
8080
(2, 'src', 'sig', 'day', 'state', 20200228, 'ca',
81-
123, 2, 0, 0, 0, None, 20200229, 1, bytearray(b'1')),
81+
123, 2, 0, 0, 0, None, 20200229, 1, bytearray(b'1'), bytearray(b'0')),
8282
(3, 'src', 'sig', 'day', 'state', 20200229, 'ca',
83-
123, 6, 0, 0, 0, None, 20200301, 1, bytearray(b'1')),
83+
123, 6, 0, 0, 0, None, 20200301, 1, bytearray(b'1'), bytearray(b'0')),
8484
(4, 'src', 'sig', 'day', 'state', 20200229, 'ca',
85-
123, 6, 9, 9, 9, None, 20200229, 0, bytearray(b'0')),
85+
123, 6, 9, 9, 9, None, 20200229, 0, bytearray(b'0'), bytearray(b'0')),
8686
(5, 'src', 'sig', 'day', 'state', 20200301, 'ca',
87-
123, 5, 0, 0, 0, None, 20200303, 2, bytearray(b'1')),
87+
123, 5, 0, 0, 0, None, 20200303, 2, bytearray(b'1'), bytearray(b'0')),
8888
(6, 'src', 'sig', 'day', 'state', 20200301, 'ca',
89-
123, 5, 5, 5, 5, None, 20200302, 1, bytearray(b'0')),
89+
123, 5, 5, 5, 5, None, 20200302, 1, bytearray(b'0'), bytearray(b'0')),
9090
(7, 'src', 'sig', 'day', 'state', 20200301, 'ca',
91-
123, 5, 9, 8, 7, None, 20200301, 0, bytearray(b'0'))
91+
123, 5, 9, 8, 7, None, 20200301, 0, bytearray(b'0'), bytearray(b'0'))
9292
]
9393

9494
self.assertEqual(result, expected)

integrations/client/test_delphi_epidata.py

+6-6
Original file line numberDiff line numberDiff line change
@@ -50,11 +50,11 @@ def test_covidcast(self):
5050
self.cur.execute('''
5151
insert into covidcast values
5252
(0, 'src', 'sig', 'day', 'county', 20200414, '01234',
53-
123, 1.5, 2.5, 3.5, 456, 4, 20200414, 0, 0),
53+
123, 1.5, 2.5, 3.5, 456, 4, 20200414, 0, 0, False),
5454
(0, 'src', 'sig', 'day', 'county', 20200414, '01234',
55-
456, 5.5, 1.2, 10.5, 789, 0, 20200415, 1, 0),
55+
456, 5.5, 1.2, 10.5, 789, 0, 20200415, 1, 0, False),
5656
(0, 'src', 'sig', 'day', 'county', 20200414, '01234',
57-
345, 6.5, 2.2, 11.5, 678, 0, 20200416, 2, 1)
57+
345, 6.5, 2.2, 11.5, 678, 0, 20200416, 2, 1, False)
5858
''')
5959
self.cnx.commit()
6060

@@ -157,11 +157,11 @@ def test_covidcast_meta(self):
157157
self.cur.execute('''
158158
insert into covidcast values
159159
(0, 'src', 'sig', 'day', 'county', 20200414, '01234',
160-
123, 1.5, 2.5, 3.5, 456, 4, 20200414, 0, 0),
160+
123, 1.5, 2.5, 3.5, 456, 4, 20200414, 0, 0, False),
161161
(0, 'src', 'sig', 'day', 'county', 20200414, '01234',
162-
345, 6.0, 2.2, 11.5, 678, 0, 20200416, 2, 1),
162+
345, 6.0, 2.2, 11.5, 678, 0, 20200416, 2, 1, False),
163163
(0, 'src', 'sig', 'day', 'county', 20200415, '01234',
164-
345, 7.0, 2.0, 12.5, 678, 0, 20200416, 1, 1)
164+
345, 7.0, 2.0, 12.5, 678, 0, 20200416, 1, 1, False)
165165
''')
166166
self.cnx.commit()
167167

integrations/server/test_covidcast.py

+22-22
Original file line numberDiff line numberDiff line change
@@ -45,7 +45,7 @@ def test_round_trip(self):
4545
self.cur.execute('''
4646
insert into covidcast values
4747
(0, 'src', 'sig', 'day', 'county', 20200414, '01234',
48-
123, 1.5, 2.5, 3.5, 456, 4, 20200414, 0, 1)
48+
123, 1.5, 2.5, 3.5, 456, 4, 20200414, 0, 1, False)
4949
''')
5050
self.cnx.commit()
5151

@@ -85,17 +85,17 @@ def test_location_wildcard(self):
8585
self.cur.execute('''
8686
insert into covidcast values
8787
(0, 'src', 'sig', 'day', 'county', 20200414, '11111',
88-
123, 10, 11, 12, 456, 13, 20200414, 0, 1),
88+
123, 10, 11, 12, 456, 13, 20200414, 0, 1, False),
8989
(0, 'src', 'sig', 'day', 'county', 20200414, '22222',
90-
123, 20, 21, 22, 456, 23, 20200414, 0, 1),
90+
123, 20, 21, 22, 456, 23, 20200414, 0, 1, False),
9191
(0, 'src', 'sig', 'day', 'county', 20200414, '33333',
92-
123, 30, 31, 32, 456, 33, 20200414, 0, 1),
92+
123, 30, 31, 32, 456, 33, 20200414, 0, 1, False),
9393
(0, 'src', 'sig', 'day', 'msa', 20200414, '11111',
94-
123, 40, 41, 42, 456, 43, 20200414, 0, 1),
94+
123, 40, 41, 42, 456, 43, 20200414, 0, 1, False),
9595
(0, 'src', 'sig', 'day', 'msa', 20200414, '22222',
96-
123, 50, 51, 52, 456, 53, 20200414, 0, 1),
96+
123, 50, 51, 52, 456, 53, 20200414, 0, 1, False),
9797
(0, 'src', 'sig', 'day', 'msa', 20200414, '33333',
98-
123, 60, 61, 62, 456, 634, 20200414, 0, 1)
98+
123, 60, 61, 62, 456, 634, 20200414, 0, 1, False)
9999
''')
100100
self.cnx.commit()
101101

@@ -155,17 +155,17 @@ def test_location_timeline(self):
155155
self.cur.execute('''
156156
insert into covidcast values
157157
(0, 'src', 'sig', 'day', 'county', 20200411, '01234',
158-
123, 10, 11, 12, 456, 13, 20200413, 2, 1),
158+
123, 10, 11, 12, 456, 13, 20200413, 2, 1, False),
159159
(0, 'src', 'sig', 'day', 'county', 20200412, '01234',
160-
123, 20, 21, 22, 456, 23, 20200413, 1, 1),
160+
123, 20, 21, 22, 456, 23, 20200413, 1, 1, False),
161161
(0, 'src', 'sig', 'day', 'county', 20200413, '01234',
162-
123, 30, 31, 32, 456, 33, 20200413, 0, 1),
162+
123, 30, 31, 32, 456, 33, 20200413, 0, 1, False),
163163
(0, 'src', 'sig', 'day', 'county', 20200411, '11111',
164-
123, 40, 41, 42, 456, 43, 20200413, 2, 1),
164+
123, 40, 41, 42, 456, 43, 20200413, 2, 1, False),
165165
(0, 'src', 'sig', 'day', 'county', 20200412, '22222',
166-
123, 50, 51, 52, 456, 53, 20200413, 1, 1),
166+
123, 50, 51, 52, 456, 53, 20200413, 1, 1, False),
167167
(0, 'src', 'sig', 'day', 'county', 20200413, '33333',
168-
123, 60, 61, 62, 456, 63, 20200413, 0, 1)
168+
123, 60, 61, 62, 456, 63, 20200413, 0, 1, False)
169169
''')
170170
self.cnx.commit()
171171

@@ -225,7 +225,7 @@ def test_unique_key_constraint(self):
225225
self.cur.execute('''
226226
insert into covidcast values
227227
(0, 'src', 'sig', 'day', 'county', 20200414, '01234',
228-
0, 0, 0, 0, 0, 0, 20200414, 0, 1)
228+
0, 0, 0, 0, 0, 0, 20200414, 0, 1, False)
229229
''')
230230
self.cnx.commit()
231231

@@ -234,14 +234,14 @@ def test_unique_key_constraint(self):
234234
self.cur.execute('''
235235
insert into covidcast values
236236
(0, 'src', 'sig', 'day', 'county', 20200414, '01234',
237-
1, 1, 1, 1, 1, 1, 20200414, 0, 1)
237+
1, 1, 1, 1, 1, 1, 20200414, 0, 1, False)
238238
''')
239239

240240
# succeed to insert different dummy data under a different issue
241241
self.cur.execute('''
242242
insert into covidcast values
243243
(0, 'src', 'sig', 'day', 'county', 20200414, '01234',
244-
1, 1, 1, 1, 1, 1, 20200415, 1, 1)
244+
1, 1, 1, 1, 1, 1, 20200415, 1, 1, False)
245245
''')
246246

247247
def test_nullable_columns(self):
@@ -251,7 +251,7 @@ def test_nullable_columns(self):
251251
self.cur.execute('''
252252
insert into covidcast values
253253
(0, 'src', 'sig', 'day', 'county', 20200414, '01234',
254-
123, 0.123, NULL, NULL, 456, NULL, 20200414, 0, 1)
254+
123, 0.123, NULL, NULL, 456, NULL, 20200414, 0, 1, False)
255255
''')
256256
self.cnx.commit()
257257

@@ -291,15 +291,15 @@ def test_temporal_partitioning(self):
291291
self.cur.execute('''
292292
insert into covidcast values
293293
(0, 'src', 'sig', 'hour', 'state', 2020041714, 'vi',
294-
123, 10, 11, 12, 456, 13, 2020041714, 0, 1),
294+
123, 10, 11, 12, 456, 13, 2020041714, 0, 1, False),
295295
(0, 'src', 'sig', 'day', 'state', 20200417, 'vi',
296-
123, 20, 21, 22, 456, 23, 20200417, 00, 1),
296+
123, 20, 21, 22, 456, 23, 20200417, 00, 1, False),
297297
(0, 'src', 'sig', 'week', 'state', 202016, 'vi',
298-
123, 30, 31, 32, 456, 33, 202016, 0, 1),
298+
123, 30, 31, 32, 456, 33, 202016, 0, 1, False),
299299
(0, 'src', 'sig', 'month', 'state', 202004, 'vi',
300-
123, 40, 41, 42, 456, 43, 202004, 0, 1),
300+
123, 40, 41, 42, 456, 43, 202004, 0, 1, False),
301301
(0, 'src', 'sig', 'year', 'state', 2020, 'vi',
302-
123, 50, 51, 52, 456, 53, 2020, 0, 1)
302+
123, 50, 51, 52, 456, 53, 2020, 0, 1, False)
303303
''')
304304
self.cnx.commit()
305305

integrations/server/test_covidcast_meta.py

+10-5
Original file line numberDiff line numberDiff line change
@@ -28,6 +28,7 @@ def setUp(self):
2828
database='epidata')
2929
cur = cnx.cursor()
3030
cur.execute('truncate table covidcast')
31+
cur.execute('update covidcast_meta_cache set timestamp = 0, epidata = ""')
3132
cnx.commit()
3233
cur.close()
3334

@@ -46,7 +47,7 @@ def test_round_trip(self):
4647
# insert dummy data and accumulate expected results (in sort order)
4748
template = '''
4849
insert into covidcast values
49-
(0, "%s", "%s", "%s", "%s", %d, "%s", 123, %d, 0, 0, 456, 0, %d, 0, 1)
50+
(0, "%s", "%s", "%s", "%s", %d, "%s", 123, %d, 0, 0, 456, 0, %d, 0, 1, %d)
5051
'''
5152
expected = []
5253
for src in ('src1', 'src2'):
@@ -72,7 +73,7 @@ def test_round_trip(self):
7273
})
7374
for tv in (1, 2):
7475
for gv, v in zip(('geo1', 'geo2'), (10, 20)):
75-
self.cur.execute(template % (src, sig, tt, gt, tv, gv, v, tv))
76+
self.cur.execute(template % (src, sig, tt, gt, tv, gv, v, tv, False))
7677
self.cnx.commit()
7778
update_cache(args=None)
7879

@@ -94,14 +95,18 @@ def test_suppress_work_in_progress(self):
9495
# insert dummy data and accumulate expected results (in sort order)
9596
template = '''
9697
insert into covidcast values
97-
(0, "%s", "%s", "%s", "%s", %d, "%s", 123, %d, 0, 0, 456, 0, %d, 0, 1)
98+
(0, "%s", "%s", "%s", "%s", %d, "%s", 123, %d, 0, 0, 456, 0, %d, 0, 1, %d)
9899
'''
99100
expected = []
100101
for src in ('src1', 'src2'):
101102
for sig in ('sig1', 'sig2', 'wip_sig3'):
102103
for tt in ('day', 'week'):
103104
for gt in ('hrr', 'msa'):
104-
if sig != 'wip_sig3':
105+
106+
if sig == 'wip_sig3':
107+
is_wip = True
108+
else:
109+
is_wip = False
105110
expected.append({
106111
'data_source': src,
107112
'signal': sig,
@@ -121,7 +126,7 @@ def test_suppress_work_in_progress(self):
121126
})
122127
for tv in (1, 2):
123128
for gv, v in zip(('geo1', 'geo2'), (10, 20)):
124-
self.cur.execute(template % (src, sig, tt, gt, tv, gv, v, tv))
129+
self.cur.execute(template % (src, sig, tt, gt, tv, gv, v, tv, is_wip))
125130
self.cnx.commit()
126131
update_cache(args=None)
127132

src/acquisition/covidcast/covidcast_meta_cache_updater.py

+1-5
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,6 @@
22

33
# standard library
44
import argparse
5-
import json
65
import sys
76

87
# first party
@@ -43,12 +42,9 @@ def main(args, epidata_impl=Epidata, database_impl=Database):
4342
print('unable to cache epidata')
4443
return False
4544

46-
# serialize the data
47-
epidata_json = json.dumps(metadata)
48-
4945
# update the cache
5046
try:
51-
database.update_covidcast_meta_cache(epidata_json)
47+
database.update_covidcast_meta_cache(metadata)
5248
print('successfully cached epidata')
5349
finally:
5450
# no catch block so that an exception above will cause the program to

src/acquisition/covidcast/csv_to_database.py

+6-1
Original file line numberDiff line numberDiff line change
@@ -74,11 +74,16 @@ def archive_as_successful(path_src, filename, source):
7474

7575
(source, signal, time_type, geo_type, time_value, issue, lag) = details
7676

77+
is_wip = False
78+
if signal[:4].lower() == "wip_":
79+
is_wip = True
80+
print(signal, is_wip)
81+
7782
csv_rows = csv_importer_impl.load_csv(path, geo_type)
7883

7984
all_rows_valid = False
8085
try:
81-
cc_rows = CovidcastRow.fromCsvRows(csv_rows, source, signal, time_type, geo_type, time_value, issue, lag)
86+
cc_rows = CovidcastRow.fromCsvRows(csv_rows, source, signal, time_type, geo_type, time_value, issue, lag, is_wip)
8287
rows_list = list(cc_rows)
8388
if not rows_list:
8489
raise ValueError("No data")

0 commit comments

Comments
 (0)