@@ -69,7 +69,6 @@ class Database:
69
69
70
70
def connect (self , connector_impl = mysql .connector ):
71
71
"""Establish a connection to the database."""
72
-
73
72
u , p = secrets .db .epi
74
73
self ._connector_impl = connector_impl
75
74
self ._connection = self ._connector_impl .connect (
@@ -247,6 +246,104 @@ def insert_or_update_batch(self, cc_rows, batch_size=2**20, commit_partial=False
247
246
self ._cursor .execute (drop_tmp_table_sql )
248
247
return total
249
248
249
+ def delete_batch (self , cc_deletions ):
250
+ """
251
+ Remove rows specified by a csv file or list of tuples.
252
+
253
+ If cc_deletions is a filename, the file should include a header row and use the following field order:
254
+ - geo_id
255
+ - value (ignored)
256
+ - stderr (ignored)
257
+ - sample_size (ignored)
258
+ - issue (YYYYMMDD format)
259
+ - time_value (YYYYMMDD format)
260
+ - geo_type
261
+ - signal
262
+ - source
263
+
264
+ If cc_deletions is a list of tuples, the tuples should use the following field order (=same as above, plus time_type):
265
+ - geo_id
266
+ - value (ignored)
267
+ - stderr (ignored)
268
+ - sample_size (ignored)
269
+ - issue (YYYYMMDD format)
270
+ - time_value (YYYYMMDD format)
271
+ - geo_type
272
+ - signal
273
+ - source
274
+ - time_type
275
+ """
276
+ tmp_table_name = "tmp_delete_table"
277
+ create_tmp_table_sql = f'''
278
+ CREATE OR REPLACE TABLE { tmp_table_name } LIKE covidcast;
279
+ '''
280
+
281
+ amend_tmp_table_sql = f'''
282
+ ALTER TABLE { tmp_table_name } ADD COLUMN covidcast_id bigint unsigned;
283
+ '''
284
+
285
+ load_tmp_table_infile_sql = f'''
286
+ LOAD DATA INFILE "{ cc_deletions } "
287
+ INTO TABLE { tmp_table_name }
288
+ FIELDS TERMINATED BY ","
289
+ IGNORE 1 LINES
290
+ (`geo_value`, `value`, `stderr`, `sample_size`, `issue`, `time_value`, `geo_type`, `signal`, `source`)
291
+ SET time_type="day";
292
+ '''
293
+
294
+ load_tmp_table_insert_sql = f'''
295
+ INSERT INTO { tmp_table_name }
296
+ (`geo_value`, `value`, `stderr`, `sample_size`, `issue`, `time_value`, `geo_type`, `signal`, `source`, `time_type`,
297
+ `value_updated_timestamp`, `direction_updated_timestamp`, `lag`, `direction`, `is_latest_issue`)
298
+ VALUES
299
+ (%s, %s, %s, %s, %s, %s, %s, %s, %s, %s,
300
+ 0, 0, 0, 0, 0)
301
+ '''
302
+
303
+ add_id_sql = f'''
304
+ UPDATE { tmp_table_name } d INNER JOIN covidcast c USING
305
+ (`source`, `signal`, `time_type`, `geo_type`, `time_value`, `geo_value`, `issue`)
306
+ SET d.covidcast_id=c.id, d.is_latest_issue=c.is_latest_issue;
307
+ '''
308
+
309
+ delete_sql = f'''
310
+ DELETE c FROM { tmp_table_name } d INNER JOIN covidcast c WHERE d.covidcast_id=c.id;
311
+ '''
312
+
313
+ fix_latest_issue_sql = f'''
314
+ UPDATE
315
+ (SELECT `source`, `signal`, `time_type`, `geo_type`, `time_value`, `geo_value`, MAX(`issue`) AS `issue`
316
+ FROM
317
+ (SELECT DISTINCT `source`, `signal`, `time_type`, `geo_type`, `time_value`, `geo_value`
318
+ FROM { tmp_table_name } WHERE `is_latest_issue`=1) AS was_latest
319
+ LEFT JOIN covidcast c
320
+ USING (`source`, `signal`, `time_type`, `geo_type`, `time_value`, `geo_value`)
321
+ GROUP BY `source`, `signal`, `time_type`, `geo_type`, `time_value`, `geo_value`
322
+ ) AS TMP
323
+ LEFT JOIN `covidcast`
324
+ USING (`source`, `signal`, `time_type`, `geo_type`, `time_value`, `geo_value`, `issue`)
325
+ SET `covidcast`.`is_latest_issue`=1;
326
+ '''
327
+
328
+ drop_tmp_table_sql = f'DROP TABLE { tmp_table_name } '
329
+ try :
330
+ self ._cursor .execute (create_tmp_table_sql )
331
+ self ._cursor .execute (amend_tmp_table_sql )
332
+ if isinstance (cc_deletions , str ):
333
+ self ._cursor .execute (load_tmp_table_infile_sql )
334
+ elif isinstance (cc_deletions , list ):
335
+ self ._cursor .executemany (load_tmp_table_insert_sql , cc_deletions )
336
+ else :
337
+ raise Exception (f"Bad deletions argument: need a filename or a list of tuples; got a { type (cc_deletions )} " )
338
+ self ._cursor .execute (add_id_sql )
339
+ self ._cursor .execute (delete_sql )
340
+ self ._cursor .execute (fix_latest_issue_sql )
341
+ self ._connection .commit ()
342
+ except Exception as e :
343
+ raise e
344
+ finally :
345
+ self ._cursor .execute (drop_tmp_table_sql )
346
+
250
347
def compute_covidcast_meta (self , table_name = 'covidcast' , use_index = True ):
251
348
"""Compute and return metadata on all non-WIP COVIDcast signals."""
252
349
logger = get_structured_logger ("compute_covidcast_meta" )
0 commit comments