@@ -69,7 +69,7 @@ class Database:
69
69
70
70
def connect (self , connector_impl = mysql .connector ):
71
71
"""Establish a connection to the database."""
72
-
72
+ print ( secrets . db . host , secrets . db . epi )
73
73
u , p = secrets .db .epi
74
74
self ._connector_impl = connector_impl
75
75
self ._connection = self ._connector_impl .connect (
@@ -247,6 +247,104 @@ def insert_or_update_batch(self, cc_rows, batch_size=2**20, commit_partial=False
247
247
self ._cursor .execute (drop_tmp_table_sql )
248
248
return total
249
249
250
+ def delete_batch (self , cc_deletions ):
251
+ """
252
+ Remove rows specified by a csv file or list of tuples.
253
+
254
+ File should include a header row and use the following field order:
255
+ - geo_id
256
+ - value (ignored)
257
+ - stderr (ignored)
258
+ - sample_size (ignored)
259
+ - issue (YYYYMMDD format)
260
+ - time_value (YYYYMMDD format)
261
+ - geo_type
262
+ - signal
263
+ - source
264
+
265
+ List of tuples should use the following field order (same as above, plus time_type):
266
+ - geo_id
267
+ - value (ignored)
268
+ - stderr (ignored)
269
+ - sample_size (ignored)
270
+ - issue (YYYYMMDD format)
271
+ - time_value (YYYYMMDD format)
272
+ - geo_type
273
+ - signal
274
+ - source
275
+ - time_type
276
+ """
277
+ tmp_table_name = "tmp_delete_table"
278
+ create_tmp_table_sql = f'''
279
+ CREATE OR REPLACE TABLE { tmp_table_name } LIKE covidcast;
280
+ '''
281
+
282
+ amend_tmp_table_sql = f'''
283
+ ALTER TABLE { tmp_table_name } ADD COLUMN covidcast_id bigint unsigned;
284
+ '''
285
+
286
+ load_tmp_table_infile_sql = f'''
287
+ LOAD DATA INFILE "{ cc_deletions } "
288
+ INTO TABLE { tmp_table_name }
289
+ FIELDS TERMINATED BY ","
290
+ IGNORE 1 LINES
291
+ (`geo_value`, `value`, `stderr`, `sample_size`, `issue`, `time_value`, `geo_type`, `signal`, `source`)
292
+ SET time_type="day";
293
+ '''
294
+
295
+ load_tmp_table_insert_sql = f'''
296
+ INSERT INTO { tmp_table_name }
297
+ (`geo_value`, `value`, `stderr`, `sample_size`, `issue`, `time_value`, `geo_type`, `signal`, `source`, `time_type`,
298
+ `value_updated_timestamp`, `direction_updated_timestamp`, `lag`, `direction`, `is_latest_issue`)
299
+ VALUES
300
+ (%s, %s, %s, %s, %s, %s, %s, %s, %s, %s,
301
+ 0, 0, 0, 0, 0)
302
+ '''
303
+
304
+ add_id_sql = f'''
305
+ UPDATE { tmp_table_name } d INNER JOIN covidcast c USING
306
+ (`source`, `signal`, `time_type`, `geo_type`, `time_value`, `geo_value`, `issue`)
307
+ SET d.covidcast_id=c.id, d.is_latest_issue=c.is_latest_issue;
308
+ '''
309
+
310
+ delete_sql = f'''
311
+ DELETE c FROM { tmp_table_name } d INNER JOIN covidcast c WHERE d.covidcast_id=c.id;
312
+ '''
313
+
314
+ fix_latest_issue_sql = f'''
315
+ UPDATE
316
+ (SELECT `source`, `signal`, `time_type`, `geo_type`, `time_value`, `geo_value`, MAX(`issue`) AS `issue`
317
+ FROM
318
+ (SELECT DISTINCT `source`, `signal`, `time_type`, `geo_type`, `time_value`, `geo_value`
319
+ FROM { tmp_table_name } WHERE `is_latest_issue`=1) AS was_latest
320
+ LEFT JOIN covidcast c
321
+ USING (`source`, `signal`, `time_type`, `geo_type`, `time_value`, `geo_value`)
322
+ GROUP BY `source`, `signal`, `time_type`, `geo_type`, `time_value`, `geo_value`
323
+ ) AS TMP
324
+ LEFT JOIN `covidcast`
325
+ USING (`source`, `signal`, `time_type`, `geo_type`, `time_value`, `geo_value`, `issue`)
326
+ SET `is_latest_issue`=1;
327
+ '''
328
+
329
+ drop_tmp_table_sql = f'DROP TABLE { tmp_table_name } '
330
+ try :
331
+ self ._cursor .execute (create_tmp_table_sql )
332
+ self ._cursor .execute (amend_tmp_table_sql )
333
+ if isinstance (cc_deletions , str ):
334
+ self ._cursor .execute (load_tmp_table_infile_sql )
335
+ elif isinstance (cc_deletions , list ):
336
+ self ._cursor .executemany (load_tmp_table_insert_sql , cc_deletions )
337
+ else :
338
+ raise Exception (f"Bad deletions argument: need a filename or a list of tuples; got a { type (cc_deletions )} " )
339
+ self ._cursor .execute (add_id_sql )
340
+ self ._cursor .execute (delete_sql )
341
+ self ._cursor .execute (fix_latest_issue_sql )
342
+ self ._connection .commit ()
343
+ except Exception as e :
344
+ raise e
345
+ finally :
346
+ self ._cursor .execute (drop_tmp_table_sql )
347
+
250
348
def compute_covidcast_meta (self , table_name = 'covidcast' , use_index = True ):
251
349
"""Compute and return metadata on all non-WIP COVIDcast signals."""
252
350
logger = get_structured_logger ("compute_covidcast_meta" )
0 commit comments