@@ -138,6 +138,17 @@ def _reset_load_table_ai_counter(self):
138
138
'1', '1', '1', '1', '1', 1, 1, 1, 1);""" )
139
139
self ._cursor .execute (f'DELETE FROM epimetric_load' )
140
140
141
def do_analyze(self):
    """Run ANALYZE TABLE over the dimension and epimetric tables and log the outcome.

    Performs and stores key distribution analyses, which are used for join
    order and index selection. The tables covered are `signal_dim`,
    `geo_dim`, and the tables named by `self.load_table`,
    `self.history_table` and `self.latest_table`.

    The statement's result set, preceded by the cursor's column names, is
    stringified and written to the 'do_analyze' structured logger. Nothing
    is returned.
    """
    # TODO: consider expanding this to update columns' histograms
    #   https://dev.mysql.com/doc/refman/8.0/en/analyze-table.html#analyze-table-histogram-statistics-analysis
    analyze_sql = f'''ANALYZE TABLE
        signal_dim, geo_dim,
        {self.load_table} , {self.history_table} , {self.latest_table} '''
    self._cursor.execute(analyze_sql)

    # Header row first, then one row per analyzed table as reported by the server.
    header = self._cursor.column_names
    rows = self._cursor.fetchall()
    report = [header] + rows

    analyze_logger = get_structured_logger('do_analyze')
    analyze_logger.info("ANALYZE results", results=str(report))
151
+
141
152
def insert_or_update_bulk(self, cc_rows):
    """Insert or update `cc_rows` by delegating to `insert_or_update_batch`.

    This is a thin alias: `cc_rows` is forwarded unchanged and whatever
    `insert_or_update_batch` returns is handed back to the caller.
    """
    outcome = self.insert_or_update_batch(cc_rows)
    return outcome
143
154
@@ -476,16 +487,18 @@ def split_list(lst, n):
476
487
return total
477
488
478
489
479
- def compute_covidcast_meta (self , table_name = None ):
490
+ def compute_covidcast_meta (self , table_name = None , n_threads = None ):
480
491
"""Compute and return metadata on all COVIDcast signals."""
481
492
logger = get_structured_logger ("compute_covidcast_meta" )
482
493
483
494
if table_name is None :
484
495
table_name = self .latest_view
485
496
486
- n_threads = max (1 , cpu_count ()* 9 // 10 ) # aka number of concurrent db connections, which [sh|c]ould be ~<= 90% of the #cores available to SQL server
487
- # NOTE: this may present a small problem if this job runs on different hardware than the db,
488
- # but we should not run into that issue in prod.
497
+ if n_threads is None :
498
+ logger .info ("n_threads unspecified, automatically choosing based on number of detected cores..." )
499
+ n_threads = max (1 , cpu_count ()* 9 // 10 ) # aka number of concurrent db connections, which [sh|c]ould be ~<= 90% of the #cores available to SQL server
500
+ # NOTE: this may present a small problem if this job runs on different hardware than the db,
501
+ # which is why this value can be overridden by optional argument.
489
502
logger .info (f"using { n_threads } workers" )
490
503
491
504
srcsigs = Queue () # multi-consumer threadsafe!
0 commit comments