Skip to content

Commit a61a5db

Browse files
committed
added ANALYZE TABLE to the end of acquisition runs (and some whitespace fixes)
1 parent b9ab3f3 commit a61a5db

File tree

2 files changed

+19
-9
lines changed

2 files changed

+19
-9
lines changed

src/acquisition/covidcast/csv_to_database.py

+8-9
Original file line number | Diff line number | Diff line change
@@ -77,12 +77,12 @@ def upload_archive(
7777
csv_importer_impl=CsvImporter):
7878
"""Upload CSVs to the database and archive them using the specified handlers.
7979
80-
:path_details: output from CsvImporter.find*_csv_files
81-
80+
:path_details: output from CsvImporter.find*_csv_files
81+
8282
:database: an open connection to the epidata database
8383
8484
:handlers: functions for archiving (successful, failed) files
85-
85+
8686
:return: the number of modified rows
8787
"""
8888
archive_as_successful, archive_as_failed = handlers
@@ -130,7 +130,7 @@ def upload_archive(
130130
archive_as_successful(path_src, filename, source, logger)
131131
else:
132132
archive_as_failed(path_src, filename, source,logger)
133-
133+
134134
return total_modified_row_count
135135

136136

@@ -149,7 +149,7 @@ def main(
149149
if not path_details:
150150
logger.info('nothing to do; exiting...')
151151
return
152-
152+
153153
logger.info("Ingesting CSVs", csv_count = len(path_details))
154154

155155
database = database_impl()
@@ -161,13 +161,12 @@ def main(
161161
database,
162162
make_handlers(args.data_dir, args.specific_issue_date),
163163
logger)
164-
logger.info("Finished inserting database rows", row_count = modified_row_count)
165-
# the following print statement serves the same function as the logger.info call above
166-
# print('inserted/updated %d rows' % modified_row_count)
164+
logger.info("Finished inserting/updating database rows", row_count = modified_row_count)
167165
finally:
166+
database.do_analyze()
168167
# unconditionally commit database changes since CSVs have been archived
169168
database.disconnect(True)
170-
169+
171170
logger.info(
172171
"Ingested CSVs into database",
173172
total_runtime_in_seconds=round(time.time() - start_time, 2))

src/acquisition/covidcast/database.py

+11
Original file line number | Diff line number | Diff line change
@@ -138,6 +138,17 @@ def _reset_load_table_ai_counter(self):
138138
'1', '1', '1', '1', '1', 1, 1, 1, 1);""")
139139
self._cursor.execute(f'DELETE FROM epimetric_load')
140140

141+
def do_analyze(self):
    """Refresh the key-distribution statistics of the covidcast tables.

    Issues one ``ANALYZE TABLE`` statement covering ``signal_dim``, ``geo_dim``
    and the tables named by ``self.load_table``, ``self.history_table`` and
    ``self.latest_table``.  The stored analyses are used for join order and
    index selection.  The result set returned by the statement is written to
    the ``do_analyze`` structured logger, with the cursor's column names as
    the first entry so the rows that follow can be read on their own.

    :return: None; the outcome is only reported through the log
    """
    # TODO: consider expanding this to update columns' histograms
    # https://dev.mysql.com/doc/refman/8.0/en/analyze-table.html#analyze-table-histogram-statistics-analysis
    self._cursor.execute(
        f'''ANALYZE TABLE
        signal_dim, geo_dim,
        {self.load_table}, {self.history_table}, {self.latest_table}''')
    # Unpack instead of using list `+` so this works whether fetchall() hands
    # back a list or a tuple of rows.
    output = [self._cursor.column_names, *self._cursor.fetchall()]
    # Pass the rows as a keyword field rather than concatenating them into the
    # message, matching the other structured log calls in this package.
    # NOTE(review): assumes get_structured_logger() returns a logger that accepts
    # keyword fields, as the acquisition logger calls do -- confirm.
    get_structured_logger('do_analyze').info("ANALYZE results", results=output)
151+
141152
def insert_or_update_bulk(self, cc_rows):
142153
return self.insert_or_update_batch(cc_rows)
143154

0 commit comments

Comments
 (0)