From 900cd3f24723ecfc1aa546f34b017725e809f2d6 Mon Sep 17 00:00:00 2001 From: Rostyslav Zatserkovnyi Date: Wed, 14 Jun 2023 18:26:09 +0300 Subject: [PATCH 01/43] Switch CSV file for perftests and add Redis step (#1199) --- .github/workflows/performance-tests.yml | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/.github/workflows/performance-tests.yml b/.github/workflows/performance-tests.yml index 596d0a348..03541f770 100644 --- a/.github/workflows/performance-tests.yml +++ b/.github/workflows/performance-tests.yml @@ -60,6 +60,7 @@ jobs: run: | cd ../driver sudo make web sql="${{ secrets.DB_CONN_STRING }}" + sudo make redis - name: Check out delphi-admin uses: actions/checkout@v3 with: @@ -71,7 +72,7 @@ jobs: run: | cd delphi-admin/load-testing/locust docker build -t locust . - export CSV=v4-requests-as_of.csv + export CSV=v4-requests-small.csv touch output_stats.csv && chmod 666 output_stats.csv touch output_stats_history.csv && chmod 666 output_stats_history.csv touch output_failures.csv && chmod 666 output_failures.csv From f5051d33bcf02cd3afbd132e67f3d20ed7024c72 Mon Sep 17 00:00:00 2001 From: Dmitry Shemetov Date: Fri, 23 Jun 2023 11:33:25 -0700 Subject: [PATCH 02/43] feat(acquisition): remove and deactivate norostat --- deploy.json | 9 - .../norostat/norostat_add_history.py | 45 -- src/acquisition/norostat/norostat_raw.py | 112 ----- src/acquisition/norostat/norostat_sql.py | 434 ------------------ src/acquisition/norostat/norostat_update.py | 28 -- src/acquisition/norostat/norostat_utils.py | 44 -- .../norostat/sample_content.pickle | Bin 37801 -> 0 bytes 7 files changed, 672 deletions(-) delete mode 100644 src/acquisition/norostat/norostat_add_history.py delete mode 100644 src/acquisition/norostat/norostat_raw.py delete mode 100644 src/acquisition/norostat/norostat_sql.py delete mode 100644 src/acquisition/norostat/norostat_update.py delete mode 100644 src/acquisition/norostat/norostat_utils.py delete mode 100644 src/acquisition/norostat/sample_content.pickle diff --git a/deploy.json b/deploy.json index 59d141ba4..3396dbbf6 100644 --- a/deploy.json +++ b/deploy.json @@ -138,15 +138,6 @@ "add-header-comment": true }, - "// acquisition - norostat", - { - "type": "move", - "src": "src/acquisition/norostat/", - "dst": "[[package]]/acquisition/norostat/", - "match": "^.*\\.(py)$", - "add-header-comment": true - }, - "// acquisition - paho", { "type": "move", diff --git a/src/acquisition/norostat/norostat_add_history.py b/src/acquisition/norostat/norostat_add_history.py deleted file mode 100644 index 64fd11ff7..000000000 --- a/src/acquisition/norostat/norostat_add_history.py +++ /dev/null @@ -1,45 +0,0 @@ -""" -Parses historical versions of the NoroSTAT data-table and updates the -appropriate databases. Currently uses snapshots from the WayBack Machine -(archive.org). A more comprehensive archival service may be mementoweb.org, -which appears to pull from many services that implement the Memento protocol, -including archive.org. Manually downloaded snapshots could be recorded via this -script as well. -""" - -# standard library -import re -import os -import time -import collections - -# first party -from . import norostat_sql -from . 
import norostat_raw - - - -def main(): - norostat_sql.ensure_tables_exist() - snapshot_dir = os.path.expanduser("~/norostat_history/wayback/websites/www.cdc.gov/norovirus/reporting/norostat/data-table.html/") - snapshot_version_counter = collections.Counter() - for subdir in os.listdir(snapshot_dir): - if re.match(r'[0-9]+', subdir) is not None: - # appears to be snapshot dir - snapshot_version_counter[subdir] = 0 # register that loop found this snapshot directory - for norostat_capitalization in ["norostat","noroSTAT"]: - time.sleep(0.002) # ensure parse times are unique, assuming OS can accurately sleep and measure to ms precision - path = os.path.join(snapshot_dir,subdir,"norovirus","reporting",norostat_capitalization,"data-table.html") - if os.path.isfile(path): - print("Processing file ", path) - with open(path, 'r') as datatable_file: - content = datatable_file.read() - wide_raw = norostat_raw.parse_content_to_wide_raw(content) - long_raw = norostat_raw.melt_wide_raw_to_long_raw(wide_raw) - norostat_sql.record_long_raw(long_raw) - snapshot_version_counter[subdir] += 1 - print('Successfully uploaded the following snapshots, with the count indicating the number of data-table versions found inside each snapshot (expected to be 1, or maybe 2 if there was a change in capitalization; 0 indicates the NoroSTAT page was not found within a snapshot directory); just "Counter()" indicates no snapshot directories were found:', snapshot_version_counter) - norostat_sql.update_point() - -if __name__ == '__main__': - main() diff --git a/src/acquisition/norostat/norostat_raw.py b/src/acquisition/norostat/norostat_raw.py deleted file mode 100644 index 582de9684..000000000 --- a/src/acquisition/norostat/norostat_raw.py +++ /dev/null @@ -1,112 +0,0 @@ -""" -Functions to fetch, save, load, and format the NoroSTAT data-table. Formatting -functions include conversion from html content to "wide_raw" --- a wide data -frame in a tuple along with metadata --- and then to "long_raw" --- a long/tall -data frame in a tuple along with metadata. Metadata: release_date, parse_time, -and (constant) location. Here, the location will be (a str representing) a set -of states. -""" - - - -# standard library -import datetime -import re -import pickle - -# third party -import requests -import lxml.html -import pandas as pd - -# first party -from .norostat_utils import * - -def fetch_content(norostat_datatable_url="https://www.cdc.gov/norovirus/reporting/norostat/data-table.html"): - """Download NoroSTAT data-table. Returns the html content.""" - headers = { - 'User-Agent': 'delphibot/1.0 (+https://delphi.cmu.edu/)', - } - resp = requests.get(norostat_datatable_url, headers=headers) - expect_value_eq(resp.status_code, 200, - 'Wanted status code {}. 
Received: ') - expect_value_eq(resp.headers.get("Content-Type"), "text/html", - 'Expected Content-Type "{}"; Received ') - return resp.content - -def save_sample_content(content, f="sample_content.pickle"): - """Save the content from fetch_content into a pickle file for most testing (don't download unnecessarily).""" - with open(f, "wb") as handle: - pickle.dump(content, handle) - -def load_sample_content(f="sample_content.pickle"): - """Load data from a past call to fetch_content from a pickle file for most testing (don't download unnecessarily).""" - with open(f, "rb") as handle: - content = pickle.load(handle) - return content - -def parse_content_to_wide_raw(content): - """Convert the html content for the data-table into a wide data frame, then stick it in a tuple along with the release_date, parse_time, and (constant) location.""" - parse_time = datetime.datetime.now() - html_root = lxml.html.fromstring(content) - # Extract the release date, a.k.a. dateModified, a.k.a. "Page last updated" date; ~Dec 2018 this is only available in a meta tag; previously, it was available in a visible span - dateModified_meta_elts = html_root.xpath('//meta[@property="cdc:last_updated"]') - dateModified_span_elts = html_root.xpath('//span[@itemprop="dateModified"]') - if len(dateModified_meta_elts) == 1: - [dateModified_meta_elt] = dateModified_meta_elts - dateModified = dateModified_meta_elt.attrib['content'] - elif len(dateModified_span_elts) == 1: - [dateModified_span_elt] = dateModified_span_elts - dateModified = dateModified_span_elt.text - else: - raise Exception("Could not find the expected number of dateModified meta or span tags.") - # FIXME check/enforce locale - release_date = datetime.datetime.strptime(dateModified, "%B %d, %Y").date() - # Check that table description still specifies suspected&confirmed norovirus - # outbreaks (insensitive to case of certain letters and allowing for both old - # "to the" and new "through the" text), then extract list of states from the - # description: - [description_elt] = html_root.xpath('''//p[ - contains(translate(text(), "SCNORHD", "scnorhd"), "suspected and confirmed norovirus outbreaks reported by state health departments in") and - ( - contains(text(), "to the") or - contains(text(), "through the") - ) - ]''') - location = re.match(".*?[Dd]epartments in (.*?) (?:to)|(?:through) the.*$", description_elt.text).group(1) - # Attempt to find exactly 1 table (note: it would be nice to filter on the - # associated caption, but no such caption is present in earlier versions): - [table] = html_root.xpath('//table') - # Convert html table to DataFrame: - # Directly reading in the table with pd.read_html performs unwanted dtype - # inference, but reveals the column names: - [wide_raw_df_with_unwanted_conversions] = pd.read_html(lxml.html.tostring(table)) - # We want all columns to be string columns. However, there does not appear - # to be an option to disable dtype inference in pd.read_html. 
Hide all - # entries inside 1-tuple wrappers using pre-dtype-inference converters, - # then unpack afterward (the entries fed to the converters should already - # be strings, but "convert" them to strings just in case): - [wide_raw_df_with_wrappers] = pd.read_html( - lxml.html.tostring(table), - converters= {col: lambda entry: (str(entry),) - for col in wide_raw_df_with_unwanted_conversions.columns} - ) - # Unwrap entries: - wide_raw_df = wide_raw_df_with_wrappers.applymap(lambda wrapper: wrapper[0]) - # Check format: - expect_value_eq(wide_raw_df.columns[0], "Week", - 'Expected raw_colnames[0] to be "{}"; encountered ') - for colname in wide_raw_df.columns: - expect_result_eq(dtype_kind, wide_raw_df[colname].head(), "O", - 'Expected (head of) "%s" column to have dtype kind "{}"; instead had dtype kind & head '%(colname)) - # Pack up df with metadata: - wide_raw = (wide_raw_df, release_date, parse_time, location) - return wide_raw - -def melt_wide_raw_to_long_raw(wide_raw): - (wide_raw_df, release_date, parse_time, location) = wide_raw - long_raw_df = wide_raw_df \ - .melt(id_vars=["Week"], var_name="measurement_type", value_name="value") \ - .rename(index=str, columns={"Week": "week"}) - long_raw = (long_raw_df, release_date, parse_time, location) - return long_raw diff --git a/src/acquisition/norostat/norostat_sql.py b/src/acquisition/norostat/norostat_sql.py deleted file mode 100644 index 168e275eb..000000000 --- a/src/acquisition/norostat/norostat_sql.py +++ /dev/null @@ -1,434 +0,0 @@ -# standard library -import re - -# third party -import mysql.connector - -# first party -from .norostat_utils import * -import delphi.operations.secrets as secrets - -# Column names: -# `release_date` :: release date as stated in the web page in the dateModified -# span, displayed on the web page with the label "Page last updated:" -# `parse_time` :: time that we attempted to parse the data out of a downloaded -# version of the web page; when the scraper is running, this may be similar -# to a fetch time, but when loading in past versions that have been saved, -# it probably won't mean the same thing; this is tracked (a) in case the -# provided release date ever is out of date so that the raw data will still -# be recorded and we can recover later on, and (b) to provide a record of -# when parses/fetches happened; if there is a request for the data for a -# particular `release_date` with no restrictions on `parse_time`, the -# version with the latest `parse_time` should be selected -# (`release_date`, `parse_time`) :: uniquely identify a version of the table -# `measurement_type_id` :: "pointer" to an interned measurement_type string -# `measurement_type` :: the name of some column other than "Week" in the -# data-table -# `location_id` :: "pointer" to an interned location string -# `location` :: a string containing the list of reporting states -# `week_id` :: "pointer" to an interned week string -# `week` :: a string entry from the "Week" column -# `value` :: an string entry from some column other than "Week" in the -# data-table -# `new_value` :: an update to a `value` provided by a new version of the data -# table: either a string representing an added or revised entry (or a -# redundant repetition of a value retained from a past issue --- although -# no such entries should be generated by the code in this file), or NULL -# representing a deletion of a cell/entry from the table -# -# Tables: -# `norostat_raw_datatable_version_list` :: list of all versions of the raw -# data-table that have ever been 
successfully parsed -# `_pool` :: maps each encountered value of string `` to a unique ID -# `_id`, so that the string's character data is not duplicated in the -# tables on disk; serves a purpose similar to Java's interned string pool -# `norostat_raw_datatable_diffs` :: contains diffs between consecutive versions -# of the raw data-table (when arranged according to the tuple -# (`release_date`,`parse_time`) using lexicographical tuple ordering) -# `norostat_raw_datatable_parsed` :: a temporary table to hold the version of -# the raw data-table (in long/melted format) to be recorded; uses string -# values instead of interned string id's, so will need to be joined with -# `*_pool` tables for operations with other tables -# `norostat_raw_datatable_previous` :: a temporary table to hold an -# already-recorded version of the raw data-table with the latest -# `release_date`, `parse_time` before those of the version to be recorded; -# if there is no such version, this table will be empty (as if we recorded -# an empty version of the table before all other versions); uses interned -# string id's -# `norostat_raw_datatable_next` :: a temporary table to hold an -# already-recorded version of the raw data-table with the earliest -# `release_date`, `parse_time` after those of the version to be recorded; -# if there is no such version, this table will not be created or used; uses -# interned string id's - -def ensure_tables_exist(): - (u, p) = secrets.db.epi - cnx = mysql.connector.connect(user=u, password=p, database='epidata') - try: - cursor = cnx.cursor() - cursor.execute(''' - CREATE TABLE IF NOT EXISTS `norostat_raw_datatable_version_list` ( - `release_date` DATE NOT NULL, - `parse_time` DATETIME(6) NOT NULL, - PRIMARY KEY (`release_date`, `parse_time`) - ); - ''') - cursor.execute(''' - CREATE TABLE IF NOT EXISTS `norostat_raw_datatable_measurement_type_pool` ( - `measurement_type_id` INT NOT NULL PRIMARY KEY AUTO_INCREMENT, - `measurement_type` NVARCHAR(255) NOT NULL UNIQUE KEY - ); - ''') - cursor.execute(''' - CREATE TABLE IF NOT EXISTS `norostat_raw_datatable_location_pool` ( - `location_id` INT NOT NULL PRIMARY KEY AUTO_INCREMENT, - `location` NVARCHAR(255) NOT NULL UNIQUE KEY - ); - ''') - cursor.execute(''' - CREATE TABLE IF NOT EXISTS `norostat_raw_datatable_week_pool` ( - `week_id` INT NOT NULL PRIMARY KEY AUTO_INCREMENT, - `week` NVARCHAR(255) NOT NULL UNIQUE KEY - ); - ''') - cursor.execute(''' - CREATE TABLE IF NOT EXISTS `norostat_raw_datatable_diffs` ( - `release_date` DATE NOT NULL, - `parse_time` DATETIME(6) NOT NULL, - `measurement_type_id` INT NOT NULL, - `location_id` INT NOT NULL, - `week_id` INT NOT NULL, - `new_value` NVARCHAR(255), -- allow NULL, with meaning "removed" - FOREIGN KEY (`release_date`,`parse_time`) REFERENCES `norostat_raw_datatable_version_list` (`release_date`,`parse_time`), - FOREIGN KEY (`measurement_type_id`) REFERENCES `norostat_raw_datatable_measurement_type_pool` (`measurement_type_id`), - FOREIGN KEY (`location_id`) REFERENCES `norostat_raw_datatable_location_pool` (`location_id`), - FOREIGN KEY (`week_id`) REFERENCES `norostat_raw_datatable_week_pool` (`week_id`), - UNIQUE KEY (`measurement_type_id`, `location_id`, `week_id`, `release_date`, `parse_time`, `new_value`), - PRIMARY KEY (`release_date`, `parse_time`, `measurement_type_id`, `location_id`, `week_id`) - -- (the indices here are larger than the data, but reducing the key - -- sizes and adding an id somehow seems to result in larger index sizes - -- somehow) - ); - ''') - cnx.commit() - 
finally: - cnx.close() - -def dangerously_drop_all_norostat_tables(): - (u, p) = secrets.db.epi - cnx = mysql.connector.connect(user=u, password=p, database='epidata') - try: - cursor = cnx.cursor() - # Drop tables in reverse order (to avoid foreign key related errors): - cursor.execute(''' - DROP TABLE IF EXISTS `norostat_point_diffs`, - `norostat_point_version_list`, - `norostat_raw_datatable_diffs`, - `norostat_raw_datatable_week_pool`, - `norostat_raw_datatable_location_pool`, - `norostat_raw_datatable_measurement_type_pool`, - `norostat_raw_datatable_version_list`; - ''') - cnx.commit() # (might do nothing; each DROP commits itself anyway) - finally: - cnx.close() - -def record_long_raw(long_raw): - (long_raw_df, release_date, parse_time, location) = long_raw - (u, p) = secrets.db.epi - cnx = mysql.connector.connect(user=u, password=p, database='epidata') - try: - cursor = cnx.cursor() - cnx.start_transaction(isolation_level='SERIALIZABLE') - # Create, populate `norostat_raw_datatable_parsed`: - cursor.execute(''' - CREATE TEMPORARY TABLE `norostat_raw_datatable_parsed` ( - `measurement_type` NVARCHAR(255) NOT NULL, - `location` NVARCHAR(255) NOT NULL, - `week` NVARCHAR(255) NOT NULL, - `value` NVARCHAR(255) NOT NULL, -- forbid NULL; has special external meaning (see above) - PRIMARY KEY (`measurement_type`, `location`, `week`) - ) ENGINE=MEMORY; - ''') - cursor.executemany(''' - INSERT INTO `norostat_raw_datatable_parsed` (`week`,`measurement_type`,`value`,`location`) - VALUES (%s, %s, %s, %s); - ''', [(week, measurement_type, value, location) for - (week, measurement_type, value) in long_raw_df[["week","measurement_type","value"]].astype(str).itertuples(index=False, name=None) - ]) - # Create, populate `norostat_raw_datatable_previous`: - cursor.execute(''' - CREATE TEMPORARY TABLE `norostat_raw_datatable_previous` ( - `measurement_type_id` INT NOT NULL, - `location_id` INT NOT NULL, - `week_id` INT NOT NULL, - `value` NVARCHAR(255) NOT NULL, -- forbid NULL; has special external meaning (see above) - -- would like but not allowed: FOREIGN KEY (`measurement_type_id`) REFERENCES `norostat_raw_datatable_measurement_type_pool` (`measurement_type_id`), - -- would like but not allowed: FOREIGN KEY (`location_id`) REFERENCES `norostat_raw_datatable_location_pool` (`location_id`), - -- would like but not allowed: FOREIGN KEY (`week_id`) REFERENCES `norostat_raw_datatable_week_pool` (`week_id`), - PRIMARY KEY (`measurement_type_id`, `location_id`, `week_id`) - ) ENGINE=MEMORY; - ''') - cursor.execute(''' - INSERT INTO `norostat_raw_datatable_previous` (`measurement_type_id`, `location_id`, `week_id`, `value`) - SELECT `latest`.`measurement_type_id`, `latest`.`location_id`, `latest`.`week_id`, `latest`.`new_value` - FROM `norostat_raw_datatable_diffs` AS `latest` - -- Get the latest `new_value` by "group" (measurement_type, location, week) - -- using the fact that there are no later measurements belonging to the - -- same group (find NULL entries in `later`.{release_date,parse_time} - -- in the LEFT JOIN below); if the latest `new_value` is NULL, don't - -- include it in the result; it means that the corresponding cell/entry - -- has been removed from the data-table: - LEFT JOIN ( - SELECT * FROM `norostat_raw_datatable_diffs` - WHERE (`release_date`,`parse_time`) <= (%s,%s) - ) `later` - ON `latest`.`measurement_type_id` = `later`.`measurement_type_id` AND - `latest`.`location_id` = `later`.`location_id` AND - `latest`.`week_id` = `later`.`week_id` AND - (`latest`.`release_date`, 
`latest`.`parse_time`) < - (`later`.`release_date`, `later`.`parse_time`) - WHERE (`latest`.`release_date`, `latest`.`parse_time`) <= (%s, %s) AND - `later`.`parse_time` IS NULL AND - `latest`.`new_value` IS NOT NULL; - ''', (release_date, parse_time, release_date, parse_time)) - # Find next recorded `release_date`, `parse_time` if any; create, populate - # `norostat_raw_datatable_next` if there is such a version: - cursor.execute(''' - SELECT `release_date`, `parse_time` - FROM `norostat_raw_datatable_version_list` - WHERE (`release_date`, `parse_time`) > (%s,%s) - ORDER BY `release_date`, `parse_time` - LIMIT 1 - ''', (release_date, parse_time)) - next_version_if_any = cursor.fetchall() - expect_result_in(len, next_version_if_any, (0,1), - 'Bug: expected next-version query to return a number of results in {}; instead have len & val ') - if len(next_version_if_any) != 0: - cursor.execute(''' - CREATE TEMPORARY TABLE `norostat_raw_datatable_next` ( - `measurement_type_id` INT NOT NULL, - `location_id` INT NOT NULL, - `week_id` INT NOT NULL, - `value` NVARCHAR(255) NOT NULL, -- forbid NULL; has special external meaning (see above) - -- would like but not allowed: FOREIGN KEY (`measurement_type_id`) REFERENCES `norostat_raw_datatable_measurement_type_pool` (`measurement_type_id`), - -- would like but not allowed: FOREIGN KEY (`location_id`) REFERENCES `norostat_raw_datatable_location_pool` (`location_id`), - -- would like but not allowed: FOREIGN KEY (`week_id`) REFERENCES `norostat_raw_datatable_week_pool` (`week_id`), - PRIMARY KEY (`measurement_type_id`, `location_id`, `week_id`) - ) ENGINE=MEMORY; - ''') - cursor.execute(''' - INSERT INTO `norostat_raw_datatable_next` (`measurement_type_id`, `location_id`, `week_id`, `value`) - SELECT `latest`.`measurement_type_id`, `latest`.`location_id`, `latest`.`week_id`, `latest`.`new_value` - FROM `norostat_raw_datatable_diffs` AS `latest` - -- Get the latest `new_value` by "group" (measurement_type, location, week) - -- using the fact that there are no later measurements belonging to the - -- same group (find NULL entries in `later`.{release_date,parse_time} - -- in the LEFT JOIN below); if the latest `new_value` is NULL, don't - -- include it in the result; it means that the corresponding cell/entry - -- has been removed from the data-table: - LEFT JOIN ( - SELECT * FROM `norostat_raw_datatable_diffs` - WHERE (`release_date`,`parse_time`) <= (%s, %s) - ) `later` - ON `latest`.`measurement_type_id` = `later`.`measurement_type_id` AND - `latest`.`location_id` = `later`.`location_id` AND - `latest`.`week_id` = `later`.`week_id` AND - (`latest`.`release_date`, `latest`.`parse_time`) < - (`later`.`release_date`, `later`.`parse_time`) - WHERE (`latest`.`release_date`, `latest`.`parse_time`) <= (%s, %s) AND - `later`.`parse_time` IS NULL AND - `latest`.`new_value` IS NOT NULL -- NULL means value was removed - ''', next_version_if_any[0]+next_version_if_any[0]) - # Register new version in version list: - try: - cursor.execute(''' - INSERT INTO `norostat_raw_datatable_version_list` (`release_date`, `parse_time`) - VALUES (%s, %s) - ''', (release_date, parse_time)) - except mysql.connector.errors.IntegrityError as e: - raise Exception(['Encountered an IntegrityError when updating the norostat_raw_datatable_version_list table; this probably indicates that a version with the same `release_date` and `parse_time` was already added to the database; parse_time has limited resolution, so this can happen from populating the database too quickly when there are 
duplicate release dates; original error: ', e]) - # Add any new measurement_type, location, or week strings to the associated - # string pools: - cursor.execute(''' - INSERT INTO `norostat_raw_datatable_measurement_type_pool` (`measurement_type`) - SELECT DISTINCT `measurement_type` - FROM `norostat_raw_datatable_parsed` - WHERE `measurement_type` NOT IN ( - SELECT `norostat_raw_datatable_measurement_type_pool`.`measurement_type` - FROM `norostat_raw_datatable_measurement_type_pool` - ); - ''') - cursor.execute(''' - INSERT INTO `norostat_raw_datatable_location_pool` (`location`) - SELECT DISTINCT `location` - FROM `norostat_raw_datatable_parsed` - WHERE `location` NOT IN ( - SELECT `norostat_raw_datatable_location_pool`.`location` - FROM `norostat_raw_datatable_location_pool` - ); - ''') - cursor.execute(''' - INSERT INTO `norostat_raw_datatable_week_pool` (`week`) - SELECT DISTINCT `week` - FROM `norostat_raw_datatable_parsed` - WHERE `week` NOT IN ( - SELECT `norostat_raw_datatable_week_pool`.`week` - FROM `norostat_raw_datatable_week_pool` - ); - ''') - # Record diff: [newly parsed version "minus" previous version] (first, - # record additions/updates, then record deletions): - cursor.execute(''' - INSERT INTO `norostat_raw_datatable_diffs` (`measurement_type_id`, `location_id`, `week_id`, `release_date`, `parse_time`, `new_value`) - SELECT `measurement_type_id`, `location_id`, `week_id`, %s, %s, `value` - FROM `norostat_raw_datatable_parsed` - LEFT JOIN `norostat_raw_datatable_measurement_type_pool` USING (`measurement_type`) - LEFT JOIN `norostat_raw_datatable_location_pool` USING (`location`) - LEFT JOIN `norostat_raw_datatable_week_pool` USING (`week`) - WHERE (`measurement_type_id`, `location_id`, `week_id`, `value`) NOT IN ( - SELECT `norostat_raw_datatable_previous`.`measurement_type_id`, - `norostat_raw_datatable_previous`.`location_id`, - `norostat_raw_datatable_previous`.`week_id`, - `norostat_raw_datatable_previous`.`value` - FROM `norostat_raw_datatable_previous` - ); - ''', (release_date, parse_time)) - cursor.execute(''' - INSERT INTO `norostat_raw_datatable_diffs` (`measurement_type_id`, `location_id`, `week_id`, `release_date`, `parse_time`, `new_value`) - SELECT `measurement_type_id`, `location_id`, `week_id`, %s, %s, NULL - FROM `norostat_raw_datatable_previous` - WHERE (`measurement_type_id`, `location_id`, `week_id`) NOT IN ( - SELECT `norostat_raw_datatable_measurement_type_pool`.`measurement_type_id`, - `norostat_raw_datatable_location_pool`.`location_id`, - `norostat_raw_datatable_week_pool`.`week_id` - FROM `norostat_raw_datatable_parsed` - LEFT JOIN `norostat_raw_datatable_measurement_type_pool` USING (`measurement_type`) - LEFT JOIN `norostat_raw_datatable_location_pool` USING (`location`) - LEFT JOIN `norostat_raw_datatable_week_pool` USING (`week`) - ); - ''', (release_date, parse_time)) - # If there is an already-recorded next version, its diff is invalidated by - # the insertion of the newly parsed version; delete the [next version - # "minus" previous version] diff and record the [next version "minus" newly - # parsed] diff: - if len(next_version_if_any) != 0: - cursor.execute(''' - DELETE FROM `norostat_raw_datatable_diffs` - WHERE `release_date`=%s AND `parse_time`=%s; - ''', next_version_if_any[0]) - cursor.execute(''' - INSERT INTO `norostat_raw_datatable_diffs` (`measurement_type_id`, `location_id`, `week_id`, `release_date`, `parse_time`, `new_value`) - SELECT `measurement_type_id`, `location_id`, `week_id`, %s, %s, `value` - FROM 
`norostat_raw_datatable_next` - WHERE (`measurement_type_id`, `location_id`, `week_id`, `value`) NOT IN ( - SELECT - `norostat_raw_datatable_measurement_type_pool`.`measurement_type_id`, - `norostat_raw_datatable_location_pool`.`location_id`, - `norostat_raw_datatable_week_pool`.`week_id`, - `norostat_raw_datatable_parsed`.`value` - FROM `norostat_raw_datatable_parsed` - LEFT JOIN `norostat_raw_datatable_measurement_type_pool` USING (`measurement_type`) - LEFT JOIN `norostat_raw_datatable_location_pool` USING (`location`) - LEFT JOIN `norostat_raw_datatable_week_pool` USING (`week`) - ); - ''', next_version_if_any[0]) - cursor.execute(''' - INSERT INTO `norostat_raw_datatable_diffs` (`measurement_type_id`, `location_id`, `week_id`, `release_date`, `parse_time`, `new_value`) - SELECT `measurement_type_id`, `location_id`, `week_id`, %s, %s, NULL - FROM `norostat_raw_datatable_parsed` - LEFT JOIN `norostat_raw_datatable_measurement_type_pool` USING (`measurement_type`) - LEFT JOIN `norostat_raw_datatable_location_pool` USING (`location`) - LEFT JOIN `norostat_raw_datatable_week_pool` USING (`week`) - WHERE (`measurement_type_id`, `location_id`, `week_id`) NOT IN ( - SELECT `norostat_raw_datatable_next`.`measurement_type_id`, - `norostat_raw_datatable_next`.`location_id`, - `norostat_raw_datatable_next`.`week_id` - FROM `norostat_raw_datatable_next` - ); - ''', next_version_if_any[0]) - cursor.execute(''' - CREATE TABLE IF NOT EXISTS `norostat_point_version_list` ( - `release_date` DATE NOT NULL, - `parse_time` DATETIME(6) NOT NULL, - FOREIGN KEY (`release_date`,`parse_time`) REFERENCES `norostat_raw_datatable_version_list` (`release_date`,`parse_time`), - PRIMARY KEY (`release_date`, `parse_time`) - ); - ''') - cursor.execute(''' - CREATE TABLE IF NOT EXISTS `norostat_point_diffs` ( - `release_date` DATE NOT NULL, - `parse_time` datetime(6) NOT NULL, - `location_id` INT NOT NULL, - `epiweek` INT NOT NULL, - `new_value` NVARCHAR(255), -- allow NULL, with meaning "removed" - FOREIGN KEY (`release_date`,`parse_time`) REFERENCES `norostat_point_version_list` (`release_date`,`parse_time`), - FOREIGN KEY (`location_id`) REFERENCES norostat_raw_datatable_location_pool (`location_id`), - UNIQUE KEY (`location_id`, `epiweek`, `release_date`, `parse_time`, `new_value`), - PRIMARY KEY (`release_date`, `parse_time`, `location_id`, `epiweek`) - ); - ''') - cnx.commit() # (might do nothing; each statement above takes effect and/or commits immediately) - finally: - cnx.close() - -def update_point(): - (u, p) = secrets.db.epi - cnx = mysql.connector.connect(user=u, password=p, database='epidata') - try: - cursor = cnx.cursor() - cnx.start_transaction(isolation_level='serializable') - cursor.execute(''' - SELECT `release_date`, `parse_time`, `measurement_type`, `location_id`, `week`, `new_value` - FROM `norostat_raw_datatable_diffs` - LEFT JOIN `norostat_raw_datatable_measurement_type_pool` USING (`measurement_type_id`) - LEFT JOIN `norostat_raw_datatable_week_pool` USING (`week_id`) - WHERE (`release_date`, `parse_time`) NOT IN ( - SELECT `norostat_point_version_list`.`release_date`, - `norostat_point_version_list`.`parse_time` - FROM `norostat_point_version_list` - ); - ''') - raw_datatable_diff_selection = cursor.fetchall() - prog = re.compile(r"[0-9]+-[0-9]+$") - point_diff_insertion = [ - (release_date, parse_time, location_id, - season_db_to_epiweek(measurement_type, week), - int(new_value_str) if new_value_str is not None else None - ) - for (release_date, parse_time, measurement_type, location_id, 
week, new_value_str) - in raw_datatable_diff_selection - if prog.match(measurement_type) is not None and - new_value_str != "" - ] - cursor.execute(''' - INSERT INTO `norostat_point_version_list` (`release_date`, `parse_time`) - SELECT DISTINCT `release_date`, `parse_time` - FROM `norostat_raw_datatable_version_list` - WHERE (`release_date`, `parse_time`) NOT IN ( - SELECT `norostat_point_version_list`.`release_date`, - `norostat_point_version_list`.`parse_time` - FROM `norostat_point_version_list` - ); - ''') - cursor.executemany(''' - INSERT INTO `norostat_point_diffs` (`release_date`, `parse_time`, `location_id`, `epiweek`, `new_value`) - VALUES (%s, %s, %s, %s, %s) - ''', point_diff_insertion) - cnx.commit() - finally: - cnx.close() - -# note there are more efficient ways to calculate diffs without forming ..._next table -# todo give indices names -# todo trim pool functionality for if data is deleted? -# todo make classes to handle pool, keyval store, and diff table query formation -# todo test mode w/ rollback -# todo record position of rows and columns in raw data-table (using additional diff tables) -# todo consider measurement index mapping to another id -# todo add fetch_time to version list -# xxx replace "import *"'s -# xxx should cursor be closed? -# xxx is cnx auto-closed on errors? -# xxx drop temporary tables? -# fixme time zone issues diff --git a/src/acquisition/norostat/norostat_update.py b/src/acquisition/norostat/norostat_update.py deleted file mode 100644 index 4b0021dd5..000000000 --- a/src/acquisition/norostat/norostat_update.py +++ /dev/null @@ -1,28 +0,0 @@ -""" -=============== -=== Purpose === -=============== - -Fetch NoroSTAT data table from -; -process and record it in the appropriate databases. -""" - -# first party -from . import norostat_sql -from . 
import norostat_raw
-
-
-def main():
-  # Download the data:
-  # content = norostat_raw.load_sample_content()
-  content = norostat_raw.fetch_content()
-  # norostat_raw.save_sample_content(content)
-  wide_raw = norostat_raw.parse_content_to_wide_raw(content)
-  long_raw = norostat_raw.melt_wide_raw_to_long_raw(wide_raw)
-  norostat_sql.ensure_tables_exist()
-  norostat_sql.record_long_raw(long_raw)
-  norostat_sql.update_point()
-
-if __name__ == '__main__':
-  main()
diff --git a/src/acquisition/norostat/norostat_utils.py b/src/acquisition/norostat/norostat_utils.py
deleted file mode 100644
index a99a4dc96..000000000
--- a/src/acquisition/norostat/norostat_utils.py
+++ /dev/null
@@ -1,44 +0,0 @@
-# standard library
-import re
-import datetime
-
-# first party
-from delphi.utils.epidate import EpiDate
-
-# helper funs for checking expectations, throwing exceptions on violations:
-def expect_value_eq(encountered, expected, mismatch_format):
-  if encountered != expected:
-    raise Exception([mismatch_format.format(expected), encountered])
-def expect_result_eq(f, value, expected, mismatch_format):
-  result = f(value)
-  if result != expected:
-    raise Exception([mismatch_format.format(expected), result, value])
-def expect_value_in(encountered, expected_candidates, mismatch_format):
-  if encountered not in expected_candidates:
-    raise Exception([mismatch_format.format(expected_candidates), encountered])
-def expect_result_in(f, value, expected_candidates, mismatch_format):
-  result = f(value)
-  if result not in expected_candidates:
-    raise Exception([mismatch_format.format(expected_candidates), result, value])
-def expect_str_contains(encountered, regex, mismatch_format):
-  if re.search(regex, encountered) is None:
-    raise Exception([mismatch_format.format(regex), encountered])
-
-# helper fun used with expect_* funs to check value of .dtype.kind:
-def dtype_kind(numpy_like):
-  return numpy_like.dtype.kind
-
-# helper fun used to convert season string ("YYYY-YY" or "YYYY-YYYY") and
-# "Week" string (strptime format "%d-%b") to the corresponding epiweek; assumes
-# by default that dates >= 1-Aug correspond to weeks of the first year:
-def season_db_to_epiweek(season_str, db_date_str, first_db_date_of_season_str="1-Aug"):
-  year_strs = season_str.split("-")
-  first_year = int(year_strs[0])
-  second_year = first_year + 1
-  # FIXME check/enforce locale
-  first_date_of_season = datetime.datetime.strptime(first_db_date_of_season_str+"-"+str(first_year), "%d-%b-%Y").date()
-  date_using_first_year = datetime.datetime.strptime(db_date_str+"-"+str(first_year), "%d-%b-%Y").date()
-  date_using_second_year = datetime.datetime.strptime(db_date_str+"-"+str(second_year), "%d-%b-%Y").date()
-  date = date_using_first_year if date_using_first_year >= first_date_of_season else date_using_second_year
-  epiweek = EpiDate(date.year, date.month, date.day).get_ew()
-  return epiweek
diff --git a/src/acquisition/norostat/sample_content.pickle b/src/acquisition/norostat/sample_content.pickle
deleted file mode 100644
index 1518dde0deb517bb8641573d685ba808d9e39c8d..0000000000000000000000000000000000000000
GIT binary patch
literal 0
HcmV?d00001

literal 37801
[base85 payload of the deleted 37801-byte sample_content.pickle omitted]

Date: Fri, 23 Jun 2023 11:33:56 -0700
Subject: [PATCH 03/43] docs(norostat): note that updates stopped November 2020

---
 docs/api/norostat.md | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/docs/api/norostat.md b/docs/api/norostat.md
index 6e801116c..734dd9aa9 100644
--- a/docs/api/norostat.md
+++ b/docs/api/norostat.md
@@ -13,6 +13,8 @@ General topics not specific to any particular endpoint are discussed in the
 [contributing](README.md#contributing), [citing](README.md#citing), and
 [data licensing](README.md#data-licensing).
 
+**NOTE**: This data source stopped acquiring data in November 2020.
+
 ## NoroSTAT Data
 
 ...

From 32b05e976d66f50c6b0d7c5e04bf4f2923e503a5 Mon Sep 17 00:00:00 2001
From: Dmitry Shemetov
Date: Fri, 23 Jun 2023 11:38:18 -0700
Subject: [PATCH 04/43] docs(norostat): correct wording

---
 docs/api/norostat.md | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/docs/api/norostat.md b/docs/api/norostat.md
index 734dd9aa9..dded4ec13 100644
--- a/docs/api/norostat.md
+++ b/docs/api/norostat.md
@@ -13,7 +13,7 @@ General topics not specific to any particular endpoint are discussed in the
 [contributing](README.md#contributing), [citing](README.md#citing), and
 [data licensing](README.md#data-licensing).
 
-**NOTE**: This data source stopped acquiring data in November 2020.
+**NOTE**: Delphi stopped acquiring data from this data source in November 2020.
 
 ## NoroSTAT Data
 

From c7e45c1a23873cddfeaf396742c8c7ac1240d8d2 Mon Sep 17 00:00:00 2001
From: Dmitry Shemetov
Date: Fri, 23 Jun 2023 14:55:03 -0700
Subject: [PATCH 05/43] feat(afhsb): remove afhsb acquisition code

---
 src/acquisition/afhsb/afhsb_csv.py    | 351 --------------------------
 src/acquisition/afhsb/afhsb_sql.py    | 194 --------------
 src/acquisition/afhsb/afhsb_update.py |  39 ---
 3 files changed, 584 deletions(-)
 delete mode 100644 src/acquisition/afhsb/afhsb_csv.py
 delete mode 100644 src/acquisition/afhsb/afhsb_sql.py
 delete mode 100644 src/acquisition/afhsb/afhsb_update.py

diff --git a/src/acquisition/afhsb/afhsb_csv.py b/src/acquisition/afhsb/afhsb_csv.py
deleted file mode 100644
index b839c4053..000000000
--- a/src/acquisition/afhsb/afhsb_csv.py
+++ /dev/null
@@ -1,351 +0,0 @@
-'''
-afhsb_csv.py creates CSV files filled_00to13.csv, filled_13to17.csv and simple_DMISID_FY2018.csv
-which will be later used to create MYSQL data tables.
- -Several intermediate files will be created, including: -00to13.pickle 13to17.pickle 00to13.csv 13to17.csv - -Required source files: -ili_1_2000_5_2013_new.sas7bdat and ili_1_2013_11_2017_new.sas7bdat under SOURCE_DIR -country_codes.csv and DMISID_FY2018.csv under TARGET_DIR -All intermediate files and final csv files will be stored in TARGET_DIR -''' - -import csv -import os - -import pickle -import sas7bdat -import epiweeks as epi - - -DATAPATH = '/home/automation/afhsb_data' -SOURCE_DIR = DATAPATH -TARGET_DIR = DATAPATH - -INVALID_DMISIDS = set() - -def get_flu_cat(dx): - # flu1 (influenza) - if len(dx) == 0: - return None - dx = dx.capitalize() - if dx.isnumeric(): - for prefix in ["487", "488"]: - if dx.startswith(prefix): - return 1 - for i in range(0, 7): - prefix = str(480 + i) - if dx.startswith(prefix): - return 2 - for i in range(0, 7): - prefix = str(460 + i) - if dx.startswith(prefix): - return 3 - for prefix in ["07999", "3829", "7806", "7862"]: - if dx.startswith(prefix): - return 3 - elif (dx[0].isalpha() and dx[1:].isnumeric()): - for prefix in ["J09", "J10", "J11"]: - if dx.startswith(prefix): - return 1 - for i in range(12, 19): - prefix = "J{}".format(i) - if dx.startswith(prefix): - return 2 - for i in range(0, 7): - prefix = "J0{}".format(i) - if dx.startswith(prefix): - return 3 - for i in range(20, 23): - prefix = "J{}".format(i) - if dx.startswith(prefix): - return 3 - for prefix in ["J40", "R05", "H669", "R509", "B9789"]: - if dx.startswith(prefix): - return 3 - else: - return None - -def aggregate_data(sourcefile, targetfile): - reader = sas7bdat.SAS7BDAT(os.path.join(SOURCE_DIR, sourcefile), skip_header=True) - # map column names to column indices - col_2_idx = {column.name.decode('utf-8'): column.col_id for column in reader.columns} - - def get_field(row, column): - return row[col_2_idx[column]] - - def row2flu(row): - for i in range(1, 9): - dx = get_field(row, "dx{}".format(i)) - flu_cat = get_flu_cat(dx) - if flu_cat is not None: - return flu_cat - return 0 - - def row2epiweek(row): - date = get_field(row, 'd_event') - year, month, day = date.year, date.month, date.day - week_tuple = epi.Week.fromdate(year, month, day).weektuple() - year, week_num = week_tuple[0], week_tuple[1] - return year, week_num - - results_dict = {} - for _, row in enumerate(reader): - # if (r >= 1000000): break - if get_field(row, 'type') != "Outpt": - continue - year, week_num = row2epiweek(row) - dmisid = get_field(row, 'DMISID') - flu_cat = row2flu(row) - - key_list = [year, week_num, dmisid, flu_cat] - curr_dict = results_dict - for i, key in enumerate(key_list): - if i == len(key_list) - 1: - if key not in curr_dict: - curr_dict[key] = 0 - curr_dict[key] += 1 - else: - if key not in curr_dict: - curr_dict[key] = {} - curr_dict = curr_dict[key] - - results_path = os.path.join(TARGET_DIR, targetfile) - with open(results_path, 'wb') as f: - pickle.dump(results_dict, f, pickle.HIGHEST_PROTOCOL) - - -################# Functions for geographical information #################### - -def get_country_mapping(): - filename = "country_codes.csv" - mapping = dict() - with open(os.path.join(TARGET_DIR, filename), "r") as csvfile: - reader = csv.DictReader(csvfile) - for row in reader: - print(row.keys()) - alpha2 = row['alpha-2'] - alpha3 = row['alpha-3'] - mapping[alpha2] = alpha3 - - return mapping - -def format_dmisid_csv(filename, target_name): - src_path = os.path.join(TARGET_DIR, "{}.csv".format(filename)) - dst_path = os.path.join(TARGET_DIR, target_name) - - src_csv = open(src_path, 
"r", encoding='utf-8-sig') - reader = csv.DictReader(src_csv) - - dst_csv = open(dst_path, "w") - fieldnames = ['dmisid', 'country', 'state', 'zip5'] - writer = csv.DictWriter(dst_csv, fieldnames=fieldnames) - writer.writeheader() - - country_mapping = get_country_mapping() - - for row in reader: - country2 = row['Facility ISO Country Code'] - if country2 == "": - country3 = "" - elif country2 not in country_mapping: - for key in row.keys(): - print(key, row[key]) - continue - else: - country3 = country_mapping[country2] - new_row = {'dmisid': row['DMIS ID'], - 'country': country3, - 'state': row['Facility State Code'], - 'zip5': row['Facility 5-Digit ZIP Code']} - writer.writerow(new_row) - -def dmisid(): - filename = 'DMISID_FY2018' - target_name = "simple_DMISID_FY2018.csv" - format_dmisid_csv(filename, target_name) - - -cen2states = {'cen1': {'CT', 'ME', 'MA', 'NH', 'RI', 'VT'}, - 'cen2': {'NJ', 'NY', 'PA'}, - 'cen3': {'IL', 'IN', 'MI', 'OH', 'WI'}, - 'cen4': {'IA', 'KS', 'MN', 'MO', 'NE', 'ND', 'SD'}, - 'cen5': {'DE', 'DC', 'FL', 'GA', 'MD', 'NC', 'SC', 'VA', 'WV'}, - 'cen6': {'AL', 'KY', 'MS', 'TN'}, - 'cen7': {'AR', 'LA', 'OK', 'TX'}, - 'cen8': {'AZ', 'CO', 'ID', 'MT', 'NV', 'NM', 'UT', 'WY'}, - 'cen9': {'AK', 'CA', 'HI', 'OR', 'WA'}} - -hhs2states = {'hhs1': {'VT', 'CT', 'ME', 'MA', 'NH', 'RI'}, - 'hhs2': {'NJ', 'NY'}, - 'hhs3': {'DE', 'DC', 'MD', 'PA', 'VA', 'WV'}, - 'hhs4': {'AL', 'FL', 'GA', 'KY', 'MS', 'NC', 'TN', 'SC'}, - 'hhs5': {'IL', 'IN', 'MI', 'MN', 'OH', 'WI'}, - 'hhs6': {'AR', 'LA', 'NM', 'OK', 'TX'}, - 'hhs7': {'IA', 'KS', 'MO', 'NE'}, - 'hhs8': {'CO', 'MT', 'ND', 'SD', 'UT', 'WY'}, - 'hhs9': {'AZ', 'CA', 'HI', 'NV'}, - 'hhs10': {'AK', 'ID', 'OR', 'WA'}} - -def state2region(D): - results = dict() - for region in D.keys(): - states = D[region] - for state in states: - assert state not in results - results[state] = region - return results - -def state2region_csv(): - to_hhs = state2region(hhs2states) - to_cen = state2region(cen2states) - states = to_hhs.keys() - target_name = "state2region.csv" - fieldnames = ['state', 'hhs', 'cen'] - with open(target_name, "w") as csvfile: - writer = csv.DictWriter(csvfile, fieldnames=fieldnames) - writer.writeheader() - for state in states: - content = {"state": state, "hhs": to_hhs[state], "cen": to_cen[state]} - writer.writerow(content) - -################# Functions for geographical information #################### - -######################### Functions for AFHSB data ########################## - -def write_afhsb_csv(period): - flu_mapping = {0: "ili-flu3", 1: "flu1", 2:"flu2-flu1", 3: "flu3-flu2"} - results_dict = pickle.load(open(os.path.join(TARGET_DIR, "{}.pickle".format(period)), 'rb')) - - fieldnames = ["id", "epiweek", "dmisid", "flu_type", "visit_sum"] - with open(os.path.join(TARGET_DIR, "{}.csv".format(period)), 'w') as csvfile: - writer = csv.DictWriter(csvfile, fieldnames=fieldnames) - writer.writeheader() - - i = 0 - for year in sorted(results_dict.keys()): - year_dict = results_dict[year] - for week in sorted(year_dict.keys()): - week_dict = year_dict[week] - for dmisid in sorted(week_dict.keys()): - dmisid_dict = week_dict[dmisid] - for flu in sorted(dmisid_dict.keys()): - visit_sum = dmisid_dict[flu] - i += 1 - epiweek = int("{}{:02d}".format(year, week)) - flu_type = flu_mapping[flu] - - row = {"epiweek": epiweek, "dmisid": None if (not dmisid.isnumeric()) else dmisid, - "flu_type": flu_type, "visit_sum": visit_sum, "id": i} - writer.writerow(row) - if i % 100000 == 0: - print(row) - -def 
dmisid_start_time_from_file(filename): - starttime_record = dict() - with open(filename, 'r') as csvfile: - reader = csv.DictReader(csvfile) - for row in reader: - dmisid = row['dmisid'] - epiweek = int(row['epiweek']) - if dmisid not in starttime_record: - starttime_record[dmisid] = epiweek - else: - starttime_record[dmisid] = min(epiweek, starttime_record[dmisid]) - return starttime_record - -def dmisid_start_time(): - record1 = dmisid_start_time_from_file(os.path.join(TARGET_DIR, "00to13.csv")) - record2 = dmisid_start_time_from_file(os.path.join(TARGET_DIR, "13to17.csv")) - record = record1 - for dmisid, epiweek in record2.items(): - if dmisid in record: - record[dmisid] = min(record[dmisid], epiweek) - else: - record[dmisid] = epiweek - return record - -def fillin_zero_to_csv(period, dmisid_start_record): - src_path = os.path.join(TARGET_DIR, "{}.csv".format(period)) - dst_path = os.path.join(TARGET_DIR, "filled_{}.csv".format(period)) - - # Load data into a dictionary - src_csv = open(src_path, "r") - reader = csv.DictReader(src_csv) - - results_dict = dict() # epiweek -> dmisid -> flu_type: visit_sum - for i, row in enumerate(reader): - epiweek = int(row['epiweek']) - dmisid = row['dmisid'] - flu_type = row['flu_type'] - visit_sum = row['visit_sum'] - if epiweek not in results_dict: - results_dict[epiweek] = dict() - week_dict = results_dict[epiweek] - if dmisid not in week_dict: - week_dict[dmisid] = dict() - dmisid_dict = week_dict[dmisid] - dmisid_dict[flu_type] = visit_sum - - # Fill in zero count records - dmisid_group = dmisid_start_record.keys() - flutype_group = ["ili-flu3", "flu1", "flu2-flu1", "flu3-flu2"] - - for epiweek in results_dict.keys(): - week_dict = results_dict[epiweek] - for dmisid in dmisid_group: - start_week = dmisid_start_record[dmisid] - if start_week > epiweek: - continue - - if dmisid not in week_dict: - week_dict[dmisid] = dict() - - dmisid_dict = week_dict[dmisid] - for flutype in flutype_group: - if flutype not in dmisid_dict: - dmisid_dict[flutype] = 0 - - # Write to csv files - dst_csv = open(dst_path, "w") - fieldnames = ["id", "epiweek", "dmisid", "flu_type", "visit_sum"] - writer = csv.DictWriter(dst_csv, fieldnames=fieldnames) - writer.writeheader() - - i = 1 - for epiweek in results_dict: - for dmisid in results_dict[epiweek]: - for flutype in results_dict[epiweek][dmisid]: - visit_sum = results_dict[epiweek][dmisid][flutype] - row = {"id": i, "epiweek": epiweek, "dmisid": dmisid, - "flu_type": flutype, "visit_sum": visit_sum} - writer.writerow(row) - if i % 100000 == 0: - print(row) - i += 1 - print("Wrote {} rows".format(i)) - -######################### Functions for AFHSB data ########################## - -def main(): - # Build tables containing geographical information - state2region_csv() - dmisid() - - # Aggregate raw data into pickle files - aggregate_data("ili_1_2000_5_2013_new.sas7bdat", "00to13.pickle") - aggregate_data("ili_1_2013_11_2017_new.sas7bdat", "13to17.pickle") - - # write pickle content to csv files - write_afhsb_csv("00to13") - write_afhsb_csv("13to17") - - # Fill in zero count records - dmisid_start_record = dmisid_start_time() - fillin_zero_to_csv("00to13", dmisid_start_record) - fillin_zero_to_csv("13to17", dmisid_start_record) - - -if __name__ == '__main__': - main() diff --git a/src/acquisition/afhsb/afhsb_sql.py b/src/acquisition/afhsb/afhsb_sql.py deleted file mode 100644 index 278f3fc38..000000000 --- a/src/acquisition/afhsb/afhsb_sql.py +++ /dev/null @@ -1,194 +0,0 @@ -# standard library -import os - -# third 
party -import mysql.connector as connector - -# first party -import delphi.operations.secrets as secrets - - -def init_dmisid_table(sourcefile): - (u, p) = secrets.db.epi - cnx = connector.connect(user=u, passwd=p, database="epidata") - table_name = 'dmisid_table' - create_table_cmd = ''' - CREATE TABLE `{}` ( - `dmisid` INT(4) NOT NULL PRIMARY KEY, - `country` CHAR(3) NULL, - `state` CHAR(2) NULL - ); - '''.format(table_name) - populate_table_cmd = ''' - LOAD DATA INFILE '{}' - INTO TABLE {} - FIELDS TERMINATED BY ',' - ENCLOSED BY '"' - LINES TERMINATED BY '\r\n' - IGNORE 1 ROWS - (@dmisid, @country, @state, @zip5) - SET - dmisid = @dmisid, - country = nullif(@country, ''), - state = nullif(@state, '') - ; - '''.format(sourcefile, table_name) - try: - cursor = cnx.cursor() - cursor.execute(create_table_cmd) - cursor.execute(populate_table_cmd) - cnx.commit() - finally: - cnx.close() - -def init_region_table(sourcefile): - (u, p) = secrets.db.epi - cnx = connector.connect(user=u, passwd=p, database="epidata") - table_name = 'state2region_table' - create_table_cmd = ''' - CREATE TABLE `{}` ( - `state` CHAR(2) NOT NULL PRIMARY KEY, - `hhs` CHAR(5) NOT NULL, - `cen` CHAR(4) NOT NULL - ); - '''.format(table_name) - populate_table_cmd = ''' - LOAD DATA INFILE '{}' - INTO TABLE {} - FIELDS TERMINATED BY ',' - ENCLOSED BY '"' - LINES TERMINATED BY '\r\n' - IGNORE 1 ROWS - (@state, @hhs, @cen) - SET state=@state, hhs=@hhs, cen=@cen; - '''.format(sourcefile, table_name) - try: - cursor = cnx.cursor() - cursor.execute(create_table_cmd) - cursor.execute(populate_table_cmd) - cnx.commit() - finally: - cnx.close() - - -def init_raw_data(table_name, sourcefile): - print("Initialize {}".format(table_name)) - (u, p) = secrets.db.epi - cnx = connector.connect(user=u, passwd=p, database="epidata") - create_table_cmd = ''' - CREATE TABLE IF NOT EXISTS `{}` ( - `id` INT(11) NOT NULL PRIMARY KEY AUTO_INCREMENT, - `epiweek` INT(6) NOT NULL, - `dmisid` CHAR(4) NULL, - `flu_type` CHAR(9) NOT NULL, - `visit_sum` INT(11) NOT NULL, - - KEY `epiweek` (`epiweek`), - KEY `dmisid` (`dmisid`), - KEY `flu_type` (`flu_type`) - ); - '''.format(table_name) - populate_table_cmd = ''' - LOAD DATA INFILE '{}' - INTO TABLE {} - FIELDS TERMINATED BY ',' - ENCLOSED BY '"' - LINES TERMINATED BY '\r\n' - IGNORE 1 ROWS - (@id, @epiweek, @dmisid, @flu, @visits) - SET - id = @id, - epiweek = @epiweek, - dmisid = nullif(@dmisid, 'ZZZZ'), - flu_type = @flu, - visit_sum = @visits - ; - '''.format(sourcefile, table_name) - try: - cursor = cnx.cursor() - cursor.execute(create_table_cmd) - cursor.execute(populate_table_cmd) - cnx.commit() - finally: - cnx.close() - -def agg_by_state(src_table, dest_table): - print("Aggregating records by states...") - (u, p) = secrets.db.epi - cnx = connector.connect(user=u, passwd=p, database="epidata") - cmd = ''' - CREATE TABLE {} - SELECT a.epiweek, a.flu_type, d.state, d.country, sum(a.visit_sum) visit_sum - FROM {} a - LEFT JOIN dmisid_table d - ON a.dmisid = d.dmisid - GROUP BY a.epiweek, a.flu_type, d.state, d.country; - '''.format(dest_table, src_table) - try: - cursor = cnx.cursor() - cursor.execute(cmd) - cnx.commit() - finally: - cnx.close() - -def agg_by_region(src_table, dest_table): - print("Aggregating records by regions...") - (u, p) = secrets.db.epi - cnx = connector.connect(user=u, passwd=p, database="epidata") - cmd = ''' - CREATE TABLE {} - SELECT s.epiweek, s.flu_type, r.hhs, r.cen, sum(s.visit_sum) visit_sum - FROM {} s - LEFT JOIN state2region_table r - ON s.state = r.state - GROUP 
BY s.epiweek, s.flu_type, r.hhs, r.cen; - '''.format(dest_table, src_table) - try: - cursor = cnx.cursor() - cursor.execute(cmd) - cnx.commit() - finally: - cnx.close() - -def init_all_tables(datapath): - init_dmisid_table(os.path.join(datapath, "simple_DMISID_FY2018.csv")) - init_region_table(os.path.join(datapath, "state2region.csv")) - - periods = ["00to13", "13to17"] - for period in periods: - raw_table_name = 'afhsb_{}_raw'.format(period) - state_table_name = 'afhsb_{}_state'.format(period) - region_table_name = 'afhsb_{}_region'.format(period) - - init_raw_data(raw_table_name, os.path.join(datapath, "filled_{}.csv".format(period))) - agg_by_state(raw_table_name, state_table_name) - agg_by_region(state_table_name, region_table_name) - -def dangerously_drop_all_afhsb_tables(): - (u, p) = secrets.db.epi - cnx = connector.connect(user=u, passwd=p, database="epidata") - try: - cursor = cnx.cursor() - cursor.execute(''' - DROP TABLE IF EXISTS `afhsb_00to13_raw`, - `afhsb_00to13_region`, - `afhsb_00to13_state`, - `afhsb_13to17_raw`, - `afhsb_13to17_region`, - `afhsb_13to17_state`, - `state2region_table`, - `dmisid_table`; - ''') - cnx.commit() # (might do nothing; each DROP commits itself anyway) - finally: - cnx.close() - -def run_cmd(cmd): - (u, p) = secrets.db.epi - cnx = connector.connect(user=u, passwd=p, database="epidata") - try: - cursor = cnx.cursor() - cursor.execute(cmd) - cnx.commit() - finally: - cnx.close() diff --git a/src/acquisition/afhsb/afhsb_update.py b/src/acquisition/afhsb/afhsb_update.py deleted file mode 100644 index c5a8635c8..000000000 --- a/src/acquisition/afhsb/afhsb_update.py +++ /dev/null @@ -1,39 +0,0 @@ -# standard library -import argparse -import tempfile -import os -import stat -import shutil - -# first party -from . import afhsb_sql - -DEFAULT_DATAPATH = '/home/automation/afhsb_data' - -def main(): - parser = argparse.ArgumentParser() - parser.add_argument('--datapath', action='store', type=str, default=DEFAULT_DATAPATH, help='filepath to directory containing csv files to input into database') - args = parser.parse_args() - # MariaDB appears to refuse to LOAD DATA INFILE except on files under - # /var/lib/mysql (which seems dedicated to its own files) or /tmp; create a - # temporary directory, make rwx for automation & rx for mysql user, copy in - # (or alternatively, symlink --- unimplemented) args.datapath to the - # temporary directory, then run init_all_tables on this temporary datapath. - # Set up temporary directory that will hold temporary datapath (initial - # permissions are very restrictive): - tmp_datapath_parent_dir = tempfile.mkdtemp() - os.chmod(tmp_datapath_parent_dir, stat.S_IRWXU | stat.S_IRGRP | stat.S_IXGRP) - shutil.chown(tmp_datapath_parent_dir, group="mysql_automation") - # (here, mysql_automation is a group with members {mysql,automation}) - tmp_datapath = os.path.join(tmp_datapath_parent_dir, "afhsb_data") - # Copy datapath to temporary datapath (initial permission of copy are - # permissive, but require directory access, which was set appropriately - # above): - shutil.copytree(args.datapath, tmp_datapath) - # Run init_all_tables on temporary datapath: - afhsb_sql.init_all_tables(tmp_datapath) - # (Temporary parent directory should be deleted automatically.) 
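# A minimal sketch of the LOAD DATA INFILE permission workaround described in the
# comments above, consolidated into a context manager. It assumes the same
# "mysql_automation" group (members: mysql and automation) mentioned there, and uses
# TemporaryDirectory so the staged copy is removed on exit; this is an illustration
# of the technique, not the module's own implementation.
import contextlib
import os
import shutil
import stat
import tempfile

@contextlib.contextmanager
def staged_for_load_data_infile(src_datapath, group="mysql_automation"):
    """Yield a copy of src_datapath that the MariaDB server user can read."""
    with tempfile.TemporaryDirectory() as parent:
        # rwx for the owner (automation), rx for the shared group (so mysqld can read)
        os.chmod(parent, stat.S_IRWXU | stat.S_IRGRP | stat.S_IXGRP)
        shutil.chown(parent, group=group)
        staged = os.path.join(parent, os.path.basename(os.path.normpath(src_datapath)))
        shutil.copytree(src_datapath, staged)
        yield staged  # e.g. afhsb_sql.init_all_tables(staged)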
- - -if __name__ == '__main__': - main() From cdf3832cec31d3f96d0b00f62b48f9f5c171d55c Mon Sep 17 00:00:00 2001 From: Dmitry Shemetov Date: Fri, 23 Jun 2023 14:55:24 -0700 Subject: [PATCH 06/43] feat(afhsb): remove afhsb from Epidata Python client --- src/client/delphi_epidata.py | 56 ------------------------------------ 1 file changed, 56 deletions(-) diff --git a/src/client/delphi_epidata.py b/src/client/delphi_epidata.py index a56527357..654eba74c 100644 --- a/src/client/delphi_epidata.py +++ b/src/client/delphi_epidata.py @@ -394,62 +394,6 @@ def meta_norostat(auth): # Make the API call return Epidata._request(params) - # Fetch AFHSB data - @staticmethod - def afhsb(auth, locations, epiweeks, flu_types): - """Fetch AFHSB data (point data, no min/max).""" - # Check parameters - if auth is None or locations is None or epiweeks is None or flu_types is None: - raise Exception('`auth`, `locations`, `epiweeks` and `flu_types` are all required') - - loc_exception = 'Location parameter `{}` is invalid. Valid `location` parameters are: '\ - '`hhs[1-10]`, `cen[1-9]`, 2-letter state code or 3-letter country code.' - for location in locations: - location = location.lower() - if (location.startswith('hhs') or location.startswith('cen')): - prefix, postfix = location[:3], location[3:] - if (postfix.isnumeric()): - region_num = int(postfix) - if (region_num < 1 or region_num > 10 or (region_num == 10 and prefix == 'cen')): - raise Exception(loc_exception.format(location)) - else: - raise Exception(loc_exception.format(location)) - elif (len(location) < 2 or len(location) > 3): - raise Exception(loc_exception.format(location)) - - flu_exception = 'Flu-type parameters `{}` is invalid. Valid flu-type parameters are: '\ - '`flu1`, `flu2`, `flu3`, `ili`, `flu2-flu1`, `flu3-flu2`, `ili-flu3`.' 
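# A small worked example of the disjoint/subset split referenced in the message above
# (see FLU_MAPPING in the afhsb server endpoint below): subset totals are just sums of
# the disjoint buckets. The visit counts here are invented purely for illustration.
disjoint_counts = {"flu1": 10, "flu2-flu1": 4, "flu3-flu2": 3, "ili-flu3": 8}
flu_mapping = {
    "flu2": ["flu1", "flu2-flu1"],
    "flu3": ["flu1", "flu2-flu1", "flu3-flu2"],
    "ili": ["flu1", "flu2-flu1", "flu3-flu2", "ili-flu3"],
}
subset_counts = {k: sum(disjoint_counts[b] for b in buckets) for k, buckets in flu_mapping.items()}
assert subset_counts == {"flu2": 14, "flu3": 17, "ili": 25}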
- valid_flu_types = ['flu1', 'flu2', 'flu3', 'ili', 'flu2-flu1', 'flu3-flu2', 'ili-flu3'] - for flu_type in flu_types: - if (not flu_type in valid_flu_types): - raise Exception(flu_exception.format(flu_type)) - - # Set up request - params = { - 'endpoint': 'afhsb', - 'auth': auth, - 'locations': Epidata._list(locations), - 'epiweeks': Epidata._list(epiweeks), - 'flu_types': Epidata._list(flu_types) - } - # Make the API call - return Epidata._request(params) - - # Fetch AFHSB metadata - @staticmethod - def meta_afhsb(auth): - """Fetch AFHSB metadata.""" - # Check parameters - if auth is None: - raise Exception('`auth` is required') - # Set up request - params = { - 'endpoint': 'meta_afhsb', - 'auth': auth, - } - # Make the API call - return Epidata._request(params) - # Fetch NIDSS flu data @staticmethod def nidss_flu(regions, epiweeks, issues=None, lag=None): From 7a8ee39eb0c0fd1daefaac7c9c4f34ff534eebe6 Mon Sep 17 00:00:00 2001 From: Dmitry Shemetov Date: Fri, 23 Jun 2023 14:55:30 -0700 Subject: [PATCH 07/43] feat(afhsb): remove afhsb from Epidata R client --- src/client/delphi_epidata.R | 35 ----------------------------------- 1 file changed, 35 deletions(-) diff --git a/src/client/delphi_epidata.R b/src/client/delphi_epidata.R index 06e9c2209..627948cc2 100644 --- a/src/client/delphi_epidata.R +++ b/src/client/delphi_epidata.R @@ -371,39 +371,6 @@ Epidata <- (function() { return(.request(params)) } - # Fetch AFHSB data (point data, no min/max) - afhsb <- function(auth, locations, epiweeks, flu_types) { - # Check parameters - if(missing(auth) || missing(locations) || missing(epiweeks) || missing(flu_types)) { - stop('`auth`, `locations`, `epiweeks` and `flu_types` are all required') - } - # Set up request - params <- list( - endpoint = 'afhsb', - auth = auth, - locations = .list(locations), - epiweeks = .list(epiweeks), - flu_types = .list(flu_types) - ) - # Make the API call - return(.request(params)) - } - - # Fetch AFHSB metadata - meta_afhsb <- function(auth) { - # Check parameters - if(missing(auth)) { - stop('`auth` is required') - } - # Set up request - params <- list( - endpoint = 'meta_afhsb', - auth = auth - ) - # Make the API call - return(.request(params)) - } - # Fetch NIDSS flu data nidss.flu <- function(regions, epiweeks, issues, lag) { # Check parameters @@ -662,8 +629,6 @@ Epidata <- (function() { quidel = quidel, norostat = norostat, meta_norostat = meta_norostat, - afhsb = afhsb, - meta_afhsb = meta_afhsb, nidss.flu = nidss.flu, nidss.dengue = nidss.dengue, delphi = delphi, From 9ce66351aa811045e1ca38037ab6d669c0bcb3bb Mon Sep 17 00:00:00 2001 From: Dmitry Shemetov Date: Fri, 23 Jun 2023 14:55:53 -0700 Subject: [PATCH 08/43] feat(afhsb): remove afhsb endpoint from server --- src/server/endpoints/__init__.py | 4 - src/server/endpoints/afhsb.py | 114 ----------------------------- src/server/endpoints/meta_afhsb.py | 31 -------- 3 files changed, 149 deletions(-) delete mode 100644 src/server/endpoints/afhsb.py delete mode 100644 src/server/endpoints/meta_afhsb.py diff --git a/src/server/endpoints/__init__.py b/src/server/endpoints/__init__.py index b58692676..94f1de5b8 100644 --- a/src/server/endpoints/__init__.py +++ b/src/server/endpoints/__init__.py @@ -1,5 +1,4 @@ from . 
import ( - afhsb, cdc, covid_hosp_facility_lookup, covid_hosp_facility, @@ -19,7 +18,6 @@ ght, ilinet, kcdc_ili, - meta_afhsb, meta_norostat, meta, nidss_dengue, @@ -36,7 +34,6 @@ ) endpoints = [ - afhsb, cdc, covid_hosp_facility_lookup, covid_hosp_facility, @@ -56,7 +53,6 @@ ght, ilinet, kcdc_ili, - meta_afhsb, meta_norostat, meta, nidss_dengue, diff --git a/src/server/endpoints/afhsb.py b/src/server/endpoints/afhsb.py deleted file mode 100644 index a006defac..000000000 --- a/src/server/endpoints/afhsb.py +++ /dev/null @@ -1,114 +0,0 @@ -from typing import Dict, List - -from flask import Blueprint, request - -from .._params import extract_integers, extract_strings -from .._query import execute_queries, filter_integers, filter_strings -from .._validate import require_all -from .._security import require_role - -# first argument is the endpoint name -bp = Blueprint("afhsb", __name__) -alias = None - - -def _split_locations(locations: List[str]): - # split locations into national/regional/state - location_dict: Dict[str, List[str]] = { - "hhs": [], - "cen": [], - "state": [], - "country": [], - } - for location in locations: - location = location.lower() - if location[0:3] == "hhs": - location_dict["hhs"].append(location) - elif location[0:3] == "cen": - location_dict["cen"].append(location) - elif len(location) == 3: - location_dict["country"].append(location) - elif len(location) == 2: - location_dict["state"].append(location) - return location_dict - - -def _split_flu_types(flu_types: List[str]): - # split flu types into disjoint/subset - disjoint_flus = [] - subset_flus = [] - for flu_type in flu_types: - if flu_type in ["flu1", "flu2-flu1", "flu3-flu2", "ili-flu3"]: - disjoint_flus.append(flu_type) - elif flu_type in ["flu2", "flu3", "ili"]: - subset_flus.append(flu_type) - return disjoint_flus, subset_flus - - -FLU_MAPPING = { - "flu2": ["flu1", "flu2-flu1"], - "flu3": ["flu1", "flu2-flu1", "flu3-flu2"], - "ili": ["flu1", "flu2-flu1", "flu3-flu2", "ili-flu3"], -} - - -@bp.route("/", methods=("GET", "POST")) -@require_role("afhsb") -def handle(): - require_all(request, "locations", "epiweeks", "flu_types") - - locations = extract_strings("locations") - epiweeks = extract_integers("epiweeks") - flu_types = extract_strings("flu_types") - - disjoint_flus, subset_flus = _split_flu_types(flu_types) - location_dict = _split_locations(locations) - - # build query - - queries = [] - for location_type, loc in location_dict.items(): - if not loc: - continue - table = ( - "afhsb_00to13_region" - if location_type in ["hhs", "cen"] - else "afhsb_00to13_state" - ) - fields = ( - f"`epiweek`, `{location_type}` `location`, sum(`visit_sum`) `visit_sum`" - ) - group = "`epiweek`, `location`" - order = "`epiweek` ASC, `location` ASC" - # build the filter - params = dict() - # build the epiweek filter - condition_epiweek = filter_integers("nd.`epiweek`", epiweeks, "epiweek", params) - condition_location = filter_strings(location_type, locations, "loc", params) - - for subset_flu in subset_flus: - flu_params = params.copy() - condition_flu = filter_strings( - "`flu_type`", FLU_MAPPING[subset_flu], "flu_type", flu_params - ) - query = f"""SELECT {fields}, '{subset_flu}' `flu_type` FROM {table} - WHERE ({condition_epiweek}) AND ({condition_location}) AND ({condition_flu}) - GROUP BY {group} ORDER BY {order}""" - queries.append((query, flu_params)) - # disjoint flu types: flu1, flu2-flu1, flu3-flu2, ili-flu3 - if disjoint_flus: - flu_params = params.copy() - condition_flu = filter_strings( - "`flu_type`", 
disjoint_flus, "flu_type", flu_params - ) - query = f"""SELECT {fields}, `flu_type` FROM {table} - WHERE ({condition_epiweek}) AND ({condition_location}) AND ({condition_flu}) - GROUP BY {group},`flu_type` ORDER BY {order},`flu_type`""" - queries.append((query, flu_params)) - - fields_string = ["location", "flu_type"] - fields_int = ["epiweek", "visit_sum"] - fields_float = [] - - # send query - return execute_queries(queries, fields_string, fields_int, fields_float) diff --git a/src/server/endpoints/meta_afhsb.py b/src/server/endpoints/meta_afhsb.py deleted file mode 100644 index 096ab58ec..000000000 --- a/src/server/endpoints/meta_afhsb.py +++ /dev/null @@ -1,31 +0,0 @@ -from flask import Blueprint, request - -from .._printer import print_non_standard -from .._query import parse_result -from .._security import require_role - - -# first argument is the endpoint name -bp = Blueprint("meta_afhsb", __name__) -alias = None - - -@bp.route("/", methods=("GET", "POST")) -@require_role("afhsb") -def handle(): - # build query - table1 = "afhsb_00to13_state" - table2 = "afhsb_13to17_state" - - string_keys = ["state", "country"] - int_keys = ["flu_severity"] - data = dict() - - for key in string_keys: - query = f"SELECT DISTINCT `{key}` FROM (select `{key}` from `{table1}` union select `{key}` from `{table2}`) t" - data[key] = parse_result(query, {}, [key]) - for key in int_keys: - query = f"SELECT DISTINCT `{key}` FROM (select `{key}` from `{table1}` union select `{key}` from `{table2}`) t" - data[key] = parse_result(query, {}, [], [key]) - - return print_non_standard(request.values.get("format"), data) From 2878980e4b4810f5594b9bf7a0afb539089d67d9 Mon Sep 17 00:00:00 2001 From: Dmitry Shemetov Date: Fri, 23 Jun 2023 18:33:33 -0700 Subject: [PATCH 09/43] feat(afhsb): remove afhsb from deploy.json --- deploy.json | 9 --------- 1 file changed, 9 deletions(-) diff --git a/deploy.json b/deploy.json index 59d141ba4..654d669cc 100644 --- a/deploy.json +++ b/deploy.json @@ -174,15 +174,6 @@ "add-header-comment": true }, - "// acquisition - afhsb", - { - "type": "move", - "src": "src/acquisition/afhsb/", - "dst": "[[package]]/acquisition/afhsb/", - "match": "^.*\\.(py)$", - "add-header-comment": true - }, - "// acquisition - covidcast", { "type": "move", From d554566cbb323a3ffced9690e5dabdff41c387d4 Mon Sep 17 00:00:00 2001 From: Dmitry Shemetov Date: Fri, 23 Jun 2023 18:33:59 -0700 Subject: [PATCH 10/43] feat(afhsb): remove afhsb from setup.cfg --- dev/local/setup.cfg | 1 - 1 file changed, 1 deletion(-) diff --git a/dev/local/setup.cfg b/dev/local/setup.cfg index 443359b25..d7383ade1 100644 --- a/dev/local/setup.cfg +++ b/dev/local/setup.cfg @@ -6,7 +6,6 @@ version = 4.1.3 packages = delphi.epidata delphi.epidata.acquisition - delphi.epidata.acquisition.afhsb delphi.epidata.acquisition.cdcp delphi.epidata.acquisition.covid_hosp delphi.epidata.acquisition.covid_hosp.common From 316ecd18f27776b465f738db66ba2c628cd2803d Mon Sep 17 00:00:00 2001 From: Dmitry Shemetov Date: Fri, 23 Jun 2023 18:34:13 -0700 Subject: [PATCH 11/43] feat(afhsb): remove norostat from setup.cfg --- dev/local/setup.cfg | 1 - 1 file changed, 1 deletion(-) diff --git a/dev/local/setup.cfg b/dev/local/setup.cfg index d7383ade1..69bc91778 100644 --- a/dev/local/setup.cfg +++ b/dev/local/setup.cfg @@ -20,7 +20,6 @@ packages = delphi.epidata.acquisition.ght delphi.epidata.acquisition.kcdc delphi.epidata.acquisition.nidss - delphi.epidata.acquisition.norostat delphi.epidata.acquisition.paho delphi.epidata.acquisition.quidel 
delphi.epidata.acquisition.twtr From 5a6cefc004d45a45935436c93a3ba4ed636787df Mon Sep 17 00:00:00 2001 From: Dmitry Shemetov Date: Fri, 23 Jun 2023 18:34:29 -0700 Subject: [PATCH 12/43] docs(afhsb): remove afhsb and its meta --- docs/api/afhsb.md | 52 ------------------------------------------ docs/api/meta_afhsb.md | 49 --------------------------------------- 2 files changed, 101 deletions(-) delete mode 100644 docs/api/afhsb.md delete mode 100644 docs/api/meta_afhsb.md diff --git a/docs/api/afhsb.md b/docs/api/afhsb.md deleted file mode 100644 index d53ad643e..000000000 --- a/docs/api/afhsb.md +++ /dev/null @@ -1,52 +0,0 @@ ---- -title: AFHSB -parent: Other Endpoints (COVID-19 and Other Diseases) ---- - -# AFHSB - -This is the API documentation for accessing the AFHSB (`afhsb`) endpoint of -[Delphi](https://delphi.cmu.edu/)'s epidemiological data. - -General topics not specific to any particular endpoint are discussed in the -[API overview](README.md). Such topics include: -[contributing](README.md#contributing), [citing](README.md#citing), and -[data licensing](README.md#data-licensing). - -## AFHSB Data - -... - -# The API - -The base URL is: https://api.delphi.cmu.edu/epidata/afhsb/ - -See [this documentation](README.md) for details on specifying epiweeks, dates, and lists. - -## Parameters - -### Required - -| Parameter | Description | Type | -| --- | --- | --- | -| `auth` | password | string | -| `epiweeks` | epiweeks | `list` of epiweeks | -| `locations` | locations | `list` of [region](https://github.com/cmu-delphi/delphi-epidata/blob/main/labels/regions.txt), [state](https://github.com/cmu-delphi/delphi-epidata/blob/main/labels/states.txt), or 3-letter country code labels | -| `flu_types` | flu types | `list` of disjoint (`flu1`, `flu2-flu1`, `flu3-flu2`, `ili-flu3`) or subset (`flu2`, `flu3`, `ili`) flu type labels | - -## Response - -| Field | Description | Type | -|-----------|-----------------------------------------------------------------|------------------| -| `result` | result code: 1 = success, 2 = too many results, -2 = no results | integer | -| `epidata` | list of results | array of objects | -| ... | ... | ... | -| `message` | `success` or error message | string | - -# Example URLs - - - -# Code Samples - - diff --git a/docs/api/meta_afhsb.md b/docs/api/meta_afhsb.md deleted file mode 100644 index 6ba294772..000000000 --- a/docs/api/meta_afhsb.md +++ /dev/null @@ -1,49 +0,0 @@ ---- -title: AFHSB Metadata -parent: Other Endpoints (COVID-19 and Other Diseases) ---- - -# AFHSB Metadata - -This is the documentation of the API for accessing the AFHSB Metadata (`meta_afhsb`) endpoint of -the [Delphi](https://delphi.cmu.edu/)'s epidemiological data. - -General topics not specific to any particular endpoint are discussed in the -[API overview](README.md). Such topics include: -[contributing](README.md#contributing), [citing](README.md#citing), and -[data licensing](README.md#data-licensing). - -## AFHSB Metadata - -... - -# The API - -The base URL is: https://api.delphi.cmu.edu/epidata/meta_afhsb/ - -See [this documentation](README.md) for details on specifying epiweeks, dates, and lists. 
- -## Parameters - -### Required - -| Parameter | Description | Type | -|-----------|-------------|--------| -| `auth` | password | string | - -## Response - -| Field | Description | Type | -|-----------|-----------------------------------------------------------------|------------------| -| `result` | result code: 1 = success, 2 = too many results, -2 = no results | integer | -| `epidata` | list of results | array of objects | -| ... | ... | ... | -| `message` | `success` or error message | string | - -# Example URLs - - - -# Code Samples - - From 7a2fc7b0c53d944c03ce4d2e878a216814e10a24 Mon Sep 17 00:00:00 2001 From: Dmitry Shemetov Date: Fri, 23 Jun 2023 18:34:39 -0700 Subject: [PATCH 13/43] docs(afhsb): remove afhsb from README --- docs/api/README.md | 2 -- 1 file changed, 2 deletions(-) diff --git a/docs/api/README.md b/docs/api/README.md index dd8f98d5c..709d068e0 100644 --- a/docs/api/README.md +++ b/docs/api/README.md @@ -110,7 +110,6 @@ The parameters available for each source are documented in each linked source-sp | Endpoint | Name | Description | Restricted? | | --- | --- | --- | --- | -| [`afhsb`](ahfsb.md) | AFHSB | ... | yes | | [`cdc`](cdc.md) | CDC Page Hits | ... | yes | | [`delphi`](delphi.md) | Delphi's Forecast | ... | no | | [`ecdc_ili`](ecdc_ili.md) | ECDC ILI | ECDC ILI data from the ECDC website. | no | @@ -122,7 +121,6 @@ The parameters available for each source are documented in each linked source-sp | [`ght`](ght.md) | Google Health Trends | Estimate of influenza activity based on volume of certain search queries. | yes | | [`kcdc_ili`](kcdc_ili.md) | KCDC ILI | KCDC ILI data from KCDC website. | no | | [`meta`](meta.md) | API Metadata | Metadata for `fluview`, `twitter`, `wiki`, and `delphi`. | no | -| [`meta_afhsb`](meta_afhsb.md) | AFHSB Metadata | ... | yes | | [`nidss_flu`](nidss_flu.md) | NIDSS Flu | Outpatient ILI from Taiwan's National Infectious Disease Statistics System (NIDSS). | no | | [`nowcast`](nowcast.md) | ILI Nearby | A nowcast of U.S. national, regional, and state-level (weighted) percent ILI, available seven days (regionally) or five days (state-level) before the first ILINet report for the corresponding week. | no | | [`quidel`](quidel.md) | Quidel | Data provided by Quidel Corp., which contains flu lab test results. 
| yes | From 92b546f3129e028153ea3ce6be8d38a0a7f10b23 Mon Sep 17 00:00:00 2001 From: Dmitry Shemetov Date: Fri, 23 Jun 2023 18:35:03 -0700 Subject: [PATCH 14/43] feat(afhsb): remove afhsb from the js client --- src/client/delphi_epidata.js | 23 ----------------------- 1 file changed, 23 deletions(-) diff --git a/src/client/delphi_epidata.js b/src/client/delphi_epidata.js index cf06ae976..117fe8949 100644 --- a/src/client/delphi_epidata.js +++ b/src/client/delphi_epidata.js @@ -123,19 +123,6 @@ version: () => { return _request('version', {}).then((r) => Object.assign(r, {client_version})); }, - /** - * Fetch AFHSB data (point data, no min/max) - */ - afhsb: (auth, locations, epiweeks, flu_types) => { - requireAll({ auth, locations, epiweeks, flu_types }); - const params = { - auth, - locations: _list(locations), - epiweeks: _list(epiweeks), - flu_types: _list(flu_types), - }; - return _request("afhsb", params); - }, /** * Fetch CDC page hits */ @@ -387,16 +374,6 @@ }; return _request("kcdc_ili", params); }, - /** - * Fetch AFHSB metadata - */ - meta_afhsb: (auth) => { - requireAll({ auth }); - const params = { - auth, - }; - return _request("meta_afhsb", params); - }, /** * Fetch NoroSTAT metadata */ From 6fe6e7a7b3422e7a7579d95f104d602465e50d93 Mon Sep 17 00:00:00 2001 From: melange396 Date: Mon, 26 Jun 2023 11:56:57 -0400 Subject: [PATCH 15/43] use second db handle for only for user admin and writes (#1184) * change 'user_engine' to a 'WriteSession' instead, so the master db connection is used for writes [and associated admin session reads] only * eager-load roles, remove unnecessary methods, add @default_session, move session ctx mgrs to admin page * make sure sql statements and timing are logged for all engines, plus tag engines with id and log those too, and superfluous user method cleanup * sqlalchemy cleanup: removed superfluous bits, improved argument passing for engine creation * _assign_roles() does its own commit() and returns an instance of the newly updated User * raise Exception when trying to update non-existent User, return UserRole on creation. * use more appropriate reciever for static method call, and expand comment on static vs bound methods in User. --- src/server/_common.py | 9 +-- src/server/_db.py | 61 +++++++++++++++--- src/server/_security.py | 6 +- src/server/admin/models.py | 114 +++++++++++++++------------------- src/server/endpoints/admin.py | 99 +++++++++++++++-------------- src/server/main.py | 4 -- 6 files changed, 159 insertions(+), 134 deletions(-) diff --git a/src/server/_common.py b/src/server/_common.py index 56d4c38ec..f7c28c7ef 100644 --- a/src/server/_common.py +++ b/src/server/_common.py @@ -3,7 +3,7 @@ from flask import Flask, g, request from sqlalchemy import event -from sqlalchemy.engine import Connection +from sqlalchemy.engine import Connection, Engine from werkzeug.exceptions import Unauthorized from werkzeug.local import LocalProxy @@ -85,12 +85,12 @@ def log_info_with_request_and_response(message, response, **kwargs): **kwargs ) -@event.listens_for(engine, "before_cursor_execute") +@event.listens_for(Engine, "before_cursor_execute") def before_cursor_execute(conn, cursor, statement, parameters, context, executemany): context._query_start_time = time.time() -@event.listens_for(engine, "after_cursor_execute") +@event.listens_for(Engine, "after_cursor_execute") def after_cursor_execute(conn, cursor, statement, parameters, context, executemany): # this timing info may be suspect, at least in terms of dbms cpu time... 
# it is likely that it includes that time as well as any overhead that @@ -101,7 +101,8 @@ def after_cursor_execute(conn, cursor, statement, parameters, context, executema # Convert to milliseconds total_time *= 1000 get_structured_logger("server_api").info( - "Executed SQL", statement=statement, params=parameters, elapsed_time_ms=total_time + "Executed SQL", statement=statement, params=parameters, elapsed_time_ms=total_time, + engine_id=conn.get_execution_options().get('engine_id') ) diff --git a/src/server/_db.py b/src/server/_db.py index 53e632cdf..e65c885ff 100644 --- a/src/server/_db.py +++ b/src/server/_db.py @@ -1,4 +1,7 @@ -from sqlalchemy import create_engine, MetaData +import functools +from inspect import signature, Parameter + +from sqlalchemy import create_engine from sqlalchemy.engine import Engine from sqlalchemy.orm import sessionmaker @@ -9,15 +12,57 @@ # previously `_common` imported from `_security` which imported from `admin.models`, which imported (back again) from `_common` for database connection objects -engine: Engine = create_engine(SQLALCHEMY_DATABASE_URI, **SQLALCHEMY_ENGINE_OPTIONS) -if SQLALCHEMY_DATABASE_URI_PRIMARY: - user_engine: Engine = create_engine(SQLALCHEMY_DATABASE_URI_PRIMARY, **SQLALCHEMY_ENGINE_OPTIONS) -else: - user_engine: Engine = engine +# a decorator to automatically provide a sqlalchemy session by default, if an existing session is not explicitly +# specified to override it. it is preferred to use a single session for a sequence of operations logically grouped +# together, but this allows individual operations to be run by themselves without having to provide an +# already-established session. requires an argument to the wrapped function named 'session'. +# for instance: +# +# @default_session(WriteSession) +# def foo(session): +# pass +# +# # calling: +# foo() +# # is identical to: +# with WriteSession() as s: +# foo(s) +def default_session(sess): + def decorator__default_session(func): + # make sure `func` is compatible w/ this decorator + func_params = signature(func).parameters + if 'session' not in func_params or func_params['session'].kind == Parameter.POSITIONAL_ONLY: + raise Exception(f"@default_session(): function {func.__name__}() must accept an argument 'session' that can be specified by keyword.") + # save position of 'session' arg, to later check if its been passed in by position/order + sess_index = list(func_params).index('session') + + @functools.wraps(func) + def wrapper__default_session(*args, **kwargs): + if 'session' in kwargs or len(args) >= sess_index+1: + # 'session' has been specified by the caller, so we have nothing to do here. pass along all args unchanged. + return func(*args, **kwargs) + # otherwise, we will wrap this call with a context manager for the default session provider, and pass that session instance to the wrapped function. + with sess() as session: + return func(*args, **kwargs, session=session) -metadata = MetaData(bind=user_engine) + return wrapper__default_session -Session = sessionmaker(bind=user_engine) + return decorator__default_session +engine: Engine = create_engine(SQLALCHEMY_DATABASE_URI, **SQLALCHEMY_ENGINE_OPTIONS, execution_options={'engine_id': 'default'}) +Session = sessionmaker(bind=engine) + +if SQLALCHEMY_DATABASE_URI_PRIMARY and SQLALCHEMY_DATABASE_URI_PRIMARY != SQLALCHEMY_DATABASE_URI: + # if available, use the main/primary DB for write operations. DB replication processes should be in place to + # propagate any written changes to the regular (load balanced) replicas. 
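# A minimal, self-contained sketch of the engine-tagging pattern introduced above,
# assuming SQLAlchemy 1.4+: listeners registered on the Engine class fire for every
# engine, and the per-engine execution_options carry an id the listener can read back.
# The in-memory SQLite URLs are placeholders for illustration only.
import time
from sqlalchemy import create_engine, event, text
from sqlalchemy.engine import Engine

@event.listens_for(Engine, "before_cursor_execute")
def _start_timer(conn, cursor, statement, parameters, context, executemany):
    context._query_start_time = time.time()

@event.listens_for(Engine, "after_cursor_execute")
def _log_elapsed(conn, cursor, statement, parameters, context, executemany):
    elapsed_ms = (time.time() - context._query_start_time) * 1000
    engine_id = conn.get_execution_options().get("engine_id")
    print(f"[{engine_id}] {statement} ({elapsed_ms:.1f} ms)")

read_engine = create_engine("sqlite://", execution_options={"engine_id": "default"})
write_only = create_engine("sqlite://", execution_options={"engine_id": "write_engine"})
with write_only.connect() as conn:
    conn.execute(text("SELECT 1"))  # logged with engine_id == "write_engine"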
+ write_engine: Engine = create_engine(SQLALCHEMY_DATABASE_URI_PRIMARY, **SQLALCHEMY_ENGINE_OPTIONS, execution_options={'engine_id': 'write_engine'}) + WriteSession = sessionmaker(bind=write_engine) + # TODO: insert log statement acknowledging this second session handle is in use? +else: + write_engine: Engine = engine + WriteSession = Session +# NOTE: `WriteSession` could be called `AdminSession`, as its only (currently) used by the admin page, and the admin +# page is the only thing that should be writing to the db. its tempting to let the admin page read from the +# regular `Session` and write with `WriteSession`, but concurrency problems may arise from sync/replication lag. diff --git a/src/server/_security.py b/src/server/_security.py index 761d088c3..61e2608b2 100644 --- a/src/server/_security.py +++ b/src/server/_security.py @@ -16,7 +16,7 @@ TEMPORARY_API_KEY, URL_PREFIX, ) -from .admin.models import User, UserRole +from .admin.models import User API_KEY_HARD_WARNING = API_KEY_REQUIRED_STARTING_AT - timedelta(days=14) API_KEY_SOFT_WARNING = API_KEY_HARD_WARNING - timedelta(days=14) @@ -91,10 +91,6 @@ def _get_current_user(): current_user: User = cast(User, LocalProxy(_get_current_user)) -def register_user_role(role_name: str) -> None: - UserRole.create_role(role_name) - - def _is_public_route() -> bool: public_routes_list = ["lib", "admin", "version"] for route in public_routes_list: diff --git a/src/server/admin/models.py b/src/server/admin/models.py index 62cbc186d..f5c0d54ed 100644 --- a/src/server/admin/models.py +++ b/src/server/admin/models.py @@ -3,7 +3,7 @@ from sqlalchemy.orm import relationship from copy import deepcopy -from .._db import Session +from .._db import Session, WriteSession, default_session from delphi.epidata.common.logger import get_structured_logger from typing import Set, Optional, List @@ -25,7 +25,7 @@ def _default_date_now(): class User(Base): __tablename__ = "api_user" id = Column(Integer, primary_key=True, autoincrement=True) - roles = relationship("UserRole", secondary=association_table) + roles = relationship("UserRole", secondary=association_table, lazy="joined") # last arg does an eager load of this property from foreign tables api_key = Column(String(50), unique=True, nullable=False) email = Column(String(320), unique=True, nullable=False) created = Column(Date, default=_default_date_now) @@ -35,97 +35,85 @@ def __init__(self, api_key: str, email: str = None) -> None: self.api_key = api_key self.email = email - @staticmethod - def list_users() -> List["User"]: - with Session() as session: - return session.query(User).all() - @property def as_dict(self): return { "id": self.id, "api_key": self.api_key, "email": self.email, - "roles": User.get_user_roles(self.id), + "roles": set(role.name for role in self.roles), "created": self.created, "last_time_used": self.last_time_used } - @staticmethod - def get_user_roles(user_id: int) -> Set[str]: - with Session() as session: - user = session.query(User).filter(User.id == user_id).first() - return set([role.name for role in user.roles]) - def has_role(self, required_role: str) -> bool: - return required_role in User.get_user_roles(self.id) + return required_role in set(role.name for role in self.roles) @staticmethod def _assign_roles(user: "User", roles: Optional[Set[str]], session) -> None: - # NOTE: this uses a borrowed/existing `session`, and thus does not do a `session.commit()`... - # that is the responsibility of the caller! 
get_structured_logger("api_user_models").info("setting roles", roles=roles, user_id=user.id, api_key=user.api_key) db_user = session.query(User).filter(User.id == user.id).first() # TODO: would it be sufficient to use the passed-in `user` instead of looking up this `db_user`? + # or even use this as a bound method instead of a static?? + # same goes for `update_user()` and `delete_user()` below... if roles: - roles_to_assign = session.query(UserRole).filter(UserRole.name.in_(roles)).all() - db_user.roles = roles_to_assign + db_user.roles = session.query(UserRole).filter(UserRole.name.in_(roles)).all() else: db_user.roles = [] + session.commit() + # retrieve the newly updated User object + return session.query(User).filter(User.id == user.id).first() @staticmethod + @default_session(Session) def find_user(*, # asterisk forces explicit naming of all arguments when calling this method - user_id: Optional[int] = None, api_key: Optional[str] = None, user_email: Optional[str] = None + session, + user_id: Optional[int] = None, api_key: Optional[str] = None, user_email: Optional[str] = None ) -> "User": # NOTE: be careful, using multiple arguments could match multiple users, but this will return only one! - with Session() as session: - user = ( - session.query(User) - .filter((User.id == user_id) | (User.api_key == api_key) | (User.email == user_email)) - .first() - ) + user = ( + session.query(User) + .filter((User.id == user_id) | (User.api_key == api_key) | (User.email == user_email)) + .first() + ) return user if user else None @staticmethod - def create_user(api_key: str, email: str, user_roles: Optional[Set[str]] = None) -> "User": + @default_session(WriteSession) + def create_user(api_key: str, email: str, session, user_roles: Optional[Set[str]] = None) -> "User": get_structured_logger("api_user_models").info("creating user", api_key=api_key) - with Session() as session: - new_user = User(api_key=api_key, email=email) - # TODO: we may need to populate 'created' field/column here, if the default - # specified above gets bound to the time of when that line of python was evaluated. 
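# Regarding the TODO above: SQLAlchemy evaluates a callable column default (as with
# `created = Column(Date, default=_default_date_now)` in this model) at INSERT time,
# so each new row gets the current date; only a default given as an already-computed
# value is frozen when the class is defined. A minimal sketch of the distinction,
# assuming SQLAlchemy 1.4+ and using a hypothetical table name:
from datetime import date
from sqlalchemy import Column, Date, Integer
from sqlalchemy.orm import declarative_base

ExampleBase = declarative_base()

class DefaultDemo(ExampleBase):
    __tablename__ = "default_demo"                # hypothetical, illustration only
    id = Column(Integer, primary_key=True)
    frozen = Column(Date, default=date.today())   # computed once, at class definition
    per_row = Column(Date, default=date.today)    # callable: computed for every INSERT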
- session.add(new_user) - session.commit() - User._assign_roles(new_user, user_roles, session) - session.commit() - return new_user + new_user = User(api_key=api_key, email=email) + session.add(new_user) + session.commit() + return User._assign_roles(new_user, user_roles, session) @staticmethod + @default_session(WriteSession) def update_user( user: "User", email: Optional[str], api_key: Optional[str], - roles: Optional[Set[str]] + roles: Optional[Set[str]], + session ) -> "User": get_structured_logger("api_user_models").info("updating user", user_id=user.id, new_api_key=api_key) - with Session() as session: - user = User.find_user(user_id=user.id) - if user: - update_stmt = ( - update(User) - .where(User.id == user.id) - .values(api_key=api_key, email=email) - ) - session.execute(update_stmt) - User._assign_roles(user, roles, session) - session.commit() - return user + user = User.find_user(user_id=user.id, session=session) + if not user: + raise Exception('user not found') + update_stmt = ( + update(User) + .where(User.id == user.id) + .values(api_key=api_key, email=email) + ) + session.execute(update_stmt) + return User._assign_roles(user, roles, session) @staticmethod - def delete_user(user_id: int) -> None: + @default_session(WriteSession) + def delete_user(user_id: int, session) -> None: get_structured_logger("api_user_models").info("deleting user", user_id=user_id) - with Session() as session: - session.execute(delete(User).where(User.id == user_id)) - session.commit() + session.execute(delete(User).where(User.id == user_id)) + session.commit() class UserRole(Base): @@ -134,23 +122,23 @@ class UserRole(Base): name = Column(String(50), unique=True) @staticmethod - def create_role(name: str) -> None: + @default_session(WriteSession) + def create_role(name: str, session) -> None: get_structured_logger("api_user_models").info("creating user role", role=name) - with Session() as session: - session.execute( - f""" + # TODO: check role doesnt already exist + session.execute(f""" INSERT INTO user_role (name) SELECT '{name}' WHERE NOT EXISTS (SELECT * FROM user_role WHERE name='{name}') - """ - ) - session.commit() + """) + session.commit() + return session.query(UserRole).filter(UserRole.name == name).first() @staticmethod - def list_all_roles(): - with Session() as session: - roles = session.query(UserRole).all() + @default_session(Session) + def list_all_roles(session): + roles = session.query(UserRole).all() return [role.name for role in roles] diff --git a/src/server/endpoints/admin.py b/src/server/endpoints/admin.py index 17bc9ca9b..a6f941b48 100644 --- a/src/server/endpoints/admin.py +++ b/src/server/endpoints/admin.py @@ -7,6 +7,7 @@ from .._common import log_info_with_request from .._config import ADMIN_PASSWORD, API_KEY_REGISTRATION_FORM_LINK, API_KEY_REMOVAL_REQUEST_LINK, REGISTER_WEBHOOK_TOKEN +from .._db import WriteSession from .._security import resolve_auth_token from ..admin.models import User, UserRole @@ -29,22 +30,13 @@ def _require_admin(): return token -def _parse_roles(roles: List[str]) -> Set[str]: - return set(roles) - - -def _render(mode: str, token: str, flags: Dict, **kwargs): +def _render(mode: str, token: str, flags: Dict, session, **kwargs): template = (templates_dir / "index.html").read_text("utf8") return render_template_string( - template, mode=mode, token=token, flags=flags, roles=UserRole.list_all_roles(), **kwargs + template, mode=mode, token=token, flags=flags, roles=UserRole.list_all_roles(session), **kwargs ) -def user_exists(user_email: str = None, 
api_key: str = None): - user = User.find_user(user_email=user_email, api_key=api_key) - return True if user else False - - # ~~~~ PUBLIC ROUTES ~~~~ @@ -67,44 +59,50 @@ def removal_request_redirect(): def _index(): token = _require_admin() flags = dict() - if request.method == "POST": - # register a new user - if not user_exists(user_email=request.values["email"], api_key=request.values["api_key"]): - User.create_user( - request.values["api_key"], - request.values["email"], - _parse_roles(request.values.getlist("roles")), - ) - flags["banner"] = "Successfully Added" - else: - flags["banner"] = "User with such email and/or api key already exists." - users = [user.as_dict for user in User.list_users()] - return _render("overview", token, flags, users=users, user=dict()) + with WriteSession() as session: + if request.method == "POST": + # register a new user + if not User.find_user( + user_email=request.values["email"], api_key=request.values["api_key"], + session=session): + User.create_user( + api_key=request.values["api_key"], + email=request.values["email"], + user_roles=set(request.values.getlist("roles")), + session=session + ) + flags["banner"] = "Successfully Added" + else: + flags["banner"] = "User with such email and/or api key already exists." + users = [user.as_dict for user in session.query(User).all()] + return _render("overview", token, flags, session=session, users=users, user=dict()) @bp.route("/", methods=["GET", "PUT", "POST", "DELETE"]) def _detail(user_id: int): token = _require_admin() - user = User.find_user(user_id=user_id) - if not user: - raise NotFound() - if request.method == "DELETE" or "delete" in request.values: - User.delete_user(user.id) - return redirect(f"./?auth={token}") - flags = dict() - if request.method in ["PUT", "POST"]: - user_check = User.find_user(api_key=request.values["api_key"], user_email=request.values["email"]) - if user_check and user_check.id != user.id: - flags["banner"] = "Could not update user; same api_key and/or email already exists." - else: - user = user.update_user( - user=user, - api_key=request.values["api_key"], - email=request.values["email"], - roles=_parse_roles(request.values.getlist("roles")), - ) - flags["banner"] = "Successfully Saved" - return _render("detail", token, flags, user=user.as_dict) + with WriteSession() as session: + user = User.find_user(user_id=user_id, session=session) + if not user: + raise NotFound() + if request.method == "DELETE" or "delete" in request.values: + User.delete_user(user.id, session=session) + return redirect(f"./?auth={token}") + flags = dict() + if request.method in ["PUT", "POST"]: + user_check = User.find_user(api_key=request.values["api_key"], user_email=request.values["email"], session=session) + if user_check and user_check.id != user.id: + flags["banner"] = "Could not update user; same api_key and/or email already exists." 
+ else: + user = User.update_user( + user=user, + api_key=request.values["api_key"], + email=request.values["email"], + roles=set(request.values.getlist("roles")), + session=session + ) + flags["banner"] = "Successfully Saved" + return _render("detail", token, flags, session=session, user=user.as_dict) @bp.route("/register", methods=["POST"]) @@ -116,12 +114,13 @@ def _register(): user_api_key = body["user_api_key"] user_email = body["user_email"] - if user_exists(user_email=user_email, api_key=user_api_key): - return make_response( - "User with email and/or API Key already exists, use different parameters or contact us for help", - 409, - ) - User.create_user(api_key=user_api_key, email=user_email) + with WriteSession() as session: + if User.find_user(user_email=user_email, api_key=user_api_key, session=session): + return make_response( + "User with email and/or API Key already exists, use different parameters or contact us for help", + 409, + ) + User.create_user(api_key=user_api_key, email=user_email, session=session) return make_response(f"Successfully registered API key '{user_api_key}'", 200) diff --git a/src/server/main.py b/src/server/main.py index c05b9d0d3..a91a91ee2 100644 --- a/src/server/main.py +++ b/src/server/main.py @@ -8,11 +8,9 @@ from ._config import URL_PREFIX, VERSION from ._common import app, set_compatibility_mode -from ._db import metadata, engine from ._exceptions import MissingOrWrongSourceException from .endpoints import endpoints from .endpoints.admin import bp as admin_bp, enable_admin -from ._security import register_user_role from ._limiter import limiter, apply_limit __all__ = ["app"] @@ -65,8 +63,6 @@ def send_lib_file(path: str): return send_from_directory(pathlib.Path(__file__).parent / "lib", path) -metadata.create_all(engine) - if __name__ == "__main__": app.run(host="0.0.0.0", port=5000) else: From fa0bd53a095096e775d5545f4cb11a229a34a0cc Mon Sep 17 00:00:00 2001 From: Dmitry Shemetov Date: Mon, 26 Jun 2023 09:40:51 -0700 Subject: [PATCH 16/43] feat(afhsb): remove afhsb from .ts file --- src/client/delphi_epidata.d.ts | 4 ---- 1 file changed, 4 deletions(-) diff --git a/src/client/delphi_epidata.d.ts b/src/client/delphi_epidata.d.ts index f88b18247..0b81db779 100644 --- a/src/client/delphi_epidata.d.ts +++ b/src/client/delphi_epidata.d.ts @@ -20,7 +20,6 @@ declare module 'delphi_epidata' { client_version: string; version(): Promise<{version: string, client_version: string}>; - afhsb(callback: EpiDataCallback, auth: string, locations: StringParam, epiweeks: EpiRangeParam, flu_types: StringParam): Promise; cdc(callback: EpiDataCallback, auth: string, epiweeks: EpiRangeParam, locations: StringParam): Promise; covid_hosp_facility(callback: EpiDataCallback, hospital_pks: StringParam, collection_weeks: EpiRangeParam, publication_dates: EpiRangeParam): Promise; covid_hosp_facility_lookup(callback: EpiDataCallback, state?: string, ccn?: string, city?: string, zip?: string, fips_code?: string): Promise; @@ -37,7 +36,6 @@ declare module 'delphi_epidata' { gft(callback: EpiDataCallback, locations: StringParam, epiweeks: EpiRangeParam): Promise; ght(callback: EpiDataCallback, auth: string, locations: StringParam, epiweeks: EpiRangeParam, query: string): Promise; kcdc_ili(callback: EpiDataCallback, regions: StringParam, epiweeks: EpiRangeParam, issues?: EpiRangeParam, lag?: number): Promise; - meta_afhsb(callback: EpiDataCallback, auth: string): Promise; meta_norostat(callback: EpiDataCallback, auth: string): Promise; meta(callback: EpiDataCallback): Promise; 
nidss_dengue(callback: EpiDataCallback, locations: StringParam, epiweeks: EpiRangeParam): Promise; @@ -61,7 +59,6 @@ declare module 'delphi_epidata' { client_version: string; version(): Promise<{ version: string, client_version: string }>; - afhsb(auth: string, locations: StringParam, epiweeks: EpiRangeParam, flu_types: StringParam): Promise; cdc(auth: string, epiweeks: EpiRangeParam, locations: StringParam): Promise; covid_hosp_facility(hospital_pks: StringParam, collection_weeks: EpiRangeParam, publication_dates: EpiRangeParam): Promise; covid_hosp_facility_lookup(state?: string, ccn?: string, city?: string, zip?: string, fips_code?: string): Promise; @@ -78,7 +75,6 @@ declare module 'delphi_epidata' { gft(locations: StringParam, epiweeks: EpiRangeParam): Promise; ght(auth: string, locations: StringParam, epiweeks: EpiRangeParam, query: string): Promise; kcdc_ili(regions: StringParam, epiweeks: EpiRangeParam, issues?: EpiRangeParam, lag?: number): Promise; - meta_afhsb(auth: string): Promise; meta_norostat(auth: string): Promise; meta(callback: EpiDataCallback): Promise; nidss_dengue(locations: StringParam, epiweeks: EpiRangeParam): Promise; From 60cdbb1be5195e906e0bbac3a79c2890d5011b64 Mon Sep 17 00:00:00 2001 From: Dmitry Shemetov Date: Wed, 21 Jun 2023 13:47:44 -0700 Subject: [PATCH 17/43] ci(black): set line-length 100 --- pyproject.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pyproject.toml b/pyproject.toml index d255c2849..d8589df09 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,5 +1,5 @@ [tool.black] -line-length = 200 +line-length = 100 target-version = ['py38'] include = 'server,tests/server' From 980b0b7e80c7923b79e14fee620645e680785703 Mon Sep 17 00:00:00 2001 From: Dmitry Shemetov Date: Wed, 21 Jun 2023 13:49:43 -0700 Subject: [PATCH 18/43] style(black): format cdc acquisition --- src/acquisition/cdcp/cdc_dropbox_receiver.py | 212 ++++++------ src/acquisition/cdcp/cdc_extract.py | 213 ++++++------ src/acquisition/cdcp/cdc_upload.py | 327 ++++++++++--------- 3 files changed, 379 insertions(+), 373 deletions(-) diff --git a/src/acquisition/cdcp/cdc_dropbox_receiver.py b/src/acquisition/cdcp/cdc_dropbox_receiver.py index eb0d97f2a..65626101b 100644 --- a/src/acquisition/cdcp/cdc_dropbox_receiver.py +++ b/src/acquisition/cdcp/cdc_dropbox_receiver.py @@ -29,128 +29,128 @@ # location constants -DROPBOX_BASE_DIR = '/cdc_page_stats' -DELPHI_BASE_DIR = '/common/cdc_stage' +DROPBOX_BASE_DIR = "/cdc_page_stats" +DELPHI_BASE_DIR = "/common/cdc_stage" def get_timestamp_string(): - """ - Return the current local date and time as a string. + """ + Return the current local date and time as a string. - The format is "%Y%m%d_%H%M%S". - """ - return datetime.datetime.now().strftime('%Y%m%d_%H%M%S') + The format is "%Y%m%d_%H%M%S". 
+ """ + return datetime.datetime.now().strftime("%Y%m%d_%H%M%S") def trigger_further_processing(): - """Add CDCP processing scripts to the Automation run queue.""" + """Add CDCP processing scripts to the Automation run queue.""" - # connect - u, p = secrets.db.auto - cnx = mysql.connector.connect(user=u, password=p, database='automation') - cur = cnx.cursor() + # connect + u, p = secrets.db.auto + cnx = mysql.connector.connect(user=u, password=p, database="automation") + cur = cnx.cursor() - # add step "Process CDCP Data" to queue - cur.execute('CALL automation.RunStep(46)') + # add step "Process CDCP Data" to queue + cur.execute("CALL automation.RunStep(46)") - # disconnect - cur.close() - cnx.commit() - cnx.close() + # disconnect + cur.close() + cnx.commit() + cnx.close() def fetch_data(): - """ - Check for new files on dropbox, download them, zip them, cleanup dropbox, and - trigger further processing of new data. - """ - - # initialize dropbox api - dbx = dropbox.Dropbox(secrets.cdcp.dropbox_token) - - # look for new CDC data files - print('checking dropbox:%s' % DROPBOX_BASE_DIR) - save_list = [] - for entry in dbx.files_list_folder(DROPBOX_BASE_DIR).entries: - name = entry.name - if name.endswith('.csv') or name.endswith('.zip'): - print(' download "%s"' % name) - save_list.append(name) - else: - print(' skip "%s"' % name) - - # determine if there's anything to be done - if len(save_list) == 0: - print('did not find any new data files') - return - - # download new files, saving them inside of a new zip file - timestamp = get_timestamp_string() - zip_path = '%s/dropbox_%s.zip' % (DELPHI_BASE_DIR, timestamp) - print('downloading into delphi:%s' % zip_path) - with ZipFile(zip_path, 'w', ZIP_DEFLATED) as zf: + """ + Check for new files on dropbox, download them, zip them, cleanup dropbox, and + trigger further processing of new data. + """ + + # initialize dropbox api + dbx = dropbox.Dropbox(secrets.cdcp.dropbox_token) + + # look for new CDC data files + print(f"checking dropbox: {DROPBOX_BASE_DIR}") + save_list = [] + for entry in dbx.files_list_folder(DROPBOX_BASE_DIR).entries: + name = entry.name + if name.endswith(".csv") or name.endswith(".zip"): + print(f" download: {name}") + save_list.append(name) + else: + print(f" skip: {name}") + + # determine if there's anything to be done + if len(save_list) == 0: + print("did not find any new data files") + return + + # download new files, saving them inside of a new zip file + timestamp = get_timestamp_string() + zip_path = f"{DELPHI_BASE_DIR}/dropbox_{timestamp}.zip" + print(f"downloading into delphi:{zip_path}") + with ZipFile(zip_path, "w", ZIP_DEFLATED) as zf: + for name in save_list: + # location of the file on dropbox + dropbox_path = f"{DROPBOX_BASE_DIR}/{name}" + print(f" {dropbox_path}") + + # start the download + meta, resp = dbx.files_download(dropbox_path) + + # check status and length + if resp.status_code != 200: + raise Exception(["resp.status_code", resp.status_code]) + dropbox_len = meta.size + print(" need %d bytes..." 
% dropbox_len) + content_len = int(resp.headers.get("Content-Length", -1)) + if dropbox_len != content_len: + info = ["dropbox_len", dropbox_len, "content_len", content_len] + raise Exception(info) + + # finish the download, holding the data in this variable + filedata = resp.content + + # check the length again + payload_len = len(filedata) + print(" downloaded") + if dropbox_len != payload_len: + info = ["dropbox_len", dropbox_len, "payload_len", payload_len] + raise Exception(info) + + # add the downloaded file to the zip file + zf.writestr(name, filedata) + print(" added") + + # At this point, all the data is stored and awaiting further processing on + # the delphi server. + print(f"saved all new data in {zip_path}") + + # on dropbox, archive downloaded files so they won't be downloaded again + archive_dir = f"archived_reports/processed_{timestamp}" + print("archiving files...") for name in save_list: - # location of the file on dropbox - dropbox_path = '%s/%s' % (DROPBOX_BASE_DIR, name) - print(' %s' % dropbox_path) - - # start the download - meta, resp = dbx.files_download(dropbox_path) - - # check status and length - if resp.status_code != 200: - raise Exception(['resp.status_code', resp.status_code]) - dropbox_len = meta.size - print(' need %d bytes...' % dropbox_len) - content_len = int(resp.headers.get('Content-Length', -1)) - if dropbox_len != content_len: - info = ['dropbox_len', dropbox_len, 'content_len', content_len] - raise Exception(info) - - # finish the download, holding the data in this variable - filedata = resp.content - - # check the length again - payload_len = len(filedata) - print(' downloaded') - if dropbox_len != payload_len: - info = ['dropbox_len', dropbox_len, 'payload_len', payload_len] - raise Exception(info) - - # add the downloaded file to the zip file - zf.writestr(name, filedata) - print(' added') - - # At this point, all the data is stored and awaiting further processing on - # the delphi server. 
- print('saved all new data in %s' % zip_path) - - # on dropbox, archive downloaded files so they won't be downloaded again - archive_dir = 'archived_reports/processed_%s' % timestamp - print('archiving files...') - for name in save_list: - # source and destination - dropbox_src = '%s/%s' % (DROPBOX_BASE_DIR, name) - dropbox_dst = '%s/%s/%s' % (DROPBOX_BASE_DIR, archive_dir, name) - print(' "%s" -> "%s"' % (dropbox_src, dropbox_dst)) - - # move the file - meta = dbx.files_move(dropbox_src, dropbox_dst) - - # sanity check - if archive_dir not in meta.path_lower: - raise Exception('failed to move "%s"' % name) - - # finally, trigger the usual processing flow - print('triggering processing flow') - trigger_further_processing() - print('done') + # source and destination + dropbox_src = f"{DROPBOX_BASE_DIR}/{name}" + dropbox_dst = f"{DROPBOX_BASE_DIR}/{archive_dir}/{name}" + print(f" {dropbox_src} -> {dropbox_dst}") + + # move the file + meta = dbx.files_move(dropbox_src, dropbox_dst) + + # sanity check + if archive_dir not in meta.path_lower: + raise Exception(f"failed to move {name}") + + # finally, trigger the usual processing flow + print("triggering processing flow") + trigger_further_processing() + print("done") def main(): - # fetch new data - fetch_data() + # fetch new data + fetch_data() -if __name__ == '__main__': - main() +if __name__ == "__main__": + main() diff --git a/src/acquisition/cdcp/cdc_extract.py b/src/acquisition/cdcp/cdc_extract.py index 83ed08d5b..e4d7af573 100644 --- a/src/acquisition/cdcp/cdc_extract.py +++ b/src/acquisition/cdcp/cdc_extract.py @@ -75,7 +75,7 @@ def get_num_hits(cur, epiweek, state, page): - sql = ''' + sql = """ SELECT sum(c.`num`) `num` FROM @@ -86,36 +86,36 @@ def get_num_hits(cur, epiweek, state, page): m.`date` = c.`date` AND m.`state` = c.`state` WHERE m.`epiweek` = %s AND c.`state` = %s AND c.`page` LIKE %s - ''' - num = None - cur.execute(sql, (epiweek, state, page)) - for (num,) in cur: - pass - if num is None: - return 0 - return num + """ + num = None + cur.execute(sql, (epiweek, state, page)) + for (num,) in cur: + pass + if num is None: + return 0 + return num def get_total_hits(cur, epiweek, state): - sql = ''' + sql = """ SELECT sum(m.`total`) `total` FROM `cdc_meta` m WHERE m.`epiweek` = %s AND m.`state` = %s - ''' - total = None - cur.execute(sql, (epiweek, state)) - for (total,) in cur: - pass - if total is None: - raise Exception('missing data for %d-%s' % (epiweek, state)) - return total + """ + total = None + cur.execute(sql, (epiweek, state)) + for (total,) in cur: + pass + if total is None: + raise Exception("missing data for %d-%s" % (epiweek, state)) + return total def store_result(cur, epiweek, state, num1, num2, num3, num4, num5, num6, num7, num8, total): - sql = ''' + sql = """ INSERT INTO `cdc_extract` (`epiweek`, `state`, `num1`, `num2`, `num3`, `num4`, `num5`, `num6`, `num7`, `num8`, `total`) VALUES @@ -130,94 +130,99 @@ def store_result(cur, epiweek, state, num1, num2, num3, num4, num5, num6, num7, `num7` = %s, `num8` = %s, `total` = %s - ''' - values = [num1, num2, num3, num4, num5, num6, num7, num8, total] - args = tuple([epiweek, state] + values + values) - cur.execute(sql, args) + """ + values = [num1, num2, num3, num4, num5, num6, num7, num8, total] + args = tuple([epiweek, state] + values + values) + cur.execute(sql, args) def extract(first_week=None, last_week=None, test_mode=False): - # page title templates - pages = [ - '%What You Should Know for the % Influenza Season%', - '%What To Do If You Get Sick%', - 
'%Flu Symptoms & Severity%', - '%How Flu Spreads%', - '%What You Should Know About Flu Antiviral Drugs%', - '%Weekly US Map%', - '%Basics%', - '%Flu Activity & Surveillance%', - ] - - # location information - states = sorted(cdc_upload.STATES.values()) - - # connect - u, p = secrets.db.epi - cnx = mysql.connector.connect(user=u, password=p, database='epidata') - cur = cnx.cursor() - - # weeks to update - if first_week is None: - cur.execute('SELECT max(`epiweek`) FROM `cdc_extract`') - for (first_week,) in cur: - pass - if last_week is None: - cur.execute('SELECT max(`epiweek`) FROM `cdc_meta`') - for (last_week,) in cur: - pass - print('extracting %d--%d' % (first_week, last_week)) - - # update each epiweek - for epiweek in flu.range_epiweeks(first_week, last_week, inclusive=True): - # update each state - for state in states: - try: - num1 = get_num_hits(cur, epiweek, state, pages[0]) - num2 = get_num_hits(cur, epiweek, state, pages[1]) - num3 = get_num_hits(cur, epiweek, state, pages[2]) - num4 = get_num_hits(cur, epiweek, state, pages[3]) - num5 = get_num_hits(cur, epiweek, state, pages[4]) - num6 = get_num_hits(cur, epiweek, state, pages[5]) - num7 = get_num_hits(cur, epiweek, state, pages[6]) - num8 = get_num_hits(cur, epiweek, state, pages[7]) - total = get_total_hits(cur, epiweek, state) - store_result(cur, epiweek, state, num1, num2, num3, num4, num5, num6, num7, num8, total) - print(' %d-%s: %d %d %d %d %d %d %d %d (%d)' % (epiweek, state, num1, num2, num3, num4, num5, num6, num7, num8, total)) - except Exception as ex: - print(' %d-%s: failed' % (epiweek, state), ex) - #raise ex - sys.stdout.flush() - - # disconnect - cur.close() - if not test_mode: - cnx.commit() - cnx.close() + # page title templates + pages = [ + "%What You Should Know for the % Influenza Season%", + "%What To Do If You Get Sick%", + "%Flu Symptoms & Severity%", + "%How Flu Spreads%", + "%What You Should Know About Flu Antiviral Drugs%", + "%Weekly US Map%", + "%Basics%", + "%Flu Activity & Surveillance%", + ] + + # location information + states = sorted(cdc_upload.STATES.values()) + + # connect + u, p = secrets.db.epi + cnx = mysql.connector.connect(user=u, password=p, database="epidata") + cur = cnx.cursor() + + # weeks to update + if first_week is None: + cur.execute("SELECT max(`epiweek`) FROM `cdc_extract`") + for (first_week,) in cur: + pass + if last_week is None: + cur.execute("SELECT max(`epiweek`) FROM `cdc_meta`") + for (last_week,) in cur: + pass + print("extracting %d--%d" % (first_week, last_week)) + + # update each epiweek + for epiweek in flu.range_epiweeks(first_week, last_week, inclusive=True): + # update each state + for state in states: + try: + num1 = get_num_hits(cur, epiweek, state, pages[0]) + num2 = get_num_hits(cur, epiweek, state, pages[1]) + num3 = get_num_hits(cur, epiweek, state, pages[2]) + num4 = get_num_hits(cur, epiweek, state, pages[3]) + num5 = get_num_hits(cur, epiweek, state, pages[4]) + num6 = get_num_hits(cur, epiweek, state, pages[5]) + num7 = get_num_hits(cur, epiweek, state, pages[6]) + num8 = get_num_hits(cur, epiweek, state, pages[7]) + total = get_total_hits(cur, epiweek, state) + store_result( + cur, epiweek, state, num1, num2, num3, num4, num5, num6, num7, num8, total + ) + print( + " %d-%s: %d %d %d %d %d %d %d %d (%d)" + % (epiweek, state, num1, num2, num3, num4, num5, num6, num7, num8, total) + ) + except Exception as ex: + print(" %d-%s: failed" % (epiweek, state), ex) + # raise ex + sys.stdout.flush() + + # disconnect + cur.close() + if not test_mode: + 
cnx.commit() + cnx.close() def main(): - # args and usage - parser = argparse.ArgumentParser() - parser.add_argument('--first', '-f', default=None, type=int, help='first epiweek override') - parser.add_argument('--last', '-l', default=None, type=int, help='last epiweek override') - parser.add_argument('--epiweek', '-w', default=None, type=int, help='epiweek override') - parser.add_argument('--test', '-t', default=False, action='store_true', help='dry run only') - args = parser.parse_args() - - # sanity check - first, last, week = args.first, args.last, args.epiweek - for ew in [first, last, week]: - if ew is not None: - flu.check_epiweek(ew) - if first is not None and last is not None and first > last: - raise Exception('epiweeks in the wrong order') - if week is not None: - first = last = week - - # extract the page hits for all states on the specified weeks - extract(first, last, args.test) - - -if __name__ == '__main__': - main() + # args and usage + parser = argparse.ArgumentParser() + parser.add_argument("--first", "-f", default=None, type=int, help="first epiweek override") + parser.add_argument("--last", "-l", default=None, type=int, help="last epiweek override") + parser.add_argument("--epiweek", "-w", default=None, type=int, help="epiweek override") + parser.add_argument("--test", "-t", default=False, action="store_true", help="dry run only") + args = parser.parse_args() + + # sanity check + first, last, week = args.first, args.last, args.epiweek + for ew in [first, last, week]: + if ew is not None: + flu.check_epiweek(ew) + if first is not None and last is not None and first > last: + raise Exception("epiweeks in the wrong order") + if week is not None: + first = last = week + + # extract the page hits for all states on the specified weeks + extract(first, last, args.test) + + +if __name__ == "__main__": + main() diff --git a/src/acquisition/cdcp/cdc_upload.py b/src/acquisition/cdcp/cdc_upload.py index c9c206dfa..fef0821b7 100644 --- a/src/acquisition/cdcp/cdc_upload.py +++ b/src/acquisition/cdcp/cdc_upload.py @@ -87,191 +87,192 @@ STATES = { - 'Alabama': 'AL', - 'Alaska': 'AK', - 'Arizona': 'AZ', - 'Arkansas': 'AR', - 'California': 'CA', - 'Colorado': 'CO', - 'Connecticut': 'CT', - 'Delaware': 'DE', - 'District of Columbia': 'DC', - 'Florida': 'FL', - 'Georgia': 'GA', - 'Hawaii': 'HI', - 'Idaho': 'ID', - 'Illinois': 'IL', - 'Indiana': 'IN', - 'Iowa': 'IA', - 'Kansas': 'KS', - 'Kentucky': 'KY', - 'Louisiana': 'LA', - 'Maine': 'ME', - 'Maryland': 'MD', - 'Massachusetts': 'MA', - 'Michigan': 'MI', - 'Minnesota': 'MN', - 'Mississippi': 'MS', - 'Missouri': 'MO', - 'Montana': 'MT', - 'Nebraska': 'NE', - 'Nevada': 'NV', - 'New Hampshire': 'NH', - 'New Jersey': 'NJ', - 'New Mexico': 'NM', - 'New York': 'NY', - 'North Carolina': 'NC', - 'North Dakota': 'ND', - 'Ohio': 'OH', - 'Oklahoma': 'OK', - 'Oregon': 'OR', - 'Pennsylvania': 'PA', - 'Rhode Island': 'RI', - 'South Carolina': 'SC', - 'South Dakota': 'SD', - 'Tennessee': 'TN', - 'Texas': 'TX', - 'Utah': 'UT', - 'Vermont': 'VT', - 'Virginia': 'VA', - 'Washington': 'WA', - 'West Virginia': 'WV', - 'Wisconsin': 'WI', - 'Wyoming': 'WY', - #'Puerto Rico': 'PR', - #'Virgin Islands': 'VI', - #'Guam': 'GU', + "Alabama": "AL", + "Alaska": "AK", + "Arizona": "AZ", + "Arkansas": "AR", + "California": "CA", + "Colorado": "CO", + "Connecticut": "CT", + "Delaware": "DE", + "District of Columbia": "DC", + "Florida": "FL", + "Georgia": "GA", + "Hawaii": "HI", + "Idaho": "ID", + "Illinois": "IL", + "Indiana": "IN", + "Iowa": "IA", + "Kansas": "KS", + 
"Kentucky": "KY", + "Louisiana": "LA", + "Maine": "ME", + "Maryland": "MD", + "Massachusetts": "MA", + "Michigan": "MI", + "Minnesota": "MN", + "Mississippi": "MS", + "Missouri": "MO", + "Montana": "MT", + "Nebraska": "NE", + "Nevada": "NV", + "New Hampshire": "NH", + "New Jersey": "NJ", + "New Mexico": "NM", + "New York": "NY", + "North Carolina": "NC", + "North Dakota": "ND", + "Ohio": "OH", + "Oklahoma": "OK", + "Oregon": "OR", + "Pennsylvania": "PA", + "Rhode Island": "RI", + "South Carolina": "SC", + "South Dakota": "SD", + "Tennessee": "TN", + "Texas": "TX", + "Utah": "UT", + "Vermont": "VT", + "Virginia": "VA", + "Washington": "WA", + "West Virginia": "WV", + "Wisconsin": "WI", + "Wyoming": "WY", + #'Puerto Rico': 'PR', + #'Virgin Islands': 'VI', + #'Guam': 'GU', } -sql_cdc = ''' +sql_cdc = """ INSERT INTO `cdc` (`date`, `page`, `state`, `num`) VALUES (%s, %s, %s, %s) ON DUPLICATE KEY UPDATE `num` = %s -''' +""" -sql_cdc_meta = ''' +sql_cdc_meta = """ INSERT INTO `cdc_meta` (`date`, `epiweek`, `state`, `total`) VALUES (%s, yearweek(%s, 6), %s, %s) ON DUPLICATE KEY UPDATE `total` = %s -''' +""" def upload(test_mode): - # connect - u, p = secrets.db.epi - cnx = mysql.connector.connect(user=u, password=p, database='epidata') - cur = cnx.cursor() - - # insert (or update) table `cdc` - def insert_cdc(date, page, state, num): - cur.execute(sql_cdc, (date, page, state, num, num)) - - # insert (or update) table `cdc_meta` - def insert_cdc_meta(date, state, total): - cur.execute(sql_cdc_meta, (date, date, state, total, total)) - - # loop over rows until the header row is found - def find_header(reader): - for row in reader: - if len(row) > 0 and row[0] == 'Date': - return True - return False - - # parse csv files for `cdc` and `cdc_meta` - def parse_csv(meta): - def handler(reader): - if not find_header(reader): - raise Exception('header not found') - count = 0 - cols = 3 if meta else 4 - for row in reader: - if len(row) != cols: - continue - if meta: - (a, c, d) = row - else: - (a, b, c, d) = row - c = c[:-16] - if c not in STATES: - continue - a = datetime.strptime(a, '%b %d, %Y').strftime('%Y-%m-%d') - c = STATES[c] - d = int(d) - if meta: - insert_cdc_meta(a, c, d) + # connect + u, p = secrets.db.epi + cnx = mysql.connector.connect(user=u, password=p, database="epidata") + cur = cnx.cursor() + + # insert (or update) table `cdc` + def insert_cdc(date, page, state, num): + cur.execute(sql_cdc, (date, page, state, num, num)) + + # insert (or update) table `cdc_meta` + def insert_cdc_meta(date, state, total): + cur.execute(sql_cdc_meta, (date, date, state, total, total)) + + # loop over rows until the header row is found + def find_header(reader): + for row in reader: + if len(row) > 0 and row[0] == "Date": + return True + return False + + # parse csv files for `cdc` and `cdc_meta` + def parse_csv(meta): + def handler(reader): + if not find_header(reader): + raise Exception("header not found") + count = 0 + cols = 3 if meta else 4 + for row in reader: + if len(row) != cols: + continue + if meta: + (a, c, d) = row + else: + (a, b, c, d) = row + c = c[:-16] + if c not in STATES: + continue + a = datetime.strptime(a, "%b %d, %Y").strftime("%Y-%m-%d") + c = STATES[c] + d = int(d) + if meta: + insert_cdc_meta(a, c, d) + else: + insert_cdc(a, b, c, d) + count += 1 + return count + + return handler + + # recursively open zip files + def parse_zip(zf, level=1): + for name in zf.namelist(): + prefix = " " * level + print(prefix, name) + if name[-4:] == ".zip": + with zf.open(name) as temp: + with 
ZipFile(io.BytesIO(temp.read())) as zf2: + parse_zip(zf2, level + 1) + elif name[-4:] == ".csv": + handler = None + if "Flu Pages by Region" in name: + handler = parse_csv(False) + elif "Regions for all CDC" in name: + handler = parse_csv(True) + else: + print(prefix, " (skipped)") + if handler is not None: + with zf.open(name) as temp: + count = handler(csv.reader(io.StringIO(str(temp.read(), "utf-8")))) + print(prefix, " %d rows" % count) + else: + print(prefix, " (ignored)") + + # find, parse, and move zip files + zip_files = glob.glob("/common/cdc_stage/*.zip") + print("searching...") + for f in zip_files: + print(" ", f) + print("parsing...") + for f in zip_files: + with ZipFile(f) as zf: + parse_zip(zf) + print("moving...") + for f in zip_files: + src = f + dst = os.path.join("/home/automation/cdc_page_stats/", os.path.basename(src)) + print(" ", src, "->", dst) + if test_mode: + print(" (test mode enabled - not moved)") else: - insert_cdc(a, b, c, d) - count += 1 - return count - return handler - - # recursively open zip files - def parse_zip(zf, level=1): - for name in zf.namelist(): - prefix = ' ' * level - print(prefix, name) - if name[-4:] == '.zip': - with zf.open(name) as temp: - with ZipFile(io.BytesIO(temp.read())) as zf2: - parse_zip(zf2, level + 1) - elif name[-4:] == '.csv': - handler = None - if 'Flu Pages by Region' in name: - handler = parse_csv(False) - elif 'Regions for all CDC' in name: - handler = parse_csv(True) - else: - print(prefix, ' (skipped)') - if handler is not None: - with zf.open(name) as temp: - count = handler(csv.reader(io.StringIO(str(temp.read(), 'utf-8')))) - print(prefix, ' %d rows' % count) - else: - print(prefix, ' (ignored)') - - # find, parse, and move zip files - zip_files = glob.glob('/common/cdc_stage/*.zip') - print('searching...') - for f in zip_files: - print(' ', f) - print('parsing...') - for f in zip_files: - with ZipFile(f) as zf: - parse_zip(zf) - print('moving...') - for f in zip_files: - src = f - dst = os.path.join('/home/automation/cdc_page_stats/', os.path.basename(src)) - print(' ', src, '->', dst) - if test_mode: - print(' (test mode enabled - not moved)') - else: - shutil.move(src, dst) - if not os.path.isfile(dst): - raise Exception('unable to move file') - - # disconnect - cur.close() - if not test_mode: - cnx.commit() - cnx.close() + shutil.move(src, dst) + if not os.path.isfile(dst): + raise Exception("unable to move file") + + # disconnect + cur.close() + if not test_mode: + cnx.commit() + cnx.close() def main(): - # args and usage - parser = argparse.ArgumentParser() - parser.add_argument('--test', '-t', default=False, action='store_true', help='dry run only') - args = parser.parse_args() + # args and usage + parser = argparse.ArgumentParser() + parser.add_argument("--test", "-t", default=False, action="store_true", help="dry run only") + args = parser.parse_args() - # make it happen - upload(args.test) + # make it happen + upload(args.test) -if __name__ == '__main__': - main() +if __name__ == "__main__": + main() From 9e6ff16f599e8feec34a08dd1bddbc5eae347b55 Mon Sep 17 00:00:00 2001 From: Dmitry Shemetov Date: Wed, 21 Jun 2023 13:50:26 -0700 Subject: [PATCH 19/43] style(black): format covidcast_nowcast acquisition --- src/acquisition/covidcast_nowcast/load_sensors.py | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) diff --git a/src/acquisition/covidcast_nowcast/load_sensors.py b/src/acquisition/covidcast_nowcast/load_sensors.py index 73ce7eee5..2e2269bb8 100644 --- 
a/src/acquisition/covidcast_nowcast/load_sensors.py +++ b/src/acquisition/covidcast_nowcast/load_sensors.py @@ -82,8 +82,7 @@ def load_and_prepare_file(filepath: str, attributes: PathDetails) -> pd.DataFram def _move_after_processing(filepath, success): archive_dir = SUCCESS_DIR if success else FAIL_DIR - new_dir = os.path.dirname(filepath).replace( - "receiving", archive_dir) + new_dir = os.path.dirname(filepath).replace("receiving", archive_dir) os.makedirs(new_dir, exist_ok=True) move(filepath, filepath.replace("receiving", archive_dir)) print(f"{filepath} moved to {archive_dir}") @@ -96,10 +95,14 @@ def method(table, conn, keys, data_iter): meta, # specify lag column explicitly; lag is a reserved word sqlalchemy doesn't know about sqlalchemy.Column("lag", sqlalchemy.Integer, quote=True), - autoload=True) - insert_stmt = sqlalchemy.dialects.mysql.insert(sql_table).values([dict(zip(keys, data)) for data in data_iter]) + autoload=True, + ) + insert_stmt = sqlalchemy.dialects.mysql.insert(sql_table).values( + [dict(zip(keys, data)) for data in data_iter] + ) upsert_stmt = insert_stmt.on_duplicate_key_update({x.name: x for x in insert_stmt.inserted}) conn.execute(upsert_stmt) + return method From d1141d904da4e62992b97c92d5caebd8fadffd42 Mon Sep 17 00:00:00 2001 From: Dmitry Shemetov Date: Wed, 21 Jun 2023 13:51:28 -0700 Subject: [PATCH 20/43] style(black): format ecdc acquisition --- src/acquisition/ecdc/ecdc_db_update.py | 98 +++++++++++++------------- src/acquisition/ecdc/ecdc_ili.py | 68 +++++++++++------- 2 files changed, 91 insertions(+), 75 deletions(-) diff --git a/src/acquisition/ecdc/ecdc_db_update.py b/src/acquisition/ecdc/ecdc_db_update.py index 63689c1d5..6e0083ecc 100644 --- a/src/acquisition/ecdc/ecdc_db_update.py +++ b/src/acquisition/ecdc/ecdc_db_update.py @@ -33,9 +33,8 @@ import argparse import datetime import glob -import subprocess -import random import os +import tempfile # third party import mysql.connector @@ -46,12 +45,14 @@ from delphi.utils.epiweek import delta_epiweeks from delphi.utils.epidate import EpiDate + def ensure_tables_exist(): - (u,p) = secrets.db.epi - cnx = mysql.connector.connect(user=u,password=p,database='epidata') + (u, p) = secrets.db.epi + cnx = mysql.connector.connect(user=u, password=p, database="epidata") try: cursor = cnx.cursor() - cursor.execute(''' + cursor.execute( + """ CREATE TABLE IF NOT EXISTS `ecdc_ili` ( `id` INT(11) NOT NULL PRIMARY KEY AUTO_INCREMENT, `release_date` DATE NOT NULL, @@ -62,58 +63,63 @@ def ensure_tables_exist(): `incidence_rate` DOUBLE NOT NULL, UNIQUE KEY (`issue`, `epiweek`, `region`) ); - '''); + """ + ) cnx.commit() finally: cnx.close() + def safe_float(f): try: - return float(f.replace(',','')) + return float(f.replace(",", "")) except: return 0 + def safe_int(i): try: - return int(i.replace(',','')) + return int(i.replace(",", "")) except: return 0 -def get_rows(cnx, table='ecdc_ili'): - # Count and return the number of rows in the `ecdc_ili` table. - select = cnx.cursor() - select.execute('SELECT count(1) num FROM %s' % table) - for (num,) in select: - pass - select.close() - return num + +def get_rows(cnx, table="ecdc_ili"): + # Count and return the number of rows in the `ecdc_ili` table. + select = cnx.cursor() + select.execute("SELECT count(1) num FROM %s" % table) + for (num,) in select: + pass + select.close() + return num + def update_from_file(issue, date, dir, test_mode=False): # Read ECDC data from CSVs and insert into (or update) the database. 
# database connection u, p = secrets.db.epi - cnx = mysql.connector.connect(user=u, password=p, database='epidata') - rows1 = get_rows(cnx, 'ecdc_ili') - print('rows before: %d' % (rows1)) + cnx = mysql.connector.connect(user=u, password=p, database="epidata") + rows1 = get_rows(cnx, "ecdc_ili") + print("rows before: %d" % (rows1)) insert = cnx.cursor() # load the data, ignoring empty rows - files = glob.glob(os.path.join(dir,"*.csv")) + files = glob.glob(os.path.join(dir, "*.csv")) rows = [] for filename in files: - with open(filename,'r') as f: + with open(filename) as f: for l in f: - data = list(map(lambda s: s.strip().replace('"',''),l.split(','))) + data = list(map(lambda s: s.strip().replace('"', ""), l.split(","))) row = {} - row['epiweek'] = int(data[1][:4] + data[1][5:]) - row['region'] = data[4] - row['incidence_rate'] = data[3] + row["epiweek"] = int(data[1][:4] + data[1][5:]) + row["region"] = data[4] + row["incidence_rate"] = data[3] rows.append(row) - print(' loaded %d rows' % len(rows)) + print(" loaded %d rows" % len(rows)) entries = [obj for obj in rows if obj] - print(' found %d entries' % len(entries)) + print(" found %d entries" % len(entries)) - sql = ''' + sql = """ INSERT INTO `ecdc_ili` (`release_date`, `issue`, `epiweek`, `region`, `lag`, `incidence_rate`) @@ -122,13 +128,13 @@ def update_from_file(issue, date, dir, test_mode=False): ON DUPLICATE KEY UPDATE `release_date` = least(`release_date`, '%s'), `incidence_rate` = %s - ''' + """ for row in entries: - lag = delta_epiweeks(row['epiweek'], issue) - data_args = [row['incidence_rate']] + lag = delta_epiweeks(row["epiweek"], issue) + data_args = [row["incidence_rate"]] - insert_args = [date,issue,row['epiweek'],row['region'],lag] + data_args + insert_args = [date, issue, row["epiweek"], row["region"], lag] + data_args update_args = [date] + data_args try: insert.execute(sql % tuple(insert_args + update_args)) @@ -138,39 +144,34 @@ def update_from_file(issue, date, dir, test_mode=False): # cleanup insert.close() if test_mode: - print('test mode, not committing') + print("test mode, not committing") rows2 = rows1 else: cnx.commit() rows2 = get_rows(cnx) - print('rows after: %d (added %d)' % (rows2,rows2-rows1)) + print("rows after: %d (added %d)" % (rows2, rows2 - rows1)) cnx.close() + def main(): # args and usage parser = argparse.ArgumentParser() parser.add_argument( - '--test', - action='store_true', - help='do dry run only, do not update the database' + "--test", action="store_true", help="do dry run only, do not update the database" ) parser.add_argument( - '--file', - type=str, - help='load an existing zip file (otherwise fetch current data)' + "--file", type=str, help="load an existing zip file (otherwise fetch current data)" ) parser.add_argument( - '--issue', - type=int, - help='issue of the file (e.g. 201740); used iff --file is given' + "--issue", type=int, help="issue of the file (e.g. 
201740); used iff --file is given" ) args = parser.parse_args() if (args.file is None) != (args.issue is None): - raise Exception('--file and --issue must both be present or absent') + raise Exception("--file and --issue must both be present or absent") - date = datetime.datetime.now().strftime('%Y-%m-%d') - print('assuming release date is today, %s' % date) + date = datetime.datetime.now().strftime("%Y-%m-%d") + print("assuming release date is today, %s" % date) ensure_tables_exist() if args.file: @@ -204,7 +205,8 @@ def main(): if not db_error: break # Exit loop with success if flag >= max_tries: - print('WARNING: Database `ecdc_ili` did not update successfully') + print("WARNING: Database `ecdc_ili` did not update successfully") + -if __name__ == '__main__': +if __name__ == "__main__": main() diff --git a/src/acquisition/ecdc/ecdc_ili.py b/src/acquisition/ecdc/ecdc_ili.py index 1dd0505d1..dca9b51ae 100644 --- a/src/acquisition/ecdc/ecdc_ili.py +++ b/src/acquisition/ecdc/ecdc_ili.py @@ -11,60 +11,74 @@ from bs4 import BeautifulSoup from selenium import webdriver from selenium.webdriver.support.ui import Select -from selenium.webdriver.support.ui import WebDriverWait +from selenium.webdriver.support.ui import WebDriverWait from selenium.webdriver.common.by import By from selenium.webdriver.support import expected_conditions as EC -def download_ecdc_data(download_dir = "downloads"): - url = 'https://flunewseurope.org/PrimaryCareData' +def download_ecdc_data(download_dir="downloads"): + url = "https://flunewseurope.org/PrimaryCareData" resp = requests.get(url) - soup = BeautifulSoup(resp.content, 'lxml') - mydivs = soup.findAll('div') + soup = BeautifulSoup(resp.content, "lxml") + mydivs = soup.findAll("div") for div in mydivs: dic = div.attrs - if dic.get('class')== ['graph-container'] and dic.get('id')== 'dinfl06': + if dic.get("class") == ["graph-container"] and dic.get("id") == "dinfl06": break # get new url of the ILI chunck - url = div.contents[1].attrs['src'] + url = div.contents[1].attrs["src"] opts = webdriver.firefox.options.Options() opts.set_headless() fp = webdriver.FirefoxProfile() - fp.set_preference("browser.download.folderList",2) - fp.set_preference("browser.download.manager.showWhenStarting",False) - fp.set_preference("browser.download.dir",os.path.abspath(download_dir)) - fp.set_preference("browser.helperApps.neverAsk.saveToDisk","text/csv") + fp.set_preference("browser.download.folderList", 2) + fp.set_preference("browser.download.manager.showWhenStarting", False) + fp.set_preference("browser.download.dir", os.path.abspath(download_dir)) + fp.set_preference("browser.helperApps.neverAsk.saveToDisk", "text/csv") try: - driver = webdriver.Firefox(options=opts,firefox_profile=fp) + driver = webdriver.Firefox(options=opts, firefox_profile=fp) driver.get(url) for i in range(2, 54): # select country try: - WebDriverWait(driver,30).until(EC.element_to_be_clickable((By.ID,'fluNewsReportViewer_ctl04_ctl03_ddValue'))) - Select(driver.find_element_by_tag_name('select')).select_by_value(str(i)) + WebDriverWait(driver, 30).until( + EC.element_to_be_clickable((By.ID, "fluNewsReportViewer_ctl04_ctl03_ddValue")) + ) + Select(driver.find_element_by_tag_name("select")).select_by_value(str(i)) time.sleep(3) - soup = BeautifulSoup(driver.page_source, 'html.parser') - options = soup.select('#fluNewsReportViewer_ctl04_ctl05_ddValue')[0].find_all('option') + soup = BeautifulSoup(driver.page_source, "html.parser") + options = soup.select("#fluNewsReportViewer_ctl04_ctl05_ddValue")[0].find_all( 
+ "option" + ) ind = 1 for j in range(len(options)): - if 'ILI' in str(options[j]): - pattern = re.compile(r'\d+') + if "ILI" in str(options[j]): + pattern = re.compile(r"\d+") ind = re.findall(pattern, str(options[j]))[0] break if type(ind) == str: # select clinical tyle - WebDriverWait(driver,30).until(EC.element_to_be_clickable((By.ID,'fluNewsReportViewer_ctl04_ctl05_ddValue'))) - Select(driver.find_element_by_id('fluNewsReportViewer_ctl04_ctl05_ddValue')).select_by_value(ind) - WebDriverWait(driver,30).until(EC.element_to_be_clickable((By.ID,'btnSelectExportType'))) - driver.find_element_by_id('btnSelectExportType').click() - WebDriverWait(driver,30).until(EC.element_to_be_clickable((By.ID,'btnExportToCsv'))) - driver.find_element_by_id('btnExportToCsv').click() + WebDriverWait(driver, 30).until( + EC.element_to_be_clickable( + (By.ID, "fluNewsReportViewer_ctl04_ctl05_ddValue") + ) + ) + Select( + driver.find_element_by_id("fluNewsReportViewer_ctl04_ctl05_ddValue") + ).select_by_value(ind) + WebDriverWait(driver, 30).until( + EC.element_to_be_clickable((By.ID, "btnSelectExportType")) + ) + driver.find_element_by_id("btnSelectExportType").click() + WebDriverWait(driver, 30).until( + EC.element_to_be_clickable((By.ID, "btnExportToCsv")) + ) + driver.find_element_by_id("btnExportToCsv").click() time.sleep(3) except: driver.get(url) except: - print('WARNING: ECDC Scraper may not have downloaded all of the available data.') - #cleanup - os.system('''pkill "firefox" ''') + print("WARNING: ECDC Scraper may not have downloaded all of the available data.") + # cleanup + os.system("""pkill "firefox" """) os.system('''pkill "(firefox-bin)"''') os.system('''pkill "geckodriver*"''') From 08af0f6b7bff85bbc2b193b63b5abf6a16ba03e4 Mon Sep 17 00:00:00 2001 From: Dmitry Shemetov Date: Wed, 21 Jun 2023 13:52:50 -0700 Subject: [PATCH 21/43] style(black): format flusurv acquisition --- src/acquisition/flusurv/flusurv.py | 277 +++++++++++----------- src/acquisition/flusurv/flusurv_update.py | 193 ++++++++------- 2 files changed, 235 insertions(+), 235 deletions(-) diff --git a/src/acquisition/flusurv/flusurv.py b/src/acquisition/flusurv/flusurv.py index 6b8d247ae..1e534b740 100644 --- a/src/acquisition/flusurv/flusurv.py +++ b/src/acquisition/flusurv/flusurv.py @@ -50,167 +50,170 @@ # all currently available FluSurv locations and their associated codes # the number pair represents NetworkID and CatchmentID location_codes = { - 'CA': (2, 1), - 'CO': (2, 2), - 'CT': (2, 3), - 'GA': (2, 4), - 'IA': (3, 5), - 'ID': (3, 6), - 'MD': (2, 7), - 'MI': (3, 8), - 'MN': (2, 9), - 'NM': (2, 11), - 'NY_albany': (2, 13), - 'NY_rochester': (2, 14), - 'OH': (3, 15), - 'OK': (3, 16), - 'OR': (2, 17), - 'RI': (3, 18), - 'SD': (3, 19), - 'TN': (2, 20), - 'UT': (3, 21), - 'network_all': (1, 22), - 'network_eip': (2, 22), - 'network_ihsp': (3, 22), + "CA": (2, 1), + "CO": (2, 2), + "CT": (2, 3), + "GA": (2, 4), + "IA": (3, 5), + "ID": (3, 6), + "MD": (2, 7), + "MI": (3, 8), + "MN": (2, 9), + "NM": (2, 11), + "NY_albany": (2, 13), + "NY_rochester": (2, 14), + "OH": (3, 15), + "OK": (3, 16), + "OR": (2, 17), + "RI": (3, 18), + "SD": (3, 19), + "TN": (2, 20), + "UT": (3, 21), + "network_all": (1, 22), + "network_eip": (2, 22), + "network_ihsp": (3, 22), } def fetch_json(path, payload, call_count=1, requests_impl=requests): - """Send a request to the server and return the parsed JSON response.""" - - # it's polite to self-identify this "bot" - delphi_url = 'https://delphi.cmu.edu/index.html' - user_agent = 'Mozilla/5.0 (compatible; 
delphibot/1.0; +%s)' % delphi_url - - # the FluSurv AMF server - flusurv_url = 'https://gis.cdc.gov/GRASP/Flu3/' + path - - # request headers - headers = { - 'Accept-Encoding': 'gzip', - 'User-Agent': user_agent, - } - if payload is not None: - headers['Content-Type'] = 'application/json;charset=UTF-8' - - # send the request and read the response - if payload is None: - method = requests_impl.get - data = None - else: - method = requests_impl.post - data = json.dumps(payload) - resp = method(flusurv_url, headers=headers, data=data) - - # check the HTTP status code - if resp.status_code == 500 and call_count <= 2: - # the server often fails with this status, so wait and retry - delay = 10 * call_count - print('got status %d, will retry in %d sec...' % (resp.status_code, delay)) - time.sleep(delay) - return fetch_json(path, payload, call_count=call_count + 1) - elif resp.status_code != 200: - raise Exception(['status code != 200', resp.status_code]) - - # check response mime type - if 'application/json' not in resp.headers.get('Content-Type', ''): - raise Exception('response is not json') - - # return the decoded json object - return resp.json() + """Send a request to the server and return the parsed JSON response.""" + + # it's polite to self-identify this "bot" + delphi_url = "https://delphi.cmu.edu/index.html" + user_agent = "Mozilla/5.0 (compatible; delphibot/1.0; +%s)" % delphi_url + + # the FluSurv AMF server + flusurv_url = "https://gis.cdc.gov/GRASP/Flu3/" + path + + # request headers + headers = { + "Accept-Encoding": "gzip", + "User-Agent": user_agent, + } + if payload is not None: + headers["Content-Type"] = "application/json;charset=UTF-8" + + # send the request and read the response + if payload is None: + method = requests_impl.get + data = None + else: + method = requests_impl.post + data = json.dumps(payload) + resp = method(flusurv_url, headers=headers, data=data) + + # check the HTTP status code + if resp.status_code == 500 and call_count <= 2: + # the server often fails with this status, so wait and retry + delay = 10 * call_count + print("got status %d, will retry in %d sec..." % (resp.status_code, delay)) + time.sleep(delay) + return fetch_json(path, payload, call_count=call_count + 1) + elif resp.status_code != 200: + raise Exception(["status code != 200", resp.status_code]) + + # check response mime type + if "application/json" not in resp.headers.get("Content-Type", ""): + raise Exception("response is not json") + + # return the decoded json object + return resp.json() def fetch_flusurv_object(location_code): - """Return decoded FluSurv JSON object for the given location.""" - return fetch_json('PostPhase03GetData', { - 'appversion': 'Public', - 'networkid': location_code[0], - 'cacthmentid': location_code[1], - }) + """Return decoded FluSurv JSON object for the given location.""" + return fetch_json( + "PostPhase03GetData", + { + "appversion": "Public", + "networkid": location_code[0], + "cacthmentid": location_code[1], + }, + ) def mmwrid_to_epiweek(mmwrid): - """Convert a CDC week index into an epiweek.""" + """Convert a CDC week index into an epiweek.""" - # Add the difference in IDs, which are sequential, to a reference epiweek, - # which is 2003w40 in this case. - epiweek_200340 = EpiDate(2003, 9, 28) - mmwrid_200340 = 2179 - return epiweek_200340.add_weeks(mmwrid - mmwrid_200340).get_ew() + # Add the difference in IDs, which are sequential, to a reference epiweek, + # which is 2003w40 in this case. 
+ epiweek_200340 = EpiDate(2003, 9, 28) + mmwrid_200340 = 2179 + return epiweek_200340.add_weeks(mmwrid - mmwrid_200340).get_ew() def extract_from_object(data_in): - """ - Given a FluSurv data object, return hospitaliation rates. - - The returned object is indexed first by epiweek, then by zero-indexed age - group. - """ - - # an object to hold the result - data_out = {} - - # iterate over all seasons and age groups - for obj in data_in['busdata']['dataseries']: - if obj['age'] in (10, 11, 12): - # TODO(https://github.com/cmu-delphi/delphi-epidata/issues/242): - # capture as-of-yet undefined age groups 10, 11, and 12 - continue - age_index = obj['age'] - 1 - # iterage over weeks - for mmwrid, _, _, rate in obj['data']: - epiweek = mmwrid_to_epiweek(mmwrid) - if epiweek not in data_out: - # weekly rate of each age group - data_out[epiweek] = [None] * 9 - prev_rate = data_out[epiweek][age_index] - if prev_rate is None: - # this is the first time to see a rate for this epiweek/age - data_out[epiweek][age_index] = rate - elif prev_rate != rate: - # a different rate was already found for this epiweek/age - format_args = (epiweek, obj['age'], prev_rate, rate) - print('warning: %d %d %f != %f' % format_args) - - # sanity check the result - if len(data_out) == 0: - raise Exception('no data found') - - # print the result and return flu data - print('found data for %d weeks' % len(data_out)) - return data_out + """ + Given a FluSurv data object, return hospitaliation rates. + + The returned object is indexed first by epiweek, then by zero-indexed age + group. + """ + + # an object to hold the result + data_out = {} + + # iterate over all seasons and age groups + for obj in data_in["busdata"]["dataseries"]: + if obj["age"] in (10, 11, 12): + # TODO(https://github.com/cmu-delphi/delphi-epidata/issues/242): + # capture as-of-yet undefined age groups 10, 11, and 12 + continue + age_index = obj["age"] - 1 + # iterage over weeks + for mmwrid, _, _, rate in obj["data"]: + epiweek = mmwrid_to_epiweek(mmwrid) + if epiweek not in data_out: + # weekly rate of each age group + data_out[epiweek] = [None] * 9 + prev_rate = data_out[epiweek][age_index] + if prev_rate is None: + # this is the first time to see a rate for this epiweek/age + data_out[epiweek][age_index] = rate + elif prev_rate != rate: + # a different rate was already found for this epiweek/age + format_args = (epiweek, obj["age"], prev_rate, rate) + print("warning: %d %d %f != %f" % format_args) + + # sanity check the result + if len(data_out) == 0: + raise Exception("no data found") + + # print the result and return flu data + print("found data for %d weeks" % len(data_out)) + return data_out def get_data(location_code): - """ - Fetch and parse flu data for the given location. + """ + Fetch and parse flu data for the given location. 
- This method performs the following operations: - - fetches FluSurv data from CDC - - extracts and returns hospitaliation rates - """ + This method performs the following operations: + - fetches FluSurv data from CDC + - extracts and returns hospitaliation rates + """ - # fetch - print('[fetching flusurv data...]') - data_in = fetch_flusurv_object(location_code) + # fetch + print("[fetching flusurv data...]") + data_in = fetch_flusurv_object(location_code) - # extract - print('[extracting values...]') - data_out = extract_from_object(data_in) + # extract + print("[extracting values...]") + data_out = extract_from_object(data_in) - # return - print('[scraped successfully]') - return data_out + # return + print("[scraped successfully]") + return data_out def get_current_issue(): - """Scrape the current issue from the FluSurv main page.""" + """Scrape the current issue from the FluSurv main page.""" - # fetch - data = fetch_json('GetPhase03InitApp?appVersion=Public', None) + # fetch + data = fetch_json("GetPhase03InitApp?appVersion=Public", None) - # extract - date = datetime.strptime(data['loaddatetime'], '%b %d, %Y') + # extract + date = datetime.strptime(data["loaddatetime"], "%b %d, %Y") - # convert and return - return EpiDate(date.year, date.month, date.day).get_ew() + # convert and return + return EpiDate(date.year, date.month, date.day).get_ew() diff --git a/src/acquisition/flusurv/flusurv_update.py b/src/acquisition/flusurv/flusurv_update.py index 35fadba05..295091104 100644 --- a/src/acquisition/flusurv/flusurv_update.py +++ b/src/acquisition/flusurv/flusurv_update.py @@ -82,108 +82,105 @@ def get_rows(cur): - """Return the number of rows in the `flusurv` table.""" + """Return the number of rows in the `flusurv` table.""" - # count all rows - cur.execute('SELECT count(1) `num` FROM `flusurv`') - for (num,) in cur: - return num + # count all rows + cur.execute("SELECT count(1) `num` FROM `flusurv`") + for (num,) in cur: + return num def update(issue, location_name, test_mode=False): - """Fetch and store the currently avialble weekly FluSurv dataset.""" - - # fetch data - location_code = flusurv.location_codes[location_name] - print('fetching data for', location_name, location_code) - data = flusurv.get_data(location_code) - - # metadata - epiweeks = sorted(data.keys()) - location = location_name - release_date = str(EpiDate.today()) - - # connect to the database - u, p = secrets.db.epi - cnx = mysql.connector.connect( - host=secrets.db.host, user=u, password=p, database='epidata') - cur = cnx.cursor() - rows1 = get_rows(cur) - print('rows before: %d' % rows1) - - # SQL for insert/update - sql = ''' - INSERT INTO `flusurv` ( - `release_date`, `issue`, `epiweek`, `location`, `lag`, `rate_age_0`, - `rate_age_1`, `rate_age_2`, `rate_age_3`, `rate_age_4`, `rate_overall`, - `rate_age_5`, `rate_age_6`, `rate_age_7` - ) - VALUES ( - %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s - ) - ON DUPLICATE KEY UPDATE - `release_date` = least(`release_date`, %s), - `rate_age_0` = coalesce(%s, `rate_age_0`), - `rate_age_1` = coalesce(%s, `rate_age_1`), - `rate_age_2` = coalesce(%s, `rate_age_2`), - `rate_age_3` = coalesce(%s, `rate_age_3`), - `rate_age_4` = coalesce(%s, `rate_age_4`), - `rate_overall` = coalesce(%s, `rate_overall`), - `rate_age_5` = coalesce(%s, `rate_age_5`), - `rate_age_6` = coalesce(%s, `rate_age_6`), - `rate_age_7` = coalesce(%s, `rate_age_7`) - ''' - - # insert/update each row of data (one per epiweek) - for epiweek in epiweeks: - lag = delta_epiweeks(epiweek, issue) - if 
lag > 52: - # Ignore values older than one year, as (1) they are assumed not to - # change, and (2) it would adversely affect database performance if all - # values (including duplicates) were stored on each run. - continue - args_meta = [release_date, issue, epiweek, location, lag] - args_insert = data[epiweek] - args_update = [release_date] + data[epiweek] - cur.execute(sql, tuple(args_meta + args_insert + args_update)) - - # commit and disconnect - rows2 = get_rows(cur) - print('rows after: %d (+%d)' % (rows2, rows2 - rows1)) - cur.close() - if test_mode: - print('test mode: not committing database changes') - else: - cnx.commit() - cnx.close() + """Fetch and store the currently avialble weekly FluSurv dataset.""" + + # fetch data + location_code = flusurv.location_codes[location_name] + print("fetching data for", location_name, location_code) + data = flusurv.get_data(location_code) + + # metadata + epiweeks = sorted(data.keys()) + location = location_name + release_date = str(EpiDate.today()) + + # connect to the database + u, p = secrets.db.epi + cnx = mysql.connector.connect(host=secrets.db.host, user=u, password=p, database="epidata") + cur = cnx.cursor() + rows1 = get_rows(cur) + print("rows before: %d" % rows1) + + # SQL for insert/update + sql = """ + INSERT INTO `flusurv` ( + `release_date`, `issue`, `epiweek`, `location`, `lag`, `rate_age_0`, + `rate_age_1`, `rate_age_2`, `rate_age_3`, `rate_age_4`, `rate_overall`, + `rate_age_5`, `rate_age_6`, `rate_age_7` + ) + VALUES ( + %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s + ) + ON DUPLICATE KEY UPDATE + `release_date` = least(`release_date`, %s), + `rate_age_0` = coalesce(%s, `rate_age_0`), + `rate_age_1` = coalesce(%s, `rate_age_1`), + `rate_age_2` = coalesce(%s, `rate_age_2`), + `rate_age_3` = coalesce(%s, `rate_age_3`), + `rate_age_4` = coalesce(%s, `rate_age_4`), + `rate_overall` = coalesce(%s, `rate_overall`), + `rate_age_5` = coalesce(%s, `rate_age_5`), + `rate_age_6` = coalesce(%s, `rate_age_6`), + `rate_age_7` = coalesce(%s, `rate_age_7`) + """ + + # insert/update each row of data (one per epiweek) + for epiweek in epiweeks: + lag = delta_epiweeks(epiweek, issue) + if lag > 52: + # Ignore values older than one year, as (1) they are assumed not to + # change, and (2) it would adversely affect database performance if all + # values (including duplicates) were stored on each run. + continue + args_meta = [release_date, issue, epiweek, location, lag] + args_insert = data[epiweek] + args_update = [release_date] + data[epiweek] + cur.execute(sql, tuple(args_meta + args_insert + args_update)) + + # commit and disconnect + rows2 = get_rows(cur) + print("rows after: %d (+%d)" % (rows2, rows2 - rows1)) + cur.close() + if test_mode: + print("test mode: not committing database changes") + else: + cnx.commit() + cnx.close() def main(): - # args and usage - parser = argparse.ArgumentParser() - parser.add_argument( - 'location', - help='location for which data should be scraped (e.g. 
"CA" or "all")' - ) - parser.add_argument( - '--test', '-t', - default=False, action='store_true', help='do not commit database changes' - ) - args = parser.parse_args() - - # scrape current issue from the main page - issue = flusurv.get_current_issue() - print('current issue: %d' % issue) - - # fetch flusurv data - if args.location == 'all': - # all locations - for location in flusurv.location_codes.keys(): - update(issue, location, args.test) - else: - # single location - update(issue, args.location, args.test) - - -if __name__ == '__main__': - main() + # args and usage + parser = argparse.ArgumentParser() + parser.add_argument( + "location", help='location for which data should be scraped (e.g. "CA" or "all")' + ) + parser.add_argument( + "--test", "-t", default=False, action="store_true", help="do not commit database changes" + ) + args = parser.parse_args() + + # scrape current issue from the main page + issue = flusurv.get_current_issue() + print("current issue: %d" % issue) + + # fetch flusurv data + if args.location == "all": + # all locations + for location in flusurv.location_codes.keys(): + update(issue, location, args.test) + else: + # single location + update(issue, args.location, args.test) + + +if __name__ == "__main__": + main() From 0133ef2042c4df8867e91595eb1f64873edb4632 Mon Sep 17 00:00:00 2001 From: Dmitry Shemetov Date: Wed, 21 Jun 2023 13:53:10 -0700 Subject: [PATCH 22/43] style(black): format fluview acquisition --- src/acquisition/fluview/fluview.py | 329 ++++---- src/acquisition/fluview/fluview_locations.py | 186 ++--- src/acquisition/fluview/fluview_notify.py | 80 +- src/acquisition/fluview/fluview_update.py | 772 +++++++++--------- .../fluview/impute_missing_values.py | 493 ++++++----- 5 files changed, 947 insertions(+), 913 deletions(-) diff --git a/src/acquisition/fluview/fluview.py b/src/acquisition/fluview/fluview.py index d723cbc59..a7e9fba87 100644 --- a/src/acquisition/fluview/fluview.py +++ b/src/acquisition/fluview/fluview.py @@ -34,183 +34,188 @@ class Key: - """ - Constants for navigating the metadata object contained in the web response - from CDC. - """ + """ + Constants for navigating the metadata object contained in the web response + from CDC. 
+ """ - class TierType: - nat = 'National' - hhs = 'HHS Regions' - cen = 'Census Divisions' - sta = 'State' + class TierType: + nat = "National" + hhs = "HHS Regions" + cen = "Census Divisions" + sta = "State" - class TierListEntry: - hhs = 'hhsregion' - cen = 'censusregions' - sta = 'states' + class TierListEntry: + hhs = "hhsregion" + cen = "censusregions" + sta = "states" - class TierIdEntry: - hhs = 'hhsregionid' - cen = 'censusregionid' - sta = 'stateid' + class TierIdEntry: + hhs = "hhsregionid" + cen = "censusregionid" + sta = "stateid" def check_status(resp, status, content_type): - """Raise an exception if the status code or content type is unexpected.""" - if resp.status_code != status: - raise Exception('got unexpected status code: ' + str(resp.status_code)) - actual_type = resp.headers.get('Content-Type', None) - if actual_type is None or content_type not in actual_type.lower(): - raise Exception('got unexpected content type: ' + str(actual_type)) + """Raise an exception if the status code or content type is unexpected.""" + if resp.status_code != status: + raise Exception("got unexpected status code: " + str(resp.status_code)) + actual_type = resp.headers.get("Content-Type", None) + if actual_type is None or content_type not in actual_type.lower(): + raise Exception("got unexpected content type: " + str(actual_type)) def fetch_metadata(sess): - """ - Return metadata indicating the current issue and also numeric constants - representing the various locations. - """ - url = 'https://gis.cdc.gov/grasp/flu2/GetPhase02InitApp?appVersion=Public' - resp = sess.get(url) - check_status(resp, 200, 'application/json') - return resp.json() + """ + Return metadata indicating the current issue and also numeric constants + representing the various locations. 
+ """ + url = "https://gis.cdc.gov/grasp/flu2/GetPhase02InitApp?appVersion=Public" + resp = sess.get(url) + check_status(resp, 200, "application/json") + return resp.json() def get_issue_and_locations(data): - """Extract the issue and per-tier location lists from the metadata object.""" - - def get_tier_ids(name): - for row in data['regiontypes']: - if row['description'] == name: - return row['regiontypeid'] - raise Exception() - - tier_ids = dict((name, get_tier_ids(name)) for name in ( - Key.TierType.nat, - Key.TierType.hhs, - Key.TierType.cen, - Key.TierType.sta, - )) - - location_ids = { - Key.TierType.nat: [0], - Key.TierType.hhs: [], - Key.TierType.cen: [], - Key.TierType.sta: [], - } - - # add location ids for HHS - for row in data[Key.TierListEntry.hhs]: - location_ids[Key.TierType.hhs].append(row[Key.TierIdEntry.hhs]) - location_ids[Key.TierType.hhs] = sorted(set(location_ids[Key.TierType.hhs])) - num = len(location_ids[Key.TierType.hhs]) - if num != 10: - raise Exception('expected 10 hhs regions, found %d' % num) - - # add location ids for census divisions - for row in data[Key.TierListEntry.cen]: - location_ids[Key.TierType.cen].append(row[Key.TierIdEntry.cen]) - location_ids[Key.TierType.cen] = sorted(set(location_ids[Key.TierType.cen])) - num = len(location_ids[Key.TierType.cen]) - if num != 9: - raise Exception('expected 9 census divisions, found %d' % num) - - # add location ids for states - for row in data[Key.TierListEntry.sta]: - location_ids[Key.TierType.sta].append(row[Key.TierIdEntry.sta]) - location_ids[Key.TierType.sta] = sorted(set(location_ids[Key.TierType.sta])) - num = len(location_ids[Key.TierType.sta]) - if num != 57: - raise Exception('expected 57 states/territories/cities, found %d' % num) - - # return a useful subset of the metadata - # (latest epiweek, latest season, tier ids, location ids) - return { - 'epiweek': data['mmwr'][-1]['yearweek'], - 'season_id': data['mmwr'][-1]['seasonid'], - 'tier_ids': tier_ids, - 'location_ids': location_ids, - } + """Extract the issue and per-tier location lists from the metadata object.""" + + def get_tier_ids(name): + for row in data["regiontypes"]: + if row["description"] == name: + return row["regiontypeid"] + raise Exception() + + tier_ids = { + name: get_tier_ids(name) + for name in ( + Key.TierType.nat, + Key.TierType.hhs, + Key.TierType.cen, + Key.TierType.sta, + ) + } + + location_ids = { + Key.TierType.nat: [0], + Key.TierType.hhs: [], + Key.TierType.cen: [], + Key.TierType.sta: [], + } + + # add location ids for HHS + for row in data[Key.TierListEntry.hhs]: + location_ids[Key.TierType.hhs].append(row[Key.TierIdEntry.hhs]) + location_ids[Key.TierType.hhs] = sorted(set(location_ids[Key.TierType.hhs])) + num = len(location_ids[Key.TierType.hhs]) + if num != 10: + raise Exception("expected 10 hhs regions, found %d" % num) + + # add location ids for census divisions + for row in data[Key.TierListEntry.cen]: + location_ids[Key.TierType.cen].append(row[Key.TierIdEntry.cen]) + location_ids[Key.TierType.cen] = sorted(set(location_ids[Key.TierType.cen])) + num = len(location_ids[Key.TierType.cen]) + if num != 9: + raise Exception("expected 9 census divisions, found %d" % num) + + # add location ids for states + for row in data[Key.TierListEntry.sta]: + location_ids[Key.TierType.sta].append(row[Key.TierIdEntry.sta]) + location_ids[Key.TierType.sta] = sorted(set(location_ids[Key.TierType.sta])) + num = len(location_ids[Key.TierType.sta]) + if num != 57: + raise Exception("expected 57 states/territories/cities, found %d" % 
num) + + # return a useful subset of the metadata + # (latest epiweek, latest season, tier ids, location ids) + return { + "epiweek": data["mmwr"][-1]["yearweek"], + "season_id": data["mmwr"][-1]["seasonid"], + "tier_ids": tier_ids, + "location_ids": location_ids, + } def download_data(tier_id, location_ids, season_ids, filename): - """Download zipped ILINet data for the given locations and seasons.""" - - def get_entry(num, name=None): - return {'ID': num, 'Name': (name if name else num)} - - # download the data (in memory) - url = 'https://gis.cdc.gov/grasp/flu2/PostPhase02DataDownload' - data = { - 'AppVersion': 'Public', - 'DatasourceDT': [get_entry(1, 'ILINet'), get_entry(0, 'WHO_NREVSS')], - 'RegionTypeId': tier_id, - 'SubRegionsDT': [get_entry(loc) for loc in sorted(location_ids)], - 'SeasonsDT': [get_entry(season) for season in sorted(season_ids)], - } - resp = requests.post(url, json=data) - check_status(resp, 200, 'application/octet-stream') - payload = resp.content - - # save the data to file and return the file length - with open(filename, 'wb') as f: - f.write(payload) - return len(payload) + """Download zipped ILINet data for the given locations and seasons.""" + + def get_entry(num, name=None): + return {"ID": num, "Name": (name if name else num)} + + # download the data (in memory) + url = "https://gis.cdc.gov/grasp/flu2/PostPhase02DataDownload" + data = { + "AppVersion": "Public", + "DatasourceDT": [get_entry(1, "ILINet"), get_entry(0, "WHO_NREVSS")], + "RegionTypeId": tier_id, + "SubRegionsDT": [get_entry(loc) for loc in sorted(location_ids)], + "SeasonsDT": [get_entry(season) for season in sorted(season_ids)], + } + resp = requests.post(url, json=data) + check_status(resp, 200, "application/octet-stream") + payload = resp.content + + # save the data to file and return the file length + with open(filename, "wb") as f: + f.write(payload) + return len(payload) def save_latest(path=None): - """ - Save the latest two seasons of data for all locations, separately for each - location tier (i.e. national, HHS, census, and states). 
- """ - - # set up the session - sess = requests.session() - sess.headers.update({ - # it's polite to self-identify this "bot" - 'User-Agent': 'delphibot/1.0 (+https://delphi.cmu.edu/)', - }) - - # get metatdata - print('looking up ilinet metadata') - data = fetch_metadata(sess) - info = get_issue_and_locations(data) - issue = info['epiweek'] - print('current issue: %d' % issue) - - # establish timing - dt = datetime.datetime.now().strftime('%Y%m%d_%H%M%S') - current_season = info['season_id'] - seasons = [s for s in range(current_season - 1, current_season + 1)] - - # make the destination path if it doesn't already exist - if path is not None: - os.makedirs(path, exist_ok=True) - - # download the data file for each tier - files = [] - for delphi_name, cdc_name in ( - ('nat', Key.TierType.nat), - ('hhs', Key.TierType.hhs), - ('cen', Key.TierType.cen), - ('sta', Key.TierType.sta), - ): - name = 'ilinet_%s_%d_%s.zip' % (delphi_name, issue, dt) - if path is None: - filename = name - else: - filename = os.path.join(path, name) - tier_id = info['tier_ids'][cdc_name] - locations = info['location_ids'][cdc_name] - - # download and show timing information - print('downloading %s' % delphi_name) - t0 = time.time() - size = download_data(tier_id, locations, seasons, filename) - t1 = time.time() - - print(' saved %s (%d bytes in %.1f seconds)' % (filename, size, t1 - t0)) - files.append(filename) - - # return the current issue and the list of downloaded files - return issue, files + """ + Save the latest two seasons of data for all locations, separately for each + location tier (i.e. national, HHS, census, and states). + """ + + # set up the session + sess = requests.session() + sess.headers.update( + { + # it's polite to self-identify this "bot" + "User-Agent": "delphibot/1.0 (+https://delphi.cmu.edu/)", + } + ) + + # get metatdata + print("looking up ilinet metadata") + data = fetch_metadata(sess) + info = get_issue_and_locations(data) + issue = info["epiweek"] + print("current issue: %d" % issue) + + # establish timing + dt = datetime.datetime.now().strftime("%Y%m%d_%H%M%S") + current_season = info["season_id"] + seasons = [s for s in range(current_season - 1, current_season + 1)] + + # make the destination path if it doesn't already exist + if path is not None: + os.makedirs(path, exist_ok=True) + + # download the data file for each tier + files = [] + for delphi_name, cdc_name in ( + ("nat", Key.TierType.nat), + ("hhs", Key.TierType.hhs), + ("cen", Key.TierType.cen), + ("sta", Key.TierType.sta), + ): + name = "ilinet_%s_%d_%s.zip" % (delphi_name, issue, dt) + if path is None: + filename = name + else: + filename = os.path.join(path, name) + tier_id = info["tier_ids"][cdc_name] + locations = info["location_ids"][cdc_name] + + # download and show timing information + print("downloading %s" % delphi_name) + t0 = time.time() + size = download_data(tier_id, locations, seasons, filename) + t1 = time.time() + + print(" saved %s (%d bytes in %.1f seconds)" % (filename, size, t1 - t0)) + files.append(filename) + + # return the current issue and the list of downloaded files + return issue, files diff --git a/src/acquisition/fluview/fluview_locations.py b/src/acquisition/fluview/fluview_locations.py index 9c851bc6f..e5ebe0fc3 100644 --- a/src/acquisition/fluview/fluview_locations.py +++ b/src/acquisition/fluview/fluview_locations.py @@ -15,100 +15,100 @@ # https://gis.cdc.gov/grasp/flu2/GetPhase02InitApp?appVersion=Public # The values are used in queries of Delphi's Epidata API. 
cdc_to_delphi = { - 'national': { - 'x': 'nat', - }, - 'hhs regions': { - 'region 1': 'hhs1', - 'region 2': 'hhs2', - 'region 3': 'hhs3', - 'region 4': 'hhs4', - 'region 5': 'hhs5', - 'region 6': 'hhs6', - 'region 7': 'hhs7', - 'region 8': 'hhs8', - 'region 9': 'hhs9', - 'region 10': 'hhs10', - }, - 'census regions': { - 'new england': 'cen1', - 'mid-atlantic': 'cen2', - 'east north central': 'cen3', - 'west north central': 'cen4', - 'south atlantic': 'cen5', - 'east south central': 'cen6', - 'west south central': 'cen7', - 'mountain': 'cen8', - 'pacific': 'cen9', - }, - 'states': { - # states/territories: two-letter ISO 3166 - 'alabama': 'al', - 'alaska': 'ak', - 'arizona': 'az', - 'arkansas': 'ar', - 'california': 'ca', - 'colorado': 'co', - 'connecticut': 'ct', - 'delaware': 'de', - 'florida': 'fl', - 'georgia': 'ga', - 'hawaii': 'hi', - 'idaho': 'id', - 'illinois': 'il', - 'indiana': 'in', - 'iowa': 'ia', - 'kansas': 'ks', - 'kentucky': 'ky', - 'louisiana': 'la', - 'maine': 'me', - 'maryland': 'md', - 'massachusetts': 'ma', - 'michigan': 'mi', - 'minnesota': 'mn', - 'mississippi': 'ms', - 'missouri': 'mo', - 'montana': 'mt', - 'nebraska': 'ne', - 'nevada': 'nv', - 'new hampshire': 'nh', - 'new jersey': 'nj', - 'new mexico': 'nm', - # Even though it's called "New York", this location doesn't include New - # York City ("jfk"). New York ("ny") is actually this *plus* jfk. - 'new york': 'ny_minus_jfk', - 'north carolina': 'nc', - 'north dakota': 'nd', - 'ohio': 'oh', - 'oklahoma': 'ok', - 'oregon': 'or', - 'pennsylvania': 'pa', - 'rhode island': 'ri', - 'south carolina': 'sc', - 'south dakota': 'sd', - 'tennessee': 'tn', - 'texas': 'tx', - 'utah': 'ut', - 'vermont': 'vt', - 'virginia': 'va', - 'washington': 'wa', - 'west virginia': 'wv', - 'wisconsin': 'wi', - 'wyoming': 'wy', - 'american samoa': 'as', - 'commonwealth of the northern mariana islands': 'mp', - 'district of columbia': 'dc', - 'guam': 'gu', - 'puerto rico': 'pr', - 'virgin islands': 'vi', - # cities: three-letter IATA - 'chicago': 'ord', - 'los angeles': 'lax', - 'new york city': 'jfk', - }, + "national": { + "x": "nat", + }, + "hhs regions": { + "region 1": "hhs1", + "region 2": "hhs2", + "region 3": "hhs3", + "region 4": "hhs4", + "region 5": "hhs5", + "region 6": "hhs6", + "region 7": "hhs7", + "region 8": "hhs8", + "region 9": "hhs9", + "region 10": "hhs10", + }, + "census regions": { + "new england": "cen1", + "mid-atlantic": "cen2", + "east north central": "cen3", + "west north central": "cen4", + "south atlantic": "cen5", + "east south central": "cen6", + "west south central": "cen7", + "mountain": "cen8", + "pacific": "cen9", + }, + "states": { + # states/territories: two-letter ISO 3166 + "alabama": "al", + "alaska": "ak", + "arizona": "az", + "arkansas": "ar", + "california": "ca", + "colorado": "co", + "connecticut": "ct", + "delaware": "de", + "florida": "fl", + "georgia": "ga", + "hawaii": "hi", + "idaho": "id", + "illinois": "il", + "indiana": "in", + "iowa": "ia", + "kansas": "ks", + "kentucky": "ky", + "louisiana": "la", + "maine": "me", + "maryland": "md", + "massachusetts": "ma", + "michigan": "mi", + "minnesota": "mn", + "mississippi": "ms", + "missouri": "mo", + "montana": "mt", + "nebraska": "ne", + "nevada": "nv", + "new hampshire": "nh", + "new jersey": "nj", + "new mexico": "nm", + # Even though it's called "New York", this location doesn't include New + # York City ("jfk"). New York ("ny") is actually this *plus* jfk. 
+ "new york": "ny_minus_jfk", + "north carolina": "nc", + "north dakota": "nd", + "ohio": "oh", + "oklahoma": "ok", + "oregon": "or", + "pennsylvania": "pa", + "rhode island": "ri", + "south carolina": "sc", + "south dakota": "sd", + "tennessee": "tn", + "texas": "tx", + "utah": "ut", + "vermont": "vt", + "virginia": "va", + "washington": "wa", + "west virginia": "wv", + "wisconsin": "wi", + "wyoming": "wy", + "american samoa": "as", + "commonwealth of the northern mariana islands": "mp", + "district of columbia": "dc", + "guam": "gu", + "puerto rico": "pr", + "virgin islands": "vi", + # cities: three-letter IATA + "chicago": "ord", + "los angeles": "lax", + "new york city": "jfk", + }, } def get_location_name(region_type, region_name): - """Convert a CDC location type and name pair into a Delphi location name.""" - return cdc_to_delphi[region_type.lower()][region_name.lower()] + """Convert a CDC location type and name pair into a Delphi location name.""" + return cdc_to_delphi[region_type.lower()][region_name.lower()] diff --git a/src/acquisition/fluview/fluview_notify.py b/src/acquisition/fluview/fluview_notify.py index 13f0f3559..a280889a5 100644 --- a/src/acquisition/fluview/fluview_notify.py +++ b/src/acquisition/fluview/fluview_notify.py @@ -31,41 +31,53 @@ import delphi.operations.secrets as secrets -if __name__ == '__main__': - # Args and usage - parser = argparse.ArgumentParser() - parser.add_argument('-t', '--test', action='store_const', const=True, default=False, help="do dry run only, don't update the database") - args = parser.parse_args() +if __name__ == "__main__": + # Args and usage + parser = argparse.ArgumentParser() + parser.add_argument( + "-t", + "--test", + action="store_const", + const=True, + default=False, + help="do dry run only, don't update the database", + ) + args = parser.parse_args() - # connect - u, p = secrets.db.epi - cnx = mysql.connector.connect(user=u, password=p, database='epidata') - cur = cnx.cursor() + # connect + u, p = secrets.db.epi + cnx = mysql.connector.connect(user=u, password=p, database="epidata") + cur = cnx.cursor() - # get the last known issue from the automation table `variables` - cur.execute('SELECT `value` FROM automation.`variables` WHERE `name` = %s', ('most_recent_issue',)) - for (issue1,) in cur: - issue1 = int(issue1) - print('last known issue:', issue1) - # get the most recent issue from the epidata table `fluview` - cur.execute('SELECT max(`issue`) FROM `fluview`') - for (issue2,) in cur: - issue2 = int(issue2) - print('most recent issue:', issue2) + # get the last known issue from the automation table `variables` + cur.execute( + "SELECT `value` FROM automation.`variables` WHERE `name` = %s", ("most_recent_issue",) + ) + for (issue1,) in cur: + issue1 = int(issue1) + print("last known issue:", issue1) + # get the most recent issue from the epidata table `fluview` + cur.execute("SELECT max(`issue`) FROM `fluview`") + for (issue2,) in cur: + issue2 = int(issue2) + print("most recent issue:", issue2) - if issue2 > issue1: - print('new data is available!') - if args.test: - print('test mode - not making any changes') - else: - # update the variable - cur.execute('UPDATE automation.`variables` SET `value` = %s WHERE `name` = %s', (issue2, 'most_recent_issue')) - # queue the 'New FluView Available' flow - cur.execute('CALL automation.RunStep(36)') - elif issue2 < issue2: - raise Exception('most recent issue is older than the last known issue') + if issue2 > issue1: + print("new data is available!") + if args.test: + print("test 
mode - not making any changes") + else: + # update the variable + cur.execute( + "UPDATE automation.`variables` SET `value` = %s WHERE `name` = %s", + (issue2, "most_recent_issue"), + ) + # queue the 'New FluView Available' flow + cur.execute("CALL automation.RunStep(36)") + elif issue2 < issue2: + raise Exception("most recent issue is older than the last known issue") - # cleanup - cnx.commit() - cur.close() - cnx.close() + # cleanup + cnx.commit() + cur.close() + cnx.close() diff --git a/src/acquisition/fluview/fluview_update.py b/src/acquisition/fluview/fluview_update.py index 65bec7a40..e463fcbaf 100644 --- a/src/acquisition/fluview/fluview_update.py +++ b/src/acquisition/fluview/fluview_update.py @@ -130,398 +130,422 @@ from . import fluview_locations # sheet names -ILINET_SHEET = 'ILINet.csv' -PHL_SHEET = 'WHO_NREVSS_Public_Health_Labs.csv' -CL_SHEET = 'WHO_NREVSS_Clinical_Labs.csv' +ILINET_SHEET = "ILINet.csv" +PHL_SHEET = "WHO_NREVSS_Public_Health_Labs.csv" +CL_SHEET = "WHO_NREVSS_Clinical_Labs.csv" # table names -CL_TABLE = 'fluview_clinical' -PHL_TABLE = 'fluview_public' +CL_TABLE = "fluview_clinical" +PHL_TABLE = "fluview_public" + def optional_int(i): - return int(i) if i not in ('', 'X') else None + return int(i) if i not in ("", "X") else None + def optional_float(i, j): - return float(i) if i not in ('', 'X') else float(j) + return float(i) if i not in ("", "X") else float(j) + def nullable_float(i): - return float(i) if i not in ('', 'X') else None + return float(i) if i not in ("", "X") else None + def get_ilinet_data(row): - if row[0] == 'REGION TYPE' and row != [ - 'REGION TYPE', - 'REGION', - 'YEAR', - 'WEEK', - '% WEIGHTED ILI', - '%UNWEIGHTED ILI', - 'AGE 0-4', - 'AGE 25-49', - 'AGE 25-64', - 'AGE 5-24', - 'AGE 50-64', - 'AGE 65', - 'ILITOTAL', - 'NUM. OF PROVIDERS', - 'TOTAL PATIENTS' - ]: - raise Exception('header row has changed') - if len(row) == 1 or row[0] == 'REGION TYPE': - # this is a header row - return None - if row[5] == 'X': - # ILI isn't reported, ignore this row - return None - return { - 'location': fluview_locations.get_location_name(*row[:2]), - 'epiweek': join_epiweek(int(row[2]), int(row[3])), - 'wili': optional_float(*row[4:6]), - 'ili': float(row[5]), - 'age0': optional_int(row[6]), - 'age1': optional_int(row[9]), - 'age2': optional_int(row[8]), - 'age3': optional_int(row[7]), - 'age4': optional_int(row[10]), - 'age5': optional_int(row[11]), - 'n_ili': optional_int(row[12]), - 'n_providers': optional_int(row[13]), - 'n_patients': optional_int(row[14]), - } + if row[0] == "REGION TYPE" and row != [ + "REGION TYPE", + "REGION", + "YEAR", + "WEEK", + "% WEIGHTED ILI", + "%UNWEIGHTED ILI", + "AGE 0-4", + "AGE 25-49", + "AGE 25-64", + "AGE 5-24", + "AGE 50-64", + "AGE 65", + "ILITOTAL", + "NUM. 
OF PROVIDERS", + "TOTAL PATIENTS", + ]: + raise Exception("header row has changed") + if len(row) == 1 or row[0] == "REGION TYPE": + # this is a header row + return None + if row[5] == "X": + # ILI isn't reported, ignore this row + return None + return { + "location": fluview_locations.get_location_name(*row[:2]), + "epiweek": join_epiweek(int(row[2]), int(row[3])), + "wili": optional_float(*row[4:6]), + "ili": float(row[5]), + "age0": optional_int(row[6]), + "age1": optional_int(row[9]), + "age2": optional_int(row[8]), + "age3": optional_int(row[7]), + "age4": optional_int(row[10]), + "age5": optional_int(row[11]), + "n_ili": optional_int(row[12]), + "n_providers": optional_int(row[13]), + "n_patients": optional_int(row[14]), + } + def get_clinical_data(row): - if row[0] == 'REGION TYPE' and row != [ - 'REGION TYPE', - 'REGION', - 'YEAR', - 'WEEK', - 'TOTAL SPECIMENS', - 'TOTAL A', - 'TOTAL B', - 'PERCENT POSITIVE', - 'PERCENT A', - 'PERCENT B' - ]: - raise Exception('header row has changed for clinical lab data.') - if len(row) == 1 or row[0] == 'REGION TYPE': - # this is a header row - return None - if row[4] == 'X': - # data is not reported, ignore this row - return None - # ignore percentage calculations for now - return { - 'location': fluview_locations.get_location_name(*row[:2]), - 'epiweek': join_epiweek(int(row[2]), int(row[3])), - 'total_specimens': int(row[4]), - 'total_a': optional_int(row[5]), - 'total_b': optional_int(row[6]), - 'percent_positive': nullable_float(row[7]), - 'percent_a': nullable_float(row[8]), - 'percent_b': nullable_float(row[9]) - } + if row[0] == "REGION TYPE" and row != [ + "REGION TYPE", + "REGION", + "YEAR", + "WEEK", + "TOTAL SPECIMENS", + "TOTAL A", + "TOTAL B", + "PERCENT POSITIVE", + "PERCENT A", + "PERCENT B", + ]: + raise Exception("header row has changed for clinical lab data.") + if len(row) == 1 or row[0] == "REGION TYPE": + # this is a header row + return None + if row[4] == "X": + # data is not reported, ignore this row + return None + # ignore percentage calculations for now + return { + "location": fluview_locations.get_location_name(*row[:2]), + "epiweek": join_epiweek(int(row[2]), int(row[3])), + "total_specimens": int(row[4]), + "total_a": optional_int(row[5]), + "total_b": optional_int(row[6]), + "percent_positive": nullable_float(row[7]), + "percent_a": nullable_float(row[8]), + "percent_b": nullable_float(row[9]), + } + def get_public_data(row): - hrow1 = [ - 'REGION TYPE', - 'REGION', - 'SEASON_DESCRIPTION', - 'TOTAL SPECIMENS', - 'A (2009 H1N1)', - 'A (H3)', - 'A (Subtyping not Performed)', - 'B', - 'BVic', - 'BYam', - 'H3N2v' - ] - hrow2 = [ - 'REGION TYPE', - 'REGION', - 'YEAR', - 'WEEK', - 'TOTAL SPECIMENS', - 'A (2009 H1N1)', - 'A (H3)', - 'A (Subtyping not Performed)', - 'B', - 'BVic', - 'BYam', - 'H3N2v' - ] - if row[0] == 'REGION TYPE' and row != hrow1 and row != hrow2: - raise Exception('header row has changed for public health lab data.') - if len(row) == 1 or row[0] == 'REGION TYPE': - # header row - return None - if row[3] == 'X': - # data is not reported, ignore this row - return None - # handle case where data is reported by season, not by epiweek - is_weekly = len(row) == len(hrow2) - # set epiweek - if is_weekly: - epiweek = join_epiweek(int(row[2]), int(row[3])) - else: - epiweek = int(row[2][7:11]) * 100 + 40 - # row offset - offset = 1 if is_weekly else 0 - return { - 'location': fluview_locations.get_location_name(*row[:2]), - 'epiweek': epiweek, - 'total_specimens': int(row[3 + offset]), - 'total_a_h1n1': 
optional_int(row[4+ offset]), - 'total_a_h3': optional_int(row[5 + offset]), - 'total_a_h3n2v': optional_int(row[10 + offset]), - 'total_a_no_sub': optional_int(row[6 + offset]), - 'total_b': optional_int(row[7 + offset]), - 'total_b_vic': optional_int(row[8 + offset]), - 'total_b_yam': optional_int(row[9 + offset]) - } - -def load_zipped_csv(filename, sheetname='ILINet.csv'): - """Read rows from a zipped CSV, which is expected to be named as specified - by the sheetname parameter. Default is ILINet.csv, for the default flu data.""" - with zipfile.ZipFile(filename) as f: - with f.open(sheetname) as ff: - return [row for row in csv.reader(io.StringIO(str(ff.read(), 'utf-8')))] - -def get_rows(cnx, table='fluview'): - """Count and return the number of rows in the `fluview` table. - Looking at the fluview table by default, but may pass parameter - to look at public health or clinical lab data instead.""" - select = cnx.cursor() - select.execute('SELECT count(1) num FROM %s' % table) - for (num,) in select: - pass - select.close() - return num + hrow1 = [ + "REGION TYPE", + "REGION", + "SEASON_DESCRIPTION", + "TOTAL SPECIMENS", + "A (2009 H1N1)", + "A (H3)", + "A (Subtyping not Performed)", + "B", + "BVic", + "BYam", + "H3N2v", + ] + hrow2 = [ + "REGION TYPE", + "REGION", + "YEAR", + "WEEK", + "TOTAL SPECIMENS", + "A (2009 H1N1)", + "A (H3)", + "A (Subtyping not Performed)", + "B", + "BVic", + "BYam", + "H3N2v", + ] + if row[0] == "REGION TYPE" and row != hrow1 and row != hrow2: + raise Exception("header row has changed for public health lab data.") + if len(row) == 1 or row[0] == "REGION TYPE": + # header row + return None + if row[3] == "X": + # data is not reported, ignore this row + return None + # handle case where data is reported by season, not by epiweek + is_weekly = len(row) == len(hrow2) + # set epiweek + if is_weekly: + epiweek = join_epiweek(int(row[2]), int(row[3])) + else: + epiweek = int(row[2][7:11]) * 100 + 40 + # row offset + offset = 1 if is_weekly else 0 + return { + "location": fluview_locations.get_location_name(*row[:2]), + "epiweek": epiweek, + "total_specimens": int(row[3 + offset]), + "total_a_h1n1": optional_int(row[4 + offset]), + "total_a_h3": optional_int(row[5 + offset]), + "total_a_h3n2v": optional_int(row[10 + offset]), + "total_a_no_sub": optional_int(row[6 + offset]), + "total_b": optional_int(row[7 + offset]), + "total_b_vic": optional_int(row[8 + offset]), + "total_b_yam": optional_int(row[9 + offset]), + } + + +def load_zipped_csv(filename, sheetname="ILINet.csv"): + """Read rows from a zipped CSV, which is expected to be named as specified + by the sheetname parameter. Default is ILINet.csv, for the default flu data.""" + with zipfile.ZipFile(filename) as f: + with f.open(sheetname) as ff: + return [row for row in csv.reader(io.StringIO(str(ff.read(), "utf-8")))] + + +def get_rows(cnx, table="fluview"): + """Count and return the number of rows in the `fluview` table. + Looking at the fluview table by default, but may pass parameter + to look at public health or clinical lab data instead.""" + select = cnx.cursor() + select.execute("SELECT count(1) num FROM %s" % table) + for (num,) in select: + pass + select.close() + return num + def update_from_file_clinical(issue, date, filename, test_mode=False): - """ - Read WHO/NREVSS data from a zipped CSV and insert into (or update) the database. 
- """ - - # database connection - u, p = secrets.db.epi - cnx = mysql.connector.connect(user=u, password=p, database='epidata') - rows1 = get_rows(cnx, CL_TABLE) - print('rows before: %d' % (rows1)) - insert = cnx.cursor() - - # load the data, ignoring empty rows - print('loading data from %s as issued on %d' % (filename, issue)) - rows = load_zipped_csv(filename, CL_SHEET) - print(' loaded %d rows' % len(rows)) - data = [get_clinical_data(row) for row in rows] - entries = [obj for obj in data if obj] - print(' found %d entries' % len(entries)) - - sql = ''' - INSERT INTO - `fluview_clinical` (`release_date`, `issue`, `epiweek`, `region`, `lag`, - `total_specimens`, `total_a`, `total_b`, `percent_positive`, `percent_a`, - `percent_b`) - VALUES - (%s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s) - ON DUPLICATE KEY UPDATE - `release_date` = least(`release_date`, %s), - `total_specimens` = %s, - `total_a` = %s, - `total_b` = %s, - `percent_positive` = %s, - `percent_a` = %s, - `percent_b` = %s - ''' - - # insert each row - insert = cnx.cursor() - for row in entries: - lag = delta_epiweeks(row['epiweek'], issue) - args = [ - row['total_specimens'], row['total_a'], row['total_b'], - row['percent_positive'], row['percent_a'], row['percent_b'] - ] - ins_args = [date, issue, row['epiweek'], row['location'], lag] + args - upd_args = [date] + args - insert.execute(sql, ins_args + upd_args) - - # cleanup - insert.close() - if test_mode: - print('test mode, not committing') - rows2 = rows1 - else: - cnx.commit() - rows2 = get_rows(cnx) - print('rows after: %d (added %d)' % (rows2, rows2 - rows1)) - cnx.close() + """ + Read WHO/NREVSS data from a zipped CSV and insert into (or update) the database. + """ + + # database connection + u, p = secrets.db.epi + cnx = mysql.connector.connect(user=u, password=p, database="epidata") + rows1 = get_rows(cnx, CL_TABLE) + print("rows before: %d" % (rows1)) + insert = cnx.cursor() + + # load the data, ignoring empty rows + print("loading data from %s as issued on %d" % (filename, issue)) + rows = load_zipped_csv(filename, CL_SHEET) + print(" loaded %d rows" % len(rows)) + data = [get_clinical_data(row) for row in rows] + entries = [obj for obj in data if obj] + print(" found %d entries" % len(entries)) + + sql = """ + INSERT INTO + `fluview_clinical` (`release_date`, `issue`, `epiweek`, `region`, `lag`, + `total_specimens`, `total_a`, `total_b`, `percent_positive`, `percent_a`, + `percent_b`) + VALUES + (%s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s) + ON DUPLICATE KEY UPDATE + `release_date` = least(`release_date`, %s), + `total_specimens` = %s, + `total_a` = %s, + `total_b` = %s, + `percent_positive` = %s, + `percent_a` = %s, + `percent_b` = %s + """ + + # insert each row + insert = cnx.cursor() + for row in entries: + lag = delta_epiweeks(row["epiweek"], issue) + args = [ + row["total_specimens"], + row["total_a"], + row["total_b"], + row["percent_positive"], + row["percent_a"], + row["percent_b"], + ] + ins_args = [date, issue, row["epiweek"], row["location"], lag] + args + upd_args = [date] + args + insert.execute(sql, ins_args + upd_args) + + # cleanup + insert.close() + if test_mode: + print("test mode, not committing") + rows2 = rows1 + else: + cnx.commit() + rows2 = get_rows(cnx) + print("rows after: %d (added %d)" % (rows2, rows2 - rows1)) + cnx.close() + def update_from_file_public(issue, date, filename, test_mode=False): - """ - Read WHO/NREVSS data from a zipped CSV and insert into (or update) the database. 
- """ - - # database connection - u, p = secrets.db.epi - cnx = mysql.connector.connect(user=u, password=p, database='epidata') - rows1 = get_rows(cnx, PHL_TABLE) - print('rows before: %d' % (rows1)) - insert = cnx.cursor() - - # load the data, ignoring empty rows - print('loading data from %s as issued on %d' % (filename, issue)) - rows = load_zipped_csv(filename, PHL_SHEET) - print(' loaded %d rows' % len(rows)) - data = [get_public_data(row) for row in rows] - entries = [obj for obj in data if obj] - print(' found %d entries' % len(entries)) - - sql = ''' - INSERT INTO - `fluview_public` (`release_date`, `issue`, `epiweek`, `region`, `lag`, - `total_specimens`, `total_a_h1n1`, `total_a_h3`, `total_a_h3n2v`, - `total_a_no_sub`, `total_b`, `total_b_vic`, `total_b_yam`) - VALUES - (%s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s) - ON DUPLICATE KEY UPDATE - `release_date` = least(`release_date`, %s), - `total_specimens` = %s, - `total_a_h1n1` = %s, - `total_a_h3` = %s, - `total_a_h3n2v` = %s, - `total_a_no_sub` = %s, - `total_b` = %s, - `total_b_vic` = %s, - `total_b_yam` = %s - ''' - - # insert each row - insert = cnx.cursor() - for row in entries: - lag = delta_epiweeks(row['epiweek'], issue) - args = [ - row['total_specimens'], row['total_a_h1n1'], row['total_a_h3'], - row['total_a_h3n2v'], row['total_a_no_sub'], row['total_b'], - row['total_b_vic'], row['total_b_yam'] - ] - ins_args = [date, issue, row['epiweek'], row['location'], lag] + args - upd_args = [date] + args - insert.execute(sql, ins_args + upd_args) - - # cleanup - insert.close() - if test_mode: - print('test mode, not committing') - rows2 = rows1 - else: - cnx.commit() - rows2 = get_rows(cnx) - print('rows after: %d (added %d)' % (rows2, rows2 - rows1)) - cnx.close() + """ + Read WHO/NREVSS data from a zipped CSV and insert into (or update) the database. 
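    A rough sketch of how the pieces above fit together (illustrative only; the
    zip path here is hypothetical):

        rows = load_zipped_csv("flu_data/FluView.zip", PHL_SHEET)
        # header rows and unreported rows parse to None and are dropped
        entries = [e for e in (get_public_data(r) for r in rows) if e]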
+ """ + + # database connection + u, p = secrets.db.epi + cnx = mysql.connector.connect(user=u, password=p, database="epidata") + rows1 = get_rows(cnx, PHL_TABLE) + print("rows before: %d" % (rows1)) + insert = cnx.cursor() + + # load the data, ignoring empty rows + print("loading data from %s as issued on %d" % (filename, issue)) + rows = load_zipped_csv(filename, PHL_SHEET) + print(" loaded %d rows" % len(rows)) + data = [get_public_data(row) for row in rows] + entries = [obj for obj in data if obj] + print(" found %d entries" % len(entries)) + + sql = """ + INSERT INTO + `fluview_public` (`release_date`, `issue`, `epiweek`, `region`, `lag`, + `total_specimens`, `total_a_h1n1`, `total_a_h3`, `total_a_h3n2v`, + `total_a_no_sub`, `total_b`, `total_b_vic`, `total_b_yam`) + VALUES + (%s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s) + ON DUPLICATE KEY UPDATE + `release_date` = least(`release_date`, %s), + `total_specimens` = %s, + `total_a_h1n1` = %s, + `total_a_h3` = %s, + `total_a_h3n2v` = %s, + `total_a_no_sub` = %s, + `total_b` = %s, + `total_b_vic` = %s, + `total_b_yam` = %s + """ + + # insert each row + insert = cnx.cursor() + for row in entries: + lag = delta_epiweeks(row["epiweek"], issue) + args = [ + row["total_specimens"], + row["total_a_h1n1"], + row["total_a_h3"], + row["total_a_h3n2v"], + row["total_a_no_sub"], + row["total_b"], + row["total_b_vic"], + row["total_b_yam"], + ] + ins_args = [date, issue, row["epiweek"], row["location"], lag] + args + upd_args = [date] + args + insert.execute(sql, ins_args + upd_args) + + # cleanup + insert.close() + if test_mode: + print("test mode, not committing") + rows2 = rows1 + else: + cnx.commit() + rows2 = get_rows(cnx) + print("rows after: %d (added %d)" % (rows2, rows2 - rows1)) + cnx.close() + def update_from_file(issue, date, filename, test_mode=False): - """ - Read ILINet data from a zipped CSV and insert into (or update) the database. 
- """ - - # database connection - u, p = secrets.db.epi - cnx = mysql.connector.connect(user=u, password=p, database='epidata') - rows1 = get_rows(cnx) - print('rows before: %d' % (rows1)) - insert = cnx.cursor() - - # load the data, ignoring empty rows - print('loading data from %s as issued on %d' % (filename, issue)) - rows = load_zipped_csv(filename) - print(' loaded %d rows' % len(rows)) - data = [get_ilinet_data(row) for row in rows] - entries = [obj for obj in data if obj] - print(' found %d entries' % len(entries)) - - sql = ''' - INSERT INTO - `fluview` (`release_date`, `issue`, `epiweek`, `region`, `lag`, `num_ili`, - `num_patients`, `num_providers`, `wili`, `ili`, `num_age_0`, `num_age_1`, - `num_age_2`, `num_age_3`, `num_age_4`, `num_age_5`) - VALUES - (%s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s) - ON DUPLICATE KEY UPDATE - `release_date` = least(`release_date`, %s), - `num_ili` = %s, - `num_patients` = %s, - `num_providers` = %s, - `wili` = %s, - `ili` = %s, - `num_age_0` = coalesce(%s, `num_age_0`), - `num_age_1` = coalesce(%s, `num_age_1`), - `num_age_2` = coalesce(%s, `num_age_2`), - `num_age_3` = coalesce(%s, `num_age_3`), - `num_age_4` = coalesce(%s, `num_age_4`), - `num_age_5` = coalesce(%s, `num_age_5`) - ''' - - # insert each row - insert = cnx.cursor() - for row in entries: - lag = delta_epiweeks(row['epiweek'], issue) - args = [ - row['n_ili'], row['n_patients'], row['n_providers'], row['wili'], - row['ili'], row['age0'], row['age1'], row['age2'], row['age3'], - row['age4'], row['age5'] - ] - ins_args = [date, issue, row['epiweek'], row['location'], lag] + args - upd_args = [date] + args - insert.execute(sql, ins_args + upd_args) - - # cleanup - insert.close() - if test_mode: - print('test mode, not committing') - rows2 = rows1 - else: - cnx.commit() - rows2 = get_rows(cnx) - print('rows after: %d (added %d)' % (rows2, rows2 - rows1)) - cnx.close() + """ + Read ILINet data from a zipped CSV and insert into (or update) the database. 
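    These loaders all bind a single flat parameter tuple: the first group of %s
    placeholders fills the VALUES list, and the data columns are repeated once
    more for the ON DUPLICATE KEY UPDATE clause. A stripped-down sketch of the
    pattern, with a hypothetical table and column set and an already-open
    cursor:

        sql = (
            "INSERT INTO `t` (`release_date`, `issue`, `epiweek`, `region`, `lag`, `val`) "
            "VALUES (%s, %s, %s, %s, %s, %s) "
            "ON DUPLICATE KEY UPDATE "
            "`release_date` = least(`release_date`, %s), `val` = %s"
        )
        date, issue, epiweek, region, lag, val = "2023-06-23", 202325, 202323, "nat", 2, 1.5
        args = [val]                              # data columns, shared by both clauses
        ins_args = [date, issue, epiweek, region, lag] + args   # 6 VALUES placeholders
        upd_args = [date] + args                                # 2 UPDATE placeholders
        cursor.execute(sql, ins_args + upd_args)  # 8 placeholders, 8 parameters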
+ """ + + # database connection + u, p = secrets.db.epi + cnx = mysql.connector.connect(user=u, password=p, database="epidata") + rows1 = get_rows(cnx) + print("rows before: %d" % (rows1)) + insert = cnx.cursor() + + # load the data, ignoring empty rows + print("loading data from %s as issued on %d" % (filename, issue)) + rows = load_zipped_csv(filename) + print(" loaded %d rows" % len(rows)) + data = [get_ilinet_data(row) for row in rows] + entries = [obj for obj in data if obj] + print(" found %d entries" % len(entries)) + + sql = """ + INSERT INTO + `fluview` (`release_date`, `issue`, `epiweek`, `region`, `lag`, `num_ili`, + `num_patients`, `num_providers`, `wili`, `ili`, `num_age_0`, `num_age_1`, + `num_age_2`, `num_age_3`, `num_age_4`, `num_age_5`) + VALUES + (%s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s) + ON DUPLICATE KEY UPDATE + `release_date` = least(`release_date`, %s), + `num_ili` = %s, + `num_patients` = %s, + `num_providers` = %s, + `wili` = %s, + `ili` = %s, + `num_age_0` = coalesce(%s, `num_age_0`), + `num_age_1` = coalesce(%s, `num_age_1`), + `num_age_2` = coalesce(%s, `num_age_2`), + `num_age_3` = coalesce(%s, `num_age_3`), + `num_age_4` = coalesce(%s, `num_age_4`), + `num_age_5` = coalesce(%s, `num_age_5`) + """ + + # insert each row + insert = cnx.cursor() + for row in entries: + lag = delta_epiweeks(row["epiweek"], issue) + args = [ + row["n_ili"], + row["n_patients"], + row["n_providers"], + row["wili"], + row["ili"], + row["age0"], + row["age1"], + row["age2"], + row["age3"], + row["age4"], + row["age5"], + ] + ins_args = [date, issue, row["epiweek"], row["location"], lag] + args + upd_args = [date] + args + insert.execute(sql, ins_args + upd_args) + + # cleanup + insert.close() + if test_mode: + print("test mode, not committing") + rows2 = rows1 + else: + cnx.commit() + rows2 = get_rows(cnx) + print("rows after: %d (added %d)" % (rows2, rows2 - rows1)) + cnx.close() + def main(): - # args and usage - parser = argparse.ArgumentParser() - parser.add_argument( - '--test', - action='store_true', - help='do dry run only, do not update the database' - ) - parser.add_argument( - '--file', - type=str, - help='load an existing zip file (otherwise fetch current data)' - ) - parser.add_argument( - '--issue', - type=int, - help='issue of the file (e.g. 
201740); used iff --file is given' - ) - args = parser.parse_args() - - if (args.file is None) != (args.issue is None): - raise Exception('--file and --issue must both be present or absent') - - date = datetime.datetime.now().strftime('%Y-%m-%d') - print('assuming release date is today, %s' % date) - - if args.file: - update_from_file(args.issue, date, args.file, test_mode=args.test) - update_from_file_clinical(args.issue, date, args.file, test_mode=args.test) - # TODO: header row has changed for public health lab data - # update_from_file_public(args.issue, date, args.file, test_mode=args.test) - else: - issue, files = fluview.save_latest(path='flu_data') - for filename in files: - update_from_file(issue, date, filename, test_mode=args.test) - update_from_file_clinical(issue, date, filename, test_mode=args.test) - # TODO: header row has changed for public health lab data - # update_from_file_public(issue, date, filename, test_mode=args.test) - -if __name__ == '__main__': - main() + # args and usage + parser = argparse.ArgumentParser() + parser.add_argument( + "--test", action="store_true", help="do dry run only, do not update the database" + ) + parser.add_argument( + "--file", type=str, help="load an existing zip file (otherwise fetch current data)" + ) + parser.add_argument( + "--issue", type=int, help="issue of the file (e.g. 201740); used iff --file is given" + ) + args = parser.parse_args() + + if (args.file is None) != (args.issue is None): + raise Exception("--file and --issue must both be present or absent") + + date = datetime.datetime.now().strftime("%Y-%m-%d") + print("assuming release date is today, %s" % date) + + if args.file: + update_from_file(args.issue, date, args.file, test_mode=args.test) + update_from_file_clinical(args.issue, date, args.file, test_mode=args.test) + # TODO: header row has changed for public health lab data + # update_from_file_public(args.issue, date, args.file, test_mode=args.test) + else: + issue, files = fluview.save_latest(path="flu_data") + for filename in files: + update_from_file(issue, date, filename, test_mode=args.test) + update_from_file_clinical(issue, date, filename, test_mode=args.test) + # TODO: header row has changed for public health lab data + # update_from_file_public(issue, date, filename, test_mode=args.test) + + +if __name__ == "__main__": + main() diff --git a/src/acquisition/fluview/impute_missing_values.py b/src/acquisition/fluview/impute_missing_values.py index 7f9a23231..230dd2f7d 100644 --- a/src/acquisition/fluview/impute_missing_values.py +++ b/src/acquisition/fluview/impute_missing_values.py @@ -59,290 +59,283 @@ class Database: - """Database wrapper and abstraction layer.""" - - class Sql: - """Container for SQL constants.""" - - # Count the total number of imputed rows. - count_rows = ''' - SELECT - count(1) `num` - FROM - `fluview_imputed` - ''' - - # Find (issue, epiweek) pairs that exist in table `fluview` but not in - # table `fluview_imputed`. Note that only issues >= 201740 are selected - # because that's when CDC first started posting state-level ILINet data. - # This assumes that `fluview` is always missing at least one location. - find_missing_rows = ''' - SELECT - fv.`issue`, fv.`epiweek` - FROM ( + """Database wrapper and abstraction layer.""" + + class Sql: + """Container for SQL constants.""" + + # Count the total number of imputed rows. 
+ count_rows = """ SELECT - `issue`, `epiweek` + count(1) `num` FROM - `fluview` + `fluview_imputed` + """ + + # Find (issue, epiweek) pairs that exist in table `fluview` but not in + # table `fluview_imputed`. Note that only issues >= 201740 are selected + # because that's when CDC first started posting state-level ILINet data. + # This assumes that `fluview` is always missing at least one location. + find_missing_rows = """ + SELECT + fv.`issue`, fv.`epiweek` + FROM ( + SELECT + `issue`, `epiweek` + FROM + `fluview` + WHERE + `issue` >= 201740 + GROUP BY + `issue`, `epiweek` + ) fv + LEFT JOIN ( + SELECT + `issue`, `epiweek` + FROM + `fluview_imputed` + GROUP BY + `issue`, `epiweek` + ) fvi + ON + fvi.`issue` = fv.`issue` AND fvi.`epiweek` = fv.`epiweek` WHERE - `issue` >= 201740 - GROUP BY - `issue`, `epiweek` - ) fv - LEFT JOIN ( + fvi.`issue` IS NULL + """ + + # Read all location rows from the `fluview` table for a given issue and + # epiweek. + get_known_values = """ SELECT - `issue`, `epiweek` + `region`, `num_ili`, `num_patients`, `num_providers` FROM - `fluview_imputed` - GROUP BY - `issue`, `epiweek` - ) fvi - ON - fvi.`issue` = fv.`issue` AND fvi.`epiweek` = fv.`epiweek` - WHERE - fvi.`issue` IS NULL - ''' - - # Read all location rows from the `fluview` table for a given issue and - # epiweek. - get_known_values = ''' - SELECT - `region`, `num_ili`, `num_patients`, `num_providers` - FROM - `fluview` - WHERE - `issue` = %s AND `epiweek` = %s - ''' - - # Insert location rows into the `fluview_imputed` table for a given issue - # and epiweek. - add_imputed_values = ''' - INSERT INTO - `fluview_imputed` ( - `issue`, - `epiweek`, - `region`, - `lag`, - `num_ili`, - `num_patients`, - `num_providers`, - `ili` - ) - VALUES - (%s, %s, %s, %s, %s, %s, %s, %s) - ''' - - def connect(self): - """Connect to the database.""" - u, p = secrets.db.epi - self.cnx = mysql.connector.connect(user=u, password=p, database='epidata') - self.cur = self.cnx.cursor() - - def close(self, commit): - """ - Close the connection to the database, committing or rolling back changes as - indicated. - """ - self.cur.close() - if commit: - self.cnx.commit() - else: - print('test mode, not committing') - self.cnx.close() - - def count_rows(self): - """Count and return the number of rows in the `fluview_imputed` table.""" - self.cur.execute(Database.Sql.count_rows) - for (num,) in self.cur: - return num - - def find_missing_rows(self): - """ - Find rows that still have missing values. Each missing row is uniquely - identified by an (issue, epiweek, location) tuple. This function finds the - first two. - """ + `fluview` + WHERE + `issue` = %s AND `epiweek` = %s + """ + + # Insert location rows into the `fluview_imputed` table for a given issue + # and epiweek. + add_imputed_values = """ + INSERT INTO + `fluview_imputed` ( + `issue`, + `epiweek`, + `region`, + `lag`, + `num_ili`, + `num_patients`, + `num_providers`, + `ili` + ) + VALUES + (%s, %s, %s, %s, %s, %s, %s, %s) + """ + + def connect(self): + """Connect to the database.""" + u, p = secrets.db.epi + self.cnx = mysql.connector.connect(user=u, password=p, database="epidata") + self.cur = self.cnx.cursor() + + def close(self, commit): + """ + Close the connection to the database, committing or rolling back changes as + indicated. 
+ """ + self.cur.close() + if commit: + self.cnx.commit() + else: + print("test mode, not committing") + self.cnx.close() + + def count_rows(self): + """Count and return the number of rows in the `fluview_imputed` table.""" + self.cur.execute(Database.Sql.count_rows) + for (num,) in self.cur: + return num + + def find_missing_rows(self): + """ + Find rows that still have missing values. Each missing row is uniquely + identified by an (issue, epiweek, location) tuple. This function finds the + first two. + """ + + self.cur.execute(Database.Sql.find_missing_rows) + return [(issue, epiweek) for (issue, epiweek) in self.cur] + + def get_known_values(self, issue, epiweek): + """ + Fetch ILINet data for all locations available for the given issue and + epiweek. The returned value is a dict mapping from locations to ILI data. + """ + + self.cur.execute(Database.Sql.get_known_values, (issue, epiweek)) + return {loc: (n_ili, n_pat, n_prov) for (loc, n_ili, n_pat, n_prov) in self.cur} + + def add_imputed_values(self, issue, epiweek, imputed): + """ + Store imputed ILINet data for the given locations on the given issue and + epiweek. The imputed value is a dict mapping from locations to ILI data. + """ + + for loc in imputed.keys(): + lag, n_ili, n_pat, n_prov, ili = imputed[loc] + args = (issue, epiweek, loc, lag, n_ili, n_pat, n_prov, ili) + self.cur.execute(Database.Sql.add_imputed_values, args) - self.cur.execute(Database.Sql.find_missing_rows) - return [(issue, epiweek) for (issue, epiweek) in self.cur] - def get_known_values(self, issue, epiweek): - """ - Fetch ILINet data for all locations available for the given issue and - epiweek. The returned value is a dict mapping from locations to ILI data. - """ +class StatespaceException(Exception): + """Used to indicate that imputation is not possible with the given inputs.""" - self.cur.execute(Database.Sql.get_known_values, (issue, epiweek)) - return dict([ - (loc, (n_ili, n_pat, n_prov)) - for - (loc, n_ili, n_pat, n_prov) - in self.cur - ]) - def add_imputed_values(self, issue, epiweek, imputed): +def get_location_graph(): """ - Store imputed ILINet data for the given locations on the given issue and - epiweek. The imputed value is a dict mapping from locations to ILI data. + Return a matrix where rows represent regions, columns represent atoms, and + each entry is a 1 if the region contains the atom, otherwise 0. The + corresponding lists of regions and atoms are also returned. """ - for loc in imputed.keys(): - lag, n_ili, n_pat, n_prov, ili = imputed[loc] - args = (issue, epiweek, loc, lag, n_ili, n_pat, n_prov, ili) - self.cur.execute(Database.Sql.add_imputed_values, args) - - -class StatespaceException(Exception): - """Used to indicate that imputation is not possible with the given inputs.""" - - -def get_location_graph(): - """ - Return a matrix where rows represent regions, columns represent atoms, and - each entry is a 1 if the region contains the atom, otherwise 0. The - corresponding lists of regions and atoms are also returned. 
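  For intuition, the same construction on a toy geography (made-up names, not
  the real Locations lists):

      import numpy as np

      region_map = {"nat": ["pa", "tx"], "pa": ["pa"], "tx": ["tx"]}
      regions = sorted(region_map)                                  # ['nat', 'pa', 'tx']
      atoms = sorted({a for members in region_map.values() for a in members})  # ['pa', 'tx']
      graph = np.zeros((len(regions), len(atoms)))
      for i, r in enumerate(regions):
          for a in region_map[r]:
              graph[i, atoms.index(a)] = 1
      # rows of graph: nat -> [1, 1], pa -> [1, 0], tx -> [0, 1]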
- """ - - regions = sorted(Locations.region_list) - atoms = sorted(Locations.atom_list) - graph = np.zeros((len(regions), len(atoms))) - for i, r in enumerate(regions): - for a in Locations.region_map[r]: - j = atoms.index(a) - graph[i, j] = 1 - return graph, regions, atoms + regions = sorted(Locations.region_list) + atoms = sorted(Locations.atom_list) + graph = np.zeros((len(regions), len(atoms))) + for i, r in enumerate(regions): + for a in Locations.region_map[r]: + j = atoms.index(a) + graph[i, j] = 1 + return graph, regions, atoms def get_fusion_parameters(known_locations): - """ - Return a matrix that fuses known ILI values into unknown ILI values. The - corresponding lists of known and unknown locations are also returned. + """ + Return a matrix that fuses known ILI values into unknown ILI values. The + corresponding lists of known and unknown locations are also returned. - The goal is to infer ILI data in all locations, given ILI data in some - partial set of locations. This function takes a sensor fusion approach. + The goal is to infer ILI data in all locations, given ILI data in some + partial set of locations. This function takes a sensor fusion approach. - Let $z$ be a column vector of values in reported locations. Let $y$ be the - desired column vector of values in unreported locations. With matrices $H$ - (mapping from latent state to reported values), $W$ (mapping from latent - state to unreported values), and $R = I$ (covariance, which is identity): + Let $z$ be a column vector of values in reported locations. Let $y$ be the + desired column vector of values in unreported locations. With matrices $H$ + (mapping from latent state to reported values), $W$ (mapping from latent + state to unreported values), and $R = I$ (covariance, which is identity): - $y = W (H^T R^{-1} H)^{-1} H^T R^{-1} z$ - $y = W (H^T H)^{-1} H^T z$ + $y = W (H^T R^{-1} H)^{-1} H^T R^{-1} z$ + $y = W (H^T H)^{-1} H^T z$ - This is equavalent to OLS regression with an added translation from atomic - locations to missing locations. Unknown values are computed as a linear - combination of known values. - """ + This is equavalent to OLS regression with an added translation from atomic + locations to missing locations. Unknown values are computed as a linear + combination of known values. 
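    A small worked example of that formula on a toy geography, where region
    'ab' is the union of atoms 'a' and 'b' and only 'a' and 'ab' are reported
    (toy names and numbers, not real Delphi locations):

        import numpy as np

        # location graph: rows = regions (a, ab, b), columns = atoms (a, b)
        graph = np.array([[1.0, 0.0],   # region 'a'  covers atom a
                          [1.0, 1.0],   # region 'ab' covers atoms a and b
                          [0.0, 1.0]])  # region 'b'  covers atom b
        is_known = np.array([True, True, False])   # 'b' is the unreported region
        H, W = graph[is_known], graph[~is_known]   # H has full column rank here
        z = np.array([3.0, 10.0])                  # reported values for 'a', 'ab'
        y = W @ np.linalg.inv(H.T @ H) @ H.T @ z   # y = W (H^T H)^{-1} H^T z
        # y == array([7.]): the unreported region is recovered as ab - a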
+ """ - graph, regions, atoms = get_location_graph() - is_known = np.array([r in known_locations for r in regions]) - is_unknown = np.logical_not(is_known) - if not np.any(is_known): - raise StatespaceException('no values are known') - if not np.any(is_unknown): - raise StatespaceException('no values are unknown') + graph, regions, atoms = get_location_graph() + is_known = np.array([r in known_locations for r in regions]) + is_unknown = np.logical_not(is_known) + if not np.any(is_known): + raise StatespaceException("no values are known") + if not np.any(is_unknown): + raise StatespaceException("no values are unknown") - H = graph[is_known, :] - W = graph[is_unknown, :] - if np.linalg.matrix_rank(H) != len(atoms): - raise StatespaceException('system is underdetermined') + H = graph[is_known, :] + W = graph[is_unknown, :] + if np.linalg.matrix_rank(H) != len(atoms): + raise StatespaceException("system is underdetermined") - HtH = np.dot(H.T, H) - HtH_inv = np.linalg.inv(HtH) - H_pseudo_inv = np.dot(HtH_inv, H.T) - fuser = np.dot(W, H_pseudo_inv) + HtH = np.dot(H.T, H) + HtH_inv = np.linalg.inv(HtH) + H_pseudo_inv = np.dot(HtH_inv, H.T) + fuser = np.dot(W, H_pseudo_inv) - locations = np.array(regions) - filter_locations = lambda selected: list(map(str, locations[selected])) - return fuser, filter_locations(is_known), filter_locations(is_unknown) + locations = np.array(regions) + filter_locations = lambda selected: list(map(str, locations[selected])) + return fuser, filter_locations(is_known), filter_locations(is_unknown) def get_lag_and_ili(issue, epiweek, num_ili, num_patients): - """ - Compute and return reporting lag and percent ILI from imputed ILINet data. - """ - lag = delta_epiweeks(epiweek, issue) - ili = 100.0 * (0 if num_patients == 0 else num_ili / num_patients) - return lag, ili + """ + Compute and return reporting lag and percent ILI from imputed ILINet data. + """ + lag = delta_epiweeks(epiweek, issue) + ili = 100.0 * (0 if num_patients == 0 else num_ili / num_patients) + return lag, ili def impute_missing_values(database, test_mode=False): - """ - Determine whether values are missing for any states and territories. If so, - impute them and store them in the database. - """ - - # database connection - database.connect() - rows1 = database.count_rows() - print('rows before: %d' % (rows1)) - - # iterate over missing epiweeks - missing_rows = database.find_missing_rows() - print('missing data for %d epiweeks' % len(missing_rows)) - for issue, epiweek in missing_rows: - print('i=%d e=%d' % (issue, epiweek)) - - # get known values from table `fluview` - known_values = database.get_known_values(issue, epiweek) - - # Unlike most other state-level data, which typically begins publicly on - # 2010w40, data for PR begins on 2013w40. Before this, there are no reports - # for PR. Here we assume that no report is equivalent to a report of all - # zeros (number of ILI, patients, and providers). That's mostly true, with - # the notable exception of wILI, but that's not relevant here. By assuming - # that PR reports zero on those weeks, it's possible to impute values for - # VI, which are otherwise not reported until 2015w40. 
- assume_pr_zero = epiweek < 201340 and 'pr' not in known_values - if assume_pr_zero: - known_values['pr'] = (0, 0, 0) - - # get the imputation matrix and lists of known and unknown locations - F, known, unknown = get_fusion_parameters(known_values.keys()) - - # finally, impute the missing values - z = np.array([known_values[k] for k in known]) - y = np.dot(F, z) - - # possibly also record the assumptions made for PR - if assume_pr_zero: - unknown.append('pr') - y = np.vstack((y, [known_values['pr']])) - - # add lag and percent ILI to the data for each imputed location - imputed_values = {} - for loc, values in zip(unknown, y): - n_ili, n_pat, n_prov = map(int, np.rint(values)) - lag, ili = get_lag_and_ili(issue, epiweek, n_ili, n_pat) - imputed_values[loc] = (lag, n_ili, n_pat, n_prov, ili) - print(' %s: %s' % (loc, str(imputed_values[loc]))) - - # save all imputed values in table `fluview_imputed` - database.add_imputed_values(issue, epiweek, imputed_values) - - # database cleanup - rows2 = database.count_rows() - print('rows after: %d (added %d)' % (rows2, rows2 - rows1)) - commit = not test_mode - database.close(commit) + """ + Determine whether values are missing for any states and territories. If so, + impute them and store them in the database. + """ + + # database connection + database.connect() + rows1 = database.count_rows() + print("rows before: %d" % (rows1)) + + # iterate over missing epiweeks + missing_rows = database.find_missing_rows() + print("missing data for %d epiweeks" % len(missing_rows)) + for issue, epiweek in missing_rows: + print("i=%d e=%d" % (issue, epiweek)) + + # get known values from table `fluview` + known_values = database.get_known_values(issue, epiweek) + + # Unlike most other state-level data, which typically begins publicly on + # 2010w40, data for PR begins on 2013w40. Before this, there are no reports + # for PR. Here we assume that no report is equivalent to a report of all + # zeros (number of ILI, patients, and providers). That's mostly true, with + # the notable exception of wILI, but that's not relevant here. By assuming + # that PR reports zero on those weeks, it's possible to impute values for + # VI, which are otherwise not reported until 2015w40. 
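        # Concretely (hypothetical case): for an epiweek before 2013w40, say
        # 201252, PR is absent from the known values; treating it as a report
        # of (0, 0, 0) adds PR to the known set so that the fusion step below
        # has enough reported locations to solve for VI as well.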
+ assume_pr_zero = epiweek < 201340 and "pr" not in known_values + if assume_pr_zero: + known_values["pr"] = (0, 0, 0) + + # get the imputation matrix and lists of known and unknown locations + F, known, unknown = get_fusion_parameters(known_values.keys()) + + # finally, impute the missing values + z = np.array([known_values[k] for k in known]) + y = np.dot(F, z) + + # possibly also record the assumptions made for PR + if assume_pr_zero: + unknown.append("pr") + y = np.vstack((y, [known_values["pr"]])) + + # add lag and percent ILI to the data for each imputed location + imputed_values = {} + for loc, values in zip(unknown, y): + n_ili, n_pat, n_prov = map(int, np.rint(values)) + lag, ili = get_lag_and_ili(issue, epiweek, n_ili, n_pat) + imputed_values[loc] = (lag, n_ili, n_pat, n_prov, ili) + print(f" {loc}: {str(imputed_values[loc])}") + + # save all imputed values in table `fluview_imputed` + database.add_imputed_values(issue, epiweek, imputed_values) + + # database cleanup + rows2 = database.count_rows() + print("rows after: %d (added %d)" % (rows2, rows2 - rows1)) + commit = not test_mode + database.close(commit) def get_argument_parser(): - """Set up command line arguments and usage.""" - parser = argparse.ArgumentParser() - parser.add_argument( - '--test', - action='store_true', - help='do dry run only, do not update the database' - ) - return parser + """Set up command line arguments and usage.""" + parser = argparse.ArgumentParser() + parser.add_argument( + "--test", action="store_true", help="do dry run only, do not update the database" + ) + return parser def main(): - """Run this script from the command line.""" - args = get_argument_parser().parse_args() - impute_missing_values(Database(), test_mode=args.test) + """Run this script from the command line.""" + args = get_argument_parser().parse_args() + impute_missing_values(Database(), test_mode=args.test) -if __name__ == '__main__': - main() +if __name__ == "__main__": + main() From b8900a0bc846888885310911efd6e26459effa99 Mon Sep 17 00:00:00 2001 From: Dmitry Shemetov Date: Wed, 21 Jun 2023 13:53:39 -0700 Subject: [PATCH 23/43] style(black): format ght acquisition --- src/acquisition/ght/ght_update.py | 587 ++++++++++---------- src/acquisition/ght/google_health_trends.py | 215 +++---- 2 files changed, 417 insertions(+), 385 deletions(-) diff --git a/src/acquisition/ght/ght_update.py b/src/acquisition/ght/ght_update.py index c1e9b8d94..76046c5c4 100644 --- a/src/acquisition/ght/ght_update.py +++ b/src/acquisition/ght/ght_update.py @@ -1,4 +1,4 @@ -''' +""" =============== === Purpose === =============== @@ -63,7 +63,7 @@ * fixed multiple-word queries (surround with quotes) 2015-12-01 * Original version -''' +""" # standard library import argparse @@ -88,304 +88,325 @@ # 2010-04-19 and 2015-05-05 # see: https://www.google.com/trends/correlate TERMS = [ - '/m/0cycc', - 'influenza type a', - 'flu duration', - 'flu fever', - 'treating flu', - 'fever flu', - 'flu recovery', - 'braun thermoscan', - 'oscillococcinum', - 'treating the flu', - 'cold or flu', - 'flu versus cold', - 'flu remedies', - 'contagious flu', - 'type a influenza', - 'flu or cold', - 'duration of flu', - 'cold versus flu', - 'flu cough', - 'flu headache', - 'thermoscan', - 'influenza incubation period', - 'flu lasts', - 'length of flu', - 'flu stomach', - 'cold vs flu', - 'flu and fever', - 'getting over the flu', - 'influenza a', - 'treatment for flu', - 'flu length', - 'treatment for the flu', - 'influenza symptoms', - 'over the counter flu', - 'flu 
complications', - 'cold and flu symptoms', - 'influenza incubation', - 'treatment of flu', - 'human temperature', - 'low body', - 'flu contagious', - 'robitussin ac', - 'flu how long', - 'ear thermometer', - 'flu contagious period', - 'treat flu', - 'cough flu', - 'low body temperature', - 'expectorant', - 'flu and cold', - 'rapid flu', - 'flu vs. cold', - 'how to treat the flu', - 'how long does the flu last?', - 'viral pneumonia', - 'flu in kids', - 'type a flu', - 'influenza treatment', - 'fighting the flu', - 'flu relief', - 'treat the flu', - 'flu medicine', - 'dangerous fever', - 'what is influenza', - 'tussin', - 'low body temp', - 'flu care', - 'flu in infants', - 'flu dizziness', - 'feed a fever', - 'flu vs cold', - 'flu vomiting', - 'bacterial pneumonia', - 'flu activity', - 'flu chills', - 'anas barbariae', - 'flu germs', - 'tylenol cold', - 'how to get over the flu', - 'flu in children', - 'influenza a and b', - 'duration of the flu', - 'cold symptoms', - 'flu report', - 'rapid flu test', - 'flu relapse', - 'get over the flu', - 'flu during pregnancy', - 'flu recovery time', - 'cure for flu', - 'tamiflu and breastfeeding', - 'flu chest pain', - 'flu treatment', - 'flu nausea', - 'remedies for the flu', - 'tamiflu in pregnancy', - 'side effects of tamiflu', - 'how to treat flu', - 'viral bronchitis', - 'flu how long contagious', - 'flu remedy', + "/m/0cycc", + "influenza type a", + "flu duration", + "flu fever", + "treating flu", + "fever flu", + "flu recovery", + "braun thermoscan", + "oscillococcinum", + "treating the flu", + "cold or flu", + "flu versus cold", + "flu remedies", + "contagious flu", + "type a influenza", + "flu or cold", + "duration of flu", + "cold versus flu", + "flu cough", + "flu headache", + "thermoscan", + "influenza incubation period", + "flu lasts", + "length of flu", + "flu stomach", + "cold vs flu", + "flu and fever", + "getting over the flu", + "influenza a", + "treatment for flu", + "flu length", + "treatment for the flu", + "influenza symptoms", + "over the counter flu", + "flu complications", + "cold and flu symptoms", + "influenza incubation", + "treatment of flu", + "human temperature", + "low body", + "flu contagious", + "robitussin ac", + "flu how long", + "ear thermometer", + "flu contagious period", + "treat flu", + "cough flu", + "low body temperature", + "expectorant", + "flu and cold", + "rapid flu", + "flu vs. 
cold", + "how to treat the flu", + "how long does the flu last?", + "viral pneumonia", + "flu in kids", + "type a flu", + "influenza treatment", + "fighting the flu", + "flu relief", + "treat the flu", + "flu medicine", + "dangerous fever", + "what is influenza", + "tussin", + "low body temp", + "flu care", + "flu in infants", + "flu dizziness", + "feed a fever", + "flu vs cold", + "flu vomiting", + "bacterial pneumonia", + "flu activity", + "flu chills", + "anas barbariae", + "flu germs", + "tylenol cold", + "how to get over the flu", + "flu in children", + "influenza a and b", + "duration of the flu", + "cold symptoms", + "flu report", + "rapid flu test", + "flu relapse", + "get over the flu", + "flu during pregnancy", + "flu recovery time", + "cure for flu", + "tamiflu and breastfeeding", + "flu chest pain", + "flu treatment", + "flu nausea", + "remedies for the flu", + "tamiflu in pregnancy", + "side effects of tamiflu", + "how to treat flu", + "viral bronchitis", + "flu how long contagious", + "flu remedy", ] # a list of all US states, including DC and the US as a whole LOCATIONS = [ - 'US', - 'AL', - 'AK', - 'AZ', - 'AR', - 'CA', - 'CO', - 'CT', - 'DC', - 'DE', - 'FL', - 'GA', - 'HI', - 'ID', - 'IL', - 'IN', - 'IA', - 'KS', - 'KY', - 'LA', - 'ME', - 'MD', - 'MA', - 'MI', - 'MN', - 'MS', - 'MO', - 'MT', - 'NE', - 'NV', - 'NH', - 'NJ', - 'NM', - 'NY', - 'NC', - 'ND', - 'OH', - 'OK', - 'OR', - 'PA', - 'RI', - 'SC', - 'SD', - 'TN', - 'TX', - 'UT', - 'VT', - 'VA', - 'WA', - 'WV', - 'WI', - 'WY', + "US", + "AL", + "AK", + "AZ", + "AR", + "CA", + "CO", + "CT", + "DC", + "DE", + "FL", + "GA", + "HI", + "ID", + "IL", + "IN", + "IA", + "KS", + "KY", + "LA", + "ME", + "MD", + "MA", + "MI", + "MN", + "MS", + "MO", + "MT", + "NE", + "NV", + "NH", + "NJ", + "NM", + "NY", + "NC", + "ND", + "OH", + "OK", + "OR", + "PA", + "RI", + "SC", + "SD", + "TN", + "TX", + "UT", + "VT", + "VA", + "WA", + "WV", + "WI", + "WY", ] -def update(locations, terms, first=None, last=None, countries=['US']): - # connect to the database - u, p = secrets.db.epi - cnx = mysql.connector.connect(user=u, password=p, database='epidata') - cur = cnx.cursor() +def update(locations, terms, first=None, last=None, countries=["US"]): + # connect to the database + u, p = secrets.db.epi + cnx = mysql.connector.connect(user=u, password=p, database="epidata") + cur = cnx.cursor() - def get_num_rows(): - cur.execute('SELECT count(1) `num` FROM `ght`') - for (num,) in cur: - pass - return num + def get_num_rows(): + cur.execute("SELECT count(1) `num` FROM `ght`") + for (num,) in cur: + pass + return num - # check from 4 weeks preceeding the last week with data through this week - cur.execute('SELECT max(`epiweek`) `ew0`, yearweek(now(), 6) `ew1` FROM `ght`') - for (ew0, ew1) in cur: - ew0 = 200401 if ew0 is None else flu.add_epiweeks(ew0, -4) - ew0 = ew0 if first is None else first - ew1 = ew1 if last is None else last - print('Checking epiweeks between %d and %d...' % (ew0, ew1)) + # check from 4 weeks preceeding the last week with data through this week + cur.execute("SELECT max(`epiweek`) `ew0`, yearweek(now(), 6) `ew1` FROM `ght`") + for (ew0, ew1) in cur: + ew0 = 200401 if ew0 is None else flu.add_epiweeks(ew0, -4) + ew0 = ew0 if first is None else first + ew1 = ew1 if last is None else last + print("Checking epiweeks between %d and %d..." 
% (ew0, ew1)) - # keep track of how many rows were added - rows_before = get_num_rows() + # keep track of how many rows were added + rows_before = get_num_rows() - # check Google Trends for new and/or revised data - sql = ''' + # check Google Trends for new and/or revised data + sql = """ INSERT INTO `ght` (`query`, `location`, `epiweek`, `value`) VALUES (%s, %s, %s, %s) ON DUPLICATE KEY UPDATE `value` = %s - ''' - total_rows = 0 - ght = GHT(API_KEY) - for term in terms: - print(' [%s] using term' % term) - ll, cl = len(locations), len(countries) - for i in range(max(ll,cl)): - location = locations[i] if i < ll else locations[0] - country = countries[i] if i < cl else countries[0] - try: - #term2 = ('"%s"' % term) if ' ' in term else term - term2 = term - attempt = 0 - while True: - attempt += 1 - try: - result = ght.get_data(ew0, ew1, location, term2, country=country) - break - except Exception as ex: - if attempt >= 5: - raise ex - else: - delay = 2 ** attempt - print(' [%s|%s] caught exception (will retry in %ds):' % (term, location, delay), ex) - time.sleep(delay) - values = [p['value'] for p in result['data']['lines'][0]['points']] - ew = result['start_week'] - num_missing = 0 - for v in values: - # Default SQL location value for US country for backwards compatibility - # i.e. California's location is still stored as 'CA', - # and having location == 'US' is still stored as 'US' - sql_location = location if location != NO_LOCATION_STR else country - - # Change SQL location for non-US countries - if country != 'US': - # Underscore added to distinguish countries from 2-letter US states - sql_location = country + "_" - if location != NO_LOCATION_STR: - sql_location = sql_location + location - sql_data = (term, sql_location, ew, v, v) - cur.execute(sql, sql_data) - total_rows += 1 - if v == 0: - num_missing += 1 - #print(' [%s|%s|%d] missing value' % (term, location, ew)) - ew = flu.add_epiweeks(ew, 1) - if num_missing > 0: - print(' [%s|%s] missing %d/%d value(s)' % (term, location, num_missing, len(values))) - except Exception as ex: - print(' [%s|%s] caught exception (will NOT retry):' % (term, location), ex) - - # keep track of how many rows were added - rows_after = get_num_rows() - print('Inserted %d/%d row(s)'%(rows_after - rows_before, total_rows)) - - # cleanup - cur.close() - cnx.commit() - cnx.close() + """ + total_rows = 0 + ght = GHT(API_KEY) + for term in terms: + print(" [%s] using term" % term) + ll, cl = len(locations), len(countries) + for i in range(max(ll, cl)): + location = locations[i] if i < ll else locations[0] + country = countries[i] if i < cl else countries[0] + try: + # term2 = ('"%s"' % term) if ' ' in term else term + term2 = term + attempt = 0 + while True: + attempt += 1 + try: + result = ght.get_data(ew0, ew1, location, term2, country=country) + break + except Exception as ex: + if attempt >= 5: + raise ex + else: + delay = 2**attempt + print( + " [%s|%s] caught exception (will retry in %ds):" + % (term, location, delay), + ex, + ) + time.sleep(delay) + values = [p["value"] for p in result["data"]["lines"][0]["points"]] + ew = result["start_week"] + num_missing = 0 + for v in values: + # Default SQL location value for US country for backwards compatibility + # i.e. 
California's location is still stored as 'CA', + # and having location == 'US' is still stored as 'US' + sql_location = location if location != NO_LOCATION_STR else country + + # Change SQL location for non-US countries + if country != "US": + # Underscore added to distinguish countries from 2-letter US states + sql_location = country + "_" + if location != NO_LOCATION_STR: + sql_location = sql_location + location + sql_data = (term, sql_location, ew, v, v) + cur.execute(sql, sql_data) + total_rows += 1 + if v == 0: + num_missing += 1 + # print(' [%s|%s|%d] missing value' % (term, location, ew)) + ew = flu.add_epiweeks(ew, 1) + if num_missing > 0: + print( + " [%s|%s] missing %d/%d value(s)" + % (term, location, num_missing, len(values)) + ) + except Exception as ex: + print(f" [{term}|{location}] caught exception (will NOT retry):", ex) + + # keep track of how many rows were added + rows_after = get_num_rows() + print("Inserted %d/%d row(s)" % (rows_after - rows_before, total_rows)) + + # cleanup + cur.close() + cnx.commit() + cnx.close() def main(): - # args and usage - parser = argparse.ArgumentParser() - parser.add_argument('location', action='store', type=str, default=None, help='location(s) (ex: all; US; TX; CA,LA,WY)') - parser.add_argument('term', action='store', type=str, default=None, help='term/query/topic (ex: all; /m/0cycc; "flu fever")') - parser.add_argument('--first', '-f', default=None, type=int, help='first epiweek override') - parser.add_argument('--last', '-l', default=None, type=int, help='last epiweek override') - parser.add_argument('--country', '-c', default='US', type=str, help='location country (ex: US; BR)') - args = parser.parse_args() - - # sanity check - first, last = args.first, args.last - if first is not None: - flu.check_epiweek(first) - if last is not None: - flu.check_epiweek(last) - if first is not None and last is not None and first > last: - raise Exception('epiweeks in the wrong order') - - # decide what to update - if args.location.lower() == 'all': - locations = LOCATIONS - elif args.location.lower() == 'none': - locations = [NO_LOCATION_STR] - else: - locations = args.location.upper().split(',') - if args.term.lower() == 'all': - terms = TERMS - else: - terms = [args.term] - - # country argument - # Check that country follows ISO 1366 Alpha-2 code. - # See https://www.iso.org/obp/ui/#search. 
- countries = args.country.upper().split(',') - if not all(map(lambda x: len(x) == 2, countries)): - raise Exception('country name must be two letters (ISO 1366 Alpha-2)') - - # if length of locations and countries is > 1, need to be the same - if len(locations) > 1 and len(countries) > 1 and len(locations) != len(countries): - raise Exception('locations and countries must be length 1, or same length') - - # run the update - update(locations, terms, first, last, countries) - - -if __name__ == '__main__': - main() + # args and usage + parser = argparse.ArgumentParser() + parser.add_argument( + "location", + action="store", + type=str, + default=None, + help="location(s) (ex: all; US; TX; CA,LA,WY)", + ) + parser.add_argument( + "term", + action="store", + type=str, + default=None, + help='term/query/topic (ex: all; /m/0cycc; "flu fever")', + ) + parser.add_argument("--first", "-f", default=None, type=int, help="first epiweek override") + parser.add_argument("--last", "-l", default=None, type=int, help="last epiweek override") + parser.add_argument( + "--country", "-c", default="US", type=str, help="location country (ex: US; BR)" + ) + args = parser.parse_args() + + # sanity check + first, last = args.first, args.last + if first is not None: + flu.check_epiweek(first) + if last is not None: + flu.check_epiweek(last) + if first is not None and last is not None and first > last: + raise Exception("epiweeks in the wrong order") + + # decide what to update + if args.location.lower() == "all": + locations = LOCATIONS + elif args.location.lower() == "none": + locations = [NO_LOCATION_STR] + else: + locations = args.location.upper().split(",") + if args.term.lower() == "all": + terms = TERMS + else: + terms = [args.term] + + # country argument + # Check that country follows ISO 1366 Alpha-2 code. + # See https://www.iso.org/obp/ui/#search. 
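    # For example (hypothetical inputs): "US,BR" splits into ["US", "BR"] and
    # passes the two-letter length check below, while "USA", "Brazil", or a
    # mixed value like "US,BRA" is rejected.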
+ countries = args.country.upper().split(",") + if not all(map(lambda x: len(x) == 2, countries)): + raise Exception("country name must be two letters (ISO 1366 Alpha-2)") + + # if length of locations and countries is > 1, need to be the same + if len(locations) > 1 and len(countries) > 1 and len(locations) != len(countries): + raise Exception("locations and countries must be length 1, or same length") + + # run the update + update(locations, terms, first, last, countries) + + +if __name__ == "__main__": + main() diff --git a/src/acquisition/ght/google_health_trends.py b/src/acquisition/ght/google_health_trends.py index 66a11c227..7fd95f9a4 100644 --- a/src/acquisition/ght/google_health_trends.py +++ b/src/acquisition/ght/google_health_trends.py @@ -1,4 +1,4 @@ -''' +""" =============== === Purpose === =============== @@ -18,7 +18,7 @@ + sample command line usage + extract array of values from returned data * separated GHT class from ght_update.py -''' +""" # standard library import argparse @@ -31,109 +31,120 @@ from delphi.utils.epidate import EpiDate import delphi.utils.epiweek as flu -NO_LOCATION_STR = 'none' +NO_LOCATION_STR = "none" + class GHT: - # Google Trends API endpoint - DISCOVERY_URL = 'https://www.googleapis.com/discovery/v1/apis/trends/v1beta/rest' - - def __init__(self, key, delay=1): - self.service = build('trends', 'v1beta', developerKey=key, discoveryServiceUrl=GHT.DISCOVERY_URL) - self.delay = delay - - # converts a YYYYWW week into a YYYY-MM-DD date (using Sunday of the week) - @staticmethod - def _ew2date(ew): - # parse the epiweek - year, week = flu.split_epiweek(ew) - # get the date object (middle of the week; Wednesday) - date = EpiDate.from_epiweek(year, week) - # go to the first day of the week (Sunday) - date = date.add_days(-3) - # date as string - return str(date) - - # get data from Google APIs - # see: https://developers.google.com/apis-explorer/#p/trends/v1beta/trends.getTimelinesForHealth - def get_data(self, start_week, end_week, location, term, resolution='week', country='US'): - start_date = GHT._ew2date(start_week) - end_date = GHT._ew2date(end_week) - num_weeks = flu.delta_epiweeks(start_week, end_week) + 1 - - # getTimelinesForHealth parameters - params = { - 'terms': term, - 'time_startDate': start_date, - 'time_endDate': end_date, - 'timelineResolution': resolution, - } - # We have a special check for the US for backwards compatibility. - # i.e. if the country is 'US' AND the location is 'US', just put the geo-restriction for country. - # In contrast, another country might have a sub-region with initials 'US' and we want the region restriction instead. 
- if country == 'US': - if location == 'US' or location == NO_LOCATION_STR: - params['geoRestriction_country'] = 'US' - else: - params['geoRestriction_region'] = 'US-' + location - else: - if location == NO_LOCATION_STR: - params['geoRestriction_country'] = country - else: - params['geoRestriction_region'] = country + '-' + location - - # make the API call - data = self.service.getTimelinesForHealth(**params).execute() - - # extract the values - try: - values = [p['value'] for p in data['lines'][0]['points']] - except: - values = None - - # throttle request rate - time.sleep(self.delay) - - # return the results - return { - 'start_week': start_week, - 'end_week': end_week, - 'num_weeks': num_weeks, - 'location': location, - 'country' : country, - 'term': term, - 'resolution': resolution, - 'data': data, - 'values': values, - } + # Google Trends API endpoint + DISCOVERY_URL = "https://www.googleapis.com/discovery/v1/apis/trends/v1beta/rest" + + def __init__(self, key, delay=1): + self.service = build( + "trends", "v1beta", developerKey=key, discoveryServiceUrl=GHT.DISCOVERY_URL + ) + self.delay = delay + + # converts a YYYYWW week into a YYYY-MM-DD date (using Sunday of the week) + @staticmethod + def _ew2date(ew): + # parse the epiweek + year, week = flu.split_epiweek(ew) + # get the date object (middle of the week; Wednesday) + date = EpiDate.from_epiweek(year, week) + # go to the first day of the week (Sunday) + date = date.add_days(-3) + # date as string + return str(date) + + # get data from Google APIs + # see: https://developers.google.com/apis-explorer/#p/trends/v1beta/trends.getTimelinesForHealth + def get_data(self, start_week, end_week, location, term, resolution="week", country="US"): + start_date = GHT._ew2date(start_week) + end_date = GHT._ew2date(end_week) + num_weeks = flu.delta_epiweeks(start_week, end_week) + 1 + + # getTimelinesForHealth parameters + params = { + "terms": term, + "time_startDate": start_date, + "time_endDate": end_date, + "timelineResolution": resolution, + } + # We have a special check for the US for backwards compatibility. + # i.e. if the country is 'US' AND the location is 'US', just put the geo-restriction for country. + # In contrast, another country might have a sub-region with initials 'US' and we want the region restriction instead. 
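The comment above describes the only non-obvious branch in GHT.get_data: when the country is US, a location of US (or the none sentinel) becomes a country-level geo restriction, while any other location becomes the region US-<location>; for any other country the same split applies with that country's code. A standalone sketch of just that branch, factored into a hypothetical helper (the real code inlines this while building the getTimelinesForHealth params):

NO_LOCATION_STR = "none"


def geo_restriction(country, location):
    """Return the single geoRestriction_* parameter for a (country, location) pair."""
    if country == "US":
        if location in ("US", NO_LOCATION_STR):
            return {"geoRestriction_country": "US"}
        return {"geoRestriction_region": "US-" + location}
    if location == NO_LOCATION_STR:
        return {"geoRestriction_country": country}
    return {"geoRestriction_region": country + "-" + location}


assert geo_restriction("US", "US") == {"geoRestriction_country": "US"}
assert geo_restriction("US", "TX") == {"geoRestriction_region": "US-TX"}
assert geo_restriction("BR", NO_LOCATION_STR) == {"geoRestriction_country": "BR"}
# the ambiguous case the comment warns about: a non-US sub-region whose code is "US"
assert geo_restriction("BR", "US") == {"geoRestriction_region": "BR-US"}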
+ if country == "US": + if location == "US" or location == NO_LOCATION_STR: + params["geoRestriction_country"] = "US" + else: + params["geoRestriction_region"] = "US-" + location + else: + if location == NO_LOCATION_STR: + params["geoRestriction_country"] = country + else: + params["geoRestriction_region"] = country + "-" + location + + # make the API call + data = self.service.getTimelinesForHealth(**params).execute() + + # extract the values + try: + values = [p["value"] for p in data["lines"][0]["points"]] + except: + values = None + + # throttle request rate + time.sleep(self.delay) + + # return the results + return { + "start_week": start_week, + "end_week": end_week, + "num_weeks": num_weeks, + "location": location, + "country": country, + "term": term, + "resolution": resolution, + "data": data, + "values": values, + } def main(): - # args and usage - parser = argparse.ArgumentParser() - parser.add_argument('apikey', action='store', type=str, default=None, help='API key') - parser.add_argument('startweek', action='store', type=int, default=None, help='first week (ex: 201440)') - parser.add_argument('endweek', action='store', type=int, default=None, help='last week (ex: 201520)') - parser.add_argument('location', action='store', type=str, default=None, help='location (ex: US)') - parser.add_argument('term', action='store', type=str, default=None, help='term/query/topic (ex: /m/0cycc)') - args = parser.parse_args() - - # get the data - ght = GHT(args.apikey) - result = ght.get_data(args.startweek, args.endweek, args.location, args.term) - values = result['values'] - - # sanity check - expected_weeks = result['num_weeks'] - received_weeks = len([v for v in values if v is not None and type(v) == float and v >= 0]) - if expected_weeks != received_weeks: - raise Exception('expected %d weeks, received %d' % (expected_weeks, received_weeks)) - - # results - epiweeks = [ew for ew in flu.range_epiweeks(args.startweek, args.endweek, inclusive=True)] - for (epiweek, value) in zip(epiweeks, values): - print('%6d: %.3f' % (epiweek, value)) - - -if __name__ == '__main__': - main() + # args and usage + parser = argparse.ArgumentParser() + parser.add_argument("apikey", action="store", type=str, default=None, help="API key") + parser.add_argument( + "startweek", action="store", type=int, default=None, help="first week (ex: 201440)" + ) + parser.add_argument( + "endweek", action="store", type=int, default=None, help="last week (ex: 201520)" + ) + parser.add_argument( + "location", action="store", type=str, default=None, help="location (ex: US)" + ) + parser.add_argument( + "term", action="store", type=str, default=None, help="term/query/topic (ex: /m/0cycc)" + ) + args = parser.parse_args() + + # get the data + ght = GHT(args.apikey) + result = ght.get_data(args.startweek, args.endweek, args.location, args.term) + values = result["values"] + + # sanity check + expected_weeks = result["num_weeks"] + received_weeks = len([v for v in values if v is not None and type(v) == float and v >= 0]) + if expected_weeks != received_weeks: + raise Exception("expected %d weeks, received %d" % (expected_weeks, received_weeks)) + + # results + epiweeks = [ew for ew in flu.range_epiweeks(args.startweek, args.endweek, inclusive=True)] + for (epiweek, value) in zip(epiweeks, values): + print("%6d: %.3f" % (epiweek, value)) + + +if __name__ == "__main__": + main() From a849384c884934b3b7c3c67b68aa6240277d6b6d Mon Sep 17 00:00:00 2001 From: Dmitry Shemetov Date: Wed, 21 Jun 2023 13:54:09 -0700 Subject: [PATCH 24/43] 
style(black): format kcdc acquisition --- src/acquisition/kcdc/kcdc_update.py | 90 ++++++++++++++++------------- 1 file changed, 49 insertions(+), 41 deletions(-) diff --git a/src/acquisition/kcdc/kcdc_update.py b/src/acquisition/kcdc/kcdc_update.py index 70c167738..b2c12dba9 100644 --- a/src/acquisition/kcdc/kcdc_update.py +++ b/src/acquisition/kcdc/kcdc_update.py @@ -42,12 +42,14 @@ from delphi.utils.epiweek import delta_epiweeks, range_epiweeks, add_epiweeks from delphi.utils.epidate import EpiDate + def ensure_tables_exist(): - (u,p) = secrets.db.epi - cnx = mysql.connector.connect(user=u,password=p,database='epidata') + (u, p) = secrets.db.epi + cnx = mysql.connector.connect(user=u, password=p, database="epidata") try: cursor = cnx.cursor() - cursor.execute(''' + cursor.execute( + """ CREATE TABLE IF NOT EXISTS `kcdc_ili` ( `id` INT(11) NOT NULL PRIMARY KEY AUTO_INCREMENT, `release_date` DATE NOT NULL, @@ -58,69 +60,76 @@ def ensure_tables_exist(): `ili` DOUBLE NOT NULL, UNIQUE KEY (`issue`, `epiweek`, `region`) ); - '''); + """ + ) cnx.commit() finally: cnx.close() + def safe_float(f): try: - return float(f.replace(',','')) + return float(f.replace(",", "")) except: return 0 + def safe_int(i): try: - return int(i.replace(',','')) + return int(i.replace(",", "")) except: return 0 -def get_rows(cnx, table='kcdc_ili'): - # Count and return the number of rows in the `kcdc_ili` table. - select = cnx.cursor() - select.execute('SELECT count(1) num FROM %s' % table) - for (num,) in select: - pass - select.close() - return num + +def get_rows(cnx, table="kcdc_ili"): + # Count and return the number of rows in the `kcdc_ili` table. + select = cnx.cursor() + select.execute("SELECT count(1) num FROM %s" % table) + for (num,) in select: + pass + select.close() + return num + def get_kcdc_data(): issue = EpiDate.today().get_ew() - last_season = issue//100 + (1 if issue % 100 > 35 else 0) - url = 'http://www.cdc.go.kr/npt/biz/npp/iss/influenzaListAjax.do' + last_season = issue // 100 + (1 if issue % 100 > 35 else 0) + url = "https://www.cdc.go.kr/npt/biz/npp/iss/influenzaListAjax.do" + # Started in 2004 params = { - 'icdNm': 'influenza', - 'startYear': '2004', # Started in 2004 - 'endYear': str(last_season) + "icdNm": "influenza", + "startYear": "2004", + "endYear": str(last_season), } response = requests.post(url, params) datas = response.json() - data = datas['data'] + data = datas["data"] ews = [] ilis = [] ew1 = 200436 - for year in range(2004,last_season): - year_data = data[year-2004] + for year in range(2004, last_season): + year_data = data[year - 2004] if year > 2004: ew1 = ews[-1] + 1 - ili_yr = year_data["VALUE"].split('`') - ili_yr = [float(f) for f in ili_yr if f != ''] - ew2 = add_epiweeks(ew1,len(ili_yr)) - new_ews = list(range_epiweeks(ew1,ew2)) + ili_yr = year_data["VALUE"].split("`") + ili_yr = [float(f) for f in ili_yr if f != ""] + ew2 = add_epiweeks(ew1, len(ili_yr)) + new_ews = list(range_epiweeks(ew1, ew2)) for i in range(len(new_ews)): j = float(ili_yr[i]) ilis.append(j) ews.append(new_ews[i]) return ews, ilis + def update_from_data(ews, ilis, date, issue, test_mode=False): u, p = secrets.db.epi - cnx = mysql.connector.connect(user=u, password=p, database='epidata') + cnx = mysql.connector.connect(user=u, password=p, database="epidata") rows1 = get_rows(cnx) - print('rows before: %d' % (rows1)) + print("rows before: %d" % (rows1)) insert = cnx.cursor() - sql = ''' + sql = """ INSERT INTO `kcdc_ili` (`release_date`, `issue`, `epiweek`, `region`, `lag`, `ili`) @@ -129,15 
+138,15 @@ def update_from_data(ews, ilis, date, issue, test_mode=False): ON DUPLICATE KEY UPDATE `release_date` = least(`release_date`, '%s'), `ili` = %s - ''' + """ for i in range(len(ews)): ew = ews[i] ili = ilis[i] lag = delta_epiweeks(ews[i], issue) - insert_args = [date,issue,ew,'ROK',lag,ili] - update_args = [date,ili] + insert_args = [date, issue, ew, "ROK", lag, ili] + update_args = [date, ili] try: insert.execute(sql % tuple(insert_args + update_args)) except Exception: @@ -146,34 +155,33 @@ def update_from_data(ews, ilis, date, issue, test_mode=False): # cleanup insert.close() if test_mode: - print('test mode, not committing') + print("test mode, not committing") rows2 = rows1 else: cnx.commit() rows2 = get_rows(cnx) - print('rows after: %d (added %d)' % (rows2,rows2-rows1)) + print("rows after: %d (added %d)" % (rows2, rows2 - rows1)) cnx.close() + def main(): # args and usage parser = argparse.ArgumentParser() parser.add_argument( - '--test', - action='store_true', - help='do dry run only, do not update the database' + "--test", action="store_true", help="do dry run only, do not update the database" ) args = parser.parse_args() - date = datetime.datetime.now().strftime('%Y-%m-%d') - print('assuming release date is today, %s' % date) + date = datetime.datetime.now().strftime("%Y-%m-%d") + print("assuming release date is today, %s" % date) issue = EpiDate.today().get_ew() ensure_tables_exist() - ews,ilis = get_kcdc_data() + ews, ilis = get_kcdc_data() update_from_data(ews, ilis, date, issue, test_mode=args.test) -if __name__ == '__main__': +if __name__ == "__main__": main() From d04af3c02fda7708a16bec0952b1aa7475acaec7 Mon Sep 17 00:00:00 2001 From: Dmitry Shemetov Date: Wed, 21 Jun 2023 14:04:35 -0700 Subject: [PATCH 25/43] style(black): format nidss acquisition --- src/acquisition/nidss/taiwan_nidss.py | 433 +++++++++++++------------ src/acquisition/nidss/taiwan_update.py | 162 +++++---- 2 files changed, 296 insertions(+), 299 deletions(-) diff --git a/src/acquisition/nidss/taiwan_nidss.py b/src/acquisition/nidss/taiwan_nidss.py index 27da863e1..57f4e272d 100644 --- a/src/acquisition/nidss/taiwan_nidss.py +++ b/src/acquisition/nidss/taiwan_nidss.py @@ -4,7 +4,7 @@ =============== Scrapes weekly flu data from Taiwan's National Infectious Disease Statistics -System (NIDSS): http://nidss.cdc.gov.tw/en/ +System (NIDSS): https://nidss.cdc.gov.tw/en/ ================= @@ -37,233 +37,234 @@ class NIDSS: - """An API for scraping the NIDSS site.""" + """An API for scraping the NIDSS site.""" - # The page where the flu data is kept - FLU_URL = 'https://nidss.cdc.gov.tw/en/CDCWNH01.aspx?dc=wnh' + # The page where the flu data is kept + FLU_URL = "https://nidss.cdc.gov.tw/en/CDCWNH01.aspx?dc=wnh" - # Link to the dengue data - DENGUE_URL = 'http://nidss.cdc.gov.tw/Download/Weekly_Age_County_Gender_061.csv' + # Link to the dengue data + DENGUE_URL = "https://nidss.cdc.gov.tw/Download/Weekly_Age_County_Gender_061.csv" - # Translate location names to English - # https://en.wikipedia.org/wiki/List_of_administrative_divisions_of_Taiwan - _TRANSLATED = { - b'5Y2X5oqV57ij': 'Nantou_County', - b'5Y+w5Lit5biC': 'Taichung_City', - b'5Y+w5YyX5biC': 'Taipei_City', - b'5Y+w5Y2X5biC': 'Tainan_City', - b'5Y+w5p2x57ij': 'Taitung_County', - b'5ZiJ576p5biC': 'Chiayi_City', - b'5ZiJ576p57ij': 'Chiayi_County', - b'5Z+66ZqG5biC': 'Keelung_City', - b'5a6c6Jit57ij': 'Yilan_County', - b'5bGP5p2x57ij': 'Pingtung_County', - b'5b2w5YyW57ij': 'Changhua_County', - b'5paw5YyX5biC': 'New_Taipei_City', - b'5paw56u55biC': 
'Hsinchu_City', - b'5paw56u557ij': 'Hsinchu_County', - b'5qGD5ZyS5biC': 'Taoyuan_City', - b'5r6O5rmW57ij': 'Penghu_County', - b'6Iqx6JOu57ij': 'Hualien_County', - b'6IuX5qCX57ij': 'Miaoli_County', - b'6YeR6ZaA57ij': 'Kinmen_County', - b'6Zuy5p6X57ij': 'Yunlin_County', - b'6auY6ZuE5biC': 'Kaohsiung_City', - b'6YCj5rGf57ij': 'Lienchiang_County', - } + # Translate location names to English + # https://en.wikipedia.org/wiki/List_of_administrative_divisions_of_Taiwan + _TRANSLATED = { + b"5Y2X5oqV57ij": "Nantou_County", + b"5Y+w5Lit5biC": "Taichung_City", + b"5Y+w5YyX5biC": "Taipei_City", + b"5Y+w5Y2X5biC": "Tainan_City", + b"5Y+w5p2x57ij": "Taitung_County", + b"5ZiJ576p5biC": "Chiayi_City", + b"5ZiJ576p57ij": "Chiayi_County", + b"5Z+66ZqG5biC": "Keelung_City", + b"5a6c6Jit57ij": "Yilan_County", + b"5bGP5p2x57ij": "Pingtung_County", + b"5b2w5YyW57ij": "Changhua_County", + b"5paw5YyX5biC": "New_Taipei_City", + b"5paw56u55biC": "Hsinchu_City", + b"5paw56u557ij": "Hsinchu_County", + b"5qGD5ZyS5biC": "Taoyuan_City", + b"5r6O5rmW57ij": "Penghu_County", + b"6Iqx6JOu57ij": "Hualien_County", + b"6IuX5qCX57ij": "Miaoli_County", + b"6YeR6ZaA57ij": "Kinmen_County", + b"6Zuy5p6X57ij": "Yunlin_County", + b"6auY6ZuE5biC": "Kaohsiung_City", + b"6YCj5rGf57ij": "Lienchiang_County", + } - # Map locations to regions - # https://en.wikipedia.org/wiki/List_of_administrative_divisions_of_Taiwan - # https://en.wikipedia.org/wiki/Regions_of_Taiwan#Hexchotomy - LOCATION_TO_REGION = { - # Taipei - 'Taipei_City': 'Taipei', - 'Keelung_City': 'Taipei', - 'New_Taipei_City': 'Taipei', - 'Yilan_County': 'Taipei', - 'Kinmen_County': 'Taipei', - 'Lienchiang_County': 'Taipei', - # Northern - 'Hsinchu_City': 'Northern', - 'Taoyuan_City': 'Northern', - 'Hsinchu_County': 'Northern', - 'Miaoli_County': 'Northern', - # Central - 'Taichung_City': 'Central', - 'Changhua_County': 'Central', - 'Nantou_County': 'Central', - # Southern - 'Tainan_City': 'Southern', - 'Chiayi_City': 'Southern', - 'Yunlin_County': 'Southern', - 'Chiayi_County': 'Southern', - # Kaoping - 'Kaohsiung_City': 'Kaoping', - 'Pingtung_County': 'Kaoping', - 'Penghu_County': 'Kaoping', - # Eastern - 'Hualien_County': 'Eastern', - 'Taitung_County': 'Eastern', - } + # Map locations to regions + # https://en.wikipedia.org/wiki/List_of_administrative_divisions_of_Taiwan + # https://en.wikipedia.org/wiki/Regions_of_Taiwan#Hexchotomy + LOCATION_TO_REGION = { + # Taipei + "Taipei_City": "Taipei", + "Keelung_City": "Taipei", + "New_Taipei_City": "Taipei", + "Yilan_County": "Taipei", + "Kinmen_County": "Taipei", + "Lienchiang_County": "Taipei", + # Northern + "Hsinchu_City": "Northern", + "Taoyuan_City": "Northern", + "Hsinchu_County": "Northern", + "Miaoli_County": "Northern", + # Central + "Taichung_City": "Central", + "Changhua_County": "Central", + "Nantou_County": "Central", + # Southern + "Tainan_City": "Southern", + "Chiayi_City": "Southern", + "Yunlin_County": "Southern", + "Chiayi_County": "Southern", + # Kaoping + "Kaohsiung_City": "Kaoping", + "Pingtung_County": "Kaoping", + "Penghu_County": "Kaoping", + # Eastern + "Hualien_County": "Eastern", + "Taitung_County": "Eastern", + } - @staticmethod - def _get_metadata(html): - issue_pattern = re.compile('^.*Latest available data: Week (\\d+), (\\d{4})\\..*$') - release_pattern = re.compile('^.*Data as of \\d+:\\d+:\\d+, (\\d{4})/(\\d{2})/(\\d{2})\\..*$') - issue, release = None, None - for line in html.split('\n'): - match = issue_pattern.match(line) - if match is not None: - year, week = int(match.group(2)), 
int(match.group(1)) - issue = year * 100 + week - match = release_pattern.match(line) - if match is not None: - year, month, day = int(match.group(1)), int(match.group(2)), int(match.group(3)) - release = '%04d-%02d-%02d' % (year, month, day) - if issue is None or release is None: - raise Exception('metadata not found') - return issue, release + @staticmethod + def _get_metadata(html): + issue_pattern = re.compile("^.*Latest available data: Week (\\d+), (\\d{4})\\..*$") + release_pattern = re.compile( + "^.*Data as of \\d+:\\d+:\\d+, (\\d{4})/(\\d{2})/(\\d{2})\\..*$" + ) + issue, release = None, None + for line in html.split("\n"): + match = issue_pattern.match(line) + if match is not None: + year, week = int(match.group(2)), int(match.group(1)) + issue = year * 100 + week + match = release_pattern.match(line) + if match is not None: + year, month, day = int(match.group(1)), int(match.group(2)), int(match.group(3)) + release = "%04d-%02d-%02d" % (year, month, day) + if issue is None or release is None: + raise Exception("metadata not found") + return issue, release - @staticmethod - def _get_flu_data(html): - week_pattern = re.compile('^categories: \\[(.*)\\],$') - value_pattern = re.compile('^series: \\[(.*)\\],$') - data = {} - parsing_ili = True - for line in html.split('\n'): - line = line.strip() - match = week_pattern.match(line) - if match is not None: - weeks = [int(x[1:-1]) for x in match.group(1).split(',')] - for week in weeks: - check_epiweek(week) - if week not in data: - data[week] = {} - match = value_pattern.match(line) - if match is not None: - for item in match.group(1).split('},{'): - parts = item.replace('{', '').replace('}', '').strip().split(' ') - location = parts[1][1:-2] - def num(value): - if parsing_ili: - return float(value) - else: - if '.' in value: - raise Exception('expected type int for visits') - return int(value) - values = [num(x) for x in parts[3][1:-1].split(',')] - unit = 'ili' if parsing_ili else 'visits' - if len(weeks) != len(values): - raise Exception('len(weeks) != len(values)') - for week, value in zip(weeks, values): - if location not in data[week]: - data[week][location] = {} - data[week][location][unit] = value - parsing_ili = False - if len(data) == 0: - raise Exception('no data') - return data + @staticmethod + def _get_flu_data(html): + week_pattern = re.compile("^categories: \\[(.*)\\],$") + value_pattern = re.compile("^series: \\[(.*)\\],$") + data = {} + parsing_ili = True + for line in html.split("\n"): + line = line.strip() + match = week_pattern.match(line) + if match is not None: + weeks = [int(x[1:-1]) for x in match.group(1).split(",")] + for week in weeks: + check_epiweek(week) + if week not in data: + data[week] = {} + match = value_pattern.match(line) + if match is not None: + for item in match.group(1).split("},{"): + parts = item.replace("{", "").replace("}", "").strip().split(" ") + location = parts[1][1:-2] + + def num(value): + if parsing_ili: + return float(value) + else: + if "." 
in value: + raise Exception("expected type int for visits") + return int(value) - @staticmethod - def get_flu_data(): - # Fetch the flu page - response = requests.get(NIDSS.FLU_URL) - if response.status_code != 200: - raise Exception('request failed [%d]' % response.status_code) - html = response.text - # Parse metadata - latest_week, release_date = NIDSS._get_metadata(html) - # Parse flu data - data = NIDSS._get_flu_data(html) - # Return results indexed by week and location - return latest_week, release_date, data + values = [num(x) for x in parts[3][1:-1].split(",")] + unit = "ili" if parsing_ili else "visits" + if len(weeks) != len(values): + raise Exception("len(weeks) != len(values)") + for week, value in zip(weeks, values): + if location not in data[week]: + data[week][location] = {} + data[week][location][unit] = value + parsing_ili = False + if len(data) == 0: + raise Exception("no data") + return data - @staticmethod - def get_dengue_data(first_week, last_week): - # Check week order - if first_week > last_week: - first_week, last_week = last_week, first_week - # Bounds check - if first_week < 200301 or last_week < 200301: - raise Exception('week out of range') - # Initialize data by week and location (zeroes are not reported) - data = {} - for week in range_epiweeks(first_week, add_epiweeks(last_week, 1)): - data[week] = {} - for location in NIDSS.LOCATION_TO_REGION.keys(): - data[week][location] = 0 - # Download CSV - response = requests.get(NIDSS.DENGUE_URL) - if response.status_code != 200: - raise Exception('export Dengue failed [%d]' % response.status_code) - csv = response.content.decode('big5-tw') - # Parse the data - lines = [l.strip() for l in csv.split('\n')[1:] if l.strip() != ''] - for line in lines: - fields = line.split(',') - location_b64 = base64.b64encode(fields[3].encode('utf-8')) - location = NIDSS._TRANSLATED[location_b64] - # Fields currently unused: - # region = NIDSS.LOCATION_TO_REGION[location] - # imported_b64 = base64.b64encode(fields[6].encode('utf-8')) - # imported = imported_b64 == b'5piv' - # sex = fields[5] - # age = fields[7] - count = int(fields[8]) - year = int(fields[1]) - week = int(fields[2]) - # Week 53 was reported each year in 2003-2007 - if year < 2008 and year != 2003 and week > 52: - week = 52 - # Epiweek system change in 2009 - # See also: http://research.undefinedx.com/forum/index.php?topic=300.0 - if year == 2009: - week -= 1 - if week == 0: - year, week = 2008, 53 - epiweek = year * 100 + week - if epiweek < first_week or epiweek > last_week: - # Outside of the requested range - continue - if epiweek not in data or location not in data[epiweek]: - # Not a vaild U.S. 
epiweek - raise Exception('data missing %d-%s' % (epiweek, location)) - # Add the counts to the location on this epiweek - data[epiweek][location] += count - # Return results indexed by week and location - return data + @staticmethod + def get_flu_data(): + # Fetch the flu page + response = requests.get(NIDSS.FLU_URL) + if response.status_code != 200: + raise Exception("request failed [%d]" % response.status_code) + html = response.text + # Parse metadata + latest_week, release_date = NIDSS._get_metadata(html) + # Parse flu data + data = NIDSS._get_flu_data(html) + # Return results indexed by week and location + return latest_week, release_date, data + + @staticmethod + def get_dengue_data(first_week, last_week): + # Check week order + if first_week > last_week: + first_week, last_week = last_week, first_week + # Bounds check + if first_week < 200301 or last_week < 200301: + raise Exception("week out of range") + # Initialize data by week and location (zeroes are not reported) + data = {} + for week in range_epiweeks(first_week, add_epiweeks(last_week, 1)): + data[week] = {} + for location in NIDSS.LOCATION_TO_REGION.keys(): + data[week][location] = 0 + # Download CSV + response = requests.get(NIDSS.DENGUE_URL) + if response.status_code != 200: + raise Exception("export Dengue failed [%d]" % response.status_code) + csv = response.content.decode("big5-tw") + # Parse the data + lines = [l.strip() for l in csv.split("\n")[1:] if l.strip() != ""] + for line in lines: + fields = line.split(",") + location_b64 = base64.b64encode(fields[3].encode("utf-8")) + location = NIDSS._TRANSLATED[location_b64] + # Fields currently unused: + # region = NIDSS.LOCATION_TO_REGION[location] + # imported_b64 = base64.b64encode(fields[6].encode('utf-8')) + # imported = imported_b64 == b'5piv' + # sex = fields[5] + # age = fields[7] + count = int(fields[8]) + year = int(fields[1]) + week = int(fields[2]) + # Week 53 was reported each year in 2003-2007 + if year < 2008 and year != 2003 and week > 52: + week = 52 + # Epiweek system change in 2009 + # See also: https://research.undefinedx.com/forum/index.php?topic=300.0 + if year == 2009: + week -= 1 + if week == 0: + year, week = 2008, 53 + epiweek = year * 100 + week + if epiweek < first_week or epiweek > last_week: + # Outside of the requested range + continue + if epiweek not in data or location not in data[epiweek]: + # Not a vaild U.S. 
epiweek + raise Exception("data missing %d-%s" % (epiweek, location)) + # Add the counts to the location on this epiweek + data[epiweek][location] += count + # Return results indexed by week and location + return data def main(): - # Args and usage - parser = argparse.ArgumentParser() - parser.add_argument( - 'epiweek', - action='store', - type=int, - help='fetch data on this epiweek (ex: 201537)' - ) - args = parser.parse_args() - ew = args.epiweek + # Args and usage + parser = argparse.ArgumentParser() + parser.add_argument( + "epiweek", action="store", type=int, help="fetch data on this epiweek (ex: 201537)" + ) + args = parser.parse_args() + ew = args.epiweek - # Get the data - latest_week, release_date, fdata = NIDSS.get_flu_data() - ddata = NIDSS.get_dengue_data(ew, ew) + # Get the data + latest_week, release_date, fdata = NIDSS.get_flu_data() + ddata = NIDSS.get_dengue_data(ew, ew) - # Print the results - print('*** Meta ***') - print('latest_week:', latest_week) - print('release_date:', release_date) - print('*** Flu ***') - for region in sorted(list(fdata[ew].keys())): - visits, ili = fdata[ew][region]['visits'], fdata[ew][region]['ili'] - print('region=%s | visits=%d | ili=%.3f' % (region, visits, ili)) - print('*** Dengue ***') - for location in sorted(list(ddata[ew].keys())): - region = NIDSS.LOCATION_TO_REGION[location] - count = ddata[ew][location] - print('location=%s | region=%s | count=%d' % (location, region, count)) + # Print the results + print("*** Meta ***") + print("latest_week:", latest_week) + print("release_date:", release_date) + print("*** Flu ***") + for region in sorted(list(fdata[ew].keys())): + visits, ili = fdata[ew][region]["visits"], fdata[ew][region]["ili"] + print("region=%s | visits=%d | ili=%.3f" % (region, visits, ili)) + print("*** Dengue ***") + for location in sorted(list(ddata[ew].keys())): + region = NIDSS.LOCATION_TO_REGION[location] + count = ddata[ew][location] + print("location=%s | region=%s | count=%d" % (location, region, count)) -if __name__ == '__main__': - main() +if __name__ == "__main__": + main() diff --git a/src/acquisition/nidss/taiwan_update.py b/src/acquisition/nidss/taiwan_update.py index 830a7738d..c22f0dfaa 100644 --- a/src/acquisition/nidss/taiwan_update.py +++ b/src/acquisition/nidss/taiwan_update.py @@ -87,92 +87,88 @@ # Get a row count just to know how many new rows are inserted def get_rows(cnx): - select = cnx.cursor() - select.execute('SELECT count(1) num FROM nidss_flu') - for (num,) in select: - rows_flu = num - select.execute('SELECT count(1) num FROM nidss_dengue') - for (num,) in select: - rows_dengue = num - select.close() - return (rows_flu, rows_dengue) + select = cnx.cursor() + select.execute("SELECT count(1) num FROM nidss_flu") + for (num,) in select: + rows_flu = num + select.execute("SELECT count(1) num FROM nidss_dengue") + for (num,) in select: + rows_dengue = num + select.close() + return (rows_flu, rows_dengue) def update(test_mode=False): - # test mode - if test_mode: - print('test mode enabled: changes will not be saved') - - # Database connection - u, p = secrets.db.epi - cnx = mysql.connector.connect(user=u, password=p, database='epidata') - rows1 = get_rows(cnx) - print('rows before (flu): %d' % (rows1[0])) - print('rows before (dengue): %d' % (rows1[1])) - insert = cnx.cursor() - sql_flu = ''' - INSERT INTO - `nidss_flu` (`release_date`, `issue`, `epiweek`, `region`, `lag`, `visits`, `ili`) - VALUES - (%s, %s, %s, %s, %s, %s, %s) - ON DUPLICATE KEY UPDATE - `release_date` = least(`release_date`, 
%s), `visits` = %s, `ili` = %s - ''' - sql_dengue = ''' - INSERT INTO - `nidss_dengue` (`epiweek`, `location`, `region`, `count`) - VALUES - (%s, %s, %s, %s) - ON DUPLICATE KEY UPDATE - `count` = %s - ''' - - # Scrape flu data - current_week, release_date, data = NIDSS.get_flu_data() - for epiweek in sorted(list(data.keys())): - lag = delta_epiweeks(epiweek, current_week) - for region in data[epiweek].keys(): - visits, ili = data[epiweek][region]['visits'], data[epiweek][region]['ili'] - params1 = [release_date, current_week, epiweek, region, lag, visits, ili] - params2 = [release_date, visits, ili] - insert.execute(sql_flu, tuple(params1 + params2)) - - # Scrape dengue data from the past year - data = NIDSS.get_dengue_data(add_epiweeks(current_week, -51), current_week) - for epiweek in sorted(list(data.keys())): - for location in sorted(list(data[epiweek].keys())): - region = NIDSS.LOCATION_TO_REGION[location] - count = data[epiweek][location] - params = (epiweek, location, region, count, count) - insert.execute(sql_dengue, params) - - # Cleanup - insert.close() - rows2 = get_rows(cnx) - print('rows after (flu): %d (added %d)' % (rows2[0], rows2[0] - rows1[0])) - print('rows after (dengue): %d (added %d)' % (rows2[1], rows2[1] - rows1[1])) - if test_mode: - print('test mode: changes not commited') - else: - cnx.commit() - cnx.close() + # test mode + if test_mode: + print("test mode enabled: changes will not be saved") + + # Database connection + u, p = secrets.db.epi + cnx = mysql.connector.connect(user=u, password=p, database="epidata") + rows1 = get_rows(cnx) + print("rows before (flu): %d" % (rows1[0])) + print("rows before (dengue): %d" % (rows1[1])) + insert = cnx.cursor() + sql_flu = """ + INSERT INTO + `nidss_flu` (`release_date`, `issue`, `epiweek`, `region`, `lag`, `visits`, `ili`) + VALUES + (%s, %s, %s, %s, %s, %s, %s) + ON DUPLICATE KEY UPDATE + `release_date` = least(`release_date`, %s), `visits` = %s, `ili` = %s + """ + sql_dengue = """ + INSERT INTO + `nidss_dengue` (`epiweek`, `location`, `region`, `count`) + VALUES + (%s, %s, %s, %s) + ON DUPLICATE KEY UPDATE + `count` = %s + """ + + # Scrape flu data + current_week, release_date, data = NIDSS.get_flu_data() + for epiweek in sorted(list(data.keys())): + lag = delta_epiweeks(epiweek, current_week) + for region in data[epiweek].keys(): + visits, ili = data[epiweek][region]["visits"], data[epiweek][region]["ili"] + params1 = [release_date, current_week, epiweek, region, lag, visits, ili] + params2 = [release_date, visits, ili] + insert.execute(sql_flu, tuple(params1 + params2)) + + # Scrape dengue data from the past year + data = NIDSS.get_dengue_data(add_epiweeks(current_week, -51), current_week) + for epiweek in sorted(list(data.keys())): + for location in sorted(list(data[epiweek].keys())): + region = NIDSS.LOCATION_TO_REGION[location] + count = data[epiweek][location] + params = (epiweek, location, region, count, count) + insert.execute(sql_dengue, params) + + # Cleanup + insert.close() + rows2 = get_rows(cnx) + print("rows after (flu): %d (added %d)" % (rows2[0], rows2[0] - rows1[0])) + print("rows after (dengue): %d (added %d)" % (rows2[1], rows2[1] - rows1[1])) + if test_mode: + print("test mode: changes not commited") + else: + cnx.commit() + cnx.close() def main(): - # args and usage - parser = argparse.ArgumentParser() - parser.add_argument( - '--test', - '-t', - action='store_true', - default=False, - help='test mode, do not commit changes' - ) - args = parser.parse_args() - - # fetch and store NIDSS data - 
update(args.test) - - -if __name__ == '__main__': - main() + # args and usage + parser = argparse.ArgumentParser() + parser.add_argument( + "--test", "-t", action="store_true", default=False, help="test mode, do not commit changes" + ) + args = parser.parse_args() + + # fetch and store NIDSS data + update(args.test) + + +if __name__ == "__main__": + main() From 7f60fbba572c1b6e5153a9ef216895bdc2f7f5b3 Mon Sep 17 00:00:00 2001 From: Dmitry Shemetov Date: Wed, 21 Jun 2023 14:07:32 -0700 Subject: [PATCH 26/43] style(black): format paho acquisition --- src/acquisition/paho/paho_db_update.py | 137 ++++++++++++++----------- src/acquisition/paho/paho_download.py | 114 ++++++++++++-------- 2 files changed, 147 insertions(+), 104 deletions(-) diff --git a/src/acquisition/paho/paho_db_update.py b/src/acquisition/paho/paho_db_update.py index d07885f79..08577f580 100644 --- a/src/acquisition/paho/paho_db_update.py +++ b/src/acquisition/paho/paho_db_update.py @@ -50,9 +50,8 @@ import csv import datetime import glob -import subprocess -import random from io import StringIO +import tempfile # third party import mysql.connector @@ -64,12 +63,14 @@ from delphi.utils.epiweek import delta_epiweeks, check_epiweek from delphi.utils.epidate import EpiDate + def ensure_tables_exist(): - (u,p) = secrets.db.epi - cnx = mysql.connector.connect(user=u,password=p,database='epidata') + (u, p) = secrets.db.epi + cnx = mysql.connector.connect(user=u, password=p, database="epidata") try: cursor = cnx.cursor() - cursor.execute(''' + cursor.execute( + """ CREATE TABLE IF NOT EXISTS `paho_dengue` ( `id` INT(11) NOT NULL PRIMARY KEY AUTO_INCREMENT, `release_date` DATE NOT NULL, @@ -85,35 +86,44 @@ def ensure_tables_exist(): `num_deaths` INT(11) NOT NULL, UNIQUE KEY (`issue`, `epiweek`, `region`) ); - '''); + """ + ) cnx.commit() finally: cnx.close() + def safe_float(f): try: - return float(f.replace(',','')) + return float(f.replace(",", "")) except: return 0 + def safe_int(i): try: - return int(i.replace(',','')) + return int(i.replace(",", "")) except: return 0 -def get_rows(cnx, table='paho_dengue'): - # Count and return the number of rows in the `fluview` table. - select = cnx.cursor() - select.execute('SELECT count(1) num FROM %s' % table) - for (num,) in select: - pass - select.close() - return num + +def get_rows(cnx, table="paho_dengue"): + # Count and return the number of rows in the `fluview` table. 
+ select = cnx.cursor() + select.execute("SELECT count(1) num FROM %s" % table) + for (num,) in select: + pass + select.close() + return num + def get_paho_row(row): - if row[0] == "\ufeffIncidence Rate (c)" and row != "\ufeffIncidence Rate (c),(SD/D) x100 (e),CFR (f),ID,Country or Subregion,Deaths,EW,Confirmed,Epidemiological Week (a),Pop (no usar),Serotype,Severe Dengue (d),Total of Dengue Cases (b),Year,Population x 1000".split(","): - raise Exception('PAHO header row has changed') + if row[ + 0 + ] == "\ufeffIncidence Rate (c)" and row != "\ufeffIncidence Rate (c),(SD/D) x100 (e),CFR (f),ID,Country or Subregion,Deaths,EW,Confirmed,Epidemiological Week (a),Pop (no usar),Serotype,Severe Dengue (d),Total of Dengue Cases (b),Year,Population x 1000".split( + "," + ): + raise Exception("PAHO header row has changed") if len(row) == 1 or row[0] == "Incidence Rate (c)": # this is a header row return None @@ -128,23 +138,26 @@ def get_paho_row(row): except: return None try: - check_epiweek(safe_int(row[13])*100 + safe_int(row[8]), safe_int(row[13])*100 + safe_int(row[6])) + check_epiweek( + safe_int(row[13]) * 100 + safe_int(row[8]), safe_int(row[13]) * 100 + safe_int(row[6]) + ) except: return None return { - 'issue': safe_int(row[13])*100 + safe_int(row[6]), - 'epiweek': safe_int(row[13])*100 + safe_int(row[8]), - 'region': country, - 'total_pop': safe_int(row[14]), - 'serotype': row[10], - 'num_dengue': safe_int(row[12]), - 'incidence_rate': safe_float(row[0]), - 'num_severe': safe_int(row[11]), - 'num_deaths': safe_int(row[5]), - 'severe_ratio': safe_float(row[1]), - 'cfr': safe_float(row[2]) + "issue": safe_int(row[13]) * 100 + safe_int(row[6]), + "epiweek": safe_int(row[13]) * 100 + safe_int(row[8]), + "region": country, + "total_pop": safe_int(row[14]), + "serotype": row[10], + "num_dengue": safe_int(row[12]), + "incidence_rate": safe_float(row[0]), + "num_severe": safe_int(row[11]), + "num_deaths": safe_int(row[5]), + "severe_ratio": safe_float(row[1]), + "cfr": safe_float(row[2]), } + def update_from_file(issue, date, filename, test_mode=False): # Read PAHO data from CSV and insert into (or update) the database. 
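update_from_file below, like the other acquisition updaters touched in this patch series, relies on a UNIQUE KEY plus INSERT ... ON DUPLICATE KEY UPDATE so that re-running an import never duplicates rows, and on least(release_date, ...) so the earliest release date is kept. A minimal sketch of that idiom against a hypothetical demo table, using bound parameters (the scripts themselves build the statement with % interpolation); the table name, schema, and connection details are placeholders, not part of the real epidata schema:

import mysql.connector

# Hypothetical table, for illustration only:
#   CREATE TABLE demo_dengue (
#     release_date DATE NOT NULL, epiweek INT NOT NULL, region VARCHAR(16) NOT NULL,
#     num_dengue INT NOT NULL, UNIQUE KEY (epiweek, region));
UPSERT = """
    INSERT INTO demo_dengue (release_date, epiweek, region, num_dengue)
    VALUES (%s, %s, %s, %s)
    ON DUPLICATE KEY UPDATE
      release_date = LEAST(release_date, %s),
      num_dengue = %s
"""


def upsert_rows(cnx, rows):
    """rows: iterable of (release_date, epiweek, region, num_dengue) tuples."""
    cur = cnx.cursor()
    for (date, epiweek, region, num) in rows:
        # insert args followed by the ON DUPLICATE KEY UPDATE args, as in the real scripts
        cur.execute(UPSERT, (date, epiweek, region, num, date, num))
    cur.close()
    cnx.commit()


# usage sketch (credentials are placeholders):
# cnx = mysql.connector.connect(user="epi", password="...", database="epidata")
# upsert_rows(cnx, [("2023-06-21", 202324, "Paraguay", 12)])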
@@ -156,23 +169,23 @@ def update_from_file(issue, date, filename, test_mode=False): # database connection u, p = secrets.db.epi - cnx = mysql.connector.connect(user=u, password=p, database='epidata') - rows1 = get_rows(cnx, 'paho_dengue') - print('rows before: %d' % (rows1)) + cnx = mysql.connector.connect(user=u, password=p, database="epidata") + rows1 = get_rows(cnx, "paho_dengue") + print("rows before: %d" % (rows1)) insert = cnx.cursor() # load the data, ignoring empty rows - print('loading data from %s as issued on %d' % (filename, issue)) - with open(filename,'r',encoding='utf-8') as f: + print("loading data from %s as issued on %d" % (filename, issue)) + with open(filename, encoding="utf-8") as f: c = f.read() rows = [] - for l in csv.reader(StringIO(c), delimiter=','): + for l in csv.reader(StringIO(c), delimiter=","): rows.append(get_paho_row(l)) - print(' loaded %d rows' % len(rows)) + print(" loaded %d rows" % len(rows)) entries = [obj for obj in rows if obj] - print(' found %d entries' % len(entries)) + print(" found %d entries" % len(entries)) - sql = ''' + sql = """ INSERT INTO `paho_dengue` (`release_date`, `issue`, `epiweek`, `region`, `lag`, `total_pop`, `serotype`, `num_dengue`, `incidence_rate`, @@ -187,55 +200,56 @@ def update_from_file(issue, date, filename, test_mode=False): `incidence_rate` = %s, `num_severe` = %s, `num_deaths` = %s - ''' + """ for row in entries: - if row['issue'] > issue: # Issued in a week that hasn't happened yet + if row["issue"] > issue: # Issued in a week that hasn't happened yet continue - lag = delta_epiweeks(row['epiweek'], issue) - data_args = [row['total_pop'], row['serotype'], row['num_dengue'], - row['incidence_rate'], row['num_severe'], row['num_deaths']] + lag = delta_epiweeks(row["epiweek"], issue) + data_args = [ + row["total_pop"], + row["serotype"], + row["num_dengue"], + row["incidence_rate"], + row["num_severe"], + row["num_deaths"], + ] - insert_args = [date,issue,row['epiweek'],row['region'],lag] + data_args + insert_args = [date, issue, row["epiweek"], row["region"], lag] + data_args update_args = [date] + data_args insert.execute(sql % tuple(insert_args + update_args)) # cleanup insert.close() if test_mode: - print('test mode, not committing') + print("test mode, not committing") rows2 = rows1 else: cnx.commit() rows2 = get_rows(cnx) - print('rows after: %d (added %d)' % (rows2,rows2-rows1)) + print("rows after: %d (added %d)" % (rows2, rows2 - rows1)) cnx.close() + def main(): # args and usage parser = argparse.ArgumentParser() parser.add_argument( - '--test', - action='store_true', - help='do dry run only, do not update the database' + "--test", action="store_true", help="do dry run only, do not update the database" ) parser.add_argument( - '--file', - type=str, - help='load an existing zip file (otherwise fetch current data)' + "--file", type=str, help="load an existing zip file (otherwise fetch current data)" ) parser.add_argument( - '--issue', - type=int, - help='issue of the file (e.g. 201740); used iff --file is given' + "--issue", type=int, help="issue of the file (e.g. 
201740); used iff --file is given" ) args = parser.parse_args() if (args.file is None) != (args.issue is None): - raise Exception('--file and --issue must both be present or absent') + raise Exception("--file and --issue must both be present or absent") - date = datetime.datetime.now().strftime('%Y-%m-%d') - print('assuming release date is today, %s' % date) + date = datetime.datetime.now().strftime("%Y-%m-%d") + print("assuming release date is today, %s" % date) if args.file: update_from_file(args.issue, date, args.file, test_mode=args.test) @@ -274,7 +288,8 @@ def main(): if not db_error: break # Exit loop with success if flag >= max_tries: - print('WARNING: Database `paho_dengue` did not update successfully') + print("WARNING: Database `paho_dengue` did not update successfully") + -if __name__ == '__main__': +if __name__ == "__main__": main() diff --git a/src/acquisition/paho/paho_download.py b/src/acquisition/paho/paho_download.py index 60dd13ae8..5308ec93f 100644 --- a/src/acquisition/paho/paho_download.py +++ b/src/acquisition/paho/paho_download.py @@ -1,4 +1,3 @@ - # IMPORTANT: This code is extremely unstable. # Slight changes to the PAHO website may render this script partially or entirely useless. @@ -15,42 +14,51 @@ headerheight = 0 + def wait_for(browser, css_selector, delay=10): try: - WebDriverWait(browser, delay).until(EC.presence_of_element_located((By.CSS_SELECTOR, css_selector))) - WebDriverWait(browser, delay).until(EC.element_to_be_clickable((By.CSS_SELECTOR, css_selector))) - print('Success Loading %s' % (css_selector)) + WebDriverWait(browser, delay).until( + EC.presence_of_element_located((By.CSS_SELECTOR, css_selector)) + ) + WebDriverWait(browser, delay).until( + EC.element_to_be_clickable((By.CSS_SELECTOR, css_selector)) + ) + print("Success Loading %s" % (css_selector)) except TimeoutException: print("Loading %s took too much time!" 
% (css_selector)) - + + def find_and_click(browser, element): element.location_once_scrolled_into_view browser.switch_to.default_content() - browser.execute_script("window.scrollBy(0,-%d)"%headerheight) + browser.execute_script("window.scrollBy(0,-%d)" % headerheight) browser.switch_to.frame(browser.find_element_by_tag_name("iframe")) browser.switch_to.frame(browser.find_element_by_tag_name("iframe")) element.click() -def get_paho_data(offset=0, dir='downloads'): + +def get_paho_data(offset=0, dir="downloads"): opts = Options() opts.set_headless() assert opts.headless # Operating in headless mode fp = FirefoxProfile() - fp.set_preference("browser.download.folderList",2) - fp.set_preference("browser.download.manager.showWhenStarting",False) - fp.set_preference("browser.download.dir",os.path.abspath(dir)) - fp.set_preference("browser.helperApps.neverAsk.saveToDisk","text/csv") - - browser = Firefox(options=opts,firefox_profile=fp) - browser.get('http://www.paho.org/data/index.php/en/mnu-topics/indicadores-dengue-en/dengue-nacional-en/252-dengue-pais-ano-en.html?showall=&start=1') + fp.set_preference("browser.download.folderList", 2) + fp.set_preference("browser.download.manager.showWhenStarting", False) + fp.set_preference("browser.download.dir", os.path.abspath(dir)) + fp.set_preference("browser.helperApps.neverAsk.saveToDisk", "text/csv") + + browser = Firefox(options=opts, firefox_profile=fp) + browser.get( + "https://www.paho.org/data/index.php/en/mnu-topics/indicadores-dengue-en/dengue-nacional-en/252-dengue-pais-ano-en.html?showall=&start=1" + ) tab1 = browser.window_handles[0] - browser.execute_script('''window.open("","_blank");''') + browser.execute_script("""window.open("","_blank");""") tab2 = browser.window_handles[1] browser.switch_to.window(tab1) - + curr_offset = offset - + wait_for(browser, "div.rt-top-inner", delay=30) header = browser.find_element_by_css_selector("div.rt-top-inner") global headerheight @@ -59,41 +67,51 @@ def get_paho_data(offset=0, dir='downloads'): # The actual content of the data of this webpage is within 2 iframes, so we need to navigate into them first browser.switch_to.frame(browser.find_element_by_tag_name("iframe")) browser.switch_to.frame(browser.find_element_by_tag_name("iframe")) - + # Locate the button that allows to download the table - downloadoption = browser.find_elements_by_css_selector("div.tabToolbarButton.tab-widget.download")[0] + downloadoption = browser.find_elements_by_css_selector( + "div.tabToolbarButton.tab-widget.download" + )[0] find_and_click(browser, downloadoption) wait_for(browser, "div[data-tb-test-id='DownloadImage-Button']") # Locate the button that prepares the table for download as an image - imagebutton = browser.find_elements_by_css_selector("div[data-tb-test-id='DownloadImage-Button']")[0] + imagebutton = browser.find_elements_by_css_selector( + "div[data-tb-test-id='DownloadImage-Button']" + )[0] find_and_click(browser, imagebutton) wait_for(browser, ".tabDownloadFileButton[data-test-id='DownloadLink']") # Locate the button that downloads the table as an image - downloadbutton = browser.find_elements_by_css_selector(".tabDownloadFileButton[data-test-id='DownloadLink']")[0] + downloadbutton = browser.find_elements_by_css_selector( + ".tabDownloadFileButton[data-test-id='DownloadLink']" + )[0] # Extract session ID href = downloadbutton.get_attribute("href") startidx = href.index("sessions/") + len("sessions/") - endidx = href.index("/",startidx) + endidx = href.index("/", startidx) sessionid = href[startidx:endidx] - 
dataurl = "http://phip.paho.org/vizql/w/Casosdedengue_tben/v/ByLastAvailableEpiWeek/viewData/sessions/%s/views/18076444178507886853_9530488980060483892?maxrows=200&viz=%%7B%%22worksheet%%22:%%22W%%20By%%20Last%%20Available%%20EpiWeek%%22,%%22dashboard%%22:%%22By%%20Last%%20Available%%20Epi%%20Week%%22%%7D"%sessionid + dataurl = f"https://phip.paho.org/vizql/w/Casosdedengue_tben/v/ByLastAvailableEpiWeek/viewData/sessions/{sessionid}/views/18076444178507886853_9530488980060483892?maxrows=200&viz=%%7B%%22worksheet%%22:%%22W%%20By%%20Last%%20Available%%20EpiWeek%%22,%%22dashboard%%22:%%22By%%20Last%%20Available%%20Epi%%20Week%%22%%7D" wait_for(browser, "div[data-tb-test-id='CancelBtn-Button']") # Cancel image download - cancelbutton = browser.find_elements_by_css_selector("div[data-tb-test-id='CancelBtn-Button']")[0] + cancelbutton = browser.find_elements_by_css_selector("div[data-tb-test-id='CancelBtn-Button']")[ + 0 + ] find_and_click(browser, cancelbutton) wait_for(browser, "div[id='tableau_base_widget_FilterPanel_0']") # Default is to show data for current year, we want to get all years # Clicks drop-down menu to open options - yearselector = browser.find_elements_by_css_selector("div[id='tableau_base_widget_FilterPanel_0']")[0] + yearselector = browser.find_elements_by_css_selector( + "div[id='tableau_base_widget_FilterPanel_0']" + )[0] find_and_click(browser, yearselector) wait_for(browser, "div.facetOverflow") @@ -107,27 +125,29 @@ def get_paho_data(offset=0, dir='downloads'): for i in range(offset): gp = browser.find_element_by_css_selector("div.wcGlassPane") - #print gp.is_enabled() - #print gp.is_selected() - #print gp.is_displayed() + # print gp.is_enabled() + # print gp.is_selected() + # print gp.is_displayed() try: WebDriverWait(browser, 10).until(EC.staleness_of(gp)) - print("Loaded next week % d" % (53-offset)) + print("Loaded next week % d" % (53 - offset)) except TimeoutException: - print("Loading next week %d took too much time!" % (53-offset)) + print("Loading next week %d took too much time!" 
% (53 - offset)) gp = browser.find_element_by_css_selector("div.wcGlassPane") - #print gp.is_enabled() - #print gp.is_selected() - #print gp.is_displayed() - x = browser.find_elements_by_css_selector("div.dijitReset.dijitSliderButtonContainer.dijitSliderButtonContainerH.tableauArrowDec")[0] + # print gp.is_enabled() + # print gp.is_selected() + # print gp.is_displayed() + x = browser.find_elements_by_css_selector( + "div.dijitReset.dijitSliderButtonContainer.dijitSliderButtonContainerH.tableauArrowDec" + )[0] find_and_click(browser, x) # Cycle through all weeks, downloading each week as a separate .csv # Theoretically, need to cycle 53 times, but in practice only 54 works, unsure why - for i in range(54-offset): + for i in range(54 - offset): # If something goes wrong for whatever reason, try from the beginning try: - print('Loading week %d' % (53-i)) + print("Loading week %d" % (53 - i)) # (Re-)load URL browser.switch_to.window(tab2) browser.get(dataurl) @@ -137,7 +157,9 @@ def get_paho_data(offset=0, dir='downloads'): full_data_tab = browser.find_elements_by_css_selector("li[id='tab-view-full-data']")[0] full_data_tab.click() - wait_for(browser, "a.csvLink") # Sometimes this fails but the button is successfully clicked anyway, not sure why + wait_for( + browser, "a.csvLink" + ) # Sometimes this fails but the button is successfully clicked anyway, not sure why # Actually download the data as a .csv (Will be downloaded to Firefox's default download destination) data_links = browser.find_elements_by_css_selector("a.csvLink") data_link = None @@ -149,16 +171,22 @@ def get_paho_data(offset=0, dir='downloads'): # Locate button that decreases the current week by 1 browser.switch_to.window(tab1) - wait_for(browser, "div.dijitReset.dijitSliderButtonContainer.dijitSliderButtonContainerH.tableauArrowDec") - - x = browser.find_elements_by_css_selector("div.dijitReset.dijitSliderButtonContainer.dijitSliderButtonContainerH.tableauArrowDec")[0] + wait_for( + browser, + "div.dijitReset.dijitSliderButtonContainer.dijitSliderButtonContainerH.tableauArrowDec", + ) + + x = browser.find_elements_by_css_selector( + "div.dijitReset.dijitSliderButtonContainer.dijitSliderButtonContainerH.tableauArrowDec" + )[0] find_and_click(browser, x) curr_offset += 1 except Exception as e: - print('Got exception %s\nTrying again from week %d' % (e,53-offset)) + print("Got exception %s\nTrying again from week %d" % (e, 53 - offset)) browser.quit() get_paho_data(offset=curr_offset) browser.quit() -if __name__ == '__main__': - get_paho_data(dir='downloads/') + +if __name__ == "__main__": + get_paho_data(dir="downloads/") From b9ceb400d9248c8271e8342275664ac5524e335d Mon Sep 17 00:00:00 2001 From: Dmitry Shemetov Date: Wed, 21 Jun 2023 14:08:31 -0700 Subject: [PATCH 27/43] style(black): format quidel acquisition --- src/acquisition/quidel/quidel.py | 232 ++++++++++++++---------- src/acquisition/quidel/quidel_update.py | 202 +++++++++++---------- 2 files changed, 245 insertions(+), 189 deletions(-) diff --git a/src/acquisition/quidel/quidel.py b/src/acquisition/quidel/quidel.py index a7c9a2918..3af99774f 100644 --- a/src/acquisition/quidel/quidel.py +++ b/src/acquisition/quidel/quidel.py @@ -1,4 +1,4 @@ -''' +""" =============== === Purpose === =============== @@ -15,7 +15,7 @@ * add end date, end week check 2017-12-02: * original version -''' +""" # standard library from collections import defaultdict @@ -35,148 +35,187 @@ import delphi.utils.epidate as ED from delphi.utils.geo.locations import Locations -def 
word_map(row,terms): - for (k,v) in terms.items(): - row = row.replace(k,v) + +def word_map(row, terms): + for (k, v) in terms.items(): + row = row.replace(k, v) return row -def date_less_than(d1,d2): - y1,m1,d1 = [int(x) for x in d1.split('-')] - y2,m2,d2 = [int(x) for x in d2.split('-')] - if y1*10000+m1*100+d10: shifted to future def date_to_epiweek(date, shift=0): - y,m,d = [int(x) for x in date.split('-')] + y, m, d = (int(x) for x in date.split("-")) - epidate = ED.EpiDate(y,m,d) + epidate = ED.EpiDate(y, m, d) epidate = epidate.add_days(shift) ew = epidate.get_ew() return ew + # convert measurment to time series format # startweek and endweek are inclusive -def measurement_to_ts(m,index,startweek=None,endweek=None): +def measurement_to_ts(m, index, startweek=None, endweek=None): if startweek is None: startweek = 0 if endweek is None: endweek = 999999 res = {} - for r,rdict in m.items(): - res[r]={} - for t,vals in rdict.items(): - if index>=len(vals): + for r, rdict in m.items(): + res[r] = {} + for t, vals in rdict.items(): + if index >= len(vals): raise Exception("Index is invalid") - if t>=startweek and t<=endweek: + if t >= startweek and t <= endweek: res[r][t] = vals[index] return res + class QuidelData: def __init__(self, raw_path, load_email=True): self.data_path = raw_path - self.excel_uptodate_path = join(raw_path,'excel/uptodate') - self.excel_history_path = join(raw_path,'excel/history') - self.csv_path = join(raw_path,'csv') + self.excel_uptodate_path = join(raw_path, "excel/uptodate") + self.excel_history_path = join(raw_path, "excel/history") + self.csv_path = join(raw_path, "csv") self.xlsx_uptodate_list = [ - f[:-5] for f in listdir(self.excel_uptodate_path) if isfile(join(self.excel_uptodate_path, f)) and f[-5:]=='.xlsx' + f[:-5] + for f in listdir(self.excel_uptodate_path) + if isfile(join(self.excel_uptodate_path, f)) and f[-5:] == ".xlsx" ] self.xlsx_history_list = [ - f[:-5] for f in listdir(self.excel_history_path) if isfile(join(self.excel_history_path, f)) and f[-5:]=='.xlsx' + f[:-5] + for f in listdir(self.excel_history_path) + if isfile(join(self.excel_history_path, f)) and f[-5:] == ".xlsx" + ] + self.csv_list = [ + f[:-4] + for f in listdir(self.csv_path) + if isfile(join(self.csv_path, f)) and f[-4:] == ".csv" ] - self.csv_list = [f[:-4] for f in listdir(self.csv_path) if isfile(join(self.csv_path, f)) and f[-4:]=='.csv'] self.map_terms = { - ' FL 34637"':'FL', + ' FL 34637"': "FL", } # hardcoded parameters self.date_dim = 1 self.state_dim = 4 self.fields = [ - 'sofia_ser','date','fac_id','city','state','zip','age', - 'fluA','fluB','fluAll','county','fac_type' + "sofia_ser", + "date", + "fac_id", + "city", + "state", + "zip", + "age", + "fluA", + "fluB", + "fluAll", + "county", + "fac_type", ] - self.fields_to_keep = ['fac_id','fluA','fluB','fluAll'] + self.fields_to_keep = ["fac_id", "fluA", "fluB", "fluAll"] self.dims_to_keep = [self.fields.index(x) for x in self.fields_to_keep] if load_email: self.retrieve_excels() self.prepare_csv() def retrieve_excels(self): - detach_dir = self.excel_uptodate_path # directory where to save attachments (default: current) + detach_dir = ( + self.excel_uptodate_path + ) # directory where to save attachments (default: current) # connecting to the gmail imap server m = imaplib.IMAP4_SSL("imap.gmail.com") - m.login(secrets.quidel.email_addr,secrets.quidel.email_pwd) - m.select("INBOX") # here you a can choose a mail box like INBOX instead + m.login(secrets.quidel.email_addr, secrets.quidel.email_pwd) + m.select("INBOX") 
# here you a can choose a mail box like INBOX instead # use m.list() to get all the mailboxes - _, items = m.search(None, "ALL") # you could filter using the IMAP rules here (check http://www.example-code.com/csharp/imap-search-critera.asp) - items = items[0].split() # getting the mails id + # you could filter using the IMAP rules here (check https://www.example-code.com/csharp/imap-search-critera.asp) + _, items = m.search(None, "ALL") + items = items[0].split() # getting the mails id # The emailids are ordered from past to now for emailid in items: - _, data = m.fetch(emailid, "(RFC822)") # fetching the mail, "`(RFC822)`" means "get the whole stuff", but you can ask for headers only, etc - email_body = data[0][1].decode('utf-8') # getting the mail content - mail = email.message_from_string(email_body) # parsing the mail content to get a mail object - - #Check if any attachments at all - if mail.get_content_maintype() != 'multipart': + _, data = m.fetch( + emailid, "(RFC822)" + ) # fetching the mail, "`(RFC822)`" means "get the whole stuff", but you can ask for headers only, etc + email_body = data[0][1].decode("utf-8") # getting the mail content + mail = email.message_from_string( + email_body + ) # parsing the mail content to get a mail object + + # Check if any attachments at all + if mail.get_content_maintype() != "multipart": continue # we use walk to create a generator so we can iterate on the parts and forget about the recursive headach for part in mail.walk(): # multipart are just containers, so we skip them - if part.get_content_maintype() == 'multipart': + if part.get_content_maintype() == "multipart": continue # is this part an attachment ? - if part.get('Content-Disposition') is None: + if part.get("Content-Disposition") is None: continue filename = part.get_filename() # check duplicates - if filename[-5:]!='.xlsx' or filename[:-5] in self.xlsx_uptodate_list+self.xlsx_history_list: + if ( + filename[-5:] != ".xlsx" + or filename[:-5] in self.xlsx_uptodate_list + self.xlsx_history_list + ): continue self.xlsx_uptodate_list.append(filename[:-5]) att_path = os.path.join(detach_dir, filename) - #Check if its already there - if not os.path.isfile(att_path) : + # Check if its already there + if not os.path.isfile(att_path): # finally write the stuff - fp = open(att_path, 'wb') + fp = open(att_path, "wb") fp.write(part.get_payload(decode=True)) fp.close() def prepare_csv(self): - need_update=False + need_update = False for f in self.xlsx_uptodate_list: if f in self.csv_list: continue else: - need_update=True + need_update = True - date_regex = '\d{2}-\d{2}-\d{4}' - date_items = re.findall(date_regex,f) + date_regex = r"\d{2}-\d{2}-\d{4}" + date_items = re.findall(date_regex, f) if date_items: - end_date = '-'.join(date_items[-1].split('-')[x] for x in [2,0,1]) + end_date = "-".join(date_items[-1].split("-")[x] for x in [2, 0, 1]) else: - print("End date not found in file name:"+f) + print("End date not found in file name:" + f) end_date = None - df_dict = pd.read_excel(join(self.excel_uptodate_path, f+'.xlsx'), sheet_name=None) - for (_,df) in df_dict.items(): - df = df.dropna(axis=0, how='all') - df['TestDate'] = df['TestDate'].apply(lambda x: x.strftime('%Y-%m-%d')) - df_filtered = df[df['TestDate']!=''] + df_dict = pd.read_excel(join(self.excel_uptodate_path, f + ".xlsx"), sheet_name=None) + for (_, df) in df_dict.items(): + df = df.dropna(axis=0, how="all") + df["TestDate"] = df["TestDate"].apply(lambda x: x.strftime("%Y-%m-%d")) + df_filtered = df[df["TestDate"] != ""] if 
end_date is not None: - df_filtered = df_filtered[df.apply(lambda x: date_less_than(end_date,x['TestDate'])!=1, axis=1)] - df_filtered.to_csv(join(self.csv_path, f+'.csv'), index=False, encoding='utf-8') - self.csv_list = [f[:-4] for f in listdir(self.csv_path) if isfile(join(self.csv_path, f)) and f[-4:]=='.csv'] + df_filtered = df_filtered[ + df.apply(lambda x: date_less_than(end_date, x["TestDate"]) != 1, axis=1) + ] + df_filtered.to_csv(join(self.csv_path, f + ".csv"), index=False, encoding="utf-8") + self.csv_list = [ + f[:-4] + for f in listdir(self.csv_path) + if isfile(join(self.csv_path, f)) and f[-4:] == ".csv" + ] self.need_update = need_update def load_csv(self, dims=None): @@ -186,12 +225,12 @@ def load_csv(self, dims=None): for f in self.csv_list: if f in self.xlsx_history_list: continue - rf = open(join(self.csv_path,f+'.csv')) + rf = open(join(self.csv_path, f + ".csv")) lines = rf.readlines() for l in lines[1:]: - l = word_map(l,self.map_terms) - row = l.strip().split(',') + l = word_map(l, self.map_terms) + row = l.strip().split(",") date = row[self.date_dim] state = row[self.state_dim] if state not in parsed_dict[date]: @@ -202,7 +241,7 @@ def load_csv(self, dims=None): # hardcoded aggregation function # output: [#unique_device,fluA,fluB,fluAll,total] - def prepare_measurements(self,data_dict,use_hhs=True,start_weekday=6): + def prepare_measurements(self, data_dict, use_hhs=True, start_weekday=6): buffer_dict = {} if use_hhs: region_list = Locations.hhs_list @@ -210,34 +249,35 @@ def prepare_measurements(self,data_dict,use_hhs=True,start_weekday=6): region_list = Locations.atom_list def get_hhs_region(atom): - for region in Locations.hhs_list: - if atom.lower() in Locations.hhs_map[region]: - return region - if atom.lower() == 'ny': - return 'hhs2' - return atom + for region in Locations.hhs_list: + if atom.lower() in Locations.hhs_map[region]: + return region + if atom.lower() == "ny": + return "hhs2" + return atom day_shift = 6 - start_weekday - time_map = lambda x:date_to_epiweek(x,shift=day_shift) - region_map = lambda x:get_hhs_region(x) \ - if use_hhs and x not in Locations.hhs_list else x # a bit hacky + time_map = lambda x: date_to_epiweek(x, shift=day_shift) + region_map = ( + lambda x: get_hhs_region(x) if use_hhs and x not in Locations.hhs_list else x + ) # a bit hacky end_date = sorted(data_dict.keys())[-1] # count the latest week in only if Thurs data is included - end_epiweek = date_to_epiweek(end_date,shift=-4) + end_epiweek = date_to_epiweek(end_date, shift=-4) # first pass: prepare device_id set device_dict = {} - for (date,daily_dict) in data_dict.items(): + for (date, daily_dict) in data_dict.items(): if not date: continue ew = time_map(date) - if ew == -1 or ew>end_epiweek: + if ew == -1 or ew > end_epiweek: continue if ew not in device_dict: - device_dict[ew]={} + device_dict[ew] = {} for r in region_list: device_dict[ew][r] = set() - for (state,rec_list) in daily_dict.items(): + for (state, rec_list) in daily_dict.items(): region = region_map(state) # get rid of non-US regions if region not in region_list: @@ -247,38 +287,40 @@ def get_hhs_region(atom): device_dict[ew][region].add(fac) # second pass: prepare all measurements - for (date,daily_dict) in data_dict.items(): + for (date, daily_dict) in data_dict.items(): ew = time_map(date) - if ew == -1 or ew>end_epiweek: + if ew == -1 or ew > end_epiweek: continue if ew not in buffer_dict: - buffer_dict[ew]={} + buffer_dict[ew] = {} for r in region_list: - buffer_dict[ew][r] = [0.0]*8 + 
buffer_dict[ew][r] = [0.0] * 8 - for (state,rec_list) in daily_dict.items(): + for (state, rec_list) in daily_dict.items(): region = region_map(state) # get rid of non-US regions if region not in region_list: continue for rec in rec_list: fac_num = float(len(device_dict[ew][region])) - buffer_dict[ew][region]= np.add( - buffer_dict[ew][region],[ - rec[1]=='positive', - rec[2]=='positive', - rec[3]=='positive', + buffer_dict[ew][region] = np.add( + buffer_dict[ew][region], + [ + rec[1] == "positive", + rec[2] == "positive", + rec[3] == "positive", 1.0, - float(rec[1]=='positive')/fac_num, - float(rec[2]=='positive')/fac_num, - float(rec[3]=='positive')/fac_num, - 1.0/fac_num, - ]).tolist() + float(rec[1] == "positive") / fac_num, + float(rec[2] == "positive") / fac_num, + float(rec[3] == "positive") / fac_num, + 1.0 / fac_num, + ], + ).tolist() # switch two dims of dict result_dict = {} for r in region_list: - result_dict[r]={} - for (k,v) in buffer_dict.items(): - result_dict[r][k]=v[r] + result_dict[r] = {} + for (k, v) in buffer_dict.items(): + result_dict[r][k] = v[r] return result_dict diff --git a/src/acquisition/quidel/quidel_update.py b/src/acquisition/quidel/quidel_update.py index b6303533c..286a30834 100644 --- a/src/acquisition/quidel/quidel_update.py +++ b/src/acquisition/quidel/quidel_update.py @@ -1,4 +1,4 @@ -''' +""" =============== === Purpose === =============== @@ -33,7 +33,7 @@ 2017-12-02: * original version -''' +""" # standard library import argparse @@ -49,106 +49,120 @@ from delphi.utils.geo.locations import Locations LOCATIONS = Locations.hhs_list -DATAPATH = '/home/automation/quidel_data' +DATAPATH = "/home/automation/quidel_data" + def update(locations, first=None, last=None, force_update=False, load_email=True): - # download and prepare data first - qd = quidel.QuidelData(DATAPATH,load_email) - if not qd.need_update and not force_update: - print('Data not updated, nothing needs change.') - return - - qd_data = qd.load_csv() - qd_measurements = qd.prepare_measurements(qd_data,start_weekday=4) - qd_ts = quidel.measurement_to_ts(qd_measurements,7,startweek=first,endweek=last) - # connect to the database - u, p = secrets.db.epi - cnx = mysql.connector.connect(user=u, password=p, database='epidata') - cur = cnx.cursor() - - def get_num_rows(): - cur.execute('SELECT count(1) `num` FROM `quidel`') - for (num,) in cur: - pass - return num - - # check from 4 weeks preceeding the last week with data through this week - cur.execute('SELECT max(`epiweek`) `ew0`, yearweek(now(), 6) `ew1` FROM `quidel`') - for (ew0, ew1) in cur: - ew0 = 200401 if ew0 is None else flu.add_epiweeks(ew0, -4) - ew0 = ew0 if first is None else first - ew1 = ew1 if last is None else last - print('Checking epiweeks between %d and %d...' 
% (ew0, ew1)) - - # keep track of how many rows were added - rows_before = get_num_rows() - - # check Quidel for new and/or revised data - sql = ''' + # download and prepare data first + qd = quidel.QuidelData(DATAPATH, load_email) + if not qd.need_update and not force_update: + print("Data not updated, nothing needs change.") + return + + qd_data = qd.load_csv() + qd_measurements = qd.prepare_measurements(qd_data, start_weekday=4) + qd_ts = quidel.measurement_to_ts(qd_measurements, 7, startweek=first, endweek=last) + # connect to the database + u, p = secrets.db.epi + cnx = mysql.connector.connect(user=u, password=p, database="epidata") + cur = cnx.cursor() + + def get_num_rows(): + cur.execute("SELECT count(1) `num` FROM `quidel`") + for (num,) in cur: + pass + return num + + # check from 4 weeks preceeding the last week with data through this week + cur.execute("SELECT max(`epiweek`) `ew0`, yearweek(now(), 6) `ew1` FROM `quidel`") + for (ew0, ew1) in cur: + ew0 = 200401 if ew0 is None else flu.add_epiweeks(ew0, -4) + ew0 = ew0 if first is None else first + ew1 = ew1 if last is None else last + print("Checking epiweeks between %d and %d..." % (ew0, ew1)) + + # keep track of how many rows were added + rows_before = get_num_rows() + + # check Quidel for new and/or revised data + sql = """ INSERT INTO `quidel` (`location`, `epiweek`, `value`) VALUES (%s, %s, %s) ON DUPLICATE KEY UPDATE `value` = %s - ''' - - total_rows = 0 - - for location in locations: - if location not in qd_ts: - continue - ews = sorted(qd_ts[location].keys()) - num_missing = 0 - for ew in ews: - v = qd_ts[location][ew] - sql_data = (location, ew, v, v) - cur.execute(sql, sql_data) - total_rows += 1 - if v == 0: - num_missing += 1 - if num_missing > 0: - print(' [%s] missing %d/%d value(s)' % (location, num_missing, len(ews))) - - # keep track of how many rows were added - rows_after = get_num_rows() - print('Inserted %d/%d row(s)'%(rows_after - rows_before, total_rows)) - - # cleanup - cur.close() - cnx.commit() - cnx.close() + """ + + total_rows = 0 + + for location in locations: + if location not in qd_ts: + continue + ews = sorted(qd_ts[location].keys()) + num_missing = 0 + for ew in ews: + v = qd_ts[location][ew] + sql_data = (location, ew, v, v) + cur.execute(sql, sql_data) + total_rows += 1 + if v == 0: + num_missing += 1 + if num_missing > 0: + print(" [%s] missing %d/%d value(s)" % (location, num_missing, len(ews))) + + # keep track of how many rows were added + rows_after = get_num_rows() + print("Inserted %d/%d row(s)" % (rows_after - rows_before, total_rows)) + + # cleanup + cur.close() + cnx.commit() + cnx.close() def main(): - # args and usage - parser = argparse.ArgumentParser() - parser.add_argument('--location', action='store', type=str, default=None, help='location(s) (ex: all; any of hhs1-10)') - parser.add_argument('--first', '-f', default=None, type=int, help='first epiweek override') - parser.add_argument('--last', '-l', default=None, type=int, help='last epiweek override') - parser.add_argument('--force_update', '-u', action='store_true', help='force update db values') - parser.add_argument('--skip_email', '-s', action='store_true', help='skip email downloading step') - args = parser.parse_args() - - # sanity check - first, last, force_update, skip_email = args.first, args.last, args.force_update, args.skip_email - load_email = not skip_email - if first is not None: - flu.check_epiweek(first) - if last is not None: - flu.check_epiweek(last) - if first is not None and last is not None and first > 
last: - raise Exception('epiweeks in the wrong order') - - # decide what to update - if args.location.lower() == 'all': - locations = LOCATIONS - else: - locations = args.location.lower().split(',') - - # run the update - update(locations, first, last, force_update, load_email) - - -if __name__ == '__main__': - main() + # args and usage + parser = argparse.ArgumentParser() + parser.add_argument( + "--location", + action="store", + type=str, + default=None, + help="location(s) (ex: all; any of hhs1-10)", + ) + parser.add_argument("--first", "-f", default=None, type=int, help="first epiweek override") + parser.add_argument("--last", "-l", default=None, type=int, help="last epiweek override") + parser.add_argument("--force_update", "-u", action="store_true", help="force update db values") + parser.add_argument( + "--skip_email", "-s", action="store_true", help="skip email downloading step" + ) + args = parser.parse_args() + + # sanity check + first, last, force_update, skip_email = ( + args.first, + args.last, + args.force_update, + args.skip_email, + ) + load_email = not skip_email + if first is not None: + flu.check_epiweek(first) + if last is not None: + flu.check_epiweek(last) + if first is not None and last is not None and first > last: + raise Exception("epiweeks in the wrong order") + + # decide what to update + if args.location.lower() == "all": + locations = LOCATIONS + else: + locations = args.location.lower().split(",") + + # run the update + update(locations, first, last, force_update, load_email) + + +if __name__ == "__main__": + main() From 07ed83e5768f717ab0f9a62a9209e4e2cffa058d Mon Sep 17 00:00:00 2001 From: Dmitry Shemetov Date: Wed, 21 Jun 2023 14:08:59 -0700 Subject: [PATCH 28/43] style(black): format twitter acquisition --- src/acquisition/twtr/healthtweets.py | 342 ++++++++++++++++--------- src/acquisition/twtr/pageparser.py | 121 +++++---- src/acquisition/twtr/twitter_update.py | 99 +++---- 3 files changed, 329 insertions(+), 233 deletions(-) diff --git a/src/acquisition/twtr/healthtweets.py b/src/acquisition/twtr/healthtweets.py index 78eb2b3ec..31976f376 100644 --- a/src/acquisition/twtr/healthtweets.py +++ b/src/acquisition/twtr/healthtweets.py @@ -1,4 +1,4 @@ -''' +""" =============== === Purpose === =============== @@ -20,7 +20,7 @@ * Fetching daily values instead of weekly values 2015-03-?? 
* Original version -''' +""" # standard library import argparse @@ -36,132 +36,220 @@ class HealthTweets: - # mapping from state abbreviations to location codes used by healthtweets.org - STATE_CODES = {'AL': 3024, 'AK': 3025, 'AZ': 3026, 'AR': 3027, 'CA': 440, 'CO': 3029, 'CT': 3030, 'DE': 3031, 'DC': 3032, 'FL': 3033, 'GA': 3034, 'HI': 3035, 'ID': 3036, 'IL': 3037, 'IN': 3038, 'IA': 3039, 'KS': 3040, 'KY': 3041, 'LA': 2183, 'ME': 3043, 'MD': 3044, 'MA': 450, 'MI': 3046, 'MN': 3047, 'MS': 3048, 'MO': 3049, 'MT': 3050, 'NE': 3051, 'NV': 3052, 'NH': 3053, 'NJ': 478, 'NM': 2225, 'NY': 631, 'NC': 3057, 'ND': 3058, 'OH': 3059, 'OK': 3060, 'OR': 281, 'PA': 3062, 'RI': 3063, 'SC': 3064, 'SD': 3065, 'TN': 3066, 'TX': 3067, 'UT': 2272, 'VT': 3069, 'VA': 3070, 'WA': 3071, 'WV': 3072, 'WI': 3073, 'WY': 3074} - - def __init__(self, username, password, debug=False): - self.debug = debug - self.session = requests.Session() - # spoof a web browser - self.session.headers.update({ - 'User-Agent': 'Mozilla/5.0 (Windows NT 6.1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/41.0.2228.0 Safari/537.36', - }) - # get the login token - response = self._go('http://www.healthtweets.org/accounts/login') - token = self._get_token(response.text) - if self.debug: - print('token=%s'%(token)) - data = { - 'csrfmiddlewaretoken': token, - 'username': username, - 'password': password, - 'next': '/', + # mapping from state abbreviations to location codes used by healthtweets.org + STATE_CODES = { + "AL": 3024, + "AK": 3025, + "AZ": 3026, + "AR": 3027, + "CA": 440, + "CO": 3029, + "CT": 3030, + "DE": 3031, + "DC": 3032, + "FL": 3033, + "GA": 3034, + "HI": 3035, + "ID": 3036, + "IL": 3037, + "IN": 3038, + "IA": 3039, + "KS": 3040, + "KY": 3041, + "LA": 2183, + "ME": 3043, + "MD": 3044, + "MA": 450, + "MI": 3046, + "MN": 3047, + "MS": 3048, + "MO": 3049, + "MT": 3050, + "NE": 3051, + "NV": 3052, + "NH": 3053, + "NJ": 478, + "NM": 2225, + "NY": 631, + "NC": 3057, + "ND": 3058, + "OH": 3059, + "OK": 3060, + "OR": 281, + "PA": 3062, + "RI": 3063, + "SC": 3064, + "SD": 3065, + "TN": 3066, + "TX": 3067, + "UT": 2272, + "VT": 3069, + "VA": 3070, + "WA": 3071, + "WV": 3072, + "WI": 3073, + "WY": 3074, } - # login to the site - response = self._go('http://www.healthtweets.org/accounts/login', data=data) - if response.status_code != 200 or 'Your username and password' in response.text: - raise Exception('login failed') - - def get_values(self, state, date1, date2): - ''' - state: two-letter state abbreviation (see STATE_CODES) - date1: the first date in the range, inclusive (format: YYYY-MM-DD) - date2: the last date in the range, inclusive (format: YYYY-MM-DD) - returns a dictionary (by date) of number of flu tweets (num) and total tweets (total) - ''' - # get raw values (number of flu tweets) and normalized values (flu tweets as a percent of total tweets) - raw_values = self._get_values(state, date1, date2, False) - normalized_values = self._get_values(state, date1, date2, True) - values = {} - # save the raw number and calculate the total - for date in raw_values.keys(): - if normalized_values[date] == 0: - continue - values[date] = { - 'num': round(raw_values[date]), - 'total': round(100 * raw_values[date] / normalized_values[date]), - } - print(date, raw_values[date], normalized_values[date]) - return values - - def _get_values(self, state, date1, date2, normalized): - if state not in HealthTweets.STATE_CODES: - raise Exception('invalid state') - state_code = HealthTweets.STATE_CODES[state] - d1, d2 = datetime.strptime(date1, 
'%Y-%m-%d'), datetime.strptime(date2, '%Y-%m-%d') - s1, s2 = d1.strftime('%m%%2F%d%%2F%Y'), d2.strftime('%m%%2F%d%%2F%Y') - count_type = 'normalized' if normalized else 'raw' - url = 'http://www.healthtweets.org/trends/plot?resolution=Day&count_type=%s&dayNum=%d&from=%s&to=%s&plot1_disease=65&location_plot1=%d'%(count_type, (d2 - d1).days, s1, s2, state_code) - response = self._go('http://www.healthtweets.org/trends/plot?resolution=Day&count_type=%s&dayNum=%d&from=%s&to=%s&plot1_disease=65&location_plot1=%d'%(count_type, (d2 - d1).days, s1, s2, state_code)) - #print(state, date1, date2, normalized) - #print(url) - #print(response.status_code) - if response.status_code != 200: - raise Exception('plot status is ' + str(response.status_code) + ' (when was data last updated?)') - lines = [line.strip() for line in response.text.split('\n')] - data_line = [line for line in lines if line[:16] == 'var chartData = '] - if len(data_line) != 1: - raise Exception('lookup failed') - values = json.loads(data_line[0][16:-1]) - return dict([(datetime.strptime(v[0], '%m/%d/%Y').strftime('%Y-%m-%d'), float(v[1])) for v in values]) - - def check_state(self, state): - ''' - Sanity checks state code mapping. - state: two-letter state abbreviation (see STATE_CODES) - returns the full state name associated with the state abbreviation - ''' - if state not in HealthTweets.STATE_CODES: - raise Exception('invalid state') - state_code = HealthTweets.STATE_CODES[state] - response = self._go('http://www.healthtweets.org/trends/plot?resolution=Day&count_type=normalized&dayNum=7&from=01%%2F01%%2F2015&to=01%%2F07%%2F2015&plot1_disease=65&location_plot1=%d'%(state_code)) - lines = [line.strip() for line in response.text.split('\n')] - data_line = [line for line in lines if line[:29] == 'var plotNames = ["Influenza ('] - if len(data_line) == 0: - raise Exception('check failed') - name = data_line[0][29:] - name = name.split('(')[0] - return name.strip() - - def _get_token(self, html): - page = PageParser.parse(html) - hidden = PageParser.filter_all(page, [('html',), ('body',), ('div',), ('div',), ('div',), ('form',), ('input',)]) - return hidden['attrs']['value'] - - def _go(self, url, method=None, referer=None, data=None): - if self.debug: - print('%s'%(url)) - if method is None: - if data is None: - method = self.session.get - else: - method = self.session.post - response = method(url, headers={'referer': referer}, data=data) - html = response.text - if self.debug: - for item in response.history: - print(' [%d to %s]'%(item.status_code, item.headers['Location'])) - print(' %d (%d bytes)'%(response.status_code, len(html))) - return response + + def __init__(self, username, password, debug=False): + self.debug = debug + self.session = requests.Session() + # spoof a web browser + self.session.headers.update( + { + "User-Agent": "Mozilla/5.0 (Windows NT 6.1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/41.0.2228.0 Safari/537.36", + } + ) + # get the login token + response = self._go("https://www.healthtweets.org/accounts/login") + token = self._get_token(response.text) + if self.debug: + print("token=%s" % (token)) + data = { + "csrfmiddlewaretoken": token, + "username": username, + "password": password, + "next": "/", + } + # login to the site + response = self._go("https://www.healthtweets.org/accounts/login", data=data) + if response.status_code != 200 or "Your username and password" in response.text: + raise Exception("login failed") + + def get_values(self, state, date1, date2): + """ + state: two-letter state 
abbreviation (see STATE_CODES) + date1: the first date in the range, inclusive (format: YYYY-MM-DD) + date2: the last date in the range, inclusive (format: YYYY-MM-DD) + returns a dictionary (by date) of number of flu tweets (num) and total tweets (total) + """ + # get raw values (number of flu tweets) and normalized values (flu tweets as a percent of total tweets) + raw_values = self._get_values(state, date1, date2, False) + normalized_values = self._get_values(state, date1, date2, True) + values = {} + # save the raw number and calculate the total + for date in raw_values.keys(): + if normalized_values[date] == 0: + continue + values[date] = { + "num": round(raw_values[date]), + "total": round(100 * raw_values[date] / normalized_values[date]), + } + print(date, raw_values[date], normalized_values[date]) + return values + + def _get_values(self, state, date1, date2, normalized): + if state not in HealthTweets.STATE_CODES: + raise Exception("invalid state") + state_code = HealthTweets.STATE_CODES[state] + d1, d2 = datetime.strptime(date1, "%Y-%m-%d"), datetime.strptime(date2, "%Y-%m-%d") + s1, s2 = d1.strftime("%m%%2F%d%%2F%Y"), d2.strftime("%m%%2F%d%%2F%Y") + count_type = "normalized" if normalized else "raw" + url = ( + "https://www.healthtweets.org/trends/plot?resolution=Day&count_type=%s&dayNum=%d&from=%s&to=%s&plot1_disease=65&location_plot1=%d" + % (count_type, (d2 - d1).days, s1, s2, state_code) + ) + response = self._go( + "https://www.healthtweets.org/trends/plot?resolution=Day&count_type=%s&dayNum=%d&from=%s&to=%s&plot1_disease=65&location_plot1=%d" + % (count_type, (d2 - d1).days, s1, s2, state_code) + ) + # print(state, date1, date2, normalized) + # print(url) + # print(response.status_code) + if response.status_code != 200: + raise Exception( + "plot status is " + str(response.status_code) + " (when was data last updated?)" + ) + lines = [line.strip() for line in response.text.split("\n")] + data_line = [line for line in lines if line[:16] == "var chartData = "] + if len(data_line) != 1: + raise Exception("lookup failed") + values = json.loads(data_line[0][16:-1]) + return { + datetime.strptime(v[0], "%m/%d/%Y").strftime("%Y-%m-%d"): float(v[1]) for v in values + } + + def check_state(self, state): + """ + Sanity checks state code mapping. 
+ state: two-letter state abbreviation (see STATE_CODES) + returns the full state name associated with the state abbreviation + """ + if state not in HealthTweets.STATE_CODES: + raise Exception("invalid state") + state_code = HealthTweets.STATE_CODES[state] + response = self._go( + "https://www.healthtweets.org/trends/plot?resolution=Day&count_type=normalized&dayNum=7&from=01%%2F01%%2F2015&to=01%%2F07%%2F2015&plot1_disease=65&location_plot1=%d" % (state_code) + ) + lines = [line.strip() for line in response.text.split("\n")] + data_line = [line for line in lines if line[:29] == 'var plotNames = ["Influenza ('] + if len(data_line) == 0: + raise Exception("check failed") + name = data_line[0][29:] + name = name.split("(")[0] + return name.strip() + + def _get_token(self, html): + page = PageParser.parse(html) + hidden = PageParser.filter_all( + page, [("html",), ("body",), ("div",), ("div",), ("div",), ("form",), ("input",)] + ) + return hidden["attrs"]["value"] + + def _go(self, url, method=None, referer=None, data=None): + if self.debug: + print("%s" % (url)) + if method is None: + if data is None: + method = self.session.get + else: + method = self.session.post + response = method(url, headers={"referer": referer}, data=data) + html = response.text + if self.debug: + for item in response.history: + print(" [%d to %s]" % (item.status_code, item.headers["Location"])) + print(" %d (%d bytes)" % (response.status_code, len(html))) + return response def main(): - # args and usage - parser = argparse.ArgumentParser() - parser.add_argument('username', action='store', type=str, help='healthtweets.org username') - parser.add_argument('password', action='store', type=str, help='healthtweets.org password') - parser.add_argument('state', action='store', type=str, choices=list(HealthTweets.STATE_CODES.keys()), help='U.S. state (ex: TX)') - parser.add_argument('date1', action='store', type=str, help='first date, inclusive (ex: 2015-01-01)') - parser.add_argument('date2', action='store', type=str, help='last date, inclusive (ex: 2015-01-01)') - parser.add_argument('-d', '--debug', action='store_const', const=True, default=False, help='enable debug mode') - args = parser.parse_args() - - ht = HealthTweets(args.username, args.password, debug=args.debug) - values = ht.get_values(args.state, args.date1, args.date2) - print('Daily counts in %s from %s to %s:'%(ht.check_state(args.state), args.date1, args.date2)) - for date in sorted(list(values.keys())): - print('%s: num=%-4d total=%-5d (%.3f%%)'%(date, values[date]['num'], values[date]['total'], 100 * values[date]['num'] / values[date]['total'])) - - -if __name__ == '__main__': - main() + # args and usage + parser = argparse.ArgumentParser() + parser.add_argument("username", action="store", type=str, help="healthtweets.org username") + parser.add_argument("password", action="store", type=str, help="healthtweets.org password") + parser.add_argument( + "state", + action="store", + type=str, + choices=list(HealthTweets.STATE_CODES.keys()), + help="U.S. 
state (ex: TX)", + ) + parser.add_argument( + "date1", action="store", type=str, help="first date, inclusive (ex: 2015-01-01)" + ) + parser.add_argument( + "date2", action="store", type=str, help="last date, inclusive (ex: 2015-01-01)" + ) + parser.add_argument( + "-d", "--debug", action="store_const", const=True, default=False, help="enable debug mode" + ) + args = parser.parse_args() + + ht = HealthTweets(args.username, args.password, debug=args.debug) + values = ht.get_values(args.state, args.date1, args.date2) + print(f"Daily counts in {ht.check_state(args.state)} from {args.date1} to {args.date2}:") + for date in sorted(list(values.keys())): + print( + "%s: num=%-4d total=%-5d (%.3f%%)" + % ( + date, + values[date]["num"], + values[date]["total"], + 100 * values[date]["num"] / values[date]["total"], + ) + ) + + +if __name__ == "__main__": + main() diff --git a/src/acquisition/twtr/pageparser.py b/src/acquisition/twtr/pageparser.py index 5e9aaaea1..2b2183c89 100644 --- a/src/acquisition/twtr/pageparser.py +++ b/src/acquisition/twtr/pageparser.py @@ -5,74 +5,73 @@ class PageParser(HTMLParser): - ''' - This is an HTML parser! All of the hard work is done by the superclass - (which is a Python built-in). This class puts the HTML into a hierarchy - that's (hopefully) easier to work with than raw string parsing. - ''' + """ + This is an HTML parser! All of the hard work is done by the superclass + (which is a Python built-in). This class puts the HTML into a hierarchy + that's (hopefully) easier to work with than raw string parsing. + """ - @staticmethod - def parse(html): - parser = PageParser() - parser.feed(html) - return parser.get_root_node() + @staticmethod + def parse(html): + parser = PageParser() + parser.feed(html) + return parser.get_root_node() - @staticmethod - def banlist(): - '''Commonly unclosed tags''' - return ('br', 'img', 'meta') + @staticmethod + def banlist(): + """Commonly unclosed tags""" + return ("br", "img", "meta") - @staticmethod - def new_node(type): - '''An empty node of the HTML tree''' - return {'type': type, 'attrs': {}, 'nodes': [], 'data': ''} + @staticmethod + def new_node(type): + """An empty node of the HTML tree""" + return {"type": type, "attrs": {}, "nodes": [], "data": ""} - @staticmethod - def filter_all(node, filters): - '''Applies all filters''' - for f in filters: - node = PageParser.filter(node, *f) - return node + @staticmethod + def filter_all(node, filters): + """Applies all filters""" + for f in filters: + node = PageParser.filter(node, *f) + return node - @staticmethod - def filter(node, type, index=0): - '''Finds a sub-node of the given type, specified by index''' - i = 0 - for node in node['nodes']: - if node['type'] == type: - if i == index: - return node - i += 1 - return None + @staticmethod + def filter(node, type, index=0): + """Finds a sub-node of the given type, specified by index""" + i = 0 + for node in node["nodes"]: + if node["type"] == type: + if i == index: + return node + i += 1 + return None - def __init__(self): - HTMLParser.__init__(self) - self.root = PageParser.new_node(None) - self.stack = [self.root] - self.indent = 0 + def __init__(self): + HTMLParser.__init__(self) + self.root = PageParser.new_node(None) + self.stack = [self.root] + self.indent = 0 - def get_root_node(self): - '''After parsing, returns the abstract root node (which contains the html node)''' - return self.root + def get_root_node(self): + """After parsing, returns the abstract root node (which contains the html node)""" + return self.root - def 
handle_starttag(self, tag, attrs): - '''Inherited - called when a start tag is found''' - if tag in PageParser.banlist(): - return - element = PageParser.new_node(tag) - for (k, v) in attrs: - element['attrs'][k] = v - self.stack[-1]['nodes'].append(element) - self.stack.append(element) + def handle_starttag(self, tag, attrs): + """Inherited - called when a start tag is found""" + if tag in PageParser.banlist(): + return + element = PageParser.new_node(tag) + for (k, v) in attrs: + element["attrs"][k] = v + self.stack[-1]["nodes"].append(element) + self.stack.append(element) - def handle_endtag(self, tag): - '''Inherited - called when an end tag is found''' - if tag in PageParser.banlist(): - return - self.stack.pop() + def handle_endtag(self, tag): + """Inherited - called when an end tag is found""" + if tag in PageParser.banlist(): + return + self.stack.pop() - - def handle_data(self, data): - '''Inherited - called when a data string is found''' - element = self.stack[-1] - element['data'] += data + def handle_data(self, data): + """Inherited - called when a data string is found""" + element = self.stack[-1] + element["data"] += data diff --git a/src/acquisition/twtr/twitter_update.py b/src/acquisition/twtr/twitter_update.py index 5c1f3f45b..4354c5a80 100644 --- a/src/acquisition/twtr/twitter_update.py +++ b/src/acquisition/twtr/twitter_update.py @@ -1,4 +1,4 @@ -''' +""" =============== === Purpose === =============== @@ -49,7 +49,7 @@ * Small documentation update 2015-05-22 * Original version -''' +""" # third party import mysql.connector @@ -60,46 +60,55 @@ def run(): - # connect to the database - u, p = secrets.db.epi - cnx = mysql.connector.connect(user=u, password=p, database='epidata') - cur = cnx.cursor() - - def get_num_rows(): - cur.execute('SELECT count(1) `num` FROM `twitter`') - for (num,) in cur: - pass - return num - - # check from 7 days preceeding the last date with data through yesterday (healthtweets.org 404's if today's date is part of the range) - cur.execute('SELECT date_sub(max(`date`), INTERVAL 7 DAY) `date1`, date_sub(date(now()), INTERVAL 1 DAY) `date2` FROM `twitter`') - for (date1, date2) in cur: - date1, date2 = date1.strftime('%Y-%m-%d'), date2.strftime('%Y-%m-%d') - print('Checking dates between %s and %s...'%(date1, date2)) - - # keep track of how many rows were added - rows_before = get_num_rows() - - # check healthtweets.org for new and/or revised data - ht = HealthTweets(*secrets.healthtweets.login) - sql = 'INSERT INTO `twitter` (`date`, `state`, `num`, `total`) VALUES (%s, %s, %s, %s) ON DUPLICATE KEY UPDATE `num` = %s, `total` = %s' - total_rows = 0 - for state in sorted(HealthTweets.STATE_CODES.keys()): - values = ht.get_values(state, date1, date2) - for date in sorted(list(values.keys())): - sql_data = (date, state, values[date]['num'], values[date]['total'], values[date]['num'], values[date]['total']) - cur.execute(sql, sql_data) - total_rows += 1 - - # keep track of how many rows were added - rows_after = get_num_rows() - print('Inserted %d/%d row(s)'%(rows_after - rows_before, total_rows)) - - # cleanup - cur.close() - cnx.commit() - cnx.close() - - -if __name__ == '__main__': - run() + # connect to the database + u, p = secrets.db.epi + cnx = mysql.connector.connect(user=u, password=p, database="epidata") + cur = cnx.cursor() + + def get_num_rows(): + cur.execute("SELECT count(1) `num` FROM `twitter`") + for (num,) in cur: + pass + return num + + # check from 7 days preceeding the last date with data through yesterday (healthtweets.org 404's if 
today's date is part of the range) + cur.execute( + "SELECT date_sub(max(`date`), INTERVAL 7 DAY) `date1`, date_sub(date(now()), INTERVAL 1 DAY) `date2` FROM `twitter`" + ) + for (date1, date2) in cur: + date1, date2 = date1.strftime("%Y-%m-%d"), date2.strftime("%Y-%m-%d") + print(f"Checking dates between {date1} and {date2}...") + + # keep track of how many rows were added + rows_before = get_num_rows() + + # check healthtweets.org for new and/or revised data + ht = HealthTweets(*secrets.healthtweets.login) + sql = "INSERT INTO `twitter` (`date`, `state`, `num`, `total`) VALUES (%s, %s, %s, %s) ON DUPLICATE KEY UPDATE `num` = %s, `total` = %s" + total_rows = 0 + for state in sorted(HealthTweets.STATE_CODES.keys()): + values = ht.get_values(state, date1, date2) + for date in sorted(list(values.keys())): + sql_data = ( + date, + state, + values[date]["num"], + values[date]["total"], + values[date]["num"], + values[date]["total"], + ) + cur.execute(sql, sql_data) + total_rows += 1 + + # keep track of how many rows were added + rows_after = get_num_rows() + print("Inserted %d/%d row(s)" % (rows_after - rows_before, total_rows)) + + # cleanup + cur.close() + cnx.commit() + cnx.close() + + +if __name__ == "__main__": + run() From 923852eafa86b8f8b182d499489249ba8f815843 Mon Sep 17 00:00:00 2001 From: Dmitry Shemetov Date: Wed, 21 Jun 2023 14:09:21 -0700 Subject: [PATCH 29/43] style(black): format wiki acquisition --- src/acquisition/wiki/wiki.py | 246 +++++++------- src/acquisition/wiki/wiki_download.py | 470 ++++++++++++++------------ src/acquisition/wiki/wiki_extract.py | 140 ++++---- src/acquisition/wiki/wiki_update.py | 149 ++++---- src/acquisition/wiki/wiki_util.py | 275 ++++++++------- 5 files changed, 674 insertions(+), 606 deletions(-) diff --git a/src/acquisition/wiki/wiki.py b/src/acquisition/wiki/wiki.py index 602e21102..c57582918 100644 --- a/src/acquisition/wiki/wiki.py +++ b/src/acquisition/wiki/wiki.py @@ -1,112 +1,112 @@ """ -=============== -=== Purpose === -=============== - -Wrapper for the entire wiki data collection process: - 1. Uses wiki_update.py to fetch metadata for new access logs - 2. Uses wiki_download.py to download the access logs - 3. Uses wiki_extract.py to store article access counts - +=============== +=== Purpose === +=============== + +Wrapper for the entire wiki data collection process: + 1. Uses wiki_update.py to fetch metadata for new access logs + 2. Uses wiki_download.py to download the access logs + 3. Uses wiki_extract.py to store article access counts + See also: master.php - - -======================= -=== Data Dictionary === -======================= - -`wiki_raw` is a staging table where extracted access log data is stored for -further processing. When wiki_update.py finds a new log, it saves the name and -hash to this table, with a status of 0. This table is read by master.php, which -then hands out "jobs" (independently and in parallel) to wiki_download.py. -After wiki_download.py downloads the log and extracts the counts, it submits -the data (as JSON) to master.php, which then stores the "raw" JSON counts in -this table. 
-+----------+---------------+------+-----+---------+----------------+ -| Field | Type | Null | Key | Default | Extra | -+----------+---------------+------+-----+---------+----------------+ -| id | int(11) | NO | PRI | NULL | auto_increment | -| name | varchar(64) | NO | UNI | NULL | | -| hash | char(32) | NO | | NULL | | -| status | int(11) | NO | MUL | 0 | | -| size | int(11) | YES | | NULL | | -| datetime | datetime | YES | | NULL | | -| worker | varchar(256) | YES | | NULL | | -| elapsed | float | YES | | NULL | | -| data | varchar(2048) | YES | | NULL | | -+----------+---------------+------+-----+---------+----------------+ -id: unique identifier for each record -name: name of the access log -hash: md5 hash of the file, as reported by the dumps site (all zeroes if no - hash is provided) -status: the status of the job, using the following values: - 0: queued for download - 1: download in progress - 2: queued for extraction - 3: extracted to `wiki` table - (any negative value indicates failure) -size: the size, in bytes, of the downloaded file -datetime: the timestamp of the most recent status update -worker: name (user@hostname) of the machine working on the job -elapsed: time, in seconds, taken to complete the job -data: a JSON string containing counts for selected articles in the access log - -`wiki` is the table where access counts are stored (parsed from wiki_raw). The -"raw" JSON counts are parsed by wiki_extract.py and stored directly in this -table. -+----------+-------------+------+-----+---------+----------------+ -| Field | Type | Null | Key | Default | Extra | -+----------+-------------+------+-----+---------+----------------+ -| id | int(11) | NO | PRI | NULL | auto_increment | -| datetime | datetime | NO | MUL | NULL | | -| article | varchar(64) | NO | MUL | NULL | | -| count | int(11) | NO | | NULL | | -+----------+-------------+------+-----+---------+----------------+ -id: unique identifier for each record -datetime: UTC timestamp (rounded to the nearest hour) of article access -article: name of the article -count: number of times the article was accessed in the hour - -`wiki_meta` is a metadata table for this dataset. It contains pre-calculated -date and epiweeks fields, and more importantly, the total number of English -article hits (denominator) for each `datetime` in the `wiki` table. This table -is populated in parallel with `wiki` by the wiki_extract.py script. -+----------+----------+------+-----+---------+----------------+ -| Field | Type | Null | Key | Default | Extra | -+----------+----------+------+-----+---------+----------------+ -| id | int(11) | NO | PRI | NULL | auto_increment | -| datetime | datetime | NO | UNI | NULL | | -| date | date | NO | | NULL | | -| epiweek | int(11) | NO | | NULL | | -| total | int(11) | NO | | NULL | | -+----------+----------+------+-----+---------+----------------+ -id: unique identifier for each record -datetime: UTC timestamp (rounded to the nearest hour) of article access -date: the date portion of `datetime` -epiweek: the year and week containing `datetime` -total: total number of English article hits in the hour - - -================= -=== Changelog === -================= - + + +======================= +=== Data Dictionary === +======================= + +`wiki_raw` is a staging table where extracted access log data is stored for +further processing. When wiki_update.py finds a new log, it saves the name and +hash to this table, with a status of 0. 
This table is read by master.php, which +then hands out "jobs" (independently and in parallel) to wiki_download.py. +After wiki_download.py downloads the log and extracts the counts, it submits +the data (as JSON) to master.php, which then stores the "raw" JSON counts in +this table. ++----------+---------------+------+-----+---------+----------------+ +| Field | Type | Null | Key | Default | Extra | ++----------+---------------+------+-----+---------+----------------+ +| id | int(11) | NO | PRI | NULL | auto_increment | +| name | varchar(64) | NO | UNI | NULL | | +| hash | char(32) | NO | | NULL | | +| status | int(11) | NO | MUL | 0 | | +| size | int(11) | YES | | NULL | | +| datetime | datetime | YES | | NULL | | +| worker | varchar(256) | YES | | NULL | | +| elapsed | float | YES | | NULL | | +| data | varchar(2048) | YES | | NULL | | ++----------+---------------+------+-----+---------+----------------+ +id: unique identifier for each record +name: name of the access log +hash: md5 hash of the file, as reported by the dumps site (all zeroes if no + hash is provided) +status: the status of the job, using the following values: + 0: queued for download + 1: download in progress + 2: queued for extraction + 3: extracted to `wiki` table + (any negative value indicates failure) +size: the size, in bytes, of the downloaded file +datetime: the timestamp of the most recent status update +worker: name (user@hostname) of the machine working on the job +elapsed: time, in seconds, taken to complete the job +data: a JSON string containing counts for selected articles in the access log + +`wiki` is the table where access counts are stored (parsed from wiki_raw). The +"raw" JSON counts are parsed by wiki_extract.py and stored directly in this +table. ++----------+-------------+------+-----+---------+----------------+ +| Field | Type | Null | Key | Default | Extra | ++----------+-------------+------+-----+---------+----------------+ +| id | int(11) | NO | PRI | NULL | auto_increment | +| datetime | datetime | NO | MUL | NULL | | +| article | varchar(64) | NO | MUL | NULL | | +| count | int(11) | NO | | NULL | | ++----------+-------------+------+-----+---------+----------------+ +id: unique identifier for each record +datetime: UTC timestamp (rounded to the nearest hour) of article access +article: name of the article +count: number of times the article was accessed in the hour + +`wiki_meta` is a metadata table for this dataset. It contains pre-calculated +date and epiweeks fields, and more importantly, the total number of English +article hits (denominator) for each `datetime` in the `wiki` table. This table +is populated in parallel with `wiki` by the wiki_extract.py script. 
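A minimal usage sketch of that denominator (assuming only the `wiki` and `wiki_meta`
schemas documented here; the credentials, the join query, and the example article name
are illustrative placeholders, not taken from this patch set):

    import mysql.connector

    # placeholder credentials for a local `epidata` database
    cnx = mysql.connector.connect(user="user", password="pass", database="epidata")
    cur = cnx.cursor()
    # hourly fraction of tracked pageviews going to one article: count / total
    cur.execute(
        "SELECT w.`datetime`, w.`count` / m.`total` "
        "FROM `wiki` w JOIN `wiki_meta` m ON m.`datetime` = w.`datetime` "
        "WHERE w.`article` = %s",
        ("influenza",),
    )
    for (dt, frac) in cur:
        print(dt, frac)
    cur.close()
    cnx.close()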
++----------+----------+------+-----+---------+----------------+ +| Field | Type | Null | Key | Default | Extra | ++----------+----------+------+-----+---------+----------------+ +| id | int(11) | NO | PRI | NULL | auto_increment | +| datetime | datetime | NO | UNI | NULL | | +| date | date | NO | | NULL | | +| epiweek | int(11) | NO | | NULL | | +| total | int(11) | NO | | NULL | | ++----------+----------+------+-----+---------+----------------+ +id: unique identifier for each record +datetime: UTC timestamp (rounded to the nearest hour) of article access +date: the date portion of `datetime` +epiweek: the year and week containing `datetime` +total: total number of English article hits in the hour + + +================= +=== Changelog === +================= + 2017-02-24 * secrets and small improvements 2016-08-14 * Increased job limit (6 -> 12) (pageviews files are ~2x smaller) -2015-08-26 +2015-08-26 * Reduced job limit (8 -> 6) -2015-08-14 +2015-08-14 * Reduced job limit (10 -> 8) -2015-08-11 +2015-08-11 + New table `wiki_meta` -2015-05-22 +2015-05-22 * Updated status codes for `wiki_raw` table -2015-05-21 +2015-05-21 * Original version """ - + # first party from . import wiki_update from . import wiki_download @@ -115,31 +115,27 @@ def main(): - # step 1: find new access logs (aka "jobs") - print('looking for new jobs...') - try: - wiki_update.run() - except: - print('wiki_update failed') - - # step 2: run a few jobs - print('running jobs...') - try: - wiki_download.run( - secrets.wiki.hmac, - download_limit=1024 * 1024 * 1024, - job_limit=12 - ) - except: - print('wiki_download failed') - - # step 3: extract counts from the staging data - print('extracting counts...') - try: - wiki_extract.run(job_limit=100) - except: - print('wiki_extract failed') - - -if __name__ == '__main__': - main() + # step 1: find new access logs (aka "jobs") + print("looking for new jobs...") + try: + wiki_update.run() + except: + print("wiki_update failed") + + # step 2: run a few jobs + print("running jobs...") + try: + wiki_download.run(secrets.wiki.hmac, download_limit=1024 * 1024 * 1024, job_limit=12) + except: + print("wiki_download failed") + + # step 3: extract counts from the staging data + print("extracting counts...") + try: + wiki_extract.run(job_limit=100) + except: + print("wiki_extract failed") + + +if __name__ == "__main__": + main() diff --git a/src/acquisition/wiki/wiki_download.py b/src/acquisition/wiki/wiki_download.py index 1a01b7f8e..07cc7fdc1 100644 --- a/src/acquisition/wiki/wiki_download.py +++ b/src/acquisition/wiki/wiki_download.py @@ -27,16 +27,16 @@ """ # python 2 and 3 -from __future__ import print_function import sys + if sys.version_info.major == 2: - # python 2 libraries - from urllib import urlencode - from urllib2 import urlopen + # python 2 libraries + from urllib import urlencode + from urllib2 import urlopen else: - # python 3 libraries - from urllib.parse import urlencode - from urllib.request import urlopen + # python 3 libraries + from urllib.parse import urlencode + from urllib.request import urlopen # common libraries import argparse @@ -53,234 +53,274 @@ VERSION = 10 -MASTER_URL = 'https://delphi.cmu.edu/~automation/public/wiki/master.php' +MASTER_URL = "https://delphi.cmu.edu/~automation/public/wiki/master.php" + def text(data_string): - return str(data_string.decode('utf-8')) + return str(data_string.decode("utf-8")) def data(text_string): - if sys.version_info.major == 2: - return text_string - else: - return bytes(text_string, 'utf-8') + if 
sys.version_info.major == 2: + return text_string + else: + return bytes(text_string, "utf-8") def get_hmac_sha256(key, msg): - key_bytes, msg_bytes = key.encode('utf-8'), msg.encode('utf-8') - return hmac.new(key_bytes, msg_bytes, hashlib.sha256).hexdigest() + key_bytes, msg_bytes = key.encode("utf-8"), msg.encode("utf-8") + return hmac.new(key_bytes, msg_bytes, hashlib.sha256).hexdigest() def extract_article_counts(filename, language, articles, debug_mode): - """ - Support multiple languages ('en' | 'es' | 'pt') - Running time optimized to O(M), which means only need to scan the whole file once - :param filename: - :param language: Different languages such as 'en', 'es', and 'pt' - :param articles: - :param debug_mode: - :return: - """ - counts = {} - articles_set = set(map(lambda x: x.lower(), articles)) - total = 0 - with open(filename, "r", encoding="utf8") as f: - for line in f: - content = line.strip().split() - if len(content) != 4: - print('unexpected article format: {0}'.format(line)) - continue - article_title = content[1].lower() - article_count = int(content[2]) - if content[0] == language: - total += article_count - if content[0] == language and article_title in articles_set: - if debug_mode: - print("Find article {0}: {1}".format(article_title, line)) - counts[article_title] = article_count - if debug_mode: - print("Total number of counts for language {0} is {1}".format(language, total)) - counts['total'] = total - return counts + """ + Support multiple languages ('en' | 'es' | 'pt') + Running time optimized to O(M), which means only need to scan the whole file once + :param filename: + :param language: Different languages such as 'en', 'es', and 'pt' + :param articles: + :param debug_mode: + :return: + """ + counts = {} + articles_set = set(map(lambda x: x.lower(), articles)) + total = 0 + with open(filename, encoding="utf8") as f: + for line in f: + content = line.strip().split() + if len(content) != 4: + print(f"unexpected article format: {line}") + continue + article_title = content[1].lower() + article_count = int(content[2]) + if content[0] == language: + total += article_count + if content[0] == language and article_title in articles_set: + if debug_mode: + print(f"Find article {article_title}: {line}") + counts[article_title] = article_count + if debug_mode: + print(f"Total number of counts for language {language} is {total}") + counts["total"] = total + return counts def extract_article_counts_orig(articles, debug_mode): - """ - The original method which extracts article counts by shell command grep (only support en articles). - As it is difficult to deal with other languages (utf-8 encoding), we choose to use python read files. - Another things is that it is slower to go over the whole file once and once again, the time complexity is O(NM), - where N is the number of articles and M is the lines in the file - In our new implementation extract_article_counts(), the time complexity is O(M), and it can cope with utf8 encoding - :param articles: - :param debug_mode: - :return: - """ - counts = {} - for article in articles: - if debug_mode: - print(' %s' % (article)) + """ + The original method which extracts article counts by shell command grep (only support en articles). + As it is difficult to deal with other languages (utf-8 encoding), we choose to use python read files. 
+ Another things is that it is slower to go over the whole file once and once again, the time complexity is O(NM), + where N is the number of articles and M is the lines in the file + In our new implementation extract_article_counts(), the time complexity is O(M), and it can cope with utf8 encoding + :param articles: + :param debug_mode: + :return: + """ + counts = {} + for article in articles: + if debug_mode: + print(" %s" % (article)) + out = text( + subprocess.check_output( + 'LC_ALL=C grep -a -i "^en %s " raw2 | cat' % (article.lower()), shell=True + ) + ).strip() + count = 0 + if len(out) > 0: + for line in out.split("\n"): + fields = line.split() + if len(fields) != 4: + print("unexpected article format: [%s]" % (line)) + else: + count += int(fields[2]) + # print ' %4d %s'%(count, article) + counts[article.lower()] = count + if debug_mode: + print(" %d" % (count)) + print("getting total count...") out = text( - subprocess.check_output('LC_ALL=C grep -a -i "^en %s " raw2 | cat' % (article.lower()), shell=True)).strip() - count = 0 - if len(out) > 0: - for line in out.split('\n'): - fields = line.split() - if len(fields) != 4: - print('unexpected article format: [%s]' % (line)) - else: - count += int(fields[2]) - # print ' %4d %s'%(count, article) - counts[article.lower()] = count + subprocess.check_output( + 'cat raw2 | LC_ALL=C grep -a -i "^en " | cut -d" " -f 3 | awk \'{s+=$1} END {printf "%.0f", s}\'', + shell=True, + ) + ) + total = int(out) if debug_mode: - print(' %d' % (count)) - print('getting total count...') - out = text(subprocess.check_output( - 'cat raw2 | LC_ALL=C grep -a -i "^en " | cut -d" " -f 3 | awk \'{s+=$1} END {printf "%.0f", s}\'', shell=True)) - total = int(out) - if debug_mode: - print(total) - counts['total'] = total - return counts + print(total) + counts["total"] = total + return counts def run(secret, download_limit=None, job_limit=None, sleep_time=1, job_type=0, debug_mode=False): - worker = text(subprocess.check_output("echo `whoami`@`hostname`", shell=True)).strip() - print('this is [%s]'%(worker)) - if debug_mode: - print('*** running in debug mode ***') - - total_download = 0 - passed_jobs = 0 - failed_jobs = 0 - while (download_limit is None or total_download < download_limit) and (job_limit is None or (passed_jobs + failed_jobs) < job_limit): - try: - time_start = datetime.datetime.now() - req = urlopen(MASTER_URL + '?get=x&type=%s'%(job_type)) - code = req.getcode() - if code != 200: - if code == 201: - print('no jobs available') - if download_limit is None and job_limit is None: - time.sleep(60) - continue - else: - print('nothing to do, exiting') - return - else: - raise Exception('server response code (get) was %d'%(code)) - # Make the code compatible with mac os system - if platform == "darwin": - job_content = text(req.readlines()[1]) - else: - job_content = text(req.readlines()[0]) - if job_content == 'no jobs': - print('no jobs available') - if download_limit is None and job_limit is None: - time.sleep(60) - continue - else: - print('nothing to do, exiting') - return - job = json.loads(job_content) - print('received job [%d|%s]'%(job['id'], job['name'])) - # updated parsing for pageviews - maybe use a regex in the future - #year, month = int(job['name'][11:15]), int(job['name'][15:17]) - year, month = int(job['name'][10:14]), int(job['name'][14:16]) - #print 'year=%d | month=%d'%(year, month) - url = 'https://dumps.wikimedia.org/other/pageviews/%d/%d-%02d/%s'%(year, year, month, job['name']) - print('downloading file [%s]...'%(url)) - 
subprocess.check_call('curl -s %s > raw.gz'%(url), shell=True) - print('checking file size...') - # Make the code cross-platfrom, so use python to get the size of the file - # size = int(text(subprocess.check_output('ls -l raw.gz | cut -d" " -f 5', shell=True))) - size = os.stat("raw.gz").st_size - if debug_mode: - print(size) - total_download += size - if job['hash'] != '00000000000000000000000000000000': - print('checking hash...') - out = text(subprocess.check_output('md5sum raw.gz', shell=True)) - result = out[0:32] - if result != job['hash']: - raise Exception('wrong hash [expected %s, got %s]'%(job['hash'], result)) - if debug_mode: - print(result) - print('decompressing...') - subprocess.check_call('gunzip -f raw.gz', shell=True) - #print 'converting case...' - #subprocess.check_call('cat raw | tr "[:upper:]" "[:lower:]" > raw2', shell=True) - #subprocess.check_call('rm raw', shell=True) - subprocess.check_call('mv raw raw2', shell=True) - print('extracting article counts...') - - # Use python to read the file and extract counts, if you want to use the original shell method, please use - counts = {} - for language in wiki_util.Articles.available_languages: - lang2articles = {'en': wiki_util.Articles.en_articles, 'es': wiki_util.Articles.es_articles, 'pt': wiki_util.Articles.pt_articles} - articles = lang2articles[language] - articles = sorted(articles) - if debug_mode: - print("Language is {0} and target articles are {1}".format(language, articles)) - temp_counts = extract_article_counts("raw2", language, articles, debug_mode) - counts[language] = temp_counts - - if not debug_mode: - print('deleting files...') - subprocess.check_call('rm raw2', shell=True) - print('saving results...') - time_stop = datetime.datetime.now() - result = { - 'id': job['id'], - 'size': size, - 'data': json.dumps(counts), - 'worker': worker, - 'elapsed': (time_stop - time_start).total_seconds(), - } - payload = json.dumps(result) - hmac_str = get_hmac_sha256(secret, payload) - if debug_mode: - print(' hmac: %s' % hmac_str) - post_data = urlencode({'put': payload, 'hmac': hmac_str}) - req = urlopen(MASTER_URL, data=data(post_data)) - code = req.getcode() - if code != 200: - raise Exception('server response code (put) was %d'%(code)) - print('done! 
(dl=%d)'%(total_download)) - passed_jobs += 1 - except Exception as ex: - print('***** Caught Exception: %s *****'%(str(ex))) - failed_jobs += 1 - time.sleep(30) - print('passed=%d | failed=%d | total=%d'%(passed_jobs, failed_jobs, passed_jobs + failed_jobs)) - time.sleep(sleep_time) - - if download_limit is not None and total_download >= download_limit: - print('download limit has been reached [%d >= %d]'%(total_download, download_limit)) - if job_limit is not None and (passed_jobs + failed_jobs) >= job_limit: - print('job limit has been reached [%d >= %d]'%(passed_jobs + failed_jobs, job_limit)) + worker = text(subprocess.check_output("echo `whoami`@`hostname`", shell=True)).strip() + print("this is [%s]" % (worker)) + if debug_mode: + print("*** running in debug mode ***") + + total_download = 0 + passed_jobs = 0 + failed_jobs = 0 + while (download_limit is None or total_download < download_limit) and ( + job_limit is None or (passed_jobs + failed_jobs) < job_limit + ): + try: + time_start = datetime.datetime.now() + req = urlopen(MASTER_URL + "?get=x&type=%s" % (job_type)) + code = req.getcode() + if code != 200: + if code == 201: + print("no jobs available") + if download_limit is None and job_limit is None: + time.sleep(60) + continue + else: + print("nothing to do, exiting") + return + else: + raise Exception("server response code (get) was %d" % (code)) + # Make the code compatible with mac os system + if platform == "darwin": + job_content = text(req.readlines()[1]) + else: + job_content = text(req.readlines()[0]) + if job_content == "no jobs": + print("no jobs available") + if download_limit is None and job_limit is None: + time.sleep(60) + continue + else: + print("nothing to do, exiting") + return + job = json.loads(job_content) + print("received job [%d|%s]" % (job["id"], job["name"])) + # updated parsing for pageviews - maybe use a regex in the future + # year, month = int(job['name'][11:15]), int(job['name'][15:17]) + year, month = int(job["name"][10:14]), int(job["name"][14:16]) + # print 'year=%d | month=%d'%(year, month) + url = "https://dumps.wikimedia.org/other/pageviews/%d/%d-%02d/%s" % ( + year, + year, + month, + job["name"], + ) + print("downloading file [%s]..." % (url)) + subprocess.check_call("curl -s %s > raw.gz" % (url), shell=True) + print("checking file size...") + # Make the code cross-platfrom, so use python to get the size of the file + # size = int(text(subprocess.check_output('ls -l raw.gz | cut -d" " -f 5', shell=True))) + size = os.stat("raw.gz").st_size + if debug_mode: + print(size) + total_download += size + if job["hash"] != "00000000000000000000000000000000": + print("checking hash...") + out = text(subprocess.check_output("md5sum raw.gz", shell=True)) + result = out[0:32] + if result != job["hash"]: + raise Exception(f"wrong hash [expected {job['hash']}, got {result}]") + if debug_mode: + print(result) + print("decompressing...") + subprocess.check_call("gunzip -f raw.gz", shell=True) + # print 'converting case...' 
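            # (The commented-out case-folding shell pass is redundant here:
            #  extract_article_counts() lowercases both the target titles and each
            #  line's article title before matching.)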
+ # subprocess.check_call('cat raw | tr "[:upper:]" "[:lower:]" > raw2', shell=True) + # subprocess.check_call('rm raw', shell=True) + subprocess.check_call("mv raw raw2", shell=True) + print("extracting article counts...") + + # Use python to read the file and extract counts, if you want to use the original shell method, please use + counts = {} + for language in wiki_util.Articles.available_languages: + lang2articles = { + "en": wiki_util.Articles.en_articles, + "es": wiki_util.Articles.es_articles, + "pt": wiki_util.Articles.pt_articles, + } + articles = lang2articles[language] + articles = sorted(articles) + if debug_mode: + print(f"Language is {language} and target articles are {articles}") + temp_counts = extract_article_counts("raw2", language, articles, debug_mode) + counts[language] = temp_counts + + if not debug_mode: + print("deleting files...") + subprocess.check_call("rm raw2", shell=True) + print("saving results...") + time_stop = datetime.datetime.now() + result = { + "id": job["id"], + "size": size, + "data": json.dumps(counts), + "worker": worker, + "elapsed": (time_stop - time_start).total_seconds(), + } + payload = json.dumps(result) + hmac_str = get_hmac_sha256(secret, payload) + if debug_mode: + print(" hmac: %s" % hmac_str) + post_data = urlencode({"put": payload, "hmac": hmac_str}) + req = urlopen(MASTER_URL, data=data(post_data)) + code = req.getcode() + if code != 200: + raise Exception("server response code (put) was %d" % (code)) + print("done! (dl=%d)" % (total_download)) + passed_jobs += 1 + except Exception as ex: + print("***** Caught Exception: %s *****" % (str(ex))) + failed_jobs += 1 + time.sleep(30) + print( + "passed=%d | failed=%d | total=%d" + % (passed_jobs, failed_jobs, passed_jobs + failed_jobs) + ) + time.sleep(sleep_time) + + if download_limit is not None and total_download >= download_limit: + print("download limit has been reached [%d >= %d]" % (total_download, download_limit)) + if job_limit is not None and (passed_jobs + failed_jobs) >= job_limit: + print("job limit has been reached [%d >= %d]" % (passed_jobs + failed_jobs, job_limit)) def main(): - # version info - print('version', VERSION) - - # args and usage - parser = argparse.ArgumentParser() - parser.add_argument('secret', type=str, help='hmac secret key') - parser.add_argument('-b', '--blimit', action='store', type=int, default=None, help='download limit, in bytes') - parser.add_argument('-j', '--jlimit', action='store', type=int, default=None, help='job limit') - parser.add_argument('-s', '--sleep', action='store', type=int, default=1, help='seconds to sleep between each job') - parser.add_argument('-t', '--type', action='store', type=int, default=0, help='type of job') - parser.add_argument('-d', '--debug', action='store_const', const=True, default=False, help='enable debug mode') - args = parser.parse_args() - - # runtime options - secret, download_limit, job_limit, sleep_time, job_type, debug_mode = args.secret, args.blimit, args.jlimit, args.sleep, args.type, args.debug - - # run - run(secret, download_limit, job_limit, sleep_time, job_type, debug_mode) - - -if __name__ == '__main__': - main() + # version info + print("version", VERSION) + + # args and usage + parser = argparse.ArgumentParser() + parser.add_argument("secret", type=str, help="hmac secret key") + parser.add_argument( + "-b", "--blimit", action="store", type=int, default=None, help="download limit, in bytes" + ) + parser.add_argument("-j", "--jlimit", action="store", type=int, default=None, help="job limit") + 
parser.add_argument( + "-s", + "--sleep", + action="store", + type=int, + default=1, + help="seconds to sleep between each job", + ) + parser.add_argument("-t", "--type", action="store", type=int, default=0, help="type of job") + parser.add_argument( + "-d", "--debug", action="store_const", const=True, default=False, help="enable debug mode" + ) + args = parser.parse_args() + + # runtime options + secret, download_limit, job_limit, sleep_time, job_type, debug_mode = ( + args.secret, + args.blimit, + args.jlimit, + args.sleep, + args.type, + args.debug, + ) + + # run + run(secret, download_limit, job_limit, sleep_time, job_type, debug_mode) + + +if __name__ == "__main__": + main() diff --git a/src/acquisition/wiki/wiki_extract.py b/src/acquisition/wiki/wiki_extract.py index 839d7d6dc..f4e0efb96 100644 --- a/src/acquisition/wiki/wiki_extract.py +++ b/src/acquisition/wiki/wiki_extract.py @@ -35,74 +35,96 @@ def floor_timestamp(timestamp): - return datetime(timestamp.year, timestamp.month, timestamp.day, timestamp.hour) + return datetime(timestamp.year, timestamp.month, timestamp.day, timestamp.hour) def ceil_timestamp(timestamp): - return floor_timestamp(timestamp) + timedelta(hours=1) + return floor_timestamp(timestamp) + timedelta(hours=1) def round_timestamp(timestamp): - before = floor_timestamp(timestamp) - after = ceil_timestamp(timestamp) - if (timestamp - before) < (after - timestamp): - return before - else: - return after + before = floor_timestamp(timestamp) + after = ceil_timestamp(timestamp) + if (timestamp - before) < (after - timestamp): + return before + else: + return after def get_timestamp(name): - # new parsing for pageviews compared to pagecounts - maybe switch to regex in the future - #return datetime(int(name[11:15]), int(name[15:17]), int(name[17:19]), int(name[20:22]), int(name[22:24]), int(name[24:26])) - return datetime(int(name[10:14]), int(name[14:16]), int(name[16:18]), int(name[19:21]), int(name[21:23]), int(name[23:25])) + # new parsing for pageviews compared to pagecounts - maybe switch to regex in the future + # return datetime(int(name[11:15]), int(name[15:17]), int(name[17:19]), int(name[20:22]), int(name[22:24]), int(name[24:26])) + return datetime( + int(name[10:14]), + int(name[14:16]), + int(name[16:18]), + int(name[19:21]), + int(name[21:23]), + int(name[23:25]), + ) def run(job_limit=100): - # connect to the database - u, p = secrets.db.epi - cnx = mysql.connector.connect(user=u, password=p, database='epidata') - cur = cnx.cursor() - - # # Some preparation for utf-8, and it is a temporary trick solution. 
The real solution should change those char set and collation encoding to utf8 permanently - # cur.execute("SET NAMES utf8;") - # cur.execute("SET CHARACTER SET utf8;") - # # I print SHOW SESSION VARIABLES LIKE 'character\_set\_%'; and SHOW SESSION VARIABLES LIKE 'collation\_%'; on my local computer - # cur.execute("SET character_set_client=utf8mb4;") - # cur.execute("SET character_set_connection=utf8mb4;") - # cur.execute("SET character_set_database=utf8;") - # cur.execute("SET character_set_results=utf8mb4;") - # cur.execute("SET character_set_server=utf8;") - # cur.execute("SET collation_connection=utf8mb4_general_ci;") - # cur.execute("SET collation_database=utf8_general_ci;") - # cur.execute("SET collation_server=utf8_general_ci;") - - # find jobs that are queued for extraction - cur.execute('SELECT `id`, `name`, `data` FROM `wiki_raw` WHERE `status` = 2 ORDER BY `name` ASC LIMIT %s', (job_limit,)) - jobs = [] - for (id, name, data_str) in cur: - jobs.append((id, name, json.loads(data_str))) - print('Processing data from %d jobs'%(len(jobs))) - - # get the counts from the json object and insert into (or update) the database - # Notice that data_collect contains data with different languages - for (id, name, data_collect) in jobs: - print('processing job [%d|%s]...'%(id, name)) - timestamp = round_timestamp(get_timestamp(name)) - for language in data_collect.keys(): - data = data_collect[language] - for article in sorted(data.keys()): - count = data[article] - cur.execute('INSERT INTO `wiki` (`datetime`, `article`, `count`, `language`) VALUES (%s, %s, %s, %s) ON DUPLICATE KEY UPDATE `count` = `count` + %s', (str(timestamp), article.encode('utf-8').decode('latin-1'), count, language, count)) - if article == 'total': - cur.execute('INSERT INTO `wiki_meta` (`datetime`, `date`, `epiweek`, `total`, `language`) VALUES (%s, date(%s), yearweek(%s, 6), %s, %s) ON DUPLICATE KEY UPDATE `total` = `total` + %s', (str(timestamp), str(timestamp), str(timestamp), count, language, count)) - # update the job - cur.execute('UPDATE `wiki_raw` SET `status` = 3 WHERE `id` = %s', (id,)) - - # cleanup - cur.close() - cnx.commit() - cnx.close() - - -if __name__ == '__main__': - run() + # connect to the database + u, p = secrets.db.epi + cnx = mysql.connector.connect(user=u, password=p, database="epidata") + cur = cnx.cursor() + + # # Some preparation for utf-8, and it is a temporary trick solution. 
The real solution should change those char set and collation encoding to utf8 permanently + # cur.execute("SET NAMES utf8;") + # cur.execute("SET CHARACTER SET utf8;") + # # I print SHOW SESSION VARIABLES LIKE 'character\_set\_%'; and SHOW SESSION VARIABLES LIKE 'collation\_%'; on my local computer + # cur.execute("SET character_set_client=utf8mb4;") + # cur.execute("SET character_set_connection=utf8mb4;") + # cur.execute("SET character_set_database=utf8;") + # cur.execute("SET character_set_results=utf8mb4;") + # cur.execute("SET character_set_server=utf8;") + # cur.execute("SET collation_connection=utf8mb4_general_ci;") + # cur.execute("SET collation_database=utf8_general_ci;") + # cur.execute("SET collation_server=utf8_general_ci;") + + # find jobs that are queued for extraction + cur.execute( + "SELECT `id`, `name`, `data` FROM `wiki_raw` WHERE `status` = 2 ORDER BY `name` ASC LIMIT %s", + (job_limit,), + ) + jobs = [] + for (id, name, data_str) in cur: + jobs.append((id, name, json.loads(data_str))) + print("Processing data from %d jobs" % (len(jobs))) + + # get the counts from the json object and insert into (or update) the database + # Notice that data_collect contains data with different languages + for (id, name, data_collect) in jobs: + print("processing job [%d|%s]..." % (id, name)) + timestamp = round_timestamp(get_timestamp(name)) + for language in data_collect.keys(): + data = data_collect[language] + for article in sorted(data.keys()): + count = data[article] + cur.execute( + "INSERT INTO `wiki` (`datetime`, `article`, `count`, `language`) VALUES (%s, %s, %s, %s) ON DUPLICATE KEY UPDATE `count` = `count` + %s", + ( + str(timestamp), + article.encode("utf-8").decode("latin-1"), + count, + language, + count, + ), + ) + if article == "total": + cur.execute( + "INSERT INTO `wiki_meta` (`datetime`, `date`, `epiweek`, `total`, `language`) VALUES (%s, date(%s), yearweek(%s, 6), %s, %s) ON DUPLICATE KEY UPDATE `total` = `total` + %s", + (str(timestamp), str(timestamp), str(timestamp), count, language, count), + ) + # update the job + cur.execute("UPDATE `wiki_raw` SET `status` = 3 WHERE `id` = %s", (id,)) + + # cleanup + cur.close() + cnx.commit() + cnx.close() + + +if __name__ == "__main__": + run() diff --git a/src/acquisition/wiki/wiki_update.py b/src/acquisition/wiki/wiki_update.py index 411544810..c9aa6d6a2 100644 --- a/src/acquisition/wiki/wiki_update.py +++ b/src/acquisition/wiki/wiki_update.py @@ -32,87 +32,100 @@ def floor_timestamp(timestamp): - return datetime(timestamp.year, timestamp.month, timestamp.day, timestamp.hour) + return datetime(timestamp.year, timestamp.month, timestamp.day, timestamp.hour) def ceil_timestamp(timestamp): - return floor_timestamp(timestamp) + timedelta(hours=1) + return floor_timestamp(timestamp) + timedelta(hours=1) def round_timestamp(timestamp): - before = floor_timestamp(timestamp) - after = ceil_timestamp(timestamp) - if (timestamp - before) < (after - timestamp): - return before - else: - return after + before = floor_timestamp(timestamp) + after = ceil_timestamp(timestamp) + if (timestamp - before) < (after - timestamp): + return before + else: + return after def get_timestamp(name): - # If the program is cold start (there are no previous names in the table, and the name will be None) - if name is None: - curr = datetime.now() - return datetime(curr.year, curr.month, curr.day, curr.hour, curr.minute, curr.second) - # new parsing for pageviews compared to pagecounts - maybe switch to regex in the future - #return 
datetime(int(name[11:15]), int(name[15:17]), int(name[17:19]), int(name[20:22]), int(name[22:24]), int(name[24:26])) - return datetime(int(name[10:14]), int(name[14:16]), int(name[16:18]), int(name[19:21]), int(name[21:23]), int(name[23:25])) + # If the program is cold start (there are no previous names in the table, and the name will be None) + if name is None: + curr = datetime.now() + return datetime(curr.year, curr.month, curr.day, curr.hour, curr.minute, curr.second) + # new parsing for pageviews compared to pagecounts - maybe switch to regex in the future + # return datetime(int(name[11:15]), int(name[15:17]), int(name[17:19]), int(name[20:22]), int(name[22:24]), int(name[24:26])) + return datetime( + int(name[10:14]), + int(name[14:16]), + int(name[16:18]), + int(name[19:21]), + int(name[21:23]), + int(name[23:25]), + ) def get_manifest(year, month, optional=False): - # unlike pagecounts-raw, pageviews doesn't provide hashes - #url = 'https://dumps.wikimedia.org/other/pagecounts-raw/%d/%d-%02d/md5sums.txt'%(year, year, month) - url = 'https://dumps.wikimedia.org/other/pageviews/%d/%d-%02d/' % (year, year, month) - print('Checking manifest at %s...'%(url)) - response = requests.get(url) - if response.status_code == 200: - #manifest = [line.strip().split() for line in response.text.split('\n') if 'pagecounts' in line] - manifest = [('00000000000000000000000000000000', line[9:37]) for line in response.text.split('\n') if ' max_name: - new_logs[name] = hash - print(' New job: %s [%s]'%(name, hash)) - print('Found %d new job(s)'%(len(new_logs))) - - # store metadata for new jobs - for name in sorted(new_logs.keys()): - cur.execute('INSERT INTO `wiki_raw` (`name`, `hash`) VALUES (%s, %s)', (name, new_logs[name])) - - # cleanup - cur.close() - cnx.commit() - cnx.close() - - -if __name__ == '__main__': - run() + # connect to the database + u, p = secrets.db.epi + cnx = mysql.connector.connect(user=u, password=p, database="epidata") + cur = cnx.cursor() + + # get the most recent job in wiki_raw + # luckily, "pageviews" is lexicographically greater than "pagecounts-raw" + cur.execute("SELECT max(`name`) FROM `wiki_raw`") + for (max_name,) in cur: + pass + print("Last known file: %s" % (max_name)) + timestamp = get_timestamp(max_name) + + # crawl dumps.wikimedia.org to find more recent access logs + t1, t2 = floor_timestamp(timestamp), ceil_timestamp(timestamp) + manifest = get_manifest(t1.year, t1.month, optional=False) + if t2.month != t1.month: + manifest += get_manifest(t2.year, t2.month, optional=True) + + # find access logs newer than the most recent job + new_logs = {} + for (hash, name) in manifest: + if max_name is None or name > max_name: + new_logs[name] = hash + print(f" New job: {name} [{hash}]") + print("Found %d new job(s)" % (len(new_logs))) + + # store metadata for new jobs + for name in sorted(new_logs.keys()): + cur.execute( + "INSERT INTO `wiki_raw` (`name`, `hash`) VALUES (%s, %s)", (name, new_logs[name]) + ) + + # cleanup + cur.close() + cnx.commit() + cnx.close() + + +if __name__ == "__main__": + run() diff --git a/src/acquisition/wiki/wiki_util.py b/src/acquisition/wiki/wiki_util.py index ed3c743bc..55bf3e2ca 100644 --- a/src/acquisition/wiki/wiki_util.py +++ b/src/acquisition/wiki/wiki_util.py @@ -1,159 +1,156 @@ - - - class Articles: # Notice that all languages must be two chars, because that `language` column in table `wiki` is CHAR(2) - available_languages = ['en', 'es', 'pt'] + available_languages = ["en", "es", "pt"] en_articles_flu = [ - 'Influenza_B_virus', - 
'Influenza_A_virus', - 'Human_flu', - 'Influenzavirus_C', - 'Oseltamivir', - 'Influenza', - 'Influenzavirus_A', - 'Influenza_A_virus_subtype_H1N1', - 'Zanamivir', - 'Influenza-like_illness', - 'Common_cold', - 'Sore_throat', - 'Flu_season', - 'Chills', - 'Fever', - 'Influenza_A_virus_subtype_H2N2', - 'Swine_influenza', - 'Shivering', - 'Canine_influenza', - 'Influenza_A_virus_subtype_H3N2', - 'Neuraminidase_inhibitor', - 'Influenza_pandemic', - 'Viral_pneumonia', - 'Influenza_prevention', - 'Influenza_A_virus_subtype_H1N2', - 'Rhinorrhea', - 'Orthomyxoviridae', - 'Nasal_congestion', - 'Gastroenteritis', - 'Rimantadine', - 'Paracetamol', - 'Amantadine', - 'Viral_neuraminidase', - 'Headache', - 'Influenza_vaccine', - 'Vomiting', - 'Cough', - 'Influenza_A_virus_subtype_H5N1', - 'Nausea', - 'Avian_influenza', - 'Influenza_A_virus_subtype_H7N9', - 'Influenza_A_virus_subtype_H10N7', - 'Influenza_A_virus_subtype_H9N2', - 'Hemagglutinin_(influenza)', - 'Influenza_A_virus_subtype_H7N7', - 'Fatigue_(medical)', - 'Myalgia', - 'Influenza_A_virus_subtype_H7N3', - 'Malaise', - 'Equine_influenza', - 'Cat_flu', - 'Influenza_A_virus_subtype_H3N8', - 'Antiviral_drugs', - 'Influenza_A_virus_subtype_H7N2', + "Influenza_B_virus", + "Influenza_A_virus", + "Human_flu", + "Influenzavirus_C", + "Oseltamivir", + "Influenza", + "Influenzavirus_A", + "Influenza_A_virus_subtype_H1N1", + "Zanamivir", + "Influenza-like_illness", + "Common_cold", + "Sore_throat", + "Flu_season", + "Chills", + "Fever", + "Influenza_A_virus_subtype_H2N2", + "Swine_influenza", + "Shivering", + "Canine_influenza", + "Influenza_A_virus_subtype_H3N2", + "Neuraminidase_inhibitor", + "Influenza_pandemic", + "Viral_pneumonia", + "Influenza_prevention", + "Influenza_A_virus_subtype_H1N2", + "Rhinorrhea", + "Orthomyxoviridae", + "Nasal_congestion", + "Gastroenteritis", + "Rimantadine", + "Paracetamol", + "Amantadine", + "Viral_neuraminidase", + "Headache", + "Influenza_vaccine", + "Vomiting", + "Cough", + "Influenza_A_virus_subtype_H5N1", + "Nausea", + "Avian_influenza", + "Influenza_A_virus_subtype_H7N9", + "Influenza_A_virus_subtype_H10N7", + "Influenza_A_virus_subtype_H9N2", + "Hemagglutinin_(influenza)", + "Influenza_A_virus_subtype_H7N7", + "Fatigue_(medical)", + "Myalgia", + "Influenza_A_virus_subtype_H7N3", + "Malaise", + "Equine_influenza", + "Cat_flu", + "Influenza_A_virus_subtype_H3N8", + "Antiviral_drugs", + "Influenza_A_virus_subtype_H7N2", ] en_articles_noro = [ - 'Norovirus', - 'Diarrhea', - 'Dehydration', - 'Gastroenteritis', - 'Vomiting', - 'Abdominal_pain', - 'Nausea', - 'Foodborne_illness', - 'Rotavirus', - 'Fecal–oral_route', - 'Intravenous_therapy', - 'Oral_rehydration_therapy', - 'Shellfish', - 'Caliciviridae', - 'Leaky_scanning', + "Norovirus", + "Diarrhea", + "Dehydration", + "Gastroenteritis", + "Vomiting", + "Abdominal_pain", + "Nausea", + "Foodborne_illness", + "Rotavirus", + "Fecal–oral_route", + "Intravenous_therapy", + "Oral_rehydration_therapy", + "Shellfish", + "Caliciviridae", + "Leaky_scanning", ] en_articles_dengue = [ - 'Dengue_fever', - 'Dengue_virus', - 'Aedes', - 'Aedes_aegypti', - 'Dengue_vaccine', - 'Mosquito', - 'Mosquito-borne_disease', - 'Blood_transfusion', - 'Paracetamol', - 'Fever', - 'Headache', - 'Rhinitis', - 'Flavivirus', - 'Exanthem', - 'Myalgia', - 'Arthralgia', - 'Thrombocytopenia', - 'Hematuria', - 'Nosebleed', - 'Petechia', - 'Nausea', - 'Vomiting', - 'Diarrhea', + "Dengue_fever", + "Dengue_virus", + "Aedes", + "Aedes_aegypti", + "Dengue_vaccine", + "Mosquito", + "Mosquito-borne_disease", + 
"Blood_transfusion", + "Paracetamol", + "Fever", + "Headache", + "Rhinitis", + "Flavivirus", + "Exanthem", + "Myalgia", + "Arthralgia", + "Thrombocytopenia", + "Hematuria", + "Nosebleed", + "Petechia", + "Nausea", + "Vomiting", + "Diarrhea", ] en_articles = list(set(en_articles_flu + en_articles_noro + en_articles_dengue)) es_articles = [ - 'Dengue', - 'Virus_dengue', - 'Aedes', - 'Aedes_aegypti', - 'Culicidae', - 'Transfusión_de_sangre', - 'Paracetamol', - 'Fiebre', - 'Cefalea', - 'Coriza', - 'Flavivirus', - 'Exantema', - 'Mosquito', - 'Mialgia', - 'Artralgia', - 'Trombocitopenia', - 'Hematuria', - 'Epistaxis', - 'Petequia', - 'Náusea', - 'Vómito', - 'Diarrea', + "Dengue", + "Virus_dengue", + "Aedes", + "Aedes_aegypti", + "Culicidae", + "Transfusión_de_sangre", + "Paracetamol", + "Fiebre", + "Cefalea", + "Coriza", + "Flavivirus", + "Exantema", + "Mosquito", + "Mialgia", + "Artralgia", + "Trombocitopenia", + "Hematuria", + "Epistaxis", + "Petequia", + "Náusea", + "Vómito", + "Diarrea", ] pt_articles = [ - 'Dengue', - 'Vírus_da_dengue', - 'Aedes', - 'Aedes_aegypti', - 'Culicidae', - 'Transfusão_de_sangue', - 'Paracetamol', - 'Febre', - 'Cefaleia', - 'Coriza', - 'Flavivírus', - 'Exantema', - 'Mialgia', - 'Artralgia', - 'Trombocitopenia', - 'Hematúria', - 'Epistaxe', - 'Petéquia', - 'Náusea', - 'Vômito', - 'Diarreia', + "Dengue", + "Vírus_da_dengue", + "Aedes", + "Aedes_aegypti", + "Culicidae", + "Transfusão_de_sangue", + "Paracetamol", + "Febre", + "Cefaleia", + "Coriza", + "Flavivírus", + "Exantema", + "Mialgia", + "Artralgia", + "Trombocitopenia", + "Hematúria", + "Epistaxe", + "Petéquia", + "Náusea", + "Vômito", + "Diarreia", ] From c827e54de344ef54f115bc3bc8083713e835c059 Mon Sep 17 00:00:00 2001 From: Dmitry Shemetov Date: Wed, 21 Jun 2023 14:09:56 -0700 Subject: [PATCH 30/43] ci(sonar): tempfiles for security warnings --- src/acquisition/ecdc/ecdc_db_update.py | 38 ++++++++++----------- src/acquisition/paho/paho_db_update.py | 46 ++++++++++++-------------- 2 files changed, 38 insertions(+), 46 deletions(-) diff --git a/src/acquisition/ecdc/ecdc_db_update.py b/src/acquisition/ecdc/ecdc_db_update.py index 6e0083ecc..86e3b1cd8 100644 --- a/src/acquisition/ecdc/ecdc_db_update.py +++ b/src/acquisition/ecdc/ecdc_db_update.py @@ -183,27 +183,23 @@ def main(): max_tries = 5 while flag < max_tries: flag = flag + 1 - tmp_dir = ''.join(random.choice('0123456789abcdefghijklmnopqrstuvwxyz') for i in range(8)) - tmp_dir = 'downloads_' + tmp_dir - subprocess.call(["mkdir",tmp_dir]) - # Use temporary directory to avoid data from different time - # downloaded to same folder - download_ecdc_data(download_dir=tmp_dir) - issue = EpiDate.today().get_ew() - files = glob.glob('%s/*.csv' % tmp_dir) - for filename in files: - with open(filename,'r') as f: - _ = f.readline() - db_error = False - for filename in files: - try: - update_from_file(issue, date, filename, test_mode=args.test) - subprocess.call(["rm",filename]) - except: - db_error = True - subprocess.call(["rm","-r",tmp_dir]) - if not db_error: - break # Exit loop with success + with tempfile.TemporaryDirectory() as tmp_dir: + # Use temporary directory to avoid data from different time + # downloaded to same folder + download_ecdc_data(download_dir=tmp_dir) + issue = EpiDate.today().get_ew() + files = glob.glob(f"{tmp_dir}/*.csv") + for filename in files: + with open(filename) as f: + _ = f.readline() + db_error = False + for filename in files: + try: + update_from_file(issue, date, filename, test_mode=args.test) + except: + db_error = True + if not 
db_error: + break # Exit loop with success if flag >= max_tries: print("WARNING: Database `ecdc_ili` did not update successfully") diff --git a/src/acquisition/paho/paho_db_update.py b/src/acquisition/paho/paho_db_update.py index 08577f580..67fbc1d28 100644 --- a/src/acquisition/paho/paho_db_update.py +++ b/src/acquisition/paho/paho_db_update.py @@ -261,32 +261,28 @@ def main(): max_tries = 5 while flag < max_tries: flag = flag + 1 - tmp_dir = ''.join(random.choice('0123456789abcdefghijklmnopqrstuvwxyz') for i in range(8)) - tmp_dir = 'downloads_' + tmp_dir - subprocess.call(["mkdir",tmp_dir]) - # Use temporary directory to avoid data from different time - # downloaded to same folder - get_paho_data(dir=tmp_dir) - issue = EpiDate.today().get_ew() - # Check to make sure we downloaded a file for every week - issueset = set() - files = glob.glob('%s/*.csv' % tmp_dir) - for filename in files: - with open(filename,'r') as f: - _ = f.readline() - data = f.readline().split(',') - issueset.add(data[6]) - db_error = False - if len(issueset) >= 53: # Shouldn't be more than 53 + with tempfile.TemporaryDirectory() as tmp_dir: + # Use temporary directory to avoid data from different time + # downloaded to same folder + get_paho_data(dir=tmp_dir) + issue = EpiDate.today().get_ew() + # Check to make sure we downloaded a file for every week + issueset = set() + files = glob.glob(f"{tmp_dir}/*.csv") for filename in files: - try: - update_from_file(issue, date, filename, test_mode=args.test) - subprocess.call(["rm",filename]) - except: - db_error = True - subprocess.call(["rm","-r",tmp_dir]) - if not db_error: - break # Exit loop with success + with open(filename) as f: + _ = f.readline() + data = f.readline().split(",") + issueset.add(data[6]) + db_error = False + if len(issueset) >= 53: # Shouldn't be more than 53 + for filename in files: + try: + update_from_file(issue, date, filename, test_mode=args.test) + except: + db_error = True + if not db_error: + break # Exit loop with success if flag >= max_tries: print("WARNING: Database `paho_dengue` did not update successfully") From 76ddfbff9700f85ca145f7d1333bc00edc2da47d Mon Sep 17 00:00:00 2001 From: Dmitry Shemetov Date: Fri, 26 May 2023 19:53:22 -0700 Subject: [PATCH 31/43] style: add .editorconfig --- .editorconfig | 21 +++++++++++++++++++++ 1 file changed, 21 insertions(+) create mode 100644 .editorconfig diff --git a/.editorconfig b/.editorconfig new file mode 100644 index 000000000..8a80734f0 --- /dev/null +++ b/.editorconfig @@ -0,0 +1,21 @@ +# EditorConfig helps developers define and maintain consistent +# coding styles between different editors and IDEs +# editorconfig.org + +root = true + + +[*] + +# Change these settings to your own preference +indent_style = space +indent_size = 4 + +# We recommend you to keep these unchanged +end_of_line = lf +charset = utf-8 +trim_trailing_whitespace = true +insert_final_newline = true + +[*.md] +trim_trailing_whitespace = false From 145dd42fe7561bdcc38b40b3769bd840057e05f5 Mon Sep 17 00:00:00 2001 From: Dmitry Shemetov Date: Mon, 5 Jun 2023 15:48:57 -0700 Subject: [PATCH 32/43] style(pylint): add pylint config --- pyproject.toml | 23 ++++++++++++++++++++++- 1 file changed, 22 insertions(+), 1 deletion(-) diff --git a/pyproject.toml b/pyproject.toml index d8589df09..a4399ca9b 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,5 +1,26 @@ - [tool.black] line-length = 100 target-version = ['py38'] include = 'server,tests/server' + +[tool.pylint] + [tool.pylint.'MESSAGES CONTROL'] + max-line-length = 100 + 
disable = [ + 'logging-format-interpolation', + # Allow pytest functions to be part of a class + 'no-self-use', + 'too-many-locals', + 'too-many-arguments', + # Allow pytest classes to have one test + 'too-few-public-methods', + ] + + [tool.pylint.'BASIC'] + # Allow arbitrarily short-named variables. + variable-rgx = ['[a-z_][a-z0-9_]*'] + argument-rgx = [ '[a-z_][a-z0-9_]*' ] + attr-rgx = ['[a-z_][a-z0-9_]*'] + + [tool.pylint.'DESIGN'] + ignored-argument-names = ['(_.*|run_as_module)'] From 1e7319ef5ea075c666e75e4fb540fb4514ec4dcd Mon Sep 17 00:00:00 2001 From: Dmitry Shemetov Date: Wed, 21 Jun 2023 13:30:34 -0700 Subject: [PATCH 33/43] Update src/acquisition/cdcp/cdc_extract.py Co-authored-by: Katie Mazaitis --- src/acquisition/cdcp/cdc_extract.py | 20 +++++--------------- 1 file changed, 5 insertions(+), 15 deletions(-) diff --git a/src/acquisition/cdcp/cdc_extract.py b/src/acquisition/cdcp/cdc_extract.py index e4d7af573..b8f772684 100644 --- a/src/acquisition/cdcp/cdc_extract.py +++ b/src/acquisition/cdcp/cdc_extract.py @@ -173,22 +173,12 @@ def extract(first_week=None, last_week=None, test_mode=False): # update each state for state in states: try: - num1 = get_num_hits(cur, epiweek, state, pages[0]) - num2 = get_num_hits(cur, epiweek, state, pages[1]) - num3 = get_num_hits(cur, epiweek, state, pages[2]) - num4 = get_num_hits(cur, epiweek, state, pages[3]) - num5 = get_num_hits(cur, epiweek, state, pages[4]) - num6 = get_num_hits(cur, epiweek, state, pages[5]) - num7 = get_num_hits(cur, epiweek, state, pages[6]) - num8 = get_num_hits(cur, epiweek, state, pages[7]) + nums = [] + for i in range(8): + nums[i] = get_num_hits(cur, epiweek, state, pages[i]) total = get_total_hits(cur, epiweek, state) - store_result( - cur, epiweek, state, num1, num2, num3, num4, num5, num6, num7, num8, total - ) - print( - " %d-%s: %d %d %d %d %d %d %d %d (%d)" - % (epiweek, state, num1, num2, num3, num4, num5, num6, num7, num8, total) - ) + store_result(cur, epiweek, state, *nums, total) + print(f" {epiweek}-{state}: {' '.join(str(n) for n in nums)} ({total})") except Exception as ex: print(" %d-%s: failed" % (epiweek, state), ex) # raise ex From b00f11bc439da092d76cdaba92f48d328ef10844 Mon Sep 17 00:00:00 2001 From: Dmitry Shemetov Date: Wed, 21 Jun 2023 14:18:04 -0700 Subject: [PATCH 34/43] style(black): add fmt off tags around parser.add_argument calls --- src/acquisition/ght/ght_update.py | 22 +++++---------------- src/acquisition/ght/google_health_trends.py | 6 +++++- src/acquisition/quidel/quidel_update.py | 14 ++++--------- src/acquisition/twtr/healthtweets.py | 22 ++++++--------------- src/acquisition/wiki/wiki_download.py | 19 +++++------------- 5 files changed, 25 insertions(+), 58 deletions(-) diff --git a/src/acquisition/ght/ght_update.py b/src/acquisition/ght/ght_update.py index 76046c5c4..7f65bbfe5 100644 --- a/src/acquisition/ght/ght_update.py +++ b/src/acquisition/ght/ght_update.py @@ -351,25 +351,13 @@ def get_num_rows(): def main(): # args and usage parser = argparse.ArgumentParser() - parser.add_argument( - "location", - action="store", - type=str, - default=None, - help="location(s) (ex: all; US; TX; CA,LA,WY)", - ) - parser.add_argument( - "term", - action="store", - type=str, - default=None, - help='term/query/topic (ex: all; /m/0cycc; "flu fever")', - ) + # fmt: off + parser.add_argument("location", action="store", type=str, default=None, help="location(s) (ex: all; US; TX; CA,LA,WY)") + parser.add_argument("term", action="store", type=str, default=None, help='term/query/topic (ex: 
all; /m/0cycc; "flu fever")') parser.add_argument("--first", "-f", default=None, type=int, help="first epiweek override") parser.add_argument("--last", "-l", default=None, type=int, help="last epiweek override") - parser.add_argument( - "--country", "-c", default="US", type=str, help="location country (ex: US; BR)" - ) + parser.add_argument("--country", "-c", default="US", type=str, help="location country (ex: US; BR)") + # fmt: on args = parser.parse_args() # sanity check diff --git a/src/acquisition/ght/google_health_trends.py b/src/acquisition/ght/google_health_trends.py index 7fd95f9a4..69d751e95 100644 --- a/src/acquisition/ght/google_health_trends.py +++ b/src/acquisition/ght/google_health_trends.py @@ -114,7 +114,10 @@ def get_data(self, start_week, end_week, location, term, resolution="week", coun def main(): # args and usage parser = argparse.ArgumentParser() - parser.add_argument("apikey", action="store", type=str, default=None, help="API key") + # fmt: off + parser.add_argument( + "apikey", action="store", type=str, default=None, help="API key" + ) parser.add_argument( "startweek", action="store", type=int, default=None, help="first week (ex: 201440)" ) @@ -127,6 +130,7 @@ def main(): parser.add_argument( "term", action="store", type=str, default=None, help="term/query/topic (ex: /m/0cycc)" ) + # fmt: on args = parser.parse_args() # get the data diff --git a/src/acquisition/quidel/quidel_update.py b/src/acquisition/quidel/quidel_update.py index 286a30834..06f8b9da5 100644 --- a/src/acquisition/quidel/quidel_update.py +++ b/src/acquisition/quidel/quidel_update.py @@ -124,19 +124,13 @@ def get_num_rows(): def main(): # args and usage parser = argparse.ArgumentParser() - parser.add_argument( - "--location", - action="store", - type=str, - default=None, - help="location(s) (ex: all; any of hhs1-10)", - ) + # fmt: off + parser.add_argument("--location", action="store", type=str, default=None, help="location(s) (ex: all; any of hhs1-10)") parser.add_argument("--first", "-f", default=None, type=int, help="first epiweek override") parser.add_argument("--last", "-l", default=None, type=int, help="last epiweek override") parser.add_argument("--force_update", "-u", action="store_true", help="force update db values") - parser.add_argument( - "--skip_email", "-s", action="store_true", help="skip email downloading step" - ) + parser.add_argument("--skip_email", "-s", action="store_true", help="skip email downloading step") + # fmt: on args = parser.parse_args() # sanity check diff --git a/src/acquisition/twtr/healthtweets.py b/src/acquisition/twtr/healthtweets.py index 31976f376..f64bbd689 100644 --- a/src/acquisition/twtr/healthtweets.py +++ b/src/acquisition/twtr/healthtweets.py @@ -216,24 +216,14 @@ def _go(self, url, method=None, referer=None, data=None): def main(): # args and usage parser = argparse.ArgumentParser() + # fmt: off parser.add_argument("username", action="store", type=str, help="healthtweets.org username") parser.add_argument("password", action="store", type=str, help="healthtweets.org password") - parser.add_argument( - "state", - action="store", - type=str, - choices=list(HealthTweets.STATE_CODES.keys()), - help="U.S. 
state (ex: TX)", - ) - parser.add_argument( - "date1", action="store", type=str, help="first date, inclusive (ex: 2015-01-01)" - ) - parser.add_argument( - "date2", action="store", type=str, help="last date, inclusive (ex: 2015-01-01)" - ) - parser.add_argument( - "-d", "--debug", action="store_const", const=True, default=False, help="enable debug mode" - ) + parser.add_argument("state", action="store", type=str, choices=list(HealthTweets.STATE_CODES.keys()), help="U.S. state (ex: TX)") + parser.add_argument("date1", action="store", type=str, help="first date, inclusive (ex: 2015-01-01)") + parser.add_argument("date2", action="store", type=str, help="last date, inclusive (ex: 2015-01-01)") + parser.add_argument("-d", "--debug", action="store_const", const=True, default=False, help="enable debug mode") + # fmt: on args = parser.parse_args() ht = HealthTweets(args.username, args.password, debug=args.debug) diff --git a/src/acquisition/wiki/wiki_download.py b/src/acquisition/wiki/wiki_download.py index 07cc7fdc1..c32fc87ed 100644 --- a/src/acquisition/wiki/wiki_download.py +++ b/src/acquisition/wiki/wiki_download.py @@ -289,23 +289,14 @@ def main(): # args and usage parser = argparse.ArgumentParser() + # fmt: off parser.add_argument("secret", type=str, help="hmac secret key") - parser.add_argument( - "-b", "--blimit", action="store", type=int, default=None, help="download limit, in bytes" - ) + parser.add_argument("-b", "--blimit", action="store", type=int, default=None, help="download limit, in bytes") parser.add_argument("-j", "--jlimit", action="store", type=int, default=None, help="job limit") - parser.add_argument( - "-s", - "--sleep", - action="store", - type=int, - default=1, - help="seconds to sleep between each job", - ) + parser.add_argument("-s", "--sleep", action="store", type=int, default=1, help="seconds to sleep between each job") parser.add_argument("-t", "--type", action="store", type=int, default=0, help="type of job") - parser.add_argument( - "-d", "--debug", action="store_const", const=True, default=False, help="enable debug mode" - ) + parser.add_argument("-d", "--debug", action="store_const", const=True, default=False, help="enable debug mode") + # fmt: on args = parser.parse_args() # runtime options From dd1b08994278e7a4586d6af34028c2d3fff378b1 Mon Sep 17 00:00:00 2001 From: Dmitry Shemetov Date: Wed, 21 Jun 2023 15:24:32 -0700 Subject: [PATCH 35/43] style: update .editorconfig --- .editorconfig | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) diff --git a/.editorconfig b/.editorconfig index 8a80734f0..b76cfd14a 100644 --- a/.editorconfig +++ b/.editorconfig @@ -4,18 +4,19 @@ root = true - [*] - -# Change these settings to your own preference -indent_style = space -indent_size = 4 - # We recommend you to keep these unchanged end_of_line = lf charset = utf-8 trim_trailing_whitespace = true insert_final_newline = true + +[*.py] +# Change these settings to your own preference +indent_style = space +indent_size = 4 + + [*.md] trim_trailing_whitespace = false From 7a27a3a1256147ffcfcbfffbc80fd28f399b2752 Mon Sep 17 00:00:00 2001 From: Dmitry Shemetov Date: Fri, 23 Jun 2023 10:26:47 -0700 Subject: [PATCH 36/43] style(flynt): convert .format and % strings to f-strings --- src/acquisition/cdcp/cdc_dropbox_receiver.py | 2 +- src/acquisition/cdcp/cdc_extract.py | 6 +-- src/acquisition/cdcp/cdc_upload.py | 2 +- src/acquisition/ecdc/ecdc_db_update.py | 12 +++--- src/acquisition/flusurv/flusurv.py | 6 +-- src/acquisition/flusurv/flusurv_update.py | 6 +-- 
src/acquisition/fluview/fluview.py | 14 +++---- src/acquisition/fluview/fluview_update.py | 34 ++++++++-------- .../fluview/impute_missing_values.py | 8 ++-- src/acquisition/ght/ght_update.py | 12 +++--- src/acquisition/ght/google_health_trends.py | 4 +- src/acquisition/kcdc/kcdc_update.py | 8 ++-- src/acquisition/nidss/taiwan_nidss.py | 12 +++--- src/acquisition/nidss/taiwan_update.py | 8 ++-- src/acquisition/paho/paho_db_update.py | 14 +++---- src/acquisition/paho/paho_download.py | 14 +++---- src/acquisition/quidel/quidel_update.py | 6 +-- src/acquisition/twtr/healthtweets.py | 21 +++++----- src/acquisition/twtr/twitter_update.py | 2 +- src/acquisition/wiki/wiki_download.py | 40 +++++++++---------- src/acquisition/wiki/wiki_extract.py | 4 +- src/acquisition/wiki/wiki_update.py | 12 +++--- 22 files changed, 121 insertions(+), 126 deletions(-) diff --git a/src/acquisition/cdcp/cdc_dropbox_receiver.py b/src/acquisition/cdcp/cdc_dropbox_receiver.py index 65626101b..4fa20368e 100644 --- a/src/acquisition/cdcp/cdc_dropbox_receiver.py +++ b/src/acquisition/cdcp/cdc_dropbox_receiver.py @@ -101,7 +101,7 @@ def fetch_data(): if resp.status_code != 200: raise Exception(["resp.status_code", resp.status_code]) dropbox_len = meta.size - print(" need %d bytes..." % dropbox_len) + print(f" need {int(dropbox_len)} bytes...") content_len = int(resp.headers.get("Content-Length", -1)) if dropbox_len != content_len: info = ["dropbox_len", dropbox_len, "content_len", content_len] diff --git a/src/acquisition/cdcp/cdc_extract.py b/src/acquisition/cdcp/cdc_extract.py index b8f772684..0d38e0bcc 100644 --- a/src/acquisition/cdcp/cdc_extract.py +++ b/src/acquisition/cdcp/cdc_extract.py @@ -110,7 +110,7 @@ def get_total_hits(cur, epiweek, state): for (total,) in cur: pass if total is None: - raise Exception("missing data for %d-%s" % (epiweek, state)) + raise Exception(f"missing data for {int(epiweek)}-{state}") return total @@ -166,7 +166,7 @@ def extract(first_week=None, last_week=None, test_mode=False): cur.execute("SELECT max(`epiweek`) FROM `cdc_meta`") for (last_week,) in cur: pass - print("extracting %d--%d" % (first_week, last_week)) + print(f"extracting {int(first_week)}--{int(last_week)}") # update each epiweek for epiweek in flu.range_epiweeks(first_week, last_week, inclusive=True): @@ -180,7 +180,7 @@ def extract(first_week=None, last_week=None, test_mode=False): store_result(cur, epiweek, state, *nums, total) print(f" {epiweek}-{state}: {' '.join(str(n) for n in nums)} ({total})") except Exception as ex: - print(" %d-%s: failed" % (epiweek, state), ex) + print(f" {int(epiweek)}-{state}: failed", ex) # raise ex sys.stdout.flush() diff --git a/src/acquisition/cdcp/cdc_upload.py b/src/acquisition/cdcp/cdc_upload.py index fef0821b7..0e191267b 100644 --- a/src/acquisition/cdcp/cdc_upload.py +++ b/src/acquisition/cdcp/cdc_upload.py @@ -232,7 +232,7 @@ def parse_zip(zf, level=1): if handler is not None: with zf.open(name) as temp: count = handler(csv.reader(io.StringIO(str(temp.read(), "utf-8")))) - print(prefix, " %d rows" % count) + print(prefix, f" {int(count)} rows") else: print(prefix, " (ignored)") diff --git a/src/acquisition/ecdc/ecdc_db_update.py b/src/acquisition/ecdc/ecdc_db_update.py index 86e3b1cd8..9a90dad5c 100644 --- a/src/acquisition/ecdc/ecdc_db_update.py +++ b/src/acquisition/ecdc/ecdc_db_update.py @@ -87,7 +87,7 @@ def safe_int(i): def get_rows(cnx, table="ecdc_ili"): # Count and return the number of rows in the `ecdc_ili` table. 
select = cnx.cursor() - select.execute("SELECT count(1) num FROM %s" % table) + select.execute(f"SELECT count(1) num FROM {table}") for (num,) in select: pass select.close() @@ -100,7 +100,7 @@ def update_from_file(issue, date, dir, test_mode=False): u, p = secrets.db.epi cnx = mysql.connector.connect(user=u, password=p, database="epidata") rows1 = get_rows(cnx, "ecdc_ili") - print("rows before: %d" % (rows1)) + print(f"rows before: {int(rows1)}") insert = cnx.cursor() # load the data, ignoring empty rows @@ -115,9 +115,9 @@ def update_from_file(issue, date, dir, test_mode=False): row["region"] = data[4] row["incidence_rate"] = data[3] rows.append(row) - print(" loaded %d rows" % len(rows)) + print(f" loaded {len(rows)} rows") entries = [obj for obj in rows if obj] - print(" found %d entries" % len(entries)) + print(f" found {len(entries)} entries") sql = """ INSERT INTO @@ -149,7 +149,7 @@ def update_from_file(issue, date, dir, test_mode=False): else: cnx.commit() rows2 = get_rows(cnx) - print("rows after: %d (added %d)" % (rows2, rows2 - rows1)) + print(f"rows after: {int(rows2)} (added {int(rows2 - rows1)})") cnx.close() @@ -171,7 +171,7 @@ def main(): raise Exception("--file and --issue must both be present or absent") date = datetime.datetime.now().strftime("%Y-%m-%d") - print("assuming release date is today, %s" % date) + print(f"assuming release date is today, {date}") ensure_tables_exist() if args.file: diff --git a/src/acquisition/flusurv/flusurv.py b/src/acquisition/flusurv/flusurv.py index 1e534b740..28105d933 100644 --- a/src/acquisition/flusurv/flusurv.py +++ b/src/acquisition/flusurv/flusurv.py @@ -80,7 +80,7 @@ def fetch_json(path, payload, call_count=1, requests_impl=requests): # it's polite to self-identify this "bot" delphi_url = "https://delphi.cmu.edu/index.html" - user_agent = "Mozilla/5.0 (compatible; delphibot/1.0; +%s)" % delphi_url + user_agent = f"Mozilla/5.0 (compatible; delphibot/1.0; +{delphi_url})" # the FluSurv AMF server flusurv_url = "https://gis.cdc.gov/GRASP/Flu3/" + path @@ -106,7 +106,7 @@ def fetch_json(path, payload, call_count=1, requests_impl=requests): if resp.status_code == 500 and call_count <= 2: # the server often fails with this status, so wait and retry delay = 10 * call_count - print("got status %d, will retry in %d sec..." 
% (resp.status_code, delay)) + print(f"got status {int(resp.status_code)}, will retry in {int(delay)} sec...") time.sleep(delay) return fetch_json(path, payload, call_count=call_count + 1) elif resp.status_code != 200: @@ -180,7 +180,7 @@ def extract_from_object(data_in): raise Exception("no data found") # print the result and return flu data - print("found data for %d weeks" % len(data_out)) + print(f"found data for {len(data_out)} weeks") return data_out diff --git a/src/acquisition/flusurv/flusurv_update.py b/src/acquisition/flusurv/flusurv_update.py index 295091104..3009c7a3d 100644 --- a/src/acquisition/flusurv/flusurv_update.py +++ b/src/acquisition/flusurv/flusurv_update.py @@ -108,7 +108,7 @@ def update(issue, location_name, test_mode=False): cnx = mysql.connector.connect(host=secrets.db.host, user=u, password=p, database="epidata") cur = cnx.cursor() rows1 = get_rows(cur) - print("rows before: %d" % rows1) + print(f"rows before: {int(rows1)}") # SQL for insert/update sql = """ @@ -148,7 +148,7 @@ def update(issue, location_name, test_mode=False): # commit and disconnect rows2 = get_rows(cur) - print("rows after: %d (+%d)" % (rows2, rows2 - rows1)) + print(f"rows after: {int(rows2)} (+{int(rows2 - rows1)})") cur.close() if test_mode: print("test mode: not committing database changes") @@ -170,7 +170,7 @@ def main(): # scrape current issue from the main page issue = flusurv.get_current_issue() - print("current issue: %d" % issue) + print(f"current issue: {int(issue)}") # fetch flusurv data if args.location == "all": diff --git a/src/acquisition/fluview/fluview.py b/src/acquisition/fluview/fluview.py index a7e9fba87..9b4e6f537 100644 --- a/src/acquisition/fluview/fluview.py +++ b/src/acquisition/fluview/fluview.py @@ -108,7 +108,7 @@ def get_tier_ids(name): location_ids[Key.TierType.hhs] = sorted(set(location_ids[Key.TierType.hhs])) num = len(location_ids[Key.TierType.hhs]) if num != 10: - raise Exception("expected 10 hhs regions, found %d" % num) + raise Exception(f"expected 10 hhs regions, found {int(num)}") # add location ids for census divisions for row in data[Key.TierListEntry.cen]: @@ -116,7 +116,7 @@ def get_tier_ids(name): location_ids[Key.TierType.cen] = sorted(set(location_ids[Key.TierType.cen])) num = len(location_ids[Key.TierType.cen]) if num != 9: - raise Exception("expected 9 census divisions, found %d" % num) + raise Exception(f"expected 9 census divisions, found {int(num)}") # add location ids for states for row in data[Key.TierListEntry.sta]: @@ -124,7 +124,7 @@ def get_tier_ids(name): location_ids[Key.TierType.sta] = sorted(set(location_ids[Key.TierType.sta])) num = len(location_ids[Key.TierType.sta]) if num != 57: - raise Exception("expected 57 states/territories/cities, found %d" % num) + raise Exception(f"expected 57 states/territories/cities, found {int(num)}") # return a useful subset of the metadata # (latest epiweek, latest season, tier ids, location ids) @@ -181,7 +181,7 @@ def save_latest(path=None): data = fetch_metadata(sess) info = get_issue_and_locations(data) issue = info["epiweek"] - print("current issue: %d" % issue) + print(f"current issue: {int(issue)}") # establish timing dt = datetime.datetime.now().strftime("%Y%m%d_%H%M%S") @@ -200,7 +200,7 @@ def save_latest(path=None): ("cen", Key.TierType.cen), ("sta", Key.TierType.sta), ): - name = "ilinet_%s_%d_%s.zip" % (delphi_name, issue, dt) + name = f"ilinet_{delphi_name}_{int(issue)}_{dt}.zip" if path is None: filename = name else: @@ -209,12 +209,12 @@ def save_latest(path=None): locations = 
info["location_ids"][cdc_name] # download and show timing information - print("downloading %s" % delphi_name) + print(f"downloading {delphi_name}") t0 = time.time() size = download_data(tier_id, locations, seasons, filename) t1 = time.time() - print(" saved %s (%d bytes in %.1f seconds)" % (filename, size, t1 - t0)) + print(f" saved {filename} ({int(size)} bytes in {t1 - t0:.1f} seconds)") files.append(filename) # return the current issue and the list of downloaded files diff --git a/src/acquisition/fluview/fluview_update.py b/src/acquisition/fluview/fluview_update.py index e463fcbaf..2c2551831 100644 --- a/src/acquisition/fluview/fluview_update.py +++ b/src/acquisition/fluview/fluview_update.py @@ -297,7 +297,7 @@ def get_rows(cnx, table="fluview"): Looking at the fluview table by default, but may pass parameter to look at public health or clinical lab data instead.""" select = cnx.cursor() - select.execute("SELECT count(1) num FROM %s" % table) + select.execute(f"SELECT count(1) num FROM {table}") for (num,) in select: pass select.close() @@ -313,16 +313,16 @@ def update_from_file_clinical(issue, date, filename, test_mode=False): u, p = secrets.db.epi cnx = mysql.connector.connect(user=u, password=p, database="epidata") rows1 = get_rows(cnx, CL_TABLE) - print("rows before: %d" % (rows1)) + print(f"rows before: {int(rows1)}") insert = cnx.cursor() # load the data, ignoring empty rows - print("loading data from %s as issued on %d" % (filename, issue)) + print(f"loading data from {filename} as issued on {int(issue)}") rows = load_zipped_csv(filename, CL_SHEET) - print(" loaded %d rows" % len(rows)) + print(f" loaded {len(rows)} rows") data = [get_clinical_data(row) for row in rows] entries = [obj for obj in data if obj] - print(" found %d entries" % len(entries)) + print(f" found {len(entries)} entries") sql = """ INSERT INTO @@ -365,7 +365,7 @@ def update_from_file_clinical(issue, date, filename, test_mode=False): else: cnx.commit() rows2 = get_rows(cnx) - print("rows after: %d (added %d)" % (rows2, rows2 - rows1)) + print(f"rows after: {int(rows2)} (added {int(rows2 - rows1)})") cnx.close() @@ -378,16 +378,16 @@ def update_from_file_public(issue, date, filename, test_mode=False): u, p = secrets.db.epi cnx = mysql.connector.connect(user=u, password=p, database="epidata") rows1 = get_rows(cnx, PHL_TABLE) - print("rows before: %d" % (rows1)) + print(f"rows before: {int(rows1)}") insert = cnx.cursor() # load the data, ignoring empty rows - print("loading data from %s as issued on %d" % (filename, issue)) + print(f"loading data from {filename} as issued on {int(issue)}") rows = load_zipped_csv(filename, PHL_SHEET) - print(" loaded %d rows" % len(rows)) + print(f" loaded {len(rows)} rows") data = [get_public_data(row) for row in rows] entries = [obj for obj in data if obj] - print(" found %d entries" % len(entries)) + print(f" found {len(entries)} entries") sql = """ INSERT INTO @@ -434,7 +434,7 @@ def update_from_file_public(issue, date, filename, test_mode=False): else: cnx.commit() rows2 = get_rows(cnx) - print("rows after: %d (added %d)" % (rows2, rows2 - rows1)) + print(f"rows after: {int(rows2)} (added {int(rows2 - rows1)})") cnx.close() @@ -447,16 +447,16 @@ def update_from_file(issue, date, filename, test_mode=False): u, p = secrets.db.epi cnx = mysql.connector.connect(user=u, password=p, database="epidata") rows1 = get_rows(cnx) - print("rows before: %d" % (rows1)) + print(f"rows before: {int(rows1)}") insert = cnx.cursor() # load the data, ignoring empty rows - print("loading data 
from %s as issued on %d" % (filename, issue)) + print(f"loading data from {filename} as issued on {int(issue)}") rows = load_zipped_csv(filename) - print(" loaded %d rows" % len(rows)) + print(f" loaded {len(rows)} rows") data = [get_ilinet_data(row) for row in rows] entries = [obj for obj in data if obj] - print(" found %d entries" % len(entries)) + print(f" found {len(entries)} entries") sql = """ INSERT INTO @@ -509,7 +509,7 @@ def update_from_file(issue, date, filename, test_mode=False): else: cnx.commit() rows2 = get_rows(cnx) - print("rows after: %d (added %d)" % (rows2, rows2 - rows1)) + print(f"rows after: {int(rows2)} (added {int(rows2 - rows1)})") cnx.close() @@ -531,7 +531,7 @@ def main(): raise Exception("--file and --issue must both be present or absent") date = datetime.datetime.now().strftime("%Y-%m-%d") - print("assuming release date is today, %s" % date) + print(f"assuming release date is today, {date}") if args.file: update_from_file(args.issue, date, args.file, test_mode=args.test) diff --git a/src/acquisition/fluview/impute_missing_values.py b/src/acquisition/fluview/impute_missing_values.py index 230dd2f7d..4b3e1d684 100644 --- a/src/acquisition/fluview/impute_missing_values.py +++ b/src/acquisition/fluview/impute_missing_values.py @@ -270,13 +270,13 @@ def impute_missing_values(database, test_mode=False): # database connection database.connect() rows1 = database.count_rows() - print("rows before: %d" % (rows1)) + print(f"rows before: {int(rows1)}") # iterate over missing epiweeks missing_rows = database.find_missing_rows() - print("missing data for %d epiweeks" % len(missing_rows)) + print(f"missing data for {len(missing_rows)} epiweeks") for issue, epiweek in missing_rows: - print("i=%d e=%d" % (issue, epiweek)) + print(f"i={int(issue)} e={int(epiweek)}") # get known values from table `fluview` known_values = database.get_known_values(issue, epiweek) @@ -317,7 +317,7 @@ def impute_missing_values(database, test_mode=False): # database cleanup rows2 = database.count_rows() - print("rows after: %d (added %d)" % (rows2, rows2 - rows1)) + print(f"rows after: {int(rows2)} (added {int(rows2 - rows1)})") commit = not test_mode database.close(commit) diff --git a/src/acquisition/ght/ght_update.py b/src/acquisition/ght/ght_update.py index 7f65bbfe5..b7d5fd493 100644 --- a/src/acquisition/ght/ght_update.py +++ b/src/acquisition/ght/ght_update.py @@ -266,7 +266,7 @@ def get_num_rows(): ew0 = 200401 if ew0 is None else flu.add_epiweeks(ew0, -4) ew0 = ew0 if first is None else first ew1 = ew1 if last is None else last - print("Checking epiweeks between %d and %d..." 
% (ew0, ew1)) + print(f"Checking epiweeks between {int(ew0)} and {int(ew1)}...") # keep track of how many rows were added rows_before = get_num_rows() @@ -283,7 +283,7 @@ def get_num_rows(): total_rows = 0 ght = GHT(API_KEY) for term in terms: - print(" [%s] using term" % term) + print(f" [{term}] using term") ll, cl = len(locations), len(countries) for i in range(max(ll, cl)): location = locations[i] if i < ll else locations[0] @@ -303,8 +303,7 @@ def get_num_rows(): else: delay = 2**attempt print( - " [%s|%s] caught exception (will retry in %ds):" - % (term, location, delay), + f" [{term}|{location}] caught exception (will retry in {int(delay)}s):", ex, ) time.sleep(delay) @@ -332,15 +331,14 @@ def get_num_rows(): ew = flu.add_epiweeks(ew, 1) if num_missing > 0: print( - " [%s|%s] missing %d/%d value(s)" - % (term, location, num_missing, len(values)) + f" [{term}|{location}] missing {int(num_missing)}/{len(values)} value(s)" ) except Exception as ex: print(f" [{term}|{location}] caught exception (will NOT retry):", ex) # keep track of how many rows were added rows_after = get_num_rows() - print("Inserted %d/%d row(s)" % (rows_after - rows_before, total_rows)) + print(f"Inserted {int(rows_after - rows_before)}/{int(total_rows)} row(s)") # cleanup cur.close() diff --git a/src/acquisition/ght/google_health_trends.py b/src/acquisition/ght/google_health_trends.py index 69d751e95..86d8fc690 100644 --- a/src/acquisition/ght/google_health_trends.py +++ b/src/acquisition/ght/google_health_trends.py @@ -142,12 +142,12 @@ def main(): expected_weeks = result["num_weeks"] received_weeks = len([v for v in values if v is not None and type(v) == float and v >= 0]) if expected_weeks != received_weeks: - raise Exception("expected %d weeks, received %d" % (expected_weeks, received_weeks)) + raise Exception(f"expected {int(expected_weeks)} weeks, received {int(received_weeks)}") # results epiweeks = [ew for ew in flu.range_epiweeks(args.startweek, args.endweek, inclusive=True)] for (epiweek, value) in zip(epiweeks, values): - print("%6d: %.3f" % (epiweek, value)) + print(f"{int(epiweek):6}: {value:.3f}") if __name__ == "__main__": diff --git a/src/acquisition/kcdc/kcdc_update.py b/src/acquisition/kcdc/kcdc_update.py index b2c12dba9..713b21f00 100644 --- a/src/acquisition/kcdc/kcdc_update.py +++ b/src/acquisition/kcdc/kcdc_update.py @@ -84,7 +84,7 @@ def safe_int(i): def get_rows(cnx, table="kcdc_ili"): # Count and return the number of rows in the `kcdc_ili` table. 
select = cnx.cursor() - select.execute("SELECT count(1) num FROM %s" % table) + select.execute(f"SELECT count(1) num FROM {table}") for (num,) in select: pass select.close() @@ -126,7 +126,7 @@ def update_from_data(ews, ilis, date, issue, test_mode=False): u, p = secrets.db.epi cnx = mysql.connector.connect(user=u, password=p, database="epidata") rows1 = get_rows(cnx) - print("rows before: %d" % (rows1)) + print(f"rows before: {int(rows1)}") insert = cnx.cursor() sql = """ @@ -160,7 +160,7 @@ def update_from_data(ews, ilis, date, issue, test_mode=False): else: cnx.commit() rows2 = get_rows(cnx) - print("rows after: %d (added %d)" % (rows2, rows2 - rows1)) + print(f"rows after: {int(rows2)} (added {int(rows2 - rows1)})") cnx.close() @@ -173,7 +173,7 @@ def main(): args = parser.parse_args() date = datetime.datetime.now().strftime("%Y-%m-%d") - print("assuming release date is today, %s" % date) + print(f"assuming release date is today, {date}") issue = EpiDate.today().get_ew() ensure_tables_exist() diff --git a/src/acquisition/nidss/taiwan_nidss.py b/src/acquisition/nidss/taiwan_nidss.py index 57f4e272d..b2e369e63 100644 --- a/src/acquisition/nidss/taiwan_nidss.py +++ b/src/acquisition/nidss/taiwan_nidss.py @@ -121,7 +121,7 @@ def _get_metadata(html): match = release_pattern.match(line) if match is not None: year, month, day = int(match.group(1)), int(match.group(2)), int(match.group(3)) - release = "%04d-%02d-%02d" % (year, month, day) + release = f"{int(year):04}-{int(month):02}-{int(day):02}" if issue is None or release is None: raise Exception("metadata not found") return issue, release @@ -173,7 +173,7 @@ def get_flu_data(): # Fetch the flu page response = requests.get(NIDSS.FLU_URL) if response.status_code != 200: - raise Exception("request failed [%d]" % response.status_code) + raise Exception(f"request failed [{int(response.status_code)}]") html = response.text # Parse metadata latest_week, release_date = NIDSS._get_metadata(html) @@ -199,7 +199,7 @@ def get_dengue_data(first_week, last_week): # Download CSV response = requests.get(NIDSS.DENGUE_URL) if response.status_code != 200: - raise Exception("export Dengue failed [%d]" % response.status_code) + raise Exception(f"export Dengue failed [{int(response.status_code)}]") csv = response.content.decode("big5-tw") # Parse the data lines = [l.strip() for l in csv.split("\n")[1:] if l.strip() != ""] @@ -231,7 +231,7 @@ def get_dengue_data(first_week, last_week): continue if epiweek not in data or location not in data[epiweek]: # Not a vaild U.S. 
epiweek - raise Exception("data missing %d-%s" % (epiweek, location)) + raise Exception(f"data missing {int(epiweek)}-{location}") # Add the counts to the location on this epiweek data[epiweek][location] += count # Return results indexed by week and location @@ -258,12 +258,12 @@ def main(): print("*** Flu ***") for region in sorted(list(fdata[ew].keys())): visits, ili = fdata[ew][region]["visits"], fdata[ew][region]["ili"] - print("region=%s | visits=%d | ili=%.3f" % (region, visits, ili)) + print(f"region={region} | visits={int(visits)} | ili={ili:.3f}") print("*** Dengue ***") for location in sorted(list(ddata[ew].keys())): region = NIDSS.LOCATION_TO_REGION[location] count = ddata[ew][location] - print("location=%s | region=%s | count=%d" % (location, region, count)) + print(f"location={location} | region={region} | count={int(count)}") if __name__ == "__main__": diff --git a/src/acquisition/nidss/taiwan_update.py b/src/acquisition/nidss/taiwan_update.py index c22f0dfaa..30d458481 100644 --- a/src/acquisition/nidss/taiwan_update.py +++ b/src/acquisition/nidss/taiwan_update.py @@ -107,8 +107,8 @@ def update(test_mode=False): u, p = secrets.db.epi cnx = mysql.connector.connect(user=u, password=p, database="epidata") rows1 = get_rows(cnx) - print("rows before (flu): %d" % (rows1[0])) - print("rows before (dengue): %d" % (rows1[1])) + print(f"rows before (flu): {int(rows1[0])}") + print(f"rows before (dengue): {int(rows1[1])}") insert = cnx.cursor() sql_flu = """ INSERT INTO @@ -149,8 +149,8 @@ def update(test_mode=False): # Cleanup insert.close() rows2 = get_rows(cnx) - print("rows after (flu): %d (added %d)" % (rows2[0], rows2[0] - rows1[0])) - print("rows after (dengue): %d (added %d)" % (rows2[1], rows2[1] - rows1[1])) + print(f"rows after (flu): {int(rows2[0])} (added {int(rows2[0] - rows1[0])})") + print(f"rows after (dengue): {int(rows2[1])} (added {int(rows2[1] - rows1[1])})") if test_mode: print("test mode: changes not commited") else: diff --git a/src/acquisition/paho/paho_db_update.py b/src/acquisition/paho/paho_db_update.py index 67fbc1d28..04e4dfe1a 100644 --- a/src/acquisition/paho/paho_db_update.py +++ b/src/acquisition/paho/paho_db_update.py @@ -110,7 +110,7 @@ def safe_int(i): def get_rows(cnx, table="paho_dengue"): # Count and return the number of rows in the `fluview` table. 
select = cnx.cursor() - select.execute("SELECT count(1) num FROM %s" % table) + select.execute(f"SELECT count(1) num FROM {table}") for (num,) in select: pass select.close() @@ -171,19 +171,19 @@ def update_from_file(issue, date, filename, test_mode=False): u, p = secrets.db.epi cnx = mysql.connector.connect(user=u, password=p, database="epidata") rows1 = get_rows(cnx, "paho_dengue") - print("rows before: %d" % (rows1)) + print(f"rows before: {int(rows1)}") insert = cnx.cursor() # load the data, ignoring empty rows - print("loading data from %s as issued on %d" % (filename, issue)) + print(f"loading data from {filename} as issued on {int(issue)}") with open(filename, encoding="utf-8") as f: c = f.read() rows = [] for l in csv.reader(StringIO(c), delimiter=","): rows.append(get_paho_row(l)) - print(" loaded %d rows" % len(rows)) + print(f" loaded {len(rows)} rows") entries = [obj for obj in rows if obj] - print(" found %d entries" % len(entries)) + print(f" found {len(entries)} entries") sql = """ INSERT INTO @@ -227,7 +227,7 @@ def update_from_file(issue, date, filename, test_mode=False): else: cnx.commit() rows2 = get_rows(cnx) - print("rows after: %d (added %d)" % (rows2, rows2 - rows1)) + print(f"rows after: {int(rows2)} (added {int(rows2 - rows1)})") cnx.close() @@ -249,7 +249,7 @@ def main(): raise Exception("--file and --issue must both be present or absent") date = datetime.datetime.now().strftime("%Y-%m-%d") - print("assuming release date is today, %s" % date) + print(f"assuming release date is today, {date}") if args.file: update_from_file(args.issue, date, args.file, test_mode=args.test) diff --git a/src/acquisition/paho/paho_download.py b/src/acquisition/paho/paho_download.py index 5308ec93f..c6fa70285 100644 --- a/src/acquisition/paho/paho_download.py +++ b/src/acquisition/paho/paho_download.py @@ -23,15 +23,15 @@ def wait_for(browser, css_selector, delay=10): WebDriverWait(browser, delay).until( EC.element_to_be_clickable((By.CSS_SELECTOR, css_selector)) ) - print("Success Loading %s" % (css_selector)) + print(f"Success Loading {css_selector}") except TimeoutException: - print("Loading %s took too much time!" % (css_selector)) + print(f"Loading {css_selector} took too much time!") def find_and_click(browser, element): element.location_once_scrolled_into_view browser.switch_to.default_content() - browser.execute_script("window.scrollBy(0,-%d)" % headerheight) + browser.execute_script(f"window.scrollBy(0,-{int(headerheight)})") browser.switch_to.frame(browser.find_element_by_tag_name("iframe")) browser.switch_to.frame(browser.find_element_by_tag_name("iframe")) element.click() @@ -130,9 +130,9 @@ def get_paho_data(offset=0, dir="downloads"): # print gp.is_displayed() try: WebDriverWait(browser, 10).until(EC.staleness_of(gp)) - print("Loaded next week % d" % (53 - offset)) + print(f"Loaded next week {int(53 - offset)}") except TimeoutException: - print("Loading next week %d took too much time!" 
% (53 - offset)) + print(f"Loading next week {int(53 - offset)} took too much time!") gp = browser.find_element_by_css_selector("div.wcGlassPane") # print gp.is_enabled() # print gp.is_selected() @@ -147,7 +147,7 @@ def get_paho_data(offset=0, dir="downloads"): for i in range(54 - offset): # If something goes wrong for whatever reason, try from the beginning try: - print("Loading week %d" % (53 - i)) + print(f"Loading week {int(53 - i)}") # (Re-)load URL browser.switch_to.window(tab2) browser.get(dataurl) @@ -182,7 +182,7 @@ def get_paho_data(offset=0, dir="downloads"): find_and_click(browser, x) curr_offset += 1 except Exception as e: - print("Got exception %s\nTrying again from week %d" % (e, 53 - offset)) + print(f"Got exception {e}\nTrying again from week {int(53 - offset)}") browser.quit() get_paho_data(offset=curr_offset) browser.quit() diff --git a/src/acquisition/quidel/quidel_update.py b/src/acquisition/quidel/quidel_update.py index 06f8b9da5..267200643 100644 --- a/src/acquisition/quidel/quidel_update.py +++ b/src/acquisition/quidel/quidel_update.py @@ -79,7 +79,7 @@ def get_num_rows(): ew0 = 200401 if ew0 is None else flu.add_epiweeks(ew0, -4) ew0 = ew0 if first is None else first ew1 = ew1 if last is None else last - print("Checking epiweeks between %d and %d..." % (ew0, ew1)) + print(f"Checking epiweeks between {int(ew0)} and {int(ew1)}...") # keep track of how many rows were added rows_before = get_num_rows() @@ -109,11 +109,11 @@ def get_num_rows(): if v == 0: num_missing += 1 if num_missing > 0: - print(" [%s] missing %d/%d value(s)" % (location, num_missing, len(ews))) + print(f" [{location}] missing {int(num_missing)}/{len(ews)} value(s)") # keep track of how many rows were added rows_after = get_num_rows() - print("Inserted %d/%d row(s)" % (rows_after - rows_before, total_rows)) + print(f"Inserted {int(rows_after - rows_before)}/{int(total_rows)} row(s)") # cleanup cur.close() diff --git a/src/acquisition/twtr/healthtweets.py b/src/acquisition/twtr/healthtweets.py index f64bbd689..13828af74 100644 --- a/src/acquisition/twtr/healthtweets.py +++ b/src/acquisition/twtr/healthtweets.py @@ -104,7 +104,7 @@ def __init__(self, username, password, debug=False): response = self._go("https://www.healthtweets.org/accounts/login") token = self._get_token(response.text) if self.debug: - print("token=%s" % (token)) + print(f"token={token}") data = { "csrfmiddlewaretoken": token, "username": username, @@ -145,13 +145,10 @@ def _get_values(self, state, date1, date2, normalized): d1, d2 = datetime.strptime(date1, "%Y-%m-%d"), datetime.strptime(date2, "%Y-%m-%d") s1, s2 = d1.strftime("%m%%2F%d%%2F%Y"), d2.strftime("%m%%2F%d%%2F%Y") count_type = "normalized" if normalized else "raw" - url = ( - "https://www.healthtweets.org/trends/plot?resolution=Day&count_type=%s&dayNum=%d&from=%s&to=%s&plot1_disease=65&location_plot1=%d" - % (count_type, (d2 - d1).days, s1, s2, state_code) - ) response = self._go( - "https://www.healthtweets.org/trends/plot?resolution=Day&count_type=%s&dayNum=%d&from=%s&to=%s&plot1_disease=65&location_plot1=%d" - % (count_type, (d2 - d1).days, s1, s2, state_code) + "https://www.healthtweets.org/trends/plot?resolution=Day" + f"&count_type={count_type}&dayNum={(d2 - d1).days}&from={s1}" + f"&to={s2}&plot1_disease=65&location_plot1={int(state_code)}" ) # print(state, date1, date2, normalized) # print(url) @@ -179,7 +176,9 @@ def check_state(self, state): raise Exception("invalid state") state_code = HealthTweets.STATE_CODES[state] response = self._go( - 
"https://www.healthtweets.org/trends/plot?resolution=Day&count_type=normalized&dayNum=7&from=01%%2F01%%2F2015&to=01%%2F07%%2F2015&plot1_disease=65&location_plot1=%d" % (state_code) + "https://www.healthtweets.org/trends/plot?resolution=Day" + "&count_type=normalized&dayNum=7&from=01%%2F01%%2F2015" + f"&to=01%%2F07%%2F2015&plot1_disease=65&location_plot1={int(state_code)}" ) lines = [line.strip() for line in response.text.split("\n")] data_line = [line for line in lines if line[:29] == 'var plotNames = ["Influenza ('] @@ -198,7 +197,7 @@ def _get_token(self, html): def _go(self, url, method=None, referer=None, data=None): if self.debug: - print("%s" % (url)) + print(url) if method is None: if data is None: method = self.session.get @@ -208,8 +207,8 @@ def _go(self, url, method=None, referer=None, data=None): html = response.text if self.debug: for item in response.history: - print(" [%d to %s]" % (item.status_code, item.headers["Location"])) - print(" %d (%d bytes)" % (response.status_code, len(html))) + print(f" [{int(item.status_code)} to {item.headers['Location']}]") + print(f" {int(response.status_code)} ({len(html)} bytes)") return response diff --git a/src/acquisition/twtr/twitter_update.py b/src/acquisition/twtr/twitter_update.py index 4354c5a80..80a023f19 100644 --- a/src/acquisition/twtr/twitter_update.py +++ b/src/acquisition/twtr/twitter_update.py @@ -102,7 +102,7 @@ def get_num_rows(): # keep track of how many rows were added rows_after = get_num_rows() - print("Inserted %d/%d row(s)" % (rows_after - rows_before, total_rows)) + print(f"Inserted {int(rows_after - rows_before)}/{int(total_rows)} row(s)") # cleanup cur.close() diff --git a/src/acquisition/wiki/wiki_download.py b/src/acquisition/wiki/wiki_download.py index c32fc87ed..8cb586c24 100644 --- a/src/acquisition/wiki/wiki_download.py +++ b/src/acquisition/wiki/wiki_download.py @@ -119,10 +119,10 @@ def extract_article_counts_orig(articles, debug_mode): counts = {} for article in articles: if debug_mode: - print(" %s" % (article)) + print(f" {article}") out = text( subprocess.check_output( - 'LC_ALL=C grep -a -i "^en %s " raw2 | cat' % (article.lower()), shell=True + f'LC_ALL=C grep -a -i "^en {article.lower()} " raw2 | cat', shell=True ) ).strip() count = 0 @@ -130,13 +130,13 @@ def extract_article_counts_orig(articles, debug_mode): for line in out.split("\n"): fields = line.split() if len(fields) != 4: - print("unexpected article format: [%s]" % (line)) + print(f"unexpected article format: [{line}]") else: count += int(fields[2]) # print ' %4d %s'%(count, article) counts[article.lower()] = count if debug_mode: - print(" %d" % (count)) + print(f" {int(count)}") print("getting total count...") out = text( subprocess.check_output( @@ -154,7 +154,7 @@ def extract_article_counts_orig(articles, debug_mode): def run(secret, download_limit=None, job_limit=None, sleep_time=1, job_type=0, debug_mode=False): worker = text(subprocess.check_output("echo `whoami`@`hostname`", shell=True)).strip() - print("this is [%s]" % (worker)) + print(f"this is [{worker}]") if debug_mode: print("*** running in debug mode ***") @@ -166,7 +166,7 @@ def run(secret, download_limit=None, job_limit=None, sleep_time=1, job_type=0, d ): try: time_start = datetime.datetime.now() - req = urlopen(MASTER_URL + "?get=x&type=%s" % (job_type)) + req = urlopen(MASTER_URL + f"?get=x&type={job_type}") code = req.getcode() if code != 200: if code == 201: @@ -178,7 +178,7 @@ def run(secret, download_limit=None, job_limit=None, sleep_time=1, job_type=0, d 
print("nothing to do, exiting") return else: - raise Exception("server response code (get) was %d" % (code)) + raise Exception(f"server response code (get) was {int(code)}") # Make the code compatible with mac os system if platform == "darwin": job_content = text(req.readlines()[1]) @@ -193,19 +193,17 @@ def run(secret, download_limit=None, job_limit=None, sleep_time=1, job_type=0, d print("nothing to do, exiting") return job = json.loads(job_content) - print("received job [%d|%s]" % (job["id"], job["name"])) + print(f"received job [{int(job['id'])}|{job['name']}]") # updated parsing for pageviews - maybe use a regex in the future # year, month = int(job['name'][11:15]), int(job['name'][15:17]) year, month = int(job["name"][10:14]), int(job["name"][14:16]) # print 'year=%d | month=%d'%(year, month) - url = "https://dumps.wikimedia.org/other/pageviews/%d/%d-%02d/%s" % ( - year, - year, - month, - job["name"], + url = ( + "https://dumps.wikimedia.org/other/" + f"pageviews/{year}/{year}-{month:02d}/{job['name']}" ) - print("downloading file [%s]..." % (url)) - subprocess.check_call("curl -s %s > raw.gz" % (url), shell=True) + print(f"downloading file [{url}]...") + subprocess.check_call(f"curl -s {url} > raw.gz", shell=True) print("checking file size...") # Make the code cross-platfrom, so use python to get the size of the file # size = int(text(subprocess.check_output('ls -l raw.gz | cut -d" " -f 5', shell=True))) @@ -259,16 +257,16 @@ def run(secret, download_limit=None, job_limit=None, sleep_time=1, job_type=0, d payload = json.dumps(result) hmac_str = get_hmac_sha256(secret, payload) if debug_mode: - print(" hmac: %s" % hmac_str) + print(f" hmac: {hmac_str}") post_data = urlencode({"put": payload, "hmac": hmac_str}) req = urlopen(MASTER_URL, data=data(post_data)) code = req.getcode() if code != 200: - raise Exception("server response code (put) was %d" % (code)) - print("done! (dl=%d)" % (total_download)) + raise Exception(f"server response code (put) was {int(code)}") + print(f"done! (dl={int(total_download)})") passed_jobs += 1 except Exception as ex: - print("***** Caught Exception: %s *****" % (str(ex))) + print(f"***** Caught Exception: {str(ex)} *****") failed_jobs += 1 time.sleep(30) print( @@ -278,9 +276,9 @@ def run(secret, download_limit=None, job_limit=None, sleep_time=1, job_type=0, d time.sleep(sleep_time) if download_limit is not None and total_download >= download_limit: - print("download limit has been reached [%d >= %d]" % (total_download, download_limit)) + print(f"download limit has been reached [{int(total_download)} >= {int(download_limit)}]") if job_limit is not None and (passed_jobs + failed_jobs) >= job_limit: - print("job limit has been reached [%d >= %d]" % (passed_jobs + failed_jobs, job_limit)) + print(f"job limit has been reached [{int(passed_jobs + failed_jobs)} >= {int(job_limit)}]") def main(): diff --git a/src/acquisition/wiki/wiki_extract.py b/src/acquisition/wiki/wiki_extract.py index f4e0efb96..718a64c20 100644 --- a/src/acquisition/wiki/wiki_extract.py +++ b/src/acquisition/wiki/wiki_extract.py @@ -91,12 +91,12 @@ def run(job_limit=100): jobs = [] for (id, name, data_str) in cur: jobs.append((id, name, json.loads(data_str))) - print("Processing data from %d jobs" % (len(jobs))) + print(f"Processing data from {len(jobs)} jobs") # get the counts from the json object and insert into (or update) the database # Notice that data_collect contains data with different languages for (id, name, data_collect) in jobs: - print("processing job [%d|%s]..." 
% (id, name)) + print(f"processing job [{int(id)}|{name}]...") timestamp = round_timestamp(get_timestamp(name)) for language in data_collect.keys(): data = data_collect[language] diff --git a/src/acquisition/wiki/wiki_update.py b/src/acquisition/wiki/wiki_update.py index c9aa6d6a2..a9f240629 100644 --- a/src/acquisition/wiki/wiki_update.py +++ b/src/acquisition/wiki/wiki_update.py @@ -68,8 +68,8 @@ def get_timestamp(name): def get_manifest(year, month, optional=False): # unlike pagecounts-raw, pageviews doesn't provide hashes # url = 'https://dumps.wikimedia.org/other/pagecounts-raw/%d/%d-%02d/md5sums.txt'%(year, year, month) - url = "https://dumps.wikimedia.org/other/pageviews/%d/%d-%02d/" % (year, year, month) - print("Checking manifest at %s..." % (url)) + url = f"https://dumps.wikimedia.org/other/pageviews/{int(year)}/{int(year)}-{int(month):02}/" + print(f"Checking manifest at {url}...") response = requests.get(url) if response.status_code == 200: # manifest = [line.strip().split() for line in response.text.split('\n') if 'pagecounts' in line] @@ -82,8 +82,8 @@ def get_manifest(year, month, optional=False): if optional: manifest = [] else: - raise Exception("expected 200 status code, but got %d" % (response.status_code)) - print("Found %d access log(s)" % (len(manifest))) + raise Exception(f"expected 200 status code, but got {int(response.status_code)}") + print(f"Found {len(manifest)} access log(s)") return manifest @@ -98,7 +98,7 @@ def run(): cur.execute("SELECT max(`name`) FROM `wiki_raw`") for (max_name,) in cur: pass - print("Last known file: %s" % (max_name)) + print(f"Last known file: {max_name}") timestamp = get_timestamp(max_name) # crawl dumps.wikimedia.org to find more recent access logs @@ -113,7 +113,7 @@ def run(): if max_name is None or name > max_name: new_logs[name] = hash print(f" New job: {name} [{hash}]") - print("Found %d new job(s)" % (len(new_logs))) + print(f"Found {len(new_logs)} new job(s)") # store metadata for new jobs for name in sorted(new_logs.keys()): From 8459cc298b931f0a6ea9342c5ab225f24623b69f Mon Sep 17 00:00:00 2001 From: Dmitry Shemetov Date: Mon, 26 Jun 2023 09:58:26 -0700 Subject: [PATCH 37/43] gh: add .git-blame-ignore-revs --- .git-blame-ignore-revs | 24 ++++++++++++++++++++++++ 1 file changed, 24 insertions(+) create mode 100644 .git-blame-ignore-revs diff --git a/.git-blame-ignore-revs b/.git-blame-ignore-revs new file mode 100644 index 000000000..97dc620be --- /dev/null +++ b/.git-blame-ignore-revs @@ -0,0 +1,24 @@ +# style(black): format cdc acquisition +980b0b7e80c7923b79e14fee620645e680785703 +# style(black): format covidcast_nowcast acquisition +9e6ff16f599e8feec34a08dd1bddbc5eae347b55 +# style(black): format ecdc acquisition +d1141d904da4e62992b97c92d5caebd8fadffd42 +# style(black): format flusurv acquisition +08af0f6b7bff85bbc2b193b63b5abf6a16ba03e4 +# style(black): format fluview acquisition +0133ef2042c4df8867e91595eb1f64873edb4632 +# style(black): format ght acquisition +b8900a0bc846888885310911efd6e26459effa99 +# style(black): format kcdc acquisition +a849384c884934b3b7c3c67b68aa6240277d6b6d +# style(black): format nidss acquisition +d04af3c02fda7708a16bec0952b1aa7475acaec7 +# style(black): format paho acquisition +7f60fbba572c1b6e5153a9ef216895bdc2f7f5b3 +# style(black): format quidel acquisition +b9ceb400d9248c8271e8342275664ac5524e335d +# style(black): format twitter acquisition +07ed83e5768f717ab0f9a62a9209e4e2cffa058d +# style(black): format wiki acquisition +923852eafa86b8f8b182d499489249ba8f815843 From 
f93f0208fe3d7de45fe72092ed88fadb203d1998 Mon Sep 17 00:00:00 2001 From: Dmitry Shemetov Date: Mon, 26 Jun 2023 10:42:57 -0700 Subject: [PATCH 38/43] style(acquisition): minor formatting fixes --- src/acquisition/ecdc/ecdc_db_update.py | 14 ++++-- src/acquisition/flusurv/flusurv_update.py | 11 ++++- src/acquisition/fluview/fluview_update.py | 14 ++++-- src/acquisition/ght/ght_update.py | 44 ++++++++++++++---- src/acquisition/ght/google_health_trends.py | 30 ++++++++++-- src/acquisition/paho/paho_db_update.py | 14 ++++-- src/acquisition/quidel/quidel.py | 2 +- src/acquisition/quidel/quidel_update.py | 38 +++++++++++++-- src/acquisition/twtr/healthtweets.py | 45 +++++++++++++++--- src/acquisition/wiki/wiki_download.py | 51 ++++++++++++++++++--- 10 files changed, 221 insertions(+), 42 deletions(-) diff --git a/src/acquisition/ecdc/ecdc_db_update.py b/src/acquisition/ecdc/ecdc_db_update.py index 9a90dad5c..84423c376 100644 --- a/src/acquisition/ecdc/ecdc_db_update.py +++ b/src/acquisition/ecdc/ecdc_db_update.py @@ -156,15 +156,23 @@ def update_from_file(issue, date, dir, test_mode=False): def main(): # args and usage parser = argparse.ArgumentParser() + # fmt: off parser.add_argument( - "--test", action="store_true", help="do dry run only, do not update the database" + "--test", + action="store_true", + help="do dry run only, do not update the database" ) parser.add_argument( - "--file", type=str, help="load an existing zip file (otherwise fetch current data)" + "--file", + type=str, + help="load an existing zip file (otherwise fetch current data)" ) parser.add_argument( - "--issue", type=int, help="issue of the file (e.g. 201740); used iff --file is given" + "--issue", + type=int, + help="issue of the file (e.g. 201740); used iff --file is given" ) + # fmt: on args = parser.parse_args() if (args.file is None) != (args.issue is None): diff --git a/src/acquisition/flusurv/flusurv_update.py b/src/acquisition/flusurv/flusurv_update.py index 3009c7a3d..1aa8e9885 100644 --- a/src/acquisition/flusurv/flusurv_update.py +++ b/src/acquisition/flusurv/flusurv_update.py @@ -160,12 +160,19 @@ def update(issue, location_name, test_mode=False): def main(): # args and usage parser = argparse.ArgumentParser() + # fmt: off parser.add_argument( - "location", help='location for which data should be scraped (e.g. "CA" or "all")' + "location", + help='location for which data should be scraped (e.g. 
"CA" or "all")' ) parser.add_argument( - "--test", "-t", default=False, action="store_true", help="do not commit database changes" + "--test", + "-t", + default=False, + action="store_true", + help="do not commit database changes" ) + # fmt: on args = parser.parse_args() # scrape current issue from the main page diff --git a/src/acquisition/fluview/fluview_update.py b/src/acquisition/fluview/fluview_update.py index 2c2551831..406725b8a 100644 --- a/src/acquisition/fluview/fluview_update.py +++ b/src/acquisition/fluview/fluview_update.py @@ -516,15 +516,23 @@ def update_from_file(issue, date, filename, test_mode=False): def main(): # args and usage parser = argparse.ArgumentParser() + # fmt: off parser.add_argument( - "--test", action="store_true", help="do dry run only, do not update the database" + "--test", + action="store_true", + help="do dry run only, do not update the database" ) parser.add_argument( - "--file", type=str, help="load an existing zip file (otherwise fetch current data)" + "--file", + type=str, + help="load an existing zip file (otherwise fetch current data)" ) parser.add_argument( - "--issue", type=int, help="issue of the file (e.g. 201740); used iff --file is given" + "--issue", + type=int, + help="issue of the file (e.g. 201740); used iff --file is given" ) + # fmt: on args = parser.parse_args() if (args.file is None) != (args.issue is None): diff --git a/src/acquisition/ght/ght_update.py b/src/acquisition/ght/ght_update.py index b7d5fd493..9e8d48d1d 100644 --- a/src/acquisition/ght/ght_update.py +++ b/src/acquisition/ght/ght_update.py @@ -330,9 +330,7 @@ def get_num_rows(): # print(' [%s|%s|%d] missing value' % (term, location, ew)) ew = flu.add_epiweeks(ew, 1) if num_missing > 0: - print( - f" [{term}|{location}] missing {int(num_missing)}/{len(values)} value(s)" - ) + print(f" [{term}|{location}] missing {int(num_missing)}/{len(values)} value(s)") except Exception as ex: print(f" [{term}|{location}] caught exception (will NOT retry):", ex) @@ -350,11 +348,41 @@ def main(): # args and usage parser = argparse.ArgumentParser() # fmt: off - parser.add_argument("location", action="store", type=str, default=None, help="location(s) (ex: all; US; TX; CA,LA,WY)") - parser.add_argument("term", action="store", type=str, default=None, help='term/query/topic (ex: all; /m/0cycc; "flu fever")') - parser.add_argument("--first", "-f", default=None, type=int, help="first epiweek override") - parser.add_argument("--last", "-l", default=None, type=int, help="last epiweek override") - parser.add_argument("--country", "-c", default="US", type=str, help="location country (ex: US; BR)") + parser.add_argument( + "location", + action="store", + type=str, + default=None, + help="location(s) (ex: all; US; TX; CA,LA,WY)" + ) + parser.add_argument( + "term", + action="store", + type=str, + default=None, + help='term/query/topic (ex: all; /m/0cycc; "flu fever")' + ) + parser.add_argument( + "--first", + "-f", + default=None, + type=int, + help="first epiweek override" + ) + parser.add_argument( + "--last", + "-l", + default=None, + type=int, + help="last epiweek override" + ) + parser.add_argument( + "--country", + "-c", + default="US", + type=str, + help="location country (ex: US; BR)" + ) # fmt: on args = parser.parse_args() diff --git a/src/acquisition/ght/google_health_trends.py b/src/acquisition/ght/google_health_trends.py index 86d8fc690..4bb8df25f 100644 --- a/src/acquisition/ght/google_health_trends.py +++ b/src/acquisition/ght/google_health_trends.py @@ -116,19 +116,39 @@ def main(): 
parser = argparse.ArgumentParser() # fmt: off parser.add_argument( - "apikey", action="store", type=str, default=None, help="API key" + "apikey", + action="store", + type=str, + default=None, + help="API key" ) parser.add_argument( - "startweek", action="store", type=int, default=None, help="first week (ex: 201440)" + "startweek", + action="store", + type=int, + default=None, + help="first week (ex: 201440)" ) parser.add_argument( - "endweek", action="store", type=int, default=None, help="last week (ex: 201520)" + "endweek", + action="store", + type=int, + default=None, + help="last week (ex: 201520)" ) parser.add_argument( - "location", action="store", type=str, default=None, help="location (ex: US)" + "location", + action="store", + type=str, + default=None, + help="location (ex: US)" ) parser.add_argument( - "term", action="store", type=str, default=None, help="term/query/topic (ex: /m/0cycc)" + "term", + action="store", + type=str, + default=None, + help="term/query/topic (ex: /m/0cycc)" ) # fmt: on args = parser.parse_args() diff --git a/src/acquisition/paho/paho_db_update.py b/src/acquisition/paho/paho_db_update.py index 04e4dfe1a..b351d3ff2 100644 --- a/src/acquisition/paho/paho_db_update.py +++ b/src/acquisition/paho/paho_db_update.py @@ -234,15 +234,23 @@ def update_from_file(issue, date, filename, test_mode=False): def main(): # args and usage parser = argparse.ArgumentParser() + # fmt: off parser.add_argument( - "--test", action="store_true", help="do dry run only, do not update the database" + "--test", + action="store_true", + help="do dry run only, do not update the database" ) parser.add_argument( - "--file", type=str, help="load an existing zip file (otherwise fetch current data)" + "--file", + type=str, + help="load an existing zip file (otherwise fetch current data)" ) parser.add_argument( - "--issue", type=int, help="issue of the file (e.g. 201740); used iff --file is given" + "--issue", + type=int, + help="issue of the file (e.g. 
201740); used iff --file is given" ) + # fmt: on args = parser.parse_args() if (args.file is None) != (args.issue is None): diff --git a/src/acquisition/quidel/quidel.py b/src/acquisition/quidel/quidel.py index 3af99774f..0540d5e7c 100644 --- a/src/acquisition/quidel/quidel.py +++ b/src/acquisition/quidel/quidel.py @@ -140,7 +140,7 @@ def retrieve_excels(self): m.select("INBOX") # here you a can choose a mail box like INBOX instead # use m.list() to get all the mailboxes # you could filter using the IMAP rules here (check https://www.example-code.com/csharp/imap-search-critera.asp) - _, items = m.search(None, "ALL") + _, items = m.search(None, "ALL") items = items[0].split() # getting the mails id # The emailids are ordered from past to now diff --git a/src/acquisition/quidel/quidel_update.py b/src/acquisition/quidel/quidel_update.py index 267200643..563cea898 100644 --- a/src/acquisition/quidel/quidel_update.py +++ b/src/acquisition/quidel/quidel_update.py @@ -125,11 +125,39 @@ def main(): # args and usage parser = argparse.ArgumentParser() # fmt: off - parser.add_argument("--location", action="store", type=str, default=None, help="location(s) (ex: all; any of hhs1-10)") - parser.add_argument("--first", "-f", default=None, type=int, help="first epiweek override") - parser.add_argument("--last", "-l", default=None, type=int, help="last epiweek override") - parser.add_argument("--force_update", "-u", action="store_true", help="force update db values") - parser.add_argument("--skip_email", "-s", action="store_true", help="skip email downloading step") + parser.add_argument( + "--location", + action="store", + type=str, + default=None, + help="location(s) (ex: all; any of hhs1-10)" + ) + parser.add_argument( + "--first", + "-f", + default=None, + type=int, + help="first epiweek override" + ) + parser.add_argument( + "--last", + "-l", + default=None, + type=int, + help="last epiweek override" + ) + parser.add_argument( + "--force_update", + "-u", + action="store_true", + help="force update db values" + ) + parser.add_argument( + "--skip_email", + "-s", + action="store_true", + help="skip email downloading step" + ) # fmt: on args = parser.parse_args() diff --git a/src/acquisition/twtr/healthtweets.py b/src/acquisition/twtr/healthtweets.py index 13828af74..c1e345162 100644 --- a/src/acquisition/twtr/healthtweets.py +++ b/src/acquisition/twtr/healthtweets.py @@ -216,12 +216,45 @@ def main(): # args and usage parser = argparse.ArgumentParser() # fmt: off - parser.add_argument("username", action="store", type=str, help="healthtweets.org username") - parser.add_argument("password", action="store", type=str, help="healthtweets.org password") - parser.add_argument("state", action="store", type=str, choices=list(HealthTweets.STATE_CODES.keys()), help="U.S. state (ex: TX)") - parser.add_argument("date1", action="store", type=str, help="first date, inclusive (ex: 2015-01-01)") - parser.add_argument("date2", action="store", type=str, help="last date, inclusive (ex: 2015-01-01)") - parser.add_argument("-d", "--debug", action="store_const", const=True, default=False, help="enable debug mode") + parser.add_argument( + "username", + action="store", + type=str, + help="healthtweets.org username" + ) + parser.add_argument( + "password", + action="store", + type=str, + help="healthtweets.org password" + ) + parser.add_argument( + "state", + action="store", + type=str, + choices=list(HealthTweets.STATE_CODES.keys()), + help="U.S. 
state (ex: TX)" + ) + parser.add_argument( + "date1", + action="store", + type=str, + help="first date, inclusive (ex: 2015-01-01)" + ) + parser.add_argument( + "date2", + action="store", + type=str, + help="last date, inclusive (ex: 2015-01-01)" + ) + parser.add_argument( + "-d", + "--debug", + action="store_const", + const=True, + default=False, + help="enable debug mode" + ) # fmt: on args = parser.parse_args() diff --git a/src/acquisition/wiki/wiki_download.py b/src/acquisition/wiki/wiki_download.py index 8cb586c24..6192eab02 100644 --- a/src/acquisition/wiki/wiki_download.py +++ b/src/acquisition/wiki/wiki_download.py @@ -288,12 +288,51 @@ def main(): # args and usage parser = argparse.ArgumentParser() # fmt: off - parser.add_argument("secret", type=str, help="hmac secret key") - parser.add_argument("-b", "--blimit", action="store", type=int, default=None, help="download limit, in bytes") - parser.add_argument("-j", "--jlimit", action="store", type=int, default=None, help="job limit") - parser.add_argument("-s", "--sleep", action="store", type=int, default=1, help="seconds to sleep between each job") - parser.add_argument("-t", "--type", action="store", type=int, default=0, help="type of job") - parser.add_argument("-d", "--debug", action="store_const", const=True, default=False, help="enable debug mode") + parser.add_argument( + "secret", + type=str, + help="hmac secret key" + ) + parser.add_argument( + "-b", + "--blimit", + action="store", + type=int, + default=None, + help="download limit, in bytes" + ) + parser.add_argument( + "-j", + "--jlimit", + action="store", + type=int, + default=None, + help="job limit" + ) + parser.add_argument( + "-s", + "--sleep", + action="store", + type=int, + default=1, + help="seconds to sleep between each job" + ) + parser.add_argument( + "-t", + "--type", + action="store", + type=int, + default=0, + help="type of job" + ) + parser.add_argument( + "-d", + "--debug", + action="store_const", + const=True, + default=False, + help="enable debug mode" + ) # fmt: on args = parser.parse_args() From 27ea8810e59fa37484b8411f4d7a1ce9643404da Mon Sep 17 00:00:00 2001 From: Dmytro Trotsko Date: Mon, 26 Jun 2023 23:45:30 +0300 Subject: [PATCH 39/43] Resolved conflicts --- src/acquisition/fluview/fluview_notify.py | 2 +- src/acquisition/fluview/fluview_update.py | 10 +++++----- src/acquisition/fluview/impute_missing_values.py | 2 +- 3 files changed, 7 insertions(+), 7 deletions(-) diff --git a/src/acquisition/fluview/fluview_notify.py b/src/acquisition/fluview/fluview_notify.py index a280889a5..3ed1a243f 100644 --- a/src/acquisition/fluview/fluview_notify.py +++ b/src/acquisition/fluview/fluview_notify.py @@ -46,7 +46,7 @@ # connect u, p = secrets.db.epi - cnx = mysql.connector.connect(user=u, password=p, database="epidata") + cnx = mysql.connector.connect(user=u, password=p, database="epidata", host=secrets.db.host) cur = cnx.cursor() # get the last known issue from the automation table `variables` diff --git a/src/acquisition/fluview/fluview_update.py b/src/acquisition/fluview/fluview_update.py index 406725b8a..defd01dad 100644 --- a/src/acquisition/fluview/fluview_update.py +++ b/src/acquisition/fluview/fluview_update.py @@ -311,7 +311,7 @@ def update_from_file_clinical(issue, date, filename, test_mode=False): # database connection u, p = secrets.db.epi - cnx = mysql.connector.connect(user=u, password=p, database="epidata") + cnx = mysql.connector.connect(user=u, password=p, database="epidata", host=secrets.db.host) rows1 = get_rows(cnx, CL_TABLE) print(f"rows 
before: {int(rows1)}") insert = cnx.cursor() @@ -364,7 +364,7 @@ def update_from_file_clinical(issue, date, filename, test_mode=False): rows2 = rows1 else: cnx.commit() - rows2 = get_rows(cnx) + rows2 = get_rows(cnx, CL_TABLE) print(f"rows after: {int(rows2)} (added {int(rows2 - rows1)})") cnx.close() @@ -376,7 +376,7 @@ def update_from_file_public(issue, date, filename, test_mode=False): # database connection u, p = secrets.db.epi - cnx = mysql.connector.connect(user=u, password=p, database="epidata") + cnx = mysql.connector.connect(user=u, password=p, database="epidata", host=secrets.db.host) rows1 = get_rows(cnx, PHL_TABLE) print(f"rows before: {int(rows1)}") insert = cnx.cursor() @@ -433,7 +433,7 @@ def update_from_file_public(issue, date, filename, test_mode=False): rows2 = rows1 else: cnx.commit() - rows2 = get_rows(cnx) + rows2 = get_rows(cnx, PHL_TABLE) print(f"rows after: {int(rows2)} (added {int(rows2 - rows1)})") cnx.close() @@ -445,7 +445,7 @@ def update_from_file(issue, date, filename, test_mode=False): # database connection u, p = secrets.db.epi - cnx = mysql.connector.connect(user=u, password=p, database="epidata") + cnx = mysql.connector.connect(user=u, password=p, database="epidata", host=secrets.db.host) rows1 = get_rows(cnx) print(f"rows before: {int(rows1)}") insert = cnx.cursor() diff --git a/src/acquisition/fluview/impute_missing_values.py b/src/acquisition/fluview/impute_missing_values.py index 4b3e1d684..c795d9cce 100644 --- a/src/acquisition/fluview/impute_missing_values.py +++ b/src/acquisition/fluview/impute_missing_values.py @@ -135,7 +135,7 @@ class Sql: def connect(self): """Connect to the database.""" u, p = secrets.db.epi - self.cnx = mysql.connector.connect(user=u, password=p, database="epidata") + self.cnx = mysql.connector.connect(user=u, password=p, database="epidata", host=secrets.db.host) self.cur = self.cnx.cursor() def close(self, commit): From dc4d74a933482386a8f1e773e118386cef8434c8 Mon Sep 17 00:00:00 2001 From: melange396 Date: Mon, 26 Jun 2023 18:08:02 -0400 Subject: [PATCH 40/43] re-enable tracking of last time an api key was used (#1213) --- src/server/_security.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/src/server/_security.py b/src/server/_security.py index 61e2608b2..b40ac445e 100644 --- a/src/server/_security.py +++ b/src/server/_security.py @@ -121,9 +121,8 @@ def decorated_function(*args, **kwargs): def update_key_last_time_used(user): - # TODO: reenable this once cc<-->aws latency issues are sorted out, or maybe do this call asynchronously - return if user: # update last usage for this user's api key to "now()" + # TODO: consider making this call asynchronously r = redis.Redis(host=REDIS_HOST, password=REDIS_PASSWORD) r.set(f"LAST_USED/{user.api_key}", datetime.strftime(datetime.now(), "%Y-%m-%d")) From 5657dee1a2ba4c90db80896b230e08f3d091c491 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Mon, 26 Jun 2023 22:47:04 -0400 Subject: [PATCH 41/43] Bump requests from 2.28.1 to 2.31.0 (#1173) Bumps [requests](https://github.com/psf/requests) from 2.28.1 to 2.31.0. - [Release notes](https://github.com/psf/requests/releases) - [Changelog](https://github.com/psf/requests/blob/main/HISTORY.md) - [Commits](https://github.com/psf/requests/compare/v2.28.1...v2.31.0) --- updated-dependencies: - dependency-name: requests dependency-type: direct:development ... 
Signed-off-by: dependabot[bot] Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com> --- requirements.api.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/requirements.api.txt b/requirements.api.txt index c7de90997..e9c1418df 100644 --- a/requirements.api.txt +++ b/requirements.api.txt @@ -12,7 +12,7 @@ pandas==1.2.3 python-dotenv==0.15.0 pyyaml redis==3.5.3 -requests==2.28.1 +requests==2.31.0 scipy==1.6.2 SQLAlchemy==1.4.40 structlog==22.1.0 From e0a09402316321f6aa7dd90034ada2fb7e904563 Mon Sep 17 00:00:00 2001 From: Dmytro Trotsko Date: Tue, 27 Jun 2023 19:01:51 +0300 Subject: [PATCH 42/43] Added new constraint fluview_clinical. Added migration file to remove duplicates from fluview_clinical --- src/ddl/fluview.sql | 2 +- src/ddl/migrations/fluview_clinical_v0.1.sql | 115 +++++++++++++++++++ 2 files changed, 116 insertions(+), 1 deletion(-) create mode 100644 src/ddl/migrations/fluview_clinical_v0.1.sql diff --git a/src/ddl/fluview.sql b/src/ddl/fluview.sql index 9da1589ce..11f10c9dc 100644 --- a/src/ddl/fluview.sql +++ b/src/ddl/fluview.sql @@ -269,8 +269,8 @@ CREATE TABLE `fluview_clinical` ( `percent_a` double DEFAULT NULL, `percent_b` double DEFAULT NULL, PRIMARY KEY (`id`), + UNIQUE KEY `issue` (`issue`, `epiweek`, `region`), KEY `release_date` (`release_date`), - KEY `issue` (`issue`), KEY `epiweek` (`epiweek`), KEY `region` (`region`), KEY `lag` (`lag`) diff --git a/src/ddl/migrations/fluview_clinical_v0.1.sql b/src/ddl/migrations/fluview_clinical_v0.1.sql new file mode 100644 index 000000000..0b8aa5855 --- /dev/null +++ b/src/ddl/migrations/fluview_clinical_v0.1.sql @@ -0,0 +1,115 @@ +USE epidata; + +-- Create new `fluview_clinical` table with proper unique constraint. +CREATE TABLE `fluview_clinical_v2` ( + `id` int(11) NOT NULL AUTO_INCREMENT, + `release_date` date NOT NULL, + `issue` int(11) NOT NULL, + `epiweek` int(11) NOT NULL, + `region` varchar(12) NOT NULL, + `lag` int(11) NOT NULL, + `total_specimens` int(11) NOT NULL, + `total_a` int(11) DEFAULT NULL, + `total_b` int(11) DEFAULT NULL, + `percent_positive` double DEFAULT NULL, + `percent_a` double DEFAULT NULL, + `percent_b` double DEFAULT NULL, + PRIMARY KEY (`id`), + UNIQUE KEY `issue` (`issue`, `epiweek`, `region`), + KEY `release_date` (`release_date`), + KEY `epiweek` (`epiweek`), + KEY `region` (`region`), + KEY `lag` (`lag`) +) ENGINE = InnoDB DEFAULT CHARSET = utf8; + + +-- Insert unique rows from `fluview_clinical` into `fluview_clinical_v2`. +-- This is done in order to reset ID counter and fill gaps betwen row's ids. 
+INSERT INTO + fluview_clinical_v2( + `release_date`, + `issue`, + `epiweek`, + `region`, + `lag`, + `total_specimens`, + `total_a`, + `total_b`, + `percent_positive`, + `percent_a`, + `percent_b` + ) +SELECT + min_release_date release_date, + tmp.issue, + tmp.epiweek, + tmp.region, + tmp.lag, + tmp.total_specimens, + tmp.total_a, + tmp.total_b, + tmp.percent_positive, + tmp.percent_a, + tmp.percent_b +FROM + ( + -- get data associated with the most recent `release_date` for each unique `(epiweek, issue, region)` key + SELECT + s.release_date, + s.issue, + s.epiweek, + s.region, + s.lag, + s.total_specimens, + s.total_a, + s.total_b, + s.percent_positive, + s.percent_a, + s.percent_b + FROM + ( + SELECT + fc.release_date, + fc.issue, + fc.epiweek, + fc.region, + fc.lag, + fc.total_specimens, + fc.total_a, + fc.total_b, + fc.percent_positive, + fc.percent_a, + fc.percent_b, + ROW_NUMBER() OVER( + PARTITION BY fc.epiweek, + fc.issue, + fc.region + ORDER BY + fc.release_date DESC + ) as row_num + FROM + fluview_clinical fc + ) s + WHERE + s.row_num = 1 + ) tmp + JOIN ( + -- JOIN to recover first/least `release_date` because thats what the acquisition process does: https://github.com/cmu-delphi/delphi-epidata/blob/7fd20cd5c34b33c2310be67867b46a91aa840be9/src/acquisition/fluview/fluview_update.py#L326 + SELECT + MIN(fc.release_date) as min_release_date, + fc.issue, + fc.epiweek, + fc.region + FROM + fluview_clinical fc + GROUP BY + fc.issue, + fc.epiweek, + fc.region + ) rel_date ON tmp.issue = rel_date.issue + AND tmp.epiweek = rel_date.epiweek + AND tmp.region = rel_date.region; + +DROP TABLE fluview_clinical; + +ALTER TABLE fluview_clinical_v2 RENAME fluview_clinical; \ No newline at end of file From 7cfc97d85b01c4729c09554ef9fc2ad3af147c1b Mon Sep 17 00:00:00 2001 From: nolangormley Date: Tue, 27 Jun 2023 18:16:12 +0000 Subject: [PATCH 43/43] chore: release delphi-epidata 4.1.4 --- .bumpversion.cfg | 2 +- dev/local/setup.cfg | 2 +- src/client/delphi_epidata.R | 2 +- src/client/delphi_epidata.js | 2 +- src/client/packaging/npm/package.json | 2 +- src/client/packaging/pypi/delphi_epidata/__init__.py | 2 +- src/client/packaging/pypi/setup.py | 2 +- src/server/_config.py | 2 +- 8 files changed, 8 insertions(+), 8 deletions(-) diff --git a/.bumpversion.cfg b/.bumpversion.cfg index 99c2373b6..358c9029d 100644 --- a/.bumpversion.cfg +++ b/.bumpversion.cfg @@ -1,5 +1,5 @@ [bumpversion] -current_version = 4.1.3 +current_version = 4.1.4 commit = False tag = False diff --git a/dev/local/setup.cfg b/dev/local/setup.cfg index 69bc91778..1b5529b22 100644 --- a/dev/local/setup.cfg +++ b/dev/local/setup.cfg @@ -1,6 +1,6 @@ [metadata] name = Delphi Development -version = 4.1.3 +version = 4.1.4 [options] packages = diff --git a/src/client/delphi_epidata.R b/src/client/delphi_epidata.R index 627948cc2..99201abd8 100644 --- a/src/client/delphi_epidata.R +++ b/src/client/delphi_epidata.R @@ -15,7 +15,7 @@ Epidata <- (function() { # API base url BASE_URL <- getOption('epidata.url', default = 'https://api.delphi.cmu.edu/epidata/') - client_version <- '4.1.3' + client_version <- '4.1.4' auth <- getOption("epidata.auth", default = NA) diff --git a/src/client/delphi_epidata.js b/src/client/delphi_epidata.js index 117fe8949..1987595d7 100644 --- a/src/client/delphi_epidata.js +++ b/src/client/delphi_epidata.js @@ -22,7 +22,7 @@ } })(this, function (exports, fetchImpl, jQuery) { const BASE_URL = "https://api.delphi.cmu.edu/epidata/"; - const client_version = "4.1.3"; + const client_version = "4.1.4"; // 
Helper function to cast values and/or ranges to strings function _listitem(value) { diff --git a/src/client/packaging/npm/package.json b/src/client/packaging/npm/package.json index 40c3d53a6..492d63760 100644 --- a/src/client/packaging/npm/package.json +++ b/src/client/packaging/npm/package.json @@ -2,7 +2,7 @@ "name": "delphi_epidata", "description": "Delphi Epidata API Client", "authors": "Delphi Group", - "version": "4.1.3", + "version": "4.1.4", "license": "MIT", "homepage": "https://github.com/cmu-delphi/delphi-epidata", "bugs": { diff --git a/src/client/packaging/pypi/delphi_epidata/__init__.py b/src/client/packaging/pypi/delphi_epidata/__init__.py index c8f19f67c..1e280b80c 100644 --- a/src/client/packaging/pypi/delphi_epidata/__init__.py +++ b/src/client/packaging/pypi/delphi_epidata/__init__.py @@ -1,4 +1,4 @@ from .delphi_epidata import Epidata name = 'delphi_epidata' -__version__ = '4.1.3' +__version__ = '4.1.4' diff --git a/src/client/packaging/pypi/setup.py b/src/client/packaging/pypi/setup.py index e57e565b6..5c36175b0 100644 --- a/src/client/packaging/pypi/setup.py +++ b/src/client/packaging/pypi/setup.py @@ -5,7 +5,7 @@ setuptools.setup( name="delphi_epidata", - version="4.1.3", + version="4.1.4", author="David Farrow", author_email="dfarrow0@gmail.com", description="A programmatic interface to Delphi's Epidata API.", diff --git a/src/server/_config.py b/src/server/_config.py index 168512a3d..0fa9d55e3 100644 --- a/src/server/_config.py +++ b/src/server/_config.py @@ -7,7 +7,7 @@ load_dotenv() -VERSION = "4.1.3" +VERSION = "4.1.4" MAX_RESULTS = int(10e6) MAX_COMPATIBILITY_RESULTS = int(3650)