diff --git a/.editorconfig b/.editorconfig new file mode 100644 index 000000000..b76cfd14a --- /dev/null +++ b/.editorconfig @@ -0,0 +1,22 @@ +# EditorConfig helps developers define and maintain consistent +# coding styles between different editors and IDEs +# editorconfig.org + +root = true + +[*] +# We recommend you to keep these unchanged +end_of_line = lf +charset = utf-8 +trim_trailing_whitespace = true +insert_final_newline = true + + +[*.py] +# Change these settings to your own preference +indent_style = space +indent_size = 4 + + +[*.md] +trim_trailing_whitespace = false diff --git a/.git-blame-ignore-revs b/.git-blame-ignore-revs new file mode 100644 index 000000000..97dc620be --- /dev/null +++ b/.git-blame-ignore-revs @@ -0,0 +1,24 @@ +# style(black): format cdc acquisition +980b0b7e80c7923b79e14fee620645e680785703 +# style(black): format covidcast_nowcast acquisition +9e6ff16f599e8feec34a08dd1bddbc5eae347b55 +# style(black): format ecdc acquisition +d1141d904da4e62992b97c92d5caebd8fadffd42 +# style(black): format flusurv acquisition +08af0f6b7bff85bbc2b193b63b5abf6a16ba03e4 +# style(black): format fluview acquisition +0133ef2042c4df8867e91595eb1f64873edb4632 +# style(black): format ght acquisition +b8900a0bc846888885310911efd6e26459effa99 +# style(black): format kcdc acquisition +a849384c884934b3b7c3c67b68aa6240277d6b6d +# style(black): format nidss acquisition +d04af3c02fda7708a16bec0952b1aa7475acaec7 +# style(black): format paho acquisition +7f60fbba572c1b6e5153a9ef216895bdc2f7f5b3 +# style(black): format quidel acquisition +b9ceb400d9248c8271e8342275664ac5524e335d +# style(black): format twitter acquisition +07ed83e5768f717ab0f9a62a9209e4e2cffa058d +# style(black): format wiki acquisition +923852eafa86b8f8b182d499489249ba8f815843 diff --git a/pyproject.toml b/pyproject.toml index d255c2849..a4399ca9b 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,5 +1,26 @@ - [tool.black] -line-length = 200 +line-length = 100 target-version = ['py38'] include = 'server,tests/server' + +[tool.pylint] + [tool.pylint.'MESSAGES CONTROL'] + max-line-length = 100 + disable = [ + 'logging-format-interpolation', + # Allow pytest functions to be part of a class + 'no-self-use', + 'too-many-locals', + 'too-many-arguments', + # Allow pytest classes to have one test + 'too-few-public-methods', + ] + + [tool.pylint.'BASIC'] + # Allow arbitrarily short-named variables. + variable-rgx = ['[a-z_][a-z0-9_]*'] + argument-rgx = [ '[a-z_][a-z0-9_]*' ] + attr-rgx = ['[a-z_][a-z0-9_]*'] + + [tool.pylint.'DESIGN'] + ignored-argument-names = ['(_.*|run_as_module)'] diff --git a/src/acquisition/cdcp/cdc_dropbox_receiver.py b/src/acquisition/cdcp/cdc_dropbox_receiver.py index eb0d97f2a..4fa20368e 100644 --- a/src/acquisition/cdcp/cdc_dropbox_receiver.py +++ b/src/acquisition/cdcp/cdc_dropbox_receiver.py @@ -29,128 +29,128 @@ # location constants -DROPBOX_BASE_DIR = '/cdc_page_stats' -DELPHI_BASE_DIR = '/common/cdc_stage' +DROPBOX_BASE_DIR = "/cdc_page_stats" +DELPHI_BASE_DIR = "/common/cdc_stage" def get_timestamp_string(): - """ - Return the current local date and time as a string. + """ + Return the current local date and time as a string. - The format is "%Y%m%d_%H%M%S". - """ - return datetime.datetime.now().strftime('%Y%m%d_%H%M%S') + The format is "%Y%m%d_%H%M%S". 
+ """ + return datetime.datetime.now().strftime("%Y%m%d_%H%M%S") def trigger_further_processing(): - """Add CDCP processing scripts to the Automation run queue.""" + """Add CDCP processing scripts to the Automation run queue.""" - # connect - u, p = secrets.db.auto - cnx = mysql.connector.connect(user=u, password=p, database='automation') - cur = cnx.cursor() + # connect + u, p = secrets.db.auto + cnx = mysql.connector.connect(user=u, password=p, database="automation") + cur = cnx.cursor() - # add step "Process CDCP Data" to queue - cur.execute('CALL automation.RunStep(46)') + # add step "Process CDCP Data" to queue + cur.execute("CALL automation.RunStep(46)") - # disconnect - cur.close() - cnx.commit() - cnx.close() + # disconnect + cur.close() + cnx.commit() + cnx.close() def fetch_data(): - """ - Check for new files on dropbox, download them, zip them, cleanup dropbox, and - trigger further processing of new data. - """ - - # initialize dropbox api - dbx = dropbox.Dropbox(secrets.cdcp.dropbox_token) - - # look for new CDC data files - print('checking dropbox:%s' % DROPBOX_BASE_DIR) - save_list = [] - for entry in dbx.files_list_folder(DROPBOX_BASE_DIR).entries: - name = entry.name - if name.endswith('.csv') or name.endswith('.zip'): - print(' download "%s"' % name) - save_list.append(name) - else: - print(' skip "%s"' % name) - - # determine if there's anything to be done - if len(save_list) == 0: - print('did not find any new data files') - return - - # download new files, saving them inside of a new zip file - timestamp = get_timestamp_string() - zip_path = '%s/dropbox_%s.zip' % (DELPHI_BASE_DIR, timestamp) - print('downloading into delphi:%s' % zip_path) - with ZipFile(zip_path, 'w', ZIP_DEFLATED) as zf: + """ + Check for new files on dropbox, download them, zip them, cleanup dropbox, and + trigger further processing of new data. 
+ """ + + # initialize dropbox api + dbx = dropbox.Dropbox(secrets.cdcp.dropbox_token) + + # look for new CDC data files + print(f"checking dropbox: {DROPBOX_BASE_DIR}") + save_list = [] + for entry in dbx.files_list_folder(DROPBOX_BASE_DIR).entries: + name = entry.name + if name.endswith(".csv") or name.endswith(".zip"): + print(f" download: {name}") + save_list.append(name) + else: + print(f" skip: {name}") + + # determine if there's anything to be done + if len(save_list) == 0: + print("did not find any new data files") + return + + # download new files, saving them inside of a new zip file + timestamp = get_timestamp_string() + zip_path = f"{DELPHI_BASE_DIR}/dropbox_{timestamp}.zip" + print(f"downloading into delphi:{zip_path}") + with ZipFile(zip_path, "w", ZIP_DEFLATED) as zf: + for name in save_list: + # location of the file on dropbox + dropbox_path = f"{DROPBOX_BASE_DIR}/{name}" + print(f" {dropbox_path}") + + # start the download + meta, resp = dbx.files_download(dropbox_path) + + # check status and length + if resp.status_code != 200: + raise Exception(["resp.status_code", resp.status_code]) + dropbox_len = meta.size + print(f" need {int(dropbox_len)} bytes...") + content_len = int(resp.headers.get("Content-Length", -1)) + if dropbox_len != content_len: + info = ["dropbox_len", dropbox_len, "content_len", content_len] + raise Exception(info) + + # finish the download, holding the data in this variable + filedata = resp.content + + # check the length again + payload_len = len(filedata) + print(" downloaded") + if dropbox_len != payload_len: + info = ["dropbox_len", dropbox_len, "payload_len", payload_len] + raise Exception(info) + + # add the downloaded file to the zip file + zf.writestr(name, filedata) + print(" added") + + # At this point, all the data is stored and awaiting further processing on + # the delphi server. + print(f"saved all new data in {zip_path}") + + # on dropbox, archive downloaded files so they won't be downloaded again + archive_dir = f"archived_reports/processed_{timestamp}" + print("archiving files...") for name in save_list: - # location of the file on dropbox - dropbox_path = '%s/%s' % (DROPBOX_BASE_DIR, name) - print(' %s' % dropbox_path) - - # start the download - meta, resp = dbx.files_download(dropbox_path) - - # check status and length - if resp.status_code != 200: - raise Exception(['resp.status_code', resp.status_code]) - dropbox_len = meta.size - print(' need %d bytes...' % dropbox_len) - content_len = int(resp.headers.get('Content-Length', -1)) - if dropbox_len != content_len: - info = ['dropbox_len', dropbox_len, 'content_len', content_len] - raise Exception(info) - - # finish the download, holding the data in this variable - filedata = resp.content - - # check the length again - payload_len = len(filedata) - print(' downloaded') - if dropbox_len != payload_len: - info = ['dropbox_len', dropbox_len, 'payload_len', payload_len] - raise Exception(info) - - # add the downloaded file to the zip file - zf.writestr(name, filedata) - print(' added') - - # At this point, all the data is stored and awaiting further processing on - # the delphi server. 
- print('saved all new data in %s' % zip_path) - - # on dropbox, archive downloaded files so they won't be downloaded again - archive_dir = 'archived_reports/processed_%s' % timestamp - print('archiving files...') - for name in save_list: - # source and destination - dropbox_src = '%s/%s' % (DROPBOX_BASE_DIR, name) - dropbox_dst = '%s/%s/%s' % (DROPBOX_BASE_DIR, archive_dir, name) - print(' "%s" -> "%s"' % (dropbox_src, dropbox_dst)) - - # move the file - meta = dbx.files_move(dropbox_src, dropbox_dst) - - # sanity check - if archive_dir not in meta.path_lower: - raise Exception('failed to move "%s"' % name) - - # finally, trigger the usual processing flow - print('triggering processing flow') - trigger_further_processing() - print('done') + # source and destination + dropbox_src = f"{DROPBOX_BASE_DIR}/{name}" + dropbox_dst = f"{DROPBOX_BASE_DIR}/{archive_dir}/{name}" + print(f" {dropbox_src} -> {dropbox_dst}") + + # move the file + meta = dbx.files_move(dropbox_src, dropbox_dst) + + # sanity check + if archive_dir not in meta.path_lower: + raise Exception(f"failed to move {name}") + + # finally, trigger the usual processing flow + print("triggering processing flow") + trigger_further_processing() + print("done") def main(): - # fetch new data - fetch_data() + # fetch new data + fetch_data() -if __name__ == '__main__': - main() +if __name__ == "__main__": + main() diff --git a/src/acquisition/cdcp/cdc_extract.py b/src/acquisition/cdcp/cdc_extract.py index 83ed08d5b..0d38e0bcc 100644 --- a/src/acquisition/cdcp/cdc_extract.py +++ b/src/acquisition/cdcp/cdc_extract.py @@ -75,7 +75,7 @@ def get_num_hits(cur, epiweek, state, page): - sql = ''' + sql = """ SELECT sum(c.`num`) `num` FROM @@ -86,36 +86,36 @@ def get_num_hits(cur, epiweek, state, page): m.`date` = c.`date` AND m.`state` = c.`state` WHERE m.`epiweek` = %s AND c.`state` = %s AND c.`page` LIKE %s - ''' - num = None - cur.execute(sql, (epiweek, state, page)) - for (num,) in cur: - pass - if num is None: - return 0 - return num + """ + num = None + cur.execute(sql, (epiweek, state, page)) + for (num,) in cur: + pass + if num is None: + return 0 + return num def get_total_hits(cur, epiweek, state): - sql = ''' + sql = """ SELECT sum(m.`total`) `total` FROM `cdc_meta` m WHERE m.`epiweek` = %s AND m.`state` = %s - ''' - total = None - cur.execute(sql, (epiweek, state)) - for (total,) in cur: - pass - if total is None: - raise Exception('missing data for %d-%s' % (epiweek, state)) - return total + """ + total = None + cur.execute(sql, (epiweek, state)) + for (total,) in cur: + pass + if total is None: + raise Exception(f"missing data for {int(epiweek)}-{state}") + return total def store_result(cur, epiweek, state, num1, num2, num3, num4, num5, num6, num7, num8, total): - sql = ''' + sql = """ INSERT INTO `cdc_extract` (`epiweek`, `state`, `num1`, `num2`, `num3`, `num4`, `num5`, `num6`, `num7`, `num8`, `total`) VALUES @@ -130,94 +130,89 @@ def store_result(cur, epiweek, state, num1, num2, num3, num4, num5, num6, num7, `num7` = %s, `num8` = %s, `total` = %s - ''' - values = [num1, num2, num3, num4, num5, num6, num7, num8, total] - args = tuple([epiweek, state] + values + values) - cur.execute(sql, args) + """ + values = [num1, num2, num3, num4, num5, num6, num7, num8, total] + args = tuple([epiweek, state] + values + values) + cur.execute(sql, args) def extract(first_week=None, last_week=None, test_mode=False): - # page title templates - pages = [ - '%What You Should Know for the % Influenza Season%', - '%What To Do If You Get Sick%', - '%Flu 
Symptoms & Severity%', - '%How Flu Spreads%', - '%What You Should Know About Flu Antiviral Drugs%', - '%Weekly US Map%', - '%Basics%', - '%Flu Activity & Surveillance%', - ] - - # location information - states = sorted(cdc_upload.STATES.values()) - - # connect - u, p = secrets.db.epi - cnx = mysql.connector.connect(user=u, password=p, database='epidata') - cur = cnx.cursor() - - # weeks to update - if first_week is None: - cur.execute('SELECT max(`epiweek`) FROM `cdc_extract`') - for (first_week,) in cur: - pass - if last_week is None: - cur.execute('SELECT max(`epiweek`) FROM `cdc_meta`') - for (last_week,) in cur: - pass - print('extracting %d--%d' % (first_week, last_week)) - - # update each epiweek - for epiweek in flu.range_epiweeks(first_week, last_week, inclusive=True): - # update each state - for state in states: - try: - num1 = get_num_hits(cur, epiweek, state, pages[0]) - num2 = get_num_hits(cur, epiweek, state, pages[1]) - num3 = get_num_hits(cur, epiweek, state, pages[2]) - num4 = get_num_hits(cur, epiweek, state, pages[3]) - num5 = get_num_hits(cur, epiweek, state, pages[4]) - num6 = get_num_hits(cur, epiweek, state, pages[5]) - num7 = get_num_hits(cur, epiweek, state, pages[6]) - num8 = get_num_hits(cur, epiweek, state, pages[7]) - total = get_total_hits(cur, epiweek, state) - store_result(cur, epiweek, state, num1, num2, num3, num4, num5, num6, num7, num8, total) - print(' %d-%s: %d %d %d %d %d %d %d %d (%d)' % (epiweek, state, num1, num2, num3, num4, num5, num6, num7, num8, total)) - except Exception as ex: - print(' %d-%s: failed' % (epiweek, state), ex) - #raise ex - sys.stdout.flush() - - # disconnect - cur.close() - if not test_mode: - cnx.commit() - cnx.close() + # page title templates + pages = [ + "%What You Should Know for the % Influenza Season%", + "%What To Do If You Get Sick%", + "%Flu Symptoms & Severity%", + "%How Flu Spreads%", + "%What You Should Know About Flu Antiviral Drugs%", + "%Weekly US Map%", + "%Basics%", + "%Flu Activity & Surveillance%", + ] + + # location information + states = sorted(cdc_upload.STATES.values()) + + # connect + u, p = secrets.db.epi + cnx = mysql.connector.connect(user=u, password=p, database="epidata") + cur = cnx.cursor() + + # weeks to update + if first_week is None: + cur.execute("SELECT max(`epiweek`) FROM `cdc_extract`") + for (first_week,) in cur: + pass + if last_week is None: + cur.execute("SELECT max(`epiweek`) FROM `cdc_meta`") + for (last_week,) in cur: + pass + print(f"extracting {int(first_week)}--{int(last_week)}") + + # update each epiweek + for epiweek in flu.range_epiweeks(first_week, last_week, inclusive=True): + # update each state + for state in states: + try: + # collect hits for each of the eight page templates; append to avoid indexing an empty list + nums = [] + for i in range(8): + nums.append(get_num_hits(cur, epiweek, state, pages[i])) + total = get_total_hits(cur, epiweek, state) + store_result(cur, epiweek, state, *nums, total) + print(f" {epiweek}-{state}: {' '.join(str(n) for n in nums)} ({total})") + except Exception as ex: + print(f" {int(epiweek)}-{state}: failed", ex) + # raise ex + sys.stdout.flush() + + # disconnect + cur.close() + if not test_mode: + cnx.commit() + cnx.close() def main(): - # args and usage - parser = argparse.ArgumentParser() - parser.add_argument('--first', '-f', default=None, type=int, help='first epiweek override') - parser.add_argument('--last', '-l', default=None, type=int, help='last epiweek override') - parser.add_argument('--epiweek', '-w', default=None, type=int, help='epiweek override') - parser.add_argument('--test', '-t', default=False, action='store_true',
help='dry run only') - args = parser.parse_args() - - # sanity check - first, last, week = args.first, args.last, args.epiweek - for ew in [first, last, week]: - if ew is not None: - flu.check_epiweek(ew) - if first is not None and last is not None and first > last: - raise Exception('epiweeks in the wrong order') - if week is not None: - first = last = week - - # extract the page hits for all states on the specified weeks - extract(first, last, args.test) - - -if __name__ == '__main__': - main() + # args and usage + parser = argparse.ArgumentParser() + parser.add_argument("--first", "-f", default=None, type=int, help="first epiweek override") + parser.add_argument("--last", "-l", default=None, type=int, help="last epiweek override") + parser.add_argument("--epiweek", "-w", default=None, type=int, help="epiweek override") + parser.add_argument("--test", "-t", default=False, action="store_true", help="dry run only") + args = parser.parse_args() + + # sanity check + first, last, week = args.first, args.last, args.epiweek + for ew in [first, last, week]: + if ew is not None: + flu.check_epiweek(ew) + if first is not None and last is not None and first > last: + raise Exception("epiweeks in the wrong order") + if week is not None: + first = last = week + + # extract the page hits for all states on the specified weeks + extract(first, last, args.test) + + +if __name__ == "__main__": + main() diff --git a/src/acquisition/cdcp/cdc_upload.py b/src/acquisition/cdcp/cdc_upload.py index c9c206dfa..0e191267b 100644 --- a/src/acquisition/cdcp/cdc_upload.py +++ b/src/acquisition/cdcp/cdc_upload.py @@ -87,191 +87,192 @@ STATES = { - 'Alabama': 'AL', - 'Alaska': 'AK', - 'Arizona': 'AZ', - 'Arkansas': 'AR', - 'California': 'CA', - 'Colorado': 'CO', - 'Connecticut': 'CT', - 'Delaware': 'DE', - 'District of Columbia': 'DC', - 'Florida': 'FL', - 'Georgia': 'GA', - 'Hawaii': 'HI', - 'Idaho': 'ID', - 'Illinois': 'IL', - 'Indiana': 'IN', - 'Iowa': 'IA', - 'Kansas': 'KS', - 'Kentucky': 'KY', - 'Louisiana': 'LA', - 'Maine': 'ME', - 'Maryland': 'MD', - 'Massachusetts': 'MA', - 'Michigan': 'MI', - 'Minnesota': 'MN', - 'Mississippi': 'MS', - 'Missouri': 'MO', - 'Montana': 'MT', - 'Nebraska': 'NE', - 'Nevada': 'NV', - 'New Hampshire': 'NH', - 'New Jersey': 'NJ', - 'New Mexico': 'NM', - 'New York': 'NY', - 'North Carolina': 'NC', - 'North Dakota': 'ND', - 'Ohio': 'OH', - 'Oklahoma': 'OK', - 'Oregon': 'OR', - 'Pennsylvania': 'PA', - 'Rhode Island': 'RI', - 'South Carolina': 'SC', - 'South Dakota': 'SD', - 'Tennessee': 'TN', - 'Texas': 'TX', - 'Utah': 'UT', - 'Vermont': 'VT', - 'Virginia': 'VA', - 'Washington': 'WA', - 'West Virginia': 'WV', - 'Wisconsin': 'WI', - 'Wyoming': 'WY', - #'Puerto Rico': 'PR', - #'Virgin Islands': 'VI', - #'Guam': 'GU', + "Alabama": "AL", + "Alaska": "AK", + "Arizona": "AZ", + "Arkansas": "AR", + "California": "CA", + "Colorado": "CO", + "Connecticut": "CT", + "Delaware": "DE", + "District of Columbia": "DC", + "Florida": "FL", + "Georgia": "GA", + "Hawaii": "HI", + "Idaho": "ID", + "Illinois": "IL", + "Indiana": "IN", + "Iowa": "IA", + "Kansas": "KS", + "Kentucky": "KY", + "Louisiana": "LA", + "Maine": "ME", + "Maryland": "MD", + "Massachusetts": "MA", + "Michigan": "MI", + "Minnesota": "MN", + "Mississippi": "MS", + "Missouri": "MO", + "Montana": "MT", + "Nebraska": "NE", + "Nevada": "NV", + "New Hampshire": "NH", + "New Jersey": "NJ", + "New Mexico": "NM", + "New York": "NY", + "North Carolina": "NC", + "North Dakota": "ND", + "Ohio": "OH", + "Oklahoma": "OK", + "Oregon": "OR", + 
"Pennsylvania": "PA", + "Rhode Island": "RI", + "South Carolina": "SC", + "South Dakota": "SD", + "Tennessee": "TN", + "Texas": "TX", + "Utah": "UT", + "Vermont": "VT", + "Virginia": "VA", + "Washington": "WA", + "West Virginia": "WV", + "Wisconsin": "WI", + "Wyoming": "WY", + #'Puerto Rico': 'PR', + #'Virgin Islands': 'VI', + #'Guam': 'GU', } -sql_cdc = ''' +sql_cdc = """ INSERT INTO `cdc` (`date`, `page`, `state`, `num`) VALUES (%s, %s, %s, %s) ON DUPLICATE KEY UPDATE `num` = %s -''' +""" -sql_cdc_meta = ''' +sql_cdc_meta = """ INSERT INTO `cdc_meta` (`date`, `epiweek`, `state`, `total`) VALUES (%s, yearweek(%s, 6), %s, %s) ON DUPLICATE KEY UPDATE `total` = %s -''' +""" def upload(test_mode): - # connect - u, p = secrets.db.epi - cnx = mysql.connector.connect(user=u, password=p, database='epidata') - cur = cnx.cursor() - - # insert (or update) table `cdc` - def insert_cdc(date, page, state, num): - cur.execute(sql_cdc, (date, page, state, num, num)) - - # insert (or update) table `cdc_meta` - def insert_cdc_meta(date, state, total): - cur.execute(sql_cdc_meta, (date, date, state, total, total)) - - # loop over rows until the header row is found - def find_header(reader): - for row in reader: - if len(row) > 0 and row[0] == 'Date': - return True - return False - - # parse csv files for `cdc` and `cdc_meta` - def parse_csv(meta): - def handler(reader): - if not find_header(reader): - raise Exception('header not found') - count = 0 - cols = 3 if meta else 4 - for row in reader: - if len(row) != cols: - continue - if meta: - (a, c, d) = row - else: - (a, b, c, d) = row - c = c[:-16] - if c not in STATES: - continue - a = datetime.strptime(a, '%b %d, %Y').strftime('%Y-%m-%d') - c = STATES[c] - d = int(d) - if meta: - insert_cdc_meta(a, c, d) + # connect + u, p = secrets.db.epi + cnx = mysql.connector.connect(user=u, password=p, database="epidata") + cur = cnx.cursor() + + # insert (or update) table `cdc` + def insert_cdc(date, page, state, num): + cur.execute(sql_cdc, (date, page, state, num, num)) + + # insert (or update) table `cdc_meta` + def insert_cdc_meta(date, state, total): + cur.execute(sql_cdc_meta, (date, date, state, total, total)) + + # loop over rows until the header row is found + def find_header(reader): + for row in reader: + if len(row) > 0 and row[0] == "Date": + return True + return False + + # parse csv files for `cdc` and `cdc_meta` + def parse_csv(meta): + def handler(reader): + if not find_header(reader): + raise Exception("header not found") + count = 0 + cols = 3 if meta else 4 + for row in reader: + if len(row) != cols: + continue + if meta: + (a, c, d) = row + else: + (a, b, c, d) = row + c = c[:-16] + if c not in STATES: + continue + a = datetime.strptime(a, "%b %d, %Y").strftime("%Y-%m-%d") + c = STATES[c] + d = int(d) + if meta: + insert_cdc_meta(a, c, d) + else: + insert_cdc(a, b, c, d) + count += 1 + return count + + return handler + + # recursively open zip files + def parse_zip(zf, level=1): + for name in zf.namelist(): + prefix = " " * level + print(prefix, name) + if name[-4:] == ".zip": + with zf.open(name) as temp: + with ZipFile(io.BytesIO(temp.read())) as zf2: + parse_zip(zf2, level + 1) + elif name[-4:] == ".csv": + handler = None + if "Flu Pages by Region" in name: + handler = parse_csv(False) + elif "Regions for all CDC" in name: + handler = parse_csv(True) + else: + print(prefix, " (skipped)") + if handler is not None: + with zf.open(name) as temp: + count = handler(csv.reader(io.StringIO(str(temp.read(), "utf-8")))) + print(prefix, f" {int(count)} 
rows") + else: + print(prefix, " (ignored)") + + # find, parse, and move zip files + zip_files = glob.glob("/common/cdc_stage/*.zip") + print("searching...") + for f in zip_files: + print(" ", f) + print("parsing...") + for f in zip_files: + with ZipFile(f) as zf: + parse_zip(zf) + print("moving...") + for f in zip_files: + src = f + dst = os.path.join("/home/automation/cdc_page_stats/", os.path.basename(src)) + print(" ", src, "->", dst) + if test_mode: + print(" (test mode enabled - not moved)") else: - insert_cdc(a, b, c, d) - count += 1 - return count - return handler - - # recursively open zip files - def parse_zip(zf, level=1): - for name in zf.namelist(): - prefix = ' ' * level - print(prefix, name) - if name[-4:] == '.zip': - with zf.open(name) as temp: - with ZipFile(io.BytesIO(temp.read())) as zf2: - parse_zip(zf2, level + 1) - elif name[-4:] == '.csv': - handler = None - if 'Flu Pages by Region' in name: - handler = parse_csv(False) - elif 'Regions for all CDC' in name: - handler = parse_csv(True) - else: - print(prefix, ' (skipped)') - if handler is not None: - with zf.open(name) as temp: - count = handler(csv.reader(io.StringIO(str(temp.read(), 'utf-8')))) - print(prefix, ' %d rows' % count) - else: - print(prefix, ' (ignored)') - - # find, parse, and move zip files - zip_files = glob.glob('/common/cdc_stage/*.zip') - print('searching...') - for f in zip_files: - print(' ', f) - print('parsing...') - for f in zip_files: - with ZipFile(f) as zf: - parse_zip(zf) - print('moving...') - for f in zip_files: - src = f - dst = os.path.join('/home/automation/cdc_page_stats/', os.path.basename(src)) - print(' ', src, '->', dst) - if test_mode: - print(' (test mode enabled - not moved)') - else: - shutil.move(src, dst) - if not os.path.isfile(dst): - raise Exception('unable to move file') - - # disconnect - cur.close() - if not test_mode: - cnx.commit() - cnx.close() + shutil.move(src, dst) + if not os.path.isfile(dst): + raise Exception("unable to move file") + + # disconnect + cur.close() + if not test_mode: + cnx.commit() + cnx.close() def main(): - # args and usage - parser = argparse.ArgumentParser() - parser.add_argument('--test', '-t', default=False, action='store_true', help='dry run only') - args = parser.parse_args() + # args and usage + parser = argparse.ArgumentParser() + parser.add_argument("--test", "-t", default=False, action="store_true", help="dry run only") + args = parser.parse_args() - # make it happen - upload(args.test) + # make it happen + upload(args.test) -if __name__ == '__main__': - main() +if __name__ == "__main__": + main() diff --git a/src/acquisition/covidcast_nowcast/load_sensors.py b/src/acquisition/covidcast_nowcast/load_sensors.py index 73ce7eee5..2e2269bb8 100644 --- a/src/acquisition/covidcast_nowcast/load_sensors.py +++ b/src/acquisition/covidcast_nowcast/load_sensors.py @@ -82,8 +82,7 @@ def load_and_prepare_file(filepath: str, attributes: PathDetails) -> pd.DataFram def _move_after_processing(filepath, success): archive_dir = SUCCESS_DIR if success else FAIL_DIR - new_dir = os.path.dirname(filepath).replace( - "receiving", archive_dir) + new_dir = os.path.dirname(filepath).replace("receiving", archive_dir) os.makedirs(new_dir, exist_ok=True) move(filepath, filepath.replace("receiving", archive_dir)) print(f"{filepath} moved to {archive_dir}") @@ -96,10 +95,14 @@ def method(table, conn, keys, data_iter): meta, # specify lag column explicitly; lag is a reserved word sqlalchemy doesn't know about sqlalchemy.Column("lag", sqlalchemy.Integer, 
quote=True), - autoload=True) - insert_stmt = sqlalchemy.dialects.mysql.insert(sql_table).values([dict(zip(keys, data)) for data in data_iter]) + autoload=True, + ) + insert_stmt = sqlalchemy.dialects.mysql.insert(sql_table).values( + [dict(zip(keys, data)) for data in data_iter] + ) upsert_stmt = insert_stmt.on_duplicate_key_update({x.name: x for x in insert_stmt.inserted}) conn.execute(upsert_stmt) + return method diff --git a/src/acquisition/ecdc/ecdc_db_update.py b/src/acquisition/ecdc/ecdc_db_update.py index 63689c1d5..84423c376 100644 --- a/src/acquisition/ecdc/ecdc_db_update.py +++ b/src/acquisition/ecdc/ecdc_db_update.py @@ -33,9 +33,8 @@ import argparse import datetime import glob -import subprocess -import random import os +import tempfile # third party import mysql.connector @@ -46,12 +45,14 @@ from delphi.utils.epiweek import delta_epiweeks from delphi.utils.epidate import EpiDate + def ensure_tables_exist(): - (u,p) = secrets.db.epi - cnx = mysql.connector.connect(user=u,password=p,database='epidata') + (u, p) = secrets.db.epi + cnx = mysql.connector.connect(user=u, password=p, database="epidata") try: cursor = cnx.cursor() - cursor.execute(''' + cursor.execute( + """ CREATE TABLE IF NOT EXISTS `ecdc_ili` ( `id` INT(11) NOT NULL PRIMARY KEY AUTO_INCREMENT, `release_date` DATE NOT NULL, @@ -62,58 +63,63 @@ def ensure_tables_exist(): `incidence_rate` DOUBLE NOT NULL, UNIQUE KEY (`issue`, `epiweek`, `region`) ); - '''); + """ + ) cnx.commit() finally: cnx.close() + def safe_float(f): try: - return float(f.replace(',','')) + return float(f.replace(",", "")) except: return 0 + def safe_int(i): try: - return int(i.replace(',','')) + return int(i.replace(",", "")) except: return 0 -def get_rows(cnx, table='ecdc_ili'): - # Count and return the number of rows in the `ecdc_ili` table. - select = cnx.cursor() - select.execute('SELECT count(1) num FROM %s' % table) - for (num,) in select: - pass - select.close() - return num + +def get_rows(cnx, table="ecdc_ili"): + # Count and return the number of rows in the `ecdc_ili` table. + select = cnx.cursor() + select.execute(f"SELECT count(1) num FROM {table}") + for (num,) in select: + pass + select.close() + return num + def update_from_file(issue, date, dir, test_mode=False): # Read ECDC data from CSVs and insert into (or update) the database. 
# database connection u, p = secrets.db.epi - cnx = mysql.connector.connect(user=u, password=p, database='epidata') - rows1 = get_rows(cnx, 'ecdc_ili') - print('rows before: %d' % (rows1)) + cnx = mysql.connector.connect(user=u, password=p, database="epidata") + rows1 = get_rows(cnx, "ecdc_ili") + print(f"rows before: {int(rows1)}") insert = cnx.cursor() # load the data, ignoring empty rows - files = glob.glob(os.path.join(dir,"*.csv")) + files = glob.glob(os.path.join(dir, "*.csv")) rows = [] for filename in files: - with open(filename,'r') as f: + with open(filename) as f: for l in f: - data = list(map(lambda s: s.strip().replace('"',''),l.split(','))) + data = list(map(lambda s: s.strip().replace('"', ""), l.split(","))) row = {} - row['epiweek'] = int(data[1][:4] + data[1][5:]) - row['region'] = data[4] - row['incidence_rate'] = data[3] + row["epiweek"] = int(data[1][:4] + data[1][5:]) + row["region"] = data[4] + row["incidence_rate"] = data[3] rows.append(row) - print(' loaded %d rows' % len(rows)) + print(f" loaded {len(rows)} rows") entries = [obj for obj in rows if obj] - print(' found %d entries' % len(entries)) + print(f" found {len(entries)} entries") - sql = ''' + sql = """ INSERT INTO `ecdc_ili` (`release_date`, `issue`, `epiweek`, `region`, `lag`, `incidence_rate`) @@ -122,13 +128,13 @@ def update_from_file(issue, date, dir, test_mode=False): ON DUPLICATE KEY UPDATE `release_date` = least(`release_date`, '%s'), `incidence_rate` = %s - ''' + """ for row in entries: - lag = delta_epiweeks(row['epiweek'], issue) - data_args = [row['incidence_rate']] + lag = delta_epiweeks(row["epiweek"], issue) + data_args = [row["incidence_rate"]] - insert_args = [date,issue,row['epiweek'],row['region'],lag] + data_args + insert_args = [date, issue, row["epiweek"], row["region"], lag] + data_args update_args = [date] + data_args try: insert.execute(sql % tuple(insert_args + update_args)) @@ -138,39 +144,42 @@ def update_from_file(issue, date, dir, test_mode=False): # cleanup insert.close() if test_mode: - print('test mode, not committing') + print("test mode, not committing") rows2 = rows1 else: cnx.commit() rows2 = get_rows(cnx) - print('rows after: %d (added %d)' % (rows2,rows2-rows1)) + print(f"rows after: {int(rows2)} (added {int(rows2 - rows1)})") cnx.close() + def main(): # args and usage parser = argparse.ArgumentParser() + # fmt: off parser.add_argument( - '--test', - action='store_true', - help='do dry run only, do not update the database' + "--test", + action="store_true", + help="do dry run only, do not update the database" ) parser.add_argument( - '--file', + "--file", type=str, - help='load an existing zip file (otherwise fetch current data)' + help="load an existing zip file (otherwise fetch current data)" ) parser.add_argument( - '--issue', + "--issue", type=int, - help='issue of the file (e.g. 201740); used iff --file is given' + help="issue of the file (e.g. 
201740); used iff --file is given" ) + # fmt: on args = parser.parse_args() if (args.file is None) != (args.issue is None): - raise Exception('--file and --issue must both be present or absent') + raise Exception("--file and --issue must both be present or absent") - date = datetime.datetime.now().strftime('%Y-%m-%d') - print('assuming release date is today, %s' % date) + date = datetime.datetime.now().strftime("%Y-%m-%d") + print(f"assuming release date is today, {date}") ensure_tables_exist() if args.file: @@ -182,29 +191,26 @@ def main(): max_tries = 5 while flag < max_tries: flag = flag + 1 - tmp_dir = ''.join(random.choice('0123456789abcdefghijklmnopqrstuvwxyz') for i in range(8)) - tmp_dir = 'downloads_' + tmp_dir - subprocess.call(["mkdir",tmp_dir]) - # Use temporary directory to avoid data from different time - # downloaded to same folder - download_ecdc_data(download_dir=tmp_dir) - issue = EpiDate.today().get_ew() - files = glob.glob('%s/*.csv' % tmp_dir) - for filename in files: - with open(filename,'r') as f: - _ = f.readline() - db_error = False - for filename in files: - try: - update_from_file(issue, date, filename, test_mode=args.test) - subprocess.call(["rm",filename]) - except: - db_error = True - subprocess.call(["rm","-r",tmp_dir]) - if not db_error: - break # Exit loop with success + with tempfile.TemporaryDirectory() as tmp_dir: + # Use temporary directory to avoid data from different time + # downloaded to same folder + download_ecdc_data(download_dir=tmp_dir) + issue = EpiDate.today().get_ew() + files = glob.glob(f"{tmp_dir}/*.csv") + for filename in files: + with open(filename) as f: + _ = f.readline() + db_error = False + for filename in files: + try: + update_from_file(issue, date, filename, test_mode=args.test) + except: + db_error = True + if not db_error: + break # Exit loop with success if flag >= max_tries: - print('WARNING: Database `ecdc_ili` did not update successfully') + print("WARNING: Database `ecdc_ili` did not update successfully") + -if __name__ == '__main__': +if __name__ == "__main__": main() diff --git a/src/acquisition/ecdc/ecdc_ili.py b/src/acquisition/ecdc/ecdc_ili.py index 1dd0505d1..dca9b51ae 100644 --- a/src/acquisition/ecdc/ecdc_ili.py +++ b/src/acquisition/ecdc/ecdc_ili.py @@ -11,60 +11,74 @@ from bs4 import BeautifulSoup from selenium import webdriver from selenium.webdriver.support.ui import Select -from selenium.webdriver.support.ui import WebDriverWait +from selenium.webdriver.support.ui import WebDriverWait from selenium.webdriver.common.by import By from selenium.webdriver.support import expected_conditions as EC -def download_ecdc_data(download_dir = "downloads"): - url = 'https://flunewseurope.org/PrimaryCareData' +def download_ecdc_data(download_dir="downloads"): + url = "https://flunewseurope.org/PrimaryCareData" resp = requests.get(url) - soup = BeautifulSoup(resp.content, 'lxml') - mydivs = soup.findAll('div') + soup = BeautifulSoup(resp.content, "lxml") + mydivs = soup.findAll("div") for div in mydivs: dic = div.attrs - if dic.get('class')== ['graph-container'] and dic.get('id')== 'dinfl06': + if dic.get("class") == ["graph-container"] and dic.get("id") == "dinfl06": break # get new url of the ILI chunck - url = div.contents[1].attrs['src'] + url = div.contents[1].attrs["src"] opts = webdriver.firefox.options.Options() opts.set_headless() fp = webdriver.FirefoxProfile() - fp.set_preference("browser.download.folderList",2) - fp.set_preference("browser.download.manager.showWhenStarting",False) - 
fp.set_preference("browser.download.dir",os.path.abspath(download_dir)) - fp.set_preference("browser.helperApps.neverAsk.saveToDisk","text/csv") + fp.set_preference("browser.download.folderList", 2) + fp.set_preference("browser.download.manager.showWhenStarting", False) + fp.set_preference("browser.download.dir", os.path.abspath(download_dir)) + fp.set_preference("browser.helperApps.neverAsk.saveToDisk", "text/csv") try: - driver = webdriver.Firefox(options=opts,firefox_profile=fp) + driver = webdriver.Firefox(options=opts, firefox_profile=fp) driver.get(url) for i in range(2, 54): # select country try: - WebDriverWait(driver,30).until(EC.element_to_be_clickable((By.ID,'fluNewsReportViewer_ctl04_ctl03_ddValue'))) - Select(driver.find_element_by_tag_name('select')).select_by_value(str(i)) + WebDriverWait(driver, 30).until( + EC.element_to_be_clickable((By.ID, "fluNewsReportViewer_ctl04_ctl03_ddValue")) + ) + Select(driver.find_element_by_tag_name("select")).select_by_value(str(i)) time.sleep(3) - soup = BeautifulSoup(driver.page_source, 'html.parser') - options = soup.select('#fluNewsReportViewer_ctl04_ctl05_ddValue')[0].find_all('option') + soup = BeautifulSoup(driver.page_source, "html.parser") + options = soup.select("#fluNewsReportViewer_ctl04_ctl05_ddValue")[0].find_all( + "option" + ) ind = 1 for j in range(len(options)): - if 'ILI' in str(options[j]): - pattern = re.compile(r'\d+') + if "ILI" in str(options[j]): + pattern = re.compile(r"\d+") ind = re.findall(pattern, str(options[j]))[0] break if type(ind) == str: # select clinical tyle - WebDriverWait(driver,30).until(EC.element_to_be_clickable((By.ID,'fluNewsReportViewer_ctl04_ctl05_ddValue'))) - Select(driver.find_element_by_id('fluNewsReportViewer_ctl04_ctl05_ddValue')).select_by_value(ind) - WebDriverWait(driver,30).until(EC.element_to_be_clickable((By.ID,'btnSelectExportType'))) - driver.find_element_by_id('btnSelectExportType').click() - WebDriverWait(driver,30).until(EC.element_to_be_clickable((By.ID,'btnExportToCsv'))) - driver.find_element_by_id('btnExportToCsv').click() + WebDriverWait(driver, 30).until( + EC.element_to_be_clickable( + (By.ID, "fluNewsReportViewer_ctl04_ctl05_ddValue") + ) + ) + Select( + driver.find_element_by_id("fluNewsReportViewer_ctl04_ctl05_ddValue") + ).select_by_value(ind) + WebDriverWait(driver, 30).until( + EC.element_to_be_clickable((By.ID, "btnSelectExportType")) + ) + driver.find_element_by_id("btnSelectExportType").click() + WebDriverWait(driver, 30).until( + EC.element_to_be_clickable((By.ID, "btnExportToCsv")) + ) + driver.find_element_by_id("btnExportToCsv").click() time.sleep(3) except: driver.get(url) except: - print('WARNING: ECDC Scraper may not have downloaded all of the available data.') - #cleanup - os.system('''pkill "firefox" ''') + print("WARNING: ECDC Scraper may not have downloaded all of the available data.") + # cleanup + os.system("""pkill "firefox" """) os.system('''pkill "(firefox-bin)"''') os.system('''pkill "geckodriver*"''') diff --git a/src/acquisition/flusurv/flusurv.py b/src/acquisition/flusurv/flusurv.py index 6b8d247ae..28105d933 100644 --- a/src/acquisition/flusurv/flusurv.py +++ b/src/acquisition/flusurv/flusurv.py @@ -50,167 +50,170 @@ # all currently available FluSurv locations and their associated codes # the number pair represents NetworkID and CatchmentID location_codes = { - 'CA': (2, 1), - 'CO': (2, 2), - 'CT': (2, 3), - 'GA': (2, 4), - 'IA': (3, 5), - 'ID': (3, 6), - 'MD': (2, 7), - 'MI': (3, 8), - 'MN': (2, 9), - 'NM': (2, 11), - 'NY_albany': (2, 13), - 
'NY_rochester': (2, 14), - 'OH': (3, 15), - 'OK': (3, 16), - 'OR': (2, 17), - 'RI': (3, 18), - 'SD': (3, 19), - 'TN': (2, 20), - 'UT': (3, 21), - 'network_all': (1, 22), - 'network_eip': (2, 22), - 'network_ihsp': (3, 22), + "CA": (2, 1), + "CO": (2, 2), + "CT": (2, 3), + "GA": (2, 4), + "IA": (3, 5), + "ID": (3, 6), + "MD": (2, 7), + "MI": (3, 8), + "MN": (2, 9), + "NM": (2, 11), + "NY_albany": (2, 13), + "NY_rochester": (2, 14), + "OH": (3, 15), + "OK": (3, 16), + "OR": (2, 17), + "RI": (3, 18), + "SD": (3, 19), + "TN": (2, 20), + "UT": (3, 21), + "network_all": (1, 22), + "network_eip": (2, 22), + "network_ihsp": (3, 22), } def fetch_json(path, payload, call_count=1, requests_impl=requests): - """Send a request to the server and return the parsed JSON response.""" - - # it's polite to self-identify this "bot" - delphi_url = 'https://delphi.cmu.edu/index.html' - user_agent = 'Mozilla/5.0 (compatible; delphibot/1.0; +%s)' % delphi_url - - # the FluSurv AMF server - flusurv_url = 'https://gis.cdc.gov/GRASP/Flu3/' + path - - # request headers - headers = { - 'Accept-Encoding': 'gzip', - 'User-Agent': user_agent, - } - if payload is not None: - headers['Content-Type'] = 'application/json;charset=UTF-8' - - # send the request and read the response - if payload is None: - method = requests_impl.get - data = None - else: - method = requests_impl.post - data = json.dumps(payload) - resp = method(flusurv_url, headers=headers, data=data) - - # check the HTTP status code - if resp.status_code == 500 and call_count <= 2: - # the server often fails with this status, so wait and retry - delay = 10 * call_count - print('got status %d, will retry in %d sec...' % (resp.status_code, delay)) - time.sleep(delay) - return fetch_json(path, payload, call_count=call_count + 1) - elif resp.status_code != 200: - raise Exception(['status code != 200', resp.status_code]) - - # check response mime type - if 'application/json' not in resp.headers.get('Content-Type', ''): - raise Exception('response is not json') - - # return the decoded json object - return resp.json() + """Send a request to the server and return the parsed JSON response.""" + + # it's polite to self-identify this "bot" + delphi_url = "https://delphi.cmu.edu/index.html" + user_agent = f"Mozilla/5.0 (compatible; delphibot/1.0; +{delphi_url})" + + # the FluSurv AMF server + flusurv_url = "https://gis.cdc.gov/GRASP/Flu3/" + path + + # request headers + headers = { + "Accept-Encoding": "gzip", + "User-Agent": user_agent, + } + if payload is not None: + headers["Content-Type"] = "application/json;charset=UTF-8" + + # send the request and read the response + if payload is None: + method = requests_impl.get + data = None + else: + method = requests_impl.post + data = json.dumps(payload) + resp = method(flusurv_url, headers=headers, data=data) + + # check the HTTP status code + if resp.status_code == 500 and call_count <= 2: + # the server often fails with this status, so wait and retry + delay = 10 * call_count + print(f"got status {int(resp.status_code)}, will retry in {int(delay)} sec...") + time.sleep(delay) + return fetch_json(path, payload, call_count=call_count + 1) + elif resp.status_code != 200: + raise Exception(["status code != 200", resp.status_code]) + + # check response mime type + if "application/json" not in resp.headers.get("Content-Type", ""): + raise Exception("response is not json") + + # return the decoded json object + return resp.json() def fetch_flusurv_object(location_code): - """Return decoded FluSurv JSON object for the given 
location.""" - return fetch_json('PostPhase03GetData', { - 'appversion': 'Public', - 'networkid': location_code[0], - 'cacthmentid': location_code[1], - }) + """Return decoded FluSurv JSON object for the given location.""" + return fetch_json( + "PostPhase03GetData", + { + "appversion": "Public", + "networkid": location_code[0], + "cacthmentid": location_code[1], + }, + ) def mmwrid_to_epiweek(mmwrid): - """Convert a CDC week index into an epiweek.""" + """Convert a CDC week index into an epiweek.""" - # Add the difference in IDs, which are sequential, to a reference epiweek, - # which is 2003w40 in this case. - epiweek_200340 = EpiDate(2003, 9, 28) - mmwrid_200340 = 2179 - return epiweek_200340.add_weeks(mmwrid - mmwrid_200340).get_ew() + # Add the difference in IDs, which are sequential, to a reference epiweek, + # which is 2003w40 in this case. + epiweek_200340 = EpiDate(2003, 9, 28) + mmwrid_200340 = 2179 + return epiweek_200340.add_weeks(mmwrid - mmwrid_200340).get_ew() def extract_from_object(data_in): - """ - Given a FluSurv data object, return hospitaliation rates. - - The returned object is indexed first by epiweek, then by zero-indexed age - group. - """ - - # an object to hold the result - data_out = {} - - # iterate over all seasons and age groups - for obj in data_in['busdata']['dataseries']: - if obj['age'] in (10, 11, 12): - # TODO(https://github.com/cmu-delphi/delphi-epidata/issues/242): - # capture as-of-yet undefined age groups 10, 11, and 12 - continue - age_index = obj['age'] - 1 - # iterage over weeks - for mmwrid, _, _, rate in obj['data']: - epiweek = mmwrid_to_epiweek(mmwrid) - if epiweek not in data_out: - # weekly rate of each age group - data_out[epiweek] = [None] * 9 - prev_rate = data_out[epiweek][age_index] - if prev_rate is None: - # this is the first time to see a rate for this epiweek/age - data_out[epiweek][age_index] = rate - elif prev_rate != rate: - # a different rate was already found for this epiweek/age - format_args = (epiweek, obj['age'], prev_rate, rate) - print('warning: %d %d %f != %f' % format_args) - - # sanity check the result - if len(data_out) == 0: - raise Exception('no data found') - - # print the result and return flu data - print('found data for %d weeks' % len(data_out)) - return data_out + """ + Given a FluSurv data object, return hospitaliation rates. + + The returned object is indexed first by epiweek, then by zero-indexed age + group. 
+ """ + + # an object to hold the result + data_out = {} + + # iterate over all seasons and age groups + for obj in data_in["busdata"]["dataseries"]: + if obj["age"] in (10, 11, 12): + # TODO(https://github.com/cmu-delphi/delphi-epidata/issues/242): + # capture as-of-yet undefined age groups 10, 11, and 12 + continue + age_index = obj["age"] - 1 + # iterage over weeks + for mmwrid, _, _, rate in obj["data"]: + epiweek = mmwrid_to_epiweek(mmwrid) + if epiweek not in data_out: + # weekly rate of each age group + data_out[epiweek] = [None] * 9 + prev_rate = data_out[epiweek][age_index] + if prev_rate is None: + # this is the first time to see a rate for this epiweek/age + data_out[epiweek][age_index] = rate + elif prev_rate != rate: + # a different rate was already found for this epiweek/age + format_args = (epiweek, obj["age"], prev_rate, rate) + print("warning: %d %d %f != %f" % format_args) + + # sanity check the result + if len(data_out) == 0: + raise Exception("no data found") + + # print the result and return flu data + print(f"found data for {len(data_out)} weeks") + return data_out def get_data(location_code): - """ - Fetch and parse flu data for the given location. + """ + Fetch and parse flu data for the given location. - This method performs the following operations: - - fetches FluSurv data from CDC - - extracts and returns hospitaliation rates - """ + This method performs the following operations: + - fetches FluSurv data from CDC + - extracts and returns hospitaliation rates + """ - # fetch - print('[fetching flusurv data...]') - data_in = fetch_flusurv_object(location_code) + # fetch + print("[fetching flusurv data...]") + data_in = fetch_flusurv_object(location_code) - # extract - print('[extracting values...]') - data_out = extract_from_object(data_in) + # extract + print("[extracting values...]") + data_out = extract_from_object(data_in) - # return - print('[scraped successfully]') - return data_out + # return + print("[scraped successfully]") + return data_out def get_current_issue(): - """Scrape the current issue from the FluSurv main page.""" + """Scrape the current issue from the FluSurv main page.""" - # fetch - data = fetch_json('GetPhase03InitApp?appVersion=Public', None) + # fetch + data = fetch_json("GetPhase03InitApp?appVersion=Public", None) - # extract - date = datetime.strptime(data['loaddatetime'], '%b %d, %Y') + # extract + date = datetime.strptime(data["loaddatetime"], "%b %d, %Y") - # convert and return - return EpiDate(date.year, date.month, date.day).get_ew() + # convert and return + return EpiDate(date.year, date.month, date.day).get_ew() diff --git a/src/acquisition/flusurv/flusurv_update.py b/src/acquisition/flusurv/flusurv_update.py index 35fadba05..1aa8e9885 100644 --- a/src/acquisition/flusurv/flusurv_update.py +++ b/src/acquisition/flusurv/flusurv_update.py @@ -82,108 +82,112 @@ def get_rows(cur): - """Return the number of rows in the `flusurv` table.""" + """Return the number of rows in the `flusurv` table.""" - # count all rows - cur.execute('SELECT count(1) `num` FROM `flusurv`') - for (num,) in cur: - return num + # count all rows + cur.execute("SELECT count(1) `num` FROM `flusurv`") + for (num,) in cur: + return num def update(issue, location_name, test_mode=False): - """Fetch and store the currently avialble weekly FluSurv dataset.""" - - # fetch data - location_code = flusurv.location_codes[location_name] - print('fetching data for', location_name, location_code) - data = flusurv.get_data(location_code) - - # metadata - epiweeks = 
sorted(data.keys()) - location = location_name - release_date = str(EpiDate.today()) - - # connect to the database - u, p = secrets.db.epi - cnx = mysql.connector.connect( - host=secrets.db.host, user=u, password=p, database='epidata') - cur = cnx.cursor() - rows1 = get_rows(cur) - print('rows before: %d' % rows1) - - # SQL for insert/update - sql = ''' - INSERT INTO `flusurv` ( - `release_date`, `issue`, `epiweek`, `location`, `lag`, `rate_age_0`, - `rate_age_1`, `rate_age_2`, `rate_age_3`, `rate_age_4`, `rate_overall`, - `rate_age_5`, `rate_age_6`, `rate_age_7` - ) - VALUES ( - %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s - ) - ON DUPLICATE KEY UPDATE - `release_date` = least(`release_date`, %s), - `rate_age_0` = coalesce(%s, `rate_age_0`), - `rate_age_1` = coalesce(%s, `rate_age_1`), - `rate_age_2` = coalesce(%s, `rate_age_2`), - `rate_age_3` = coalesce(%s, `rate_age_3`), - `rate_age_4` = coalesce(%s, `rate_age_4`), - `rate_overall` = coalesce(%s, `rate_overall`), - `rate_age_5` = coalesce(%s, `rate_age_5`), - `rate_age_6` = coalesce(%s, `rate_age_6`), - `rate_age_7` = coalesce(%s, `rate_age_7`) - ''' - - # insert/update each row of data (one per epiweek) - for epiweek in epiweeks: - lag = delta_epiweeks(epiweek, issue) - if lag > 52: - # Ignore values older than one year, as (1) they are assumed not to - # change, and (2) it would adversely affect database performance if all - # values (including duplicates) were stored on each run. - continue - args_meta = [release_date, issue, epiweek, location, lag] - args_insert = data[epiweek] - args_update = [release_date] + data[epiweek] - cur.execute(sql, tuple(args_meta + args_insert + args_update)) - - # commit and disconnect - rows2 = get_rows(cur) - print('rows after: %d (+%d)' % (rows2, rows2 - rows1)) - cur.close() - if test_mode: - print('test mode: not committing database changes') - else: - cnx.commit() - cnx.close() + """Fetch and store the currently avialble weekly FluSurv dataset.""" + + # fetch data + location_code = flusurv.location_codes[location_name] + print("fetching data for", location_name, location_code) + data = flusurv.get_data(location_code) + + # metadata + epiweeks = sorted(data.keys()) + location = location_name + release_date = str(EpiDate.today()) + + # connect to the database + u, p = secrets.db.epi + cnx = mysql.connector.connect(host=secrets.db.host, user=u, password=p, database="epidata") + cur = cnx.cursor() + rows1 = get_rows(cur) + print(f"rows before: {int(rows1)}") + + # SQL for insert/update + sql = """ + INSERT INTO `flusurv` ( + `release_date`, `issue`, `epiweek`, `location`, `lag`, `rate_age_0`, + `rate_age_1`, `rate_age_2`, `rate_age_3`, `rate_age_4`, `rate_overall`, + `rate_age_5`, `rate_age_6`, `rate_age_7` + ) + VALUES ( + %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s + ) + ON DUPLICATE KEY UPDATE + `release_date` = least(`release_date`, %s), + `rate_age_0` = coalesce(%s, `rate_age_0`), + `rate_age_1` = coalesce(%s, `rate_age_1`), + `rate_age_2` = coalesce(%s, `rate_age_2`), + `rate_age_3` = coalesce(%s, `rate_age_3`), + `rate_age_4` = coalesce(%s, `rate_age_4`), + `rate_overall` = coalesce(%s, `rate_overall`), + `rate_age_5` = coalesce(%s, `rate_age_5`), + `rate_age_6` = coalesce(%s, `rate_age_6`), + `rate_age_7` = coalesce(%s, `rate_age_7`) + """ + + # insert/update each row of data (one per epiweek) + for epiweek in epiweeks: + lag = delta_epiweeks(epiweek, issue) + if lag > 52: + # Ignore values older than one year, as (1) they are assumed not to + # change, and (2) it would 
adversely affect database performance if all + # values (including duplicates) were stored on each run. + continue + args_meta = [release_date, issue, epiweek, location, lag] + args_insert = data[epiweek] + args_update = [release_date] + data[epiweek] + cur.execute(sql, tuple(args_meta + args_insert + args_update)) + + # commit and disconnect + rows2 = get_rows(cur) + print(f"rows after: {int(rows2)} (+{int(rows2 - rows1)})") + cur.close() + if test_mode: + print("test mode: not committing database changes") + else: + cnx.commit() + cnx.close() def main(): - # args and usage - parser = argparse.ArgumentParser() - parser.add_argument( - 'location', - help='location for which data should be scraped (e.g. "CA" or "all")' - ) - parser.add_argument( - '--test', '-t', - default=False, action='store_true', help='do not commit database changes' - ) - args = parser.parse_args() - - # scrape current issue from the main page - issue = flusurv.get_current_issue() - print('current issue: %d' % issue) - - # fetch flusurv data - if args.location == 'all': - # all locations - for location in flusurv.location_codes.keys(): - update(issue, location, args.test) - else: - # single location - update(issue, args.location, args.test) - - -if __name__ == '__main__': - main() + # args and usage + parser = argparse.ArgumentParser() + # fmt: off + parser.add_argument( + "location", + help='location for which data should be scraped (e.g. "CA" or "all")' + ) + parser.add_argument( + "--test", + "-t", + default=False, + action="store_true", + help="do not commit database changes" + ) + # fmt: on + args = parser.parse_args() + + # scrape current issue from the main page + issue = flusurv.get_current_issue() + print(f"current issue: {int(issue)}") + + # fetch flusurv data + if args.location == "all": + # all locations + for location in flusurv.location_codes.keys(): + update(issue, location, args.test) + else: + # single location + update(issue, args.location, args.test) + + +if __name__ == "__main__": + main() diff --git a/src/acquisition/fluview/fluview.py b/src/acquisition/fluview/fluview.py index d723cbc59..9b4e6f537 100644 --- a/src/acquisition/fluview/fluview.py +++ b/src/acquisition/fluview/fluview.py @@ -34,183 +34,188 @@ class Key: - """ - Constants for navigating the metadata object contained in the web response - from CDC. - """ + """ + Constants for navigating the metadata object contained in the web response + from CDC. 
+ """ - class TierType: - nat = 'National' - hhs = 'HHS Regions' - cen = 'Census Divisions' - sta = 'State' + class TierType: + nat = "National" + hhs = "HHS Regions" + cen = "Census Divisions" + sta = "State" - class TierListEntry: - hhs = 'hhsregion' - cen = 'censusregions' - sta = 'states' + class TierListEntry: + hhs = "hhsregion" + cen = "censusregions" + sta = "states" - class TierIdEntry: - hhs = 'hhsregionid' - cen = 'censusregionid' - sta = 'stateid' + class TierIdEntry: + hhs = "hhsregionid" + cen = "censusregionid" + sta = "stateid" def check_status(resp, status, content_type): - """Raise an exception if the status code or content type is unexpected.""" - if resp.status_code != status: - raise Exception('got unexpected status code: ' + str(resp.status_code)) - actual_type = resp.headers.get('Content-Type', None) - if actual_type is None or content_type not in actual_type.lower(): - raise Exception('got unexpected content type: ' + str(actual_type)) + """Raise an exception if the status code or content type is unexpected.""" + if resp.status_code != status: + raise Exception("got unexpected status code: " + str(resp.status_code)) + actual_type = resp.headers.get("Content-Type", None) + if actual_type is None or content_type not in actual_type.lower(): + raise Exception("got unexpected content type: " + str(actual_type)) def fetch_metadata(sess): - """ - Return metadata indicating the current issue and also numeric constants - representing the various locations. - """ - url = 'https://gis.cdc.gov/grasp/flu2/GetPhase02InitApp?appVersion=Public' - resp = sess.get(url) - check_status(resp, 200, 'application/json') - return resp.json() + """ + Return metadata indicating the current issue and also numeric constants + representing the various locations. 
+ """ + url = "https://gis.cdc.gov/grasp/flu2/GetPhase02InitApp?appVersion=Public" + resp = sess.get(url) + check_status(resp, 200, "application/json") + return resp.json() def get_issue_and_locations(data): - """Extract the issue and per-tier location lists from the metadata object.""" - - def get_tier_ids(name): - for row in data['regiontypes']: - if row['description'] == name: - return row['regiontypeid'] - raise Exception() - - tier_ids = dict((name, get_tier_ids(name)) for name in ( - Key.TierType.nat, - Key.TierType.hhs, - Key.TierType.cen, - Key.TierType.sta, - )) - - location_ids = { - Key.TierType.nat: [0], - Key.TierType.hhs: [], - Key.TierType.cen: [], - Key.TierType.sta: [], - } - - # add location ids for HHS - for row in data[Key.TierListEntry.hhs]: - location_ids[Key.TierType.hhs].append(row[Key.TierIdEntry.hhs]) - location_ids[Key.TierType.hhs] = sorted(set(location_ids[Key.TierType.hhs])) - num = len(location_ids[Key.TierType.hhs]) - if num != 10: - raise Exception('expected 10 hhs regions, found %d' % num) - - # add location ids for census divisions - for row in data[Key.TierListEntry.cen]: - location_ids[Key.TierType.cen].append(row[Key.TierIdEntry.cen]) - location_ids[Key.TierType.cen] = sorted(set(location_ids[Key.TierType.cen])) - num = len(location_ids[Key.TierType.cen]) - if num != 9: - raise Exception('expected 9 census divisions, found %d' % num) - - # add location ids for states - for row in data[Key.TierListEntry.sta]: - location_ids[Key.TierType.sta].append(row[Key.TierIdEntry.sta]) - location_ids[Key.TierType.sta] = sorted(set(location_ids[Key.TierType.sta])) - num = len(location_ids[Key.TierType.sta]) - if num != 57: - raise Exception('expected 57 states/territories/cities, found %d' % num) - - # return a useful subset of the metadata - # (latest epiweek, latest season, tier ids, location ids) - return { - 'epiweek': data['mmwr'][-1]['yearweek'], - 'season_id': data['mmwr'][-1]['seasonid'], - 'tier_ids': tier_ids, - 'location_ids': location_ids, - } + """Extract the issue and per-tier location lists from the metadata object.""" + + def get_tier_ids(name): + for row in data["regiontypes"]: + if row["description"] == name: + return row["regiontypeid"] + raise Exception() + + tier_ids = { + name: get_tier_ids(name) + for name in ( + Key.TierType.nat, + Key.TierType.hhs, + Key.TierType.cen, + Key.TierType.sta, + ) + } + + location_ids = { + Key.TierType.nat: [0], + Key.TierType.hhs: [], + Key.TierType.cen: [], + Key.TierType.sta: [], + } + + # add location ids for HHS + for row in data[Key.TierListEntry.hhs]: + location_ids[Key.TierType.hhs].append(row[Key.TierIdEntry.hhs]) + location_ids[Key.TierType.hhs] = sorted(set(location_ids[Key.TierType.hhs])) + num = len(location_ids[Key.TierType.hhs]) + if num != 10: + raise Exception(f"expected 10 hhs regions, found {int(num)}") + + # add location ids for census divisions + for row in data[Key.TierListEntry.cen]: + location_ids[Key.TierType.cen].append(row[Key.TierIdEntry.cen]) + location_ids[Key.TierType.cen] = sorted(set(location_ids[Key.TierType.cen])) + num = len(location_ids[Key.TierType.cen]) + if num != 9: + raise Exception(f"expected 9 census divisions, found {int(num)}") + + # add location ids for states + for row in data[Key.TierListEntry.sta]: + location_ids[Key.TierType.sta].append(row[Key.TierIdEntry.sta]) + location_ids[Key.TierType.sta] = sorted(set(location_ids[Key.TierType.sta])) + num = len(location_ids[Key.TierType.sta]) + if num != 57: + raise Exception(f"expected 57 states/territories/cities, found 
{int(num)}") + + # return a useful subset of the metadata + # (latest epiweek, latest season, tier ids, location ids) + return { + "epiweek": data["mmwr"][-1]["yearweek"], + "season_id": data["mmwr"][-1]["seasonid"], + "tier_ids": tier_ids, + "location_ids": location_ids, + } def download_data(tier_id, location_ids, season_ids, filename): - """Download zipped ILINet data for the given locations and seasons.""" - - def get_entry(num, name=None): - return {'ID': num, 'Name': (name if name else num)} - - # download the data (in memory) - url = 'https://gis.cdc.gov/grasp/flu2/PostPhase02DataDownload' - data = { - 'AppVersion': 'Public', - 'DatasourceDT': [get_entry(1, 'ILINet'), get_entry(0, 'WHO_NREVSS')], - 'RegionTypeId': tier_id, - 'SubRegionsDT': [get_entry(loc) for loc in sorted(location_ids)], - 'SeasonsDT': [get_entry(season) for season in sorted(season_ids)], - } - resp = requests.post(url, json=data) - check_status(resp, 200, 'application/octet-stream') - payload = resp.content - - # save the data to file and return the file length - with open(filename, 'wb') as f: - f.write(payload) - return len(payload) + """Download zipped ILINet data for the given locations and seasons.""" + + def get_entry(num, name=None): + return {"ID": num, "Name": (name if name else num)} + + # download the data (in memory) + url = "https://gis.cdc.gov/grasp/flu2/PostPhase02DataDownload" + data = { + "AppVersion": "Public", + "DatasourceDT": [get_entry(1, "ILINet"), get_entry(0, "WHO_NREVSS")], + "RegionTypeId": tier_id, + "SubRegionsDT": [get_entry(loc) for loc in sorted(location_ids)], + "SeasonsDT": [get_entry(season) for season in sorted(season_ids)], + } + resp = requests.post(url, json=data) + check_status(resp, 200, "application/octet-stream") + payload = resp.content + + # save the data to file and return the file length + with open(filename, "wb") as f: + f.write(payload) + return len(payload) def save_latest(path=None): - """ - Save the latest two seasons of data for all locations, separately for each - location tier (i.e. national, HHS, census, and states). 
- """ - - # set up the session - sess = requests.session() - sess.headers.update({ - # it's polite to self-identify this "bot" - 'User-Agent': 'delphibot/1.0 (+https://delphi.cmu.edu/)', - }) - - # get metatdata - print('looking up ilinet metadata') - data = fetch_metadata(sess) - info = get_issue_and_locations(data) - issue = info['epiweek'] - print('current issue: %d' % issue) - - # establish timing - dt = datetime.datetime.now().strftime('%Y%m%d_%H%M%S') - current_season = info['season_id'] - seasons = [s for s in range(current_season - 1, current_season + 1)] - - # make the destination path if it doesn't already exist - if path is not None: - os.makedirs(path, exist_ok=True) - - # download the data file for each tier - files = [] - for delphi_name, cdc_name in ( - ('nat', Key.TierType.nat), - ('hhs', Key.TierType.hhs), - ('cen', Key.TierType.cen), - ('sta', Key.TierType.sta), - ): - name = 'ilinet_%s_%d_%s.zip' % (delphi_name, issue, dt) - if path is None: - filename = name - else: - filename = os.path.join(path, name) - tier_id = info['tier_ids'][cdc_name] - locations = info['location_ids'][cdc_name] - - # download and show timing information - print('downloading %s' % delphi_name) - t0 = time.time() - size = download_data(tier_id, locations, seasons, filename) - t1 = time.time() - - print(' saved %s (%d bytes in %.1f seconds)' % (filename, size, t1 - t0)) - files.append(filename) - - # return the current issue and the list of downloaded files - return issue, files + """ + Save the latest two seasons of data for all locations, separately for each + location tier (i.e. national, HHS, census, and states). + """ + + # set up the session + sess = requests.session() + sess.headers.update( + { + # it's polite to self-identify this "bot" + "User-Agent": "delphibot/1.0 (+https://delphi.cmu.edu/)", + } + ) + + # get metatdata + print("looking up ilinet metadata") + data = fetch_metadata(sess) + info = get_issue_and_locations(data) + issue = info["epiweek"] + print(f"current issue: {int(issue)}") + + # establish timing + dt = datetime.datetime.now().strftime("%Y%m%d_%H%M%S") + current_season = info["season_id"] + seasons = [s for s in range(current_season - 1, current_season + 1)] + + # make the destination path if it doesn't already exist + if path is not None: + os.makedirs(path, exist_ok=True) + + # download the data file for each tier + files = [] + for delphi_name, cdc_name in ( + ("nat", Key.TierType.nat), + ("hhs", Key.TierType.hhs), + ("cen", Key.TierType.cen), + ("sta", Key.TierType.sta), + ): + name = f"ilinet_{delphi_name}_{int(issue)}_{dt}.zip" + if path is None: + filename = name + else: + filename = os.path.join(path, name) + tier_id = info["tier_ids"][cdc_name] + locations = info["location_ids"][cdc_name] + + # download and show timing information + print(f"downloading {delphi_name}") + t0 = time.time() + size = download_data(tier_id, locations, seasons, filename) + t1 = time.time() + + print(f" saved {filename} ({int(size)} bytes in {t1 - t0:.1f} seconds)") + files.append(filename) + + # return the current issue and the list of downloaded files + return issue, files diff --git a/src/acquisition/fluview/fluview_locations.py b/src/acquisition/fluview/fluview_locations.py index 9c851bc6f..e5ebe0fc3 100644 --- a/src/acquisition/fluview/fluview_locations.py +++ b/src/acquisition/fluview/fluview_locations.py @@ -15,100 +15,100 @@ # https://gis.cdc.gov/grasp/flu2/GetPhase02InitApp?appVersion=Public # The values are used in queries of Delphi's Epidata API. 
cdc_to_delphi = { - 'national': { - 'x': 'nat', - }, - 'hhs regions': { - 'region 1': 'hhs1', - 'region 2': 'hhs2', - 'region 3': 'hhs3', - 'region 4': 'hhs4', - 'region 5': 'hhs5', - 'region 6': 'hhs6', - 'region 7': 'hhs7', - 'region 8': 'hhs8', - 'region 9': 'hhs9', - 'region 10': 'hhs10', - }, - 'census regions': { - 'new england': 'cen1', - 'mid-atlantic': 'cen2', - 'east north central': 'cen3', - 'west north central': 'cen4', - 'south atlantic': 'cen5', - 'east south central': 'cen6', - 'west south central': 'cen7', - 'mountain': 'cen8', - 'pacific': 'cen9', - }, - 'states': { - # states/territories: two-letter ISO 3166 - 'alabama': 'al', - 'alaska': 'ak', - 'arizona': 'az', - 'arkansas': 'ar', - 'california': 'ca', - 'colorado': 'co', - 'connecticut': 'ct', - 'delaware': 'de', - 'florida': 'fl', - 'georgia': 'ga', - 'hawaii': 'hi', - 'idaho': 'id', - 'illinois': 'il', - 'indiana': 'in', - 'iowa': 'ia', - 'kansas': 'ks', - 'kentucky': 'ky', - 'louisiana': 'la', - 'maine': 'me', - 'maryland': 'md', - 'massachusetts': 'ma', - 'michigan': 'mi', - 'minnesota': 'mn', - 'mississippi': 'ms', - 'missouri': 'mo', - 'montana': 'mt', - 'nebraska': 'ne', - 'nevada': 'nv', - 'new hampshire': 'nh', - 'new jersey': 'nj', - 'new mexico': 'nm', - # Even though it's called "New York", this location doesn't include New - # York City ("jfk"). New York ("ny") is actually this *plus* jfk. - 'new york': 'ny_minus_jfk', - 'north carolina': 'nc', - 'north dakota': 'nd', - 'ohio': 'oh', - 'oklahoma': 'ok', - 'oregon': 'or', - 'pennsylvania': 'pa', - 'rhode island': 'ri', - 'south carolina': 'sc', - 'south dakota': 'sd', - 'tennessee': 'tn', - 'texas': 'tx', - 'utah': 'ut', - 'vermont': 'vt', - 'virginia': 'va', - 'washington': 'wa', - 'west virginia': 'wv', - 'wisconsin': 'wi', - 'wyoming': 'wy', - 'american samoa': 'as', - 'commonwealth of the northern mariana islands': 'mp', - 'district of columbia': 'dc', - 'guam': 'gu', - 'puerto rico': 'pr', - 'virgin islands': 'vi', - # cities: three-letter IATA - 'chicago': 'ord', - 'los angeles': 'lax', - 'new york city': 'jfk', - }, + "national": { + "x": "nat", + }, + "hhs regions": { + "region 1": "hhs1", + "region 2": "hhs2", + "region 3": "hhs3", + "region 4": "hhs4", + "region 5": "hhs5", + "region 6": "hhs6", + "region 7": "hhs7", + "region 8": "hhs8", + "region 9": "hhs9", + "region 10": "hhs10", + }, + "census regions": { + "new england": "cen1", + "mid-atlantic": "cen2", + "east north central": "cen3", + "west north central": "cen4", + "south atlantic": "cen5", + "east south central": "cen6", + "west south central": "cen7", + "mountain": "cen8", + "pacific": "cen9", + }, + "states": { + # states/territories: two-letter ISO 3166 + "alabama": "al", + "alaska": "ak", + "arizona": "az", + "arkansas": "ar", + "california": "ca", + "colorado": "co", + "connecticut": "ct", + "delaware": "de", + "florida": "fl", + "georgia": "ga", + "hawaii": "hi", + "idaho": "id", + "illinois": "il", + "indiana": "in", + "iowa": "ia", + "kansas": "ks", + "kentucky": "ky", + "louisiana": "la", + "maine": "me", + "maryland": "md", + "massachusetts": "ma", + "michigan": "mi", + "minnesota": "mn", + "mississippi": "ms", + "missouri": "mo", + "montana": "mt", + "nebraska": "ne", + "nevada": "nv", + "new hampshire": "nh", + "new jersey": "nj", + "new mexico": "nm", + # Even though it's called "New York", this location doesn't include New + # York City ("jfk"). New York ("ny") is actually this *plus* jfk. 
+ "new york": "ny_minus_jfk", + "north carolina": "nc", + "north dakota": "nd", + "ohio": "oh", + "oklahoma": "ok", + "oregon": "or", + "pennsylvania": "pa", + "rhode island": "ri", + "south carolina": "sc", + "south dakota": "sd", + "tennessee": "tn", + "texas": "tx", + "utah": "ut", + "vermont": "vt", + "virginia": "va", + "washington": "wa", + "west virginia": "wv", + "wisconsin": "wi", + "wyoming": "wy", + "american samoa": "as", + "commonwealth of the northern mariana islands": "mp", + "district of columbia": "dc", + "guam": "gu", + "puerto rico": "pr", + "virgin islands": "vi", + # cities: three-letter IATA + "chicago": "ord", + "los angeles": "lax", + "new york city": "jfk", + }, } def get_location_name(region_type, region_name): - """Convert a CDC location type and name pair into a Delphi location name.""" - return cdc_to_delphi[region_type.lower()][region_name.lower()] + """Convert a CDC location type and name pair into a Delphi location name.""" + return cdc_to_delphi[region_type.lower()][region_name.lower()] diff --git a/src/acquisition/fluview/fluview_notify.py b/src/acquisition/fluview/fluview_notify.py index 13f0f3559..a280889a5 100644 --- a/src/acquisition/fluview/fluview_notify.py +++ b/src/acquisition/fluview/fluview_notify.py @@ -31,41 +31,53 @@ import delphi.operations.secrets as secrets -if __name__ == '__main__': - # Args and usage - parser = argparse.ArgumentParser() - parser.add_argument('-t', '--test', action='store_const', const=True, default=False, help="do dry run only, don't update the database") - args = parser.parse_args() +if __name__ == "__main__": + # Args and usage + parser = argparse.ArgumentParser() + parser.add_argument( + "-t", + "--test", + action="store_const", + const=True, + default=False, + help="do dry run only, don't update the database", + ) + args = parser.parse_args() - # connect - u, p = secrets.db.epi - cnx = mysql.connector.connect(user=u, password=p, database='epidata') - cur = cnx.cursor() + # connect + u, p = secrets.db.epi + cnx = mysql.connector.connect(user=u, password=p, database="epidata") + cur = cnx.cursor() - # get the last known issue from the automation table `variables` - cur.execute('SELECT `value` FROM automation.`variables` WHERE `name` = %s', ('most_recent_issue',)) - for (issue1,) in cur: - issue1 = int(issue1) - print('last known issue:', issue1) - # get the most recent issue from the epidata table `fluview` - cur.execute('SELECT max(`issue`) FROM `fluview`') - for (issue2,) in cur: - issue2 = int(issue2) - print('most recent issue:', issue2) + # get the last known issue from the automation table `variables` + cur.execute( + "SELECT `value` FROM automation.`variables` WHERE `name` = %s", ("most_recent_issue",) + ) + for (issue1,) in cur: + issue1 = int(issue1) + print("last known issue:", issue1) + # get the most recent issue from the epidata table `fluview` + cur.execute("SELECT max(`issue`) FROM `fluview`") + for (issue2,) in cur: + issue2 = int(issue2) + print("most recent issue:", issue2) - if issue2 > issue1: - print('new data is available!') - if args.test: - print('test mode - not making any changes') - else: - # update the variable - cur.execute('UPDATE automation.`variables` SET `value` = %s WHERE `name` = %s', (issue2, 'most_recent_issue')) - # queue the 'New FluView Available' flow - cur.execute('CALL automation.RunStep(36)') - elif issue2 < issue2: - raise Exception('most recent issue is older than the last known issue') + if issue2 > issue1: + print("new data is available!") + if args.test: + print("test 
mode - not making any changes") + else: + # update the variable + cur.execute( + "UPDATE automation.`variables` SET `value` = %s WHERE `name` = %s", + (issue2, "most_recent_issue"), + ) + # queue the 'New FluView Available' flow + cur.execute("CALL automation.RunStep(36)") + elif issue2 < issue2: + raise Exception("most recent issue is older than the last known issue") - # cleanup - cnx.commit() - cur.close() - cnx.close() + # cleanup + cnx.commit() + cur.close() + cnx.close() diff --git a/src/acquisition/fluview/fluview_update.py b/src/acquisition/fluview/fluview_update.py index 65bec7a40..406725b8a 100644 --- a/src/acquisition/fluview/fluview_update.py +++ b/src/acquisition/fluview/fluview_update.py @@ -130,398 +130,430 @@ from . import fluview_locations # sheet names -ILINET_SHEET = 'ILINet.csv' -PHL_SHEET = 'WHO_NREVSS_Public_Health_Labs.csv' -CL_SHEET = 'WHO_NREVSS_Clinical_Labs.csv' +ILINET_SHEET = "ILINet.csv" +PHL_SHEET = "WHO_NREVSS_Public_Health_Labs.csv" +CL_SHEET = "WHO_NREVSS_Clinical_Labs.csv" # table names -CL_TABLE = 'fluview_clinical' -PHL_TABLE = 'fluview_public' +CL_TABLE = "fluview_clinical" +PHL_TABLE = "fluview_public" + def optional_int(i): - return int(i) if i not in ('', 'X') else None + return int(i) if i not in ("", "X") else None + def optional_float(i, j): - return float(i) if i not in ('', 'X') else float(j) + return float(i) if i not in ("", "X") else float(j) + def nullable_float(i): - return float(i) if i not in ('', 'X') else None + return float(i) if i not in ("", "X") else None + def get_ilinet_data(row): - if row[0] == 'REGION TYPE' and row != [ - 'REGION TYPE', - 'REGION', - 'YEAR', - 'WEEK', - '% WEIGHTED ILI', - '%UNWEIGHTED ILI', - 'AGE 0-4', - 'AGE 25-49', - 'AGE 25-64', - 'AGE 5-24', - 'AGE 50-64', - 'AGE 65', - 'ILITOTAL', - 'NUM. OF PROVIDERS', - 'TOTAL PATIENTS' - ]: - raise Exception('header row has changed') - if len(row) == 1 or row[0] == 'REGION TYPE': - # this is a header row - return None - if row[5] == 'X': - # ILI isn't reported, ignore this row - return None - return { - 'location': fluview_locations.get_location_name(*row[:2]), - 'epiweek': join_epiweek(int(row[2]), int(row[3])), - 'wili': optional_float(*row[4:6]), - 'ili': float(row[5]), - 'age0': optional_int(row[6]), - 'age1': optional_int(row[9]), - 'age2': optional_int(row[8]), - 'age3': optional_int(row[7]), - 'age4': optional_int(row[10]), - 'age5': optional_int(row[11]), - 'n_ili': optional_int(row[12]), - 'n_providers': optional_int(row[13]), - 'n_patients': optional_int(row[14]), - } + if row[0] == "REGION TYPE" and row != [ + "REGION TYPE", + "REGION", + "YEAR", + "WEEK", + "% WEIGHTED ILI", + "%UNWEIGHTED ILI", + "AGE 0-4", + "AGE 25-49", + "AGE 25-64", + "AGE 5-24", + "AGE 50-64", + "AGE 65", + "ILITOTAL", + "NUM. 
OF PROVIDERS", + "TOTAL PATIENTS", + ]: + raise Exception("header row has changed") + if len(row) == 1 or row[0] == "REGION TYPE": + # this is a header row + return None + if row[5] == "X": + # ILI isn't reported, ignore this row + return None + return { + "location": fluview_locations.get_location_name(*row[:2]), + "epiweek": join_epiweek(int(row[2]), int(row[3])), + "wili": optional_float(*row[4:6]), + "ili": float(row[5]), + "age0": optional_int(row[6]), + "age1": optional_int(row[9]), + "age2": optional_int(row[8]), + "age3": optional_int(row[7]), + "age4": optional_int(row[10]), + "age5": optional_int(row[11]), + "n_ili": optional_int(row[12]), + "n_providers": optional_int(row[13]), + "n_patients": optional_int(row[14]), + } + def get_clinical_data(row): - if row[0] == 'REGION TYPE' and row != [ - 'REGION TYPE', - 'REGION', - 'YEAR', - 'WEEK', - 'TOTAL SPECIMENS', - 'TOTAL A', - 'TOTAL B', - 'PERCENT POSITIVE', - 'PERCENT A', - 'PERCENT B' - ]: - raise Exception('header row has changed for clinical lab data.') - if len(row) == 1 or row[0] == 'REGION TYPE': - # this is a header row - return None - if row[4] == 'X': - # data is not reported, ignore this row - return None - # ignore percentage calculations for now - return { - 'location': fluview_locations.get_location_name(*row[:2]), - 'epiweek': join_epiweek(int(row[2]), int(row[3])), - 'total_specimens': int(row[4]), - 'total_a': optional_int(row[5]), - 'total_b': optional_int(row[6]), - 'percent_positive': nullable_float(row[7]), - 'percent_a': nullable_float(row[8]), - 'percent_b': nullable_float(row[9]) - } + if row[0] == "REGION TYPE" and row != [ + "REGION TYPE", + "REGION", + "YEAR", + "WEEK", + "TOTAL SPECIMENS", + "TOTAL A", + "TOTAL B", + "PERCENT POSITIVE", + "PERCENT A", + "PERCENT B", + ]: + raise Exception("header row has changed for clinical lab data.") + if len(row) == 1 or row[0] == "REGION TYPE": + # this is a header row + return None + if row[4] == "X": + # data is not reported, ignore this row + return None + # ignore percentage calculations for now + return { + "location": fluview_locations.get_location_name(*row[:2]), + "epiweek": join_epiweek(int(row[2]), int(row[3])), + "total_specimens": int(row[4]), + "total_a": optional_int(row[5]), + "total_b": optional_int(row[6]), + "percent_positive": nullable_float(row[7]), + "percent_a": nullable_float(row[8]), + "percent_b": nullable_float(row[9]), + } + def get_public_data(row): - hrow1 = [ - 'REGION TYPE', - 'REGION', - 'SEASON_DESCRIPTION', - 'TOTAL SPECIMENS', - 'A (2009 H1N1)', - 'A (H3)', - 'A (Subtyping not Performed)', - 'B', - 'BVic', - 'BYam', - 'H3N2v' - ] - hrow2 = [ - 'REGION TYPE', - 'REGION', - 'YEAR', - 'WEEK', - 'TOTAL SPECIMENS', - 'A (2009 H1N1)', - 'A (H3)', - 'A (Subtyping not Performed)', - 'B', - 'BVic', - 'BYam', - 'H3N2v' - ] - if row[0] == 'REGION TYPE' and row != hrow1 and row != hrow2: - raise Exception('header row has changed for public health lab data.') - if len(row) == 1 or row[0] == 'REGION TYPE': - # header row - return None - if row[3] == 'X': - # data is not reported, ignore this row - return None - # handle case where data is reported by season, not by epiweek - is_weekly = len(row) == len(hrow2) - # set epiweek - if is_weekly: - epiweek = join_epiweek(int(row[2]), int(row[3])) - else: - epiweek = int(row[2][7:11]) * 100 + 40 - # row offset - offset = 1 if is_weekly else 0 - return { - 'location': fluview_locations.get_location_name(*row[:2]), - 'epiweek': epiweek, - 'total_specimens': int(row[3 + offset]), - 'total_a_h1n1': 
optional_int(row[4+ offset]), - 'total_a_h3': optional_int(row[5 + offset]), - 'total_a_h3n2v': optional_int(row[10 + offset]), - 'total_a_no_sub': optional_int(row[6 + offset]), - 'total_b': optional_int(row[7 + offset]), - 'total_b_vic': optional_int(row[8 + offset]), - 'total_b_yam': optional_int(row[9 + offset]) - } - -def load_zipped_csv(filename, sheetname='ILINet.csv'): - """Read rows from a zipped CSV, which is expected to be named as specified - by the sheetname parameter. Default is ILINet.csv, for the default flu data.""" - with zipfile.ZipFile(filename) as f: - with f.open(sheetname) as ff: - return [row for row in csv.reader(io.StringIO(str(ff.read(), 'utf-8')))] - -def get_rows(cnx, table='fluview'): - """Count and return the number of rows in the `fluview` table. - Looking at the fluview table by default, but may pass parameter - to look at public health or clinical lab data instead.""" - select = cnx.cursor() - select.execute('SELECT count(1) num FROM %s' % table) - for (num,) in select: - pass - select.close() - return num + hrow1 = [ + "REGION TYPE", + "REGION", + "SEASON_DESCRIPTION", + "TOTAL SPECIMENS", + "A (2009 H1N1)", + "A (H3)", + "A (Subtyping not Performed)", + "B", + "BVic", + "BYam", + "H3N2v", + ] + hrow2 = [ + "REGION TYPE", + "REGION", + "YEAR", + "WEEK", + "TOTAL SPECIMENS", + "A (2009 H1N1)", + "A (H3)", + "A (Subtyping not Performed)", + "B", + "BVic", + "BYam", + "H3N2v", + ] + if row[0] == "REGION TYPE" and row != hrow1 and row != hrow2: + raise Exception("header row has changed for public health lab data.") + if len(row) == 1 or row[0] == "REGION TYPE": + # header row + return None + if row[3] == "X": + # data is not reported, ignore this row + return None + # handle case where data is reported by season, not by epiweek + is_weekly = len(row) == len(hrow2) + # set epiweek + if is_weekly: + epiweek = join_epiweek(int(row[2]), int(row[3])) + else: + epiweek = int(row[2][7:11]) * 100 + 40 + # row offset + offset = 1 if is_weekly else 0 + return { + "location": fluview_locations.get_location_name(*row[:2]), + "epiweek": epiweek, + "total_specimens": int(row[3 + offset]), + "total_a_h1n1": optional_int(row[4 + offset]), + "total_a_h3": optional_int(row[5 + offset]), + "total_a_h3n2v": optional_int(row[10 + offset]), + "total_a_no_sub": optional_int(row[6 + offset]), + "total_b": optional_int(row[7 + offset]), + "total_b_vic": optional_int(row[8 + offset]), + "total_b_yam": optional_int(row[9 + offset]), + } + + +def load_zipped_csv(filename, sheetname="ILINet.csv"): + """Read rows from a zipped CSV, which is expected to be named as specified + by the sheetname parameter. Default is ILINet.csv, for the default flu data.""" + with zipfile.ZipFile(filename) as f: + with f.open(sheetname) as ff: + return [row for row in csv.reader(io.StringIO(str(ff.read(), "utf-8")))] + + +def get_rows(cnx, table="fluview"): + """Count and return the number of rows in the `fluview` table. + Looking at the fluview table by default, but may pass parameter + to look at public health or clinical lab data instead.""" + select = cnx.cursor() + select.execute(f"SELECT count(1) num FROM {table}") + for (num,) in select: + pass + select.close() + return num + def update_from_file_clinical(issue, date, filename, test_mode=False): - """ - Read WHO/NREVSS data from a zipped CSV and insert into (or update) the database. 
- """ - - # database connection - u, p = secrets.db.epi - cnx = mysql.connector.connect(user=u, password=p, database='epidata') - rows1 = get_rows(cnx, CL_TABLE) - print('rows before: %d' % (rows1)) - insert = cnx.cursor() - - # load the data, ignoring empty rows - print('loading data from %s as issued on %d' % (filename, issue)) - rows = load_zipped_csv(filename, CL_SHEET) - print(' loaded %d rows' % len(rows)) - data = [get_clinical_data(row) for row in rows] - entries = [obj for obj in data if obj] - print(' found %d entries' % len(entries)) - - sql = ''' - INSERT INTO - `fluview_clinical` (`release_date`, `issue`, `epiweek`, `region`, `lag`, - `total_specimens`, `total_a`, `total_b`, `percent_positive`, `percent_a`, - `percent_b`) - VALUES - (%s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s) - ON DUPLICATE KEY UPDATE - `release_date` = least(`release_date`, %s), - `total_specimens` = %s, - `total_a` = %s, - `total_b` = %s, - `percent_positive` = %s, - `percent_a` = %s, - `percent_b` = %s - ''' - - # insert each row - insert = cnx.cursor() - for row in entries: - lag = delta_epiweeks(row['epiweek'], issue) - args = [ - row['total_specimens'], row['total_a'], row['total_b'], - row['percent_positive'], row['percent_a'], row['percent_b'] - ] - ins_args = [date, issue, row['epiweek'], row['location'], lag] + args - upd_args = [date] + args - insert.execute(sql, ins_args + upd_args) - - # cleanup - insert.close() - if test_mode: - print('test mode, not committing') - rows2 = rows1 - else: - cnx.commit() - rows2 = get_rows(cnx) - print('rows after: %d (added %d)' % (rows2, rows2 - rows1)) - cnx.close() + """ + Read WHO/NREVSS data from a zipped CSV and insert into (or update) the database. + """ + + # database connection + u, p = secrets.db.epi + cnx = mysql.connector.connect(user=u, password=p, database="epidata") + rows1 = get_rows(cnx, CL_TABLE) + print(f"rows before: {int(rows1)}") + insert = cnx.cursor() + + # load the data, ignoring empty rows + print(f"loading data from {filename} as issued on {int(issue)}") + rows = load_zipped_csv(filename, CL_SHEET) + print(f" loaded {len(rows)} rows") + data = [get_clinical_data(row) for row in rows] + entries = [obj for obj in data if obj] + print(f" found {len(entries)} entries") + + sql = """ + INSERT INTO + `fluview_clinical` (`release_date`, `issue`, `epiweek`, `region`, `lag`, + `total_specimens`, `total_a`, `total_b`, `percent_positive`, `percent_a`, + `percent_b`) + VALUES + (%s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s) + ON DUPLICATE KEY UPDATE + `release_date` = least(`release_date`, %s), + `total_specimens` = %s, + `total_a` = %s, + `total_b` = %s, + `percent_positive` = %s, + `percent_a` = %s, + `percent_b` = %s + """ + + # insert each row + insert = cnx.cursor() + for row in entries: + lag = delta_epiweeks(row["epiweek"], issue) + args = [ + row["total_specimens"], + row["total_a"], + row["total_b"], + row["percent_positive"], + row["percent_a"], + row["percent_b"], + ] + ins_args = [date, issue, row["epiweek"], row["location"], lag] + args + upd_args = [date] + args + insert.execute(sql, ins_args + upd_args) + + # cleanup + insert.close() + if test_mode: + print("test mode, not committing") + rows2 = rows1 + else: + cnx.commit() + rows2 = get_rows(cnx) + print(f"rows after: {int(rows2)} (added {int(rows2 - rows1)})") + cnx.close() + def update_from_file_public(issue, date, filename, test_mode=False): - """ - Read WHO/NREVSS data from a zipped CSV and insert into (or update) the database. 
- """ - - # database connection - u, p = secrets.db.epi - cnx = mysql.connector.connect(user=u, password=p, database='epidata') - rows1 = get_rows(cnx, PHL_TABLE) - print('rows before: %d' % (rows1)) - insert = cnx.cursor() - - # load the data, ignoring empty rows - print('loading data from %s as issued on %d' % (filename, issue)) - rows = load_zipped_csv(filename, PHL_SHEET) - print(' loaded %d rows' % len(rows)) - data = [get_public_data(row) for row in rows] - entries = [obj for obj in data if obj] - print(' found %d entries' % len(entries)) - - sql = ''' - INSERT INTO - `fluview_public` (`release_date`, `issue`, `epiweek`, `region`, `lag`, - `total_specimens`, `total_a_h1n1`, `total_a_h3`, `total_a_h3n2v`, - `total_a_no_sub`, `total_b`, `total_b_vic`, `total_b_yam`) - VALUES - (%s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s) - ON DUPLICATE KEY UPDATE - `release_date` = least(`release_date`, %s), - `total_specimens` = %s, - `total_a_h1n1` = %s, - `total_a_h3` = %s, - `total_a_h3n2v` = %s, - `total_a_no_sub` = %s, - `total_b` = %s, - `total_b_vic` = %s, - `total_b_yam` = %s - ''' - - # insert each row - insert = cnx.cursor() - for row in entries: - lag = delta_epiweeks(row['epiweek'], issue) - args = [ - row['total_specimens'], row['total_a_h1n1'], row['total_a_h3'], - row['total_a_h3n2v'], row['total_a_no_sub'], row['total_b'], - row['total_b_vic'], row['total_b_yam'] - ] - ins_args = [date, issue, row['epiweek'], row['location'], lag] + args - upd_args = [date] + args - insert.execute(sql, ins_args + upd_args) - - # cleanup - insert.close() - if test_mode: - print('test mode, not committing') - rows2 = rows1 - else: - cnx.commit() - rows2 = get_rows(cnx) - print('rows after: %d (added %d)' % (rows2, rows2 - rows1)) - cnx.close() + """ + Read WHO/NREVSS data from a zipped CSV and insert into (or update) the database. 
+ """ + + # database connection + u, p = secrets.db.epi + cnx = mysql.connector.connect(user=u, password=p, database="epidata") + rows1 = get_rows(cnx, PHL_TABLE) + print(f"rows before: {int(rows1)}") + insert = cnx.cursor() + + # load the data, ignoring empty rows + print(f"loading data from {filename} as issued on {int(issue)}") + rows = load_zipped_csv(filename, PHL_SHEET) + print(f" loaded {len(rows)} rows") + data = [get_public_data(row) for row in rows] + entries = [obj for obj in data if obj] + print(f" found {len(entries)} entries") + + sql = """ + INSERT INTO + `fluview_public` (`release_date`, `issue`, `epiweek`, `region`, `lag`, + `total_specimens`, `total_a_h1n1`, `total_a_h3`, `total_a_h3n2v`, + `total_a_no_sub`, `total_b`, `total_b_vic`, `total_b_yam`) + VALUES + (%s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s) + ON DUPLICATE KEY UPDATE + `release_date` = least(`release_date`, %s), + `total_specimens` = %s, + `total_a_h1n1` = %s, + `total_a_h3` = %s, + `total_a_h3n2v` = %s, + `total_a_no_sub` = %s, + `total_b` = %s, + `total_b_vic` = %s, + `total_b_yam` = %s + """ + + # insert each row + insert = cnx.cursor() + for row in entries: + lag = delta_epiweeks(row["epiweek"], issue) + args = [ + row["total_specimens"], + row["total_a_h1n1"], + row["total_a_h3"], + row["total_a_h3n2v"], + row["total_a_no_sub"], + row["total_b"], + row["total_b_vic"], + row["total_b_yam"], + ] + ins_args = [date, issue, row["epiweek"], row["location"], lag] + args + upd_args = [date] + args + insert.execute(sql, ins_args + upd_args) + + # cleanup + insert.close() + if test_mode: + print("test mode, not committing") + rows2 = rows1 + else: + cnx.commit() + rows2 = get_rows(cnx) + print(f"rows after: {int(rows2)} (added {int(rows2 - rows1)})") + cnx.close() + def update_from_file(issue, date, filename, test_mode=False): - """ - Read ILINet data from a zipped CSV and insert into (or update) the database. 
- """ - - # database connection - u, p = secrets.db.epi - cnx = mysql.connector.connect(user=u, password=p, database='epidata') - rows1 = get_rows(cnx) - print('rows before: %d' % (rows1)) - insert = cnx.cursor() - - # load the data, ignoring empty rows - print('loading data from %s as issued on %d' % (filename, issue)) - rows = load_zipped_csv(filename) - print(' loaded %d rows' % len(rows)) - data = [get_ilinet_data(row) for row in rows] - entries = [obj for obj in data if obj] - print(' found %d entries' % len(entries)) - - sql = ''' - INSERT INTO - `fluview` (`release_date`, `issue`, `epiweek`, `region`, `lag`, `num_ili`, - `num_patients`, `num_providers`, `wili`, `ili`, `num_age_0`, `num_age_1`, - `num_age_2`, `num_age_3`, `num_age_4`, `num_age_5`) - VALUES - (%s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s) - ON DUPLICATE KEY UPDATE - `release_date` = least(`release_date`, %s), - `num_ili` = %s, - `num_patients` = %s, - `num_providers` = %s, - `wili` = %s, - `ili` = %s, - `num_age_0` = coalesce(%s, `num_age_0`), - `num_age_1` = coalesce(%s, `num_age_1`), - `num_age_2` = coalesce(%s, `num_age_2`), - `num_age_3` = coalesce(%s, `num_age_3`), - `num_age_4` = coalesce(%s, `num_age_4`), - `num_age_5` = coalesce(%s, `num_age_5`) - ''' - - # insert each row - insert = cnx.cursor() - for row in entries: - lag = delta_epiweeks(row['epiweek'], issue) - args = [ - row['n_ili'], row['n_patients'], row['n_providers'], row['wili'], - row['ili'], row['age0'], row['age1'], row['age2'], row['age3'], - row['age4'], row['age5'] - ] - ins_args = [date, issue, row['epiweek'], row['location'], lag] + args - upd_args = [date] + args - insert.execute(sql, ins_args + upd_args) - - # cleanup - insert.close() - if test_mode: - print('test mode, not committing') - rows2 = rows1 - else: - cnx.commit() - rows2 = get_rows(cnx) - print('rows after: %d (added %d)' % (rows2, rows2 - rows1)) - cnx.close() + """ + Read ILINet data from a zipped CSV and insert into (or update) the database. 
+ """ + + # database connection + u, p = secrets.db.epi + cnx = mysql.connector.connect(user=u, password=p, database="epidata") + rows1 = get_rows(cnx) + print(f"rows before: {int(rows1)}") + insert = cnx.cursor() + + # load the data, ignoring empty rows + print(f"loading data from {filename} as issued on {int(issue)}") + rows = load_zipped_csv(filename) + print(f" loaded {len(rows)} rows") + data = [get_ilinet_data(row) for row in rows] + entries = [obj for obj in data if obj] + print(f" found {len(entries)} entries") + + sql = """ + INSERT INTO + `fluview` (`release_date`, `issue`, `epiweek`, `region`, `lag`, `num_ili`, + `num_patients`, `num_providers`, `wili`, `ili`, `num_age_0`, `num_age_1`, + `num_age_2`, `num_age_3`, `num_age_4`, `num_age_5`) + VALUES + (%s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s) + ON DUPLICATE KEY UPDATE + `release_date` = least(`release_date`, %s), + `num_ili` = %s, + `num_patients` = %s, + `num_providers` = %s, + `wili` = %s, + `ili` = %s, + `num_age_0` = coalesce(%s, `num_age_0`), + `num_age_1` = coalesce(%s, `num_age_1`), + `num_age_2` = coalesce(%s, `num_age_2`), + `num_age_3` = coalesce(%s, `num_age_3`), + `num_age_4` = coalesce(%s, `num_age_4`), + `num_age_5` = coalesce(%s, `num_age_5`) + """ + + # insert each row + insert = cnx.cursor() + for row in entries: + lag = delta_epiweeks(row["epiweek"], issue) + args = [ + row["n_ili"], + row["n_patients"], + row["n_providers"], + row["wili"], + row["ili"], + row["age0"], + row["age1"], + row["age2"], + row["age3"], + row["age4"], + row["age5"], + ] + ins_args = [date, issue, row["epiweek"], row["location"], lag] + args + upd_args = [date] + args + insert.execute(sql, ins_args + upd_args) + + # cleanup + insert.close() + if test_mode: + print("test mode, not committing") + rows2 = rows1 + else: + cnx.commit() + rows2 = get_rows(cnx) + print(f"rows after: {int(rows2)} (added {int(rows2 - rows1)})") + cnx.close() + def main(): - # args and usage - parser = argparse.ArgumentParser() - parser.add_argument( - '--test', - action='store_true', - help='do dry run only, do not update the database' - ) - parser.add_argument( - '--file', - type=str, - help='load an existing zip file (otherwise fetch current data)' - ) - parser.add_argument( - '--issue', - type=int, - help='issue of the file (e.g. 
201740); used iff --file is given' - ) - args = parser.parse_args() - - if (args.file is None) != (args.issue is None): - raise Exception('--file and --issue must both be present or absent') - - date = datetime.datetime.now().strftime('%Y-%m-%d') - print('assuming release date is today, %s' % date) - - if args.file: - update_from_file(args.issue, date, args.file, test_mode=args.test) - update_from_file_clinical(args.issue, date, args.file, test_mode=args.test) - # TODO: header row has changed for public health lab data - # update_from_file_public(args.issue, date, args.file, test_mode=args.test) - else: - issue, files = fluview.save_latest(path='flu_data') - for filename in files: - update_from_file(issue, date, filename, test_mode=args.test) - update_from_file_clinical(issue, date, filename, test_mode=args.test) - # TODO: header row has changed for public health lab data - # update_from_file_public(issue, date, filename, test_mode=args.test) - -if __name__ == '__main__': - main() + # args and usage + parser = argparse.ArgumentParser() + # fmt: off + parser.add_argument( + "--test", + action="store_true", + help="do dry run only, do not update the database" + ) + parser.add_argument( + "--file", + type=str, + help="load an existing zip file (otherwise fetch current data)" + ) + parser.add_argument( + "--issue", + type=int, + help="issue of the file (e.g. 201740); used iff --file is given" + ) + # fmt: on + args = parser.parse_args() + + if (args.file is None) != (args.issue is None): + raise Exception("--file and --issue must both be present or absent") + + date = datetime.datetime.now().strftime("%Y-%m-%d") + print(f"assuming release date is today, {date}") + + if args.file: + update_from_file(args.issue, date, args.file, test_mode=args.test) + update_from_file_clinical(args.issue, date, args.file, test_mode=args.test) + # TODO: header row has changed for public health lab data + # update_from_file_public(args.issue, date, args.file, test_mode=args.test) + else: + issue, files = fluview.save_latest(path="flu_data") + for filename in files: + update_from_file(issue, date, filename, test_mode=args.test) + update_from_file_clinical(issue, date, filename, test_mode=args.test) + # TODO: header row has changed for public health lab data + # update_from_file_public(issue, date, filename, test_mode=args.test) + + +if __name__ == "__main__": + main() diff --git a/src/acquisition/fluview/impute_missing_values.py b/src/acquisition/fluview/impute_missing_values.py index 7f9a23231..4b3e1d684 100644 --- a/src/acquisition/fluview/impute_missing_values.py +++ b/src/acquisition/fluview/impute_missing_values.py @@ -59,290 +59,283 @@ class Database: - """Database wrapper and abstraction layer.""" - - class Sql: - """Container for SQL constants.""" - - # Count the total number of imputed rows. - count_rows = ''' - SELECT - count(1) `num` - FROM - `fluview_imputed` - ''' - - # Find (issue, epiweek) pairs that exist in table `fluview` but not in - # table `fluview_imputed`. Note that only issues >= 201740 are selected - # because that's when CDC first started posting state-level ILINet data. - # This assumes that `fluview` is always missing at least one location. - find_missing_rows = ''' - SELECT - fv.`issue`, fv.`epiweek` - FROM ( + """Database wrapper and abstraction layer.""" + + class Sql: + """Container for SQL constants.""" + + # Count the total number of imputed rows. 
+ count_rows = """ SELECT - `issue`, `epiweek` + count(1) `num` FROM - `fluview` + `fluview_imputed` + """ + + # Find (issue, epiweek) pairs that exist in table `fluview` but not in + # table `fluview_imputed`. Note that only issues >= 201740 are selected + # because that's when CDC first started posting state-level ILINet data. + # This assumes that `fluview` is always missing at least one location. + find_missing_rows = """ + SELECT + fv.`issue`, fv.`epiweek` + FROM ( + SELECT + `issue`, `epiweek` + FROM + `fluview` + WHERE + `issue` >= 201740 + GROUP BY + `issue`, `epiweek` + ) fv + LEFT JOIN ( + SELECT + `issue`, `epiweek` + FROM + `fluview_imputed` + GROUP BY + `issue`, `epiweek` + ) fvi + ON + fvi.`issue` = fv.`issue` AND fvi.`epiweek` = fv.`epiweek` WHERE - `issue` >= 201740 - GROUP BY - `issue`, `epiweek` - ) fv - LEFT JOIN ( + fvi.`issue` IS NULL + """ + + # Read all location rows from the `fluview` table for a given issue and + # epiweek. + get_known_values = """ SELECT - `issue`, `epiweek` + `region`, `num_ili`, `num_patients`, `num_providers` FROM - `fluview_imputed` - GROUP BY - `issue`, `epiweek` - ) fvi - ON - fvi.`issue` = fv.`issue` AND fvi.`epiweek` = fv.`epiweek` - WHERE - fvi.`issue` IS NULL - ''' - - # Read all location rows from the `fluview` table for a given issue and - # epiweek. - get_known_values = ''' - SELECT - `region`, `num_ili`, `num_patients`, `num_providers` - FROM - `fluview` - WHERE - `issue` = %s AND `epiweek` = %s - ''' - - # Insert location rows into the `fluview_imputed` table for a given issue - # and epiweek. - add_imputed_values = ''' - INSERT INTO - `fluview_imputed` ( - `issue`, - `epiweek`, - `region`, - `lag`, - `num_ili`, - `num_patients`, - `num_providers`, - `ili` - ) - VALUES - (%s, %s, %s, %s, %s, %s, %s, %s) - ''' - - def connect(self): - """Connect to the database.""" - u, p = secrets.db.epi - self.cnx = mysql.connector.connect(user=u, password=p, database='epidata') - self.cur = self.cnx.cursor() - - def close(self, commit): - """ - Close the connection to the database, committing or rolling back changes as - indicated. - """ - self.cur.close() - if commit: - self.cnx.commit() - else: - print('test mode, not committing') - self.cnx.close() - - def count_rows(self): - """Count and return the number of rows in the `fluview_imputed` table.""" - self.cur.execute(Database.Sql.count_rows) - for (num,) in self.cur: - return num - - def find_missing_rows(self): - """ - Find rows that still have missing values. Each missing row is uniquely - identified by an (issue, epiweek, location) tuple. This function finds the - first two. - """ + `fluview` + WHERE + `issue` = %s AND `epiweek` = %s + """ + + # Insert location rows into the `fluview_imputed` table for a given issue + # and epiweek. + add_imputed_values = """ + INSERT INTO + `fluview_imputed` ( + `issue`, + `epiweek`, + `region`, + `lag`, + `num_ili`, + `num_patients`, + `num_providers`, + `ili` + ) + VALUES + (%s, %s, %s, %s, %s, %s, %s, %s) + """ + + def connect(self): + """Connect to the database.""" + u, p = secrets.db.epi + self.cnx = mysql.connector.connect(user=u, password=p, database="epidata") + self.cur = self.cnx.cursor() + + def close(self, commit): + """ + Close the connection to the database, committing or rolling back changes as + indicated. 
+ """ + self.cur.close() + if commit: + self.cnx.commit() + else: + print("test mode, not committing") + self.cnx.close() + + def count_rows(self): + """Count and return the number of rows in the `fluview_imputed` table.""" + self.cur.execute(Database.Sql.count_rows) + for (num,) in self.cur: + return num + + def find_missing_rows(self): + """ + Find rows that still have missing values. Each missing row is uniquely + identified by an (issue, epiweek, location) tuple. This function finds the + first two. + """ + + self.cur.execute(Database.Sql.find_missing_rows) + return [(issue, epiweek) for (issue, epiweek) in self.cur] + + def get_known_values(self, issue, epiweek): + """ + Fetch ILINet data for all locations available for the given issue and + epiweek. The returned value is a dict mapping from locations to ILI data. + """ + + self.cur.execute(Database.Sql.get_known_values, (issue, epiweek)) + return {loc: (n_ili, n_pat, n_prov) for (loc, n_ili, n_pat, n_prov) in self.cur} + + def add_imputed_values(self, issue, epiweek, imputed): + """ + Store imputed ILINet data for the given locations on the given issue and + epiweek. The imputed value is a dict mapping from locations to ILI data. + """ + + for loc in imputed.keys(): + lag, n_ili, n_pat, n_prov, ili = imputed[loc] + args = (issue, epiweek, loc, lag, n_ili, n_pat, n_prov, ili) + self.cur.execute(Database.Sql.add_imputed_values, args) - self.cur.execute(Database.Sql.find_missing_rows) - return [(issue, epiweek) for (issue, epiweek) in self.cur] - def get_known_values(self, issue, epiweek): - """ - Fetch ILINet data for all locations available for the given issue and - epiweek. The returned value is a dict mapping from locations to ILI data. - """ +class StatespaceException(Exception): + """Used to indicate that imputation is not possible with the given inputs.""" - self.cur.execute(Database.Sql.get_known_values, (issue, epiweek)) - return dict([ - (loc, (n_ili, n_pat, n_prov)) - for - (loc, n_ili, n_pat, n_prov) - in self.cur - ]) - def add_imputed_values(self, issue, epiweek, imputed): +def get_location_graph(): """ - Store imputed ILINet data for the given locations on the given issue and - epiweek. The imputed value is a dict mapping from locations to ILI data. + Return a matrix where rows represent regions, columns represent atoms, and + each entry is a 1 if the region contains the atom, otherwise 0. The + corresponding lists of regions and atoms are also returned. """ - for loc in imputed.keys(): - lag, n_ili, n_pat, n_prov, ili = imputed[loc] - args = (issue, epiweek, loc, lag, n_ili, n_pat, n_prov, ili) - self.cur.execute(Database.Sql.add_imputed_values, args) - - -class StatespaceException(Exception): - """Used to indicate that imputation is not possible with the given inputs.""" - - -def get_location_graph(): - """ - Return a matrix where rows represent regions, columns represent atoms, and - each entry is a 1 if the region contains the atom, otherwise 0. The - corresponding lists of regions and atoms are also returned. 
- """ - - regions = sorted(Locations.region_list) - atoms = sorted(Locations.atom_list) - graph = np.zeros((len(regions), len(atoms))) - for i, r in enumerate(regions): - for a in Locations.region_map[r]: - j = atoms.index(a) - graph[i, j] = 1 - return graph, regions, atoms + regions = sorted(Locations.region_list) + atoms = sorted(Locations.atom_list) + graph = np.zeros((len(regions), len(atoms))) + for i, r in enumerate(regions): + for a in Locations.region_map[r]: + j = atoms.index(a) + graph[i, j] = 1 + return graph, regions, atoms def get_fusion_parameters(known_locations): - """ - Return a matrix that fuses known ILI values into unknown ILI values. The - corresponding lists of known and unknown locations are also returned. + """ + Return a matrix that fuses known ILI values into unknown ILI values. The + corresponding lists of known and unknown locations are also returned. - The goal is to infer ILI data in all locations, given ILI data in some - partial set of locations. This function takes a sensor fusion approach. + The goal is to infer ILI data in all locations, given ILI data in some + partial set of locations. This function takes a sensor fusion approach. - Let $z$ be a column vector of values in reported locations. Let $y$ be the - desired column vector of values in unreported locations. With matrices $H$ - (mapping from latent state to reported values), $W$ (mapping from latent - state to unreported values), and $R = I$ (covariance, which is identity): + Let $z$ be a column vector of values in reported locations. Let $y$ be the + desired column vector of values in unreported locations. With matrices $H$ + (mapping from latent state to reported values), $W$ (mapping from latent + state to unreported values), and $R = I$ (covariance, which is identity): - $y = W (H^T R^{-1} H)^{-1} H^T R^{-1} z$ - $y = W (H^T H)^{-1} H^T z$ + $y = W (H^T R^{-1} H)^{-1} H^T R^{-1} z$ + $y = W (H^T H)^{-1} H^T z$ - This is equavalent to OLS regression with an added translation from atomic - locations to missing locations. Unknown values are computed as a linear - combination of known values. - """ + This is equavalent to OLS regression with an added translation from atomic + locations to missing locations. Unknown values are computed as a linear + combination of known values. 
+ """ - graph, regions, atoms = get_location_graph() - is_known = np.array([r in known_locations for r in regions]) - is_unknown = np.logical_not(is_known) - if not np.any(is_known): - raise StatespaceException('no values are known') - if not np.any(is_unknown): - raise StatespaceException('no values are unknown') + graph, regions, atoms = get_location_graph() + is_known = np.array([r in known_locations for r in regions]) + is_unknown = np.logical_not(is_known) + if not np.any(is_known): + raise StatespaceException("no values are known") + if not np.any(is_unknown): + raise StatespaceException("no values are unknown") - H = graph[is_known, :] - W = graph[is_unknown, :] - if np.linalg.matrix_rank(H) != len(atoms): - raise StatespaceException('system is underdetermined') + H = graph[is_known, :] + W = graph[is_unknown, :] + if np.linalg.matrix_rank(H) != len(atoms): + raise StatespaceException("system is underdetermined") - HtH = np.dot(H.T, H) - HtH_inv = np.linalg.inv(HtH) - H_pseudo_inv = np.dot(HtH_inv, H.T) - fuser = np.dot(W, H_pseudo_inv) + HtH = np.dot(H.T, H) + HtH_inv = np.linalg.inv(HtH) + H_pseudo_inv = np.dot(HtH_inv, H.T) + fuser = np.dot(W, H_pseudo_inv) - locations = np.array(regions) - filter_locations = lambda selected: list(map(str, locations[selected])) - return fuser, filter_locations(is_known), filter_locations(is_unknown) + locations = np.array(regions) + filter_locations = lambda selected: list(map(str, locations[selected])) + return fuser, filter_locations(is_known), filter_locations(is_unknown) def get_lag_and_ili(issue, epiweek, num_ili, num_patients): - """ - Compute and return reporting lag and percent ILI from imputed ILINet data. - """ - lag = delta_epiweeks(epiweek, issue) - ili = 100.0 * (0 if num_patients == 0 else num_ili / num_patients) - return lag, ili + """ + Compute and return reporting lag and percent ILI from imputed ILINet data. + """ + lag = delta_epiweeks(epiweek, issue) + ili = 100.0 * (0 if num_patients == 0 else num_ili / num_patients) + return lag, ili def impute_missing_values(database, test_mode=False): - """ - Determine whether values are missing for any states and territories. If so, - impute them and store them in the database. - """ - - # database connection - database.connect() - rows1 = database.count_rows() - print('rows before: %d' % (rows1)) - - # iterate over missing epiweeks - missing_rows = database.find_missing_rows() - print('missing data for %d epiweeks' % len(missing_rows)) - for issue, epiweek in missing_rows: - print('i=%d e=%d' % (issue, epiweek)) - - # get known values from table `fluview` - known_values = database.get_known_values(issue, epiweek) - - # Unlike most other state-level data, which typically begins publicly on - # 2010w40, data for PR begins on 2013w40. Before this, there are no reports - # for PR. Here we assume that no report is equivalent to a report of all - # zeros (number of ILI, patients, and providers). That's mostly true, with - # the notable exception of wILI, but that's not relevant here. By assuming - # that PR reports zero on those weeks, it's possible to impute values for - # VI, which are otherwise not reported until 2015w40. 
- assume_pr_zero = epiweek < 201340 and 'pr' not in known_values - if assume_pr_zero: - known_values['pr'] = (0, 0, 0) - - # get the imputation matrix and lists of known and unknown locations - F, known, unknown = get_fusion_parameters(known_values.keys()) - - # finally, impute the missing values - z = np.array([known_values[k] for k in known]) - y = np.dot(F, z) - - # possibly also record the assumptions made for PR - if assume_pr_zero: - unknown.append('pr') - y = np.vstack((y, [known_values['pr']])) - - # add lag and percent ILI to the data for each imputed location - imputed_values = {} - for loc, values in zip(unknown, y): - n_ili, n_pat, n_prov = map(int, np.rint(values)) - lag, ili = get_lag_and_ili(issue, epiweek, n_ili, n_pat) - imputed_values[loc] = (lag, n_ili, n_pat, n_prov, ili) - print(' %s: %s' % (loc, str(imputed_values[loc]))) - - # save all imputed values in table `fluview_imputed` - database.add_imputed_values(issue, epiweek, imputed_values) - - # database cleanup - rows2 = database.count_rows() - print('rows after: %d (added %d)' % (rows2, rows2 - rows1)) - commit = not test_mode - database.close(commit) + """ + Determine whether values are missing for any states and territories. If so, + impute them and store them in the database. + """ + + # database connection + database.connect() + rows1 = database.count_rows() + print(f"rows before: {int(rows1)}") + + # iterate over missing epiweeks + missing_rows = database.find_missing_rows() + print(f"missing data for {len(missing_rows)} epiweeks") + for issue, epiweek in missing_rows: + print(f"i={int(issue)} e={int(epiweek)}") + + # get known values from table `fluview` + known_values = database.get_known_values(issue, epiweek) + + # Unlike most other state-level data, which typically begins publicly on + # 2010w40, data for PR begins on 2013w40. Before this, there are no reports + # for PR. Here we assume that no report is equivalent to a report of all + # zeros (number of ILI, patients, and providers). That's mostly true, with + # the notable exception of wILI, but that's not relevant here. By assuming + # that PR reports zero on those weeks, it's possible to impute values for + # VI, which are otherwise not reported until 2015w40. 
+ assume_pr_zero = epiweek < 201340 and "pr" not in known_values + if assume_pr_zero: + known_values["pr"] = (0, 0, 0) + + # get the imputation matrix and lists of known and unknown locations + F, known, unknown = get_fusion_parameters(known_values.keys()) + + # finally, impute the missing values + z = np.array([known_values[k] for k in known]) + y = np.dot(F, z) + + # possibly also record the assumptions made for PR + if assume_pr_zero: + unknown.append("pr") + y = np.vstack((y, [known_values["pr"]])) + + # add lag and percent ILI to the data for each imputed location + imputed_values = {} + for loc, values in zip(unknown, y): + n_ili, n_pat, n_prov = map(int, np.rint(values)) + lag, ili = get_lag_and_ili(issue, epiweek, n_ili, n_pat) + imputed_values[loc] = (lag, n_ili, n_pat, n_prov, ili) + print(f" {loc}: {str(imputed_values[loc])}") + + # save all imputed values in table `fluview_imputed` + database.add_imputed_values(issue, epiweek, imputed_values) + + # database cleanup + rows2 = database.count_rows() + print(f"rows after: {int(rows2)} (added {int(rows2 - rows1)})") + commit = not test_mode + database.close(commit) def get_argument_parser(): - """Set up command line arguments and usage.""" - parser = argparse.ArgumentParser() - parser.add_argument( - '--test', - action='store_true', - help='do dry run only, do not update the database' - ) - return parser + """Set up command line arguments and usage.""" + parser = argparse.ArgumentParser() + parser.add_argument( + "--test", action="store_true", help="do dry run only, do not update the database" + ) + return parser def main(): - """Run this script from the command line.""" - args = get_argument_parser().parse_args() - impute_missing_values(Database(), test_mode=args.test) + """Run this script from the command line.""" + args = get_argument_parser().parse_args() + impute_missing_values(Database(), test_mode=args.test) -if __name__ == '__main__': - main() +if __name__ == "__main__": + main() diff --git a/src/acquisition/ght/ght_update.py b/src/acquisition/ght/ght_update.py index c1e9b8d94..9e8d48d1d 100644 --- a/src/acquisition/ght/ght_update.py +++ b/src/acquisition/ght/ght_update.py @@ -1,4 +1,4 @@ -''' +""" =============== === Purpose === =============== @@ -63,7 +63,7 @@ * fixed multiple-word queries (surround with quotes) 2015-12-01 * Original version -''' +""" # standard library import argparse @@ -88,304 +88,339 @@ # 2010-04-19 and 2015-05-05 # see: https://www.google.com/trends/correlate TERMS = [ - '/m/0cycc', - 'influenza type a', - 'flu duration', - 'flu fever', - 'treating flu', - 'fever flu', - 'flu recovery', - 'braun thermoscan', - 'oscillococcinum', - 'treating the flu', - 'cold or flu', - 'flu versus cold', - 'flu remedies', - 'contagious flu', - 'type a influenza', - 'flu or cold', - 'duration of flu', - 'cold versus flu', - 'flu cough', - 'flu headache', - 'thermoscan', - 'influenza incubation period', - 'flu lasts', - 'length of flu', - 'flu stomach', - 'cold vs flu', - 'flu and fever', - 'getting over the flu', - 'influenza a', - 'treatment for flu', - 'flu length', - 'treatment for the flu', - 'influenza symptoms', - 'over the counter flu', - 'flu complications', - 'cold and flu symptoms', - 'influenza incubation', - 'treatment of flu', - 'human temperature', - 'low body', - 'flu contagious', - 'robitussin ac', - 'flu how long', - 'ear thermometer', - 'flu contagious period', - 'treat flu', - 'cough flu', - 'low body temperature', - 'expectorant', - 'flu and cold', - 'rapid flu', - 'flu vs. 
cold', - 'how to treat the flu', - 'how long does the flu last?', - 'viral pneumonia', - 'flu in kids', - 'type a flu', - 'influenza treatment', - 'fighting the flu', - 'flu relief', - 'treat the flu', - 'flu medicine', - 'dangerous fever', - 'what is influenza', - 'tussin', - 'low body temp', - 'flu care', - 'flu in infants', - 'flu dizziness', - 'feed a fever', - 'flu vs cold', - 'flu vomiting', - 'bacterial pneumonia', - 'flu activity', - 'flu chills', - 'anas barbariae', - 'flu germs', - 'tylenol cold', - 'how to get over the flu', - 'flu in children', - 'influenza a and b', - 'duration of the flu', - 'cold symptoms', - 'flu report', - 'rapid flu test', - 'flu relapse', - 'get over the flu', - 'flu during pregnancy', - 'flu recovery time', - 'cure for flu', - 'tamiflu and breastfeeding', - 'flu chest pain', - 'flu treatment', - 'flu nausea', - 'remedies for the flu', - 'tamiflu in pregnancy', - 'side effects of tamiflu', - 'how to treat flu', - 'viral bronchitis', - 'flu how long contagious', - 'flu remedy', + "/m/0cycc", + "influenza type a", + "flu duration", + "flu fever", + "treating flu", + "fever flu", + "flu recovery", + "braun thermoscan", + "oscillococcinum", + "treating the flu", + "cold or flu", + "flu versus cold", + "flu remedies", + "contagious flu", + "type a influenza", + "flu or cold", + "duration of flu", + "cold versus flu", + "flu cough", + "flu headache", + "thermoscan", + "influenza incubation period", + "flu lasts", + "length of flu", + "flu stomach", + "cold vs flu", + "flu and fever", + "getting over the flu", + "influenza a", + "treatment for flu", + "flu length", + "treatment for the flu", + "influenza symptoms", + "over the counter flu", + "flu complications", + "cold and flu symptoms", + "influenza incubation", + "treatment of flu", + "human temperature", + "low body", + "flu contagious", + "robitussin ac", + "flu how long", + "ear thermometer", + "flu contagious period", + "treat flu", + "cough flu", + "low body temperature", + "expectorant", + "flu and cold", + "rapid flu", + "flu vs. 
cold", + "how to treat the flu", + "how long does the flu last?", + "viral pneumonia", + "flu in kids", + "type a flu", + "influenza treatment", + "fighting the flu", + "flu relief", + "treat the flu", + "flu medicine", + "dangerous fever", + "what is influenza", + "tussin", + "low body temp", + "flu care", + "flu in infants", + "flu dizziness", + "feed a fever", + "flu vs cold", + "flu vomiting", + "bacterial pneumonia", + "flu activity", + "flu chills", + "anas barbariae", + "flu germs", + "tylenol cold", + "how to get over the flu", + "flu in children", + "influenza a and b", + "duration of the flu", + "cold symptoms", + "flu report", + "rapid flu test", + "flu relapse", + "get over the flu", + "flu during pregnancy", + "flu recovery time", + "cure for flu", + "tamiflu and breastfeeding", + "flu chest pain", + "flu treatment", + "flu nausea", + "remedies for the flu", + "tamiflu in pregnancy", + "side effects of tamiflu", + "how to treat flu", + "viral bronchitis", + "flu how long contagious", + "flu remedy", ] # a list of all US states, including DC and the US as a whole LOCATIONS = [ - 'US', - 'AL', - 'AK', - 'AZ', - 'AR', - 'CA', - 'CO', - 'CT', - 'DC', - 'DE', - 'FL', - 'GA', - 'HI', - 'ID', - 'IL', - 'IN', - 'IA', - 'KS', - 'KY', - 'LA', - 'ME', - 'MD', - 'MA', - 'MI', - 'MN', - 'MS', - 'MO', - 'MT', - 'NE', - 'NV', - 'NH', - 'NJ', - 'NM', - 'NY', - 'NC', - 'ND', - 'OH', - 'OK', - 'OR', - 'PA', - 'RI', - 'SC', - 'SD', - 'TN', - 'TX', - 'UT', - 'VT', - 'VA', - 'WA', - 'WV', - 'WI', - 'WY', + "US", + "AL", + "AK", + "AZ", + "AR", + "CA", + "CO", + "CT", + "DC", + "DE", + "FL", + "GA", + "HI", + "ID", + "IL", + "IN", + "IA", + "KS", + "KY", + "LA", + "ME", + "MD", + "MA", + "MI", + "MN", + "MS", + "MO", + "MT", + "NE", + "NV", + "NH", + "NJ", + "NM", + "NY", + "NC", + "ND", + "OH", + "OK", + "OR", + "PA", + "RI", + "SC", + "SD", + "TN", + "TX", + "UT", + "VT", + "VA", + "WA", + "WV", + "WI", + "WY", ] -def update(locations, terms, first=None, last=None, countries=['US']): - # connect to the database - u, p = secrets.db.epi - cnx = mysql.connector.connect(user=u, password=p, database='epidata') - cur = cnx.cursor() +def update(locations, terms, first=None, last=None, countries=["US"]): + # connect to the database + u, p = secrets.db.epi + cnx = mysql.connector.connect(user=u, password=p, database="epidata") + cur = cnx.cursor() - def get_num_rows(): - cur.execute('SELECT count(1) `num` FROM `ght`') - for (num,) in cur: - pass - return num + def get_num_rows(): + cur.execute("SELECT count(1) `num` FROM `ght`") + for (num,) in cur: + pass + return num - # check from 4 weeks preceeding the last week with data through this week - cur.execute('SELECT max(`epiweek`) `ew0`, yearweek(now(), 6) `ew1` FROM `ght`') - for (ew0, ew1) in cur: - ew0 = 200401 if ew0 is None else flu.add_epiweeks(ew0, -4) - ew0 = ew0 if first is None else first - ew1 = ew1 if last is None else last - print('Checking epiweeks between %d and %d...' 
% (ew0, ew1)) + # check from 4 weeks preceeding the last week with data through this week + cur.execute("SELECT max(`epiweek`) `ew0`, yearweek(now(), 6) `ew1` FROM `ght`") + for (ew0, ew1) in cur: + ew0 = 200401 if ew0 is None else flu.add_epiweeks(ew0, -4) + ew0 = ew0 if first is None else first + ew1 = ew1 if last is None else last + print(f"Checking epiweeks between {int(ew0)} and {int(ew1)}...") - # keep track of how many rows were added - rows_before = get_num_rows() + # keep track of how many rows were added + rows_before = get_num_rows() - # check Google Trends for new and/or revised data - sql = ''' + # check Google Trends for new and/or revised data + sql = """ INSERT INTO `ght` (`query`, `location`, `epiweek`, `value`) VALUES (%s, %s, %s, %s) ON DUPLICATE KEY UPDATE `value` = %s - ''' - total_rows = 0 - ght = GHT(API_KEY) - for term in terms: - print(' [%s] using term' % term) - ll, cl = len(locations), len(countries) - for i in range(max(ll,cl)): - location = locations[i] if i < ll else locations[0] - country = countries[i] if i < cl else countries[0] - try: - #term2 = ('"%s"' % term) if ' ' in term else term - term2 = term - attempt = 0 - while True: - attempt += 1 - try: - result = ght.get_data(ew0, ew1, location, term2, country=country) - break - except Exception as ex: - if attempt >= 5: - raise ex - else: - delay = 2 ** attempt - print(' [%s|%s] caught exception (will retry in %ds):' % (term, location, delay), ex) - time.sleep(delay) - values = [p['value'] for p in result['data']['lines'][0]['points']] - ew = result['start_week'] - num_missing = 0 - for v in values: - # Default SQL location value for US country for backwards compatibility - # i.e. California's location is still stored as 'CA', - # and having location == 'US' is still stored as 'US' - sql_location = location if location != NO_LOCATION_STR else country - - # Change SQL location for non-US countries - if country != 'US': - # Underscore added to distinguish countries from 2-letter US states - sql_location = country + "_" - if location != NO_LOCATION_STR: - sql_location = sql_location + location - sql_data = (term, sql_location, ew, v, v) - cur.execute(sql, sql_data) - total_rows += 1 - if v == 0: - num_missing += 1 - #print(' [%s|%s|%d] missing value' % (term, location, ew)) - ew = flu.add_epiweeks(ew, 1) - if num_missing > 0: - print(' [%s|%s] missing %d/%d value(s)' % (term, location, num_missing, len(values))) - except Exception as ex: - print(' [%s|%s] caught exception (will NOT retry):' % (term, location), ex) - - # keep track of how many rows were added - rows_after = get_num_rows() - print('Inserted %d/%d row(s)'%(rows_after - rows_before, total_rows)) - - # cleanup - cur.close() - cnx.commit() - cnx.close() + """ + total_rows = 0 + ght = GHT(API_KEY) + for term in terms: + print(f" [{term}] using term") + ll, cl = len(locations), len(countries) + for i in range(max(ll, cl)): + location = locations[i] if i < ll else locations[0] + country = countries[i] if i < cl else countries[0] + try: + # term2 = ('"%s"' % term) if ' ' in term else term + term2 = term + attempt = 0 + while True: + attempt += 1 + try: + result = ght.get_data(ew0, ew1, location, term2, country=country) + break + except Exception as ex: + if attempt >= 5: + raise ex + else: + delay = 2**attempt + print( + f" [{term}|{location}] caught exception (will retry in {int(delay)}s):", + ex, + ) + time.sleep(delay) + values = [p["value"] for p in result["data"]["lines"][0]["points"]] + ew = result["start_week"] + num_missing = 0 + for v in 
values: + # Default SQL location value for US country for backwards compatibility + # i.e. California's location is still stored as 'CA', + # and having location == 'US' is still stored as 'US' + sql_location = location if location != NO_LOCATION_STR else country + + # Change SQL location for non-US countries + if country != "US": + # Underscore added to distinguish countries from 2-letter US states + sql_location = country + "_" + if location != NO_LOCATION_STR: + sql_location = sql_location + location + sql_data = (term, sql_location, ew, v, v) + cur.execute(sql, sql_data) + total_rows += 1 + if v == 0: + num_missing += 1 + # print(' [%s|%s|%d] missing value' % (term, location, ew)) + ew = flu.add_epiweeks(ew, 1) + if num_missing > 0: + print(f" [{term}|{location}] missing {int(num_missing)}/{len(values)} value(s)") + except Exception as ex: + print(f" [{term}|{location}] caught exception (will NOT retry):", ex) + + # keep track of how many rows were added + rows_after = get_num_rows() + print(f"Inserted {int(rows_after - rows_before)}/{int(total_rows)} row(s)") + + # cleanup + cur.close() + cnx.commit() + cnx.close() def main(): - # args and usage - parser = argparse.ArgumentParser() - parser.add_argument('location', action='store', type=str, default=None, help='location(s) (ex: all; US; TX; CA,LA,WY)') - parser.add_argument('term', action='store', type=str, default=None, help='term/query/topic (ex: all; /m/0cycc; "flu fever")') - parser.add_argument('--first', '-f', default=None, type=int, help='first epiweek override') - parser.add_argument('--last', '-l', default=None, type=int, help='last epiweek override') - parser.add_argument('--country', '-c', default='US', type=str, help='location country (ex: US; BR)') - args = parser.parse_args() - - # sanity check - first, last = args.first, args.last - if first is not None: - flu.check_epiweek(first) - if last is not None: - flu.check_epiweek(last) - if first is not None and last is not None and first > last: - raise Exception('epiweeks in the wrong order') - - # decide what to update - if args.location.lower() == 'all': - locations = LOCATIONS - elif args.location.lower() == 'none': - locations = [NO_LOCATION_STR] - else: - locations = args.location.upper().split(',') - if args.term.lower() == 'all': - terms = TERMS - else: - terms = [args.term] - - # country argument - # Check that country follows ISO 1366 Alpha-2 code. - # See https://www.iso.org/obp/ui/#search. 
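# --- Illustrative aside, not part of the patch ---
# The retry loop in update() above caps attempts at 5 and sleeps 2**attempt
# seconds between tries (2, 4, 8, 16 s). The same pattern, factored into a
# hypothetical standalone helper (`with_retries` is not defined in this module):
import time

def with_retries(fn, max_attempts=5):
    for attempt in range(1, max_attempts + 1):
        try:
            return fn()
        except Exception:
            if attempt == max_attempts:
                raise
            time.sleep(2 ** attempt)  # exponential backoff: 2, 4, 8, 16 seconds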
- countries = args.country.upper().split(',') - if not all(map(lambda x: len(x) == 2, countries)): - raise Exception('country name must be two letters (ISO 1366 Alpha-2)') - - # if length of locations and countries is > 1, need to be the same - if len(locations) > 1 and len(countries) > 1 and len(locations) != len(countries): - raise Exception('locations and countries must be length 1, or same length') - - # run the update - update(locations, terms, first, last, countries) - - -if __name__ == '__main__': - main() + # args and usage + parser = argparse.ArgumentParser() + # fmt: off + parser.add_argument( + "location", + action="store", + type=str, + default=None, + help="location(s) (ex: all; US; TX; CA,LA,WY)" + ) + parser.add_argument( + "term", + action="store", + type=str, + default=None, + help='term/query/topic (ex: all; /m/0cycc; "flu fever")' + ) + parser.add_argument( + "--first", + "-f", + default=None, + type=int, + help="first epiweek override" + ) + parser.add_argument( + "--last", + "-l", + default=None, + type=int, + help="last epiweek override" + ) + parser.add_argument( + "--country", + "-c", + default="US", + type=str, + help="location country (ex: US; BR)" + ) + # fmt: on + args = parser.parse_args() + + # sanity check + first, last = args.first, args.last + if first is not None: + flu.check_epiweek(first) + if last is not None: + flu.check_epiweek(last) + if first is not None and last is not None and first > last: + raise Exception("epiweeks in the wrong order") + + # decide what to update + if args.location.lower() == "all": + locations = LOCATIONS + elif args.location.lower() == "none": + locations = [NO_LOCATION_STR] + else: + locations = args.location.upper().split(",") + if args.term.lower() == "all": + terms = TERMS + else: + terms = [args.term] + + # country argument + # Check that country follows ISO 1366 Alpha-2 code. + # See https://www.iso.org/obp/ui/#search. 
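# --- Illustrative aside, not part of the patch ---
# The standard referenced by the comment above is ISO 3166-1 alpha-2 (the
# in-code "ISO 1366" reads like a transposition). The patch only checks length;
# a slightly stricter sketch could also require alphabetic characters.
# `parse_countries` is a hypothetical helper, not defined in this module.
def parse_countries(arg):
    countries = arg.upper().split(",")
    if not all(len(c) == 2 and c.isalpha() for c in countries):
        raise Exception("country must be a two-letter ISO 3166-1 alpha-2 code")
    return countries
# e.g. parse_countries("us,br") -> ["US", "BR"]; parse_countries("USA") raises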
+ countries = args.country.upper().split(",") + if not all(map(lambda x: len(x) == 2, countries)): + raise Exception("country name must be two letters (ISO 1366 Alpha-2)") + + # if length of locations and countries is > 1, need to be the same + if len(locations) > 1 and len(countries) > 1 and len(locations) != len(countries): + raise Exception("locations and countries must be length 1, or same length") + + # run the update + update(locations, terms, first, last, countries) + + +if __name__ == "__main__": + main() diff --git a/src/acquisition/ght/google_health_trends.py b/src/acquisition/ght/google_health_trends.py index 66a11c227..4bb8df25f 100644 --- a/src/acquisition/ght/google_health_trends.py +++ b/src/acquisition/ght/google_health_trends.py @@ -1,4 +1,4 @@ -''' +""" =============== === Purpose === =============== @@ -18,7 +18,7 @@ + sample command line usage + extract array of values from returned data * separated GHT class from ght_update.py -''' +""" # standard library import argparse @@ -31,109 +31,144 @@ from delphi.utils.epidate import EpiDate import delphi.utils.epiweek as flu -NO_LOCATION_STR = 'none' +NO_LOCATION_STR = "none" + class GHT: - # Google Trends API endpoint - DISCOVERY_URL = 'https://www.googleapis.com/discovery/v1/apis/trends/v1beta/rest' - - def __init__(self, key, delay=1): - self.service = build('trends', 'v1beta', developerKey=key, discoveryServiceUrl=GHT.DISCOVERY_URL) - self.delay = delay - - # converts a YYYYWW week into a YYYY-MM-DD date (using Sunday of the week) - @staticmethod - def _ew2date(ew): - # parse the epiweek - year, week = flu.split_epiweek(ew) - # get the date object (middle of the week; Wednesday) - date = EpiDate.from_epiweek(year, week) - # go to the first day of the week (Sunday) - date = date.add_days(-3) - # date as string - return str(date) - - # get data from Google APIs - # see: https://developers.google.com/apis-explorer/#p/trends/v1beta/trends.getTimelinesForHealth - def get_data(self, start_week, end_week, location, term, resolution='week', country='US'): - start_date = GHT._ew2date(start_week) - end_date = GHT._ew2date(end_week) - num_weeks = flu.delta_epiweeks(start_week, end_week) + 1 - - # getTimelinesForHealth parameters - params = { - 'terms': term, - 'time_startDate': start_date, - 'time_endDate': end_date, - 'timelineResolution': resolution, - } - # We have a special check for the US for backwards compatibility. - # i.e. if the country is 'US' AND the location is 'US', just put the geo-restriction for country. - # In contrast, another country might have a sub-region with initials 'US' and we want the region restriction instead. 
- if country == 'US': - if location == 'US' or location == NO_LOCATION_STR: - params['geoRestriction_country'] = 'US' - else: - params['geoRestriction_region'] = 'US-' + location - else: - if location == NO_LOCATION_STR: - params['geoRestriction_country'] = country - else: - params['geoRestriction_region'] = country + '-' + location - - # make the API call - data = self.service.getTimelinesForHealth(**params).execute() - - # extract the values - try: - values = [p['value'] for p in data['lines'][0]['points']] - except: - values = None - - # throttle request rate - time.sleep(self.delay) - - # return the results - return { - 'start_week': start_week, - 'end_week': end_week, - 'num_weeks': num_weeks, - 'location': location, - 'country' : country, - 'term': term, - 'resolution': resolution, - 'data': data, - 'values': values, - } + # Google Trends API endpoint + DISCOVERY_URL = "https://www.googleapis.com/discovery/v1/apis/trends/v1beta/rest" + + def __init__(self, key, delay=1): + self.service = build( + "trends", "v1beta", developerKey=key, discoveryServiceUrl=GHT.DISCOVERY_URL + ) + self.delay = delay + + # converts a YYYYWW week into a YYYY-MM-DD date (using Sunday of the week) + @staticmethod + def _ew2date(ew): + # parse the epiweek + year, week = flu.split_epiweek(ew) + # get the date object (middle of the week; Wednesday) + date = EpiDate.from_epiweek(year, week) + # go to the first day of the week (Sunday) + date = date.add_days(-3) + # date as string + return str(date) + + # get data from Google APIs + # see: https://developers.google.com/apis-explorer/#p/trends/v1beta/trends.getTimelinesForHealth + def get_data(self, start_week, end_week, location, term, resolution="week", country="US"): + start_date = GHT._ew2date(start_week) + end_date = GHT._ew2date(end_week) + num_weeks = flu.delta_epiweeks(start_week, end_week) + 1 + + # getTimelinesForHealth parameters + params = { + "terms": term, + "time_startDate": start_date, + "time_endDate": end_date, + "timelineResolution": resolution, + } + # We have a special check for the US for backwards compatibility. + # i.e. if the country is 'US' AND the location is 'US', just put the geo-restriction for country. + # In contrast, another country might have a sub-region with initials 'US' and we want the region restriction instead. 
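# --- Illustrative aside, not part of the patch ---
# The branch below selects exactly one geo restriction for the Trends request:
# a whole-country restriction when no sub-location is given (or for the
# backwards-compatible US/US case), otherwise a "COUNTRY-REGION" restriction.
# Collapsed into a hypothetical helper; "none" mirrors NO_LOCATION_STR here.
def pick_geo_restriction(country, location, no_location="none"):
    # whole-country query: no sub-location, or the US/US backwards-compat case
    if location == no_location or (country == "US" and location == "US"):
        return {"geoRestriction_country": country}
    # otherwise restrict to a sub-region, e.g. "US-CA" or "BR-SP"
    return {"geoRestriction_region": f"{country}-{location}"}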
+ if country == "US": + if location == "US" or location == NO_LOCATION_STR: + params["geoRestriction_country"] = "US" + else: + params["geoRestriction_region"] = "US-" + location + else: + if location == NO_LOCATION_STR: + params["geoRestriction_country"] = country + else: + params["geoRestriction_region"] = country + "-" + location + + # make the API call + data = self.service.getTimelinesForHealth(**params).execute() + + # extract the values + try: + values = [p["value"] for p in data["lines"][0]["points"]] + except: + values = None + + # throttle request rate + time.sleep(self.delay) + + # return the results + return { + "start_week": start_week, + "end_week": end_week, + "num_weeks": num_weeks, + "location": location, + "country": country, + "term": term, + "resolution": resolution, + "data": data, + "values": values, + } def main(): - # args and usage - parser = argparse.ArgumentParser() - parser.add_argument('apikey', action='store', type=str, default=None, help='API key') - parser.add_argument('startweek', action='store', type=int, default=None, help='first week (ex: 201440)') - parser.add_argument('endweek', action='store', type=int, default=None, help='last week (ex: 201520)') - parser.add_argument('location', action='store', type=str, default=None, help='location (ex: US)') - parser.add_argument('term', action='store', type=str, default=None, help='term/query/topic (ex: /m/0cycc)') - args = parser.parse_args() - - # get the data - ght = GHT(args.apikey) - result = ght.get_data(args.startweek, args.endweek, args.location, args.term) - values = result['values'] - - # sanity check - expected_weeks = result['num_weeks'] - received_weeks = len([v for v in values if v is not None and type(v) == float and v >= 0]) - if expected_weeks != received_weeks: - raise Exception('expected %d weeks, received %d' % (expected_weeks, received_weeks)) - - # results - epiweeks = [ew for ew in flu.range_epiweeks(args.startweek, args.endweek, inclusive=True)] - for (epiweek, value) in zip(epiweeks, values): - print('%6d: %.3f' % (epiweek, value)) - - -if __name__ == '__main__': - main() + # args and usage + parser = argparse.ArgumentParser() + # fmt: off + parser.add_argument( + "apikey", + action="store", + type=str, + default=None, + help="API key" + ) + parser.add_argument( + "startweek", + action="store", + type=int, + default=None, + help="first week (ex: 201440)" + ) + parser.add_argument( + "endweek", + action="store", + type=int, + default=None, + help="last week (ex: 201520)" + ) + parser.add_argument( + "location", + action="store", + type=str, + default=None, + help="location (ex: US)" + ) + parser.add_argument( + "term", + action="store", + type=str, + default=None, + help="term/query/topic (ex: /m/0cycc)" + ) + # fmt: on + args = parser.parse_args() + + # get the data + ght = GHT(args.apikey) + result = ght.get_data(args.startweek, args.endweek, args.location, args.term) + values = result["values"] + + # sanity check + expected_weeks = result["num_weeks"] + received_weeks = len([v for v in values if v is not None and type(v) == float and v >= 0]) + if expected_weeks != received_weeks: + raise Exception(f"expected {int(expected_weeks)} weeks, received {int(received_weeks)}") + + # results + epiweeks = [ew for ew in flu.range_epiweeks(args.startweek, args.endweek, inclusive=True)] + for (epiweek, value) in zip(epiweeks, values): + print(f"{int(epiweek):6}: {value:.3f}") + + +if __name__ == "__main__": + main() diff --git a/src/acquisition/kcdc/kcdc_update.py 
b/src/acquisition/kcdc/kcdc_update.py index 70c167738..713b21f00 100644 --- a/src/acquisition/kcdc/kcdc_update.py +++ b/src/acquisition/kcdc/kcdc_update.py @@ -42,12 +42,14 @@ from delphi.utils.epiweek import delta_epiweeks, range_epiweeks, add_epiweeks from delphi.utils.epidate import EpiDate + def ensure_tables_exist(): - (u,p) = secrets.db.epi - cnx = mysql.connector.connect(user=u,password=p,database='epidata') + (u, p) = secrets.db.epi + cnx = mysql.connector.connect(user=u, password=p, database="epidata") try: cursor = cnx.cursor() - cursor.execute(''' + cursor.execute( + """ CREATE TABLE IF NOT EXISTS `kcdc_ili` ( `id` INT(11) NOT NULL PRIMARY KEY AUTO_INCREMENT, `release_date` DATE NOT NULL, @@ -58,69 +60,76 @@ def ensure_tables_exist(): `ili` DOUBLE NOT NULL, UNIQUE KEY (`issue`, `epiweek`, `region`) ); - '''); + """ + ) cnx.commit() finally: cnx.close() + def safe_float(f): try: - return float(f.replace(',','')) + return float(f.replace(",", "")) except: return 0 + def safe_int(i): try: - return int(i.replace(',','')) + return int(i.replace(",", "")) except: return 0 -def get_rows(cnx, table='kcdc_ili'): - # Count and return the number of rows in the `kcdc_ili` table. - select = cnx.cursor() - select.execute('SELECT count(1) num FROM %s' % table) - for (num,) in select: - pass - select.close() - return num + +def get_rows(cnx, table="kcdc_ili"): + # Count and return the number of rows in the `kcdc_ili` table. + select = cnx.cursor() + select.execute(f"SELECT count(1) num FROM {table}") + for (num,) in select: + pass + select.close() + return num + def get_kcdc_data(): issue = EpiDate.today().get_ew() - last_season = issue//100 + (1 if issue % 100 > 35 else 0) - url = 'http://www.cdc.go.kr/npt/biz/npp/iss/influenzaListAjax.do' + last_season = issue // 100 + (1 if issue % 100 > 35 else 0) + url = "https://www.cdc.go.kr/npt/biz/npp/iss/influenzaListAjax.do" + # Started in 2004 params = { - 'icdNm': 'influenza', - 'startYear': '2004', # Started in 2004 - 'endYear': str(last_season) + "icdNm": "influenza", + "startYear": "2004", + "endYear": str(last_season), } response = requests.post(url, params) datas = response.json() - data = datas['data'] + data = datas["data"] ews = [] ilis = [] ew1 = 200436 - for year in range(2004,last_season): - year_data = data[year-2004] + for year in range(2004, last_season): + year_data = data[year - 2004] if year > 2004: ew1 = ews[-1] + 1 - ili_yr = year_data["VALUE"].split('`') - ili_yr = [float(f) for f in ili_yr if f != ''] - ew2 = add_epiweeks(ew1,len(ili_yr)) - new_ews = list(range_epiweeks(ew1,ew2)) + ili_yr = year_data["VALUE"].split("`") + ili_yr = [float(f) for f in ili_yr if f != ""] + ew2 = add_epiweeks(ew1, len(ili_yr)) + new_ews = list(range_epiweeks(ew1, ew2)) for i in range(len(new_ews)): j = float(ili_yr[i]) ilis.append(j) ews.append(new_ews[i]) return ews, ilis + def update_from_data(ews, ilis, date, issue, test_mode=False): u, p = secrets.db.epi - cnx = mysql.connector.connect(user=u, password=p, database='epidata') + cnx = mysql.connector.connect(user=u, password=p, database="epidata") rows1 = get_rows(cnx) - print('rows before: %d' % (rows1)) + print(f"rows before: {int(rows1)}") insert = cnx.cursor() - sql = ''' + sql = """ INSERT INTO `kcdc_ili` (`release_date`, `issue`, `epiweek`, `region`, `lag`, `ili`) @@ -129,15 +138,15 @@ def update_from_data(ews, ilis, date, issue, test_mode=False): ON DUPLICATE KEY UPDATE `release_date` = least(`release_date`, '%s'), `ili` = %s - ''' + """ for i in range(len(ews)): ew = ews[i] ili = ilis[i] 
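# --- Illustrative aside, not part of the patch ---
# `lag` below is the number of epiweeks between the week an observation
# describes (`ew`) and the week the data was issued (`issue`). A rough,
# year-boundary-ignoring equivalent of delta_epiweeks, for intuition only:
def naive_lag(epiweek, issue):
    # e.g. naive_lag(202001, 202004) == 3; the real code uses
    # delphi.utils.epiweek.delta_epiweeks, which also handles 52/53-week years
    return (issue // 100 - epiweek // 100) * 52 + issue % 100 - epiweek % 100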
lag = delta_epiweeks(ews[i], issue) - insert_args = [date,issue,ew,'ROK',lag,ili] - update_args = [date,ili] + insert_args = [date, issue, ew, "ROK", lag, ili] + update_args = [date, ili] try: insert.execute(sql % tuple(insert_args + update_args)) except Exception: @@ -146,34 +155,33 @@ def update_from_data(ews, ilis, date, issue, test_mode=False): # cleanup insert.close() if test_mode: - print('test mode, not committing') + print("test mode, not committing") rows2 = rows1 else: cnx.commit() rows2 = get_rows(cnx) - print('rows after: %d (added %d)' % (rows2,rows2-rows1)) + print(f"rows after: {int(rows2)} (added {int(rows2 - rows1)})") cnx.close() + def main(): # args and usage parser = argparse.ArgumentParser() parser.add_argument( - '--test', - action='store_true', - help='do dry run only, do not update the database' + "--test", action="store_true", help="do dry run only, do not update the database" ) args = parser.parse_args() - date = datetime.datetime.now().strftime('%Y-%m-%d') - print('assuming release date is today, %s' % date) + date = datetime.datetime.now().strftime("%Y-%m-%d") + print(f"assuming release date is today, {date}") issue = EpiDate.today().get_ew() ensure_tables_exist() - ews,ilis = get_kcdc_data() + ews, ilis = get_kcdc_data() update_from_data(ews, ilis, date, issue, test_mode=args.test) -if __name__ == '__main__': +if __name__ == "__main__": main() diff --git a/src/acquisition/nidss/taiwan_nidss.py b/src/acquisition/nidss/taiwan_nidss.py index 27da863e1..b2e369e63 100644 --- a/src/acquisition/nidss/taiwan_nidss.py +++ b/src/acquisition/nidss/taiwan_nidss.py @@ -4,7 +4,7 @@ =============== Scrapes weekly flu data from Taiwan's National Infectious Disease Statistics -System (NIDSS): http://nidss.cdc.gov.tw/en/ +System (NIDSS): https://nidss.cdc.gov.tw/en/ ================= @@ -37,233 +37,234 @@ class NIDSS: - """An API for scraping the NIDSS site.""" + """An API for scraping the NIDSS site.""" - # The page where the flu data is kept - FLU_URL = 'https://nidss.cdc.gov.tw/en/CDCWNH01.aspx?dc=wnh' + # The page where the flu data is kept + FLU_URL = "https://nidss.cdc.gov.tw/en/CDCWNH01.aspx?dc=wnh" - # Link to the dengue data - DENGUE_URL = 'http://nidss.cdc.gov.tw/Download/Weekly_Age_County_Gender_061.csv' + # Link to the dengue data + DENGUE_URL = "https://nidss.cdc.gov.tw/Download/Weekly_Age_County_Gender_061.csv" - # Translate location names to English - # https://en.wikipedia.org/wiki/List_of_administrative_divisions_of_Taiwan - _TRANSLATED = { - b'5Y2X5oqV57ij': 'Nantou_County', - b'5Y+w5Lit5biC': 'Taichung_City', - b'5Y+w5YyX5biC': 'Taipei_City', - b'5Y+w5Y2X5biC': 'Tainan_City', - b'5Y+w5p2x57ij': 'Taitung_County', - b'5ZiJ576p5biC': 'Chiayi_City', - b'5ZiJ576p57ij': 'Chiayi_County', - b'5Z+66ZqG5biC': 'Keelung_City', - b'5a6c6Jit57ij': 'Yilan_County', - b'5bGP5p2x57ij': 'Pingtung_County', - b'5b2w5YyW57ij': 'Changhua_County', - b'5paw5YyX5biC': 'New_Taipei_City', - b'5paw56u55biC': 'Hsinchu_City', - b'5paw56u557ij': 'Hsinchu_County', - b'5qGD5ZyS5biC': 'Taoyuan_City', - b'5r6O5rmW57ij': 'Penghu_County', - b'6Iqx6JOu57ij': 'Hualien_County', - b'6IuX5qCX57ij': 'Miaoli_County', - b'6YeR6ZaA57ij': 'Kinmen_County', - b'6Zuy5p6X57ij': 'Yunlin_County', - b'6auY6ZuE5biC': 'Kaohsiung_City', - b'6YCj5rGf57ij': 'Lienchiang_County', - } + # Translate location names to English + # https://en.wikipedia.org/wiki/List_of_administrative_divisions_of_Taiwan + _TRANSLATED = { + b"5Y2X5oqV57ij": "Nantou_County", + b"5Y+w5Lit5biC": "Taichung_City", + b"5Y+w5YyX5biC": "Taipei_City", 
+ b"5Y+w5Y2X5biC": "Tainan_City", + b"5Y+w5p2x57ij": "Taitung_County", + b"5ZiJ576p5biC": "Chiayi_City", + b"5ZiJ576p57ij": "Chiayi_County", + b"5Z+66ZqG5biC": "Keelung_City", + b"5a6c6Jit57ij": "Yilan_County", + b"5bGP5p2x57ij": "Pingtung_County", + b"5b2w5YyW57ij": "Changhua_County", + b"5paw5YyX5biC": "New_Taipei_City", + b"5paw56u55biC": "Hsinchu_City", + b"5paw56u557ij": "Hsinchu_County", + b"5qGD5ZyS5biC": "Taoyuan_City", + b"5r6O5rmW57ij": "Penghu_County", + b"6Iqx6JOu57ij": "Hualien_County", + b"6IuX5qCX57ij": "Miaoli_County", + b"6YeR6ZaA57ij": "Kinmen_County", + b"6Zuy5p6X57ij": "Yunlin_County", + b"6auY6ZuE5biC": "Kaohsiung_City", + b"6YCj5rGf57ij": "Lienchiang_County", + } - # Map locations to regions - # https://en.wikipedia.org/wiki/List_of_administrative_divisions_of_Taiwan - # https://en.wikipedia.org/wiki/Regions_of_Taiwan#Hexchotomy - LOCATION_TO_REGION = { - # Taipei - 'Taipei_City': 'Taipei', - 'Keelung_City': 'Taipei', - 'New_Taipei_City': 'Taipei', - 'Yilan_County': 'Taipei', - 'Kinmen_County': 'Taipei', - 'Lienchiang_County': 'Taipei', - # Northern - 'Hsinchu_City': 'Northern', - 'Taoyuan_City': 'Northern', - 'Hsinchu_County': 'Northern', - 'Miaoli_County': 'Northern', - # Central - 'Taichung_City': 'Central', - 'Changhua_County': 'Central', - 'Nantou_County': 'Central', - # Southern - 'Tainan_City': 'Southern', - 'Chiayi_City': 'Southern', - 'Yunlin_County': 'Southern', - 'Chiayi_County': 'Southern', - # Kaoping - 'Kaohsiung_City': 'Kaoping', - 'Pingtung_County': 'Kaoping', - 'Penghu_County': 'Kaoping', - # Eastern - 'Hualien_County': 'Eastern', - 'Taitung_County': 'Eastern', - } + # Map locations to regions + # https://en.wikipedia.org/wiki/List_of_administrative_divisions_of_Taiwan + # https://en.wikipedia.org/wiki/Regions_of_Taiwan#Hexchotomy + LOCATION_TO_REGION = { + # Taipei + "Taipei_City": "Taipei", + "Keelung_City": "Taipei", + "New_Taipei_City": "Taipei", + "Yilan_County": "Taipei", + "Kinmen_County": "Taipei", + "Lienchiang_County": "Taipei", + # Northern + "Hsinchu_City": "Northern", + "Taoyuan_City": "Northern", + "Hsinchu_County": "Northern", + "Miaoli_County": "Northern", + # Central + "Taichung_City": "Central", + "Changhua_County": "Central", + "Nantou_County": "Central", + # Southern + "Tainan_City": "Southern", + "Chiayi_City": "Southern", + "Yunlin_County": "Southern", + "Chiayi_County": "Southern", + # Kaoping + "Kaohsiung_City": "Kaoping", + "Pingtung_County": "Kaoping", + "Penghu_County": "Kaoping", + # Eastern + "Hualien_County": "Eastern", + "Taitung_County": "Eastern", + } - @staticmethod - def _get_metadata(html): - issue_pattern = re.compile('^.*Latest available data: Week (\\d+), (\\d{4})\\..*$') - release_pattern = re.compile('^.*Data as of \\d+:\\d+:\\d+, (\\d{4})/(\\d{2})/(\\d{2})\\..*$') - issue, release = None, None - for line in html.split('\n'): - match = issue_pattern.match(line) - if match is not None: - year, week = int(match.group(2)), int(match.group(1)) - issue = year * 100 + week - match = release_pattern.match(line) - if match is not None: - year, month, day = int(match.group(1)), int(match.group(2)), int(match.group(3)) - release = '%04d-%02d-%02d' % (year, month, day) - if issue is None or release is None: - raise Exception('metadata not found') - return issue, release + @staticmethod + def _get_metadata(html): + issue_pattern = re.compile("^.*Latest available data: Week (\\d+), (\\d{4})\\..*$") + release_pattern = re.compile( + "^.*Data as of \\d+:\\d+:\\d+, (\\d{4})/(\\d{2})/(\\d{2})\\..*$" + ) + issue, release = None, 
None + for line in html.split("\n"): + match = issue_pattern.match(line) + if match is not None: + year, week = int(match.group(2)), int(match.group(1)) + issue = year * 100 + week + match = release_pattern.match(line) + if match is not None: + year, month, day = int(match.group(1)), int(match.group(2)), int(match.group(3)) + release = f"{int(year):04}-{int(month):02}-{int(day):02}" + if issue is None or release is None: + raise Exception("metadata not found") + return issue, release - @staticmethod - def _get_flu_data(html): - week_pattern = re.compile('^categories: \\[(.*)\\],$') - value_pattern = re.compile('^series: \\[(.*)\\],$') - data = {} - parsing_ili = True - for line in html.split('\n'): - line = line.strip() - match = week_pattern.match(line) - if match is not None: - weeks = [int(x[1:-1]) for x in match.group(1).split(',')] - for week in weeks: - check_epiweek(week) - if week not in data: - data[week] = {} - match = value_pattern.match(line) - if match is not None: - for item in match.group(1).split('},{'): - parts = item.replace('{', '').replace('}', '').strip().split(' ') - location = parts[1][1:-2] - def num(value): - if parsing_ili: - return float(value) - else: - if '.' in value: - raise Exception('expected type int for visits') - return int(value) - values = [num(x) for x in parts[3][1:-1].split(',')] - unit = 'ili' if parsing_ili else 'visits' - if len(weeks) != len(values): - raise Exception('len(weeks) != len(values)') - for week, value in zip(weeks, values): - if location not in data[week]: - data[week][location] = {} - data[week][location][unit] = value - parsing_ili = False - if len(data) == 0: - raise Exception('no data') - return data + @staticmethod + def _get_flu_data(html): + week_pattern = re.compile("^categories: \\[(.*)\\],$") + value_pattern = re.compile("^series: \\[(.*)\\],$") + data = {} + parsing_ili = True + for line in html.split("\n"): + line = line.strip() + match = week_pattern.match(line) + if match is not None: + weeks = [int(x[1:-1]) for x in match.group(1).split(",")] + for week in weeks: + check_epiweek(week) + if week not in data: + data[week] = {} + match = value_pattern.match(line) + if match is not None: + for item in match.group(1).split("},{"): + parts = item.replace("{", "").replace("}", "").strip().split(" ") + location = parts[1][1:-2] + + def num(value): + if parsing_ili: + return float(value) + else: + if "." 
in value: + raise Exception("expected type int for visits") + return int(value) - @staticmethod - def get_flu_data(): - # Fetch the flu page - response = requests.get(NIDSS.FLU_URL) - if response.status_code != 200: - raise Exception('request failed [%d]' % response.status_code) - html = response.text - # Parse metadata - latest_week, release_date = NIDSS._get_metadata(html) - # Parse flu data - data = NIDSS._get_flu_data(html) - # Return results indexed by week and location - return latest_week, release_date, data + values = [num(x) for x in parts[3][1:-1].split(",")] + unit = "ili" if parsing_ili else "visits" + if len(weeks) != len(values): + raise Exception("len(weeks) != len(values)") + for week, value in zip(weeks, values): + if location not in data[week]: + data[week][location] = {} + data[week][location][unit] = value + parsing_ili = False + if len(data) == 0: + raise Exception("no data") + return data - @staticmethod - def get_dengue_data(first_week, last_week): - # Check week order - if first_week > last_week: - first_week, last_week = last_week, first_week - # Bounds check - if first_week < 200301 or last_week < 200301: - raise Exception('week out of range') - # Initialize data by week and location (zeroes are not reported) - data = {} - for week in range_epiweeks(first_week, add_epiweeks(last_week, 1)): - data[week] = {} - for location in NIDSS.LOCATION_TO_REGION.keys(): - data[week][location] = 0 - # Download CSV - response = requests.get(NIDSS.DENGUE_URL) - if response.status_code != 200: - raise Exception('export Dengue failed [%d]' % response.status_code) - csv = response.content.decode('big5-tw') - # Parse the data - lines = [l.strip() for l in csv.split('\n')[1:] if l.strip() != ''] - for line in lines: - fields = line.split(',') - location_b64 = base64.b64encode(fields[3].encode('utf-8')) - location = NIDSS._TRANSLATED[location_b64] - # Fields currently unused: - # region = NIDSS.LOCATION_TO_REGION[location] - # imported_b64 = base64.b64encode(fields[6].encode('utf-8')) - # imported = imported_b64 == b'5piv' - # sex = fields[5] - # age = fields[7] - count = int(fields[8]) - year = int(fields[1]) - week = int(fields[2]) - # Week 53 was reported each year in 2003-2007 - if year < 2008 and year != 2003 and week > 52: - week = 52 - # Epiweek system change in 2009 - # See also: http://research.undefinedx.com/forum/index.php?topic=300.0 - if year == 2009: - week -= 1 - if week == 0: - year, week = 2008, 53 - epiweek = year * 100 + week - if epiweek < first_week or epiweek > last_week: - # Outside of the requested range - continue - if epiweek not in data or location not in data[epiweek]: - # Not a vaild U.S. 
epiweek - raise Exception('data missing %d-%s' % (epiweek, location)) - # Add the counts to the location on this epiweek - data[epiweek][location] += count - # Return results indexed by week and location - return data + @staticmethod + def get_flu_data(): + # Fetch the flu page + response = requests.get(NIDSS.FLU_URL) + if response.status_code != 200: + raise Exception(f"request failed [{int(response.status_code)}]") + html = response.text + # Parse metadata + latest_week, release_date = NIDSS._get_metadata(html) + # Parse flu data + data = NIDSS._get_flu_data(html) + # Return results indexed by week and location + return latest_week, release_date, data + + @staticmethod + def get_dengue_data(first_week, last_week): + # Check week order + if first_week > last_week: + first_week, last_week = last_week, first_week + # Bounds check + if first_week < 200301 or last_week < 200301: + raise Exception("week out of range") + # Initialize data by week and location (zeroes are not reported) + data = {} + for week in range_epiweeks(first_week, add_epiweeks(last_week, 1)): + data[week] = {} + for location in NIDSS.LOCATION_TO_REGION.keys(): + data[week][location] = 0 + # Download CSV + response = requests.get(NIDSS.DENGUE_URL) + if response.status_code != 200: + raise Exception(f"export Dengue failed [{int(response.status_code)}]") + csv = response.content.decode("big5-tw") + # Parse the data + lines = [l.strip() for l in csv.split("\n")[1:] if l.strip() != ""] + for line in lines: + fields = line.split(",") + location_b64 = base64.b64encode(fields[3].encode("utf-8")) + location = NIDSS._TRANSLATED[location_b64] + # Fields currently unused: + # region = NIDSS.LOCATION_TO_REGION[location] + # imported_b64 = base64.b64encode(fields[6].encode('utf-8')) + # imported = imported_b64 == b'5piv' + # sex = fields[5] + # age = fields[7] + count = int(fields[8]) + year = int(fields[1]) + week = int(fields[2]) + # Week 53 was reported each year in 2003-2007 + if year < 2008 and year != 2003 and week > 52: + week = 52 + # Epiweek system change in 2009 + # See also: https://research.undefinedx.com/forum/index.php?topic=300.0 + if year == 2009: + week -= 1 + if week == 0: + year, week = 2008, 53 + epiweek = year * 100 + week + if epiweek < first_week or epiweek > last_week: + # Outside of the requested range + continue + if epiweek not in data or location not in data[epiweek]: + # Not a vaild U.S. 
epiweek + raise Exception(f"data missing {int(epiweek)}-{location}") + # Add the counts to the location on this epiweek + data[epiweek][location] += count + # Return results indexed by week and location + return data def main(): - # Args and usage - parser = argparse.ArgumentParser() - parser.add_argument( - 'epiweek', - action='store', - type=int, - help='fetch data on this epiweek (ex: 201537)' - ) - args = parser.parse_args() - ew = args.epiweek + # Args and usage + parser = argparse.ArgumentParser() + parser.add_argument( + "epiweek", action="store", type=int, help="fetch data on this epiweek (ex: 201537)" + ) + args = parser.parse_args() + ew = args.epiweek - # Get the data - latest_week, release_date, fdata = NIDSS.get_flu_data() - ddata = NIDSS.get_dengue_data(ew, ew) + # Get the data + latest_week, release_date, fdata = NIDSS.get_flu_data() + ddata = NIDSS.get_dengue_data(ew, ew) - # Print the results - print('*** Meta ***') - print('latest_week:', latest_week) - print('release_date:', release_date) - print('*** Flu ***') - for region in sorted(list(fdata[ew].keys())): - visits, ili = fdata[ew][region]['visits'], fdata[ew][region]['ili'] - print('region=%s | visits=%d | ili=%.3f' % (region, visits, ili)) - print('*** Dengue ***') - for location in sorted(list(ddata[ew].keys())): - region = NIDSS.LOCATION_TO_REGION[location] - count = ddata[ew][location] - print('location=%s | region=%s | count=%d' % (location, region, count)) + # Print the results + print("*** Meta ***") + print("latest_week:", latest_week) + print("release_date:", release_date) + print("*** Flu ***") + for region in sorted(list(fdata[ew].keys())): + visits, ili = fdata[ew][region]["visits"], fdata[ew][region]["ili"] + print(f"region={region} | visits={int(visits)} | ili={ili:.3f}") + print("*** Dengue ***") + for location in sorted(list(ddata[ew].keys())): + region = NIDSS.LOCATION_TO_REGION[location] + count = ddata[ew][location] + print(f"location={location} | region={region} | count={int(count)}") -if __name__ == '__main__': - main() +if __name__ == "__main__": + main() diff --git a/src/acquisition/nidss/taiwan_update.py b/src/acquisition/nidss/taiwan_update.py index 830a7738d..30d458481 100644 --- a/src/acquisition/nidss/taiwan_update.py +++ b/src/acquisition/nidss/taiwan_update.py @@ -87,92 +87,88 @@ # Get a row count just to know how many new rows are inserted def get_rows(cnx): - select = cnx.cursor() - select.execute('SELECT count(1) num FROM nidss_flu') - for (num,) in select: - rows_flu = num - select.execute('SELECT count(1) num FROM nidss_dengue') - for (num,) in select: - rows_dengue = num - select.close() - return (rows_flu, rows_dengue) + select = cnx.cursor() + select.execute("SELECT count(1) num FROM nidss_flu") + for (num,) in select: + rows_flu = num + select.execute("SELECT count(1) num FROM nidss_dengue") + for (num,) in select: + rows_dengue = num + select.close() + return (rows_flu, rows_dengue) def update(test_mode=False): - # test mode - if test_mode: - print('test mode enabled: changes will not be saved') - - # Database connection - u, p = secrets.db.epi - cnx = mysql.connector.connect(user=u, password=p, database='epidata') - rows1 = get_rows(cnx) - print('rows before (flu): %d' % (rows1[0])) - print('rows before (dengue): %d' % (rows1[1])) - insert = cnx.cursor() - sql_flu = ''' - INSERT INTO - `nidss_flu` (`release_date`, `issue`, `epiweek`, `region`, `lag`, `visits`, `ili`) - VALUES - (%s, %s, %s, %s, %s, %s, %s) - ON DUPLICATE KEY UPDATE - `release_date` = least(`release_date`, %s), 
`visits` = %s, `ili` = %s - ''' - sql_dengue = ''' - INSERT INTO - `nidss_dengue` (`epiweek`, `location`, `region`, `count`) - VALUES - (%s, %s, %s, %s) - ON DUPLICATE KEY UPDATE - `count` = %s - ''' - - # Scrape flu data - current_week, release_date, data = NIDSS.get_flu_data() - for epiweek in sorted(list(data.keys())): - lag = delta_epiweeks(epiweek, current_week) - for region in data[epiweek].keys(): - visits, ili = data[epiweek][region]['visits'], data[epiweek][region]['ili'] - params1 = [release_date, current_week, epiweek, region, lag, visits, ili] - params2 = [release_date, visits, ili] - insert.execute(sql_flu, tuple(params1 + params2)) - - # Scrape dengue data from the past year - data = NIDSS.get_dengue_data(add_epiweeks(current_week, -51), current_week) - for epiweek in sorted(list(data.keys())): - for location in sorted(list(data[epiweek].keys())): - region = NIDSS.LOCATION_TO_REGION[location] - count = data[epiweek][location] - params = (epiweek, location, region, count, count) - insert.execute(sql_dengue, params) - - # Cleanup - insert.close() - rows2 = get_rows(cnx) - print('rows after (flu): %d (added %d)' % (rows2[0], rows2[0] - rows1[0])) - print('rows after (dengue): %d (added %d)' % (rows2[1], rows2[1] - rows1[1])) - if test_mode: - print('test mode: changes not commited') - else: - cnx.commit() - cnx.close() + # test mode + if test_mode: + print("test mode enabled: changes will not be saved") + + # Database connection + u, p = secrets.db.epi + cnx = mysql.connector.connect(user=u, password=p, database="epidata") + rows1 = get_rows(cnx) + print(f"rows before (flu): {int(rows1[0])}") + print(f"rows before (dengue): {int(rows1[1])}") + insert = cnx.cursor() + sql_flu = """ + INSERT INTO + `nidss_flu` (`release_date`, `issue`, `epiweek`, `region`, `lag`, `visits`, `ili`) + VALUES + (%s, %s, %s, %s, %s, %s, %s) + ON DUPLICATE KEY UPDATE + `release_date` = least(`release_date`, %s), `visits` = %s, `ili` = %s + """ + sql_dengue = """ + INSERT INTO + `nidss_dengue` (`epiweek`, `location`, `region`, `count`) + VALUES + (%s, %s, %s, %s) + ON DUPLICATE KEY UPDATE + `count` = %s + """ + + # Scrape flu data + current_week, release_date, data = NIDSS.get_flu_data() + for epiweek in sorted(list(data.keys())): + lag = delta_epiweeks(epiweek, current_week) + for region in data[epiweek].keys(): + visits, ili = data[epiweek][region]["visits"], data[epiweek][region]["ili"] + params1 = [release_date, current_week, epiweek, region, lag, visits, ili] + params2 = [release_date, visits, ili] + insert.execute(sql_flu, tuple(params1 + params2)) + + # Scrape dengue data from the past year + data = NIDSS.get_dengue_data(add_epiweeks(current_week, -51), current_week) + for epiweek in sorted(list(data.keys())): + for location in sorted(list(data[epiweek].keys())): + region = NIDSS.LOCATION_TO_REGION[location] + count = data[epiweek][location] + params = (epiweek, location, region, count, count) + insert.execute(sql_dengue, params) + + # Cleanup + insert.close() + rows2 = get_rows(cnx) + print(f"rows after (flu): {int(rows2[0])} (added {int(rows2[0] - rows1[0])})") + print(f"rows after (dengue): {int(rows2[1])} (added {int(rows2[1] - rows1[1])})") + if test_mode: + print("test mode: changes not commited") + else: + cnx.commit() + cnx.close() def main(): - # args and usage - parser = argparse.ArgumentParser() - parser.add_argument( - '--test', - '-t', - action='store_true', - default=False, - help='test mode, do not commit changes' - ) - args = parser.parse_args() - - # fetch and store NIDSS data - 
update(args.test) - - -if __name__ == '__main__': - main() + # args and usage + parser = argparse.ArgumentParser() + parser.add_argument( + "--test", "-t", action="store_true", default=False, help="test mode, do not commit changes" + ) + args = parser.parse_args() + + # fetch and store NIDSS data + update(args.test) + + +if __name__ == "__main__": + main() diff --git a/src/acquisition/paho/paho_db_update.py b/src/acquisition/paho/paho_db_update.py index d07885f79..b351d3ff2 100644 --- a/src/acquisition/paho/paho_db_update.py +++ b/src/acquisition/paho/paho_db_update.py @@ -50,9 +50,8 @@ import csv import datetime import glob -import subprocess -import random from io import StringIO +import tempfile # third party import mysql.connector @@ -64,12 +63,14 @@ from delphi.utils.epiweek import delta_epiweeks, check_epiweek from delphi.utils.epidate import EpiDate + def ensure_tables_exist(): - (u,p) = secrets.db.epi - cnx = mysql.connector.connect(user=u,password=p,database='epidata') + (u, p) = secrets.db.epi + cnx = mysql.connector.connect(user=u, password=p, database="epidata") try: cursor = cnx.cursor() - cursor.execute(''' + cursor.execute( + """ CREATE TABLE IF NOT EXISTS `paho_dengue` ( `id` INT(11) NOT NULL PRIMARY KEY AUTO_INCREMENT, `release_date` DATE NOT NULL, @@ -85,35 +86,44 @@ def ensure_tables_exist(): `num_deaths` INT(11) NOT NULL, UNIQUE KEY (`issue`, `epiweek`, `region`) ); - '''); + """ + ) cnx.commit() finally: cnx.close() + def safe_float(f): try: - return float(f.replace(',','')) + return float(f.replace(",", "")) except: return 0 + def safe_int(i): try: - return int(i.replace(',','')) + return int(i.replace(",", "")) except: return 0 -def get_rows(cnx, table='paho_dengue'): - # Count and return the number of rows in the `fluview` table. - select = cnx.cursor() - select.execute('SELECT count(1) num FROM %s' % table) - for (num,) in select: - pass - select.close() - return num + +def get_rows(cnx, table="paho_dengue"): + # Count and return the number of rows in the `fluview` table. 
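# --- Illustrative aside, not part of the patch ---
# The comment above still mentions `fluview`, but the default table here is
# `paho_dengue`. The helper interpolates the table name into the SQL string,
# which is fine for the fixed names used in this repo; a defensive variant
# (hypothetical, not in the patch) could whitelist the name first:
def count_rows(cnx, table="paho_dengue", allowed=("paho_dengue",)):
    if table not in allowed:
        raise ValueError(f"unexpected table name: {table}")
    cur = cnx.cursor()
    cur.execute(f"SELECT count(1) num FROM {table}")
    (num,) = cur.fetchone()
    cur.close()
    return num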
+ select = cnx.cursor() + select.execute(f"SELECT count(1) num FROM {table}") + for (num,) in select: + pass + select.close() + return num + def get_paho_row(row): - if row[0] == "\ufeffIncidence Rate (c)" and row != "\ufeffIncidence Rate (c),(SD/D) x100 (e),CFR (f),ID,Country or Subregion,Deaths,EW,Confirmed,Epidemiological Week (a),Pop (no usar),Serotype,Severe Dengue (d),Total of Dengue Cases (b),Year,Population x 1000".split(","): - raise Exception('PAHO header row has changed') + if row[ + 0 + ] == "\ufeffIncidence Rate (c)" and row != "\ufeffIncidence Rate (c),(SD/D) x100 (e),CFR (f),ID,Country or Subregion,Deaths,EW,Confirmed,Epidemiological Week (a),Pop (no usar),Serotype,Severe Dengue (d),Total of Dengue Cases (b),Year,Population x 1000".split( + "," + ): + raise Exception("PAHO header row has changed") if len(row) == 1 or row[0] == "Incidence Rate (c)": # this is a header row return None @@ -128,23 +138,26 @@ def get_paho_row(row): except: return None try: - check_epiweek(safe_int(row[13])*100 + safe_int(row[8]), safe_int(row[13])*100 + safe_int(row[6])) + check_epiweek( + safe_int(row[13]) * 100 + safe_int(row[8]), safe_int(row[13]) * 100 + safe_int(row[6]) + ) except: return None return { - 'issue': safe_int(row[13])*100 + safe_int(row[6]), - 'epiweek': safe_int(row[13])*100 + safe_int(row[8]), - 'region': country, - 'total_pop': safe_int(row[14]), - 'serotype': row[10], - 'num_dengue': safe_int(row[12]), - 'incidence_rate': safe_float(row[0]), - 'num_severe': safe_int(row[11]), - 'num_deaths': safe_int(row[5]), - 'severe_ratio': safe_float(row[1]), - 'cfr': safe_float(row[2]) + "issue": safe_int(row[13]) * 100 + safe_int(row[6]), + "epiweek": safe_int(row[13]) * 100 + safe_int(row[8]), + "region": country, + "total_pop": safe_int(row[14]), + "serotype": row[10], + "num_dengue": safe_int(row[12]), + "incidence_rate": safe_float(row[0]), + "num_severe": safe_int(row[11]), + "num_deaths": safe_int(row[5]), + "severe_ratio": safe_float(row[1]), + "cfr": safe_float(row[2]), } + def update_from_file(issue, date, filename, test_mode=False): # Read PAHO data from CSV and insert into (or update) the database. 
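# --- Illustrative aside, not part of the patch ---
# get_paho_row above returns None for header or unparseable rows and a dict for
# data rows, so the loading step in update_from_file reduces to a
# parse-then-filter pass over the CSV text. A self-contained sketch
# (`parse_paho_csv` is a hypothetical name, not defined in this module):
import csv
from io import StringIO

def parse_paho_csv(text):
    rows = [get_paho_row(fields) for fields in csv.reader(StringIO(text), delimiter=",")]
    return [row for row in rows if row]  # drop headers and rows that failed to parse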
@@ -156,23 +169,23 @@ def update_from_file(issue, date, filename, test_mode=False): # database connection u, p = secrets.db.epi - cnx = mysql.connector.connect(user=u, password=p, database='epidata') - rows1 = get_rows(cnx, 'paho_dengue') - print('rows before: %d' % (rows1)) + cnx = mysql.connector.connect(user=u, password=p, database="epidata") + rows1 = get_rows(cnx, "paho_dengue") + print(f"rows before: {int(rows1)}") insert = cnx.cursor() # load the data, ignoring empty rows - print('loading data from %s as issued on %d' % (filename, issue)) - with open(filename,'r',encoding='utf-8') as f: + print(f"loading data from {filename} as issued on {int(issue)}") + with open(filename, encoding="utf-8") as f: c = f.read() rows = [] - for l in csv.reader(StringIO(c), delimiter=','): + for l in csv.reader(StringIO(c), delimiter=","): rows.append(get_paho_row(l)) - print(' loaded %d rows' % len(rows)) + print(f" loaded {len(rows)} rows") entries = [obj for obj in rows if obj] - print(' found %d entries' % len(entries)) + print(f" found {len(entries)} entries") - sql = ''' + sql = """ INSERT INTO `paho_dengue` (`release_date`, `issue`, `epiweek`, `region`, `lag`, `total_pop`, `serotype`, `num_dengue`, `incidence_rate`, @@ -187,55 +200,64 @@ def update_from_file(issue, date, filename, test_mode=False): `incidence_rate` = %s, `num_severe` = %s, `num_deaths` = %s - ''' + """ for row in entries: - if row['issue'] > issue: # Issued in a week that hasn't happened yet + if row["issue"] > issue: # Issued in a week that hasn't happened yet continue - lag = delta_epiweeks(row['epiweek'], issue) - data_args = [row['total_pop'], row['serotype'], row['num_dengue'], - row['incidence_rate'], row['num_severe'], row['num_deaths']] + lag = delta_epiweeks(row["epiweek"], issue) + data_args = [ + row["total_pop"], + row["serotype"], + row["num_dengue"], + row["incidence_rate"], + row["num_severe"], + row["num_deaths"], + ] - insert_args = [date,issue,row['epiweek'],row['region'],lag] + data_args + insert_args = [date, issue, row["epiweek"], row["region"], lag] + data_args update_args = [date] + data_args insert.execute(sql % tuple(insert_args + update_args)) # cleanup insert.close() if test_mode: - print('test mode, not committing') + print("test mode, not committing") rows2 = rows1 else: cnx.commit() rows2 = get_rows(cnx) - print('rows after: %d (added %d)' % (rows2,rows2-rows1)) + print(f"rows after: {int(rows2)} (added {int(rows2 - rows1)})") cnx.close() + def main(): # args and usage parser = argparse.ArgumentParser() + # fmt: off parser.add_argument( - '--test', - action='store_true', - help='do dry run only, do not update the database' + "--test", + action="store_true", + help="do dry run only, do not update the database" ) parser.add_argument( - '--file', + "--file", type=str, - help='load an existing zip file (otherwise fetch current data)' + help="load an existing zip file (otherwise fetch current data)" ) parser.add_argument( - '--issue', + "--issue", type=int, - help='issue of the file (e.g. 201740); used iff --file is given' + help="issue of the file (e.g. 
201740); used iff --file is given" ) + # fmt: on args = parser.parse_args() if (args.file is None) != (args.issue is None): - raise Exception('--file and --issue must both be present or absent') + raise Exception("--file and --issue must both be present or absent") - date = datetime.datetime.now().strftime('%Y-%m-%d') - print('assuming release date is today, %s' % date) + date = datetime.datetime.now().strftime("%Y-%m-%d") + print(f"assuming release date is today, {date}") if args.file: update_from_file(args.issue, date, args.file, test_mode=args.test) @@ -247,34 +269,31 @@ def main(): max_tries = 5 while flag < max_tries: flag = flag + 1 - tmp_dir = ''.join(random.choice('0123456789abcdefghijklmnopqrstuvwxyz') for i in range(8)) - tmp_dir = 'downloads_' + tmp_dir - subprocess.call(["mkdir",tmp_dir]) - # Use temporary directory to avoid data from different time - # downloaded to same folder - get_paho_data(dir=tmp_dir) - issue = EpiDate.today().get_ew() - # Check to make sure we downloaded a file for every week - issueset = set() - files = glob.glob('%s/*.csv' % tmp_dir) - for filename in files: - with open(filename,'r') as f: - _ = f.readline() - data = f.readline().split(',') - issueset.add(data[6]) - db_error = False - if len(issueset) >= 53: # Shouldn't be more than 53 + with tempfile.TemporaryDirectory() as tmp_dir: + # Use temporary directory to avoid data from different time + # downloaded to same folder + get_paho_data(dir=tmp_dir) + issue = EpiDate.today().get_ew() + # Check to make sure we downloaded a file for every week + issueset = set() + files = glob.glob(f"{tmp_dir}/*.csv") for filename in files: - try: - update_from_file(issue, date, filename, test_mode=args.test) - subprocess.call(["rm",filename]) - except: - db_error = True - subprocess.call(["rm","-r",tmp_dir]) - if not db_error: - break # Exit loop with success + with open(filename) as f: + _ = f.readline() + data = f.readline().split(",") + issueset.add(data[6]) + db_error = False + if len(issueset) >= 53: # Shouldn't be more than 53 + for filename in files: + try: + update_from_file(issue, date, filename, test_mode=args.test) + except: + db_error = True + if not db_error: + break # Exit loop with success if flag >= max_tries: - print('WARNING: Database `paho_dengue` did not update successfully') + print("WARNING: Database `paho_dengue` did not update successfully") + -if __name__ == '__main__': +if __name__ == "__main__": main() diff --git a/src/acquisition/paho/paho_download.py b/src/acquisition/paho/paho_download.py index 60dd13ae8..c6fa70285 100644 --- a/src/acquisition/paho/paho_download.py +++ b/src/acquisition/paho/paho_download.py @@ -1,4 +1,3 @@ - # IMPORTANT: This code is extremely unstable. # Slight changes to the PAHO website may render this script partially or entirely useless. @@ -15,42 +14,51 @@ headerheight = 0 + def wait_for(browser, css_selector, delay=10): try: - WebDriverWait(browser, delay).until(EC.presence_of_element_located((By.CSS_SELECTOR, css_selector))) - WebDriverWait(browser, delay).until(EC.element_to_be_clickable((By.CSS_SELECTOR, css_selector))) - print('Success Loading %s' % (css_selector)) + WebDriverWait(browser, delay).until( + EC.presence_of_element_located((By.CSS_SELECTOR, css_selector)) + ) + WebDriverWait(browser, delay).until( + EC.element_to_be_clickable((By.CSS_SELECTOR, css_selector)) + ) + print(f"Success Loading {css_selector}") except TimeoutException: - print("Loading %s took too much time!" 
% (css_selector)) - + print(f"Loading {css_selector} took too much time!") + + def find_and_click(browser, element): element.location_once_scrolled_into_view browser.switch_to.default_content() - browser.execute_script("window.scrollBy(0,-%d)"%headerheight) + browser.execute_script(f"window.scrollBy(0,-{int(headerheight)})") browser.switch_to.frame(browser.find_element_by_tag_name("iframe")) browser.switch_to.frame(browser.find_element_by_tag_name("iframe")) element.click() -def get_paho_data(offset=0, dir='downloads'): + +def get_paho_data(offset=0, dir="downloads"): opts = Options() opts.set_headless() assert opts.headless # Operating in headless mode fp = FirefoxProfile() - fp.set_preference("browser.download.folderList",2) - fp.set_preference("browser.download.manager.showWhenStarting",False) - fp.set_preference("browser.download.dir",os.path.abspath(dir)) - fp.set_preference("browser.helperApps.neverAsk.saveToDisk","text/csv") - - browser = Firefox(options=opts,firefox_profile=fp) - browser.get('http://www.paho.org/data/index.php/en/mnu-topics/indicadores-dengue-en/dengue-nacional-en/252-dengue-pais-ano-en.html?showall=&start=1') + fp.set_preference("browser.download.folderList", 2) + fp.set_preference("browser.download.manager.showWhenStarting", False) + fp.set_preference("browser.download.dir", os.path.abspath(dir)) + fp.set_preference("browser.helperApps.neverAsk.saveToDisk", "text/csv") + + browser = Firefox(options=opts, firefox_profile=fp) + browser.get( + "https://www.paho.org/data/index.php/en/mnu-topics/indicadores-dengue-en/dengue-nacional-en/252-dengue-pais-ano-en.html?showall=&start=1" + ) tab1 = browser.window_handles[0] - browser.execute_script('''window.open("","_blank");''') + browser.execute_script("""window.open("","_blank");""") tab2 = browser.window_handles[1] browser.switch_to.window(tab1) - + curr_offset = offset - + wait_for(browser, "div.rt-top-inner", delay=30) header = browser.find_element_by_css_selector("div.rt-top-inner") global headerheight @@ -59,41 +67,51 @@ def get_paho_data(offset=0, dir='downloads'): # The actual content of the data of this webpage is within 2 iframes, so we need to navigate into them first browser.switch_to.frame(browser.find_element_by_tag_name("iframe")) browser.switch_to.frame(browser.find_element_by_tag_name("iframe")) - + # Locate the button that allows to download the table - downloadoption = browser.find_elements_by_css_selector("div.tabToolbarButton.tab-widget.download")[0] + downloadoption = browser.find_elements_by_css_selector( + "div.tabToolbarButton.tab-widget.download" + )[0] find_and_click(browser, downloadoption) wait_for(browser, "div[data-tb-test-id='DownloadImage-Button']") # Locate the button that prepares the table for download as an image - imagebutton = browser.find_elements_by_css_selector("div[data-tb-test-id='DownloadImage-Button']")[0] + imagebutton = browser.find_elements_by_css_selector( + "div[data-tb-test-id='DownloadImage-Button']" + )[0] find_and_click(browser, imagebutton) wait_for(browser, ".tabDownloadFileButton[data-test-id='DownloadLink']") # Locate the button that downloads the table as an image - downloadbutton = browser.find_elements_by_css_selector(".tabDownloadFileButton[data-test-id='DownloadLink']")[0] + downloadbutton = browser.find_elements_by_css_selector( + ".tabDownloadFileButton[data-test-id='DownloadLink']" + )[0] # Extract session ID href = downloadbutton.get_attribute("href") startidx = href.index("sessions/") + len("sessions/") - endidx = href.index("/",startidx) + endidx = 
href.index("/", startidx) sessionid = href[startidx:endidx] - dataurl = "http://phip.paho.org/vizql/w/Casosdedengue_tben/v/ByLastAvailableEpiWeek/viewData/sessions/%s/views/18076444178507886853_9530488980060483892?maxrows=200&viz=%%7B%%22worksheet%%22:%%22W%%20By%%20Last%%20Available%%20EpiWeek%%22,%%22dashboard%%22:%%22By%%20Last%%20Available%%20Epi%%20Week%%22%%7D"%sessionid + dataurl = f"https://phip.paho.org/vizql/w/Casosdedengue_tben/v/ByLastAvailableEpiWeek/viewData/sessions/{sessionid}/views/18076444178507886853_9530488980060483892?maxrows=200&viz=%%7B%%22worksheet%%22:%%22W%%20By%%20Last%%20Available%%20EpiWeek%%22,%%22dashboard%%22:%%22By%%20Last%%20Available%%20Epi%%20Week%%22%%7D" wait_for(browser, "div[data-tb-test-id='CancelBtn-Button']") # Cancel image download - cancelbutton = browser.find_elements_by_css_selector("div[data-tb-test-id='CancelBtn-Button']")[0] + cancelbutton = browser.find_elements_by_css_selector("div[data-tb-test-id='CancelBtn-Button']")[ + 0 + ] find_and_click(browser, cancelbutton) wait_for(browser, "div[id='tableau_base_widget_FilterPanel_0']") # Default is to show data for current year, we want to get all years # Clicks drop-down menu to open options - yearselector = browser.find_elements_by_css_selector("div[id='tableau_base_widget_FilterPanel_0']")[0] + yearselector = browser.find_elements_by_css_selector( + "div[id='tableau_base_widget_FilterPanel_0']" + )[0] find_and_click(browser, yearselector) wait_for(browser, "div.facetOverflow") @@ -107,27 +125,29 @@ def get_paho_data(offset=0, dir='downloads'): for i in range(offset): gp = browser.find_element_by_css_selector("div.wcGlassPane") - #print gp.is_enabled() - #print gp.is_selected() - #print gp.is_displayed() + # print gp.is_enabled() + # print gp.is_selected() + # print gp.is_displayed() try: WebDriverWait(browser, 10).until(EC.staleness_of(gp)) - print("Loaded next week % d" % (53-offset)) + print(f"Loaded next week {int(53 - offset)}") except TimeoutException: - print("Loading next week %d took too much time!" 
% (53-offset)) + print(f"Loading next week {int(53 - offset)} took too much time!") gp = browser.find_element_by_css_selector("div.wcGlassPane") - #print gp.is_enabled() - #print gp.is_selected() - #print gp.is_displayed() - x = browser.find_elements_by_css_selector("div.dijitReset.dijitSliderButtonContainer.dijitSliderButtonContainerH.tableauArrowDec")[0] + # print gp.is_enabled() + # print gp.is_selected() + # print gp.is_displayed() + x = browser.find_elements_by_css_selector( + "div.dijitReset.dijitSliderButtonContainer.dijitSliderButtonContainerH.tableauArrowDec" + )[0] find_and_click(browser, x) # Cycle through all weeks, downloading each week as a separate .csv # Theoretically, need to cycle 53 times, but in practice only 54 works, unsure why - for i in range(54-offset): + for i in range(54 - offset): # If something goes wrong for whatever reason, try from the beginning try: - print('Loading week %d' % (53-i)) + print(f"Loading week {int(53 - i)}") # (Re-)load URL browser.switch_to.window(tab2) browser.get(dataurl) @@ -137,7 +157,9 @@ def get_paho_data(offset=0, dir='downloads'): full_data_tab = browser.find_elements_by_css_selector("li[id='tab-view-full-data']")[0] full_data_tab.click() - wait_for(browser, "a.csvLink") # Sometimes this fails but the button is successfully clicked anyway, not sure why + wait_for( + browser, "a.csvLink" + ) # Sometimes this fails but the button is successfully clicked anyway, not sure why # Actually download the data as a .csv (Will be downloaded to Firefox's default download destination) data_links = browser.find_elements_by_css_selector("a.csvLink") data_link = None @@ -149,16 +171,22 @@ def get_paho_data(offset=0, dir='downloads'): # Locate button that decreases the current week by 1 browser.switch_to.window(tab1) - wait_for(browser, "div.dijitReset.dijitSliderButtonContainer.dijitSliderButtonContainerH.tableauArrowDec") - - x = browser.find_elements_by_css_selector("div.dijitReset.dijitSliderButtonContainer.dijitSliderButtonContainerH.tableauArrowDec")[0] + wait_for( + browser, + "div.dijitReset.dijitSliderButtonContainer.dijitSliderButtonContainerH.tableauArrowDec", + ) + + x = browser.find_elements_by_css_selector( + "div.dijitReset.dijitSliderButtonContainer.dijitSliderButtonContainerH.tableauArrowDec" + )[0] find_and_click(browser, x) curr_offset += 1 except Exception as e: - print('Got exception %s\nTrying again from week %d' % (e,53-offset)) + print(f"Got exception {e}\nTrying again from week {int(53 - offset)}") browser.quit() get_paho_data(offset=curr_offset) browser.quit() -if __name__ == '__main__': - get_paho_data(dir='downloads/') + +if __name__ == "__main__": + get_paho_data(dir="downloads/") diff --git a/src/acquisition/quidel/quidel.py b/src/acquisition/quidel/quidel.py index a7c9a2918..0540d5e7c 100644 --- a/src/acquisition/quidel/quidel.py +++ b/src/acquisition/quidel/quidel.py @@ -1,4 +1,4 @@ -''' +""" =============== === Purpose === =============== @@ -15,7 +15,7 @@ * add end date, end week check 2017-12-02: * original version -''' +""" # standard library from collections import defaultdict @@ -35,148 +35,187 @@ import delphi.utils.epidate as ED from delphi.utils.geo.locations import Locations -def word_map(row,terms): - for (k,v) in terms.items(): - row = row.replace(k,v) + +def word_map(row, terms): + for (k, v) in terms.items(): + row = row.replace(k, v) return row -def date_less_than(d1,d2): - y1,m1,d1 = [int(x) for x in d1.split('-')] - y2,m2,d2 = [int(x) for x in d2.split('-')] - if y1*10000+m1*100+d10: shifted to future 
def date_to_epiweek(date, shift=0): - y,m,d = [int(x) for x in date.split('-')] + y, m, d = (int(x) for x in date.split("-")) - epidate = ED.EpiDate(y,m,d) + epidate = ED.EpiDate(y, m, d) epidate = epidate.add_days(shift) ew = epidate.get_ew() return ew + # convert measurment to time series format # startweek and endweek are inclusive -def measurement_to_ts(m,index,startweek=None,endweek=None): +def measurement_to_ts(m, index, startweek=None, endweek=None): if startweek is None: startweek = 0 if endweek is None: endweek = 999999 res = {} - for r,rdict in m.items(): - res[r]={} - for t,vals in rdict.items(): - if index>=len(vals): + for r, rdict in m.items(): + res[r] = {} + for t, vals in rdict.items(): + if index >= len(vals): raise Exception("Index is invalid") - if t>=startweek and t<=endweek: + if t >= startweek and t <= endweek: res[r][t] = vals[index] return res + class QuidelData: def __init__(self, raw_path, load_email=True): self.data_path = raw_path - self.excel_uptodate_path = join(raw_path,'excel/uptodate') - self.excel_history_path = join(raw_path,'excel/history') - self.csv_path = join(raw_path,'csv') + self.excel_uptodate_path = join(raw_path, "excel/uptodate") + self.excel_history_path = join(raw_path, "excel/history") + self.csv_path = join(raw_path, "csv") self.xlsx_uptodate_list = [ - f[:-5] for f in listdir(self.excel_uptodate_path) if isfile(join(self.excel_uptodate_path, f)) and f[-5:]=='.xlsx' + f[:-5] + for f in listdir(self.excel_uptodate_path) + if isfile(join(self.excel_uptodate_path, f)) and f[-5:] == ".xlsx" ] self.xlsx_history_list = [ - f[:-5] for f in listdir(self.excel_history_path) if isfile(join(self.excel_history_path, f)) and f[-5:]=='.xlsx' + f[:-5] + for f in listdir(self.excel_history_path) + if isfile(join(self.excel_history_path, f)) and f[-5:] == ".xlsx" + ] + self.csv_list = [ + f[:-4] + for f in listdir(self.csv_path) + if isfile(join(self.csv_path, f)) and f[-4:] == ".csv" ] - self.csv_list = [f[:-4] for f in listdir(self.csv_path) if isfile(join(self.csv_path, f)) and f[-4:]=='.csv'] self.map_terms = { - ' FL 34637"':'FL', + ' FL 34637"': "FL", } # hardcoded parameters self.date_dim = 1 self.state_dim = 4 self.fields = [ - 'sofia_ser','date','fac_id','city','state','zip','age', - 'fluA','fluB','fluAll','county','fac_type' + "sofia_ser", + "date", + "fac_id", + "city", + "state", + "zip", + "age", + "fluA", + "fluB", + "fluAll", + "county", + "fac_type", ] - self.fields_to_keep = ['fac_id','fluA','fluB','fluAll'] + self.fields_to_keep = ["fac_id", "fluA", "fluB", "fluAll"] self.dims_to_keep = [self.fields.index(x) for x in self.fields_to_keep] if load_email: self.retrieve_excels() self.prepare_csv() def retrieve_excels(self): - detach_dir = self.excel_uptodate_path # directory where to save attachments (default: current) + detach_dir = ( + self.excel_uptodate_path + ) # directory where to save attachments (default: current) # connecting to the gmail imap server m = imaplib.IMAP4_SSL("imap.gmail.com") - m.login(secrets.quidel.email_addr,secrets.quidel.email_pwd) - m.select("INBOX") # here you a can choose a mail box like INBOX instead + m.login(secrets.quidel.email_addr, secrets.quidel.email_pwd) + m.select("INBOX") # here you a can choose a mail box like INBOX instead # use m.list() to get all the mailboxes - _, items = m.search(None, "ALL") # you could filter using the IMAP rules here (check http://www.example-code.com/csharp/imap-search-critera.asp) - items = items[0].split() # getting the mails id + # you could filter using the IMAP rules 
here (check https://www.example-code.com/csharp/imap-search-critera.asp) + _, items = m.search(None, "ALL") + items = items[0].split() # getting the mails id # The emailids are ordered from past to now for emailid in items: - _, data = m.fetch(emailid, "(RFC822)") # fetching the mail, "`(RFC822)`" means "get the whole stuff", but you can ask for headers only, etc - email_body = data[0][1].decode('utf-8') # getting the mail content - mail = email.message_from_string(email_body) # parsing the mail content to get a mail object - - #Check if any attachments at all - if mail.get_content_maintype() != 'multipart': + _, data = m.fetch( + emailid, "(RFC822)" + ) # fetching the mail, "`(RFC822)`" means "get the whole stuff", but you can ask for headers only, etc + email_body = data[0][1].decode("utf-8") # getting the mail content + mail = email.message_from_string( + email_body + ) # parsing the mail content to get a mail object + + # Check if any attachments at all + if mail.get_content_maintype() != "multipart": continue # we use walk to create a generator so we can iterate on the parts and forget about the recursive headach for part in mail.walk(): # multipart are just containers, so we skip them - if part.get_content_maintype() == 'multipart': + if part.get_content_maintype() == "multipart": continue # is this part an attachment ? - if part.get('Content-Disposition') is None: + if part.get("Content-Disposition") is None: continue filename = part.get_filename() # check duplicates - if filename[-5:]!='.xlsx' or filename[:-5] in self.xlsx_uptodate_list+self.xlsx_history_list: + if ( + filename[-5:] != ".xlsx" + or filename[:-5] in self.xlsx_uptodate_list + self.xlsx_history_list + ): continue self.xlsx_uptodate_list.append(filename[:-5]) att_path = os.path.join(detach_dir, filename) - #Check if its already there - if not os.path.isfile(att_path) : + # Check if its already there + if not os.path.isfile(att_path): # finally write the stuff - fp = open(att_path, 'wb') + fp = open(att_path, "wb") fp.write(part.get_payload(decode=True)) fp.close() def prepare_csv(self): - need_update=False + need_update = False for f in self.xlsx_uptodate_list: if f in self.csv_list: continue else: - need_update=True + need_update = True - date_regex = '\d{2}-\d{2}-\d{4}' - date_items = re.findall(date_regex,f) + date_regex = r"\d{2}-\d{2}-\d{4}" + date_items = re.findall(date_regex, f) if date_items: - end_date = '-'.join(date_items[-1].split('-')[x] for x in [2,0,1]) + end_date = "-".join(date_items[-1].split("-")[x] for x in [2, 0, 1]) else: - print("End date not found in file name:"+f) + print("End date not found in file name:" + f) end_date = None - df_dict = pd.read_excel(join(self.excel_uptodate_path, f+'.xlsx'), sheet_name=None) - for (_,df) in df_dict.items(): - df = df.dropna(axis=0, how='all') - df['TestDate'] = df['TestDate'].apply(lambda x: x.strftime('%Y-%m-%d')) - df_filtered = df[df['TestDate']!=''] + df_dict = pd.read_excel(join(self.excel_uptodate_path, f + ".xlsx"), sheet_name=None) + for (_, df) in df_dict.items(): + df = df.dropna(axis=0, how="all") + df["TestDate"] = df["TestDate"].apply(lambda x: x.strftime("%Y-%m-%d")) + df_filtered = df[df["TestDate"] != ""] if end_date is not None: - df_filtered = df_filtered[df.apply(lambda x: date_less_than(end_date,x['TestDate'])!=1, axis=1)] - df_filtered.to_csv(join(self.csv_path, f+'.csv'), index=False, encoding='utf-8') - self.csv_list = [f[:-4] for f in listdir(self.csv_path) if isfile(join(self.csv_path, f)) and f[-4:]=='.csv'] + df_filtered = 
df_filtered[ + df.apply(lambda x: date_less_than(end_date, x["TestDate"]) != 1, axis=1) + ] + df_filtered.to_csv(join(self.csv_path, f + ".csv"), index=False, encoding="utf-8") + self.csv_list = [ + f[:-4] + for f in listdir(self.csv_path) + if isfile(join(self.csv_path, f)) and f[-4:] == ".csv" + ] self.need_update = need_update def load_csv(self, dims=None): @@ -186,12 +225,12 @@ def load_csv(self, dims=None): for f in self.csv_list: if f in self.xlsx_history_list: continue - rf = open(join(self.csv_path,f+'.csv')) + rf = open(join(self.csv_path, f + ".csv")) lines = rf.readlines() for l in lines[1:]: - l = word_map(l,self.map_terms) - row = l.strip().split(',') + l = word_map(l, self.map_terms) + row = l.strip().split(",") date = row[self.date_dim] state = row[self.state_dim] if state not in parsed_dict[date]: @@ -202,7 +241,7 @@ def load_csv(self, dims=None): # hardcoded aggregation function # output: [#unique_device,fluA,fluB,fluAll,total] - def prepare_measurements(self,data_dict,use_hhs=True,start_weekday=6): + def prepare_measurements(self, data_dict, use_hhs=True, start_weekday=6): buffer_dict = {} if use_hhs: region_list = Locations.hhs_list @@ -210,34 +249,35 @@ def prepare_measurements(self,data_dict,use_hhs=True,start_weekday=6): region_list = Locations.atom_list def get_hhs_region(atom): - for region in Locations.hhs_list: - if atom.lower() in Locations.hhs_map[region]: - return region - if atom.lower() == 'ny': - return 'hhs2' - return atom + for region in Locations.hhs_list: + if atom.lower() in Locations.hhs_map[region]: + return region + if atom.lower() == "ny": + return "hhs2" + return atom day_shift = 6 - start_weekday - time_map = lambda x:date_to_epiweek(x,shift=day_shift) - region_map = lambda x:get_hhs_region(x) \ - if use_hhs and x not in Locations.hhs_list else x # a bit hacky + time_map = lambda x: date_to_epiweek(x, shift=day_shift) + region_map = ( + lambda x: get_hhs_region(x) if use_hhs and x not in Locations.hhs_list else x + ) # a bit hacky end_date = sorted(data_dict.keys())[-1] # count the latest week in only if Thurs data is included - end_epiweek = date_to_epiweek(end_date,shift=-4) + end_epiweek = date_to_epiweek(end_date, shift=-4) # first pass: prepare device_id set device_dict = {} - for (date,daily_dict) in data_dict.items(): + for (date, daily_dict) in data_dict.items(): if not date: continue ew = time_map(date) - if ew == -1 or ew>end_epiweek: + if ew == -1 or ew > end_epiweek: continue if ew not in device_dict: - device_dict[ew]={} + device_dict[ew] = {} for r in region_list: device_dict[ew][r] = set() - for (state,rec_list) in daily_dict.items(): + for (state, rec_list) in daily_dict.items(): region = region_map(state) # get rid of non-US regions if region not in region_list: @@ -247,38 +287,40 @@ def get_hhs_region(atom): device_dict[ew][region].add(fac) # second pass: prepare all measurements - for (date,daily_dict) in data_dict.items(): + for (date, daily_dict) in data_dict.items(): ew = time_map(date) - if ew == -1 or ew>end_epiweek: + if ew == -1 or ew > end_epiweek: continue if ew not in buffer_dict: - buffer_dict[ew]={} + buffer_dict[ew] = {} for r in region_list: - buffer_dict[ew][r] = [0.0]*8 + buffer_dict[ew][r] = [0.0] * 8 - for (state,rec_list) in daily_dict.items(): + for (state, rec_list) in daily_dict.items(): region = region_map(state) # get rid of non-US regions if region not in region_list: continue for rec in rec_list: fac_num = float(len(device_dict[ew][region])) - buffer_dict[ew][region]= np.add( - buffer_dict[ew][region],[ 
- rec[1]=='positive', - rec[2]=='positive', - rec[3]=='positive', + buffer_dict[ew][region] = np.add( + buffer_dict[ew][region], + [ + rec[1] == "positive", + rec[2] == "positive", + rec[3] == "positive", 1.0, - float(rec[1]=='positive')/fac_num, - float(rec[2]=='positive')/fac_num, - float(rec[3]=='positive')/fac_num, - 1.0/fac_num, - ]).tolist() + float(rec[1] == "positive") / fac_num, + float(rec[2] == "positive") / fac_num, + float(rec[3] == "positive") / fac_num, + 1.0 / fac_num, + ], + ).tolist() # switch two dims of dict result_dict = {} for r in region_list: - result_dict[r]={} - for (k,v) in buffer_dict.items(): - result_dict[r][k]=v[r] + result_dict[r] = {} + for (k, v) in buffer_dict.items(): + result_dict[r][k] = v[r] return result_dict diff --git a/src/acquisition/quidel/quidel_update.py b/src/acquisition/quidel/quidel_update.py index b6303533c..563cea898 100644 --- a/src/acquisition/quidel/quidel_update.py +++ b/src/acquisition/quidel/quidel_update.py @@ -1,4 +1,4 @@ -''' +""" =============== === Purpose === =============== @@ -33,7 +33,7 @@ 2017-12-02: * original version -''' +""" # standard library import argparse @@ -49,106 +49,142 @@ from delphi.utils.geo.locations import Locations LOCATIONS = Locations.hhs_list -DATAPATH = '/home/automation/quidel_data' +DATAPATH = "/home/automation/quidel_data" + def update(locations, first=None, last=None, force_update=False, load_email=True): - # download and prepare data first - qd = quidel.QuidelData(DATAPATH,load_email) - if not qd.need_update and not force_update: - print('Data not updated, nothing needs change.') - return - - qd_data = qd.load_csv() - qd_measurements = qd.prepare_measurements(qd_data,start_weekday=4) - qd_ts = quidel.measurement_to_ts(qd_measurements,7,startweek=first,endweek=last) - # connect to the database - u, p = secrets.db.epi - cnx = mysql.connector.connect(user=u, password=p, database='epidata') - cur = cnx.cursor() - - def get_num_rows(): - cur.execute('SELECT count(1) `num` FROM `quidel`') - for (num,) in cur: - pass - return num - - # check from 4 weeks preceeding the last week with data through this week - cur.execute('SELECT max(`epiweek`) `ew0`, yearweek(now(), 6) `ew1` FROM `quidel`') - for (ew0, ew1) in cur: - ew0 = 200401 if ew0 is None else flu.add_epiweeks(ew0, -4) - ew0 = ew0 if first is None else first - ew1 = ew1 if last is None else last - print('Checking epiweeks between %d and %d...' 
% (ew0, ew1)) - - # keep track of how many rows were added - rows_before = get_num_rows() - - # check Quidel for new and/or revised data - sql = ''' + # download and prepare data first + qd = quidel.QuidelData(DATAPATH, load_email) + if not qd.need_update and not force_update: + print("Data not updated, nothing needs change.") + return + + qd_data = qd.load_csv() + qd_measurements = qd.prepare_measurements(qd_data, start_weekday=4) + qd_ts = quidel.measurement_to_ts(qd_measurements, 7, startweek=first, endweek=last) + # connect to the database + u, p = secrets.db.epi + cnx = mysql.connector.connect(user=u, password=p, database="epidata") + cur = cnx.cursor() + + def get_num_rows(): + cur.execute("SELECT count(1) `num` FROM `quidel`") + for (num,) in cur: + pass + return num + + # check from 4 weeks preceeding the last week with data through this week + cur.execute("SELECT max(`epiweek`) `ew0`, yearweek(now(), 6) `ew1` FROM `quidel`") + for (ew0, ew1) in cur: + ew0 = 200401 if ew0 is None else flu.add_epiweeks(ew0, -4) + ew0 = ew0 if first is None else first + ew1 = ew1 if last is None else last + print(f"Checking epiweeks between {int(ew0)} and {int(ew1)}...") + + # keep track of how many rows were added + rows_before = get_num_rows() + + # check Quidel for new and/or revised data + sql = """ INSERT INTO `quidel` (`location`, `epiweek`, `value`) VALUES (%s, %s, %s) ON DUPLICATE KEY UPDATE `value` = %s - ''' - - total_rows = 0 - - for location in locations: - if location not in qd_ts: - continue - ews = sorted(qd_ts[location].keys()) - num_missing = 0 - for ew in ews: - v = qd_ts[location][ew] - sql_data = (location, ew, v, v) - cur.execute(sql, sql_data) - total_rows += 1 - if v == 0: - num_missing += 1 - if num_missing > 0: - print(' [%s] missing %d/%d value(s)' % (location, num_missing, len(ews))) - - # keep track of how many rows were added - rows_after = get_num_rows() - print('Inserted %d/%d row(s)'%(rows_after - rows_before, total_rows)) - - # cleanup - cur.close() - cnx.commit() - cnx.close() + """ + + total_rows = 0 + + for location in locations: + if location not in qd_ts: + continue + ews = sorted(qd_ts[location].keys()) + num_missing = 0 + for ew in ews: + v = qd_ts[location][ew] + sql_data = (location, ew, v, v) + cur.execute(sql, sql_data) + total_rows += 1 + if v == 0: + num_missing += 1 + if num_missing > 0: + print(f" [{location}] missing {int(num_missing)}/{len(ews)} value(s)") + + # keep track of how many rows were added + rows_after = get_num_rows() + print(f"Inserted {int(rows_after - rows_before)}/{int(total_rows)} row(s)") + + # cleanup + cur.close() + cnx.commit() + cnx.close() def main(): - # args and usage - parser = argparse.ArgumentParser() - parser.add_argument('--location', action='store', type=str, default=None, help='location(s) (ex: all; any of hhs1-10)') - parser.add_argument('--first', '-f', default=None, type=int, help='first epiweek override') - parser.add_argument('--last', '-l', default=None, type=int, help='last epiweek override') - parser.add_argument('--force_update', '-u', action='store_true', help='force update db values') - parser.add_argument('--skip_email', '-s', action='store_true', help='skip email downloading step') - args = parser.parse_args() - - # sanity check - first, last, force_update, skip_email = args.first, args.last, args.force_update, args.skip_email - load_email = not skip_email - if first is not None: - flu.check_epiweek(first) - if last is not None: - flu.check_epiweek(last) - if first is not None and last is not None and first 
> last: - raise Exception('epiweeks in the wrong order') - - # decide what to update - if args.location.lower() == 'all': - locations = LOCATIONS - else: - locations = args.location.lower().split(',') - - # run the update - update(locations, first, last, force_update, load_email) - - -if __name__ == '__main__': - main() + # args and usage + parser = argparse.ArgumentParser() + # fmt: off + parser.add_argument( + "--location", + action="store", + type=str, + default=None, + help="location(s) (ex: all; any of hhs1-10)" + ) + parser.add_argument( + "--first", + "-f", + default=None, + type=int, + help="first epiweek override" + ) + parser.add_argument( + "--last", + "-l", + default=None, + type=int, + help="last epiweek override" + ) + parser.add_argument( + "--force_update", + "-u", + action="store_true", + help="force update db values" + ) + parser.add_argument( + "--skip_email", + "-s", + action="store_true", + help="skip email downloading step" + ) + # fmt: on + args = parser.parse_args() + + # sanity check + first, last, force_update, skip_email = ( + args.first, + args.last, + args.force_update, + args.skip_email, + ) + load_email = not skip_email + if first is not None: + flu.check_epiweek(first) + if last is not None: + flu.check_epiweek(last) + if first is not None and last is not None and first > last: + raise Exception("epiweeks in the wrong order") + + # decide what to update + if args.location.lower() == "all": + locations = LOCATIONS + else: + locations = args.location.lower().split(",") + + # run the update + update(locations, first, last, force_update, load_email) + + +if __name__ == "__main__": + main() diff --git a/src/acquisition/twtr/healthtweets.py b/src/acquisition/twtr/healthtweets.py index 78eb2b3ec..c1e345162 100644 --- a/src/acquisition/twtr/healthtweets.py +++ b/src/acquisition/twtr/healthtweets.py @@ -1,4 +1,4 @@ -''' +""" =============== === Purpose === =============== @@ -20,7 +20,7 @@ * Fetching daily values instead of weekly values 2015-03-?? 
* Original version -''' +""" # standard library import argparse @@ -36,132 +36,242 @@ class HealthTweets: - # mapping from state abbreviations to location codes used by healthtweets.org - STATE_CODES = {'AL': 3024, 'AK': 3025, 'AZ': 3026, 'AR': 3027, 'CA': 440, 'CO': 3029, 'CT': 3030, 'DE': 3031, 'DC': 3032, 'FL': 3033, 'GA': 3034, 'HI': 3035, 'ID': 3036, 'IL': 3037, 'IN': 3038, 'IA': 3039, 'KS': 3040, 'KY': 3041, 'LA': 2183, 'ME': 3043, 'MD': 3044, 'MA': 450, 'MI': 3046, 'MN': 3047, 'MS': 3048, 'MO': 3049, 'MT': 3050, 'NE': 3051, 'NV': 3052, 'NH': 3053, 'NJ': 478, 'NM': 2225, 'NY': 631, 'NC': 3057, 'ND': 3058, 'OH': 3059, 'OK': 3060, 'OR': 281, 'PA': 3062, 'RI': 3063, 'SC': 3064, 'SD': 3065, 'TN': 3066, 'TX': 3067, 'UT': 2272, 'VT': 3069, 'VA': 3070, 'WA': 3071, 'WV': 3072, 'WI': 3073, 'WY': 3074} - - def __init__(self, username, password, debug=False): - self.debug = debug - self.session = requests.Session() - # spoof a web browser - self.session.headers.update({ - 'User-Agent': 'Mozilla/5.0 (Windows NT 6.1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/41.0.2228.0 Safari/537.36', - }) - # get the login token - response = self._go('http://www.healthtweets.org/accounts/login') - token = self._get_token(response.text) - if self.debug: - print('token=%s'%(token)) - data = { - 'csrfmiddlewaretoken': token, - 'username': username, - 'password': password, - 'next': '/', + # mapping from state abbreviations to location codes used by healthtweets.org + STATE_CODES = { + "AL": 3024, + "AK": 3025, + "AZ": 3026, + "AR": 3027, + "CA": 440, + "CO": 3029, + "CT": 3030, + "DE": 3031, + "DC": 3032, + "FL": 3033, + "GA": 3034, + "HI": 3035, + "ID": 3036, + "IL": 3037, + "IN": 3038, + "IA": 3039, + "KS": 3040, + "KY": 3041, + "LA": 2183, + "ME": 3043, + "MD": 3044, + "MA": 450, + "MI": 3046, + "MN": 3047, + "MS": 3048, + "MO": 3049, + "MT": 3050, + "NE": 3051, + "NV": 3052, + "NH": 3053, + "NJ": 478, + "NM": 2225, + "NY": 631, + "NC": 3057, + "ND": 3058, + "OH": 3059, + "OK": 3060, + "OR": 281, + "PA": 3062, + "RI": 3063, + "SC": 3064, + "SD": 3065, + "TN": 3066, + "TX": 3067, + "UT": 2272, + "VT": 3069, + "VA": 3070, + "WA": 3071, + "WV": 3072, + "WI": 3073, + "WY": 3074, } - # login to the site - response = self._go('http://www.healthtweets.org/accounts/login', data=data) - if response.status_code != 200 or 'Your username and password' in response.text: - raise Exception('login failed') - - def get_values(self, state, date1, date2): - ''' - state: two-letter state abbreviation (see STATE_CODES) - date1: the first date in the range, inclusive (format: YYYY-MM-DD) - date2: the last date in the range, inclusive (format: YYYY-MM-DD) - returns a dictionary (by date) of number of flu tweets (num) and total tweets (total) - ''' - # get raw values (number of flu tweets) and normalized values (flu tweets as a percent of total tweets) - raw_values = self._get_values(state, date1, date2, False) - normalized_values = self._get_values(state, date1, date2, True) - values = {} - # save the raw number and calculate the total - for date in raw_values.keys(): - if normalized_values[date] == 0: - continue - values[date] = { - 'num': round(raw_values[date]), - 'total': round(100 * raw_values[date] / normalized_values[date]), - } - print(date, raw_values[date], normalized_values[date]) - return values - - def _get_values(self, state, date1, date2, normalized): - if state not in HealthTweets.STATE_CODES: - raise Exception('invalid state') - state_code = HealthTweets.STATE_CODES[state] - d1, d2 = datetime.strptime(date1, 
'%Y-%m-%d'), datetime.strptime(date2, '%Y-%m-%d') - s1, s2 = d1.strftime('%m%%2F%d%%2F%Y'), d2.strftime('%m%%2F%d%%2F%Y') - count_type = 'normalized' if normalized else 'raw' - url = 'http://www.healthtweets.org/trends/plot?resolution=Day&count_type=%s&dayNum=%d&from=%s&to=%s&plot1_disease=65&location_plot1=%d'%(count_type, (d2 - d1).days, s1, s2, state_code) - response = self._go('http://www.healthtweets.org/trends/plot?resolution=Day&count_type=%s&dayNum=%d&from=%s&to=%s&plot1_disease=65&location_plot1=%d'%(count_type, (d2 - d1).days, s1, s2, state_code)) - #print(state, date1, date2, normalized) - #print(url) - #print(response.status_code) - if response.status_code != 200: - raise Exception('plot status is ' + str(response.status_code) + ' (when was data last updated?)') - lines = [line.strip() for line in response.text.split('\n')] - data_line = [line for line in lines if line[:16] == 'var chartData = '] - if len(data_line) != 1: - raise Exception('lookup failed') - values = json.loads(data_line[0][16:-1]) - return dict([(datetime.strptime(v[0], '%m/%d/%Y').strftime('%Y-%m-%d'), float(v[1])) for v in values]) - - def check_state(self, state): - ''' - Sanity checks state code mapping. - state: two-letter state abbreviation (see STATE_CODES) - returns the full state name associated with the state abbreviation - ''' - if state not in HealthTweets.STATE_CODES: - raise Exception('invalid state') - state_code = HealthTweets.STATE_CODES[state] - response = self._go('http://www.healthtweets.org/trends/plot?resolution=Day&count_type=normalized&dayNum=7&from=01%%2F01%%2F2015&to=01%%2F07%%2F2015&plot1_disease=65&location_plot1=%d'%(state_code)) - lines = [line.strip() for line in response.text.split('\n')] - data_line = [line for line in lines if line[:29] == 'var plotNames = ["Influenza ('] - if len(data_line) == 0: - raise Exception('check failed') - name = data_line[0][29:] - name = name.split('(')[0] - return name.strip() - - def _get_token(self, html): - page = PageParser.parse(html) - hidden = PageParser.filter_all(page, [('html',), ('body',), ('div',), ('div',), ('div',), ('form',), ('input',)]) - return hidden['attrs']['value'] - - def _go(self, url, method=None, referer=None, data=None): - if self.debug: - print('%s'%(url)) - if method is None: - if data is None: - method = self.session.get - else: - method = self.session.post - response = method(url, headers={'referer': referer}, data=data) - html = response.text - if self.debug: - for item in response.history: - print(' [%d to %s]'%(item.status_code, item.headers['Location'])) - print(' %d (%d bytes)'%(response.status_code, len(html))) - return response + + def __init__(self, username, password, debug=False): + self.debug = debug + self.session = requests.Session() + # spoof a web browser + self.session.headers.update( + { + "User-Agent": "Mozilla/5.0 (Windows NT 6.1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/41.0.2228.0 Safari/537.36", + } + ) + # get the login token + response = self._go("https://www.healthtweets.org/accounts/login") + token = self._get_token(response.text) + if self.debug: + print(f"token={token}") + data = { + "csrfmiddlewaretoken": token, + "username": username, + "password": password, + "next": "/", + } + # login to the site + response = self._go("https://www.healthtweets.org/accounts/login", data=data) + if response.status_code != 200 or "Your username and password" in response.text: + raise Exception("login failed") + + def get_values(self, state, date1, date2): + """ + state: two-letter state abbreviation 
(see STATE_CODES) + date1: the first date in the range, inclusive (format: YYYY-MM-DD) + date2: the last date in the range, inclusive (format: YYYY-MM-DD) + returns a dictionary (by date) of number of flu tweets (num) and total tweets (total) + """ + # get raw values (number of flu tweets) and normalized values (flu tweets as a percent of total tweets) + raw_values = self._get_values(state, date1, date2, False) + normalized_values = self._get_values(state, date1, date2, True) + values = {} + # save the raw number and calculate the total + for date in raw_values.keys(): + if normalized_values[date] == 0: + continue + values[date] = { + "num": round(raw_values[date]), + "total": round(100 * raw_values[date] / normalized_values[date]), + } + print(date, raw_values[date], normalized_values[date]) + return values + + def _get_values(self, state, date1, date2, normalized): + if state not in HealthTweets.STATE_CODES: + raise Exception("invalid state") + state_code = HealthTweets.STATE_CODES[state] + d1, d2 = datetime.strptime(date1, "%Y-%m-%d"), datetime.strptime(date2, "%Y-%m-%d") + s1, s2 = d1.strftime("%m%%2F%d%%2F%Y"), d2.strftime("%m%%2F%d%%2F%Y") + count_type = "normalized" if normalized else "raw" + response = self._go( + "https://www.healthtweets.org/trends/plot?resolution=Day" + f"&count_type={count_type}&dayNum={(d2 - d1).days}&from={s1}" + f"&to={s2}&plot1_disease=65&location_plot1={int(state_code)}" + ) + # print(state, date1, date2, normalized) + # print(url) + # print(response.status_code) + if response.status_code != 200: + raise Exception( + "plot status is " + str(response.status_code) + " (when was data last updated?)" + ) + lines = [line.strip() for line in response.text.split("\n")] + data_line = [line for line in lines if line[:16] == "var chartData = "] + if len(data_line) != 1: + raise Exception("lookup failed") + values = json.loads(data_line[0][16:-1]) + return { + datetime.strptime(v[0], "%m/%d/%Y").strftime("%Y-%m-%d"): float(v[1]) for v in values + } + + def check_state(self, state): + """ + Sanity checks state code mapping. 
+ state: two-letter state abbreviation (see STATE_CODES) + returns the full state name associated with the state abbreviation + """ + if state not in HealthTweets.STATE_CODES: + raise Exception("invalid state") + state_code = HealthTweets.STATE_CODES[state] + response = self._go( + "https://www.healthtweets.org/trends/plot?resolution=Day" + "&count_type=normalized&dayNum=7&from=01%%2F01%%2F2015" + f"&to=01%%2F07%%2F2015&plot1_disease=65&location_plot1={int(state_code)}" + ) + lines = [line.strip() for line in response.text.split("\n")] + data_line = [line for line in lines if line[:29] == 'var plotNames = ["Influenza ('] + if len(data_line) == 0: + raise Exception("check failed") + name = data_line[0][29:] + name = name.split("(")[0] + return name.strip() + + def _get_token(self, html): + page = PageParser.parse(html) + hidden = PageParser.filter_all( + page, [("html",), ("body",), ("div",), ("div",), ("div",), ("form",), ("input",)] + ) + return hidden["attrs"]["value"] + + def _go(self, url, method=None, referer=None, data=None): + if self.debug: + print(url) + if method is None: + if data is None: + method = self.session.get + else: + method = self.session.post + response = method(url, headers={"referer": referer}, data=data) + html = response.text + if self.debug: + for item in response.history: + print(f" [{int(item.status_code)} to {item.headers['Location']}]") + print(f" {int(response.status_code)} ({len(html)} bytes)") + return response def main(): - # args and usage - parser = argparse.ArgumentParser() - parser.add_argument('username', action='store', type=str, help='healthtweets.org username') - parser.add_argument('password', action='store', type=str, help='healthtweets.org password') - parser.add_argument('state', action='store', type=str, choices=list(HealthTweets.STATE_CODES.keys()), help='U.S. state (ex: TX)') - parser.add_argument('date1', action='store', type=str, help='first date, inclusive (ex: 2015-01-01)') - parser.add_argument('date2', action='store', type=str, help='last date, inclusive (ex: 2015-01-01)') - parser.add_argument('-d', '--debug', action='store_const', const=True, default=False, help='enable debug mode') - args = parser.parse_args() - - ht = HealthTweets(args.username, args.password, debug=args.debug) - values = ht.get_values(args.state, args.date1, args.date2) - print('Daily counts in %s from %s to %s:'%(ht.check_state(args.state), args.date1, args.date2)) - for date in sorted(list(values.keys())): - print('%s: num=%-4d total=%-5d (%.3f%%)'%(date, values[date]['num'], values[date]['total'], 100 * values[date]['num'] / values[date]['total'])) - - -if __name__ == '__main__': - main() + # args and usage + parser = argparse.ArgumentParser() + # fmt: off + parser.add_argument( + "username", + action="store", + type=str, + help="healthtweets.org username" + ) + parser.add_argument( + "password", + action="store", + type=str, + help="healthtweets.org password" + ) + parser.add_argument( + "state", + action="store", + type=str, + choices=list(HealthTweets.STATE_CODES.keys()), + help="U.S. 
state (ex: TX)" + ) + parser.add_argument( + "date1", + action="store", + type=str, + help="first date, inclusive (ex: 2015-01-01)" + ) + parser.add_argument( + "date2", + action="store", + type=str, + help="last date, inclusive (ex: 2015-01-01)" + ) + parser.add_argument( + "-d", + "--debug", + action="store_const", + const=True, + default=False, + help="enable debug mode" + ) + # fmt: on + args = parser.parse_args() + + ht = HealthTweets(args.username, args.password, debug=args.debug) + values = ht.get_values(args.state, args.date1, args.date2) + print(f"Daily counts in {ht.check_state(args.state)} from {args.date1} to {args.date2}:") + for date in sorted(list(values.keys())): + print( + "%s: num=%-4d total=%-5d (%.3f%%)" + % ( + date, + values[date]["num"], + values[date]["total"], + 100 * values[date]["num"] / values[date]["total"], + ) + ) + + +if __name__ == "__main__": + main() diff --git a/src/acquisition/twtr/pageparser.py b/src/acquisition/twtr/pageparser.py index 5e9aaaea1..2b2183c89 100644 --- a/src/acquisition/twtr/pageparser.py +++ b/src/acquisition/twtr/pageparser.py @@ -5,74 +5,73 @@ class PageParser(HTMLParser): - ''' - This is an HTML parser! All of the hard work is done by the superclass - (which is a Python built-in). This class puts the HTML into a hierarchy - that's (hopefully) easier to work with than raw string parsing. - ''' + """ + This is an HTML parser! All of the hard work is done by the superclass + (which is a Python built-in). This class puts the HTML into a hierarchy + that's (hopefully) easier to work with than raw string parsing. + """ - @staticmethod - def parse(html): - parser = PageParser() - parser.feed(html) - return parser.get_root_node() + @staticmethod + def parse(html): + parser = PageParser() + parser.feed(html) + return parser.get_root_node() - @staticmethod - def banlist(): - '''Commonly unclosed tags''' - return ('br', 'img', 'meta') + @staticmethod + def banlist(): + """Commonly unclosed tags""" + return ("br", "img", "meta") - @staticmethod - def new_node(type): - '''An empty node of the HTML tree''' - return {'type': type, 'attrs': {}, 'nodes': [], 'data': ''} + @staticmethod + def new_node(type): + """An empty node of the HTML tree""" + return {"type": type, "attrs": {}, "nodes": [], "data": ""} - @staticmethod - def filter_all(node, filters): - '''Applies all filters''' - for f in filters: - node = PageParser.filter(node, *f) - return node + @staticmethod + def filter_all(node, filters): + """Applies all filters""" + for f in filters: + node = PageParser.filter(node, *f) + return node - @staticmethod - def filter(node, type, index=0): - '''Finds a sub-node of the given type, specified by index''' - i = 0 - for node in node['nodes']: - if node['type'] == type: - if i == index: - return node - i += 1 - return None + @staticmethod + def filter(node, type, index=0): + """Finds a sub-node of the given type, specified by index""" + i = 0 + for node in node["nodes"]: + if node["type"] == type: + if i == index: + return node + i += 1 + return None - def __init__(self): - HTMLParser.__init__(self) - self.root = PageParser.new_node(None) - self.stack = [self.root] - self.indent = 0 + def __init__(self): + HTMLParser.__init__(self) + self.root = PageParser.new_node(None) + self.stack = [self.root] + self.indent = 0 - def get_root_node(self): - '''After parsing, returns the abstract root node (which contains the html node)''' - return self.root + def get_root_node(self): + """After parsing, returns the abstract root node (which contains the html 
node)""" + return self.root - def handle_starttag(self, tag, attrs): - '''Inherited - called when a start tag is found''' - if tag in PageParser.banlist(): - return - element = PageParser.new_node(tag) - for (k, v) in attrs: - element['attrs'][k] = v - self.stack[-1]['nodes'].append(element) - self.stack.append(element) + def handle_starttag(self, tag, attrs): + """Inherited - called when a start tag is found""" + if tag in PageParser.banlist(): + return + element = PageParser.new_node(tag) + for (k, v) in attrs: + element["attrs"][k] = v + self.stack[-1]["nodes"].append(element) + self.stack.append(element) - def handle_endtag(self, tag): - '''Inherited - called when an end tag is found''' - if tag in PageParser.banlist(): - return - self.stack.pop() + def handle_endtag(self, tag): + """Inherited - called when an end tag is found""" + if tag in PageParser.banlist(): + return + self.stack.pop() - - def handle_data(self, data): - '''Inherited - called when a data string is found''' - element = self.stack[-1] - element['data'] += data + def handle_data(self, data): + """Inherited - called when a data string is found""" + element = self.stack[-1] + element["data"] += data diff --git a/src/acquisition/twtr/twitter_update.py b/src/acquisition/twtr/twitter_update.py index 5c1f3f45b..80a023f19 100644 --- a/src/acquisition/twtr/twitter_update.py +++ b/src/acquisition/twtr/twitter_update.py @@ -1,4 +1,4 @@ -''' +""" =============== === Purpose === =============== @@ -49,7 +49,7 @@ * Small documentation update 2015-05-22 * Original version -''' +""" # third party import mysql.connector @@ -60,46 +60,55 @@ def run(): - # connect to the database - u, p = secrets.db.epi - cnx = mysql.connector.connect(user=u, password=p, database='epidata') - cur = cnx.cursor() - - def get_num_rows(): - cur.execute('SELECT count(1) `num` FROM `twitter`') - for (num,) in cur: - pass - return num - - # check from 7 days preceeding the last date with data through yesterday (healthtweets.org 404's if today's date is part of the range) - cur.execute('SELECT date_sub(max(`date`), INTERVAL 7 DAY) `date1`, date_sub(date(now()), INTERVAL 1 DAY) `date2` FROM `twitter`') - for (date1, date2) in cur: - date1, date2 = date1.strftime('%Y-%m-%d'), date2.strftime('%Y-%m-%d') - print('Checking dates between %s and %s...'%(date1, date2)) - - # keep track of how many rows were added - rows_before = get_num_rows() - - # check healthtweets.org for new and/or revised data - ht = HealthTweets(*secrets.healthtweets.login) - sql = 'INSERT INTO `twitter` (`date`, `state`, `num`, `total`) VALUES (%s, %s, %s, %s) ON DUPLICATE KEY UPDATE `num` = %s, `total` = %s' - total_rows = 0 - for state in sorted(HealthTweets.STATE_CODES.keys()): - values = ht.get_values(state, date1, date2) - for date in sorted(list(values.keys())): - sql_data = (date, state, values[date]['num'], values[date]['total'], values[date]['num'], values[date]['total']) - cur.execute(sql, sql_data) - total_rows += 1 - - # keep track of how many rows were added - rows_after = get_num_rows() - print('Inserted %d/%d row(s)'%(rows_after - rows_before, total_rows)) - - # cleanup - cur.close() - cnx.commit() - cnx.close() - - -if __name__ == '__main__': - run() + # connect to the database + u, p = secrets.db.epi + cnx = mysql.connector.connect(user=u, password=p, database="epidata") + cur = cnx.cursor() + + def get_num_rows(): + cur.execute("SELECT count(1) `num` FROM `twitter`") + for (num,) in cur: + pass + return num + + # check from 7 days preceeding the last date with data through 
yesterday (healthtweets.org 404's if today's date is part of the range) + cur.execute( + "SELECT date_sub(max(`date`), INTERVAL 7 DAY) `date1`, date_sub(date(now()), INTERVAL 1 DAY) `date2` FROM `twitter`" + ) + for (date1, date2) in cur: + date1, date2 = date1.strftime("%Y-%m-%d"), date2.strftime("%Y-%m-%d") + print(f"Checking dates between {date1} and {date2}...") + + # keep track of how many rows were added + rows_before = get_num_rows() + + # check healthtweets.org for new and/or revised data + ht = HealthTweets(*secrets.healthtweets.login) + sql = "INSERT INTO `twitter` (`date`, `state`, `num`, `total`) VALUES (%s, %s, %s, %s) ON DUPLICATE KEY UPDATE `num` = %s, `total` = %s" + total_rows = 0 + for state in sorted(HealthTweets.STATE_CODES.keys()): + values = ht.get_values(state, date1, date2) + for date in sorted(list(values.keys())): + sql_data = ( + date, + state, + values[date]["num"], + values[date]["total"], + values[date]["num"], + values[date]["total"], + ) + cur.execute(sql, sql_data) + total_rows += 1 + + # keep track of how many rows were added + rows_after = get_num_rows() + print(f"Inserted {int(rows_after - rows_before)}/{int(total_rows)} row(s)") + + # cleanup + cur.close() + cnx.commit() + cnx.close() + + +if __name__ == "__main__": + run() diff --git a/src/acquisition/wiki/wiki.py b/src/acquisition/wiki/wiki.py index 602e21102..c57582918 100644 --- a/src/acquisition/wiki/wiki.py +++ b/src/acquisition/wiki/wiki.py @@ -1,112 +1,112 @@ """ -=============== -=== Purpose === -=============== - -Wrapper for the entire wiki data collection process: - 1. Uses wiki_update.py to fetch metadata for new access logs - 2. Uses wiki_download.py to download the access logs - 3. Uses wiki_extract.py to store article access counts - +=============== +=== Purpose === +=============== + +Wrapper for the entire wiki data collection process: + 1. Uses wiki_update.py to fetch metadata for new access logs + 2. Uses wiki_download.py to download the access logs + 3. Uses wiki_extract.py to store article access counts + See also: master.php - - -======================= -=== Data Dictionary === -======================= - -`wiki_raw` is a staging table where extracted access log data is stored for -further processing. When wiki_update.py finds a new log, it saves the name and -hash to this table, with a status of 0. This table is read by master.php, which -then hands out "jobs" (independently and in parallel) to wiki_download.py. -After wiki_download.py downloads the log and extracts the counts, it submits -the data (as JSON) to master.php, which then stores the "raw" JSON counts in -this table. 
-+----------+---------------+------+-----+---------+----------------+ -| Field | Type | Null | Key | Default | Extra | -+----------+---------------+------+-----+---------+----------------+ -| id | int(11) | NO | PRI | NULL | auto_increment | -| name | varchar(64) | NO | UNI | NULL | | -| hash | char(32) | NO | | NULL | | -| status | int(11) | NO | MUL | 0 | | -| size | int(11) | YES | | NULL | | -| datetime | datetime | YES | | NULL | | -| worker | varchar(256) | YES | | NULL | | -| elapsed | float | YES | | NULL | | -| data | varchar(2048) | YES | | NULL | | -+----------+---------------+------+-----+---------+----------------+ -id: unique identifier for each record -name: name of the access log -hash: md5 hash of the file, as reported by the dumps site (all zeroes if no - hash is provided) -status: the status of the job, using the following values: - 0: queued for download - 1: download in progress - 2: queued for extraction - 3: extracted to `wiki` table - (any negative value indicates failure) -size: the size, in bytes, of the downloaded file -datetime: the timestamp of the most recent status update -worker: name (user@hostname) of the machine working on the job -elapsed: time, in seconds, taken to complete the job -data: a JSON string containing counts for selected articles in the access log - -`wiki` is the table where access counts are stored (parsed from wiki_raw). The -"raw" JSON counts are parsed by wiki_extract.py and stored directly in this -table. -+----------+-------------+------+-----+---------+----------------+ -| Field | Type | Null | Key | Default | Extra | -+----------+-------------+------+-----+---------+----------------+ -| id | int(11) | NO | PRI | NULL | auto_increment | -| datetime | datetime | NO | MUL | NULL | | -| article | varchar(64) | NO | MUL | NULL | | -| count | int(11) | NO | | NULL | | -+----------+-------------+------+-----+---------+----------------+ -id: unique identifier for each record -datetime: UTC timestamp (rounded to the nearest hour) of article access -article: name of the article -count: number of times the article was accessed in the hour - -`wiki_meta` is a metadata table for this dataset. It contains pre-calculated -date and epiweeks fields, and more importantly, the total number of English -article hits (denominator) for each `datetime` in the `wiki` table. This table -is populated in parallel with `wiki` by the wiki_extract.py script. -+----------+----------+------+-----+---------+----------------+ -| Field | Type | Null | Key | Default | Extra | -+----------+----------+------+-----+---------+----------------+ -| id | int(11) | NO | PRI | NULL | auto_increment | -| datetime | datetime | NO | UNI | NULL | | -| date | date | NO | | NULL | | -| epiweek | int(11) | NO | | NULL | | -| total | int(11) | NO | | NULL | | -+----------+----------+------+-----+---------+----------------+ -id: unique identifier for each record -datetime: UTC timestamp (rounded to the nearest hour) of article access -date: the date portion of `datetime` -epiweek: the year and week containing `datetime` -total: total number of English article hits in the hour - - -================= -=== Changelog === -================= - + + +======================= +=== Data Dictionary === +======================= + +`wiki_raw` is a staging table where extracted access log data is stored for +further processing. When wiki_update.py finds a new log, it saves the name and +hash to this table, with a status of 0. 
This table is read by master.php, which +then hands out "jobs" (independently and in parallel) to wiki_download.py. +After wiki_download.py downloads the log and extracts the counts, it submits +the data (as JSON) to master.php, which then stores the "raw" JSON counts in +this table. ++----------+---------------+------+-----+---------+----------------+ +| Field | Type | Null | Key | Default | Extra | ++----------+---------------+------+-----+---------+----------------+ +| id | int(11) | NO | PRI | NULL | auto_increment | +| name | varchar(64) | NO | UNI | NULL | | +| hash | char(32) | NO | | NULL | | +| status | int(11) | NO | MUL | 0 | | +| size | int(11) | YES | | NULL | | +| datetime | datetime | YES | | NULL | | +| worker | varchar(256) | YES | | NULL | | +| elapsed | float | YES | | NULL | | +| data | varchar(2048) | YES | | NULL | | ++----------+---------------+------+-----+---------+----------------+ +id: unique identifier for each record +name: name of the access log +hash: md5 hash of the file, as reported by the dumps site (all zeroes if no + hash is provided) +status: the status of the job, using the following values: + 0: queued for download + 1: download in progress + 2: queued for extraction + 3: extracted to `wiki` table + (any negative value indicates failure) +size: the size, in bytes, of the downloaded file +datetime: the timestamp of the most recent status update +worker: name (user@hostname) of the machine working on the job +elapsed: time, in seconds, taken to complete the job +data: a JSON string containing counts for selected articles in the access log + +`wiki` is the table where access counts are stored (parsed from wiki_raw). The +"raw" JSON counts are parsed by wiki_extract.py and stored directly in this +table. ++----------+-------------+------+-----+---------+----------------+ +| Field | Type | Null | Key | Default | Extra | ++----------+-------------+------+-----+---------+----------------+ +| id | int(11) | NO | PRI | NULL | auto_increment | +| datetime | datetime | NO | MUL | NULL | | +| article | varchar(64) | NO | MUL | NULL | | +| count | int(11) | NO | | NULL | | ++----------+-------------+------+-----+---------+----------------+ +id: unique identifier for each record +datetime: UTC timestamp (rounded to the nearest hour) of article access +article: name of the article +count: number of times the article was accessed in the hour + +`wiki_meta` is a metadata table for this dataset. It contains pre-calculated +date and epiweeks fields, and more importantly, the total number of English +article hits (denominator) for each `datetime` in the `wiki` table. This table +is populated in parallel with `wiki` by the wiki_extract.py script. 
++----------+----------+------+-----+---------+----------------+ +| Field | Type | Null | Key | Default | Extra | ++----------+----------+------+-----+---------+----------------+ +| id | int(11) | NO | PRI | NULL | auto_increment | +| datetime | datetime | NO | UNI | NULL | | +| date | date | NO | | NULL | | +| epiweek | int(11) | NO | | NULL | | +| total | int(11) | NO | | NULL | | ++----------+----------+------+-----+---------+----------------+ +id: unique identifier for each record +datetime: UTC timestamp (rounded to the nearest hour) of article access +date: the date portion of `datetime` +epiweek: the year and week containing `datetime` +total: total number of English article hits in the hour + + +================= +=== Changelog === +================= + 2017-02-24 * secrets and small improvements 2016-08-14 * Increased job limit (6 -> 12) (pageviews files are ~2x smaller) -2015-08-26 +2015-08-26 * Reduced job limit (8 -> 6) -2015-08-14 +2015-08-14 * Reduced job limit (10 -> 8) -2015-08-11 +2015-08-11 + New table `wiki_meta` -2015-05-22 +2015-05-22 * Updated status codes for `wiki_raw` table -2015-05-21 +2015-05-21 * Original version """ - + # first party from . import wiki_update from . import wiki_download @@ -115,31 +115,27 @@ def main(): - # step 1: find new access logs (aka "jobs") - print('looking for new jobs...') - try: - wiki_update.run() - except: - print('wiki_update failed') - - # step 2: run a few jobs - print('running jobs...') - try: - wiki_download.run( - secrets.wiki.hmac, - download_limit=1024 * 1024 * 1024, - job_limit=12 - ) - except: - print('wiki_download failed') - - # step 3: extract counts from the staging data - print('extracting counts...') - try: - wiki_extract.run(job_limit=100) - except: - print('wiki_extract failed') - - -if __name__ == '__main__': - main() + # step 1: find new access logs (aka "jobs") + print("looking for new jobs...") + try: + wiki_update.run() + except: + print("wiki_update failed") + + # step 2: run a few jobs + print("running jobs...") + try: + wiki_download.run(secrets.wiki.hmac, download_limit=1024 * 1024 * 1024, job_limit=12) + except: + print("wiki_download failed") + + # step 3: extract counts from the staging data + print("extracting counts...") + try: + wiki_extract.run(job_limit=100) + except: + print("wiki_extract failed") + + +if __name__ == "__main__": + main() diff --git a/src/acquisition/wiki/wiki_download.py b/src/acquisition/wiki/wiki_download.py index 1a01b7f8e..6192eab02 100644 --- a/src/acquisition/wiki/wiki_download.py +++ b/src/acquisition/wiki/wiki_download.py @@ -27,16 +27,16 @@ """ # python 2 and 3 -from __future__ import print_function import sys + if sys.version_info.major == 2: - # python 2 libraries - from urllib import urlencode - from urllib2 import urlopen + # python 2 libraries + from urllib import urlencode + from urllib2 import urlopen else: - # python 3 libraries - from urllib.parse import urlencode - from urllib.request import urlopen + # python 3 libraries + from urllib.parse import urlencode + from urllib.request import urlopen # common libraries import argparse @@ -53,234 +53,302 @@ VERSION = 10 -MASTER_URL = 'https://delphi.cmu.edu/~automation/public/wiki/master.php' +MASTER_URL = "https://delphi.cmu.edu/~automation/public/wiki/master.php" + def text(data_string): - return str(data_string.decode('utf-8')) + return str(data_string.decode("utf-8")) def data(text_string): - if sys.version_info.major == 2: - return text_string - else: - return bytes(text_string, 'utf-8') + if 
sys.version_info.major == 2: + return text_string + else: + return bytes(text_string, "utf-8") def get_hmac_sha256(key, msg): - key_bytes, msg_bytes = key.encode('utf-8'), msg.encode('utf-8') - return hmac.new(key_bytes, msg_bytes, hashlib.sha256).hexdigest() + key_bytes, msg_bytes = key.encode("utf-8"), msg.encode("utf-8") + return hmac.new(key_bytes, msg_bytes, hashlib.sha256).hexdigest() def extract_article_counts(filename, language, articles, debug_mode): - """ - Support multiple languages ('en' | 'es' | 'pt') - Running time optimized to O(M), which means only need to scan the whole file once - :param filename: - :param language: Different languages such as 'en', 'es', and 'pt' - :param articles: - :param debug_mode: - :return: - """ - counts = {} - articles_set = set(map(lambda x: x.lower(), articles)) - total = 0 - with open(filename, "r", encoding="utf8") as f: - for line in f: - content = line.strip().split() - if len(content) != 4: - print('unexpected article format: {0}'.format(line)) - continue - article_title = content[1].lower() - article_count = int(content[2]) - if content[0] == language: - total += article_count - if content[0] == language and article_title in articles_set: - if debug_mode: - print("Find article {0}: {1}".format(article_title, line)) - counts[article_title] = article_count - if debug_mode: - print("Total number of counts for language {0} is {1}".format(language, total)) - counts['total'] = total - return counts + """ + Support multiple languages ('en' | 'es' | 'pt') + Running time optimized to O(M), which means only need to scan the whole file once + :param filename: + :param language: Different languages such as 'en', 'es', and 'pt' + :param articles: + :param debug_mode: + :return: + """ + counts = {} + articles_set = set(map(lambda x: x.lower(), articles)) + total = 0 + with open(filename, encoding="utf8") as f: + for line in f: + content = line.strip().split() + if len(content) != 4: + print(f"unexpected article format: {line}") + continue + article_title = content[1].lower() + article_count = int(content[2]) + if content[0] == language: + total += article_count + if content[0] == language and article_title in articles_set: + if debug_mode: + print(f"Find article {article_title}: {line}") + counts[article_title] = article_count + if debug_mode: + print(f"Total number of counts for language {language} is {total}") + counts["total"] = total + return counts def extract_article_counts_orig(articles, debug_mode): - """ - The original method which extracts article counts by shell command grep (only support en articles). - As it is difficult to deal with other languages (utf-8 encoding), we choose to use python read files. - Another things is that it is slower to go over the whole file once and once again, the time complexity is O(NM), - where N is the number of articles and M is the lines in the file - In our new implementation extract_article_counts(), the time complexity is O(M), and it can cope with utf8 encoding - :param articles: - :param debug_mode: - :return: - """ - counts = {} - for article in articles: - if debug_mode: - print(' %s' % (article)) + """ + The original method which extracts article counts by shell command grep (only support en articles). + As it is difficult to deal with other languages (utf-8 encoding), we choose to use python read files. 
+ Another things is that it is slower to go over the whole file once and once again, the time complexity is O(NM), + where N is the number of articles and M is the lines in the file + In our new implementation extract_article_counts(), the time complexity is O(M), and it can cope with utf8 encoding + :param articles: + :param debug_mode: + :return: + """ + counts = {} + for article in articles: + if debug_mode: + print(f" {article}") + out = text( + subprocess.check_output( + f'LC_ALL=C grep -a -i "^en {article.lower()} " raw2 | cat', shell=True + ) + ).strip() + count = 0 + if len(out) > 0: + for line in out.split("\n"): + fields = line.split() + if len(fields) != 4: + print(f"unexpected article format: [{line}]") + else: + count += int(fields[2]) + # print ' %4d %s'%(count, article) + counts[article.lower()] = count + if debug_mode: + print(f" {int(count)}") + print("getting total count...") out = text( - subprocess.check_output('LC_ALL=C grep -a -i "^en %s " raw2 | cat' % (article.lower()), shell=True)).strip() - count = 0 - if len(out) > 0: - for line in out.split('\n'): - fields = line.split() - if len(fields) != 4: - print('unexpected article format: [%s]' % (line)) - else: - count += int(fields[2]) - # print ' %4d %s'%(count, article) - counts[article.lower()] = count + subprocess.check_output( + 'cat raw2 | LC_ALL=C grep -a -i "^en " | cut -d" " -f 3 | awk \'{s+=$1} END {printf "%.0f", s}\'', + shell=True, + ) + ) + total = int(out) if debug_mode: - print(' %d' % (count)) - print('getting total count...') - out = text(subprocess.check_output( - 'cat raw2 | LC_ALL=C grep -a -i "^en " | cut -d" " -f 3 | awk \'{s+=$1} END {printf "%.0f", s}\'', shell=True)) - total = int(out) - if debug_mode: - print(total) - counts['total'] = total - return counts + print(total) + counts["total"] = total + return counts def run(secret, download_limit=None, job_limit=None, sleep_time=1, job_type=0, debug_mode=False): - worker = text(subprocess.check_output("echo `whoami`@`hostname`", shell=True)).strip() - print('this is [%s]'%(worker)) - if debug_mode: - print('*** running in debug mode ***') - - total_download = 0 - passed_jobs = 0 - failed_jobs = 0 - while (download_limit is None or total_download < download_limit) and (job_limit is None or (passed_jobs + failed_jobs) < job_limit): - try: - time_start = datetime.datetime.now() - req = urlopen(MASTER_URL + '?get=x&type=%s'%(job_type)) - code = req.getcode() - if code != 200: - if code == 201: - print('no jobs available') - if download_limit is None and job_limit is None: - time.sleep(60) - continue - else: - print('nothing to do, exiting') - return - else: - raise Exception('server response code (get) was %d'%(code)) - # Make the code compatible with mac os system - if platform == "darwin": - job_content = text(req.readlines()[1]) - else: - job_content = text(req.readlines()[0]) - if job_content == 'no jobs': - print('no jobs available') - if download_limit is None and job_limit is None: - time.sleep(60) - continue - else: - print('nothing to do, exiting') - return - job = json.loads(job_content) - print('received job [%d|%s]'%(job['id'], job['name'])) - # updated parsing for pageviews - maybe use a regex in the future - #year, month = int(job['name'][11:15]), int(job['name'][15:17]) - year, month = int(job['name'][10:14]), int(job['name'][14:16]) - #print 'year=%d | month=%d'%(year, month) - url = 'https://dumps.wikimedia.org/other/pageviews/%d/%d-%02d/%s'%(year, year, month, job['name']) - print('downloading file [%s]...'%(url)) - 
subprocess.check_call('curl -s %s > raw.gz'%(url), shell=True) - print('checking file size...') - # Make the code cross-platfrom, so use python to get the size of the file - # size = int(text(subprocess.check_output('ls -l raw.gz | cut -d" " -f 5', shell=True))) - size = os.stat("raw.gz").st_size - if debug_mode: - print(size) - total_download += size - if job['hash'] != '00000000000000000000000000000000': - print('checking hash...') - out = text(subprocess.check_output('md5sum raw.gz', shell=True)) - result = out[0:32] - if result != job['hash']: - raise Exception('wrong hash [expected %s, got %s]'%(job['hash'], result)) - if debug_mode: - print(result) - print('decompressing...') - subprocess.check_call('gunzip -f raw.gz', shell=True) - #print 'converting case...' - #subprocess.check_call('cat raw | tr "[:upper:]" "[:lower:]" > raw2', shell=True) - #subprocess.check_call('rm raw', shell=True) - subprocess.check_call('mv raw raw2', shell=True) - print('extracting article counts...') - - # Use python to read the file and extract counts, if you want to use the original shell method, please use - counts = {} - for language in wiki_util.Articles.available_languages: - lang2articles = {'en': wiki_util.Articles.en_articles, 'es': wiki_util.Articles.es_articles, 'pt': wiki_util.Articles.pt_articles} - articles = lang2articles[language] - articles = sorted(articles) - if debug_mode: - print("Language is {0} and target articles are {1}".format(language, articles)) - temp_counts = extract_article_counts("raw2", language, articles, debug_mode) - counts[language] = temp_counts - - if not debug_mode: - print('deleting files...') - subprocess.check_call('rm raw2', shell=True) - print('saving results...') - time_stop = datetime.datetime.now() - result = { - 'id': job['id'], - 'size': size, - 'data': json.dumps(counts), - 'worker': worker, - 'elapsed': (time_stop - time_start).total_seconds(), - } - payload = json.dumps(result) - hmac_str = get_hmac_sha256(secret, payload) - if debug_mode: - print(' hmac: %s' % hmac_str) - post_data = urlencode({'put': payload, 'hmac': hmac_str}) - req = urlopen(MASTER_URL, data=data(post_data)) - code = req.getcode() - if code != 200: - raise Exception('server response code (put) was %d'%(code)) - print('done! 
(dl=%d)'%(total_download)) - passed_jobs += 1 - except Exception as ex: - print('***** Caught Exception: %s *****'%(str(ex))) - failed_jobs += 1 - time.sleep(30) - print('passed=%d | failed=%d | total=%d'%(passed_jobs, failed_jobs, passed_jobs + failed_jobs)) - time.sleep(sleep_time) - - if download_limit is not None and total_download >= download_limit: - print('download limit has been reached [%d >= %d]'%(total_download, download_limit)) - if job_limit is not None and (passed_jobs + failed_jobs) >= job_limit: - print('job limit has been reached [%d >= %d]'%(passed_jobs + failed_jobs, job_limit)) + worker = text(subprocess.check_output("echo `whoami`@`hostname`", shell=True)).strip() + print(f"this is [{worker}]") + if debug_mode: + print("*** running in debug mode ***") + + total_download = 0 + passed_jobs = 0 + failed_jobs = 0 + while (download_limit is None or total_download < download_limit) and ( + job_limit is None or (passed_jobs + failed_jobs) < job_limit + ): + try: + time_start = datetime.datetime.now() + req = urlopen(MASTER_URL + f"?get=x&type={job_type}") + code = req.getcode() + if code != 200: + if code == 201: + print("no jobs available") + if download_limit is None and job_limit is None: + time.sleep(60) + continue + else: + print("nothing to do, exiting") + return + else: + raise Exception(f"server response code (get) was {int(code)}") + # Make the code compatible with mac os system + if platform == "darwin": + job_content = text(req.readlines()[1]) + else: + job_content = text(req.readlines()[0]) + if job_content == "no jobs": + print("no jobs available") + if download_limit is None and job_limit is None: + time.sleep(60) + continue + else: + print("nothing to do, exiting") + return + job = json.loads(job_content) + print(f"received job [{int(job['id'])}|{job['name']}]") + # updated parsing for pageviews - maybe use a regex in the future + # year, month = int(job['name'][11:15]), int(job['name'][15:17]) + year, month = int(job["name"][10:14]), int(job["name"][14:16]) + # print 'year=%d | month=%d'%(year, month) + url = ( + "https://dumps.wikimedia.org/other/" + f"pageviews/{year}/{year}-{month:02d}/{job['name']}" + ) + print(f"downloading file [{url}]...") + subprocess.check_call(f"curl -s {url} > raw.gz", shell=True) + print("checking file size...") + # Make the code cross-platfrom, so use python to get the size of the file + # size = int(text(subprocess.check_output('ls -l raw.gz | cut -d" " -f 5', shell=True))) + size = os.stat("raw.gz").st_size + if debug_mode: + print(size) + total_download += size + if job["hash"] != "00000000000000000000000000000000": + print("checking hash...") + out = text(subprocess.check_output("md5sum raw.gz", shell=True)) + result = out[0:32] + if result != job["hash"]: + raise Exception(f"wrong hash [expected {job['hash']}, got {result}]") + if debug_mode: + print(result) + print("decompressing...") + subprocess.check_call("gunzip -f raw.gz", shell=True) + # print 'converting case...' 
+ # subprocess.check_call('cat raw | tr "[:upper:]" "[:lower:]" > raw2', shell=True) + # subprocess.check_call('rm raw', shell=True) + subprocess.check_call("mv raw raw2", shell=True) + print("extracting article counts...") + + # Use python to read the file and extract counts, if you want to use the original shell method, please use + counts = {} + for language in wiki_util.Articles.available_languages: + lang2articles = { + "en": wiki_util.Articles.en_articles, + "es": wiki_util.Articles.es_articles, + "pt": wiki_util.Articles.pt_articles, + } + articles = lang2articles[language] + articles = sorted(articles) + if debug_mode: + print(f"Language is {language} and target articles are {articles}") + temp_counts = extract_article_counts("raw2", language, articles, debug_mode) + counts[language] = temp_counts + + if not debug_mode: + print("deleting files...") + subprocess.check_call("rm raw2", shell=True) + print("saving results...") + time_stop = datetime.datetime.now() + result = { + "id": job["id"], + "size": size, + "data": json.dumps(counts), + "worker": worker, + "elapsed": (time_stop - time_start).total_seconds(), + } + payload = json.dumps(result) + hmac_str = get_hmac_sha256(secret, payload) + if debug_mode: + print(f" hmac: {hmac_str}") + post_data = urlencode({"put": payload, "hmac": hmac_str}) + req = urlopen(MASTER_URL, data=data(post_data)) + code = req.getcode() + if code != 200: + raise Exception(f"server response code (put) was {int(code)}") + print(f"done! (dl={int(total_download)})") + passed_jobs += 1 + except Exception as ex: + print(f"***** Caught Exception: {str(ex)} *****") + failed_jobs += 1 + time.sleep(30) + print( + "passed=%d | failed=%d | total=%d" + % (passed_jobs, failed_jobs, passed_jobs + failed_jobs) + ) + time.sleep(sleep_time) + + if download_limit is not None and total_download >= download_limit: + print(f"download limit has been reached [{int(total_download)} >= {int(download_limit)}]") + if job_limit is not None and (passed_jobs + failed_jobs) >= job_limit: + print(f"job limit has been reached [{int(passed_jobs + failed_jobs)} >= {int(job_limit)}]") def main(): - # version info - print('version', VERSION) - - # args and usage - parser = argparse.ArgumentParser() - parser.add_argument('secret', type=str, help='hmac secret key') - parser.add_argument('-b', '--blimit', action='store', type=int, default=None, help='download limit, in bytes') - parser.add_argument('-j', '--jlimit', action='store', type=int, default=None, help='job limit') - parser.add_argument('-s', '--sleep', action='store', type=int, default=1, help='seconds to sleep between each job') - parser.add_argument('-t', '--type', action='store', type=int, default=0, help='type of job') - parser.add_argument('-d', '--debug', action='store_const', const=True, default=False, help='enable debug mode') - args = parser.parse_args() - - # runtime options - secret, download_limit, job_limit, sleep_time, job_type, debug_mode = args.secret, args.blimit, args.jlimit, args.sleep, args.type, args.debug - - # run - run(secret, download_limit, job_limit, sleep_time, job_type, debug_mode) - - -if __name__ == '__main__': - main() + # version info + print("version", VERSION) + + # args and usage + parser = argparse.ArgumentParser() + # fmt: off + parser.add_argument( + "secret", + type=str, + help="hmac secret key" + ) + parser.add_argument( + "-b", + "--blimit", + action="store", + type=int, + default=None, + help="download limit, in bytes" + ) + parser.add_argument( + "-j", + "--jlimit", + action="store", + 
type=int, + default=None, + help="job limit" + ) + parser.add_argument( + "-s", + "--sleep", + action="store", + type=int, + default=1, + help="seconds to sleep between each job" + ) + parser.add_argument( + "-t", + "--type", + action="store", + type=int, + default=0, + help="type of job" + ) + parser.add_argument( + "-d", + "--debug", + action="store_const", + const=True, + default=False, + help="enable debug mode" + ) + # fmt: on + args = parser.parse_args() + + # runtime options + secret, download_limit, job_limit, sleep_time, job_type, debug_mode = ( + args.secret, + args.blimit, + args.jlimit, + args.sleep, + args.type, + args.debug, + ) + + # run + run(secret, download_limit, job_limit, sleep_time, job_type, debug_mode) + + +if __name__ == "__main__": + main() diff --git a/src/acquisition/wiki/wiki_extract.py b/src/acquisition/wiki/wiki_extract.py index 839d7d6dc..718a64c20 100644 --- a/src/acquisition/wiki/wiki_extract.py +++ b/src/acquisition/wiki/wiki_extract.py @@ -35,74 +35,96 @@ def floor_timestamp(timestamp): - return datetime(timestamp.year, timestamp.month, timestamp.day, timestamp.hour) + return datetime(timestamp.year, timestamp.month, timestamp.day, timestamp.hour) def ceil_timestamp(timestamp): - return floor_timestamp(timestamp) + timedelta(hours=1) + return floor_timestamp(timestamp) + timedelta(hours=1) def round_timestamp(timestamp): - before = floor_timestamp(timestamp) - after = ceil_timestamp(timestamp) - if (timestamp - before) < (after - timestamp): - return before - else: - return after + before = floor_timestamp(timestamp) + after = ceil_timestamp(timestamp) + if (timestamp - before) < (after - timestamp): + return before + else: + return after def get_timestamp(name): - # new parsing for pageviews compared to pagecounts - maybe switch to regex in the future - #return datetime(int(name[11:15]), int(name[15:17]), int(name[17:19]), int(name[20:22]), int(name[22:24]), int(name[24:26])) - return datetime(int(name[10:14]), int(name[14:16]), int(name[16:18]), int(name[19:21]), int(name[21:23]), int(name[23:25])) + # new parsing for pageviews compared to pagecounts - maybe switch to regex in the future + # return datetime(int(name[11:15]), int(name[15:17]), int(name[17:19]), int(name[20:22]), int(name[22:24]), int(name[24:26])) + return datetime( + int(name[10:14]), + int(name[14:16]), + int(name[16:18]), + int(name[19:21]), + int(name[21:23]), + int(name[23:25]), + ) def run(job_limit=100): - # connect to the database - u, p = secrets.db.epi - cnx = mysql.connector.connect(user=u, password=p, database='epidata') - cur = cnx.cursor() - - # # Some preparation for utf-8, and it is a temporary trick solution. 
The real solution should change those char set and collation encoding to utf8 permanently - # cur.execute("SET NAMES utf8;") - # cur.execute("SET CHARACTER SET utf8;") - # # I print SHOW SESSION VARIABLES LIKE 'character\_set\_%'; and SHOW SESSION VARIABLES LIKE 'collation\_%'; on my local computer - # cur.execute("SET character_set_client=utf8mb4;") - # cur.execute("SET character_set_connection=utf8mb4;") - # cur.execute("SET character_set_database=utf8;") - # cur.execute("SET character_set_results=utf8mb4;") - # cur.execute("SET character_set_server=utf8;") - # cur.execute("SET collation_connection=utf8mb4_general_ci;") - # cur.execute("SET collation_database=utf8_general_ci;") - # cur.execute("SET collation_server=utf8_general_ci;") - - # find jobs that are queued for extraction - cur.execute('SELECT `id`, `name`, `data` FROM `wiki_raw` WHERE `status` = 2 ORDER BY `name` ASC LIMIT %s', (job_limit,)) - jobs = [] - for (id, name, data_str) in cur: - jobs.append((id, name, json.loads(data_str))) - print('Processing data from %d jobs'%(len(jobs))) - - # get the counts from the json object and insert into (or update) the database - # Notice that data_collect contains data with different languages - for (id, name, data_collect) in jobs: - print('processing job [%d|%s]...'%(id, name)) - timestamp = round_timestamp(get_timestamp(name)) - for language in data_collect.keys(): - data = data_collect[language] - for article in sorted(data.keys()): - count = data[article] - cur.execute('INSERT INTO `wiki` (`datetime`, `article`, `count`, `language`) VALUES (%s, %s, %s, %s) ON DUPLICATE KEY UPDATE `count` = `count` + %s', (str(timestamp), article.encode('utf-8').decode('latin-1'), count, language, count)) - if article == 'total': - cur.execute('INSERT INTO `wiki_meta` (`datetime`, `date`, `epiweek`, `total`, `language`) VALUES (%s, date(%s), yearweek(%s, 6), %s, %s) ON DUPLICATE KEY UPDATE `total` = `total` + %s', (str(timestamp), str(timestamp), str(timestamp), count, language, count)) - # update the job - cur.execute('UPDATE `wiki_raw` SET `status` = 3 WHERE `id` = %s', (id,)) - - # cleanup - cur.close() - cnx.commit() - cnx.close() - - -if __name__ == '__main__': - run() + # connect to the database + u, p = secrets.db.epi + cnx = mysql.connector.connect(user=u, password=p, database="epidata") + cur = cnx.cursor() + + # # Some preparation for utf-8, and it is a temporary trick solution. 
The real solution should change those char set and collation encoding to utf8 permanently + # cur.execute("SET NAMES utf8;") + # cur.execute("SET CHARACTER SET utf8;") + # # I print SHOW SESSION VARIABLES LIKE 'character\_set\_%'; and SHOW SESSION VARIABLES LIKE 'collation\_%'; on my local computer + # cur.execute("SET character_set_client=utf8mb4;") + # cur.execute("SET character_set_connection=utf8mb4;") + # cur.execute("SET character_set_database=utf8;") + # cur.execute("SET character_set_results=utf8mb4;") + # cur.execute("SET character_set_server=utf8;") + # cur.execute("SET collation_connection=utf8mb4_general_ci;") + # cur.execute("SET collation_database=utf8_general_ci;") + # cur.execute("SET collation_server=utf8_general_ci;") + + # find jobs that are queued for extraction + cur.execute( + "SELECT `id`, `name`, `data` FROM `wiki_raw` WHERE `status` = 2 ORDER BY `name` ASC LIMIT %s", + (job_limit,), + ) + jobs = [] + for (id, name, data_str) in cur: + jobs.append((id, name, json.loads(data_str))) + print(f"Processing data from {len(jobs)} jobs") + + # get the counts from the json object and insert into (or update) the database + # Notice that data_collect contains data with different languages + for (id, name, data_collect) in jobs: + print(f"processing job [{int(id)}|{name}]...") + timestamp = round_timestamp(get_timestamp(name)) + for language in data_collect.keys(): + data = data_collect[language] + for article in sorted(data.keys()): + count = data[article] + cur.execute( + "INSERT INTO `wiki` (`datetime`, `article`, `count`, `language`) VALUES (%s, %s, %s, %s) ON DUPLICATE KEY UPDATE `count` = `count` + %s", + ( + str(timestamp), + article.encode("utf-8").decode("latin-1"), + count, + language, + count, + ), + ) + if article == "total": + cur.execute( + "INSERT INTO `wiki_meta` (`datetime`, `date`, `epiweek`, `total`, `language`) VALUES (%s, date(%s), yearweek(%s, 6), %s, %s) ON DUPLICATE KEY UPDATE `total` = `total` + %s", + (str(timestamp), str(timestamp), str(timestamp), count, language, count), + ) + # update the job + cur.execute("UPDATE `wiki_raw` SET `status` = 3 WHERE `id` = %s", (id,)) + + # cleanup + cur.close() + cnx.commit() + cnx.close() + + +if __name__ == "__main__": + run() diff --git a/src/acquisition/wiki/wiki_update.py b/src/acquisition/wiki/wiki_update.py index 411544810..a9f240629 100644 --- a/src/acquisition/wiki/wiki_update.py +++ b/src/acquisition/wiki/wiki_update.py @@ -32,87 +32,100 @@ def floor_timestamp(timestamp): - return datetime(timestamp.year, timestamp.month, timestamp.day, timestamp.hour) + return datetime(timestamp.year, timestamp.month, timestamp.day, timestamp.hour) def ceil_timestamp(timestamp): - return floor_timestamp(timestamp) + timedelta(hours=1) + return floor_timestamp(timestamp) + timedelta(hours=1) def round_timestamp(timestamp): - before = floor_timestamp(timestamp) - after = ceil_timestamp(timestamp) - if (timestamp - before) < (after - timestamp): - return before - else: - return after + before = floor_timestamp(timestamp) + after = ceil_timestamp(timestamp) + if (timestamp - before) < (after - timestamp): + return before + else: + return after def get_timestamp(name): - # If the program is cold start (there are no previous names in the table, and the name will be None) - if name is None: - curr = datetime.now() - return datetime(curr.year, curr.month, curr.day, curr.hour, curr.minute, curr.second) - # new parsing for pageviews compared to pagecounts - maybe switch to regex in the future - #return datetime(int(name[11:15]), 
int(name[15:17]), int(name[17:19]), int(name[20:22]), int(name[22:24]), int(name[24:26])) - return datetime(int(name[10:14]), int(name[14:16]), int(name[16:18]), int(name[19:21]), int(name[21:23]), int(name[23:25])) + # If the program is cold start (there are no previous names in the table, and the name will be None) + if name is None: + curr = datetime.now() + return datetime(curr.year, curr.month, curr.day, curr.hour, curr.minute, curr.second) + # new parsing for pageviews compared to pagecounts - maybe switch to regex in the future + # return datetime(int(name[11:15]), int(name[15:17]), int(name[17:19]), int(name[20:22]), int(name[22:24]), int(name[24:26])) + return datetime( + int(name[10:14]), + int(name[14:16]), + int(name[16:18]), + int(name[19:21]), + int(name[21:23]), + int(name[23:25]), + ) def get_manifest(year, month, optional=False): - # unlike pagecounts-raw, pageviews doesn't provide hashes - #url = 'https://dumps.wikimedia.org/other/pagecounts-raw/%d/%d-%02d/md5sums.txt'%(year, year, month) - url = 'https://dumps.wikimedia.org/other/pageviews/%d/%d-%02d/' % (year, year, month) - print('Checking manifest at %s...'%(url)) - response = requests.get(url) - if response.status_code == 200: - #manifest = [line.strip().split() for line in response.text.split('\n') if 'pagecounts' in line] - manifest = [('00000000000000000000000000000000', line[9:37]) for line in response.text.split('\n') if ' max_name: - new_logs[name] = hash - print(' New job: %s [%s]'%(name, hash)) - print('Found %d new job(s)'%(len(new_logs))) - - # store metadata for new jobs - for name in sorted(new_logs.keys()): - cur.execute('INSERT INTO `wiki_raw` (`name`, `hash`) VALUES (%s, %s)', (name, new_logs[name])) - - # cleanup - cur.close() - cnx.commit() - cnx.close() - - -if __name__ == '__main__': - run() + # connect to the database + u, p = secrets.db.epi + cnx = mysql.connector.connect(user=u, password=p, database="epidata") + cur = cnx.cursor() + + # get the most recent job in wiki_raw + # luckily, "pageviews" is lexicographically greater than "pagecounts-raw" + cur.execute("SELECT max(`name`) FROM `wiki_raw`") + for (max_name,) in cur: + pass + print(f"Last known file: {max_name}") + timestamp = get_timestamp(max_name) + + # crawl dumps.wikimedia.org to find more recent access logs + t1, t2 = floor_timestamp(timestamp), ceil_timestamp(timestamp) + manifest = get_manifest(t1.year, t1.month, optional=False) + if t2.month != t1.month: + manifest += get_manifest(t2.year, t2.month, optional=True) + + # find access logs newer than the most recent job + new_logs = {} + for (hash, name) in manifest: + if max_name is None or name > max_name: + new_logs[name] = hash + print(f" New job: {name} [{hash}]") + print(f"Found {len(new_logs)} new job(s)") + + # store metadata for new jobs + for name in sorted(new_logs.keys()): + cur.execute( + "INSERT INTO `wiki_raw` (`name`, `hash`) VALUES (%s, %s)", (name, new_logs[name]) + ) + + # cleanup + cur.close() + cnx.commit() + cnx.close() + + +if __name__ == "__main__": + run() diff --git a/src/acquisition/wiki/wiki_util.py b/src/acquisition/wiki/wiki_util.py index ed3c743bc..55bf3e2ca 100644 --- a/src/acquisition/wiki/wiki_util.py +++ b/src/acquisition/wiki/wiki_util.py @@ -1,159 +1,156 @@ - - - class Articles: # Notice that all languages must be two chars, because that `language` column in table `wiki` is CHAR(2) - available_languages = ['en', 'es', 'pt'] + available_languages = ["en", "es", "pt"] en_articles_flu = [ - 'Influenza_B_virus', - 'Influenza_A_virus', - 'Human_flu', - 
'Influenzavirus_C', - 'Oseltamivir', - 'Influenza', - 'Influenzavirus_A', - 'Influenza_A_virus_subtype_H1N1', - 'Zanamivir', - 'Influenza-like_illness', - 'Common_cold', - 'Sore_throat', - 'Flu_season', - 'Chills', - 'Fever', - 'Influenza_A_virus_subtype_H2N2', - 'Swine_influenza', - 'Shivering', - 'Canine_influenza', - 'Influenza_A_virus_subtype_H3N2', - 'Neuraminidase_inhibitor', - 'Influenza_pandemic', - 'Viral_pneumonia', - 'Influenza_prevention', - 'Influenza_A_virus_subtype_H1N2', - 'Rhinorrhea', - 'Orthomyxoviridae', - 'Nasal_congestion', - 'Gastroenteritis', - 'Rimantadine', - 'Paracetamol', - 'Amantadine', - 'Viral_neuraminidase', - 'Headache', - 'Influenza_vaccine', - 'Vomiting', - 'Cough', - 'Influenza_A_virus_subtype_H5N1', - 'Nausea', - 'Avian_influenza', - 'Influenza_A_virus_subtype_H7N9', - 'Influenza_A_virus_subtype_H10N7', - 'Influenza_A_virus_subtype_H9N2', - 'Hemagglutinin_(influenza)', - 'Influenza_A_virus_subtype_H7N7', - 'Fatigue_(medical)', - 'Myalgia', - 'Influenza_A_virus_subtype_H7N3', - 'Malaise', - 'Equine_influenza', - 'Cat_flu', - 'Influenza_A_virus_subtype_H3N8', - 'Antiviral_drugs', - 'Influenza_A_virus_subtype_H7N2', + "Influenza_B_virus", + "Influenza_A_virus", + "Human_flu", + "Influenzavirus_C", + "Oseltamivir", + "Influenza", + "Influenzavirus_A", + "Influenza_A_virus_subtype_H1N1", + "Zanamivir", + "Influenza-like_illness", + "Common_cold", + "Sore_throat", + "Flu_season", + "Chills", + "Fever", + "Influenza_A_virus_subtype_H2N2", + "Swine_influenza", + "Shivering", + "Canine_influenza", + "Influenza_A_virus_subtype_H3N2", + "Neuraminidase_inhibitor", + "Influenza_pandemic", + "Viral_pneumonia", + "Influenza_prevention", + "Influenza_A_virus_subtype_H1N2", + "Rhinorrhea", + "Orthomyxoviridae", + "Nasal_congestion", + "Gastroenteritis", + "Rimantadine", + "Paracetamol", + "Amantadine", + "Viral_neuraminidase", + "Headache", + "Influenza_vaccine", + "Vomiting", + "Cough", + "Influenza_A_virus_subtype_H5N1", + "Nausea", + "Avian_influenza", + "Influenza_A_virus_subtype_H7N9", + "Influenza_A_virus_subtype_H10N7", + "Influenza_A_virus_subtype_H9N2", + "Hemagglutinin_(influenza)", + "Influenza_A_virus_subtype_H7N7", + "Fatigue_(medical)", + "Myalgia", + "Influenza_A_virus_subtype_H7N3", + "Malaise", + "Equine_influenza", + "Cat_flu", + "Influenza_A_virus_subtype_H3N8", + "Antiviral_drugs", + "Influenza_A_virus_subtype_H7N2", ] en_articles_noro = [ - 'Norovirus', - 'Diarrhea', - 'Dehydration', - 'Gastroenteritis', - 'Vomiting', - 'Abdominal_pain', - 'Nausea', - 'Foodborne_illness', - 'Rotavirus', - 'Fecal–oral_route', - 'Intravenous_therapy', - 'Oral_rehydration_therapy', - 'Shellfish', - 'Caliciviridae', - 'Leaky_scanning', + "Norovirus", + "Diarrhea", + "Dehydration", + "Gastroenteritis", + "Vomiting", + "Abdominal_pain", + "Nausea", + "Foodborne_illness", + "Rotavirus", + "Fecal–oral_route", + "Intravenous_therapy", + "Oral_rehydration_therapy", + "Shellfish", + "Caliciviridae", + "Leaky_scanning", ] en_articles_dengue = [ - 'Dengue_fever', - 'Dengue_virus', - 'Aedes', - 'Aedes_aegypti', - 'Dengue_vaccine', - 'Mosquito', - 'Mosquito-borne_disease', - 'Blood_transfusion', - 'Paracetamol', - 'Fever', - 'Headache', - 'Rhinitis', - 'Flavivirus', - 'Exanthem', - 'Myalgia', - 'Arthralgia', - 'Thrombocytopenia', - 'Hematuria', - 'Nosebleed', - 'Petechia', - 'Nausea', - 'Vomiting', - 'Diarrhea', + "Dengue_fever", + "Dengue_virus", + "Aedes", + "Aedes_aegypti", + "Dengue_vaccine", + "Mosquito", + "Mosquito-borne_disease", + "Blood_transfusion", + "Paracetamol", + 
"Fever", + "Headache", + "Rhinitis", + "Flavivirus", + "Exanthem", + "Myalgia", + "Arthralgia", + "Thrombocytopenia", + "Hematuria", + "Nosebleed", + "Petechia", + "Nausea", + "Vomiting", + "Diarrhea", ] en_articles = list(set(en_articles_flu + en_articles_noro + en_articles_dengue)) es_articles = [ - 'Dengue', - 'Virus_dengue', - 'Aedes', - 'Aedes_aegypti', - 'Culicidae', - 'Transfusión_de_sangre', - 'Paracetamol', - 'Fiebre', - 'Cefalea', - 'Coriza', - 'Flavivirus', - 'Exantema', - 'Mosquito', - 'Mialgia', - 'Artralgia', - 'Trombocitopenia', - 'Hematuria', - 'Epistaxis', - 'Petequia', - 'Náusea', - 'Vómito', - 'Diarrea', + "Dengue", + "Virus_dengue", + "Aedes", + "Aedes_aegypti", + "Culicidae", + "Transfusión_de_sangre", + "Paracetamol", + "Fiebre", + "Cefalea", + "Coriza", + "Flavivirus", + "Exantema", + "Mosquito", + "Mialgia", + "Artralgia", + "Trombocitopenia", + "Hematuria", + "Epistaxis", + "Petequia", + "Náusea", + "Vómito", + "Diarrea", ] pt_articles = [ - 'Dengue', - 'Vírus_da_dengue', - 'Aedes', - 'Aedes_aegypti', - 'Culicidae', - 'Transfusão_de_sangue', - 'Paracetamol', - 'Febre', - 'Cefaleia', - 'Coriza', - 'Flavivírus', - 'Exantema', - 'Mialgia', - 'Artralgia', - 'Trombocitopenia', - 'Hematúria', - 'Epistaxe', - 'Petéquia', - 'Náusea', - 'Vômito', - 'Diarreia', + "Dengue", + "Vírus_da_dengue", + "Aedes", + "Aedes_aegypti", + "Culicidae", + "Transfusão_de_sangue", + "Paracetamol", + "Febre", + "Cefaleia", + "Coriza", + "Flavivírus", + "Exantema", + "Mialgia", + "Artralgia", + "Trombocitopenia", + "Hematúria", + "Epistaxe", + "Petéquia", + "Náusea", + "Vômito", + "Diarreia", ]