diff --git a/.editorconfig b/.editorconfig new file mode 100644 index 000000000..8a80734f0 --- /dev/null +++ b/.editorconfig @@ -0,0 +1,21 @@ +# EditorConfig helps developers define and maintain consistent +# coding styles between different editors and IDEs +# editorconfig.org + +root = true + + +[*] + +# Change these settings to your own preference +indent_style = space +indent_size = 4 + +# We recommend you to keep these unchanged +end_of_line = lf +charset = utf-8 +trim_trailing_whitespace = true +insert_final_newline = true + +[*.md] +trim_trailing_whitespace = false diff --git a/.git-blame-ignore-revs b/.git-blame-ignore-revs new file mode 100644 index 000000000..445436af3 --- /dev/null +++ b/.git-blame-ignore-revs @@ -0,0 +1,15 @@ +1278d2716c9f14018d4c7160ecaba24be955d92e +d07d9f4138ff7868f4f3d4ee33a61fa5a647aae0 +9bf4b91ccfd74ce1ae384daf8d54802b3977306b +355b8f31a5279f04413696b2b8b7639810e5a988 +7c2331d52aada34f383300d4cf76adc7dcade346 +22855140f3c478370ca5261eb38c0597ed895fcc +8f33ff506c457f3f1f657bf421b3119a43ddc708 +a4da4d20541c09405d983e4bc281f8c1e5406c1f +87f1facd8b170c56c97b7f092ad86dc47cdab8fe +61427c5540b37123ad2f1db7d3558cd14500163a +2dad3e8f27d4129b2438e751c474e197321b1993 +39146ec9c526cd64c590e24bddaf3ec178358084 +46b28c8d0c2a04a116947430b8b632d5eac10734 +546742beae8000463c8bc94e28ceaa63b5501568 +3840136ab3386a2237c7a69d297d47c86fe0e860 \ No newline at end of file diff --git a/src/acquisition/afhsb/afhsb_csv.py b/src/acquisition/afhsb/afhsb_csv.py index b839c4053..f4d620803 100644 --- a/src/acquisition/afhsb/afhsb_csv.py +++ b/src/acquisition/afhsb/afhsb_csv.py @@ -1,4 +1,4 @@ -''' +""" afhsb_csv.py creates CSV files filled_00to13.csv, filled_13to17.csv and simple_DMISID_FY2018.csv which will be later used to create MYSQL data tables. 
@@ -9,7 +9,7 @@ ili_1_2000_5_2013_new.sas7bdat and ili_1_2013_11_2017_new.sas7bdat under SOURCE_DIR country_codes.csv and DMISID_FY2018.csv under TARGET_DIR All intermediate files and final csv files will be stored in TARGET_DIR -''' +""" import csv import os @@ -19,174 +19,181 @@ import epiweeks as epi -DATAPATH = '/home/automation/afhsb_data' +DATAPATH = "/home/automation/afhsb_data" SOURCE_DIR = DATAPATH TARGET_DIR = DATAPATH INVALID_DMISIDS = set() + def get_flu_cat(dx): - # flu1 (influenza) - if len(dx) == 0: - return None - dx = dx.capitalize() - if dx.isnumeric(): - for prefix in ["487", "488"]: - if dx.startswith(prefix): - return 1 - for i in range(0, 7): - prefix = str(480 + i) - if dx.startswith(prefix): - return 2 - for i in range(0, 7): - prefix = str(460 + i) - if dx.startswith(prefix): - return 3 - for prefix in ["07999", "3829", "7806", "7862"]: - if dx.startswith(prefix): - return 3 - elif (dx[0].isalpha() and dx[1:].isnumeric()): - for prefix in ["J09", "J10", "J11"]: - if dx.startswith(prefix): - return 1 - for i in range(12, 19): - prefix = "J{}".format(i) - if dx.startswith(prefix): - return 2 - for i in range(0, 7): - prefix = "J0{}".format(i) - if dx.startswith(prefix): - return 3 - for i in range(20, 23): - prefix = "J{}".format(i) - if dx.startswith(prefix): - return 3 - for prefix in ["J40", "R05", "H669", "R509", "B9789"]: - if dx.startswith(prefix): - return 3 - else: - return None + # flu1 (influenza) + if len(dx) == 0: + return None + dx = dx.capitalize() + if dx.isnumeric(): + for prefix in ["487", "488"]: + if dx.startswith(prefix): + return 1 + for i in range(0, 7): + prefix = str(480 + i) + if dx.startswith(prefix): + return 2 + for i in range(0, 7): + prefix = str(460 + i) + if dx.startswith(prefix): + return 3 + for prefix in ["07999", "3829", "7806", "7862"]: + if dx.startswith(prefix): + return 3 + elif dx[0].isalpha() and dx[1:].isnumeric(): + for prefix in ["J09", "J10", "J11"]: + if dx.startswith(prefix): + return 1 + for i in range(12, 19): + prefix = "J{}".format(i) + if dx.startswith(prefix): + return 2 + for i in range(0, 7): + prefix = "J0{}".format(i) + if dx.startswith(prefix): + return 3 + for i in range(20, 23): + prefix = "J{}".format(i) + if dx.startswith(prefix): + return 3 + for prefix in ["J40", "R05", "H669", "R509", "B9789"]: + if dx.startswith(prefix): + return 3 + else: + return None + def aggregate_data(sourcefile, targetfile): - reader = sas7bdat.SAS7BDAT(os.path.join(SOURCE_DIR, sourcefile), skip_header=True) - # map column names to column indices - col_2_idx = {column.name.decode('utf-8'): column.col_id for column in reader.columns} - - def get_field(row, column): - return row[col_2_idx[column]] - - def row2flu(row): - for i in range(1, 9): - dx = get_field(row, "dx{}".format(i)) - flu_cat = get_flu_cat(dx) - if flu_cat is not None: - return flu_cat - return 0 - - def row2epiweek(row): - date = get_field(row, 'd_event') - year, month, day = date.year, date.month, date.day - week_tuple = epi.Week.fromdate(year, month, day).weektuple() - year, week_num = week_tuple[0], week_tuple[1] - return year, week_num - - results_dict = {} - for _, row in enumerate(reader): - # if (r >= 1000000): break - if get_field(row, 'type') != "Outpt": - continue - year, week_num = row2epiweek(row) - dmisid = get_field(row, 'DMISID') - flu_cat = row2flu(row) - - key_list = [year, week_num, dmisid, flu_cat] - curr_dict = results_dict - for i, key in enumerate(key_list): - if i == len(key_list) - 1: - if key not in curr_dict: - curr_dict[key] = 0 - 
curr_dict[key] += 1 - else: - if key not in curr_dict: - curr_dict[key] = {} - curr_dict = curr_dict[key] - - results_path = os.path.join(TARGET_DIR, targetfile) - with open(results_path, 'wb') as f: - pickle.dump(results_dict, f, pickle.HIGHEST_PROTOCOL) + reader = sas7bdat.SAS7BDAT(os.path.join(SOURCE_DIR, sourcefile), skip_header=True) + # map column names to column indices + col_2_idx = {column.name.decode("utf-8"): column.col_id for column in reader.columns} + + def get_field(row, column): + return row[col_2_idx[column]] + + def row2flu(row): + for i in range(1, 9): + dx = get_field(row, "dx{}".format(i)) + flu_cat = get_flu_cat(dx) + if flu_cat is not None: + return flu_cat + return 0 + + def row2epiweek(row): + date = get_field(row, "d_event") + year, month, day = date.year, date.month, date.day + week_tuple = epi.Week.fromdate(year, month, day).weektuple() + year, week_num = week_tuple[0], week_tuple[1] + return year, week_num + + results_dict = {} + for _, row in enumerate(reader): + # if (r >= 1000000): break + if get_field(row, "type") != "Outpt": + continue + year, week_num = row2epiweek(row) + dmisid = get_field(row, "DMISID") + flu_cat = row2flu(row) + + key_list = [year, week_num, dmisid, flu_cat] + curr_dict = results_dict + for i, key in enumerate(key_list): + if i == len(key_list) - 1: + if key not in curr_dict: + curr_dict[key] = 0 + curr_dict[key] += 1 + else: + if key not in curr_dict: + curr_dict[key] = {} + curr_dict = curr_dict[key] + + results_path = os.path.join(TARGET_DIR, targetfile) + with open(results_path, "wb") as f: + pickle.dump(results_dict, f, pickle.HIGHEST_PROTOCOL) ################# Functions for geographical information #################### + def get_country_mapping(): - filename = "country_codes.csv" - mapping = dict() - with open(os.path.join(TARGET_DIR, filename), "r") as csvfile: - reader = csv.DictReader(csvfile) - for row in reader: - print(row.keys()) - alpha2 = row['alpha-2'] - alpha3 = row['alpha-3'] - mapping[alpha2] = alpha3 - - return mapping + filename = "country_codes.csv" + mapping = dict() + with open(os.path.join(TARGET_DIR, filename), "r") as csvfile: + reader = csv.DictReader(csvfile) + for row in reader: + print(row.keys()) + alpha2 = row["alpha-2"] + alpha3 = row["alpha-3"] + mapping[alpha2] = alpha3 + + return mapping + def format_dmisid_csv(filename, target_name): - src_path = os.path.join(TARGET_DIR, "{}.csv".format(filename)) - dst_path = os.path.join(TARGET_DIR, target_name) - - src_csv = open(src_path, "r", encoding='utf-8-sig') - reader = csv.DictReader(src_csv) - - dst_csv = open(dst_path, "w") - fieldnames = ['dmisid', 'country', 'state', 'zip5'] - writer = csv.DictWriter(dst_csv, fieldnames=fieldnames) - writer.writeheader() - - country_mapping = get_country_mapping() - - for row in reader: - country2 = row['Facility ISO Country Code'] - if country2 == "": - country3 = "" - elif country2 not in country_mapping: - for key in row.keys(): - print(key, row[key]) - continue - else: - country3 = country_mapping[country2] - new_row = {'dmisid': row['DMIS ID'], - 'country': country3, - 'state': row['Facility State Code'], - 'zip5': row['Facility 5-Digit ZIP Code']} - writer.writerow(new_row) + src_path = os.path.join(TARGET_DIR, "{}.csv".format(filename)) + dst_path = os.path.join(TARGET_DIR, target_name) + + src_csv = open(src_path, "r", encoding="utf-8-sig") + reader = csv.DictReader(src_csv) + + dst_csv = open(dst_path, "w") + fieldnames = ["dmisid", "country", "state", "zip5"] + writer = csv.DictWriter(dst_csv, 
fieldnames=fieldnames) + writer.writeheader() + + country_mapping = get_country_mapping() + + for row in reader: + country2 = row["Facility ISO Country Code"] + if country2 == "": + country3 = "" + elif country2 not in country_mapping: + for key in row.keys(): + print(key, row[key]) + continue + else: + country3 = country_mapping[country2] + new_row = {"dmisid": row["DMIS ID"], "country": country3, "state": row["Facility State Code"], "zip5": row["Facility 5-Digit ZIP Code"]} + writer.writerow(new_row) + def dmisid(): - filename = 'DMISID_FY2018' - target_name = "simple_DMISID_FY2018.csv" - format_dmisid_csv(filename, target_name) - - -cen2states = {'cen1': {'CT', 'ME', 'MA', 'NH', 'RI', 'VT'}, - 'cen2': {'NJ', 'NY', 'PA'}, - 'cen3': {'IL', 'IN', 'MI', 'OH', 'WI'}, - 'cen4': {'IA', 'KS', 'MN', 'MO', 'NE', 'ND', 'SD'}, - 'cen5': {'DE', 'DC', 'FL', 'GA', 'MD', 'NC', 'SC', 'VA', 'WV'}, - 'cen6': {'AL', 'KY', 'MS', 'TN'}, - 'cen7': {'AR', 'LA', 'OK', 'TX'}, - 'cen8': {'AZ', 'CO', 'ID', 'MT', 'NV', 'NM', 'UT', 'WY'}, - 'cen9': {'AK', 'CA', 'HI', 'OR', 'WA'}} - -hhs2states = {'hhs1': {'VT', 'CT', 'ME', 'MA', 'NH', 'RI'}, - 'hhs2': {'NJ', 'NY'}, - 'hhs3': {'DE', 'DC', 'MD', 'PA', 'VA', 'WV'}, - 'hhs4': {'AL', 'FL', 'GA', 'KY', 'MS', 'NC', 'TN', 'SC'}, - 'hhs5': {'IL', 'IN', 'MI', 'MN', 'OH', 'WI'}, - 'hhs6': {'AR', 'LA', 'NM', 'OK', 'TX'}, - 'hhs7': {'IA', 'KS', 'MO', 'NE'}, - 'hhs8': {'CO', 'MT', 'ND', 'SD', 'UT', 'WY'}, - 'hhs9': {'AZ', 'CA', 'HI', 'NV'}, - 'hhs10': {'AK', 'ID', 'OR', 'WA'}} + filename = "DMISID_FY2018" + target_name = "simple_DMISID_FY2018.csv" + format_dmisid_csv(filename, target_name) + + +cen2states = { + "cen1": {"CT", "ME", "MA", "NH", "RI", "VT"}, + "cen2": {"NJ", "NY", "PA"}, + "cen3": {"IL", "IN", "MI", "OH", "WI"}, + "cen4": {"IA", "KS", "MN", "MO", "NE", "ND", "SD"}, + "cen5": {"DE", "DC", "FL", "GA", "MD", "NC", "SC", "VA", "WV"}, + "cen6": {"AL", "KY", "MS", "TN"}, + "cen7": {"AR", "LA", "OK", "TX"}, + "cen8": {"AZ", "CO", "ID", "MT", "NV", "NM", "UT", "WY"}, + "cen9": {"AK", "CA", "HI", "OR", "WA"}, +} + +hhs2states = { + "hhs1": {"VT", "CT", "ME", "MA", "NH", "RI"}, + "hhs2": {"NJ", "NY"}, + "hhs3": {"DE", "DC", "MD", "PA", "VA", "WV"}, + "hhs4": {"AL", "FL", "GA", "KY", "MS", "NC", "TN", "SC"}, + "hhs5": {"IL", "IN", "MI", "MN", "OH", "WI"}, + "hhs6": {"AR", "LA", "NM", "OK", "TX"}, + "hhs7": {"IA", "KS", "MO", "NE"}, + "hhs8": {"CO", "MT", "ND", "SD", "UT", "WY"}, + "hhs9": {"AZ", "CA", "HI", "NV"}, + "hhs10": {"AK", "ID", "OR", "WA"}, +} + def state2region(D): results = dict() @@ -197,155 +204,161 @@ def state2region(D): results[state] = region return results + def state2region_csv(): - to_hhs = state2region(hhs2states) - to_cen = state2region(cen2states) - states = to_hhs.keys() - target_name = "state2region.csv" - fieldnames = ['state', 'hhs', 'cen'] - with open(target_name, "w") as csvfile: - writer = csv.DictWriter(csvfile, fieldnames=fieldnames) - writer.writeheader() - for state in states: - content = {"state": state, "hhs": to_hhs[state], "cen": to_cen[state]} - writer.writerow(content) + to_hhs = state2region(hhs2states) + to_cen = state2region(cen2states) + states = to_hhs.keys() + target_name = "state2region.csv" + fieldnames = ["state", "hhs", "cen"] + with open(target_name, "w") as csvfile: + writer = csv.DictWriter(csvfile, fieldnames=fieldnames) + writer.writeheader() + for state in states: + content = {"state": state, "hhs": to_hhs[state], "cen": to_cen[state]} + writer.writerow(content) + ################# Functions for geographical information 
#################### ######################### Functions for AFHSB data ########################## + def write_afhsb_csv(period): - flu_mapping = {0: "ili-flu3", 1: "flu1", 2:"flu2-flu1", 3: "flu3-flu2"} - results_dict = pickle.load(open(os.path.join(TARGET_DIR, "{}.pickle".format(period)), 'rb')) - - fieldnames = ["id", "epiweek", "dmisid", "flu_type", "visit_sum"] - with open(os.path.join(TARGET_DIR, "{}.csv".format(period)), 'w') as csvfile: - writer = csv.DictWriter(csvfile, fieldnames=fieldnames) - writer.writeheader() - - i = 0 - for year in sorted(results_dict.keys()): - year_dict = results_dict[year] - for week in sorted(year_dict.keys()): - week_dict = year_dict[week] - for dmisid in sorted(week_dict.keys()): - dmisid_dict = week_dict[dmisid] - for flu in sorted(dmisid_dict.keys()): - visit_sum = dmisid_dict[flu] - i += 1 - epiweek = int("{}{:02d}".format(year, week)) - flu_type = flu_mapping[flu] - - row = {"epiweek": epiweek, "dmisid": None if (not dmisid.isnumeric()) else dmisid, - "flu_type": flu_type, "visit_sum": visit_sum, "id": i} - writer.writerow(row) - if i % 100000 == 0: - print(row) + flu_mapping = {0: "ili-flu3", 1: "flu1", 2: "flu2-flu1", 3: "flu3-flu2"} + results_dict = pickle.load(open(os.path.join(TARGET_DIR, "{}.pickle".format(period)), "rb")) + + fieldnames = ["id", "epiweek", "dmisid", "flu_type", "visit_sum"] + with open(os.path.join(TARGET_DIR, "{}.csv".format(period)), "w") as csvfile: + writer = csv.DictWriter(csvfile, fieldnames=fieldnames) + writer.writeheader() + + i = 0 + for year in sorted(results_dict.keys()): + year_dict = results_dict[year] + for week in sorted(year_dict.keys()): + week_dict = year_dict[week] + for dmisid in sorted(week_dict.keys()): + dmisid_dict = week_dict[dmisid] + for flu in sorted(dmisid_dict.keys()): + visit_sum = dmisid_dict[flu] + i += 1 + epiweek = int("{}{:02d}".format(year, week)) + flu_type = flu_mapping[flu] + + row = {"epiweek": epiweek, "dmisid": None if (not dmisid.isnumeric()) else dmisid, "flu_type": flu_type, "visit_sum": visit_sum, "id": i} + writer.writerow(row) + if i % 100000 == 0: + print(row) + def dmisid_start_time_from_file(filename): - starttime_record = dict() - with open(filename, 'r') as csvfile: - reader = csv.DictReader(csvfile) - for row in reader: - dmisid = row['dmisid'] - epiweek = int(row['epiweek']) - if dmisid not in starttime_record: - starttime_record[dmisid] = epiweek - else: - starttime_record[dmisid] = min(epiweek, starttime_record[dmisid]) - return starttime_record + starttime_record = dict() + with open(filename, "r") as csvfile: + reader = csv.DictReader(csvfile) + for row in reader: + dmisid = row["dmisid"] + epiweek = int(row["epiweek"]) + if dmisid not in starttime_record: + starttime_record[dmisid] = epiweek + else: + starttime_record[dmisid] = min(epiweek, starttime_record[dmisid]) + return starttime_record + def dmisid_start_time(): - record1 = dmisid_start_time_from_file(os.path.join(TARGET_DIR, "00to13.csv")) - record2 = dmisid_start_time_from_file(os.path.join(TARGET_DIR, "13to17.csv")) - record = record1 - for dmisid, epiweek in record2.items(): - if dmisid in record: - record[dmisid] = min(record[dmisid], epiweek) - else: - record[dmisid] = epiweek - return record + record1 = dmisid_start_time_from_file(os.path.join(TARGET_DIR, "00to13.csv")) + record2 = dmisid_start_time_from_file(os.path.join(TARGET_DIR, "13to17.csv")) + record = record1 + for dmisid, epiweek in record2.items(): + if dmisid in record: + record[dmisid] = min(record[dmisid], epiweek) + else: + 
record[dmisid] = epiweek + return record + def fillin_zero_to_csv(period, dmisid_start_record): - src_path = os.path.join(TARGET_DIR, "{}.csv".format(period)) - dst_path = os.path.join(TARGET_DIR, "filled_{}.csv".format(period)) - - # Load data into a dictionary - src_csv = open(src_path, "r") - reader = csv.DictReader(src_csv) - - results_dict = dict() # epiweek -> dmisid -> flu_type: visit_sum - for i, row in enumerate(reader): - epiweek = int(row['epiweek']) - dmisid = row['dmisid'] - flu_type = row['flu_type'] - visit_sum = row['visit_sum'] - if epiweek not in results_dict: - results_dict[epiweek] = dict() - week_dict = results_dict[epiweek] - if dmisid not in week_dict: - week_dict[dmisid] = dict() - dmisid_dict = week_dict[dmisid] - dmisid_dict[flu_type] = visit_sum - - # Fill in zero count records - dmisid_group = dmisid_start_record.keys() - flutype_group = ["ili-flu3", "flu1", "flu2-flu1", "flu3-flu2"] - - for epiweek in results_dict.keys(): - week_dict = results_dict[epiweek] - for dmisid in dmisid_group: - start_week = dmisid_start_record[dmisid] - if start_week > epiweek: - continue - - if dmisid not in week_dict: - week_dict[dmisid] = dict() - - dmisid_dict = week_dict[dmisid] - for flutype in flutype_group: - if flutype not in dmisid_dict: - dmisid_dict[flutype] = 0 - - # Write to csv files - dst_csv = open(dst_path, "w") - fieldnames = ["id", "epiweek", "dmisid", "flu_type", "visit_sum"] - writer = csv.DictWriter(dst_csv, fieldnames=fieldnames) - writer.writeheader() - - i = 1 - for epiweek in results_dict: - for dmisid in results_dict[epiweek]: - for flutype in results_dict[epiweek][dmisid]: - visit_sum = results_dict[epiweek][dmisid][flutype] - row = {"id": i, "epiweek": epiweek, "dmisid": dmisid, - "flu_type": flutype, "visit_sum": visit_sum} - writer.writerow(row) - if i % 100000 == 0: - print(row) - i += 1 - print("Wrote {} rows".format(i)) + src_path = os.path.join(TARGET_DIR, "{}.csv".format(period)) + dst_path = os.path.join(TARGET_DIR, "filled_{}.csv".format(period)) + + # Load data into a dictionary + src_csv = open(src_path, "r") + reader = csv.DictReader(src_csv) + + results_dict = dict() # epiweek -> dmisid -> flu_type: visit_sum + for i, row in enumerate(reader): + epiweek = int(row["epiweek"]) + dmisid = row["dmisid"] + flu_type = row["flu_type"] + visit_sum = row["visit_sum"] + if epiweek not in results_dict: + results_dict[epiweek] = dict() + week_dict = results_dict[epiweek] + if dmisid not in week_dict: + week_dict[dmisid] = dict() + dmisid_dict = week_dict[dmisid] + dmisid_dict[flu_type] = visit_sum + + # Fill in zero count records + dmisid_group = dmisid_start_record.keys() + flutype_group = ["ili-flu3", "flu1", "flu2-flu1", "flu3-flu2"] + + for epiweek in results_dict.keys(): + week_dict = results_dict[epiweek] + for dmisid in dmisid_group: + start_week = dmisid_start_record[dmisid] + if start_week > epiweek: + continue + + if dmisid not in week_dict: + week_dict[dmisid] = dict() + + dmisid_dict = week_dict[dmisid] + for flutype in flutype_group: + if flutype not in dmisid_dict: + dmisid_dict[flutype] = 0 + + # Write to csv files + dst_csv = open(dst_path, "w") + fieldnames = ["id", "epiweek", "dmisid", "flu_type", "visit_sum"] + writer = csv.DictWriter(dst_csv, fieldnames=fieldnames) + writer.writeheader() + + i = 1 + for epiweek in results_dict: + for dmisid in results_dict[epiweek]: + for flutype in results_dict[epiweek][dmisid]: + visit_sum = results_dict[epiweek][dmisid][flutype] + row = {"id": i, "epiweek": epiweek, "dmisid": dmisid, "flu_type": 
flutype, "visit_sum": visit_sum} + writer.writerow(row) + if i % 100000 == 0: + print(row) + i += 1 + print("Wrote {} rows".format(i)) + ######################### Functions for AFHSB data ########################## + def main(): - # Build tables containing geographical information - state2region_csv() - dmisid() + # Build tables containing geographical information + state2region_csv() + dmisid() - # Aggregate raw data into pickle files - aggregate_data("ili_1_2000_5_2013_new.sas7bdat", "00to13.pickle") - aggregate_data("ili_1_2013_11_2017_new.sas7bdat", "13to17.pickle") + # Aggregate raw data into pickle files + aggregate_data("ili_1_2000_5_2013_new.sas7bdat", "00to13.pickle") + aggregate_data("ili_1_2013_11_2017_new.sas7bdat", "13to17.pickle") # write pickle content to csv files - write_afhsb_csv("00to13") - write_afhsb_csv("13to17") + write_afhsb_csv("00to13") + write_afhsb_csv("13to17") # Fill in zero count records - dmisid_start_record = dmisid_start_time() - fillin_zero_to_csv("00to13", dmisid_start_record) - fillin_zero_to_csv("13to17", dmisid_start_record) + dmisid_start_record = dmisid_start_time() + fillin_zero_to_csv("00to13", dmisid_start_record) + fillin_zero_to_csv("13to17", dmisid_start_record) -if __name__ == '__main__': - main() +if __name__ == "__main__": + main() diff --git a/src/acquisition/afhsb/afhsb_sql.py b/src/acquisition/afhsb/afhsb_sql.py index 278f3fc38..3ffd7c0fb 100644 --- a/src/acquisition/afhsb/afhsb_sql.py +++ b/src/acquisition/afhsb/afhsb_sql.py @@ -11,17 +11,17 @@ def init_dmisid_table(sourcefile): (u, p) = secrets.db.epi cnx = connector.connect(user=u, passwd=p, database="epidata") - table_name = 'dmisid_table' - create_table_cmd = ''' - CREATE TABLE `{}` ( + table_name = "dmisid_table" + create_table_cmd = f""" + CREATE TABLE `{table_name}` ( `dmisid` INT(4) NOT NULL PRIMARY KEY, `country` CHAR(3) NULL, `state` CHAR(2) NULL ); - '''.format(table_name) - populate_table_cmd = ''' - LOAD DATA INFILE '{}' - INTO TABLE {} + """ + populate_table_cmd = f""" + LOAD DATA INFILE '{sourcefile}' + INTO TABLE {table_name} FIELDS TERMINATED BY ',' ENCLOSED BY '"' LINES TERMINATED BY '\r\n' @@ -32,7 +32,7 @@ def init_dmisid_table(sourcefile): country = nullif(@country, ''), state = nullif(@state, '') ; - '''.format(sourcefile, table_name) + """ try: cursor = cnx.cursor() cursor.execute(create_table_cmd) @@ -41,27 +41,28 @@ def init_dmisid_table(sourcefile): finally: cnx.close() + def init_region_table(sourcefile): (u, p) = secrets.db.epi cnx = connector.connect(user=u, passwd=p, database="epidata") - table_name = 'state2region_table' - create_table_cmd = ''' - CREATE TABLE `{}` ( + table_name = "state2region_table" + create_table_cmd = f""" + CREATE TABLE `{table_name}` ( `state` CHAR(2) NOT NULL PRIMARY KEY, `hhs` CHAR(5) NOT NULL, `cen` CHAR(4) NOT NULL ); - '''.format(table_name) - populate_table_cmd = ''' - LOAD DATA INFILE '{}' - INTO TABLE {} + """ + populate_table_cmd = f""" + LOAD DATA INFILE '{sourcefile}' + INTO TABLE {table_name} FIELDS TERMINATED BY ',' ENCLOSED BY '"' LINES TERMINATED BY '\r\n' IGNORE 1 ROWS (@state, @hhs, @cen) SET state=@state, hhs=@hhs, cen=@cen; - '''.format(sourcefile, table_name) + """ try: cursor = cnx.cursor() cursor.execute(create_table_cmd) @@ -75,8 +76,8 @@ def init_raw_data(table_name, sourcefile): print("Initialize {}".format(table_name)) (u, p) = secrets.db.epi cnx = connector.connect(user=u, passwd=p, database="epidata") - create_table_cmd = ''' - CREATE TABLE IF NOT EXISTS `{}` ( + create_table_cmd = f""" + CREATE TABLE IF 
NOT EXISTS `{table_name}` ( `id` INT(11) NOT NULL PRIMARY KEY AUTO_INCREMENT, `epiweek` INT(6) NOT NULL, `dmisid` CHAR(4) NULL, @@ -87,10 +88,10 @@ def init_raw_data(table_name, sourcefile): KEY `dmisid` (`dmisid`), KEY `flu_type` (`flu_type`) ); - '''.format(table_name) - populate_table_cmd = ''' - LOAD DATA INFILE '{}' - INTO TABLE {} + """ + populate_table_cmd = f""" + LOAD DATA INFILE '{sourcefile}' + INTO TABLE {table_name} FIELDS TERMINATED BY ',' ENCLOSED BY '"' LINES TERMINATED BY '\r\n' @@ -103,7 +104,7 @@ def init_raw_data(table_name, sourcefile): flu_type = @flu, visit_sum = @visits ; - '''.format(sourcefile, table_name) + """ try: cursor = cnx.cursor() cursor.execute(create_table_cmd) @@ -112,18 +113,19 @@ def init_raw_data(table_name, sourcefile): finally: cnx.close() + def agg_by_state(src_table, dest_table): print("Aggregating records by states...") (u, p) = secrets.db.epi cnx = connector.connect(user=u, passwd=p, database="epidata") - cmd = ''' - CREATE TABLE {} + cmd = f""" + CREATE TABLE {dest_table} SELECT a.epiweek, a.flu_type, d.state, d.country, sum(a.visit_sum) visit_sum - FROM {} a + FROM {src_table} a LEFT JOIN dmisid_table d ON a.dmisid = d.dmisid GROUP BY a.epiweek, a.flu_type, d.state, d.country; - '''.format(dest_table, src_table) + """ try: cursor = cnx.cursor() cursor.execute(cmd) @@ -131,18 +133,19 @@ def agg_by_state(src_table, dest_table): finally: cnx.close() + def agg_by_region(src_table, dest_table): print("Aggregating records by regions...") (u, p) = secrets.db.epi cnx = connector.connect(user=u, passwd=p, database="epidata") - cmd = ''' - CREATE TABLE {} + cmd = f""" + CREATE TABLE {dest_table} SELECT s.epiweek, s.flu_type, r.hhs, r.cen, sum(s.visit_sum) visit_sum - FROM {} s + FROM {src_table} s LEFT JOIN state2region_table r ON s.state = r.state GROUP BY s.epiweek, s.flu_type, r.hhs, r.cen; - '''.format(dest_table, src_table) + """ try: cursor = cnx.cursor() cursor.execute(cmd) @@ -150,26 +153,29 @@ def agg_by_region(src_table, dest_table): finally: cnx.close() + def init_all_tables(datapath): init_dmisid_table(os.path.join(datapath, "simple_DMISID_FY2018.csv")) init_region_table(os.path.join(datapath, "state2region.csv")) periods = ["00to13", "13to17"] for period in periods: - raw_table_name = 'afhsb_{}_raw'.format(period) - state_table_name = 'afhsb_{}_state'.format(period) - region_table_name = 'afhsb_{}_region'.format(period) + raw_table_name = f"afhsb_{period}_raw" + state_table_name = f"afhsb_{period}_state" + region_table_name = f"afhsb_{period}_region" - init_raw_data(raw_table_name, os.path.join(datapath, "filled_{}.csv".format(period))) + init_raw_data(raw_table_name, os.path.join(datapath, f"filled_{period}.csv")) agg_by_state(raw_table_name, state_table_name) agg_by_region(state_table_name, region_table_name) + def dangerously_drop_all_afhsb_tables(): (u, p) = secrets.db.epi cnx = connector.connect(user=u, passwd=p, database="epidata") try: cursor = cnx.cursor() - cursor.execute(''' + cursor.execute( + """ DROP TABLE IF EXISTS `afhsb_00to13_raw`, `afhsb_00to13_region`, `afhsb_00to13_state`, @@ -178,11 +184,13 @@ def dangerously_drop_all_afhsb_tables(): `afhsb_13to17_state`, `state2region_table`, `dmisid_table`; - ''') - cnx.commit() # (might do nothing; each DROP commits itself anyway) + """ + ) + cnx.commit() # (might do nothing; each DROP commits itself anyway) finally: cnx.close() + def run_cmd(cmd): (u, p) = secrets.db.epi cnx = connector.connect(user=u, passwd=p, database="epidata") diff --git a/src/acquisition/afhsb/afhsb_update.py 
b/src/acquisition/afhsb/afhsb_update.py index c5a8635c8..0eea23e60 100644 --- a/src/acquisition/afhsb/afhsb_update.py +++ b/src/acquisition/afhsb/afhsb_update.py @@ -8,11 +8,12 @@ # first party from . import afhsb_sql -DEFAULT_DATAPATH = '/home/automation/afhsb_data' +DEFAULT_DATAPATH = "/home/automation/afhsb_data" + def main(): parser = argparse.ArgumentParser() - parser.add_argument('--datapath', action='store', type=str, default=DEFAULT_DATAPATH, help='filepath to directory containing csv files to input into database') + parser.add_argument("--datapath", action="store", type=str, default=DEFAULT_DATAPATH, help="filepath to directory containing csv files to input into database") args = parser.parse_args() # MariaDB appears to refuse to LOAD DATA INFILE except on files under # /var/lib/mysql (which seems dedicated to its own files) or /tmp; create a @@ -35,5 +36,5 @@ def main(): # (Temporary parent directory should be deleted automatically.) -if __name__ == '__main__': +if __name__ == "__main__": main() diff --git a/src/acquisition/cdcp/cdc_dropbox_receiver.py b/src/acquisition/cdcp/cdc_dropbox_receiver.py index eb0d97f2a..65626101b 100644 --- a/src/acquisition/cdcp/cdc_dropbox_receiver.py +++ b/src/acquisition/cdcp/cdc_dropbox_receiver.py @@ -29,128 +29,128 @@ # location constants -DROPBOX_BASE_DIR = '/cdc_page_stats' -DELPHI_BASE_DIR = '/common/cdc_stage' +DROPBOX_BASE_DIR = "/cdc_page_stats" +DELPHI_BASE_DIR = "/common/cdc_stage" def get_timestamp_string(): - """ - Return the current local date and time as a string. + """ + Return the current local date and time as a string. - The format is "%Y%m%d_%H%M%S". - """ - return datetime.datetime.now().strftime('%Y%m%d_%H%M%S') + The format is "%Y%m%d_%H%M%S". + """ + return datetime.datetime.now().strftime("%Y%m%d_%H%M%S") def trigger_further_processing(): - """Add CDCP processing scripts to the Automation run queue.""" + """Add CDCP processing scripts to the Automation run queue.""" - # connect - u, p = secrets.db.auto - cnx = mysql.connector.connect(user=u, password=p, database='automation') - cur = cnx.cursor() + # connect + u, p = secrets.db.auto + cnx = mysql.connector.connect(user=u, password=p, database="automation") + cur = cnx.cursor() - # add step "Process CDCP Data" to queue - cur.execute('CALL automation.RunStep(46)') + # add step "Process CDCP Data" to queue + cur.execute("CALL automation.RunStep(46)") - # disconnect - cur.close() - cnx.commit() - cnx.close() + # disconnect + cur.close() + cnx.commit() + cnx.close() def fetch_data(): - """ - Check for new files on dropbox, download them, zip them, cleanup dropbox, and - trigger further processing of new data. 
- """ - - # initialize dropbox api - dbx = dropbox.Dropbox(secrets.cdcp.dropbox_token) - - # look for new CDC data files - print('checking dropbox:%s' % DROPBOX_BASE_DIR) - save_list = [] - for entry in dbx.files_list_folder(DROPBOX_BASE_DIR).entries: - name = entry.name - if name.endswith('.csv') or name.endswith('.zip'): - print(' download "%s"' % name) - save_list.append(name) - else: - print(' skip "%s"' % name) - - # determine if there's anything to be done - if len(save_list) == 0: - print('did not find any new data files') - return - - # download new files, saving them inside of a new zip file - timestamp = get_timestamp_string() - zip_path = '%s/dropbox_%s.zip' % (DELPHI_BASE_DIR, timestamp) - print('downloading into delphi:%s' % zip_path) - with ZipFile(zip_path, 'w', ZIP_DEFLATED) as zf: + """ + Check for new files on dropbox, download them, zip them, cleanup dropbox, and + trigger further processing of new data. + """ + + # initialize dropbox api + dbx = dropbox.Dropbox(secrets.cdcp.dropbox_token) + + # look for new CDC data files + print(f"checking dropbox: {DROPBOX_BASE_DIR}") + save_list = [] + for entry in dbx.files_list_folder(DROPBOX_BASE_DIR).entries: + name = entry.name + if name.endswith(".csv") or name.endswith(".zip"): + print(f" download: {name}") + save_list.append(name) + else: + print(f" skip: {name}") + + # determine if there's anything to be done + if len(save_list) == 0: + print("did not find any new data files") + return + + # download new files, saving them inside of a new zip file + timestamp = get_timestamp_string() + zip_path = f"{DELPHI_BASE_DIR}/dropbox_{timestamp}.zip" + print(f"downloading into delphi:{zip_path}") + with ZipFile(zip_path, "w", ZIP_DEFLATED) as zf: + for name in save_list: + # location of the file on dropbox + dropbox_path = f"{DROPBOX_BASE_DIR}/{name}" + print(f" {dropbox_path}") + + # start the download + meta, resp = dbx.files_download(dropbox_path) + + # check status and length + if resp.status_code != 200: + raise Exception(["resp.status_code", resp.status_code]) + dropbox_len = meta.size + print(" need %d bytes..." % dropbox_len) + content_len = int(resp.headers.get("Content-Length", -1)) + if dropbox_len != content_len: + info = ["dropbox_len", dropbox_len, "content_len", content_len] + raise Exception(info) + + # finish the download, holding the data in this variable + filedata = resp.content + + # check the length again + payload_len = len(filedata) + print(" downloaded") + if dropbox_len != payload_len: + info = ["dropbox_len", dropbox_len, "payload_len", payload_len] + raise Exception(info) + + # add the downloaded file to the zip file + zf.writestr(name, filedata) + print(" added") + + # At this point, all the data is stored and awaiting further processing on + # the delphi server. + print(f"saved all new data in {zip_path}") + + # on dropbox, archive downloaded files so they won't be downloaded again + archive_dir = f"archived_reports/processed_{timestamp}" + print("archiving files...") for name in save_list: - # location of the file on dropbox - dropbox_path = '%s/%s' % (DROPBOX_BASE_DIR, name) - print(' %s' % dropbox_path) - - # start the download - meta, resp = dbx.files_download(dropbox_path) - - # check status and length - if resp.status_code != 200: - raise Exception(['resp.status_code', resp.status_code]) - dropbox_len = meta.size - print(' need %d bytes...' 
% dropbox_len) - content_len = int(resp.headers.get('Content-Length', -1)) - if dropbox_len != content_len: - info = ['dropbox_len', dropbox_len, 'content_len', content_len] - raise Exception(info) - - # finish the download, holding the data in this variable - filedata = resp.content - - # check the length again - payload_len = len(filedata) - print(' downloaded') - if dropbox_len != payload_len: - info = ['dropbox_len', dropbox_len, 'payload_len', payload_len] - raise Exception(info) - - # add the downloaded file to the zip file - zf.writestr(name, filedata) - print(' added') - - # At this point, all the data is stored and awaiting further processing on - # the delphi server. - print('saved all new data in %s' % zip_path) - - # on dropbox, archive downloaded files so they won't be downloaded again - archive_dir = 'archived_reports/processed_%s' % timestamp - print('archiving files...') - for name in save_list: - # source and destination - dropbox_src = '%s/%s' % (DROPBOX_BASE_DIR, name) - dropbox_dst = '%s/%s/%s' % (DROPBOX_BASE_DIR, archive_dir, name) - print(' "%s" -> "%s"' % (dropbox_src, dropbox_dst)) - - # move the file - meta = dbx.files_move(dropbox_src, dropbox_dst) - - # sanity check - if archive_dir not in meta.path_lower: - raise Exception('failed to move "%s"' % name) - - # finally, trigger the usual processing flow - print('triggering processing flow') - trigger_further_processing() - print('done') + # source and destination + dropbox_src = f"{DROPBOX_BASE_DIR}/{name}" + dropbox_dst = f"{DROPBOX_BASE_DIR}/{archive_dir}/{name}" + print(f" {dropbox_src} -> {dropbox_dst}") + + # move the file + meta = dbx.files_move(dropbox_src, dropbox_dst) + + # sanity check + if archive_dir not in meta.path_lower: + raise Exception(f"failed to move {name}") + + # finally, trigger the usual processing flow + print("triggering processing flow") + trigger_further_processing() + print("done") def main(): - # fetch new data - fetch_data() + # fetch new data + fetch_data() -if __name__ == '__main__': - main() +if __name__ == "__main__": + main() diff --git a/src/acquisition/cdcp/cdc_extract.py b/src/acquisition/cdcp/cdc_extract.py index 83ed08d5b..45519307e 100644 --- a/src/acquisition/cdcp/cdc_extract.py +++ b/src/acquisition/cdcp/cdc_extract.py @@ -75,7 +75,7 @@ def get_num_hits(cur, epiweek, state, page): - sql = ''' + sql = """ SELECT sum(c.`num`) `num` FROM @@ -86,36 +86,36 @@ def get_num_hits(cur, epiweek, state, page): m.`date` = c.`date` AND m.`state` = c.`state` WHERE m.`epiweek` = %s AND c.`state` = %s AND c.`page` LIKE %s - ''' - num = None - cur.execute(sql, (epiweek, state, page)) - for (num,) in cur: - pass - if num is None: - return 0 - return num + """ + num = None + cur.execute(sql, (epiweek, state, page)) + for (num,) in cur: + pass + if num is None: + return 0 + return num def get_total_hits(cur, epiweek, state): - sql = ''' + sql = """ SELECT sum(m.`total`) `total` FROM `cdc_meta` m WHERE m.`epiweek` = %s AND m.`state` = %s - ''' - total = None - cur.execute(sql, (epiweek, state)) - for (total,) in cur: - pass - if total is None: - raise Exception('missing data for %d-%s' % (epiweek, state)) - return total + """ + total = None + cur.execute(sql, (epiweek, state)) + for (total,) in cur: + pass + if total is None: + raise Exception("missing data for %d-%s" % (epiweek, state)) + return total def store_result(cur, epiweek, state, num1, num2, num3, num4, num5, num6, num7, num8, total): - sql = ''' + sql = """ INSERT INTO `cdc_extract` (`epiweek`, `state`, `num1`, `num2`, `num3`, 
`num4`, `num5`, `num6`, `num7`, `num8`, `total`) VALUES @@ -130,94 +130,94 @@ def store_result(cur, epiweek, state, num1, num2, num3, num4, num5, num6, num7, `num7` = %s, `num8` = %s, `total` = %s - ''' - values = [num1, num2, num3, num4, num5, num6, num7, num8, total] - args = tuple([epiweek, state] + values + values) - cur.execute(sql, args) + """ + values = [num1, num2, num3, num4, num5, num6, num7, num8, total] + args = tuple([epiweek, state] + values + values) + cur.execute(sql, args) def extract(first_week=None, last_week=None, test_mode=False): - # page title templates - pages = [ - '%What You Should Know for the % Influenza Season%', - '%What To Do If You Get Sick%', - '%Flu Symptoms & Severity%', - '%How Flu Spreads%', - '%What You Should Know About Flu Antiviral Drugs%', - '%Weekly US Map%', - '%Basics%', - '%Flu Activity & Surveillance%', - ] - - # location information - states = sorted(cdc_upload.STATES.values()) - - # connect - u, p = secrets.db.epi - cnx = mysql.connector.connect(user=u, password=p, database='epidata') - cur = cnx.cursor() - - # weeks to update - if first_week is None: - cur.execute('SELECT max(`epiweek`) FROM `cdc_extract`') - for (first_week,) in cur: - pass - if last_week is None: - cur.execute('SELECT max(`epiweek`) FROM `cdc_meta`') - for (last_week,) in cur: - pass - print('extracting %d--%d' % (first_week, last_week)) - - # update each epiweek - for epiweek in flu.range_epiweeks(first_week, last_week, inclusive=True): - # update each state - for state in states: - try: - num1 = get_num_hits(cur, epiweek, state, pages[0]) - num2 = get_num_hits(cur, epiweek, state, pages[1]) - num3 = get_num_hits(cur, epiweek, state, pages[2]) - num4 = get_num_hits(cur, epiweek, state, pages[3]) - num5 = get_num_hits(cur, epiweek, state, pages[4]) - num6 = get_num_hits(cur, epiweek, state, pages[5]) - num7 = get_num_hits(cur, epiweek, state, pages[6]) - num8 = get_num_hits(cur, epiweek, state, pages[7]) - total = get_total_hits(cur, epiweek, state) - store_result(cur, epiweek, state, num1, num2, num3, num4, num5, num6, num7, num8, total) - print(' %d-%s: %d %d %d %d %d %d %d %d (%d)' % (epiweek, state, num1, num2, num3, num4, num5, num6, num7, num8, total)) - except Exception as ex: - print(' %d-%s: failed' % (epiweek, state), ex) - #raise ex - sys.stdout.flush() - - # disconnect - cur.close() - if not test_mode: - cnx.commit() - cnx.close() + # page title templates + pages = [ + "%What You Should Know for the % Influenza Season%", + "%What To Do If You Get Sick%", + "%Flu Symptoms & Severity%", + "%How Flu Spreads%", + "%What You Should Know About Flu Antiviral Drugs%", + "%Weekly US Map%", + "%Basics%", + "%Flu Activity & Surveillance%", + ] + + # location information + states = sorted(cdc_upload.STATES.values()) + + # connect + u, p = secrets.db.epi + cnx = mysql.connector.connect(user=u, password=p, database="epidata") + cur = cnx.cursor() + + # weeks to update + if first_week is None: + cur.execute("SELECT max(`epiweek`) FROM `cdc_extract`") + for (first_week,) in cur: + pass + if last_week is None: + cur.execute("SELECT max(`epiweek`) FROM `cdc_meta`") + for (last_week,) in cur: + pass + print("extracting %d--%d" % (first_week, last_week)) + + # update each epiweek + for epiweek in flu.range_epiweeks(first_week, last_week, inclusive=True): + # update each state + for state in states: + try: + num1 = get_num_hits(cur, epiweek, state, pages[0]) + num2 = get_num_hits(cur, epiweek, state, pages[1]) + num3 = get_num_hits(cur, epiweek, state, pages[2]) + num4 = 
get_num_hits(cur, epiweek, state, pages[3]) + num5 = get_num_hits(cur, epiweek, state, pages[4]) + num6 = get_num_hits(cur, epiweek, state, pages[5]) + num7 = get_num_hits(cur, epiweek, state, pages[6]) + num8 = get_num_hits(cur, epiweek, state, pages[7]) + total = get_total_hits(cur, epiweek, state) + store_result(cur, epiweek, state, num1, num2, num3, num4, num5, num6, num7, num8, total) + print(" %d-%s: %d %d %d %d %d %d %d %d (%d)" % (epiweek, state, num1, num2, num3, num4, num5, num6, num7, num8, total)) + except Exception as ex: + print(" %d-%s: failed" % (epiweek, state), ex) + # raise ex + sys.stdout.flush() + + # disconnect + cur.close() + if not test_mode: + cnx.commit() + cnx.close() def main(): - # args and usage - parser = argparse.ArgumentParser() - parser.add_argument('--first', '-f', default=None, type=int, help='first epiweek override') - parser.add_argument('--last', '-l', default=None, type=int, help='last epiweek override') - parser.add_argument('--epiweek', '-w', default=None, type=int, help='epiweek override') - parser.add_argument('--test', '-t', default=False, action='store_true', help='dry run only') - args = parser.parse_args() - - # sanity check - first, last, week = args.first, args.last, args.epiweek - for ew in [first, last, week]: - if ew is not None: - flu.check_epiweek(ew) - if first is not None and last is not None and first > last: - raise Exception('epiweeks in the wrong order') - if week is not None: - first = last = week - - # extract the page hits for all states on the specified weeks - extract(first, last, args.test) - - -if __name__ == '__main__': - main() + # args and usage + parser = argparse.ArgumentParser() + parser.add_argument("--first", "-f", default=None, type=int, help="first epiweek override") + parser.add_argument("--last", "-l", default=None, type=int, help="last epiweek override") + parser.add_argument("--epiweek", "-w", default=None, type=int, help="epiweek override") + parser.add_argument("--test", "-t", default=False, action="store_true", help="dry run only") + args = parser.parse_args() + + # sanity check + first, last, week = args.first, args.last, args.epiweek + for ew in [first, last, week]: + if ew is not None: + flu.check_epiweek(ew) + if first is not None and last is not None and first > last: + raise Exception("epiweeks in the wrong order") + if week is not None: + first = last = week + + # extract the page hits for all states on the specified weeks + extract(first, last, args.test) + + +if __name__ == "__main__": + main() diff --git a/src/acquisition/cdcp/cdc_upload.py b/src/acquisition/cdcp/cdc_upload.py index c9c206dfa..fef0821b7 100644 --- a/src/acquisition/cdcp/cdc_upload.py +++ b/src/acquisition/cdcp/cdc_upload.py @@ -87,191 +87,192 @@ STATES = { - 'Alabama': 'AL', - 'Alaska': 'AK', - 'Arizona': 'AZ', - 'Arkansas': 'AR', - 'California': 'CA', - 'Colorado': 'CO', - 'Connecticut': 'CT', - 'Delaware': 'DE', - 'District of Columbia': 'DC', - 'Florida': 'FL', - 'Georgia': 'GA', - 'Hawaii': 'HI', - 'Idaho': 'ID', - 'Illinois': 'IL', - 'Indiana': 'IN', - 'Iowa': 'IA', - 'Kansas': 'KS', - 'Kentucky': 'KY', - 'Louisiana': 'LA', - 'Maine': 'ME', - 'Maryland': 'MD', - 'Massachusetts': 'MA', - 'Michigan': 'MI', - 'Minnesota': 'MN', - 'Mississippi': 'MS', - 'Missouri': 'MO', - 'Montana': 'MT', - 'Nebraska': 'NE', - 'Nevada': 'NV', - 'New Hampshire': 'NH', - 'New Jersey': 'NJ', - 'New Mexico': 'NM', - 'New York': 'NY', - 'North Carolina': 'NC', - 'North Dakota': 'ND', - 'Ohio': 'OH', - 'Oklahoma': 'OK', - 'Oregon': 'OR', - 
'Pennsylvania': 'PA', - 'Rhode Island': 'RI', - 'South Carolina': 'SC', - 'South Dakota': 'SD', - 'Tennessee': 'TN', - 'Texas': 'TX', - 'Utah': 'UT', - 'Vermont': 'VT', - 'Virginia': 'VA', - 'Washington': 'WA', - 'West Virginia': 'WV', - 'Wisconsin': 'WI', - 'Wyoming': 'WY', - #'Puerto Rico': 'PR', - #'Virgin Islands': 'VI', - #'Guam': 'GU', + "Alabama": "AL", + "Alaska": "AK", + "Arizona": "AZ", + "Arkansas": "AR", + "California": "CA", + "Colorado": "CO", + "Connecticut": "CT", + "Delaware": "DE", + "District of Columbia": "DC", + "Florida": "FL", + "Georgia": "GA", + "Hawaii": "HI", + "Idaho": "ID", + "Illinois": "IL", + "Indiana": "IN", + "Iowa": "IA", + "Kansas": "KS", + "Kentucky": "KY", + "Louisiana": "LA", + "Maine": "ME", + "Maryland": "MD", + "Massachusetts": "MA", + "Michigan": "MI", + "Minnesota": "MN", + "Mississippi": "MS", + "Missouri": "MO", + "Montana": "MT", + "Nebraska": "NE", + "Nevada": "NV", + "New Hampshire": "NH", + "New Jersey": "NJ", + "New Mexico": "NM", + "New York": "NY", + "North Carolina": "NC", + "North Dakota": "ND", + "Ohio": "OH", + "Oklahoma": "OK", + "Oregon": "OR", + "Pennsylvania": "PA", + "Rhode Island": "RI", + "South Carolina": "SC", + "South Dakota": "SD", + "Tennessee": "TN", + "Texas": "TX", + "Utah": "UT", + "Vermont": "VT", + "Virginia": "VA", + "Washington": "WA", + "West Virginia": "WV", + "Wisconsin": "WI", + "Wyoming": "WY", + #'Puerto Rico': 'PR', + #'Virgin Islands': 'VI', + #'Guam': 'GU', } -sql_cdc = ''' +sql_cdc = """ INSERT INTO `cdc` (`date`, `page`, `state`, `num`) VALUES (%s, %s, %s, %s) ON DUPLICATE KEY UPDATE `num` = %s -''' +""" -sql_cdc_meta = ''' +sql_cdc_meta = """ INSERT INTO `cdc_meta` (`date`, `epiweek`, `state`, `total`) VALUES (%s, yearweek(%s, 6), %s, %s) ON DUPLICATE KEY UPDATE `total` = %s -''' +""" def upload(test_mode): - # connect - u, p = secrets.db.epi - cnx = mysql.connector.connect(user=u, password=p, database='epidata') - cur = cnx.cursor() - - # insert (or update) table `cdc` - def insert_cdc(date, page, state, num): - cur.execute(sql_cdc, (date, page, state, num, num)) - - # insert (or update) table `cdc_meta` - def insert_cdc_meta(date, state, total): - cur.execute(sql_cdc_meta, (date, date, state, total, total)) - - # loop over rows until the header row is found - def find_header(reader): - for row in reader: - if len(row) > 0 and row[0] == 'Date': - return True - return False - - # parse csv files for `cdc` and `cdc_meta` - def parse_csv(meta): - def handler(reader): - if not find_header(reader): - raise Exception('header not found') - count = 0 - cols = 3 if meta else 4 - for row in reader: - if len(row) != cols: - continue - if meta: - (a, c, d) = row - else: - (a, b, c, d) = row - c = c[:-16] - if c not in STATES: - continue - a = datetime.strptime(a, '%b %d, %Y').strftime('%Y-%m-%d') - c = STATES[c] - d = int(d) - if meta: - insert_cdc_meta(a, c, d) + # connect + u, p = secrets.db.epi + cnx = mysql.connector.connect(user=u, password=p, database="epidata") + cur = cnx.cursor() + + # insert (or update) table `cdc` + def insert_cdc(date, page, state, num): + cur.execute(sql_cdc, (date, page, state, num, num)) + + # insert (or update) table `cdc_meta` + def insert_cdc_meta(date, state, total): + cur.execute(sql_cdc_meta, (date, date, state, total, total)) + + # loop over rows until the header row is found + def find_header(reader): + for row in reader: + if len(row) > 0 and row[0] == "Date": + return True + return False + + # parse csv files for `cdc` and `cdc_meta` + def parse_csv(meta): + def 
handler(reader): + if not find_header(reader): + raise Exception("header not found") + count = 0 + cols = 3 if meta else 4 + for row in reader: + if len(row) != cols: + continue + if meta: + (a, c, d) = row + else: + (a, b, c, d) = row + c = c[:-16] + if c not in STATES: + continue + a = datetime.strptime(a, "%b %d, %Y").strftime("%Y-%m-%d") + c = STATES[c] + d = int(d) + if meta: + insert_cdc_meta(a, c, d) + else: + insert_cdc(a, b, c, d) + count += 1 + return count + + return handler + + # recursively open zip files + def parse_zip(zf, level=1): + for name in zf.namelist(): + prefix = " " * level + print(prefix, name) + if name[-4:] == ".zip": + with zf.open(name) as temp: + with ZipFile(io.BytesIO(temp.read())) as zf2: + parse_zip(zf2, level + 1) + elif name[-4:] == ".csv": + handler = None + if "Flu Pages by Region" in name: + handler = parse_csv(False) + elif "Regions for all CDC" in name: + handler = parse_csv(True) + else: + print(prefix, " (skipped)") + if handler is not None: + with zf.open(name) as temp: + count = handler(csv.reader(io.StringIO(str(temp.read(), "utf-8")))) + print(prefix, " %d rows" % count) + else: + print(prefix, " (ignored)") + + # find, parse, and move zip files + zip_files = glob.glob("/common/cdc_stage/*.zip") + print("searching...") + for f in zip_files: + print(" ", f) + print("parsing...") + for f in zip_files: + with ZipFile(f) as zf: + parse_zip(zf) + print("moving...") + for f in zip_files: + src = f + dst = os.path.join("/home/automation/cdc_page_stats/", os.path.basename(src)) + print(" ", src, "->", dst) + if test_mode: + print(" (test mode enabled - not moved)") else: - insert_cdc(a, b, c, d) - count += 1 - return count - return handler - - # recursively open zip files - def parse_zip(zf, level=1): - for name in zf.namelist(): - prefix = ' ' * level - print(prefix, name) - if name[-4:] == '.zip': - with zf.open(name) as temp: - with ZipFile(io.BytesIO(temp.read())) as zf2: - parse_zip(zf2, level + 1) - elif name[-4:] == '.csv': - handler = None - if 'Flu Pages by Region' in name: - handler = parse_csv(False) - elif 'Regions for all CDC' in name: - handler = parse_csv(True) - else: - print(prefix, ' (skipped)') - if handler is not None: - with zf.open(name) as temp: - count = handler(csv.reader(io.StringIO(str(temp.read(), 'utf-8')))) - print(prefix, ' %d rows' % count) - else: - print(prefix, ' (ignored)') - - # find, parse, and move zip files - zip_files = glob.glob('/common/cdc_stage/*.zip') - print('searching...') - for f in zip_files: - print(' ', f) - print('parsing...') - for f in zip_files: - with ZipFile(f) as zf: - parse_zip(zf) - print('moving...') - for f in zip_files: - src = f - dst = os.path.join('/home/automation/cdc_page_stats/', os.path.basename(src)) - print(' ', src, '->', dst) - if test_mode: - print(' (test mode enabled - not moved)') - else: - shutil.move(src, dst) - if not os.path.isfile(dst): - raise Exception('unable to move file') - - # disconnect - cur.close() - if not test_mode: - cnx.commit() - cnx.close() + shutil.move(src, dst) + if not os.path.isfile(dst): + raise Exception("unable to move file") + + # disconnect + cur.close() + if not test_mode: + cnx.commit() + cnx.close() def main(): - # args and usage - parser = argparse.ArgumentParser() - parser.add_argument('--test', '-t', default=False, action='store_true', help='dry run only') - args = parser.parse_args() + # args and usage + parser = argparse.ArgumentParser() + parser.add_argument("--test", "-t", default=False, action="store_true", help="dry run only") + 
args = parser.parse_args() - # make it happen - upload(args.test) + # make it happen + upload(args.test) -if __name__ == '__main__': - main() +if __name__ == "__main__": + main() diff --git a/src/acquisition/covid_hosp/common/database.py b/src/acquisition/covid_hosp/common/database.py index 4fd0981a1..57071bc8d 100644 --- a/src/acquisition/covid_hosp/common/database.py +++ b/src/acquisition/covid_hosp/common/database.py @@ -15,263 +15,247 @@ Columndef = namedtuple("Columndef", "csv_name sql_name dtype") -class Database: - def __init__(self, - connection, - table_name=None, - hhs_dataset_id=None, - columns_and_types=None, - key_columns=None, - additional_fields=None): - """Create a new Database object. - - Parameters - ---------- - connection - An open connection to a database. - table_name : str - The name of the table which holds the dataset. - hhs_dataset_id : str - The 9-character healthdata.gov identifier for this dataset. - columns_and_types : tuple[str, str, Callable] - List of 3-tuples of (CSV header name, SQL column name, data type) for - all the columns in the CSV file. - additional_fields : tuple[str] - List of 2-tuples of (value, SQL column name) fordditional fields to include - at the end of the row which are not present in the CSV data. - """ - - self.connection = connection - self.table_name = table_name - self.hhs_dataset_id = hhs_dataset_id - self.publication_col_name = "issue" if table_name == 'covid_hosp_state_timeseries' else \ - 'publication_date' - self.columns_and_types = { - c.csv_name: c - for c in (columns_and_types if columns_and_types is not None else []) - } - self.key_columns = key_columns if key_columns is not None else [] - self.additional_fields = additional_fields if additional_fields is not None else [] - - @classmethod - def logger(database_class): - return get_structured_logger(f"{database_class.__module__}") - - @classmethod - @contextmanager - def connect(database_class, mysql_connector_impl=mysql.connector): - """Connect to a database and provide the connection as a context manager. - - As long as the context manager exits normally, the connection's transaction - will be committed. Otherwise, if the context is exited by an Exception, the - transaction will be rolled back. - - In any case, the connection will be gracefully closed upon exiting the - context manager. - """ - - # connect to the database - user, password = secrets.db.epi - connection = mysql_connector_impl.connect( - host=secrets.db.host, - user=user, - password=password, - database='epidata') - - try: - # provide the connection to the context manager - yield database_class(connection) - - # rollback by default; the following commit will only take place if no - # exception was raised in calling code - connection.commit() - finally: - # close the connection in any case - connection.close() - - @contextmanager - def new_cursor(self): - """Create and provide a database cursor as a context manager. - - The cursor will be gracefully closed upon exiting the context manager. - """ - - cursor = self.connection.cursor() - try: - yield cursor - finally: - cursor.close() - - def contains_revision(self, revision): - """Return whether the given revision already exists in the database. - - Parameters - ---------- - revision : str - Unique revision string. - - Returns - ------- - bool - True iff the revision already exists. 
- """ - - with self.new_cursor() as cursor: - cursor.execute(''' - SELECT - count(1) > 0 - FROM - `covid_hosp_meta` - WHERE - `hhs_dataset_id` = %s AND `revision_timestamp` = %s - ''', (self.hhs_dataset_id, revision)) - for (result,) in cursor: - return bool(result) - - def insert_metadata(self, publication_date, revision, meta_json, logger=False): - """Add revision metadata to the database. - - Parameters - ---------- - publication_date : int - Date when the dataset was published in YYYYMMDD format. - revision : str - Unique revision string. - meta_json : str - Metadata serialized as a JSON string. - logger structlog.Logger [optional; default False] - Logger to receive messages - """ - - with self.new_cursor() as cursor: - cursor.execute(''' - INSERT INTO - `covid_hosp_meta` ( - `dataset_name`, - `hhs_dataset_id`, - `publication_date`, - `revision_timestamp`, - `metadata_json`, - `acquisition_datetime` - ) - VALUES - (%s, %s, %s, %s, %s, NOW()) - ''', (self.table_name, self.hhs_dataset_id, publication_date, revision, meta_json)) - - def insert_dataset(self, publication_date, dataframe, logger=False): - """Add a dataset to the database. - - Parameters - ---------- - publication_date : int - Date when the dataset was published in YYYYMMDD format. - dataframe : pandas.DataFrame - The dataset. - logger structlog.Logger [optional; default False] - Logger to receive messages. - """ - dataframe_columns_and_types = [ - x for x in self.columns_and_types.values() if x.csv_name in dataframe.columns - ] - - def nan_safe_dtype(dtype, value): - if isinstance(value, float) and math.isnan(value): - return None - return dtype(value) - - # first convert keys and save the results; we'll need them later - for csv_name in self.key_columns: - dataframe.loc[:, csv_name] = dataframe[csv_name].map(self.columns_and_types[csv_name].dtype) - - num_columns = 2 + len(dataframe_columns_and_types) + len(self.additional_fields) - value_placeholders = ', '.join(['%s'] * num_columns) - columns = ', '.join(f'`{i.sql_name}`' for i in dataframe_columns_and_types + self.additional_fields) - sql = f'INSERT INTO `{self.table_name}` (`id`, `{self.publication_col_name}`, {columns}) ' \ - f'VALUES ({value_placeholders})' - id_and_publication_date = (0, publication_date) - if logger: - logger.info('updating values', count=len(dataframe.index)) - n = 0 - many_values = [] - with self.new_cursor() as cursor: - for index, row in dataframe.iterrows(): - values = [] - for c in dataframe_columns_and_types: - values.append(nan_safe_dtype(c.dtype, row[c.csv_name])) - many_values.append(id_and_publication_date + - tuple(values) + - tuple(i.csv_name for i in self.additional_fields)) - n += 1 - # insert in batches because one at a time is slow and all at once makes - # the connection drop :( - if n % 5_000 == 0: - try: - cursor.executemany(sql, many_values) - many_values = [] - except Exception as e: +class Database: + def __init__(self, connection, table_name=None, hhs_dataset_id=None, columns_and_types=None, key_columns=None, additional_fields=None): + """Create a new Database object. + + Parameters + ---------- + connection + An open connection to a database. + table_name : str + The name of the table which holds the dataset. + hhs_dataset_id : str + The 9-character healthdata.gov identifier for this dataset. + columns_and_types : tuple[str, str, Callable] + List of 3-tuples of (CSV header name, SQL column name, data type) for + all the columns in the CSV file. 
+ additional_fields : tuple[str] + List of 2-tuples of (value, SQL column name) fordditional fields to include + at the end of the row which are not present in the CSV data. + """ + + self.connection = connection + self.table_name = table_name + self.hhs_dataset_id = hhs_dataset_id + self.publication_col_name = "issue" if table_name == "covid_hosp_state_timeseries" else "publication_date" + self.columns_and_types = {c.csv_name: c for c in (columns_and_types if columns_and_types is not None else [])} + self.key_columns = key_columns if key_columns is not None else [] + self.additional_fields = additional_fields if additional_fields is not None else [] + + @classmethod + def logger(database_class): + return get_structured_logger(f"{database_class.__module__}") + + @classmethod + @contextmanager + def connect(database_class, mysql_connector_impl=mysql.connector): + """Connect to a database and provide the connection as a context manager. + + As long as the context manager exits normally, the connection's transaction + will be committed. Otherwise, if the context is exited by an Exception, the + transaction will be rolled back. + + In any case, the connection will be gracefully closed upon exiting the + context manager. + """ + + # connect to the database + user, password = secrets.db.epi + connection = mysql_connector_impl.connect(host=secrets.db.host, user=user, password=password, database="epidata") + + try: + # provide the connection to the context manager + yield database_class(connection) + + # rollback by default; the following commit will only take place if no + # exception was raised in calling code + connection.commit() + finally: + # close the connection in any case + connection.close() + + @contextmanager + def new_cursor(self): + """Create and provide a database cursor as a context manager. + + The cursor will be gracefully closed upon exiting the context manager. + """ + + cursor = self.connection.cursor() + try: + yield cursor + finally: + cursor.close() + + def contains_revision(self, revision): + """Return whether the given revision already exists in the database. + + Parameters + ---------- + revision : str + Unique revision string. + + Returns + ------- + bool + True iff the revision already exists. + """ + + with self.new_cursor() as cursor: + cursor.execute( + """ + SELECT + count(1) > 0 + FROM + `covid_hosp_meta` + WHERE + `hhs_dataset_id` = %s AND `revision_timestamp` = %s + """, + (self.hhs_dataset_id, revision), + ) + for (result,) in cursor: + return bool(result) + + def insert_metadata(self, publication_date, revision, meta_json, logger=False): + """Add revision metadata to the database. + + Parameters + ---------- + publication_date : int + Date when the dataset was published in YYYYMMDD format. + revision : str + Unique revision string. + meta_json : str + Metadata serialized as a JSON string. + logger structlog.Logger [optional; default False] + Logger to receive messages + """ + + with self.new_cursor() as cursor: + cursor.execute( + """ + INSERT INTO + `covid_hosp_meta` ( + `dataset_name`, + `hhs_dataset_id`, + `publication_date`, + `revision_timestamp`, + `metadata_json`, + `acquisition_datetime` + ) + VALUES + (%s, %s, %s, %s, %s, NOW()) + """, + (self.table_name, self.hhs_dataset_id, publication_date, revision, meta_json), + ) + + def insert_dataset(self, publication_date, dataframe, logger=False): + """Add a dataset to the database. + + Parameters + ---------- + publication_date : int + Date when the dataset was published in YYYYMMDD format. 
+ dataframe : pandas.DataFrame + The dataset. + logger structlog.Logger [optional; default False] + Logger to receive messages. + """ + dataframe_columns_and_types = [x for x in self.columns_and_types.values() if x.csv_name in dataframe.columns] + + def nan_safe_dtype(dtype, value): + if isinstance(value, float) and math.isnan(value): + return None + return dtype(value) + + # first convert keys and save the results; we'll need them later + for csv_name in self.key_columns: + dataframe.loc[:, csv_name] = dataframe[csv_name].map(self.columns_and_types[csv_name].dtype) + + num_columns = 2 + len(dataframe_columns_and_types) + len(self.additional_fields) + value_placeholders = ", ".join(["%s"] * num_columns) + columns = ", ".join(f"`{i.sql_name}`" for i in dataframe_columns_and_types + self.additional_fields) + sql = f"INSERT INTO `{self.table_name}` (`id`, `{self.publication_col_name}`, {columns}) " f"VALUES ({value_placeholders})" + id_and_publication_date = (0, publication_date) + if logger: + logger.info("updating values", count=len(dataframe.index)) + n = 0 + many_values = [] + with self.new_cursor() as cursor: + for index, row in dataframe.iterrows(): + values = [] + for c in dataframe_columns_and_types: + values.append(nan_safe_dtype(c.dtype, row[c.csv_name])) + many_values.append(id_and_publication_date + tuple(values) + tuple(i.csv_name for i in self.additional_fields)) + n += 1 + # insert in batches because one at a time is slow and all at once makes + # the connection drop :( + if n % 5_000 == 0: + try: + cursor.executemany(sql, many_values) + many_values = [] + except Exception as e: + if logger: + logger.error("error on insert", publ_date=publication_date, in_lines=(n - 5_000, n), index=index, values=values, exception=e) + raise e + # insert final batch + if many_values: + cursor.executemany(sql, many_values) + + # deal with non/seldomly updated columns used like a fk table (if this database needs it) + if hasattr(self, "AGGREGATE_KEY_COLS"): + if logger: + logger.info("updating keys") + ak_cols = self.AGGREGATE_KEY_COLS + + # restrict data to just the key columns and remove duplicate rows + # sort by key columns to ensure that the last ON DUPLICATE KEY overwrite + # uses the most-recent aggregate key information + ak_data = dataframe[set(ak_cols + self.key_columns)].sort_values(self.key_columns)[ak_cols].drop_duplicates() + # cast types + for col in ak_cols: + ak_data[col] = ak_data[col].map(lambda value: nan_safe_dtype(self.columns_and_types[col].dtype, value)) + # fix NULLs + ak_data = ak_data.to_numpy(na_value=None).tolist() + + # create string of tick-quoted and comma-seperated column list + ak_cols_str = ",".join(f"`{col}`" for col in ak_cols) + # ...and ticked and comma-sep'd "column=column" list for ON UPDATE (to keep only the most recent values for each pk) + ak_updates_str = ",".join(f"`{col}`=v.{col}" for col in ak_cols) + # ...and string of VALUES placeholders + values_str = ",".join(["%s"] * len(ak_cols)) + # use aggregate key table alias + ak_table = self.table_name + "_key" + # assemble full SQL statement + ak_insert_sql = f"INSERT INTO `{ak_table}` ({ak_cols_str}) VALUES ({values_str}) AS v ON DUPLICATE KEY UPDATE {ak_updates_str}" + if logger: + logger.info("database query", sql=ak_insert_sql) + + # commit the data + with self.new_cursor() as cur: + cur.executemany(ak_insert_sql, ak_data) + + def get_max_issue(self, logger=False): + """Fetch the most recent issue. + + This is used to bookend what updates we pull in from the HHS metadata. 
+ """ + with self.new_cursor() as cursor: + cursor.execute( + f""" + SELECT + max(publication_date) + from + `covid_hosp_meta` + WHERE + hhs_dataset_id = "{self.hhs_dataset_id}" + """ + ) + for (result,) in cursor: + if result is not None: + return pd.Timestamp(str(result)) if logger: - logger.error('error on insert', publ_date=publication_date, in_lines=(n-5_000, n), index=index, values=values, exception=e) - raise e - # insert final batch - if many_values: - cursor.executemany(sql, many_values) - - # deal with non/seldomly updated columns used like a fk table (if this database needs it) - if hasattr(self, 'AGGREGATE_KEY_COLS'): - if logger: - logger.info('updating keys') - ak_cols = self.AGGREGATE_KEY_COLS - - # restrict data to just the key columns and remove duplicate rows - # sort by key columns to ensure that the last ON DUPLICATE KEY overwrite - # uses the most-recent aggregate key information - ak_data = (dataframe[set(ak_cols + self.key_columns)] - .sort_values(self.key_columns)[ak_cols] - .drop_duplicates()) - # cast types - for col in ak_cols: - ak_data[col] = ak_data[col].map( - lambda value: nan_safe_dtype(self.columns_and_types[col].dtype, value) - ) - # fix NULLs - ak_data = ak_data.to_numpy(na_value=None).tolist() - - # create string of tick-quoted and comma-seperated column list - ak_cols_str = ','.join(f'`{col}`' for col in ak_cols) - # ...and ticked and comma-sep'd "column=column" list for ON UPDATE (to keep only the most recent values for each pk) - ak_updates_str = ','.join(f'`{col}`=v.{col}' for col in ak_cols) - # ...and string of VALUES placeholders - values_str = ','.join( ['%s'] * len(ak_cols) ) - # use aggregate key table alias - ak_table = self.table_name + '_key' - # assemble full SQL statement - ak_insert_sql = f'INSERT INTO `{ak_table}` ({ak_cols_str}) VALUES ({values_str}) AS v ON DUPLICATE KEY UPDATE {ak_updates_str}' - if logger: - logger.info("database query", sql=ak_insert_sql) - - # commit the data - with self.new_cursor() as cur: - cur.executemany(ak_insert_sql, ak_data) - - - def get_max_issue(self, logger=False): - """Fetch the most recent issue. - - This is used to bookend what updates we pull in from the HHS metadata. - """ - with self.new_cursor() as cursor: - cursor.execute(f''' - SELECT - max(publication_date) - from - `covid_hosp_meta` - WHERE - hhs_dataset_id = "{self.hhs_dataset_id}" - ''') - for (result,) in cursor: - if result is not None: - return pd.Timestamp(str(result)) - if logger: - logger.warn("get_max_issue", msg="no matching results in meta table; returning 1900/1/1 epoch") - return pd.Timestamp("1900/1/1") + logger.warn("get_max_issue", msg="no matching results in meta table; returning 1900/1/1 epoch") + return pd.Timestamp("1900/1/1") diff --git a/src/acquisition/covid_hosp/common/network.py b/src/acquisition/covid_hosp/common/network.py index 7b6228f16..ff80c4c37 100644 --- a/src/acquisition/covid_hosp/common/network.py +++ b/src/acquisition/covid_hosp/common/network.py @@ -3,52 +3,51 @@ class Network: - METADATA_URL_TEMPLATE = \ - 'https://healthdata.gov/api/views/%s/rows.csv' - - def fetch_metadata_for_dataset(dataset_id, logger=False): - """Download and return metadata. - - Parameters - ---------- - dataset_id : str - healthdata.gov dataset identifier of the dataset. - logger : structlog.Logger [optional; default False] - Logger to receive messages. - - Returns - ------- - object - The metadata object. 
- """ - url = Network.METADATA_URL_TEMPLATE % dataset_id - if logger: - logger.info('fetching metadata', url=url) - df = Network.fetch_dataset(url) - df["Update Date"] = pandas.to_datetime(df["Update Date"]) - df.sort_values("Update Date", inplace=True) - df.set_index("Update Date", inplace=True) - return df - - def fetch_dataset(url, pandas_impl=pandas, logger=False): - """Download and return a dataset. - - Type inference is disabled in favor of explicit type casting at the - database abstraction layer. Pandas behavior is to represent non-missing - values as strings and missing values as `math.nan`. - - Parameters - ---------- - url : str - URL to the dataset in CSV format. - logger : structlog.Logger [optional; default False] - Logger to receive messages. - - Returns - ------- - pandas.DataFrame - The dataset. - """ - if logger: - logger.info('fetching dataset', url=url) - return pandas_impl.read_csv(url, dtype=str) + METADATA_URL_TEMPLATE = "https://healthdata.gov/api/views/%s/rows.csv" + + def fetch_metadata_for_dataset(dataset_id, logger=False): + """Download and return metadata. + + Parameters + ---------- + dataset_id : str + healthdata.gov dataset identifier of the dataset. + logger : structlog.Logger [optional; default False] + Logger to receive messages. + + Returns + ------- + object + The metadata object. + """ + url = Network.METADATA_URL_TEMPLATE % dataset_id + if logger: + logger.info("fetching metadata", url=url) + df = Network.fetch_dataset(url) + df["Update Date"] = pandas.to_datetime(df["Update Date"]) + df.sort_values("Update Date", inplace=True) + df.set_index("Update Date", inplace=True) + return df + + def fetch_dataset(url, pandas_impl=pandas, logger=False): + """Download and return a dataset. + + Type inference is disabled in favor of explicit type casting at the + database abstraction layer. Pandas behavior is to represent non-missing + values as strings and missing values as `math.nan`. + + Parameters + ---------- + url : str + URL to the dataset in CSV format. + logger : structlog.Logger [optional; default False] + Logger to receive messages. + + Returns + ------- + pandas.DataFrame + The dataset. 
+ """ + if logger: + logger.info("fetching dataset", url=url) + return pandas_impl.read_csv(url, dtype=str) diff --git a/src/acquisition/covid_hosp/common/test_utils.py b/src/acquisition/covid_hosp/common/test_utils.py index 2a737b383..b5fb9384a 100644 --- a/src/acquisition/covid_hosp/common/test_utils.py +++ b/src/acquisition/covid_hosp/common/test_utils.py @@ -17,43 +17,41 @@ class UnitTestUtils: - # path to `covid_hosp` test data, relative to the top of the repo - PATH_TO_TESTDATA = 'testdata/acquisition/covid_hosp' - - def __init__(self, abs_path_to_caller): - # navigate to the root of the delphi-epidata repo - dataset_name = None - current_path = Path(abs_path_to_caller) - while not (current_path / 'testdata').exists(): - - # bail if we made it all the way to root - if not current_path.name: - raise Exception('unable to determine path to delphi-epidata repo') - - # looking for a path like .../acquisition/covid_hosp/ - if current_path.parent.name == 'covid_hosp': - dataset_name = current_path.name - - # move up one level - current_path = current_path.parent - - # the loop above stops at the top of the repo - path_to_repo = current_path - - if not dataset_name: - raise Exception('unable to determine name of dataset under test') - - # path dataset-specific test data, relative to the root of the repo - self.data_dir = ( - path_to_repo / UnitTestUtils.PATH_TO_TESTDATA / dataset_name - ).resolve() - - def load_sample_metadata(self, metadata_name='metadata.csv'): - df = pandas.read_csv(self.data_dir / metadata_name, dtype=str) - df["Update Date"] = pandas.to_datetime(df["Update Date"]) - df.sort_values("Update Date", inplace=True) - df.set_index("Update Date", inplace=True) - return df - - def load_sample_dataset(self, dataset_name='dataset.csv'): - return pandas.read_csv(self.data_dir / dataset_name, dtype=str) + # path to `covid_hosp` test data, relative to the top of the repo + PATH_TO_TESTDATA = "testdata/acquisition/covid_hosp" + + def __init__(self, abs_path_to_caller): + # navigate to the root of the delphi-epidata repo + dataset_name = None + current_path = Path(abs_path_to_caller) + while not (current_path / "testdata").exists(): + + # bail if we made it all the way to root + if not current_path.name: + raise Exception("unable to determine path to delphi-epidata repo") + + # looking for a path like .../acquisition/covid_hosp/ + if current_path.parent.name == "covid_hosp": + dataset_name = current_path.name + + # move up one level + current_path = current_path.parent + + # the loop above stops at the top of the repo + path_to_repo = current_path + + if not dataset_name: + raise Exception("unable to determine name of dataset under test") + + # path dataset-specific test data, relative to the root of the repo + self.data_dir = (path_to_repo / UnitTestUtils.PATH_TO_TESTDATA / dataset_name).resolve() + + def load_sample_metadata(self, metadata_name="metadata.csv"): + df = pandas.read_csv(self.data_dir / metadata_name, dtype=str) + df["Update Date"] = pandas.to_datetime(df["Update Date"]) + df.sort_values("Update Date", inplace=True) + df.set_index("Update Date", inplace=True) + return df + + def load_sample_dataset(self, dataset_name="dataset.csv"): + return pandas.read_csv(self.data_dir / dataset_name, dtype=str) diff --git a/src/acquisition/covid_hosp/common/utils.py b/src/acquisition/covid_hosp/common/utils.py index 5f718ad69..04363755e 100644 --- a/src/acquisition/covid_hosp/common/utils.py +++ b/src/acquisition/covid_hosp/common/utils.py @@ -8,219 +8,211 @@ class 
CovidHospException(Exception): - """Exception raised exclusively by `covid_hosp` utilities.""" + """Exception raised exclusively by `covid_hosp` utilities.""" class Utils: - # regex to extract issue date from revision field - # example revision: "Mon, 11/16/2020 - 00:55" - REVISION_PATTERN = re.compile(r'^.*\s(\d+)/(\d+)/(\d+)\s.*$') - - def launch_if_main(entrypoint, runtime_name): - """Call the given function in the main entry point, otherwise no-op.""" - - if runtime_name == '__main__': - entrypoint() - - def int_from_date(date): - """Convert a YYYY/MM/DD date from a string to a YYYYMMDD int. - - Parameters - ---------- - date : str - Date in "YYYY/MM/DD.*" format. - - Returns - ------- - int - Date in YYYYMMDD format. - """ - if isinstance(date, str): - return int(date[:10].replace('/', '').replace('-', '')) - return date - - def parse_bool(value): - """Convert a string to a boolean. - - Parameters - ---------- - value : str - Boolean-like value, like "true" or "false". - - Returns - ------- - bool - If the string contains some version of "true" or "false". - None - If the string is None or empty. - - Raises - ------ - CovidHospException - If the string constains something other than a version of "true" or - "false". - """ - - if not value: - return None - if value.lower() == 'true': - return True - if value.lower() == 'false': - return False - raise CovidHospException(f'cannot convert "{value}" to bool') - - def limited_string_fn(length): - def limited_string(value): - value = str(value) - if len(value) > length: - raise CovidHospException(f"Value '{value}':{len(value)} longer than max {length}") - return value - return limited_string - - GEOCODE_LENGTH = 32 - GEOCODE_PATTERN = re.compile(r'POINT \((-?[0-9.]+) (-?[0-9.]+)\)') - def limited_geocode(value): - if len(value) < Utils.GEOCODE_LENGTH: - return value - # otherwise parse and set precision to 6 decimal places - m = Utils.GEOCODE_PATTERN.match(value) - if not m: - raise CovidHospException(f"Couldn't parse geocode '{value}'") - return f'POINT ({" ".join(f"{float(x):.6f}" for x in m.groups())})' - - def issues_to_fetch(metadata, newer_than, older_than, logger=False): - """ - Construct all issue dates and URLs to be ingested based on metadata. - - Parameters - ---------- - metadata pd.DataFrame - HHS metadata indexed by issue date and with column "Archive Link" - newer_than Date - Lower bound (exclusive) of days to get issues for. 
- older_than Date - Upper bound (exclusive) of days to get issues for - logger structlog.Logger [optional; default False] - Logger to receive messages - Returns - ------- - Dictionary of {issue day: list of (download urls, index)} - for issues after newer_than and before older_than - """ - daily_issues = {} - n_beyond = 0 - n_selected = 0 - for index in sorted(set(metadata.index)): - day = index.date() - if day > newer_than and day < older_than: - urls = metadata.loc[index, "Archive Link"] - urls_list = [(urls, index)] if isinstance(urls, str) else [(url, index) for url in urls] - if day not in daily_issues: - daily_issues[day] = urls_list - else: - daily_issues[day] += urls_list - n_selected += len(urls_list) - elif day >= older_than: - n_beyond += 1 - if logger: - if n_beyond > 0: - logger.info("issues available beyond selection", on_or_newer=older_than, count=n_beyond) - logger.info("issues selected", newer_than=str(newer_than), older_than=str(older_than), count=n_selected) - return daily_issues - - @staticmethod - def merge_by_key_cols(dfs, key_cols, logger=False): - """Merge a list of data frames as a series of updates. - - Parameters: - ----------- - dfs : list(pd.DataFrame) - Data frames to merge, ordered from earliest to latest. - key_cols: list(str) - Columns to use as the index. - logger structlog.Logger [optional; default False] - Logger to receive messages - - Returns a single data frame containing the most recent data for each state+date. - """ - - dfs = [df.set_index(key_cols) for df in dfs - if not all(k in df.index.names for k in key_cols)] - result = dfs[0] - if logger and len(dfs) > 7: - logger.warning( - "expensive operation", - msg="concatenating more than 7 files may result in long running times", - count=len(dfs)) - for df in dfs[1:]: - # update values for existing keys - result.update(df) - # add any new keys. - ## repeated concatenation in pandas is expensive, but (1) we don't expect - ## batch sizes to be terribly large (7 files max) and (2) this way we can - ## more easily capture the next iteration's updates to any new keys - result_index_set = set(result.index.to_list()) - new_rows = df.loc[[i for i in df.index.to_list() if i not in result_index_set]] - result = pd.concat([result, new_rows]) - - # convert the index rows back to columns - return result.reset_index(level=key_cols) - - @staticmethod - def update_dataset(database, network, newer_than=None, older_than=None): - """Acquire the most recent dataset, unless it was previously acquired. - - Parameters - ---------- - database : delphi.epidata.acquisition.covid_hosp.common.database.Database - A `Database` subclass for a particular dataset. - network : delphi.epidata.acquisition.covid_hosp.common.network.Network - A `Network` subclass for a particular dataset. - newer_than : date - Lower bound (exclusive) of days to get issues for. - older_than : date - Upper bound (exclusive) of days to get issues for - - Returns - ------- - bool - Whether a new dataset was acquired. 
- """ - logger = database.logger() - - metadata = network.fetch_metadata(logger=logger) - datasets = [] - with database.connect() as db: - max_issue = db.get_max_issue(logger=logger) - - older_than = datetime.datetime.today().date() if newer_than is None else older_than - newer_than = max_issue if newer_than is None else newer_than - daily_issues = Utils.issues_to_fetch(metadata, newer_than, older_than, logger=logger) - if not daily_issues: - logger.info("no new issues; nothing to do") - return False - for issue, revisions in daily_issues.items(): - issue_int = int(issue.strftime("%Y%m%d")) - # download the dataset and add it to the database - dataset = Utils.merge_by_key_cols([network.fetch_dataset(url, logger=logger) for url, _ in revisions], - db.KEY_COLS, - logger=logger) - # add metadata to the database - all_metadata = [] - for url, index in revisions: - all_metadata.append((url, metadata.loc[index].reset_index().to_json())) - datasets.append(( - issue_int, - dataset, - all_metadata - )) - with database.connect() as db: - for issue_int, dataset, all_metadata in datasets: - db.insert_dataset(issue_int, dataset, logger=logger) - for url, metadata_json in all_metadata: - db.insert_metadata(issue_int, url, metadata_json, logger=logger) - logger.info("acquired rows", count=len(dataset)) - - # note that the transaction is committed by exiting the `with` block - return True + # regex to extract issue date from revision field + # example revision: "Mon, 11/16/2020 - 00:55" + REVISION_PATTERN = re.compile(r"^.*\s(\d+)/(\d+)/(\d+)\s.*$") + + def launch_if_main(entrypoint, runtime_name): + """Call the given function in the main entry point, otherwise no-op.""" + + if runtime_name == "__main__": + entrypoint() + + def int_from_date(date): + """Convert a YYYY/MM/DD date from a string to a YYYYMMDD int. + + Parameters + ---------- + date : str + Date in "YYYY/MM/DD.*" format. + + Returns + ------- + int + Date in YYYYMMDD format. + """ + if isinstance(date, str): + return int(date[:10].replace("/", "").replace("-", "")) + return date + + def parse_bool(value): + """Convert a string to a boolean. + + Parameters + ---------- + value : str + Boolean-like value, like "true" or "false". + + Returns + ------- + bool + If the string contains some version of "true" or "false". + None + If the string is None or empty. + + Raises + ------ + CovidHospException + If the string constains something other than a version of "true" or + "false". + """ + + if not value: + return None + if value.lower() == "true": + return True + if value.lower() == "false": + return False + raise CovidHospException(f'cannot convert "{value}" to bool') + + def limited_string_fn(length): + def limited_string(value): + value = str(value) + if len(value) > length: + raise CovidHospException(f"Value '{value}':{len(value)} longer than max {length}") + return value + + return limited_string + + GEOCODE_LENGTH = 32 + GEOCODE_PATTERN = re.compile(r"POINT \((-?[0-9.]+) (-?[0-9.]+)\)") + + def limited_geocode(value): + if len(value) < Utils.GEOCODE_LENGTH: + return value + # otherwise parse and set precision to 6 decimal places + m = Utils.GEOCODE_PATTERN.match(value) + if not m: + raise CovidHospException(f"Couldn't parse geocode '{value}'") + return f'POINT ({" ".join(f"{float(x):.6f}" for x in m.groups())})' + + def issues_to_fetch(metadata, newer_than, older_than, logger=False): + """ + Construct all issue dates and URLs to be ingested based on metadata. 
+ + Parameters + ---------- + metadata pd.DataFrame + HHS metadata indexed by issue date and with column "Archive Link" + newer_than Date + Lower bound (exclusive) of days to get issues for. + older_than Date + Upper bound (exclusive) of days to get issues for + logger structlog.Logger [optional; default False] + Logger to receive messages + Returns + ------- + Dictionary of {issue day: list of (download urls, index)} + for issues after newer_than and before older_than + """ + daily_issues = {} + n_beyond = 0 + n_selected = 0 + for index in sorted(set(metadata.index)): + day = index.date() + if day > newer_than and day < older_than: + urls = metadata.loc[index, "Archive Link"] + urls_list = [(urls, index)] if isinstance(urls, str) else [(url, index) for url in urls] + if day not in daily_issues: + daily_issues[day] = urls_list + else: + daily_issues[day] += urls_list + n_selected += len(urls_list) + elif day >= older_than: + n_beyond += 1 + if logger: + if n_beyond > 0: + logger.info("issues available beyond selection", on_or_newer=older_than, count=n_beyond) + logger.info("issues selected", newer_than=str(newer_than), older_than=str(older_than), count=n_selected) + return daily_issues + + @staticmethod + def merge_by_key_cols(dfs, key_cols, logger=False): + """Merge a list of data frames as a series of updates. + + Parameters: + ----------- + dfs : list(pd.DataFrame) + Data frames to merge, ordered from earliest to latest. + key_cols: list(str) + Columns to use as the index. + logger structlog.Logger [optional; default False] + Logger to receive messages + + Returns a single data frame containing the most recent data for each state+date. + """ + + dfs = [df.set_index(key_cols) for df in dfs if not all(k in df.index.names for k in key_cols)] + result = dfs[0] + if logger and len(dfs) > 7: + logger.warning("expensive operation", msg="concatenating more than 7 files may result in long running times", count=len(dfs)) + for df in dfs[1:]: + # update values for existing keys + result.update(df) + # add any new keys. + ## repeated concatenation in pandas is expensive, but (1) we don't expect + ## batch sizes to be terribly large (7 files max) and (2) this way we can + ## more easily capture the next iteration's updates to any new keys + result_index_set = set(result.index.to_list()) + new_rows = df.loc[[i for i in df.index.to_list() if i not in result_index_set]] + result = pd.concat([result, new_rows]) + + # convert the index rows back to columns + return result.reset_index(level=key_cols) + + @staticmethod + def update_dataset(database, network, newer_than=None, older_than=None): + """Acquire the most recent dataset, unless it was previously acquired. + + Parameters + ---------- + database : delphi.epidata.acquisition.covid_hosp.common.database.Database + A `Database` subclass for a particular dataset. + network : delphi.epidata.acquisition.covid_hosp.common.network.Network + A `Network` subclass for a particular dataset. + newer_than : date + Lower bound (exclusive) of days to get issues for. + older_than : date + Upper bound (exclusive) of days to get issues for + + Returns + ------- + bool + Whether a new dataset was acquired. 
+ """ + logger = database.logger() + + metadata = network.fetch_metadata(logger=logger) + datasets = [] + with database.connect() as db: + max_issue = db.get_max_issue(logger=logger) + + older_than = datetime.datetime.today().date() if newer_than is None else older_than + newer_than = max_issue if newer_than is None else newer_than + daily_issues = Utils.issues_to_fetch(metadata, newer_than, older_than, logger=logger) + if not daily_issues: + logger.info("no new issues; nothing to do") + return False + for issue, revisions in daily_issues.items(): + issue_int = int(issue.strftime("%Y%m%d")) + # download the dataset and add it to the database + dataset = Utils.merge_by_key_cols([network.fetch_dataset(url, logger=logger) for url, _ in revisions], db.KEY_COLS, logger=logger) + # add metadata to the database + all_metadata = [] + for url, index in revisions: + all_metadata.append((url, metadata.loc[index].reset_index().to_json())) + datasets.append((issue_int, dataset, all_metadata)) + with database.connect() as db: + for issue_int, dataset, all_metadata in datasets: + db.insert_dataset(issue_int, dataset, logger=logger) + for url, metadata_json in all_metadata: + db.insert_metadata(issue_int, url, metadata_json, logger=logger) + logger.info("acquired rows", count=len(dataset)) + + # note that the transaction is committed by exiting the `with` block + return True diff --git a/src/acquisition/covid_hosp/facility/database.py b/src/acquisition/covid_hosp/facility/database.py index 172f32dc4..1fa642c72 100644 --- a/src/acquisition/covid_hosp/facility/database.py +++ b/src/acquisition/covid_hosp/facility/database.py @@ -7,213 +7,151 @@ class Database(BaseDatabase): - TABLE_NAME = 'covid_hosp_facility' - KEY_COLS = ['hospital_pk', 'collection_week'] - AGGREGATE_KEY_COLS = ['address', 'ccn', 'city', 'fips_code', 'geocoded_hospital_address', 'hhs_ids', 'hospital_name', 'hospital_pk', 'hospital_subtype', 'is_metro_micro', 'state', 'zip'] - # These are 3-tuples of ( - # CSV header name, - # SQL db column name, - # data type - # ) for all the columns in the CSV file. - # Note that the corresponding database column names may be shorter - # due to constraints on the length of column names. See - # /src/ddl/covid_hosp.sql for more information. 
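# --- Illustrative sketch, not part of this patch ---
# The comment above describes the Columndef scheme used throughout these
# database modules: each entry maps a CSV header to a (possibly shortened)
# SQL column name plus a casting callable. A minimal, self-contained example
# of applying one such 3-tuple to a raw CSV cell, mirroring the nan_safe_dtype
# handling in Database.insert_dataset; the sample column and row values here
# are hypothetical.
import math
from collections import namedtuple

Columndef = namedtuple("Columndef", "csv_name sql_name dtype")

def nan_safe_cast(dtype, value):
    # CSV cells read with dtype=str arrive as strings; missing cells are NaN
    if isinstance(value, float) and math.isnan(value):
        return None
    return dtype(value)

coldef = Columndef("total_beds_7_day_avg", "total_beds_7_day_avg", float)
row = {"total_beds_7_day_avg": "123.4"}  # hypothetical CSV row
sql_value = nan_safe_cast(coldef.dtype, row[coldef.csv_name])
print(coldef.sql_name, sql_value)  # -> total_beds_7_day_avg 123.4
# --- end sketch ---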
- ORDERED_CSV_COLUMNS = [ - Columndef('hospital_pk', 'hospital_pk', str), - Columndef('collection_week', 'collection_week', Utils.int_from_date), - Columndef('address', 'address', str), - Columndef('all_adult_hospital_beds_7_day_avg', 'all_adult_hospital_beds_7_day_avg', float), - Columndef('all_adult_hospital_beds_7_day_coverage', 'all_adult_hospital_beds_7_day_coverage', int), - Columndef('all_adult_hospital_beds_7_day_sum', 'all_adult_hospital_beds_7_day_sum', int), - Columndef('all_adult_hospital_inpatient_bed_occupied_7_day_avg', - 'all_adult_hospital_inpatient_bed_occupied_7_day_avg', float), - Columndef('all_adult_hospital_inpatient_bed_occupied_7_day_coverage', - 'all_adult_hospital_inpatient_bed_occupied_7_day_coverage', int), - Columndef('all_adult_hospital_inpatient_bed_occupied_7_day_sum', - 'all_adult_hospital_inpatient_bed_occupied_7_day_sum', int), - Columndef('all_adult_hospital_inpatient_beds_7_day_avg', 'all_adult_hospital_inpatient_beds_7_day_avg', - float), - Columndef('all_adult_hospital_inpatient_beds_7_day_coverage', - 'all_adult_hospital_inpatient_beds_7_day_coverage', int), - Columndef('all_adult_hospital_inpatient_beds_7_day_sum', 'all_adult_hospital_inpatient_beds_7_day_sum', - int), - Columndef('ccn', 'ccn', str), - Columndef('city', 'city', str), - Columndef('fips_code', 'fips_code', str), - Columndef('geocoded_hospital_address', 'geocoded_hospital_address', Utils.limited_geocode), - Columndef('hhs_ids', 'hhs_ids', str), - Columndef('hospital_name', 'hospital_name', str), - Columndef('hospital_subtype', 'hospital_subtype', str), - Columndef('icu_beds_used_7_day_avg', 'icu_beds_used_7_day_avg', float), - Columndef('icu_beds_used_7_day_coverage', 'icu_beds_used_7_day_coverage', int), - Columndef('icu_beds_used_7_day_sum', 'icu_beds_used_7_day_sum', int), - Columndef('icu_patients_confirmed_influenza_7_day_avg', 'icu_patients_confirmed_influenza_7_day_avg', - float), - Columndef('icu_patients_confirmed_influenza_7_day_coverage', - 'icu_patients_confirmed_influenza_7_day_coverage', int), - Columndef('icu_patients_confirmed_influenza_7_day_sum', 'icu_patients_confirmed_influenza_7_day_sum', - int), - Columndef('inpatient_beds_7_day_avg', 'inpatient_beds_7_day_avg', float), - Columndef('inpatient_beds_7_day_coverage', 'inpatient_beds_7_day_coverage', int), - Columndef('inpatient_beds_7_day_sum', 'inpatient_beds_7_day_sum', int), - Columndef('inpatient_beds_used_7_day_avg', 'inpatient_beds_used_7_day_avg', float), - Columndef('inpatient_beds_used_7_day_coverage', 'inpatient_beds_used_7_day_coverage', int), - Columndef('inpatient_beds_used_7_day_sum', 'inpatient_beds_used_7_day_sum', int), - Columndef('is_corrected', 'is_corrected', Utils.parse_bool), - Columndef('is_metro_micro', 'is_metro_micro', Utils.parse_bool), - Columndef('previous_day_admission_adult_covid_confirmed_18-19_7_day_sum', - 'previous_day_admission_adult_covid_confirmed_18_19_7_day_sum', int), - Columndef('previous_day_admission_adult_covid_confirmed_20-29_7_day_sum', - 'previous_day_admission_adult_covid_confirmed_20_29_7_day_sum', int), - Columndef('previous_day_admission_adult_covid_confirmed_30-39_7_day_sum', - 'previous_day_admission_adult_covid_confirmed_30_39_7_day_sum', int), - Columndef('previous_day_admission_adult_covid_confirmed_40-49_7_day_sum', - 'previous_day_admission_adult_covid_confirmed_40_49_7_day_sum', int), - Columndef('previous_day_admission_adult_covid_confirmed_50-59_7_day_sum', - 'previous_day_admission_adult_covid_confirmed_50_59_7_day_sum', int), - 
Columndef('previous_day_admission_adult_covid_confirmed_60-69_7_day_sum', - 'previous_day_admission_adult_covid_confirmed_60_69_7_day_sum', int), - Columndef('previous_day_admission_adult_covid_confirmed_70-79_7_day_sum', - 'previous_day_admission_adult_covid_confirmed_70_79_7_day_sum', int), - Columndef('previous_day_admission_adult_covid_confirmed_7_day_coverage', - 'previous_day_admission_adult_covid_confirmed_7_day_coverage', int), - Columndef('previous_day_admission_adult_covid_confirmed_7_day_sum', - 'previous_day_admission_adult_covid_confirmed_7_day_sum', int), - Columndef('previous_day_admission_adult_covid_confirmed_80+_7_day_sum', - 'previous_day_admission_adult_covid_confirmed_80plus_7_day_sum', int), - Columndef('previous_day_admission_adult_covid_confirmed_unknown_7_day_sum', - 'previous_day_admission_adult_covid_confirmed_unknown_7_day_sum', int), - Columndef('previous_day_admission_adult_covid_suspected_18-19_7_day_sum', - 'previous_day_admission_adult_covid_suspected_18_19_7_day_sum', int), - Columndef('previous_day_admission_adult_covid_suspected_20-29_7_day_sum', - 'previous_day_admission_adult_covid_suspected_20_29_7_day_sum', int), - Columndef('previous_day_admission_adult_covid_suspected_30-39_7_day_sum', - 'previous_day_admission_adult_covid_suspected_30_39_7_day_sum', int), - Columndef('previous_day_admission_adult_covid_suspected_40-49_7_day_sum', - 'previous_day_admission_adult_covid_suspected_40_49_7_day_sum', int), - Columndef('previous_day_admission_adult_covid_suspected_50-59_7_day_sum', - 'previous_day_admission_adult_covid_suspected_50_59_7_day_sum', int), - Columndef('previous_day_admission_adult_covid_suspected_60-69_7_day_sum', - 'previous_day_admission_adult_covid_suspected_60_69_7_day_sum', int), - Columndef('previous_day_admission_adult_covid_suspected_70-79_7_day_sum', - 'previous_day_admission_adult_covid_suspected_70_79_7_day_sum', int), - Columndef('previous_day_admission_adult_covid_suspected_7_day_coverage', - 'previous_day_admission_adult_covid_suspected_7_day_coverage', int), - Columndef('previous_day_admission_adult_covid_suspected_7_day_sum', - 'previous_day_admission_adult_covid_suspected_7_day_sum', int), - Columndef('previous_day_admission_adult_covid_suspected_80+_7_day_sum', - 'previous_day_admission_adult_covid_suspected_80plus_7_day_sum', int), - Columndef('previous_day_admission_adult_covid_suspected_unknown_7_day_sum', - 'previous_day_admission_adult_covid_suspected_unknown_7_day_sum', int), - Columndef('previous_day_admission_influenza_confirmed_7_day_sum', - 'previous_day_admission_influenza_confirmed_7_day_sum', int), - Columndef('previous_day_admission_pediatric_covid_confirmed_7_day_coverage', - 'previous_day_admission_pediatric_covid_confirmed_7_day_coverage', int), - Columndef('previous_day_admission_pediatric_covid_confirmed_7_day_sum', - 'previous_day_admission_pediatric_covid_confirmed_7_day_sum', int), - Columndef('previous_day_admission_pediatric_covid_suspected_7_day_coverage', - 'previous_day_admission_pediatric_covid_suspected_7_day_coverage', int), - Columndef('previous_day_admission_pediatric_covid_suspected_7_day_sum', - 'previous_day_admission_pediatric_covid_suspected_7_day_sum', int), - Columndef('previous_day_covid_ED_visits_7_day_sum', 'previous_day_covid_ed_visits_7_day_sum', int), - Columndef('previous_day_total_ED_visits_7_day_sum', 'previous_day_total_ed_visits_7_day_sum', int), - Columndef('previous_week_patients_covid_vaccinated_doses_all_7_day', - 
'previous_week_patients_covid_vaccinated_doses_all_7_day', int), - Columndef('previous_week_patients_covid_vaccinated_doses_all_7_day_sum', - 'previous_week_patients_covid_vaccinated_doses_all_7_day_sum', int), - Columndef('previous_week_patients_covid_vaccinated_doses_one_7_day', - 'previous_week_patients_covid_vaccinated_doses_one_7_day', int), - Columndef('previous_week_patients_covid_vaccinated_doses_one_7_day_sum', - 'previous_week_patients_covid_vaccinated_doses_one_7_day_sum', int), - Columndef('previous_week_personnel_covid_vaccinated_doses_administered_7_day', - 'previous_week_personnel_covid_vaccd_doses_administered_7_day', int), - Columndef('previous_week_personnel_covid_vaccinated_doses_administered_7_day_sum', - 'previous_week_personnel_covid_vaccd_doses_administered_7_day_sum', int), - Columndef('staffed_adult_icu_bed_occupancy_7_day_avg', 'staffed_adult_icu_bed_occupancy_7_day_avg', - float), - Columndef('staffed_adult_icu_bed_occupancy_7_day_coverage', - 'staffed_adult_icu_bed_occupancy_7_day_coverage', int), - Columndef('staffed_adult_icu_bed_occupancy_7_day_sum', 'staffed_adult_icu_bed_occupancy_7_day_sum', - int), - Columndef('staffed_icu_adult_patients_confirmed_and_suspected_covid_7_day_avg', - 'staffed_icu_adult_patients_confirmed_suspected_covid_7d_avg', float), - Columndef('staffed_icu_adult_patients_confirmed_and_suspected_covid_7_day_coverage', - 'staffed_icu_adult_patients_confirmed_suspected_covid_7d_cov', int), - Columndef('staffed_icu_adult_patients_confirmed_and_suspected_covid_7_day_sum', - 'staffed_icu_adult_patients_confirmed_suspected_covid_7d_sum', int), - Columndef('staffed_icu_adult_patients_confirmed_covid_7_day_avg', - 'staffed_icu_adult_patients_confirmed_covid_7_day_avg', float), - Columndef('staffed_icu_adult_patients_confirmed_covid_7_day_coverage', - 'staffed_icu_adult_patients_confirmed_covid_7_day_coverage', int), - Columndef('staffed_icu_adult_patients_confirmed_covid_7_day_sum', - 'staffed_icu_adult_patients_confirmed_covid_7_day_sum', int), - Columndef('state', 'state', str), - Columndef('total_adult_patients_hospitalized_confirmed_and_suspected_covid_7_day_avg', - 'total_adult_patients_hosp_confirmed_suspected_covid_7d_avg', float), - Columndef('total_adult_patients_hospitalized_confirmed_and_suspected_covid_7_day_coverage', - 'total_adult_patients_hosp_confirmed_suspected_covid_7d_cov', int), - Columndef('total_adult_patients_hospitalized_confirmed_and_suspected_covid_7_day_sum', - 'total_adult_patients_hosp_confirmed_suspected_covid_7d_sum', int), - Columndef('total_adult_patients_hospitalized_confirmed_covid_7_day_avg', - 'total_adult_patients_hospitalized_confirmed_covid_7_day_avg', float), - Columndef('total_adult_patients_hospitalized_confirmed_covid_7_day_coverage', - 'total_adult_patients_hospitalized_confirmed_covid_7_day_coverage', int), - Columndef('total_adult_patients_hospitalized_confirmed_covid_7_day_sum', - 'total_adult_patients_hospitalized_confirmed_covid_7_day_sum', int), - Columndef('total_beds_7_day_avg', 'total_beds_7_day_avg', float), - Columndef('total_beds_7_day_coverage', 'total_beds_7_day_coverage', int), - Columndef('total_beds_7_day_sum', 'total_beds_7_day_sum', int), - Columndef('total_icu_beds_7_day_avg', 'total_icu_beds_7_day_avg', float), - Columndef('total_icu_beds_7_day_coverage', 'total_icu_beds_7_day_coverage', int), - Columndef('total_icu_beds_7_day_sum', 'total_icu_beds_7_day_sum', int), - Columndef('total_patients_hospitalized_confirmed_influenza_7_day_avg', - 
'total_patients_hospitalized_confirmed_influenza_7_day_avg', float), - Columndef('total_patients_hospitalized_confirmed_influenza_7_day_coverage', - 'total_patients_hospitalized_confirmed_influenza_7_day_coverage', int), - Columndef('total_patients_hospitalized_confirmed_influenza_7_day_sum', - 'total_patients_hospitalized_confirmed_influenza_7_day_sum', int), - Columndef('total_patients_hospitalized_confirmed_influenza_and_covid_7_day_avg', - 'total_patients_hosp_confirmed_influenza_and_covid_7d_avg', float), - Columndef('total_patients_hospitalized_confirmed_influenza_and_covid_7_day_coverage', - 'total_patients_hosp_confirmed_influenza_and_covid_7d_cov', int), - Columndef('total_patients_hospitalized_confirmed_influenza_and_covid_7_day_sum', - 'total_patients_hosp_confirmed_influenza_and_covid_7d_sum', int), - Columndef('total_pediatric_patients_hospitalized_confirmed_and_suspected_covid_7_day_avg', - 'total_pediatric_patients_hosp_confirmed_suspected_covid_7d_avg', float), - Columndef('total_pediatric_patients_hospitalized_confirmed_and_suspected_covid_7_day_coverage', - 'total_pediatric_patients_hosp_confirmed_suspected_covid_7d_cov', int), - Columndef('total_pediatric_patients_hospitalized_confirmed_and_suspected_covid_7_day_sum', - 'total_pediatric_patients_hosp_confirmed_suspected_covid_7d_sum', int), - Columndef('total_pediatric_patients_hospitalized_confirmed_covid_7_day_avg', - 'total_pediatric_patients_hospitalized_confirmed_covid_7_day_avg', float), - Columndef('total_pediatric_patients_hospitalized_confirmed_covid_7_day_coverage', - 'total_pediatric_patients_hosp_confirmed_covid_7d_cov', int), - Columndef('total_pediatric_patients_hospitalized_confirmed_covid_7_day_sum', - 'total_pediatric_patients_hospitalized_confirmed_covid_7_day_sum', int), - Columndef('total_personnel_covid_vaccinated_doses_all_7_day', - 'total_personnel_covid_vaccinated_doses_all_7_day', int), - Columndef('total_personnel_covid_vaccinated_doses_all_7_day_sum', - 'total_personnel_covid_vaccinated_doses_all_7_day_sum', int), - Columndef('total_personnel_covid_vaccinated_doses_none_7_day', - 'total_personnel_covid_vaccinated_doses_none_7_day', int), - Columndef('total_personnel_covid_vaccinated_doses_none_7_day_sum', - 'total_personnel_covid_vaccinated_doses_none_7_day_sum', int), - Columndef('total_personnel_covid_vaccinated_doses_one_7_day', - 'total_personnel_covid_vaccinated_doses_one_7_day', int), - Columndef('total_personnel_covid_vaccinated_doses_one_7_day_sum', - 'total_personnel_covid_vaccinated_doses_one_7_day_sum', int), - Columndef('total_staffed_adult_icu_beds_7_day_avg', 'total_staffed_adult_icu_beds_7_day_avg', float), - Columndef('total_staffed_adult_icu_beds_7_day_coverage', 'total_staffed_adult_icu_beds_7_day_coverage', - int), - Columndef('total_staffed_adult_icu_beds_7_day_sum', 'total_staffed_adult_icu_beds_7_day_sum', int), - Columndef('zip', 'zip', str), - ] + TABLE_NAME = "covid_hosp_facility" + KEY_COLS = ["hospital_pk", "collection_week"] + AGGREGATE_KEY_COLS = [ + "address", + "ccn", + "city", + "fips_code", + "geocoded_hospital_address", + "hhs_ids", + "hospital_name", + "hospital_pk", + "hospital_subtype", + "is_metro_micro", + "state", + "zip", + ] + # These are 3-tuples of ( + # CSV header name, + # SQL db column name, + # data type + # ) for all the columns in the CSV file. + # Note that the corresponding database column names may be shorter + # due to constraints on the length of column names. See + # /src/ddl/covid_hosp.sql for more information. 
+ ORDERED_CSV_COLUMNS = [ + Columndef("hospital_pk", "hospital_pk", str), + Columndef("collection_week", "collection_week", Utils.int_from_date), + Columndef("address", "address", str), + Columndef("all_adult_hospital_beds_7_day_avg", "all_adult_hospital_beds_7_day_avg", float), + Columndef("all_adult_hospital_beds_7_day_coverage", "all_adult_hospital_beds_7_day_coverage", int), + Columndef("all_adult_hospital_beds_7_day_sum", "all_adult_hospital_beds_7_day_sum", int), + Columndef("all_adult_hospital_inpatient_bed_occupied_7_day_avg", "all_adult_hospital_inpatient_bed_occupied_7_day_avg", float), + Columndef("all_adult_hospital_inpatient_bed_occupied_7_day_coverage", "all_adult_hospital_inpatient_bed_occupied_7_day_coverage", int), + Columndef("all_adult_hospital_inpatient_bed_occupied_7_day_sum", "all_adult_hospital_inpatient_bed_occupied_7_day_sum", int), + Columndef("all_adult_hospital_inpatient_beds_7_day_avg", "all_adult_hospital_inpatient_beds_7_day_avg", float), + Columndef("all_adult_hospital_inpatient_beds_7_day_coverage", "all_adult_hospital_inpatient_beds_7_day_coverage", int), + Columndef("all_adult_hospital_inpatient_beds_7_day_sum", "all_adult_hospital_inpatient_beds_7_day_sum", int), + Columndef("ccn", "ccn", str), + Columndef("city", "city", str), + Columndef("fips_code", "fips_code", str), + Columndef("geocoded_hospital_address", "geocoded_hospital_address", Utils.limited_geocode), + Columndef("hhs_ids", "hhs_ids", str), + Columndef("hospital_name", "hospital_name", str), + Columndef("hospital_subtype", "hospital_subtype", str), + Columndef("icu_beds_used_7_day_avg", "icu_beds_used_7_day_avg", float), + Columndef("icu_beds_used_7_day_coverage", "icu_beds_used_7_day_coverage", int), + Columndef("icu_beds_used_7_day_sum", "icu_beds_used_7_day_sum", int), + Columndef("icu_patients_confirmed_influenza_7_day_avg", "icu_patients_confirmed_influenza_7_day_avg", float), + Columndef("icu_patients_confirmed_influenza_7_day_coverage", "icu_patients_confirmed_influenza_7_day_coverage", int), + Columndef("icu_patients_confirmed_influenza_7_day_sum", "icu_patients_confirmed_influenza_7_day_sum", int), + Columndef("inpatient_beds_7_day_avg", "inpatient_beds_7_day_avg", float), + Columndef("inpatient_beds_7_day_coverage", "inpatient_beds_7_day_coverage", int), + Columndef("inpatient_beds_7_day_sum", "inpatient_beds_7_day_sum", int), + Columndef("inpatient_beds_used_7_day_avg", "inpatient_beds_used_7_day_avg", float), + Columndef("inpatient_beds_used_7_day_coverage", "inpatient_beds_used_7_day_coverage", int), + Columndef("inpatient_beds_used_7_day_sum", "inpatient_beds_used_7_day_sum", int), + Columndef("is_corrected", "is_corrected", Utils.parse_bool), + Columndef("is_metro_micro", "is_metro_micro", Utils.parse_bool), + Columndef("previous_day_admission_adult_covid_confirmed_18-19_7_day_sum", "previous_day_admission_adult_covid_confirmed_18_19_7_day_sum", int), + Columndef("previous_day_admission_adult_covid_confirmed_20-29_7_day_sum", "previous_day_admission_adult_covid_confirmed_20_29_7_day_sum", int), + Columndef("previous_day_admission_adult_covid_confirmed_30-39_7_day_sum", "previous_day_admission_adult_covid_confirmed_30_39_7_day_sum", int), + Columndef("previous_day_admission_adult_covid_confirmed_40-49_7_day_sum", "previous_day_admission_adult_covid_confirmed_40_49_7_day_sum", int), + Columndef("previous_day_admission_adult_covid_confirmed_50-59_7_day_sum", "previous_day_admission_adult_covid_confirmed_50_59_7_day_sum", int), + 
Columndef("previous_day_admission_adult_covid_confirmed_60-69_7_day_sum", "previous_day_admission_adult_covid_confirmed_60_69_7_day_sum", int), + Columndef("previous_day_admission_adult_covid_confirmed_70-79_7_day_sum", "previous_day_admission_adult_covid_confirmed_70_79_7_day_sum", int), + Columndef("previous_day_admission_adult_covid_confirmed_7_day_coverage", "previous_day_admission_adult_covid_confirmed_7_day_coverage", int), + Columndef("previous_day_admission_adult_covid_confirmed_7_day_sum", "previous_day_admission_adult_covid_confirmed_7_day_sum", int), + Columndef("previous_day_admission_adult_covid_confirmed_80+_7_day_sum", "previous_day_admission_adult_covid_confirmed_80plus_7_day_sum", int), + Columndef("previous_day_admission_adult_covid_confirmed_unknown_7_day_sum", "previous_day_admission_adult_covid_confirmed_unknown_7_day_sum", int), + Columndef("previous_day_admission_adult_covid_suspected_18-19_7_day_sum", "previous_day_admission_adult_covid_suspected_18_19_7_day_sum", int), + Columndef("previous_day_admission_adult_covid_suspected_20-29_7_day_sum", "previous_day_admission_adult_covid_suspected_20_29_7_day_sum", int), + Columndef("previous_day_admission_adult_covid_suspected_30-39_7_day_sum", "previous_day_admission_adult_covid_suspected_30_39_7_day_sum", int), + Columndef("previous_day_admission_adult_covid_suspected_40-49_7_day_sum", "previous_day_admission_adult_covid_suspected_40_49_7_day_sum", int), + Columndef("previous_day_admission_adult_covid_suspected_50-59_7_day_sum", "previous_day_admission_adult_covid_suspected_50_59_7_day_sum", int), + Columndef("previous_day_admission_adult_covid_suspected_60-69_7_day_sum", "previous_day_admission_adult_covid_suspected_60_69_7_day_sum", int), + Columndef("previous_day_admission_adult_covid_suspected_70-79_7_day_sum", "previous_day_admission_adult_covid_suspected_70_79_7_day_sum", int), + Columndef("previous_day_admission_adult_covid_suspected_7_day_coverage", "previous_day_admission_adult_covid_suspected_7_day_coverage", int), + Columndef("previous_day_admission_adult_covid_suspected_7_day_sum", "previous_day_admission_adult_covid_suspected_7_day_sum", int), + Columndef("previous_day_admission_adult_covid_suspected_80+_7_day_sum", "previous_day_admission_adult_covid_suspected_80plus_7_day_sum", int), + Columndef("previous_day_admission_adult_covid_suspected_unknown_7_day_sum", "previous_day_admission_adult_covid_suspected_unknown_7_day_sum", int), + Columndef("previous_day_admission_influenza_confirmed_7_day_sum", "previous_day_admission_influenza_confirmed_7_day_sum", int), + Columndef("previous_day_admission_pediatric_covid_confirmed_7_day_coverage", "previous_day_admission_pediatric_covid_confirmed_7_day_coverage", int), + Columndef("previous_day_admission_pediatric_covid_confirmed_7_day_sum", "previous_day_admission_pediatric_covid_confirmed_7_day_sum", int), + Columndef("previous_day_admission_pediatric_covid_suspected_7_day_coverage", "previous_day_admission_pediatric_covid_suspected_7_day_coverage", int), + Columndef("previous_day_admission_pediatric_covid_suspected_7_day_sum", "previous_day_admission_pediatric_covid_suspected_7_day_sum", int), + Columndef("previous_day_covid_ED_visits_7_day_sum", "previous_day_covid_ed_visits_7_day_sum", int), + Columndef("previous_day_total_ED_visits_7_day_sum", "previous_day_total_ed_visits_7_day_sum", int), + Columndef("previous_week_patients_covid_vaccinated_doses_all_7_day", "previous_week_patients_covid_vaccinated_doses_all_7_day", int), + 
Columndef("previous_week_patients_covid_vaccinated_doses_all_7_day_sum", "previous_week_patients_covid_vaccinated_doses_all_7_day_sum", int), + Columndef("previous_week_patients_covid_vaccinated_doses_one_7_day", "previous_week_patients_covid_vaccinated_doses_one_7_day", int), + Columndef("previous_week_patients_covid_vaccinated_doses_one_7_day_sum", "previous_week_patients_covid_vaccinated_doses_one_7_day_sum", int), + Columndef("previous_week_personnel_covid_vaccinated_doses_administered_7_day", "previous_week_personnel_covid_vaccd_doses_administered_7_day", int), + Columndef("previous_week_personnel_covid_vaccinated_doses_administered_7_day_sum", "previous_week_personnel_covid_vaccd_doses_administered_7_day_sum", int), + Columndef("staffed_adult_icu_bed_occupancy_7_day_avg", "staffed_adult_icu_bed_occupancy_7_day_avg", float), + Columndef("staffed_adult_icu_bed_occupancy_7_day_coverage", "staffed_adult_icu_bed_occupancy_7_day_coverage", int), + Columndef("staffed_adult_icu_bed_occupancy_7_day_sum", "staffed_adult_icu_bed_occupancy_7_day_sum", int), + Columndef("staffed_icu_adult_patients_confirmed_and_suspected_covid_7_day_avg", "staffed_icu_adult_patients_confirmed_suspected_covid_7d_avg", float), + Columndef("staffed_icu_adult_patients_confirmed_and_suspected_covid_7_day_coverage", "staffed_icu_adult_patients_confirmed_suspected_covid_7d_cov", int), + Columndef("staffed_icu_adult_patients_confirmed_and_suspected_covid_7_day_sum", "staffed_icu_adult_patients_confirmed_suspected_covid_7d_sum", int), + Columndef("staffed_icu_adult_patients_confirmed_covid_7_day_avg", "staffed_icu_adult_patients_confirmed_covid_7_day_avg", float), + Columndef("staffed_icu_adult_patients_confirmed_covid_7_day_coverage", "staffed_icu_adult_patients_confirmed_covid_7_day_coverage", int), + Columndef("staffed_icu_adult_patients_confirmed_covid_7_day_sum", "staffed_icu_adult_patients_confirmed_covid_7_day_sum", int), + Columndef("state", "state", str), + Columndef("total_adult_patients_hospitalized_confirmed_and_suspected_covid_7_day_avg", "total_adult_patients_hosp_confirmed_suspected_covid_7d_avg", float), + Columndef("total_adult_patients_hospitalized_confirmed_and_suspected_covid_7_day_coverage", "total_adult_patients_hosp_confirmed_suspected_covid_7d_cov", int), + Columndef("total_adult_patients_hospitalized_confirmed_and_suspected_covid_7_day_sum", "total_adult_patients_hosp_confirmed_suspected_covid_7d_sum", int), + Columndef("total_adult_patients_hospitalized_confirmed_covid_7_day_avg", "total_adult_patients_hospitalized_confirmed_covid_7_day_avg", float), + Columndef("total_adult_patients_hospitalized_confirmed_covid_7_day_coverage", "total_adult_patients_hospitalized_confirmed_covid_7_day_coverage", int), + Columndef("total_adult_patients_hospitalized_confirmed_covid_7_day_sum", "total_adult_patients_hospitalized_confirmed_covid_7_day_sum", int), + Columndef("total_beds_7_day_avg", "total_beds_7_day_avg", float), + Columndef("total_beds_7_day_coverage", "total_beds_7_day_coverage", int), + Columndef("total_beds_7_day_sum", "total_beds_7_day_sum", int), + Columndef("total_icu_beds_7_day_avg", "total_icu_beds_7_day_avg", float), + Columndef("total_icu_beds_7_day_coverage", "total_icu_beds_7_day_coverage", int), + Columndef("total_icu_beds_7_day_sum", "total_icu_beds_7_day_sum", int), + Columndef("total_patients_hospitalized_confirmed_influenza_7_day_avg", "total_patients_hospitalized_confirmed_influenza_7_day_avg", float), + Columndef("total_patients_hospitalized_confirmed_influenza_7_day_coverage", 
"total_patients_hospitalized_confirmed_influenza_7_day_coverage", int), + Columndef("total_patients_hospitalized_confirmed_influenza_7_day_sum", "total_patients_hospitalized_confirmed_influenza_7_day_sum", int), + Columndef("total_patients_hospitalized_confirmed_influenza_and_covid_7_day_avg", "total_patients_hosp_confirmed_influenza_and_covid_7d_avg", float), + Columndef("total_patients_hospitalized_confirmed_influenza_and_covid_7_day_coverage", "total_patients_hosp_confirmed_influenza_and_covid_7d_cov", int), + Columndef("total_patients_hospitalized_confirmed_influenza_and_covid_7_day_sum", "total_patients_hosp_confirmed_influenza_and_covid_7d_sum", int), + Columndef("total_pediatric_patients_hospitalized_confirmed_and_suspected_covid_7_day_avg", "total_pediatric_patients_hosp_confirmed_suspected_covid_7d_avg", float), + Columndef("total_pediatric_patients_hospitalized_confirmed_and_suspected_covid_7_day_coverage", "total_pediatric_patients_hosp_confirmed_suspected_covid_7d_cov", int), + Columndef("total_pediatric_patients_hospitalized_confirmed_and_suspected_covid_7_day_sum", "total_pediatric_patients_hosp_confirmed_suspected_covid_7d_sum", int), + Columndef("total_pediatric_patients_hospitalized_confirmed_covid_7_day_avg", "total_pediatric_patients_hospitalized_confirmed_covid_7_day_avg", float), + Columndef("total_pediatric_patients_hospitalized_confirmed_covid_7_day_coverage", "total_pediatric_patients_hosp_confirmed_covid_7d_cov", int), + Columndef("total_pediatric_patients_hospitalized_confirmed_covid_7_day_sum", "total_pediatric_patients_hospitalized_confirmed_covid_7_day_sum", int), + Columndef("total_personnel_covid_vaccinated_doses_all_7_day", "total_personnel_covid_vaccinated_doses_all_7_day", int), + Columndef("total_personnel_covid_vaccinated_doses_all_7_day_sum", "total_personnel_covid_vaccinated_doses_all_7_day_sum", int), + Columndef("total_personnel_covid_vaccinated_doses_none_7_day", "total_personnel_covid_vaccinated_doses_none_7_day", int), + Columndef("total_personnel_covid_vaccinated_doses_none_7_day_sum", "total_personnel_covid_vaccinated_doses_none_7_day_sum", int), + Columndef("total_personnel_covid_vaccinated_doses_one_7_day", "total_personnel_covid_vaccinated_doses_one_7_day", int), + Columndef("total_personnel_covid_vaccinated_doses_one_7_day_sum", "total_personnel_covid_vaccinated_doses_one_7_day_sum", int), + Columndef("total_staffed_adult_icu_beds_7_day_avg", "total_staffed_adult_icu_beds_7_day_avg", float), + Columndef("total_staffed_adult_icu_beds_7_day_coverage", "total_staffed_adult_icu_beds_7_day_coverage", int), + Columndef("total_staffed_adult_icu_beds_7_day_sum", "total_staffed_adult_icu_beds_7_day_sum", int), + Columndef("zip", "zip", str), + ] - def __init__(self, *args, **kwargs): - super().__init__( - *args, - **kwargs, - table_name=Database.TABLE_NAME, - hhs_dataset_id=Network.DATASET_ID, - key_columns=Database.KEY_COLS, - columns_and_types=Database.ORDERED_CSV_COLUMNS) + def __init__(self, *args, **kwargs): + super().__init__( + *args, + **kwargs, + table_name=Database.TABLE_NAME, + hhs_dataset_id=Network.DATASET_ID, + key_columns=Database.KEY_COLS, + columns_and_types=Database.ORDERED_CSV_COLUMNS, + ) diff --git a/src/acquisition/covid_hosp/facility/network.py b/src/acquisition/covid_hosp/facility/network.py index 6a0092c7f..9ed1bf6ca 100644 --- a/src/acquisition/covid_hosp/facility/network.py +++ b/src/acquisition/covid_hosp/facility/network.py @@ -4,14 +4,13 @@ class Network(BaseNetwork): - DATASET_ID = 'anag-cw7u' - METADATA_ID = 
'j4ip-wfsv' + DATASET_ID = "anag-cw7u" + METADATA_ID = "j4ip-wfsv" - def fetch_metadata(*args, **kwags): - """Download and return metadata. + def fetch_metadata(*args, **kwags): + """Download and return metadata. - See `fetch_metadata_for_dataset`. - """ + See `fetch_metadata_for_dataset`. + """ - return Network.fetch_metadata_for_dataset( - *args, **kwags, dataset_id=Network.METADATA_ID) + return Network.fetch_metadata_for_dataset(*args, **kwags, dataset_id=Network.METADATA_ID) diff --git a/src/acquisition/covid_hosp/facility/update.py b/src/acquisition/covid_hosp/facility/update.py index b2b96c2e3..d269c353a 100644 --- a/src/acquisition/covid_hosp/facility/update.py +++ b/src/acquisition/covid_hosp/facility/update.py @@ -11,17 +11,16 @@ class Update: + def run(network=Network): + """Acquire the most recent dataset, unless it was previously acquired. - def run(network=Network): - """Acquire the most recent dataset, unless it was previously acquired. + Returns + ------- + bool + Whether a new dataset was acquired. + """ - Returns - ------- - bool - Whether a new dataset was acquired. - """ - - return Utils.update_dataset(Database, network) + return Utils.update_dataset(Database, network) # main entry point diff --git a/src/acquisition/covid_hosp/state_daily/database.py b/src/acquisition/covid_hosp/state_daily/database.py index 6a8228994..7dc0a0dbc 100644 --- a/src/acquisition/covid_hosp/state_daily/database.py +++ b/src/acquisition/covid_hosp/state_daily/database.py @@ -7,224 +7,148 @@ class Database(BaseDatabase): - # note we share a database with state_timeseries - TABLE_NAME = 'covid_hosp_state_timeseries' - KEY_COLS = ['state', 'reporting_cutoff_start'] - # These are 3-tuples of (CSV header name, SQL db column name, data type) for - # all the columns in the CSV file. - # Note that the corresponding database column names may be shorter - # due to constraints on the length of column names. See - # /src/ddl/covid_hosp.sql for more information. - # Additionally, all column names below are shared with state_timeseries, - # except for reporting_cutoff_start (here) and date (there). If you need - # to update a column name, do it in both places. 
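The comment above describes each entry as a 3-tuple of (CSV header name, SQL column name, data type); the ORDERED_CSV_COLUMNS list that follows supplies them. Below is a minimal sketch of how such triples could be consumed when loading a CSV row, assuming Columndef is a namedtuple with fields csv_name, sql_name and dtype; the parse_row helper and the sample values are illustrative only and are not part of this diff.

# Illustrative sketch only, not part of this diff.
from collections import namedtuple

Columndef = namedtuple("Columndef", ["csv_name", "sql_name", "dtype"])  # assumed field names

EXAMPLE_COLUMNS = [
    Columndef("state", "state", str),
    Columndef("deaths_covid", "deaths_covid", int),
    Columndef("adult_icu_bed_utilization", "adult_icu_bed_utilization", float),
]

def parse_row(csv_row, columns=EXAMPLE_COLUMNS):
    """Map one CSV row (as a dict) to {sql_column: typed value}, treating '' as NULL."""
    typed = {}
    for col in columns:
        raw = csv_row.get(col.csv_name, "")
        typed[col.sql_name] = col.dtype(raw) if raw != "" else None
    return typed

# parse_row({"state": "PA", "deaths_covid": "12", "adult_icu_bed_utilization": "0.83"})
# returns {"state": "PA", "deaths_covid": 12, "adult_icu_bed_utilization": 0.83}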
- ORDERED_CSV_COLUMNS = [ - Columndef('state', 'state', str), - Columndef('reporting_cutoff_start', 'date', Utils.int_from_date), - Columndef('adult_icu_bed_covid_utilization', 'adult_icu_bed_covid_utilization', float), - Columndef('adult_icu_bed_covid_utilization_coverage', 'adult_icu_bed_covid_utilization_coverage', int), - Columndef('adult_icu_bed_covid_utilization_denominator', 'adult_icu_bed_covid_utilization_denominator', - int), - Columndef('adult_icu_bed_covid_utilization_numerator', 'adult_icu_bed_covid_utilization_numerator', - int), - Columndef('adult_icu_bed_utilization', 'adult_icu_bed_utilization', float), - Columndef('adult_icu_bed_utilization_coverage', 'adult_icu_bed_utilization_coverage', int), - Columndef('adult_icu_bed_utilization_denominator', 'adult_icu_bed_utilization_denominator', int), - Columndef('adult_icu_bed_utilization_numerator', 'adult_icu_bed_utilization_numerator', int), - Columndef('critical_staffing_shortage_anticipated_within_week_no', - 'critical_staffing_shortage_anticipated_within_week_no', int), - Columndef('critical_staffing_shortage_anticipated_within_week_not_reported', - 'critical_staffing_shortage_anticipated_within_week_not_reported', int), - Columndef('critical_staffing_shortage_anticipated_within_week_yes', - 'critical_staffing_shortage_anticipated_within_week_yes', int), - Columndef('critical_staffing_shortage_today_no', 'critical_staffing_shortage_today_no', int), - Columndef('critical_staffing_shortage_today_not_reported', - 'critical_staffing_shortage_today_not_reported', int), - Columndef('critical_staffing_shortage_today_yes', 'critical_staffing_shortage_today_yes', int), - Columndef('deaths_covid', 'deaths_covid', int), - Columndef('deaths_covid_coverage', 'deaths_covid_coverage', int), - Columndef('geocoded_state', 'geocoded_state', str), - Columndef('hospital_onset_covid', 'hospital_onset_covid', int), - Columndef('hospital_onset_covid_coverage', 'hospital_onset_covid_coverage', int), - Columndef('icu_patients_confirmed_influenza', 'icu_patients_confirmed_influenza', int), - Columndef('icu_patients_confirmed_influenza_coverage', 'icu_patients_confirmed_influenza_coverage', - int), - Columndef('inpatient_bed_covid_utilization', 'inpatient_bed_covid_utilization', float), - Columndef('inpatient_bed_covid_utilization_coverage', 'inpatient_bed_covid_utilization_coverage', int), - Columndef('inpatient_bed_covid_utilization_denominator', 'inpatient_bed_covid_utilization_denominator', - int), - Columndef('inpatient_bed_covid_utilization_numerator', 'inpatient_bed_covid_utilization_numerator', - int), - Columndef('inpatient_beds', 'inpatient_beds', int), - Columndef('inpatient_beds_coverage', 'inpatient_beds_coverage', int), - Columndef('inpatient_beds_used', 'inpatient_beds_used', int), - Columndef('inpatient_beds_used_coverage', 'inpatient_beds_used_coverage', int), - Columndef('inpatient_beds_used_covid', 'inpatient_beds_used_covid', int), - Columndef('inpatient_beds_used_covid_coverage', 'inpatient_beds_used_covid_coverage', int), - Columndef('inpatient_beds_utilization', 'inpatient_beds_utilization', float), - Columndef('inpatient_beds_utilization_coverage', 'inpatient_beds_utilization_coverage', int), - Columndef('inpatient_beds_utilization_denominator', 'inpatient_beds_utilization_denominator', int), - Columndef('inpatient_beds_utilization_numerator', 'inpatient_beds_utilization_numerator', int), - Columndef('on_hand_supply_therapeutic_a_casirivimab_imdevimab_courses', - 'on_hand_supply_therapeutic_a_casirivimab_imdevimab_courses', 
int), - Columndef('on_hand_supply_therapeutic_b_bamlanivimab_courses', - 'on_hand_supply_therapeutic_b_bamlanivimab_courses', int), - Columndef('on_hand_supply_therapeutic_c_bamlanivimab_etesevimab_courses', - 'on_hand_supply_therapeutic_c_bamlanivimab_etesevimab_courses', int), - Columndef('percent_of_inpatients_with_covid', 'percent_of_inpatients_with_covid', float), - Columndef('percent_of_inpatients_with_covid_coverage', 'percent_of_inpatients_with_covid_coverage', - int), - Columndef('percent_of_inpatients_with_covid_denominator', - 'percent_of_inpatients_with_covid_denominator', int), - Columndef('percent_of_inpatients_with_covid_numerator', 'percent_of_inpatients_with_covid_numerator', - int), - Columndef('previous_day_admission_adult_covid_confirmed', - 'previous_day_admission_adult_covid_confirmed', int), - Columndef('previous_day_admission_adult_covid_confirmed_18-19', - 'previous_day_admission_adult_covid_confirmed_18_19', int), - Columndef('previous_day_admission_adult_covid_confirmed_18-19_coverage', - 'previous_day_admission_adult_covid_confirmed_18_19_coverage', int), - Columndef('previous_day_admission_adult_covid_confirmed_20-29', - 'previous_day_admission_adult_covid_confirmed_20_29', int), - Columndef('previous_day_admission_adult_covid_confirmed_20-29_coverage', - 'previous_day_admission_adult_covid_confirmed_20_29_coverage', int), - Columndef('previous_day_admission_adult_covid_confirmed_30-39', - 'previous_day_admission_adult_covid_confirmed_30_39', int), - Columndef('previous_day_admission_adult_covid_confirmed_30-39_coverage', - 'previous_day_admission_adult_covid_confirmed_30_39_coverage', int), - Columndef('previous_day_admission_adult_covid_confirmed_40-49', - 'previous_day_admission_adult_covid_confirmed_40_49', int), - Columndef('previous_day_admission_adult_covid_confirmed_40-49_coverage', - 'previous_day_admission_adult_covid_confirmed_40_49_coverage', int), - Columndef('previous_day_admission_adult_covid_confirmed_50-59', - 'previous_day_admission_adult_covid_confirmed_50_59', int), - Columndef('previous_day_admission_adult_covid_confirmed_50-59_coverage', - 'previous_day_admission_adult_covid_confirmed_50_59_coverage', int), - Columndef('previous_day_admission_adult_covid_confirmed_60-69', - 'previous_day_admission_adult_covid_confirmed_60_69', int), - Columndef('previous_day_admission_adult_covid_confirmed_60-69_coverage', - 'previous_day_admission_adult_covid_confirmed_60_69_coverage', int), - Columndef('previous_day_admission_adult_covid_confirmed_70-79', - 'previous_day_admission_adult_covid_confirmed_70_79', int), - Columndef('previous_day_admission_adult_covid_confirmed_70-79_coverage', - 'previous_day_admission_adult_covid_confirmed_70_79_coverage', int), - Columndef('previous_day_admission_adult_covid_confirmed_80+', - 'previous_day_admission_adult_covid_confirmed_80plus', int), - Columndef('previous_day_admission_adult_covid_confirmed_80+_coverage', - 'previous_day_admission_adult_covid_confirmed_80plus_coverage', int), - Columndef('previous_day_admission_adult_covid_confirmed_coverage', - 'previous_day_admission_adult_covid_confirmed_coverage', int), - Columndef('previous_day_admission_adult_covid_confirmed_unknown', - 'previous_day_admission_adult_covid_confirmed_unknown', int), - Columndef('previous_day_admission_adult_covid_confirmed_unknown_coverage', - 'previous_day_admission_adult_covid_confirmed_unknown_coverage', int), - Columndef('previous_day_admission_adult_covid_suspected', - 'previous_day_admission_adult_covid_suspected', int), - 
Columndef('previous_day_admission_adult_covid_suspected_18-19', - 'previous_day_admission_adult_covid_suspected_18_19', int), - Columndef('previous_day_admission_adult_covid_suspected_18-19_coverage', - 'previous_day_admission_adult_covid_suspected_18_19_coverage', int), - Columndef('previous_day_admission_adult_covid_suspected_20-29', - 'previous_day_admission_adult_covid_suspected_20_29', int), - Columndef('previous_day_admission_adult_covid_suspected_20-29_coverage', - 'previous_day_admission_adult_covid_suspected_20_29_coverage', int), - Columndef('previous_day_admission_adult_covid_suspected_30-39', - 'previous_day_admission_adult_covid_suspected_30_39', int), - Columndef('previous_day_admission_adult_covid_suspected_30-39_coverage', - 'previous_day_admission_adult_covid_suspected_30_39_coverage', int), - Columndef('previous_day_admission_adult_covid_suspected_40-49', - 'previous_day_admission_adult_covid_suspected_40_49', int), - Columndef('previous_day_admission_adult_covid_suspected_40-49_coverage', - 'previous_day_admission_adult_covid_suspected_40_49_coverage', int), - Columndef('previous_day_admission_adult_covid_suspected_50-59', - 'previous_day_admission_adult_covid_suspected_50_59', int), - Columndef('previous_day_admission_adult_covid_suspected_50-59_coverage', - 'previous_day_admission_adult_covid_suspected_50_59_coverage', int), - Columndef('previous_day_admission_adult_covid_suspected_60_69', #this is correct; csv header is irregular - 'previous_day_admission_adult_covid_suspected_60_69', int), - Columndef('previous_day_admission_adult_covid_suspected_60-69_coverage', - 'previous_day_admission_adult_covid_suspected_60_69_coverage', int), - Columndef('previous_day_admission_adult_covid_suspected_70-79', - 'previous_day_admission_adult_covid_suspected_70_79', int), - Columndef('previous_day_admission_adult_covid_suspected_70-79_coverage', - 'previous_day_admission_adult_covid_suspected_70_79_coverage', int), - Columndef('previous_day_admission_adult_covid_suspected_80', - 'previous_day_admission_adult_covid_suspected_80plus', int), - Columndef('previous_day_admission_adult_covid_suspected_80+_coverage', - 'previous_day_admission_adult_covid_suspected_80plus_coverage', int), - Columndef('previous_day_admission_adult_covid_suspected_coverage', - 'previous_day_admission_adult_covid_suspected_coverage', int), - Columndef('previous_day_admission_adult_covid_suspected_unknown', - 'previous_day_admission_adult_covid_suspected_unknown', int), - Columndef('previous_day_admission_adult_covid_suspected_unknown_coverage', - 'previous_day_admission_adult_covid_suspected_unknown_coverage', int), - Columndef('previous_day_admission_influenza_confirmed', 'previous_day_admission_influenza_confirmed', - int), - Columndef('previous_day_admission_influenza_confirmed_coverage', - 'previous_day_admission_influenza_confirmed_coverage', int), - Columndef('previous_day_admission_pediatric_covid_confirmed', - 'previous_day_admission_pediatric_covid_confirmed', int), - Columndef('previous_day_admission_pediatric_covid_confirmed_coverage', - 'previous_day_admission_pediatric_covid_confirmed_coverage', int), - Columndef('previous_day_admission_pediatric_covid_suspected', - 'previous_day_admission_pediatric_covid_suspected', int), - Columndef('previous_day_admission_pediatric_covid_suspected_coverage', - 'previous_day_admission_pediatric_covid_suspected_coverage', int), - Columndef('previous_day_deaths_covid_and_influenza', 'previous_day_deaths_covid_and_influenza', int), - 
Columndef('previous_day_deaths_covid_and_influenza_coverage', - 'previous_day_deaths_covid_and_influenza_coverage', int), - Columndef('previous_day_deaths_influenza', 'previous_day_deaths_influenza', int), - Columndef('previous_day_deaths_influenza_coverage', 'previous_day_deaths_influenza_coverage', int), - Columndef('previous_week_therapeutic_a_casirivimab_imdevimab_courses_used', - 'previous_week_therapeutic_a_casirivimab_imdevimab_courses_used', int), - Columndef('previous_week_therapeutic_b_bamlanivimab_courses_used', - 'previous_week_therapeutic_b_bamlanivimab_courses_used', int), - Columndef('previous_week_therapeutic_c_bamlanivimab_etesevimab_courses_used', - 'previous_week_therapeutic_c_bamlanivimab_etesevimab_courses_used', int), - Columndef('staffed_adult_icu_bed_occupancy', 'staffed_adult_icu_bed_occupancy', int), - Columndef('staffed_adult_icu_bed_occupancy_coverage', 'staffed_adult_icu_bed_occupancy_coverage', int), - Columndef('staffed_icu_adult_patients_confirmed_and_suspected_covid', - 'staffed_icu_adult_patients_confirmed_suspected_covid', int), - Columndef('staffed_icu_adult_patients_confirmed_and_suspected_covid_coverage', - 'staffed_icu_adult_patients_confirmed_suspected_covid_coverage', int), - Columndef('staffed_icu_adult_patients_confirmed_covid', 'staffed_icu_adult_patients_confirmed_covid', - int), - Columndef('staffed_icu_adult_patients_confirmed_covid_coverage', - 'staffed_icu_adult_patients_confirmed_covid_coverage', int), - Columndef('total_adult_patients_hospitalized_confirmed_and_suspected_covid', - 'total_adult_patients_hosp_confirmed_suspected_covid', int), - Columndef('total_adult_patients_hospitalized_confirmed_and_suspected_covid_coverage', - 'total_adult_patients_hosp_confirmed_suspected_covid_coverage', int), - Columndef('total_adult_patients_hospitalized_confirmed_covid', - 'total_adult_patients_hosp_confirmed_covid', int), - Columndef('total_adult_patients_hospitalized_confirmed_covid_coverage', - 'total_adult_patients_hosp_confirmed_covid_coverage', int), - Columndef('total_patients_hospitalized_confirmed_influenza', - 'total_patients_hospitalized_confirmed_influenza', int), - Columndef('total_patients_hospitalized_confirmed_influenza_coverage', - 'total_patients_hospitalized_confirmed_influenza_coverage', int), - Columndef('total_patients_hospitalized_confirmed_influenza_covid', - 'total_patients_hospitalized_confirmed_influenza_covid', int), - Columndef('total_patients_hospitalized_confirmed_influenza_covid_coverage', - 'total_patients_hospitalized_confirmed_influenza_covid_coverage', int), - Columndef('total_pediatric_patients_hospitalized_confirmed_and_suspected_covid', - 'total_pediatric_patients_hosp_confirmed_suspected_covid', int), - Columndef('total_pediatric_patients_hospitalized_confirmed_and_suspected_covid_coverage', - 'total_pediatric_patients_hosp_confirmed_suspected_covid_coverage', int), - Columndef('total_pediatric_patients_hospitalized_confirmed_covid', - 'total_pediatric_patients_hosp_confirmed_covid', int), - Columndef('total_pediatric_patients_hospitalized_confirmed_covid_coverage', - 'total_pediatric_patients_hosp_confirmed_covid_coverage', int), - Columndef('total_staffed_adult_icu_beds', 'total_staffed_adult_icu_beds', int), - Columndef('total_staffed_adult_icu_beds_coverage', 'total_staffed_adult_icu_beds_coverage', int), - ] + # note we share a database with state_timeseries + TABLE_NAME = "covid_hosp_state_timeseries" + KEY_COLS = ["state", "reporting_cutoff_start"] + # These are 3-tuples of (CSV header name, SQL db 
column name, data type) for + # all the columns in the CSV file. + # Note that the corresponding database column names may be shorter + # due to constraints on the length of column names. See + # /src/ddl/covid_hosp.sql for more information. + # Additionally, all column names below are shared with state_timeseries, + # except for reporting_cutoff_start (here) and date (there). If you need + # to update a column name, do it in both places. + ORDERED_CSV_COLUMNS = [ + Columndef("state", "state", str), + Columndef("reporting_cutoff_start", "date", Utils.int_from_date), + Columndef("adult_icu_bed_covid_utilization", "adult_icu_bed_covid_utilization", float), + Columndef("adult_icu_bed_covid_utilization_coverage", "adult_icu_bed_covid_utilization_coverage", int), + Columndef("adult_icu_bed_covid_utilization_denominator", "adult_icu_bed_covid_utilization_denominator", int), + Columndef("adult_icu_bed_covid_utilization_numerator", "adult_icu_bed_covid_utilization_numerator", int), + Columndef("adult_icu_bed_utilization", "adult_icu_bed_utilization", float), + Columndef("adult_icu_bed_utilization_coverage", "adult_icu_bed_utilization_coverage", int), + Columndef("adult_icu_bed_utilization_denominator", "adult_icu_bed_utilization_denominator", int), + Columndef("adult_icu_bed_utilization_numerator", "adult_icu_bed_utilization_numerator", int), + Columndef("critical_staffing_shortage_anticipated_within_week_no", "critical_staffing_shortage_anticipated_within_week_no", int), + Columndef("critical_staffing_shortage_anticipated_within_week_not_reported", "critical_staffing_shortage_anticipated_within_week_not_reported", int), + Columndef("critical_staffing_shortage_anticipated_within_week_yes", "critical_staffing_shortage_anticipated_within_week_yes", int), + Columndef("critical_staffing_shortage_today_no", "critical_staffing_shortage_today_no", int), + Columndef("critical_staffing_shortage_today_not_reported", "critical_staffing_shortage_today_not_reported", int), + Columndef("critical_staffing_shortage_today_yes", "critical_staffing_shortage_today_yes", int), + Columndef("deaths_covid", "deaths_covid", int), + Columndef("deaths_covid_coverage", "deaths_covid_coverage", int), + Columndef("geocoded_state", "geocoded_state", str), + Columndef("hospital_onset_covid", "hospital_onset_covid", int), + Columndef("hospital_onset_covid_coverage", "hospital_onset_covid_coverage", int), + Columndef("icu_patients_confirmed_influenza", "icu_patients_confirmed_influenza", int), + Columndef("icu_patients_confirmed_influenza_coverage", "icu_patients_confirmed_influenza_coverage", int), + Columndef("inpatient_bed_covid_utilization", "inpatient_bed_covid_utilization", float), + Columndef("inpatient_bed_covid_utilization_coverage", "inpatient_bed_covid_utilization_coverage", int), + Columndef("inpatient_bed_covid_utilization_denominator", "inpatient_bed_covid_utilization_denominator", int), + Columndef("inpatient_bed_covid_utilization_numerator", "inpatient_bed_covid_utilization_numerator", int), + Columndef("inpatient_beds", "inpatient_beds", int), + Columndef("inpatient_beds_coverage", "inpatient_beds_coverage", int), + Columndef("inpatient_beds_used", "inpatient_beds_used", int), + Columndef("inpatient_beds_used_coverage", "inpatient_beds_used_coverage", int), + Columndef("inpatient_beds_used_covid", "inpatient_beds_used_covid", int), + Columndef("inpatient_beds_used_covid_coverage", "inpatient_beds_used_covid_coverage", int), + Columndef("inpatient_beds_utilization", "inpatient_beds_utilization", float), + 
Columndef("inpatient_beds_utilization_coverage", "inpatient_beds_utilization_coverage", int), + Columndef("inpatient_beds_utilization_denominator", "inpatient_beds_utilization_denominator", int), + Columndef("inpatient_beds_utilization_numerator", "inpatient_beds_utilization_numerator", int), + Columndef("on_hand_supply_therapeutic_a_casirivimab_imdevimab_courses", "on_hand_supply_therapeutic_a_casirivimab_imdevimab_courses", int), + Columndef("on_hand_supply_therapeutic_b_bamlanivimab_courses", "on_hand_supply_therapeutic_b_bamlanivimab_courses", int), + Columndef("on_hand_supply_therapeutic_c_bamlanivimab_etesevimab_courses", "on_hand_supply_therapeutic_c_bamlanivimab_etesevimab_courses", int), + Columndef("percent_of_inpatients_with_covid", "percent_of_inpatients_with_covid", float), + Columndef("percent_of_inpatients_with_covid_coverage", "percent_of_inpatients_with_covid_coverage", int), + Columndef("percent_of_inpatients_with_covid_denominator", "percent_of_inpatients_with_covid_denominator", int), + Columndef("percent_of_inpatients_with_covid_numerator", "percent_of_inpatients_with_covid_numerator", int), + Columndef("previous_day_admission_adult_covid_confirmed", "previous_day_admission_adult_covid_confirmed", int), + Columndef("previous_day_admission_adult_covid_confirmed_18-19", "previous_day_admission_adult_covid_confirmed_18_19", int), + Columndef("previous_day_admission_adult_covid_confirmed_18-19_coverage", "previous_day_admission_adult_covid_confirmed_18_19_coverage", int), + Columndef("previous_day_admission_adult_covid_confirmed_20-29", "previous_day_admission_adult_covid_confirmed_20_29", int), + Columndef("previous_day_admission_adult_covid_confirmed_20-29_coverage", "previous_day_admission_adult_covid_confirmed_20_29_coverage", int), + Columndef("previous_day_admission_adult_covid_confirmed_30-39", "previous_day_admission_adult_covid_confirmed_30_39", int), + Columndef("previous_day_admission_adult_covid_confirmed_30-39_coverage", "previous_day_admission_adult_covid_confirmed_30_39_coverage", int), + Columndef("previous_day_admission_adult_covid_confirmed_40-49", "previous_day_admission_adult_covid_confirmed_40_49", int), + Columndef("previous_day_admission_adult_covid_confirmed_40-49_coverage", "previous_day_admission_adult_covid_confirmed_40_49_coverage", int), + Columndef("previous_day_admission_adult_covid_confirmed_50-59", "previous_day_admission_adult_covid_confirmed_50_59", int), + Columndef("previous_day_admission_adult_covid_confirmed_50-59_coverage", "previous_day_admission_adult_covid_confirmed_50_59_coverage", int), + Columndef("previous_day_admission_adult_covid_confirmed_60-69", "previous_day_admission_adult_covid_confirmed_60_69", int), + Columndef("previous_day_admission_adult_covid_confirmed_60-69_coverage", "previous_day_admission_adult_covid_confirmed_60_69_coverage", int), + Columndef("previous_day_admission_adult_covid_confirmed_70-79", "previous_day_admission_adult_covid_confirmed_70_79", int), + Columndef("previous_day_admission_adult_covid_confirmed_70-79_coverage", "previous_day_admission_adult_covid_confirmed_70_79_coverage", int), + Columndef("previous_day_admission_adult_covid_confirmed_80+", "previous_day_admission_adult_covid_confirmed_80plus", int), + Columndef("previous_day_admission_adult_covid_confirmed_80+_coverage", "previous_day_admission_adult_covid_confirmed_80plus_coverage", int), + Columndef("previous_day_admission_adult_covid_confirmed_coverage", "previous_day_admission_adult_covid_confirmed_coverage", int), + 
Columndef("previous_day_admission_adult_covid_confirmed_unknown", "previous_day_admission_adult_covid_confirmed_unknown", int), + Columndef("previous_day_admission_adult_covid_confirmed_unknown_coverage", "previous_day_admission_adult_covid_confirmed_unknown_coverage", int), + Columndef("previous_day_admission_adult_covid_suspected", "previous_day_admission_adult_covid_suspected", int), + Columndef("previous_day_admission_adult_covid_suspected_18-19", "previous_day_admission_adult_covid_suspected_18_19", int), + Columndef("previous_day_admission_adult_covid_suspected_18-19_coverage", "previous_day_admission_adult_covid_suspected_18_19_coverage", int), + Columndef("previous_day_admission_adult_covid_suspected_20-29", "previous_day_admission_adult_covid_suspected_20_29", int), + Columndef("previous_day_admission_adult_covid_suspected_20-29_coverage", "previous_day_admission_adult_covid_suspected_20_29_coverage", int), + Columndef("previous_day_admission_adult_covid_suspected_30-39", "previous_day_admission_adult_covid_suspected_30_39", int), + Columndef("previous_day_admission_adult_covid_suspected_30-39_coverage", "previous_day_admission_adult_covid_suspected_30_39_coverage", int), + Columndef("previous_day_admission_adult_covid_suspected_40-49", "previous_day_admission_adult_covid_suspected_40_49", int), + Columndef("previous_day_admission_adult_covid_suspected_40-49_coverage", "previous_day_admission_adult_covid_suspected_40_49_coverage", int), + Columndef("previous_day_admission_adult_covid_suspected_50-59", "previous_day_admission_adult_covid_suspected_50_59", int), + Columndef("previous_day_admission_adult_covid_suspected_50-59_coverage", "previous_day_admission_adult_covid_suspected_50_59_coverage", int), + Columndef("previous_day_admission_adult_covid_suspected_60_69", "previous_day_admission_adult_covid_suspected_60_69", int), # this is correct; csv header is irregular + Columndef("previous_day_admission_adult_covid_suspected_60-69_coverage", "previous_day_admission_adult_covid_suspected_60_69_coverage", int), + Columndef("previous_day_admission_adult_covid_suspected_70-79", "previous_day_admission_adult_covid_suspected_70_79", int), + Columndef("previous_day_admission_adult_covid_suspected_70-79_coverage", "previous_day_admission_adult_covid_suspected_70_79_coverage", int), + Columndef("previous_day_admission_adult_covid_suspected_80", "previous_day_admission_adult_covid_suspected_80plus", int), + Columndef("previous_day_admission_adult_covid_suspected_80+_coverage", "previous_day_admission_adult_covid_suspected_80plus_coverage", int), + Columndef("previous_day_admission_adult_covid_suspected_coverage", "previous_day_admission_adult_covid_suspected_coverage", int), + Columndef("previous_day_admission_adult_covid_suspected_unknown", "previous_day_admission_adult_covid_suspected_unknown", int), + Columndef("previous_day_admission_adult_covid_suspected_unknown_coverage", "previous_day_admission_adult_covid_suspected_unknown_coverage", int), + Columndef("previous_day_admission_influenza_confirmed", "previous_day_admission_influenza_confirmed", int), + Columndef("previous_day_admission_influenza_confirmed_coverage", "previous_day_admission_influenza_confirmed_coverage", int), + Columndef("previous_day_admission_pediatric_covid_confirmed", "previous_day_admission_pediatric_covid_confirmed", int), + Columndef("previous_day_admission_pediatric_covid_confirmed_coverage", "previous_day_admission_pediatric_covid_confirmed_coverage", int), + 
Columndef("previous_day_admission_pediatric_covid_suspected", "previous_day_admission_pediatric_covid_suspected", int), + Columndef("previous_day_admission_pediatric_covid_suspected_coverage", "previous_day_admission_pediatric_covid_suspected_coverage", int), + Columndef("previous_day_deaths_covid_and_influenza", "previous_day_deaths_covid_and_influenza", int), + Columndef("previous_day_deaths_covid_and_influenza_coverage", "previous_day_deaths_covid_and_influenza_coverage", int), + Columndef("previous_day_deaths_influenza", "previous_day_deaths_influenza", int), + Columndef("previous_day_deaths_influenza_coverage", "previous_day_deaths_influenza_coverage", int), + Columndef("previous_week_therapeutic_a_casirivimab_imdevimab_courses_used", "previous_week_therapeutic_a_casirivimab_imdevimab_courses_used", int), + Columndef("previous_week_therapeutic_b_bamlanivimab_courses_used", "previous_week_therapeutic_b_bamlanivimab_courses_used", int), + Columndef("previous_week_therapeutic_c_bamlanivimab_etesevimab_courses_used", "previous_week_therapeutic_c_bamlanivimab_etesevimab_courses_used", int), + Columndef("staffed_adult_icu_bed_occupancy", "staffed_adult_icu_bed_occupancy", int), + Columndef("staffed_adult_icu_bed_occupancy_coverage", "staffed_adult_icu_bed_occupancy_coverage", int), + Columndef("staffed_icu_adult_patients_confirmed_and_suspected_covid", "staffed_icu_adult_patients_confirmed_suspected_covid", int), + Columndef("staffed_icu_adult_patients_confirmed_and_suspected_covid_coverage", "staffed_icu_adult_patients_confirmed_suspected_covid_coverage", int), + Columndef("staffed_icu_adult_patients_confirmed_covid", "staffed_icu_adult_patients_confirmed_covid", int), + Columndef("staffed_icu_adult_patients_confirmed_covid_coverage", "staffed_icu_adult_patients_confirmed_covid_coverage", int), + Columndef("total_adult_patients_hospitalized_confirmed_and_suspected_covid", "total_adult_patients_hosp_confirmed_suspected_covid", int), + Columndef("total_adult_patients_hospitalized_confirmed_and_suspected_covid_coverage", "total_adult_patients_hosp_confirmed_suspected_covid_coverage", int), + Columndef("total_adult_patients_hospitalized_confirmed_covid", "total_adult_patients_hosp_confirmed_covid", int), + Columndef("total_adult_patients_hospitalized_confirmed_covid_coverage", "total_adult_patients_hosp_confirmed_covid_coverage", int), + Columndef("total_patients_hospitalized_confirmed_influenza", "total_patients_hospitalized_confirmed_influenza", int), + Columndef("total_patients_hospitalized_confirmed_influenza_coverage", "total_patients_hospitalized_confirmed_influenza_coverage", int), + Columndef("total_patients_hospitalized_confirmed_influenza_covid", "total_patients_hospitalized_confirmed_influenza_covid", int), + Columndef("total_patients_hospitalized_confirmed_influenza_covid_coverage", "total_patients_hospitalized_confirmed_influenza_covid_coverage", int), + Columndef("total_pediatric_patients_hospitalized_confirmed_and_suspected_covid", "total_pediatric_patients_hosp_confirmed_suspected_covid", int), + Columndef( + "total_pediatric_patients_hospitalized_confirmed_and_suspected_covid_coverage", + "total_pediatric_patients_hosp_confirmed_suspected_covid_coverage", + int, + ), + Columndef("total_pediatric_patients_hospitalized_confirmed_covid", "total_pediatric_patients_hosp_confirmed_covid", int), + Columndef("total_pediatric_patients_hospitalized_confirmed_covid_coverage", "total_pediatric_patients_hosp_confirmed_covid_coverage", int), + Columndef("total_staffed_adult_icu_beds", 
"total_staffed_adult_icu_beds", int), + Columndef("total_staffed_adult_icu_beds_coverage", "total_staffed_adult_icu_beds_coverage", int), + ] - def __init__(self, *args, **kwargs): - super().__init__( - *args, - **kwargs, - table_name=Database.TABLE_NAME, - hhs_dataset_id=Network.DATASET_ID, - columns_and_types=Database.ORDERED_CSV_COLUMNS, - key_columns=Database.KEY_COLS, - additional_fields=[Columndef('D', 'record_type', None)]) + def __init__(self, *args, **kwargs): + super().__init__( + *args, + **kwargs, + table_name=Database.TABLE_NAME, + hhs_dataset_id=Network.DATASET_ID, + columns_and_types=Database.ORDERED_CSV_COLUMNS, + key_columns=Database.KEY_COLS, + additional_fields=[Columndef("D", "record_type", None)], + ) diff --git a/src/acquisition/covid_hosp/state_daily/network.py b/src/acquisition/covid_hosp/state_daily/network.py index f4678cc9b..5e4e9e4fb 100644 --- a/src/acquisition/covid_hosp/state_daily/network.py +++ b/src/acquisition/covid_hosp/state_daily/network.py @@ -1,36 +1,36 @@ # first party from delphi.epidata.acquisition.covid_hosp.common.network import Network as BaseNetwork + class Network(BaseNetwork): - DATASET_ID = '6xf2-c3ie' - METADATA_ID = '4cnb-m4rz' + DATASET_ID = "6xf2-c3ie" + METADATA_ID = "4cnb-m4rz" - @staticmethod - def fetch_metadata(*args, **kwags): - """Download and return metadata. + @staticmethod + def fetch_metadata(*args, **kwags): + """Download and return metadata. - See `fetch_metadata_for_dataset`. - """ + See `fetch_metadata_for_dataset`. + """ - return Network.fetch_metadata_for_dataset( - *args, **kwags, dataset_id=Network.METADATA_ID) + return Network.fetch_metadata_for_dataset(*args, **kwags, dataset_id=Network.METADATA_ID) - @staticmethod - def fetch_revisions(metadata, newer_than): - """ - Extract all dataset URLs from metadata for issues after newer_than. + @staticmethod + def fetch_revisions(metadata, newer_than): + """ + Extract all dataset URLs from metadata for issues after newer_than. - Parameters - ---------- - metadata DataFrame - Metadata DF containing all rows of metadata from data source page. + Parameters + ---------- + metadata DataFrame + Metadata DF containing all rows of metadata from data source page. - newer_than Timestamp or datetime - Date and time of issue to use as lower bound for new URLs. + newer_than Timestamp or datetime + Date and time of issue to use as lower bound for new URLs. - Returns - ------- - List of URLs of issues after newer_than - """ - return list(metadata.loc[metadata.index > newer_than, "Archive Link"]) + Returns + ------- + List of URLs of issues after newer_than + """ + return list(metadata.loc[metadata.index > newer_than, "Archive Link"]) diff --git a/src/acquisition/covid_hosp/state_daily/update.py b/src/acquisition/covid_hosp/state_daily/update.py index 12a51e6c3..d44efa369 100644 --- a/src/acquisition/covid_hosp/state_daily/update.py +++ b/src/acquisition/covid_hosp/state_daily/update.py @@ -10,18 +10,17 @@ class Update: + @staticmethod + def run(network=Network): + """Acquire the most recent dataset, unless it was previously acquired. - @staticmethod - def run(network=Network): - """Acquire the most recent dataset, unless it was previously acquired. + Returns + ------- + bool + Whether a new dataset was acquired. + """ - Returns - ------- - bool - Whether a new dataset was acquired. 
- """ - - return Utils.update_dataset(Database, network) + return Utils.update_dataset(Database, network) # main entry point diff --git a/src/acquisition/covid_hosp/state_timeseries/database.py b/src/acquisition/covid_hosp/state_timeseries/database.py index 348d9fc0b..b1d1c98af 100644 --- a/src/acquisition/covid_hosp/state_timeseries/database.py +++ b/src/acquisition/covid_hosp/state_timeseries/database.py @@ -7,223 +7,143 @@ class Database(BaseDatabase): - TABLE_NAME = 'covid_hosp_state_timeseries' - KEY_COLS = ['state', 'date'] - # These are 3-tuples of (CSV header name, SQL db column name, data type) for - # all the columns in the CSV file. - # Note that the corresponding database column names may be shorter - # due to constraints on the length of column names. See - # /src/ddl/covid_hosp.sql for more information. - # Additionally, all column names below are shared with state_daily, - # except for reporting_cutoff_start (there) and date (here). If you need - # to update a column name, do it in both places. - ORDERED_CSV_COLUMNS = [ - Columndef('state', 'state', str), - Columndef('date', 'date', Utils.int_from_date), - Columndef('adult_icu_bed_covid_utilization', 'adult_icu_bed_covid_utilization', float), - Columndef('adult_icu_bed_covid_utilization_coverage', 'adult_icu_bed_covid_utilization_coverage', int), - Columndef('adult_icu_bed_covid_utilization_denominator', 'adult_icu_bed_covid_utilization_denominator', - int), - Columndef('adult_icu_bed_covid_utilization_numerator', 'adult_icu_bed_covid_utilization_numerator', - int), - Columndef('adult_icu_bed_utilization', 'adult_icu_bed_utilization', float), - Columndef('adult_icu_bed_utilization_coverage', 'adult_icu_bed_utilization_coverage', int), - Columndef('adult_icu_bed_utilization_denominator', 'adult_icu_bed_utilization_denominator', int), - Columndef('adult_icu_bed_utilization_numerator', 'adult_icu_bed_utilization_numerator', int), - Columndef('critical_staffing_shortage_anticipated_within_week_no', - 'critical_staffing_shortage_anticipated_within_week_no', int), - Columndef('critical_staffing_shortage_anticipated_within_week_not_reported', - 'critical_staffing_shortage_anticipated_within_week_not_reported', int), - Columndef('critical_staffing_shortage_anticipated_within_week_yes', - 'critical_staffing_shortage_anticipated_within_week_yes', int), - Columndef('critical_staffing_shortage_today_no', 'critical_staffing_shortage_today_no', int), - Columndef('critical_staffing_shortage_today_not_reported', - 'critical_staffing_shortage_today_not_reported', int), - Columndef('critical_staffing_shortage_today_yes', 'critical_staffing_shortage_today_yes', int), - Columndef('deaths_covid', 'deaths_covid', int), - Columndef('deaths_covid_coverage', 'deaths_covid_coverage', int), - Columndef('geocoded_state', 'geocoded_state', str), - Columndef('hospital_onset_covid', 'hospital_onset_covid', int), - Columndef('hospital_onset_covid_coverage', 'hospital_onset_covid_coverage', int), - Columndef('icu_patients_confirmed_influenza', 'icu_patients_confirmed_influenza', int), - Columndef('icu_patients_confirmed_influenza_coverage', 'icu_patients_confirmed_influenza_coverage', - int), - Columndef('inpatient_bed_covid_utilization', 'inpatient_bed_covid_utilization', float), - Columndef('inpatient_bed_covid_utilization_coverage', 'inpatient_bed_covid_utilization_coverage', int), - Columndef('inpatient_bed_covid_utilization_denominator', 'inpatient_bed_covid_utilization_denominator', - int), - Columndef('inpatient_bed_covid_utilization_numerator', 
'inpatient_bed_covid_utilization_numerator', - int), - Columndef('inpatient_beds', 'inpatient_beds', int), - Columndef('inpatient_beds_coverage', 'inpatient_beds_coverage', int), - Columndef('inpatient_beds_used', 'inpatient_beds_used', int), - Columndef('inpatient_beds_used_coverage', 'inpatient_beds_used_coverage', int), - Columndef('inpatient_beds_used_covid', 'inpatient_beds_used_covid', int), - Columndef('inpatient_beds_used_covid_coverage', 'inpatient_beds_used_covid_coverage', int), - Columndef('inpatient_beds_utilization', 'inpatient_beds_utilization', float), - Columndef('inpatient_beds_utilization_coverage', 'inpatient_beds_utilization_coverage', int), - Columndef('inpatient_beds_utilization_denominator', 'inpatient_beds_utilization_denominator', int), - Columndef('inpatient_beds_utilization_numerator', 'inpatient_beds_utilization_numerator', int), - Columndef('on_hand_supply_therapeutic_a_casirivimab_imdevimab_courses', - 'on_hand_supply_therapeutic_a_casirivimab_imdevimab_courses', int), - Columndef('on_hand_supply_therapeutic_b_bamlanivimab_courses', - 'on_hand_supply_therapeutic_b_bamlanivimab_courses', int), - Columndef('on_hand_supply_therapeutic_c_bamlanivimab_etesevimab_courses', - 'on_hand_supply_therapeutic_c_bamlanivimab_etesevimab_courses', int), - Columndef('percent_of_inpatients_with_covid', 'percent_of_inpatients_with_covid', float), - Columndef('percent_of_inpatients_with_covid_coverage', 'percent_of_inpatients_with_covid_coverage', - int), - Columndef('percent_of_inpatients_with_covid_denominator', - 'percent_of_inpatients_with_covid_denominator', int), - Columndef('percent_of_inpatients_with_covid_numerator', 'percent_of_inpatients_with_covid_numerator', - int), - Columndef('previous_day_admission_adult_covid_confirmed', - 'previous_day_admission_adult_covid_confirmed', int), - Columndef('previous_day_admission_adult_covid_confirmed_18-19', - 'previous_day_admission_adult_covid_confirmed_18_19', int), - Columndef('previous_day_admission_adult_covid_confirmed_18-19_coverage', - 'previous_day_admission_adult_covid_confirmed_18_19_coverage', int), - Columndef('previous_day_admission_adult_covid_confirmed_20-29', - 'previous_day_admission_adult_covid_confirmed_20_29', int), - Columndef('previous_day_admission_adult_covid_confirmed_20-29_coverage', - 'previous_day_admission_adult_covid_confirmed_20_29_coverage', int), - Columndef('previous_day_admission_adult_covid_confirmed_30-39', - 'previous_day_admission_adult_covid_confirmed_30_39', int), - Columndef('previous_day_admission_adult_covid_confirmed_30-39_coverage', - 'previous_day_admission_adult_covid_confirmed_30_39_coverage', int), - Columndef('previous_day_admission_adult_covid_confirmed_40-49', - 'previous_day_admission_adult_covid_confirmed_40_49', int), - Columndef('previous_day_admission_adult_covid_confirmed_40-49_coverage', - 'previous_day_admission_adult_covid_confirmed_40_49_coverage', int), - Columndef('previous_day_admission_adult_covid_confirmed_50-59', - 'previous_day_admission_adult_covid_confirmed_50_59', int), - Columndef('previous_day_admission_adult_covid_confirmed_50-59_coverage', - 'previous_day_admission_adult_covid_confirmed_50_59_coverage', int), - Columndef('previous_day_admission_adult_covid_confirmed_60-69', - 'previous_day_admission_adult_covid_confirmed_60_69', int), - Columndef('previous_day_admission_adult_covid_confirmed_60-69_coverage', - 'previous_day_admission_adult_covid_confirmed_60_69_coverage', int), - Columndef('previous_day_admission_adult_covid_confirmed_70-79', - 
'previous_day_admission_adult_covid_confirmed_70_79', int), - Columndef('previous_day_admission_adult_covid_confirmed_70-79_coverage', - 'previous_day_admission_adult_covid_confirmed_70_79_coverage', int), - Columndef('previous_day_admission_adult_covid_confirmed_80+', - 'previous_day_admission_adult_covid_confirmed_80plus', int), - Columndef('previous_day_admission_adult_covid_confirmed_80+_coverage', - 'previous_day_admission_adult_covid_confirmed_80plus_coverage', int), - Columndef('previous_day_admission_adult_covid_confirmed_coverage', - 'previous_day_admission_adult_covid_confirmed_coverage', int), - Columndef('previous_day_admission_adult_covid_confirmed_unknown', - 'previous_day_admission_adult_covid_confirmed_unknown', int), - Columndef('previous_day_admission_adult_covid_confirmed_unknown_coverage', - 'previous_day_admission_adult_covid_confirmed_unknown_coverage', int), - Columndef('previous_day_admission_adult_covid_suspected', - 'previous_day_admission_adult_covid_suspected', int), - Columndef('previous_day_admission_adult_covid_suspected_18-19', - 'previous_day_admission_adult_covid_suspected_18_19', int), - Columndef('previous_day_admission_adult_covid_suspected_18-19_coverage', - 'previous_day_admission_adult_covid_suspected_18_19_coverage', int), - Columndef('previous_day_admission_adult_covid_suspected_20-29', - 'previous_day_admission_adult_covid_suspected_20_29', int), - Columndef('previous_day_admission_adult_covid_suspected_20-29_coverage', - 'previous_day_admission_adult_covid_suspected_20_29_coverage', int), - Columndef('previous_day_admission_adult_covid_suspected_30-39', - 'previous_day_admission_adult_covid_suspected_30_39', int), - Columndef('previous_day_admission_adult_covid_suspected_30-39_coverage', - 'previous_day_admission_adult_covid_suspected_30_39_coverage', int), - Columndef('previous_day_admission_adult_covid_suspected_40-49', - 'previous_day_admission_adult_covid_suspected_40_49', int), - Columndef('previous_day_admission_adult_covid_suspected_40-49_coverage', - 'previous_day_admission_adult_covid_suspected_40_49_coverage', int), - Columndef('previous_day_admission_adult_covid_suspected_50-59', - 'previous_day_admission_adult_covid_suspected_50_59', int), - Columndef('previous_day_admission_adult_covid_suspected_50-59_coverage', - 'previous_day_admission_adult_covid_suspected_50_59_coverage', int), - Columndef('previous_day_admission_adult_covid_suspected_60-69', - 'previous_day_admission_adult_covid_suspected_60_69', int), - Columndef('previous_day_admission_adult_covid_suspected_60-69_coverage', - 'previous_day_admission_adult_covid_suspected_60_69_coverage', int), - Columndef('previous_day_admission_adult_covid_suspected_70-79', - 'previous_day_admission_adult_covid_suspected_70_79', int), - Columndef('previous_day_admission_adult_covid_suspected_70-79_coverage', - 'previous_day_admission_adult_covid_suspected_70_79_coverage', int), - Columndef('previous_day_admission_adult_covid_suspected_80+', - 'previous_day_admission_adult_covid_suspected_80plus', int), - Columndef('previous_day_admission_adult_covid_suspected_80+_coverage', - 'previous_day_admission_adult_covid_suspected_80plus_coverage', int), - Columndef('previous_day_admission_adult_covid_suspected_coverage', - 'previous_day_admission_adult_covid_suspected_coverage', int), - Columndef('previous_day_admission_adult_covid_suspected_unknown', - 'previous_day_admission_adult_covid_suspected_unknown', int), - Columndef('previous_day_admission_adult_covid_suspected_unknown_coverage', - 
'previous_day_admission_adult_covid_suspected_unknown_coverage', int), - Columndef('previous_day_admission_influenza_confirmed', 'previous_day_admission_influenza_confirmed', - int), - Columndef('previous_day_admission_influenza_confirmed_coverage', - 'previous_day_admission_influenza_confirmed_coverage', int), - Columndef('previous_day_admission_pediatric_covid_confirmed', - 'previous_day_admission_pediatric_covid_confirmed', int), - Columndef('previous_day_admission_pediatric_covid_confirmed_coverage', - 'previous_day_admission_pediatric_covid_confirmed_coverage', int), - Columndef('previous_day_admission_pediatric_covid_suspected', - 'previous_day_admission_pediatric_covid_suspected', int), - Columndef('previous_day_admission_pediatric_covid_suspected_coverage', - 'previous_day_admission_pediatric_covid_suspected_coverage', int), - Columndef('previous_day_deaths_covid_and_influenza', 'previous_day_deaths_covid_and_influenza', int), - Columndef('previous_day_deaths_covid_and_influenza_coverage', - 'previous_day_deaths_covid_and_influenza_coverage', int), - Columndef('previous_day_deaths_influenza', 'previous_day_deaths_influenza', int), - Columndef('previous_day_deaths_influenza_coverage', 'previous_day_deaths_influenza_coverage', int), - Columndef('previous_week_therapeutic_a_casirivimab_imdevimab_courses_used', - 'previous_week_therapeutic_a_casirivimab_imdevimab_courses_used', int), - Columndef('previous_week_therapeutic_b_bamlanivimab_courses_used', - 'previous_week_therapeutic_b_bamlanivimab_courses_used', int), - Columndef('previous_week_therapeutic_c_bamlanivimab_etesevimab_courses_used', - 'previous_week_therapeutic_c_bamlanivimab_etesevimab_courses_used', int), - Columndef('staffed_adult_icu_bed_occupancy', 'staffed_adult_icu_bed_occupancy', int), - Columndef('staffed_adult_icu_bed_occupancy_coverage', 'staffed_adult_icu_bed_occupancy_coverage', int), - Columndef('staffed_icu_adult_patients_confirmed_and_suspected_covid', - 'staffed_icu_adult_patients_confirmed_suspected_covid', int), - Columndef('staffed_icu_adult_patients_confirmed_and_suspected_covid_coverage', - 'staffed_icu_adult_patients_confirmed_suspected_covid_coverage', int), - Columndef('staffed_icu_adult_patients_confirmed_covid', 'staffed_icu_adult_patients_confirmed_covid', - int), - Columndef('staffed_icu_adult_patients_confirmed_covid_coverage', - 'staffed_icu_adult_patients_confirmed_covid_coverage', int), - Columndef('total_adult_patients_hospitalized_confirmed_and_suspected_covid', - 'total_adult_patients_hosp_confirmed_suspected_covid', int), - Columndef('total_adult_patients_hospitalized_confirmed_and_suspected_covid_coverage', - 'total_adult_patients_hosp_confirmed_suspected_covid_coverage', int), - Columndef('total_adult_patients_hospitalized_confirmed_covid', - 'total_adult_patients_hosp_confirmed_covid', int), - Columndef('total_adult_patients_hospitalized_confirmed_covid_coverage', - 'total_adult_patients_hosp_confirmed_covid_coverage', int), - Columndef('total_patients_hospitalized_confirmed_influenza', - 'total_patients_hospitalized_confirmed_influenza', int), - Columndef('total_patients_hospitalized_confirmed_influenza_coverage', - 'total_patients_hospitalized_confirmed_influenza_coverage', int), - Columndef('total_patients_hospitalized_confirmed_influenza_covid', - 'total_patients_hospitalized_confirmed_influenza_covid', int), - Columndef('total_patients_hospitalized_confirmed_influenza_covid_coverage', - 'total_patients_hospitalized_confirmed_influenza_covid_coverage', int), - 
Columndef('total_pediatric_patients_hospitalized_confirmed_and_suspected_covid', - 'total_pediatric_patients_hosp_confirmed_suspected_covid', int), - Columndef('total_pediatric_patients_hospitalized_confirmed_and_suspected_covid_coverage', - 'total_pediatric_patients_hosp_confirmed_suspected_covid_coverage', int), - Columndef('total_pediatric_patients_hospitalized_confirmed_covid', - 'total_pediatric_patients_hosp_confirmed_covid', int), - Columndef('total_pediatric_patients_hospitalized_confirmed_covid_coverage', - 'total_pediatric_patients_hosp_confirmed_covid_coverage', int), - Columndef('total_staffed_adult_icu_beds', 'total_staffed_adult_icu_beds', int), - Columndef('total_staffed_adult_icu_beds_coverage', 'total_staffed_adult_icu_beds_coverage', int), - ] + TABLE_NAME = "covid_hosp_state_timeseries" + KEY_COLS = ["state", "date"] + # These are 3-tuples of (CSV header name, SQL db column name, data type) for + # all the columns in the CSV file. + # Note that the corresponding database column names may be shorter + # due to constraints on the length of column names. See + # /src/ddl/covid_hosp.sql for more information. + # Additionally, all column names below are shared with state_daily, + # except for reporting_cutoff_start (there) and date (here). If you need + # to update a column name, do it in both places. + ORDERED_CSV_COLUMNS = [ + Columndef("state", "state", str), + Columndef("date", "date", Utils.int_from_date), + Columndef("adult_icu_bed_covid_utilization", "adult_icu_bed_covid_utilization", float), + Columndef("adult_icu_bed_covid_utilization_coverage", "adult_icu_bed_covid_utilization_coverage", int), + Columndef("adult_icu_bed_covid_utilization_denominator", "adult_icu_bed_covid_utilization_denominator", int), + Columndef("adult_icu_bed_covid_utilization_numerator", "adult_icu_bed_covid_utilization_numerator", int), + Columndef("adult_icu_bed_utilization", "adult_icu_bed_utilization", float), + Columndef("adult_icu_bed_utilization_coverage", "adult_icu_bed_utilization_coverage", int), + Columndef("adult_icu_bed_utilization_denominator", "adult_icu_bed_utilization_denominator", int), + Columndef("adult_icu_bed_utilization_numerator", "adult_icu_bed_utilization_numerator", int), + Columndef("critical_staffing_shortage_anticipated_within_week_no", "critical_staffing_shortage_anticipated_within_week_no", int), + Columndef("critical_staffing_shortage_anticipated_within_week_not_reported", "critical_staffing_shortage_anticipated_within_week_not_reported", int), + Columndef("critical_staffing_shortage_anticipated_within_week_yes", "critical_staffing_shortage_anticipated_within_week_yes", int), + Columndef("critical_staffing_shortage_today_no", "critical_staffing_shortage_today_no", int), + Columndef("critical_staffing_shortage_today_not_reported", "critical_staffing_shortage_today_not_reported", int), + Columndef("critical_staffing_shortage_today_yes", "critical_staffing_shortage_today_yes", int), + Columndef("deaths_covid", "deaths_covid", int), + Columndef("deaths_covid_coverage", "deaths_covid_coverage", int), + Columndef("geocoded_state", "geocoded_state", str), + Columndef("hospital_onset_covid", "hospital_onset_covid", int), + Columndef("hospital_onset_covid_coverage", "hospital_onset_covid_coverage", int), + Columndef("icu_patients_confirmed_influenza", "icu_patients_confirmed_influenza", int), + Columndef("icu_patients_confirmed_influenza_coverage", "icu_patients_confirmed_influenza_coverage", int), + Columndef("inpatient_bed_covid_utilization", 
"inpatient_bed_covid_utilization", float), + Columndef("inpatient_bed_covid_utilization_coverage", "inpatient_bed_covid_utilization_coverage", int), + Columndef("inpatient_bed_covid_utilization_denominator", "inpatient_bed_covid_utilization_denominator", int), + Columndef("inpatient_bed_covid_utilization_numerator", "inpatient_bed_covid_utilization_numerator", int), + Columndef("inpatient_beds", "inpatient_beds", int), + Columndef("inpatient_beds_coverage", "inpatient_beds_coverage", int), + Columndef("inpatient_beds_used", "inpatient_beds_used", int), + Columndef("inpatient_beds_used_coverage", "inpatient_beds_used_coverage", int), + Columndef("inpatient_beds_used_covid", "inpatient_beds_used_covid", int), + Columndef("inpatient_beds_used_covid_coverage", "inpatient_beds_used_covid_coverage", int), + Columndef("inpatient_beds_utilization", "inpatient_beds_utilization", float), + Columndef("inpatient_beds_utilization_coverage", "inpatient_beds_utilization_coverage", int), + Columndef("inpatient_beds_utilization_denominator", "inpatient_beds_utilization_denominator", int), + Columndef("inpatient_beds_utilization_numerator", "inpatient_beds_utilization_numerator", int), + Columndef("on_hand_supply_therapeutic_a_casirivimab_imdevimab_courses", "on_hand_supply_therapeutic_a_casirivimab_imdevimab_courses", int), + Columndef("on_hand_supply_therapeutic_b_bamlanivimab_courses", "on_hand_supply_therapeutic_b_bamlanivimab_courses", int), + Columndef("on_hand_supply_therapeutic_c_bamlanivimab_etesevimab_courses", "on_hand_supply_therapeutic_c_bamlanivimab_etesevimab_courses", int), + Columndef("percent_of_inpatients_with_covid", "percent_of_inpatients_with_covid", float), + Columndef("percent_of_inpatients_with_covid_coverage", "percent_of_inpatients_with_covid_coverage", int), + Columndef("percent_of_inpatients_with_covid_denominator", "percent_of_inpatients_with_covid_denominator", int), + Columndef("percent_of_inpatients_with_covid_numerator", "percent_of_inpatients_with_covid_numerator", int), + Columndef("previous_day_admission_adult_covid_confirmed", "previous_day_admission_adult_covid_confirmed", int), + Columndef("previous_day_admission_adult_covid_confirmed_18-19", "previous_day_admission_adult_covid_confirmed_18_19", int), + Columndef("previous_day_admission_adult_covid_confirmed_18-19_coverage", "previous_day_admission_adult_covid_confirmed_18_19_coverage", int), + Columndef("previous_day_admission_adult_covid_confirmed_20-29", "previous_day_admission_adult_covid_confirmed_20_29", int), + Columndef("previous_day_admission_adult_covid_confirmed_20-29_coverage", "previous_day_admission_adult_covid_confirmed_20_29_coverage", int), + Columndef("previous_day_admission_adult_covid_confirmed_30-39", "previous_day_admission_adult_covid_confirmed_30_39", int), + Columndef("previous_day_admission_adult_covid_confirmed_30-39_coverage", "previous_day_admission_adult_covid_confirmed_30_39_coverage", int), + Columndef("previous_day_admission_adult_covid_confirmed_40-49", "previous_day_admission_adult_covid_confirmed_40_49", int), + Columndef("previous_day_admission_adult_covid_confirmed_40-49_coverage", "previous_day_admission_adult_covid_confirmed_40_49_coverage", int), + Columndef("previous_day_admission_adult_covid_confirmed_50-59", "previous_day_admission_adult_covid_confirmed_50_59", int), + Columndef("previous_day_admission_adult_covid_confirmed_50-59_coverage", "previous_day_admission_adult_covid_confirmed_50_59_coverage", int), + Columndef("previous_day_admission_adult_covid_confirmed_60-69", 
"previous_day_admission_adult_covid_confirmed_60_69", int), + Columndef("previous_day_admission_adult_covid_confirmed_60-69_coverage", "previous_day_admission_adult_covid_confirmed_60_69_coverage", int), + Columndef("previous_day_admission_adult_covid_confirmed_70-79", "previous_day_admission_adult_covid_confirmed_70_79", int), + Columndef("previous_day_admission_adult_covid_confirmed_70-79_coverage", "previous_day_admission_adult_covid_confirmed_70_79_coverage", int), + Columndef("previous_day_admission_adult_covid_confirmed_80+", "previous_day_admission_adult_covid_confirmed_80plus", int), + Columndef("previous_day_admission_adult_covid_confirmed_80+_coverage", "previous_day_admission_adult_covid_confirmed_80plus_coverage", int), + Columndef("previous_day_admission_adult_covid_confirmed_coverage", "previous_day_admission_adult_covid_confirmed_coverage", int), + Columndef("previous_day_admission_adult_covid_confirmed_unknown", "previous_day_admission_adult_covid_confirmed_unknown", int), + Columndef("previous_day_admission_adult_covid_confirmed_unknown_coverage", "previous_day_admission_adult_covid_confirmed_unknown_coverage", int), + Columndef("previous_day_admission_adult_covid_suspected", "previous_day_admission_adult_covid_suspected", int), + Columndef("previous_day_admission_adult_covid_suspected_18-19", "previous_day_admission_adult_covid_suspected_18_19", int), + Columndef("previous_day_admission_adult_covid_suspected_18-19_coverage", "previous_day_admission_adult_covid_suspected_18_19_coverage", int), + Columndef("previous_day_admission_adult_covid_suspected_20-29", "previous_day_admission_adult_covid_suspected_20_29", int), + Columndef("previous_day_admission_adult_covid_suspected_20-29_coverage", "previous_day_admission_adult_covid_suspected_20_29_coverage", int), + Columndef("previous_day_admission_adult_covid_suspected_30-39", "previous_day_admission_adult_covid_suspected_30_39", int), + Columndef("previous_day_admission_adult_covid_suspected_30-39_coverage", "previous_day_admission_adult_covid_suspected_30_39_coverage", int), + Columndef("previous_day_admission_adult_covid_suspected_40-49", "previous_day_admission_adult_covid_suspected_40_49", int), + Columndef("previous_day_admission_adult_covid_suspected_40-49_coverage", "previous_day_admission_adult_covid_suspected_40_49_coverage", int), + Columndef("previous_day_admission_adult_covid_suspected_50-59", "previous_day_admission_adult_covid_suspected_50_59", int), + Columndef("previous_day_admission_adult_covid_suspected_50-59_coverage", "previous_day_admission_adult_covid_suspected_50_59_coverage", int), + Columndef("previous_day_admission_adult_covid_suspected_60-69", "previous_day_admission_adult_covid_suspected_60_69", int), + Columndef("previous_day_admission_adult_covid_suspected_60-69_coverage", "previous_day_admission_adult_covid_suspected_60_69_coverage", int), + Columndef("previous_day_admission_adult_covid_suspected_70-79", "previous_day_admission_adult_covid_suspected_70_79", int), + Columndef("previous_day_admission_adult_covid_suspected_70-79_coverage", "previous_day_admission_adult_covid_suspected_70_79_coverage", int), + Columndef("previous_day_admission_adult_covid_suspected_80+", "previous_day_admission_adult_covid_suspected_80plus", int), + Columndef("previous_day_admission_adult_covid_suspected_80+_coverage", "previous_day_admission_adult_covid_suspected_80plus_coverage", int), + Columndef("previous_day_admission_adult_covid_suspected_coverage", "previous_day_admission_adult_covid_suspected_coverage", 
int), + Columndef("previous_day_admission_adult_covid_suspected_unknown", "previous_day_admission_adult_covid_suspected_unknown", int), + Columndef("previous_day_admission_adult_covid_suspected_unknown_coverage", "previous_day_admission_adult_covid_suspected_unknown_coverage", int), + Columndef("previous_day_admission_influenza_confirmed", "previous_day_admission_influenza_confirmed", int), + Columndef("previous_day_admission_influenza_confirmed_coverage", "previous_day_admission_influenza_confirmed_coverage", int), + Columndef("previous_day_admission_pediatric_covid_confirmed", "previous_day_admission_pediatric_covid_confirmed", int), + Columndef("previous_day_admission_pediatric_covid_confirmed_coverage", "previous_day_admission_pediatric_covid_confirmed_coverage", int), + Columndef("previous_day_admission_pediatric_covid_suspected", "previous_day_admission_pediatric_covid_suspected", int), + Columndef("previous_day_admission_pediatric_covid_suspected_coverage", "previous_day_admission_pediatric_covid_suspected_coverage", int), + Columndef("previous_day_deaths_covid_and_influenza", "previous_day_deaths_covid_and_influenza", int), + Columndef("previous_day_deaths_covid_and_influenza_coverage", "previous_day_deaths_covid_and_influenza_coverage", int), + Columndef("previous_day_deaths_influenza", "previous_day_deaths_influenza", int), + Columndef("previous_day_deaths_influenza_coverage", "previous_day_deaths_influenza_coverage", int), + Columndef("previous_week_therapeutic_a_casirivimab_imdevimab_courses_used", "previous_week_therapeutic_a_casirivimab_imdevimab_courses_used", int), + Columndef("previous_week_therapeutic_b_bamlanivimab_courses_used", "previous_week_therapeutic_b_bamlanivimab_courses_used", int), + Columndef("previous_week_therapeutic_c_bamlanivimab_etesevimab_courses_used", "previous_week_therapeutic_c_bamlanivimab_etesevimab_courses_used", int), + Columndef("staffed_adult_icu_bed_occupancy", "staffed_adult_icu_bed_occupancy", int), + Columndef("staffed_adult_icu_bed_occupancy_coverage", "staffed_adult_icu_bed_occupancy_coverage", int), + Columndef("staffed_icu_adult_patients_confirmed_and_suspected_covid", "staffed_icu_adult_patients_confirmed_suspected_covid", int), + Columndef("staffed_icu_adult_patients_confirmed_and_suspected_covid_coverage", "staffed_icu_adult_patients_confirmed_suspected_covid_coverage", int), + Columndef("staffed_icu_adult_patients_confirmed_covid", "staffed_icu_adult_patients_confirmed_covid", int), + Columndef("staffed_icu_adult_patients_confirmed_covid_coverage", "staffed_icu_adult_patients_confirmed_covid_coverage", int), + Columndef("total_adult_patients_hospitalized_confirmed_and_suspected_covid", "total_adult_patients_hosp_confirmed_suspected_covid", int), + Columndef("total_adult_patients_hospitalized_confirmed_and_suspected_covid_coverage", "total_adult_patients_hosp_confirmed_suspected_covid_coverage", int), + Columndef("total_adult_patients_hospitalized_confirmed_covid", "total_adult_patients_hosp_confirmed_covid", int), + Columndef("total_adult_patients_hospitalized_confirmed_covid_coverage", "total_adult_patients_hosp_confirmed_covid_coverage", int), + Columndef("total_patients_hospitalized_confirmed_influenza", "total_patients_hospitalized_confirmed_influenza", int), + Columndef("total_patients_hospitalized_confirmed_influenza_coverage", "total_patients_hospitalized_confirmed_influenza_coverage", int), + Columndef("total_patients_hospitalized_confirmed_influenza_covid", "total_patients_hospitalized_confirmed_influenza_covid", int), + 
Columndef("total_patients_hospitalized_confirmed_influenza_covid_coverage", "total_patients_hospitalized_confirmed_influenza_covid_coverage", int), + Columndef("total_pediatric_patients_hospitalized_confirmed_and_suspected_covid", "total_pediatric_patients_hosp_confirmed_suspected_covid", int), + Columndef("total_pediatric_patients_hospitalized_confirmed_and_suspected_covid_coverage", "total_pediatric_patients_hosp_confirmed_suspected_covid_coverage", int), + Columndef("total_pediatric_patients_hospitalized_confirmed_covid", "total_pediatric_patients_hosp_confirmed_covid", int), + Columndef("total_pediatric_patients_hospitalized_confirmed_covid_coverage", "total_pediatric_patients_hosp_confirmed_covid_coverage", int), + Columndef("total_staffed_adult_icu_beds", "total_staffed_adult_icu_beds", int), + Columndef("total_staffed_adult_icu_beds_coverage", "total_staffed_adult_icu_beds_coverage", int), + ] - def __init__(self, *args, **kwargs): - super().__init__( - *args, - **kwargs, - table_name=Database.TABLE_NAME, - hhs_dataset_id=Network.DATASET_ID, - columns_and_types=Database.ORDERED_CSV_COLUMNS, - key_columns=Database.KEY_COLS, - additional_fields=[Columndef('T', 'record_type', None)]) + def __init__(self, *args, **kwargs): + super().__init__( + *args, + **kwargs, + table_name=Database.TABLE_NAME, + hhs_dataset_id=Network.DATASET_ID, + columns_and_types=Database.ORDERED_CSV_COLUMNS, + key_columns=Database.KEY_COLS, + additional_fields=[Columndef("T", "record_type", None)], + ) diff --git a/src/acquisition/covid_hosp/state_timeseries/network.py b/src/acquisition/covid_hosp/state_timeseries/network.py index 7bd5082a8..ff53900db 100644 --- a/src/acquisition/covid_hosp/state_timeseries/network.py +++ b/src/acquisition/covid_hosp/state_timeseries/network.py @@ -4,14 +4,13 @@ class Network(BaseNetwork): - DATASET_ID = 'g62h-syeh' - METADATA_ID = 'qqte-vkut' + DATASET_ID = "g62h-syeh" + METADATA_ID = "qqte-vkut" - def fetch_metadata(*args, **kwags): - """Download and return metadata. + def fetch_metadata(*args, **kwags): + """Download and return metadata. - See `fetch_metadata_for_dataset`. - """ + See `fetch_metadata_for_dataset`. + """ - return Network.fetch_metadata_for_dataset( - *args, **kwags, dataset_id=Network.METADATA_ID) + return Network.fetch_metadata_for_dataset(*args, **kwags, dataset_id=Network.METADATA_ID) diff --git a/src/acquisition/covid_hosp/state_timeseries/update.py b/src/acquisition/covid_hosp/state_timeseries/update.py index 7c8e79941..873de218b 100644 --- a/src/acquisition/covid_hosp/state_timeseries/update.py +++ b/src/acquisition/covid_hosp/state_timeseries/update.py @@ -11,17 +11,16 @@ class Update: + def run(network=Network): + """Acquire the most recent dataset, unless it was previously acquired. - def run(network=Network): - """Acquire the most recent dataset, unless it was previously acquired. + Returns + ------- + bool + Whether a new dataset was acquired. + """ - Returns - ------- - bool - Whether a new dataset was acquired. 
- """ - - return Utils.update_dataset(Database, network) + return Utils.update_dataset(Database, network) # main entry point diff --git a/src/acquisition/covidcast_nowcast/load_sensors.py b/src/acquisition/covidcast_nowcast/load_sensors.py index 73ce7eee5..6a181d236 100644 --- a/src/acquisition/covidcast_nowcast/load_sensors.py +++ b/src/acquisition/covidcast_nowcast/load_sensors.py @@ -82,8 +82,7 @@ def load_and_prepare_file(filepath: str, attributes: PathDetails) -> pd.DataFram def _move_after_processing(filepath, success): archive_dir = SUCCESS_DIR if success else FAIL_DIR - new_dir = os.path.dirname(filepath).replace( - "receiving", archive_dir) + new_dir = os.path.dirname(filepath).replace("receiving", archive_dir) os.makedirs(new_dir, exist_ok=True) move(filepath, filepath.replace("receiving", archive_dir)) print(f"{filepath} moved to {archive_dir}") @@ -96,10 +95,12 @@ def method(table, conn, keys, data_iter): meta, # specify lag column explicitly; lag is a reserved word sqlalchemy doesn't know about sqlalchemy.Column("lag", sqlalchemy.Integer, quote=True), - autoload=True) + autoload=True, + ) insert_stmt = sqlalchemy.dialects.mysql.insert(sql_table).values([dict(zip(keys, data)) for data in data_iter]) upsert_stmt = insert_stmt.on_duplicate_key_update({x.name: x for x in insert_stmt.inserted}) conn.execute(upsert_stmt) + return method diff --git a/src/acquisition/ecdc/ecdc_db_update.py b/src/acquisition/ecdc/ecdc_db_update.py index 63689c1d5..2a951b724 100644 --- a/src/acquisition/ecdc/ecdc_db_update.py +++ b/src/acquisition/ecdc/ecdc_db_update.py @@ -33,9 +33,8 @@ import argparse import datetime import glob -import subprocess -import random import os +import tempfile # third party import mysql.connector @@ -46,12 +45,14 @@ from delphi.utils.epiweek import delta_epiweeks from delphi.utils.epidate import EpiDate + def ensure_tables_exist(): - (u,p) = secrets.db.epi - cnx = mysql.connector.connect(user=u,password=p,database='epidata') + (u, p) = secrets.db.epi + cnx = mysql.connector.connect(user=u, password=p, database="epidata") try: cursor = cnx.cursor() - cursor.execute(''' + cursor.execute( + """ CREATE TABLE IF NOT EXISTS `ecdc_ili` ( `id` INT(11) NOT NULL PRIMARY KEY AUTO_INCREMENT, `release_date` DATE NOT NULL, @@ -62,58 +63,63 @@ def ensure_tables_exist(): `incidence_rate` DOUBLE NOT NULL, UNIQUE KEY (`issue`, `epiweek`, `region`) ); - '''); + """ + ) cnx.commit() finally: cnx.close() + def safe_float(f): try: - return float(f.replace(',','')) + return float(f.replace(",", "")) except: return 0 + def safe_int(i): try: - return int(i.replace(',','')) + return int(i.replace(",", "")) except: return 0 -def get_rows(cnx, table='ecdc_ili'): - # Count and return the number of rows in the `ecdc_ili` table. - select = cnx.cursor() - select.execute('SELECT count(1) num FROM %s' % table) - for (num,) in select: - pass - select.close() - return num + +def get_rows(cnx, table="ecdc_ili"): + # Count and return the number of rows in the `ecdc_ili` table. + select = cnx.cursor() + select.execute("SELECT count(1) num FROM %s" % table) + for (num,) in select: + pass + select.close() + return num + def update_from_file(issue, date, dir, test_mode=False): # Read ECDC data from CSVs and insert into (or update) the database. 
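The body below builds one INSERT ... ON DUPLICATE KEY UPDATE statement per CSV row, keyed on the unique (`issue`, `epiweek`, `region`) index created by ensure_tables_exist(), so re-running an acquisition refreshes `release_date` and `incidence_rate` instead of inserting duplicate rows. A minimal sketch of that upsert pattern, assuming the `ecdc_ili` schema above and an already-open mysql.connector connection `cnx` (the helper name `upsert_ili_row` is illustrative, not part of this module):

    def upsert_ili_row(cnx, release_date, issue, epiweek, region, lag, incidence_rate):
        # one parameterized statement per row; the unique key on
        # (issue, epiweek, region) turns repeated loads into updates
        sql = """
            INSERT INTO `ecdc_ili`
                (`release_date`, `issue`, `epiweek`, `region`, `lag`, `incidence_rate`)
            VALUES (%s, %s, %s, %s, %s, %s)
            ON DUPLICATE KEY UPDATE
                `release_date` = LEAST(`release_date`, VALUES(`release_date`)),
                `incidence_rate` = VALUES(`incidence_rate`)
        """
        cur = cnx.cursor()
        cur.execute(sql, (release_date, issue, epiweek, region, lag, incidence_rate))
        cur.close()

The get_rows() calls before and after the load exist for the same reason: the row-count delta reports how many rows were genuinely new rather than merely refreshed.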
# database connection u, p = secrets.db.epi - cnx = mysql.connector.connect(user=u, password=p, database='epidata') - rows1 = get_rows(cnx, 'ecdc_ili') - print('rows before: %d' % (rows1)) + cnx = mysql.connector.connect(user=u, password=p, database="epidata") + rows1 = get_rows(cnx, "ecdc_ili") + print("rows before: %d" % (rows1)) insert = cnx.cursor() # load the data, ignoring empty rows - files = glob.glob(os.path.join(dir,"*.csv")) + files = glob.glob(os.path.join(dir, "*.csv")) rows = [] for filename in files: - with open(filename,'r') as f: + with open(filename, "r") as f: for l in f: - data = list(map(lambda s: s.strip().replace('"',''),l.split(','))) + data = list(map(lambda s: s.strip().replace('"', ""), l.split(","))) row = {} - row['epiweek'] = int(data[1][:4] + data[1][5:]) - row['region'] = data[4] - row['incidence_rate'] = data[3] + row["epiweek"] = int(data[1][:4] + data[1][5:]) + row["region"] = data[4] + row["incidence_rate"] = data[3] rows.append(row) - print(' loaded %d rows' % len(rows)) + print(" loaded %d rows" % len(rows)) entries = [obj for obj in rows if obj] - print(' found %d entries' % len(entries)) + print(" found %d entries" % len(entries)) - sql = ''' + sql = """ INSERT INTO `ecdc_ili` (`release_date`, `issue`, `epiweek`, `region`, `lag`, `incidence_rate`) @@ -122,13 +128,13 @@ def update_from_file(issue, date, dir, test_mode=False): ON DUPLICATE KEY UPDATE `release_date` = least(`release_date`, '%s'), `incidence_rate` = %s - ''' + """ for row in entries: - lag = delta_epiweeks(row['epiweek'], issue) - data_args = [row['incidence_rate']] + lag = delta_epiweeks(row["epiweek"], issue) + data_args = [row["incidence_rate"]] - insert_args = [date,issue,row['epiweek'],row['region'],lag] + data_args + insert_args = [date, issue, row["epiweek"], row["region"], lag] + data_args update_args = [date] + data_args try: insert.execute(sql % tuple(insert_args + update_args)) @@ -138,39 +144,28 @@ def update_from_file(issue, date, dir, test_mode=False): # cleanup insert.close() if test_mode: - print('test mode, not committing') + print("test mode, not committing") rows2 = rows1 else: cnx.commit() rows2 = get_rows(cnx) - print('rows after: %d (added %d)' % (rows2,rows2-rows1)) + print("rows after: %d (added %d)" % (rows2, rows2 - rows1)) cnx.close() + def main(): # args and usage parser = argparse.ArgumentParser() - parser.add_argument( - '--test', - action='store_true', - help='do dry run only, do not update the database' - ) - parser.add_argument( - '--file', - type=str, - help='load an existing zip file (otherwise fetch current data)' - ) - parser.add_argument( - '--issue', - type=int, - help='issue of the file (e.g. 201740); used iff --file is given' - ) + parser.add_argument("--test", action="store_true", help="do dry run only, do not update the database") + parser.add_argument("--file", type=str, help="load an existing zip file (otherwise fetch current data)") + parser.add_argument("--issue", type=int, help="issue of the file (e.g. 
201740); used iff --file is given") args = parser.parse_args() if (args.file is None) != (args.issue is None): - raise Exception('--file and --issue must both be present or absent') + raise Exception("--file and --issue must both be present or absent") - date = datetime.datetime.now().strftime('%Y-%m-%d') - print('assuming release date is today, %s' % date) + date = datetime.datetime.now().strftime("%Y-%m-%d") + print("assuming release date is today, %s" % date) ensure_tables_exist() if args.file: @@ -182,29 +177,26 @@ def main(): max_tries = 5 while flag < max_tries: flag = flag + 1 - tmp_dir = ''.join(random.choice('0123456789abcdefghijklmnopqrstuvwxyz') for i in range(8)) - tmp_dir = 'downloads_' + tmp_dir - subprocess.call(["mkdir",tmp_dir]) - # Use temporary directory to avoid data from different time - # downloaded to same folder - download_ecdc_data(download_dir=tmp_dir) - issue = EpiDate.today().get_ew() - files = glob.glob('%s/*.csv' % tmp_dir) - for filename in files: - with open(filename,'r') as f: - _ = f.readline() - db_error = False - for filename in files: - try: - update_from_file(issue, date, filename, test_mode=args.test) - subprocess.call(["rm",filename]) - except: - db_error = True - subprocess.call(["rm","-r",tmp_dir]) - if not db_error: - break # Exit loop with success + with tempfile.TemporaryDirectory() as tmp_dir: + # Use temporary directory to avoid data from different time + # downloaded to same folder + download_ecdc_data(download_dir=tmp_dir) + issue = EpiDate.today().get_ew() + files = glob.glob(f"{tmp_dir}/*.csv") + for filename in files: + with open(filename, "r") as f: + _ = f.readline() + db_error = False + for filename in files: + try: + update_from_file(issue, date, filename, test_mode=args.test) + except: + db_error = True + if not db_error: + break # Exit loop with success if flag >= max_tries: - print('WARNING: Database `ecdc_ili` did not update successfully') + print("WARNING: Database `ecdc_ili` did not update successfully") + -if __name__ == '__main__': +if __name__ == "__main__": main() diff --git a/src/acquisition/ecdc/ecdc_ili.py b/src/acquisition/ecdc/ecdc_ili.py index 1dd0505d1..bf11b9611 100644 --- a/src/acquisition/ecdc/ecdc_ili.py +++ b/src/acquisition/ecdc/ecdc_ili.py @@ -11,60 +11,60 @@ from bs4 import BeautifulSoup from selenium import webdriver from selenium.webdriver.support.ui import Select -from selenium.webdriver.support.ui import WebDriverWait +from selenium.webdriver.support.ui import WebDriverWait from selenium.webdriver.common.by import By from selenium.webdriver.support import expected_conditions as EC -def download_ecdc_data(download_dir = "downloads"): - url = 'https://flunewseurope.org/PrimaryCareData' +def download_ecdc_data(download_dir="downloads"): + url = "https://flunewseurope.org/PrimaryCareData" resp = requests.get(url) - soup = BeautifulSoup(resp.content, 'lxml') - mydivs = soup.findAll('div') + soup = BeautifulSoup(resp.content, "lxml") + mydivs = soup.findAll("div") for div in mydivs: dic = div.attrs - if dic.get('class')== ['graph-container'] and dic.get('id')== 'dinfl06': + if dic.get("class") == ["graph-container"] and dic.get("id") == "dinfl06": break # get new url of the ILI chunck - url = div.contents[1].attrs['src'] + url = div.contents[1].attrs["src"] opts = webdriver.firefox.options.Options() opts.set_headless() fp = webdriver.FirefoxProfile() - fp.set_preference("browser.download.folderList",2) - fp.set_preference("browser.download.manager.showWhenStarting",False) - 
fp.set_preference("browser.download.dir",os.path.abspath(download_dir)) - fp.set_preference("browser.helperApps.neverAsk.saveToDisk","text/csv") + fp.set_preference("browser.download.folderList", 2) + fp.set_preference("browser.download.manager.showWhenStarting", False) + fp.set_preference("browser.download.dir", os.path.abspath(download_dir)) + fp.set_preference("browser.helperApps.neverAsk.saveToDisk", "text/csv") try: - driver = webdriver.Firefox(options=opts,firefox_profile=fp) + driver = webdriver.Firefox(options=opts, firefox_profile=fp) driver.get(url) for i in range(2, 54): # select country try: - WebDriverWait(driver,30).until(EC.element_to_be_clickable((By.ID,'fluNewsReportViewer_ctl04_ctl03_ddValue'))) - Select(driver.find_element_by_tag_name('select')).select_by_value(str(i)) + WebDriverWait(driver, 30).until(EC.element_to_be_clickable((By.ID, "fluNewsReportViewer_ctl04_ctl03_ddValue"))) + Select(driver.find_element_by_tag_name("select")).select_by_value(str(i)) time.sleep(3) - soup = BeautifulSoup(driver.page_source, 'html.parser') - options = soup.select('#fluNewsReportViewer_ctl04_ctl05_ddValue')[0].find_all('option') + soup = BeautifulSoup(driver.page_source, "html.parser") + options = soup.select("#fluNewsReportViewer_ctl04_ctl05_ddValue")[0].find_all("option") ind = 1 for j in range(len(options)): - if 'ILI' in str(options[j]): - pattern = re.compile(r'\d+') + if "ILI" in str(options[j]): + pattern = re.compile(r"\d+") ind = re.findall(pattern, str(options[j]))[0] break if type(ind) == str: # select clinical tyle - WebDriverWait(driver,30).until(EC.element_to_be_clickable((By.ID,'fluNewsReportViewer_ctl04_ctl05_ddValue'))) - Select(driver.find_element_by_id('fluNewsReportViewer_ctl04_ctl05_ddValue')).select_by_value(ind) - WebDriverWait(driver,30).until(EC.element_to_be_clickable((By.ID,'btnSelectExportType'))) - driver.find_element_by_id('btnSelectExportType').click() - WebDriverWait(driver,30).until(EC.element_to_be_clickable((By.ID,'btnExportToCsv'))) - driver.find_element_by_id('btnExportToCsv').click() + WebDriverWait(driver, 30).until(EC.element_to_be_clickable((By.ID, "fluNewsReportViewer_ctl04_ctl05_ddValue"))) + Select(driver.find_element_by_id("fluNewsReportViewer_ctl04_ctl05_ddValue")).select_by_value(ind) + WebDriverWait(driver, 30).until(EC.element_to_be_clickable((By.ID, "btnSelectExportType"))) + driver.find_element_by_id("btnSelectExportType").click() + WebDriverWait(driver, 30).until(EC.element_to_be_clickable((By.ID, "btnExportToCsv"))) + driver.find_element_by_id("btnExportToCsv").click() time.sleep(3) except: driver.get(url) except: - print('WARNING: ECDC Scraper may not have downloaded all of the available data.') - #cleanup - os.system('''pkill "firefox" ''') + print("WARNING: ECDC Scraper may not have downloaded all of the available data.") + # cleanup + os.system("""pkill "firefox" """) os.system('''pkill "(firefox-bin)"''') os.system('''pkill "geckodriver*"''') diff --git a/src/acquisition/flusurv/flusurv.py b/src/acquisition/flusurv/flusurv.py index 6b8d247ae..1e534b740 100644 --- a/src/acquisition/flusurv/flusurv.py +++ b/src/acquisition/flusurv/flusurv.py @@ -50,167 +50,170 @@ # all currently available FluSurv locations and their associated codes # the number pair represents NetworkID and CatchmentID location_codes = { - 'CA': (2, 1), - 'CO': (2, 2), - 'CT': (2, 3), - 'GA': (2, 4), - 'IA': (3, 5), - 'ID': (3, 6), - 'MD': (2, 7), - 'MI': (3, 8), - 'MN': (2, 9), - 'NM': (2, 11), - 'NY_albany': (2, 13), - 'NY_rochester': (2, 14), - 'OH': (3, 15), 
- 'OK': (3, 16), - 'OR': (2, 17), - 'RI': (3, 18), - 'SD': (3, 19), - 'TN': (2, 20), - 'UT': (3, 21), - 'network_all': (1, 22), - 'network_eip': (2, 22), - 'network_ihsp': (3, 22), + "CA": (2, 1), + "CO": (2, 2), + "CT": (2, 3), + "GA": (2, 4), + "IA": (3, 5), + "ID": (3, 6), + "MD": (2, 7), + "MI": (3, 8), + "MN": (2, 9), + "NM": (2, 11), + "NY_albany": (2, 13), + "NY_rochester": (2, 14), + "OH": (3, 15), + "OK": (3, 16), + "OR": (2, 17), + "RI": (3, 18), + "SD": (3, 19), + "TN": (2, 20), + "UT": (3, 21), + "network_all": (1, 22), + "network_eip": (2, 22), + "network_ihsp": (3, 22), } def fetch_json(path, payload, call_count=1, requests_impl=requests): - """Send a request to the server and return the parsed JSON response.""" - - # it's polite to self-identify this "bot" - delphi_url = 'https://delphi.cmu.edu/index.html' - user_agent = 'Mozilla/5.0 (compatible; delphibot/1.0; +%s)' % delphi_url - - # the FluSurv AMF server - flusurv_url = 'https://gis.cdc.gov/GRASP/Flu3/' + path - - # request headers - headers = { - 'Accept-Encoding': 'gzip', - 'User-Agent': user_agent, - } - if payload is not None: - headers['Content-Type'] = 'application/json;charset=UTF-8' - - # send the request and read the response - if payload is None: - method = requests_impl.get - data = None - else: - method = requests_impl.post - data = json.dumps(payload) - resp = method(flusurv_url, headers=headers, data=data) - - # check the HTTP status code - if resp.status_code == 500 and call_count <= 2: - # the server often fails with this status, so wait and retry - delay = 10 * call_count - print('got status %d, will retry in %d sec...' % (resp.status_code, delay)) - time.sleep(delay) - return fetch_json(path, payload, call_count=call_count + 1) - elif resp.status_code != 200: - raise Exception(['status code != 200', resp.status_code]) - - # check response mime type - if 'application/json' not in resp.headers.get('Content-Type', ''): - raise Exception('response is not json') - - # return the decoded json object - return resp.json() + """Send a request to the server and return the parsed JSON response.""" + + # it's polite to self-identify this "bot" + delphi_url = "https://delphi.cmu.edu/index.html" + user_agent = "Mozilla/5.0 (compatible; delphibot/1.0; +%s)" % delphi_url + + # the FluSurv AMF server + flusurv_url = "https://gis.cdc.gov/GRASP/Flu3/" + path + + # request headers + headers = { + "Accept-Encoding": "gzip", + "User-Agent": user_agent, + } + if payload is not None: + headers["Content-Type"] = "application/json;charset=UTF-8" + + # send the request and read the response + if payload is None: + method = requests_impl.get + data = None + else: + method = requests_impl.post + data = json.dumps(payload) + resp = method(flusurv_url, headers=headers, data=data) + + # check the HTTP status code + if resp.status_code == 500 and call_count <= 2: + # the server often fails with this status, so wait and retry + delay = 10 * call_count + print("got status %d, will retry in %d sec..." 
% (resp.status_code, delay)) + time.sleep(delay) + return fetch_json(path, payload, call_count=call_count + 1) + elif resp.status_code != 200: + raise Exception(["status code != 200", resp.status_code]) + + # check response mime type + if "application/json" not in resp.headers.get("Content-Type", ""): + raise Exception("response is not json") + + # return the decoded json object + return resp.json() def fetch_flusurv_object(location_code): - """Return decoded FluSurv JSON object for the given location.""" - return fetch_json('PostPhase03GetData', { - 'appversion': 'Public', - 'networkid': location_code[0], - 'cacthmentid': location_code[1], - }) + """Return decoded FluSurv JSON object for the given location.""" + return fetch_json( + "PostPhase03GetData", + { + "appversion": "Public", + "networkid": location_code[0], + "cacthmentid": location_code[1], + }, + ) def mmwrid_to_epiweek(mmwrid): - """Convert a CDC week index into an epiweek.""" + """Convert a CDC week index into an epiweek.""" - # Add the difference in IDs, which are sequential, to a reference epiweek, - # which is 2003w40 in this case. - epiweek_200340 = EpiDate(2003, 9, 28) - mmwrid_200340 = 2179 - return epiweek_200340.add_weeks(mmwrid - mmwrid_200340).get_ew() + # Add the difference in IDs, which are sequential, to a reference epiweek, + # which is 2003w40 in this case. + epiweek_200340 = EpiDate(2003, 9, 28) + mmwrid_200340 = 2179 + return epiweek_200340.add_weeks(mmwrid - mmwrid_200340).get_ew() def extract_from_object(data_in): - """ - Given a FluSurv data object, return hospitaliation rates. - - The returned object is indexed first by epiweek, then by zero-indexed age - group. - """ - - # an object to hold the result - data_out = {} - - # iterate over all seasons and age groups - for obj in data_in['busdata']['dataseries']: - if obj['age'] in (10, 11, 12): - # TODO(https://github.com/cmu-delphi/delphi-epidata/issues/242): - # capture as-of-yet undefined age groups 10, 11, and 12 - continue - age_index = obj['age'] - 1 - # iterage over weeks - for mmwrid, _, _, rate in obj['data']: - epiweek = mmwrid_to_epiweek(mmwrid) - if epiweek not in data_out: - # weekly rate of each age group - data_out[epiweek] = [None] * 9 - prev_rate = data_out[epiweek][age_index] - if prev_rate is None: - # this is the first time to see a rate for this epiweek/age - data_out[epiweek][age_index] = rate - elif prev_rate != rate: - # a different rate was already found for this epiweek/age - format_args = (epiweek, obj['age'], prev_rate, rate) - print('warning: %d %d %f != %f' % format_args) - - # sanity check the result - if len(data_out) == 0: - raise Exception('no data found') - - # print the result and return flu data - print('found data for %d weeks' % len(data_out)) - return data_out + """ + Given a FluSurv data object, return hospitaliation rates. + + The returned object is indexed first by epiweek, then by zero-indexed age + group. 
+ """ + + # an object to hold the result + data_out = {} + + # iterate over all seasons and age groups + for obj in data_in["busdata"]["dataseries"]: + if obj["age"] in (10, 11, 12): + # TODO(https://github.com/cmu-delphi/delphi-epidata/issues/242): + # capture as-of-yet undefined age groups 10, 11, and 12 + continue + age_index = obj["age"] - 1 + # iterage over weeks + for mmwrid, _, _, rate in obj["data"]: + epiweek = mmwrid_to_epiweek(mmwrid) + if epiweek not in data_out: + # weekly rate of each age group + data_out[epiweek] = [None] * 9 + prev_rate = data_out[epiweek][age_index] + if prev_rate is None: + # this is the first time to see a rate for this epiweek/age + data_out[epiweek][age_index] = rate + elif prev_rate != rate: + # a different rate was already found for this epiweek/age + format_args = (epiweek, obj["age"], prev_rate, rate) + print("warning: %d %d %f != %f" % format_args) + + # sanity check the result + if len(data_out) == 0: + raise Exception("no data found") + + # print the result and return flu data + print("found data for %d weeks" % len(data_out)) + return data_out def get_data(location_code): - """ - Fetch and parse flu data for the given location. + """ + Fetch and parse flu data for the given location. - This method performs the following operations: - - fetches FluSurv data from CDC - - extracts and returns hospitaliation rates - """ + This method performs the following operations: + - fetches FluSurv data from CDC + - extracts and returns hospitaliation rates + """ - # fetch - print('[fetching flusurv data...]') - data_in = fetch_flusurv_object(location_code) + # fetch + print("[fetching flusurv data...]") + data_in = fetch_flusurv_object(location_code) - # extract - print('[extracting values...]') - data_out = extract_from_object(data_in) + # extract + print("[extracting values...]") + data_out = extract_from_object(data_in) - # return - print('[scraped successfully]') - return data_out + # return + print("[scraped successfully]") + return data_out def get_current_issue(): - """Scrape the current issue from the FluSurv main page.""" + """Scrape the current issue from the FluSurv main page.""" - # fetch - data = fetch_json('GetPhase03InitApp?appVersion=Public', None) + # fetch + data = fetch_json("GetPhase03InitApp?appVersion=Public", None) - # extract - date = datetime.strptime(data['loaddatetime'], '%b %d, %Y') + # extract + date = datetime.strptime(data["loaddatetime"], "%b %d, %Y") - # convert and return - return EpiDate(date.year, date.month, date.day).get_ew() + # convert and return + return EpiDate(date.year, date.month, date.day).get_ew() diff --git a/src/acquisition/flusurv/flusurv_update.py b/src/acquisition/flusurv/flusurv_update.py index 35fadba05..0715bba37 100644 --- a/src/acquisition/flusurv/flusurv_update.py +++ b/src/acquisition/flusurv/flusurv_update.py @@ -82,108 +82,101 @@ def get_rows(cur): - """Return the number of rows in the `flusurv` table.""" + """Return the number of rows in the `flusurv` table.""" - # count all rows - cur.execute('SELECT count(1) `num` FROM `flusurv`') - for (num,) in cur: - return num + # count all rows + cur.execute("SELECT count(1) `num` FROM `flusurv`") + for (num,) in cur: + return num def update(issue, location_name, test_mode=False): - """Fetch and store the currently avialble weekly FluSurv dataset.""" - - # fetch data - location_code = flusurv.location_codes[location_name] - print('fetching data for', location_name, location_code) - data = flusurv.get_data(location_code) - - # metadata - epiweeks = 
sorted(data.keys()) - location = location_name - release_date = str(EpiDate.today()) - - # connect to the database - u, p = secrets.db.epi - cnx = mysql.connector.connect( - host=secrets.db.host, user=u, password=p, database='epidata') - cur = cnx.cursor() - rows1 = get_rows(cur) - print('rows before: %d' % rows1) - - # SQL for insert/update - sql = ''' - INSERT INTO `flusurv` ( - `release_date`, `issue`, `epiweek`, `location`, `lag`, `rate_age_0`, - `rate_age_1`, `rate_age_2`, `rate_age_3`, `rate_age_4`, `rate_overall`, - `rate_age_5`, `rate_age_6`, `rate_age_7` - ) - VALUES ( - %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s - ) - ON DUPLICATE KEY UPDATE - `release_date` = least(`release_date`, %s), - `rate_age_0` = coalesce(%s, `rate_age_0`), - `rate_age_1` = coalesce(%s, `rate_age_1`), - `rate_age_2` = coalesce(%s, `rate_age_2`), - `rate_age_3` = coalesce(%s, `rate_age_3`), - `rate_age_4` = coalesce(%s, `rate_age_4`), - `rate_overall` = coalesce(%s, `rate_overall`), - `rate_age_5` = coalesce(%s, `rate_age_5`), - `rate_age_6` = coalesce(%s, `rate_age_6`), - `rate_age_7` = coalesce(%s, `rate_age_7`) - ''' - - # insert/update each row of data (one per epiweek) - for epiweek in epiweeks: - lag = delta_epiweeks(epiweek, issue) - if lag > 52: - # Ignore values older than one year, as (1) they are assumed not to - # change, and (2) it would adversely affect database performance if all - # values (including duplicates) were stored on each run. - continue - args_meta = [release_date, issue, epiweek, location, lag] - args_insert = data[epiweek] - args_update = [release_date] + data[epiweek] - cur.execute(sql, tuple(args_meta + args_insert + args_update)) - - # commit and disconnect - rows2 = get_rows(cur) - print('rows after: %d (+%d)' % (rows2, rows2 - rows1)) - cur.close() - if test_mode: - print('test mode: not committing database changes') - else: - cnx.commit() - cnx.close() + """Fetch and store the currently avialble weekly FluSurv dataset.""" + + # fetch data + location_code = flusurv.location_codes[location_name] + print("fetching data for", location_name, location_code) + data = flusurv.get_data(location_code) + + # metadata + epiweeks = sorted(data.keys()) + location = location_name + release_date = str(EpiDate.today()) + + # connect to the database + u, p = secrets.db.epi + cnx = mysql.connector.connect(host=secrets.db.host, user=u, password=p, database="epidata") + cur = cnx.cursor() + rows1 = get_rows(cur) + print("rows before: %d" % rows1) + + # SQL for insert/update + sql = """ + INSERT INTO `flusurv` ( + `release_date`, `issue`, `epiweek`, `location`, `lag`, `rate_age_0`, + `rate_age_1`, `rate_age_2`, `rate_age_3`, `rate_age_4`, `rate_overall`, + `rate_age_5`, `rate_age_6`, `rate_age_7` + ) + VALUES ( + %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s + ) + ON DUPLICATE KEY UPDATE + `release_date` = least(`release_date`, %s), + `rate_age_0` = coalesce(%s, `rate_age_0`), + `rate_age_1` = coalesce(%s, `rate_age_1`), + `rate_age_2` = coalesce(%s, `rate_age_2`), + `rate_age_3` = coalesce(%s, `rate_age_3`), + `rate_age_4` = coalesce(%s, `rate_age_4`), + `rate_overall` = coalesce(%s, `rate_overall`), + `rate_age_5` = coalesce(%s, `rate_age_5`), + `rate_age_6` = coalesce(%s, `rate_age_6`), + `rate_age_7` = coalesce(%s, `rate_age_7`) + """ + + # insert/update each row of data (one per epiweek) + for epiweek in epiweeks: + lag = delta_epiweeks(epiweek, issue) + if lag > 52: + # Ignore values older than one year, as (1) they are assumed not to + # change, and (2) it would 
adversely affect database performance if all + # values (including duplicates) were stored on each run. + continue + args_meta = [release_date, issue, epiweek, location, lag] + args_insert = data[epiweek] + args_update = [release_date] + data[epiweek] + cur.execute(sql, tuple(args_meta + args_insert + args_update)) + + # commit and disconnect + rows2 = get_rows(cur) + print("rows after: %d (+%d)" % (rows2, rows2 - rows1)) + cur.close() + if test_mode: + print("test mode: not committing database changes") + else: + cnx.commit() + cnx.close() def main(): - # args and usage - parser = argparse.ArgumentParser() - parser.add_argument( - 'location', - help='location for which data should be scraped (e.g. "CA" or "all")' - ) - parser.add_argument( - '--test', '-t', - default=False, action='store_true', help='do not commit database changes' - ) - args = parser.parse_args() - - # scrape current issue from the main page - issue = flusurv.get_current_issue() - print('current issue: %d' % issue) - - # fetch flusurv data - if args.location == 'all': - # all locations - for location in flusurv.location_codes.keys(): - update(issue, location, args.test) - else: - # single location - update(issue, args.location, args.test) - - -if __name__ == '__main__': - main() + # args and usage + parser = argparse.ArgumentParser() + parser.add_argument("location", help='location for which data should be scraped (e.g. "CA" or "all")') + parser.add_argument("--test", "-t", default=False, action="store_true", help="do not commit database changes") + args = parser.parse_args() + + # scrape current issue from the main page + issue = flusurv.get_current_issue() + print("current issue: %d" % issue) + + # fetch flusurv data + if args.location == "all": + # all locations + for location in flusurv.location_codes.keys(): + update(issue, location, args.test) + else: + # single location + update(issue, args.location, args.test) + + +if __name__ == "__main__": + main() diff --git a/src/acquisition/fluview/fluview.py b/src/acquisition/fluview/fluview.py index d723cbc59..a3298c4ce 100644 --- a/src/acquisition/fluview/fluview.py +++ b/src/acquisition/fluview/fluview.py @@ -34,183 +34,188 @@ class Key: - """ - Constants for navigating the metadata object contained in the web response - from CDC. - """ + """ + Constants for navigating the metadata object contained in the web response + from CDC. 
+ """ - class TierType: - nat = 'National' - hhs = 'HHS Regions' - cen = 'Census Divisions' - sta = 'State' + class TierType: + nat = "National" + hhs = "HHS Regions" + cen = "Census Divisions" + sta = "State" - class TierListEntry: - hhs = 'hhsregion' - cen = 'censusregions' - sta = 'states' + class TierListEntry: + hhs = "hhsregion" + cen = "censusregions" + sta = "states" - class TierIdEntry: - hhs = 'hhsregionid' - cen = 'censusregionid' - sta = 'stateid' + class TierIdEntry: + hhs = "hhsregionid" + cen = "censusregionid" + sta = "stateid" def check_status(resp, status, content_type): - """Raise an exception if the status code or content type is unexpected.""" - if resp.status_code != status: - raise Exception('got unexpected status code: ' + str(resp.status_code)) - actual_type = resp.headers.get('Content-Type', None) - if actual_type is None or content_type not in actual_type.lower(): - raise Exception('got unexpected content type: ' + str(actual_type)) + """Raise an exception if the status code or content type is unexpected.""" + if resp.status_code != status: + raise Exception("got unexpected status code: " + str(resp.status_code)) + actual_type = resp.headers.get("Content-Type", None) + if actual_type is None or content_type not in actual_type.lower(): + raise Exception("got unexpected content type: " + str(actual_type)) def fetch_metadata(sess): - """ - Return metadata indicating the current issue and also numeric constants - representing the various locations. - """ - url = 'https://gis.cdc.gov/grasp/flu2/GetPhase02InitApp?appVersion=Public' - resp = sess.get(url) - check_status(resp, 200, 'application/json') - return resp.json() + """ + Return metadata indicating the current issue and also numeric constants + representing the various locations. 
+ """ + url = "https://gis.cdc.gov/grasp/flu2/GetPhase02InitApp?appVersion=Public" + resp = sess.get(url) + check_status(resp, 200, "application/json") + return resp.json() def get_issue_and_locations(data): - """Extract the issue and per-tier location lists from the metadata object.""" - - def get_tier_ids(name): - for row in data['regiontypes']: - if row['description'] == name: - return row['regiontypeid'] - raise Exception() - - tier_ids = dict((name, get_tier_ids(name)) for name in ( - Key.TierType.nat, - Key.TierType.hhs, - Key.TierType.cen, - Key.TierType.sta, - )) - - location_ids = { - Key.TierType.nat: [0], - Key.TierType.hhs: [], - Key.TierType.cen: [], - Key.TierType.sta: [], - } - - # add location ids for HHS - for row in data[Key.TierListEntry.hhs]: - location_ids[Key.TierType.hhs].append(row[Key.TierIdEntry.hhs]) - location_ids[Key.TierType.hhs] = sorted(set(location_ids[Key.TierType.hhs])) - num = len(location_ids[Key.TierType.hhs]) - if num != 10: - raise Exception('expected 10 hhs regions, found %d' % num) - - # add location ids for census divisions - for row in data[Key.TierListEntry.cen]: - location_ids[Key.TierType.cen].append(row[Key.TierIdEntry.cen]) - location_ids[Key.TierType.cen] = sorted(set(location_ids[Key.TierType.cen])) - num = len(location_ids[Key.TierType.cen]) - if num != 9: - raise Exception('expected 9 census divisions, found %d' % num) - - # add location ids for states - for row in data[Key.TierListEntry.sta]: - location_ids[Key.TierType.sta].append(row[Key.TierIdEntry.sta]) - location_ids[Key.TierType.sta] = sorted(set(location_ids[Key.TierType.sta])) - num = len(location_ids[Key.TierType.sta]) - if num != 57: - raise Exception('expected 57 states/territories/cities, found %d' % num) - - # return a useful subset of the metadata - # (latest epiweek, latest season, tier ids, location ids) - return { - 'epiweek': data['mmwr'][-1]['yearweek'], - 'season_id': data['mmwr'][-1]['seasonid'], - 'tier_ids': tier_ids, - 'location_ids': location_ids, - } + """Extract the issue and per-tier location lists from the metadata object.""" + + def get_tier_ids(name): + for row in data["regiontypes"]: + if row["description"] == name: + return row["regiontypeid"] + raise Exception() + + tier_ids = dict( + (name, get_tier_ids(name)) + for name in ( + Key.TierType.nat, + Key.TierType.hhs, + Key.TierType.cen, + Key.TierType.sta, + ) + ) + + location_ids = { + Key.TierType.nat: [0], + Key.TierType.hhs: [], + Key.TierType.cen: [], + Key.TierType.sta: [], + } + + # add location ids for HHS + for row in data[Key.TierListEntry.hhs]: + location_ids[Key.TierType.hhs].append(row[Key.TierIdEntry.hhs]) + location_ids[Key.TierType.hhs] = sorted(set(location_ids[Key.TierType.hhs])) + num = len(location_ids[Key.TierType.hhs]) + if num != 10: + raise Exception("expected 10 hhs regions, found %d" % num) + + # add location ids for census divisions + for row in data[Key.TierListEntry.cen]: + location_ids[Key.TierType.cen].append(row[Key.TierIdEntry.cen]) + location_ids[Key.TierType.cen] = sorted(set(location_ids[Key.TierType.cen])) + num = len(location_ids[Key.TierType.cen]) + if num != 9: + raise Exception("expected 9 census divisions, found %d" % num) + + # add location ids for states + for row in data[Key.TierListEntry.sta]: + location_ids[Key.TierType.sta].append(row[Key.TierIdEntry.sta]) + location_ids[Key.TierType.sta] = sorted(set(location_ids[Key.TierType.sta])) + num = len(location_ids[Key.TierType.sta]) + if num != 57: + raise Exception("expected 57 states/territories/cities, found 
%d" % num) + + # return a useful subset of the metadata + # (latest epiweek, latest season, tier ids, location ids) + return { + "epiweek": data["mmwr"][-1]["yearweek"], + "season_id": data["mmwr"][-1]["seasonid"], + "tier_ids": tier_ids, + "location_ids": location_ids, + } def download_data(tier_id, location_ids, season_ids, filename): - """Download zipped ILINet data for the given locations and seasons.""" - - def get_entry(num, name=None): - return {'ID': num, 'Name': (name if name else num)} - - # download the data (in memory) - url = 'https://gis.cdc.gov/grasp/flu2/PostPhase02DataDownload' - data = { - 'AppVersion': 'Public', - 'DatasourceDT': [get_entry(1, 'ILINet'), get_entry(0, 'WHO_NREVSS')], - 'RegionTypeId': tier_id, - 'SubRegionsDT': [get_entry(loc) for loc in sorted(location_ids)], - 'SeasonsDT': [get_entry(season) for season in sorted(season_ids)], - } - resp = requests.post(url, json=data) - check_status(resp, 200, 'application/octet-stream') - payload = resp.content - - # save the data to file and return the file length - with open(filename, 'wb') as f: - f.write(payload) - return len(payload) + """Download zipped ILINet data for the given locations and seasons.""" + + def get_entry(num, name=None): + return {"ID": num, "Name": (name if name else num)} + + # download the data (in memory) + url = "https://gis.cdc.gov/grasp/flu2/PostPhase02DataDownload" + data = { + "AppVersion": "Public", + "DatasourceDT": [get_entry(1, "ILINet"), get_entry(0, "WHO_NREVSS")], + "RegionTypeId": tier_id, + "SubRegionsDT": [get_entry(loc) for loc in sorted(location_ids)], + "SeasonsDT": [get_entry(season) for season in sorted(season_ids)], + } + resp = requests.post(url, json=data) + check_status(resp, 200, "application/octet-stream") + payload = resp.content + + # save the data to file and return the file length + with open(filename, "wb") as f: + f.write(payload) + return len(payload) def save_latest(path=None): - """ - Save the latest two seasons of data for all locations, separately for each - location tier (i.e. national, HHS, census, and states). 
- """ - - # set up the session - sess = requests.session() - sess.headers.update({ - # it's polite to self-identify this "bot" - 'User-Agent': 'delphibot/1.0 (+https://delphi.cmu.edu/)', - }) - - # get metatdata - print('looking up ilinet metadata') - data = fetch_metadata(sess) - info = get_issue_and_locations(data) - issue = info['epiweek'] - print('current issue: %d' % issue) - - # establish timing - dt = datetime.datetime.now().strftime('%Y%m%d_%H%M%S') - current_season = info['season_id'] - seasons = [s for s in range(current_season - 1, current_season + 1)] - - # make the destination path if it doesn't already exist - if path is not None: - os.makedirs(path, exist_ok=True) - - # download the data file for each tier - files = [] - for delphi_name, cdc_name in ( - ('nat', Key.TierType.nat), - ('hhs', Key.TierType.hhs), - ('cen', Key.TierType.cen), - ('sta', Key.TierType.sta), - ): - name = 'ilinet_%s_%d_%s.zip' % (delphi_name, issue, dt) - if path is None: - filename = name - else: - filename = os.path.join(path, name) - tier_id = info['tier_ids'][cdc_name] - locations = info['location_ids'][cdc_name] - - # download and show timing information - print('downloading %s' % delphi_name) - t0 = time.time() - size = download_data(tier_id, locations, seasons, filename) - t1 = time.time() - - print(' saved %s (%d bytes in %.1f seconds)' % (filename, size, t1 - t0)) - files.append(filename) - - # return the current issue and the list of downloaded files - return issue, files + """ + Save the latest two seasons of data for all locations, separately for each + location tier (i.e. national, HHS, census, and states). + """ + + # set up the session + sess = requests.session() + sess.headers.update( + { + # it's polite to self-identify this "bot" + "User-Agent": "delphibot/1.0 (+https://delphi.cmu.edu/)", + } + ) + + # get metatdata + print("looking up ilinet metadata") + data = fetch_metadata(sess) + info = get_issue_and_locations(data) + issue = info["epiweek"] + print("current issue: %d" % issue) + + # establish timing + dt = datetime.datetime.now().strftime("%Y%m%d_%H%M%S") + current_season = info["season_id"] + seasons = [s for s in range(current_season - 1, current_season + 1)] + + # make the destination path if it doesn't already exist + if path is not None: + os.makedirs(path, exist_ok=True) + + # download the data file for each tier + files = [] + for delphi_name, cdc_name in ( + ("nat", Key.TierType.nat), + ("hhs", Key.TierType.hhs), + ("cen", Key.TierType.cen), + ("sta", Key.TierType.sta), + ): + name = "ilinet_%s_%d_%s.zip" % (delphi_name, issue, dt) + if path is None: + filename = name + else: + filename = os.path.join(path, name) + tier_id = info["tier_ids"][cdc_name] + locations = info["location_ids"][cdc_name] + + # download and show timing information + print("downloading %s" % delphi_name) + t0 = time.time() + size = download_data(tier_id, locations, seasons, filename) + t1 = time.time() + + print(" saved %s (%d bytes in %.1f seconds)" % (filename, size, t1 - t0)) + files.append(filename) + + # return the current issue and the list of downloaded files + return issue, files diff --git a/src/acquisition/fluview/fluview_locations.py b/src/acquisition/fluview/fluview_locations.py index 9c851bc6f..e5ebe0fc3 100644 --- a/src/acquisition/fluview/fluview_locations.py +++ b/src/acquisition/fluview/fluview_locations.py @@ -15,100 +15,100 @@ # https://gis.cdc.gov/grasp/flu2/GetPhase02InitApp?appVersion=Public # The values are used in queries of Delphi's Epidata API. 
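For instance, with the mapping below and the case-insensitive get_location_name() helper at the bottom of this module, the ("REGION TYPE", "REGION") pair taken from a FluView download resolves to the short location code used in Epidata API queries; a couple of illustrative lookups:

    get_location_name("HHS Regions", "Region 5")   # -> "hhs5"
    get_location_name("States", "New York City")   # -> "jfk"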
cdc_to_delphi = { - 'national': { - 'x': 'nat', - }, - 'hhs regions': { - 'region 1': 'hhs1', - 'region 2': 'hhs2', - 'region 3': 'hhs3', - 'region 4': 'hhs4', - 'region 5': 'hhs5', - 'region 6': 'hhs6', - 'region 7': 'hhs7', - 'region 8': 'hhs8', - 'region 9': 'hhs9', - 'region 10': 'hhs10', - }, - 'census regions': { - 'new england': 'cen1', - 'mid-atlantic': 'cen2', - 'east north central': 'cen3', - 'west north central': 'cen4', - 'south atlantic': 'cen5', - 'east south central': 'cen6', - 'west south central': 'cen7', - 'mountain': 'cen8', - 'pacific': 'cen9', - }, - 'states': { - # states/territories: two-letter ISO 3166 - 'alabama': 'al', - 'alaska': 'ak', - 'arizona': 'az', - 'arkansas': 'ar', - 'california': 'ca', - 'colorado': 'co', - 'connecticut': 'ct', - 'delaware': 'de', - 'florida': 'fl', - 'georgia': 'ga', - 'hawaii': 'hi', - 'idaho': 'id', - 'illinois': 'il', - 'indiana': 'in', - 'iowa': 'ia', - 'kansas': 'ks', - 'kentucky': 'ky', - 'louisiana': 'la', - 'maine': 'me', - 'maryland': 'md', - 'massachusetts': 'ma', - 'michigan': 'mi', - 'minnesota': 'mn', - 'mississippi': 'ms', - 'missouri': 'mo', - 'montana': 'mt', - 'nebraska': 'ne', - 'nevada': 'nv', - 'new hampshire': 'nh', - 'new jersey': 'nj', - 'new mexico': 'nm', - # Even though it's called "New York", this location doesn't include New - # York City ("jfk"). New York ("ny") is actually this *plus* jfk. - 'new york': 'ny_minus_jfk', - 'north carolina': 'nc', - 'north dakota': 'nd', - 'ohio': 'oh', - 'oklahoma': 'ok', - 'oregon': 'or', - 'pennsylvania': 'pa', - 'rhode island': 'ri', - 'south carolina': 'sc', - 'south dakota': 'sd', - 'tennessee': 'tn', - 'texas': 'tx', - 'utah': 'ut', - 'vermont': 'vt', - 'virginia': 'va', - 'washington': 'wa', - 'west virginia': 'wv', - 'wisconsin': 'wi', - 'wyoming': 'wy', - 'american samoa': 'as', - 'commonwealth of the northern mariana islands': 'mp', - 'district of columbia': 'dc', - 'guam': 'gu', - 'puerto rico': 'pr', - 'virgin islands': 'vi', - # cities: three-letter IATA - 'chicago': 'ord', - 'los angeles': 'lax', - 'new york city': 'jfk', - }, + "national": { + "x": "nat", + }, + "hhs regions": { + "region 1": "hhs1", + "region 2": "hhs2", + "region 3": "hhs3", + "region 4": "hhs4", + "region 5": "hhs5", + "region 6": "hhs6", + "region 7": "hhs7", + "region 8": "hhs8", + "region 9": "hhs9", + "region 10": "hhs10", + }, + "census regions": { + "new england": "cen1", + "mid-atlantic": "cen2", + "east north central": "cen3", + "west north central": "cen4", + "south atlantic": "cen5", + "east south central": "cen6", + "west south central": "cen7", + "mountain": "cen8", + "pacific": "cen9", + }, + "states": { + # states/territories: two-letter ISO 3166 + "alabama": "al", + "alaska": "ak", + "arizona": "az", + "arkansas": "ar", + "california": "ca", + "colorado": "co", + "connecticut": "ct", + "delaware": "de", + "florida": "fl", + "georgia": "ga", + "hawaii": "hi", + "idaho": "id", + "illinois": "il", + "indiana": "in", + "iowa": "ia", + "kansas": "ks", + "kentucky": "ky", + "louisiana": "la", + "maine": "me", + "maryland": "md", + "massachusetts": "ma", + "michigan": "mi", + "minnesota": "mn", + "mississippi": "ms", + "missouri": "mo", + "montana": "mt", + "nebraska": "ne", + "nevada": "nv", + "new hampshire": "nh", + "new jersey": "nj", + "new mexico": "nm", + # Even though it's called "New York", this location doesn't include New + # York City ("jfk"). New York ("ny") is actually this *plus* jfk. 
+ "new york": "ny_minus_jfk", + "north carolina": "nc", + "north dakota": "nd", + "ohio": "oh", + "oklahoma": "ok", + "oregon": "or", + "pennsylvania": "pa", + "rhode island": "ri", + "south carolina": "sc", + "south dakota": "sd", + "tennessee": "tn", + "texas": "tx", + "utah": "ut", + "vermont": "vt", + "virginia": "va", + "washington": "wa", + "west virginia": "wv", + "wisconsin": "wi", + "wyoming": "wy", + "american samoa": "as", + "commonwealth of the northern mariana islands": "mp", + "district of columbia": "dc", + "guam": "gu", + "puerto rico": "pr", + "virgin islands": "vi", + # cities: three-letter IATA + "chicago": "ord", + "los angeles": "lax", + "new york city": "jfk", + }, } def get_location_name(region_type, region_name): - """Convert a CDC location type and name pair into a Delphi location name.""" - return cdc_to_delphi[region_type.lower()][region_name.lower()] + """Convert a CDC location type and name pair into a Delphi location name.""" + return cdc_to_delphi[region_type.lower()][region_name.lower()] diff --git a/src/acquisition/fluview/fluview_notify.py b/src/acquisition/fluview/fluview_notify.py index 13f0f3559..d4d426556 100644 --- a/src/acquisition/fluview/fluview_notify.py +++ b/src/acquisition/fluview/fluview_notify.py @@ -31,41 +31,41 @@ import delphi.operations.secrets as secrets -if __name__ == '__main__': - # Args and usage - parser = argparse.ArgumentParser() - parser.add_argument('-t', '--test', action='store_const', const=True, default=False, help="do dry run only, don't update the database") - args = parser.parse_args() +if __name__ == "__main__": + # Args and usage + parser = argparse.ArgumentParser() + parser.add_argument("-t", "--test", action="store_const", const=True, default=False, help="do dry run only, don't update the database") + args = parser.parse_args() - # connect - u, p = secrets.db.epi - cnx = mysql.connector.connect(user=u, password=p, database='epidata') - cur = cnx.cursor() + # connect + u, p = secrets.db.epi + cnx = mysql.connector.connect(user=u, password=p, database="epidata") + cur = cnx.cursor() - # get the last known issue from the automation table `variables` - cur.execute('SELECT `value` FROM automation.`variables` WHERE `name` = %s', ('most_recent_issue',)) - for (issue1,) in cur: - issue1 = int(issue1) - print('last known issue:', issue1) - # get the most recent issue from the epidata table `fluview` - cur.execute('SELECT max(`issue`) FROM `fluview`') - for (issue2,) in cur: - issue2 = int(issue2) - print('most recent issue:', issue2) + # get the last known issue from the automation table `variables` + cur.execute("SELECT `value` FROM automation.`variables` WHERE `name` = %s", ("most_recent_issue",)) + for (issue1,) in cur: + issue1 = int(issue1) + print("last known issue:", issue1) + # get the most recent issue from the epidata table `fluview` + cur.execute("SELECT max(`issue`) FROM `fluview`") + for (issue2,) in cur: + issue2 = int(issue2) + print("most recent issue:", issue2) - if issue2 > issue1: - print('new data is available!') - if args.test: - print('test mode - not making any changes') - else: - # update the variable - cur.execute('UPDATE automation.`variables` SET `value` = %s WHERE `name` = %s', (issue2, 'most_recent_issue')) - # queue the 'New FluView Available' flow - cur.execute('CALL automation.RunStep(36)') - elif issue2 < issue2: - raise Exception('most recent issue is older than the last known issue') + if issue2 > issue1: + print("new data is available!") + if args.test: + print("test mode - not making any 
changes") + else: + # update the variable + cur.execute("UPDATE automation.`variables` SET `value` = %s WHERE `name` = %s", (issue2, "most_recent_issue")) + # queue the 'New FluView Available' flow + cur.execute("CALL automation.RunStep(36)") + elif issue2 < issue1: + raise Exception("most recent issue is older than the last known issue") - # cleanup - cnx.commit() - cur.close() - cnx.close() + # cleanup + cnx.commit() + cur.close() + cnx.close() diff --git a/src/acquisition/fluview/fluview_update.py b/src/acquisition/fluview/fluview_update.py index 65bec7a40..bafa01855 100644 --- a/src/acquisition/fluview/fluview_update.py +++ b/src/acquisition/fluview/fluview_update.py @@ -130,398 +130,352 @@ from . import fluview_locations # sheet names -ILINET_SHEET = 'ILINet.csv' -PHL_SHEET = 'WHO_NREVSS_Public_Health_Labs.csv' -CL_SHEET = 'WHO_NREVSS_Clinical_Labs.csv' +ILINET_SHEET = "ILINet.csv" +PHL_SHEET = "WHO_NREVSS_Public_Health_Labs.csv" +CL_SHEET = "WHO_NREVSS_Clinical_Labs.csv" # table names -CL_TABLE = 'fluview_clinical' -PHL_TABLE = 'fluview_public' +CL_TABLE = "fluview_clinical" +PHL_TABLE = "fluview_public" + def optional_int(i): - return int(i) if i not in ('', 'X') else None + return int(i) if i not in ("", "X") else None + def optional_float(i, j): - return float(i) if i not in ('', 'X') else float(j) + return float(i) if i not in ("", "X") else float(j) + def nullable_float(i): - return float(i) if i not in ('', 'X') else None + return float(i) if i not in ("", "X") else None + def get_ilinet_data(row): - if row[0] == 'REGION TYPE' and row != [ - 'REGION TYPE', - 'REGION', - 'YEAR', - 'WEEK', - '% WEIGHTED ILI', - '%UNWEIGHTED ILI', - 'AGE 0-4', - 'AGE 25-49', - 'AGE 25-64', - 'AGE 5-24', - 'AGE 50-64', - 'AGE 65', - 'ILITOTAL', - 'NUM. 
OF PROVIDERS", + "TOTAL PATIENTS", + ]: + raise Exception("header row has changed") + if len(row) == 1 or row[0] == "REGION TYPE": + # this is a header row + return None + if row[5] == "X": + # ILI isn't reported, ignore this row + return None + return { + "location": fluview_locations.get_location_name(*row[:2]), + "epiweek": join_epiweek(int(row[2]), int(row[3])), + "wili": optional_float(*row[4:6]), + "ili": float(row[5]), + "age0": optional_int(row[6]), + "age1": optional_int(row[9]), + "age2": optional_int(row[8]), + "age3": optional_int(row[7]), + "age4": optional_int(row[10]), + "age5": optional_int(row[11]), + "n_ili": optional_int(row[12]), + "n_providers": optional_int(row[13]), + "n_patients": optional_int(row[14]), + } + def get_clinical_data(row): - if row[0] == 'REGION TYPE' and row != [ - 'REGION TYPE', - 'REGION', - 'YEAR', - 'WEEK', - 'TOTAL SPECIMENS', - 'TOTAL A', - 'TOTAL B', - 'PERCENT POSITIVE', - 'PERCENT A', - 'PERCENT B' - ]: - raise Exception('header row has changed for clinical lab data.') - if len(row) == 1 or row[0] == 'REGION TYPE': - # this is a header row - return None - if row[4] == 'X': - # data is not reported, ignore this row - return None - # ignore percentage calculations for now - return { - 'location': fluview_locations.get_location_name(*row[:2]), - 'epiweek': join_epiweek(int(row[2]), int(row[3])), - 'total_specimens': int(row[4]), - 'total_a': optional_int(row[5]), - 'total_b': optional_int(row[6]), - 'percent_positive': nullable_float(row[7]), - 'percent_a': nullable_float(row[8]), - 'percent_b': nullable_float(row[9]) - } + if row[0] == "REGION TYPE" and row != ["REGION TYPE", "REGION", "YEAR", "WEEK", "TOTAL SPECIMENS", "TOTAL A", "TOTAL B", "PERCENT POSITIVE", "PERCENT A", "PERCENT B"]: + raise Exception("header row has changed for clinical lab data.") + if len(row) == 1 or row[0] == "REGION TYPE": + # this is a header row + return None + if row[4] == "X": + # data is not reported, ignore this row + return None + # ignore percentage calculations for now + return { + "location": fluview_locations.get_location_name(*row[:2]), + "epiweek": join_epiweek(int(row[2]), int(row[3])), + "total_specimens": int(row[4]), + "total_a": optional_int(row[5]), + "total_b": optional_int(row[6]), + "percent_positive": nullable_float(row[7]), + "percent_a": nullable_float(row[8]), + "percent_b": nullable_float(row[9]), + } + def get_public_data(row): - hrow1 = [ - 'REGION TYPE', - 'REGION', - 'SEASON_DESCRIPTION', - 'TOTAL SPECIMENS', - 'A (2009 H1N1)', - 'A (H3)', - 'A (Subtyping not Performed)', - 'B', - 'BVic', - 'BYam', - 'H3N2v' - ] - hrow2 = [ - 'REGION TYPE', - 'REGION', - 'YEAR', - 'WEEK', - 'TOTAL SPECIMENS', - 'A (2009 H1N1)', - 'A (H3)', - 'A (Subtyping not Performed)', - 'B', - 'BVic', - 'BYam', - 'H3N2v' - ] - if row[0] == 'REGION TYPE' and row != hrow1 and row != hrow2: - raise Exception('header row has changed for public health lab data.') - if len(row) == 1 or row[0] == 'REGION TYPE': - # header row - return None - if row[3] == 'X': - # data is not reported, ignore this row - return None - # handle case where data is reported by season, not by epiweek - is_weekly = len(row) == len(hrow2) - # set epiweek - if is_weekly: - epiweek = join_epiweek(int(row[2]), int(row[3])) - else: - epiweek = int(row[2][7:11]) * 100 + 40 - # row offset - offset = 1 if is_weekly else 0 - return { - 'location': fluview_locations.get_location_name(*row[:2]), - 'epiweek': epiweek, - 'total_specimens': int(row[3 + offset]), - 'total_a_h1n1': optional_int(row[4+ offset]), - 
'total_a_h3': optional_int(row[5 + offset]), - 'total_a_h3n2v': optional_int(row[10 + offset]), - 'total_a_no_sub': optional_int(row[6 + offset]), - 'total_b': optional_int(row[7 + offset]), - 'total_b_vic': optional_int(row[8 + offset]), - 'total_b_yam': optional_int(row[9 + offset]) - } - -def load_zipped_csv(filename, sheetname='ILINet.csv'): - """Read rows from a zipped CSV, which is expected to be named as specified - by the sheetname parameter. Default is ILINet.csv, for the default flu data.""" - with zipfile.ZipFile(filename) as f: - with f.open(sheetname) as ff: - return [row for row in csv.reader(io.StringIO(str(ff.read(), 'utf-8')))] - -def get_rows(cnx, table='fluview'): - """Count and return the number of rows in the `fluview` table. - Looking at the fluview table by default, but may pass parameter - to look at public health or clinical lab data instead.""" - select = cnx.cursor() - select.execute('SELECT count(1) num FROM %s' % table) - for (num,) in select: - pass - select.close() - return num + hrow1 = ["REGION TYPE", "REGION", "SEASON_DESCRIPTION", "TOTAL SPECIMENS", "A (2009 H1N1)", "A (H3)", "A (Subtyping not Performed)", "B", "BVic", "BYam", "H3N2v"] + hrow2 = ["REGION TYPE", "REGION", "YEAR", "WEEK", "TOTAL SPECIMENS", "A (2009 H1N1)", "A (H3)", "A (Subtyping not Performed)", "B", "BVic", "BYam", "H3N2v"] + if row[0] == "REGION TYPE" and row != hrow1 and row != hrow2: + raise Exception("header row has changed for public health lab data.") + if len(row) == 1 or row[0] == "REGION TYPE": + # header row + return None + if row[3] == "X": + # data is not reported, ignore this row + return None + # handle case where data is reported by season, not by epiweek + is_weekly = len(row) == len(hrow2) + # set epiweek + if is_weekly: + epiweek = join_epiweek(int(row[2]), int(row[3])) + else: + epiweek = int(row[2][7:11]) * 100 + 40 + # row offset + offset = 1 if is_weekly else 0 + return { + "location": fluview_locations.get_location_name(*row[:2]), + "epiweek": epiweek, + "total_specimens": int(row[3 + offset]), + "total_a_h1n1": optional_int(row[4 + offset]), + "total_a_h3": optional_int(row[5 + offset]), + "total_a_h3n2v": optional_int(row[10 + offset]), + "total_a_no_sub": optional_int(row[6 + offset]), + "total_b": optional_int(row[7 + offset]), + "total_b_vic": optional_int(row[8 + offset]), + "total_b_yam": optional_int(row[9 + offset]), + } + + +def load_zipped_csv(filename, sheetname="ILINet.csv"): + """Read rows from a zipped CSV, which is expected to be named as specified + by the sheetname parameter. Default is ILINet.csv, for the default flu data.""" + with zipfile.ZipFile(filename) as f: + with f.open(sheetname) as ff: + return [row for row in csv.reader(io.StringIO(str(ff.read(), "utf-8")))] + + +def get_rows(cnx, table="fluview"): + """Count and return the number of rows in the `fluview` table. + Looking at the fluview table by default, but may pass parameter + to look at public health or clinical lab data instead.""" + select = cnx.cursor() + select.execute("SELECT count(1) num FROM %s" % table) + for (num,) in select: + pass + select.close() + return num + def update_from_file_clinical(issue, date, filename, test_mode=False): - """ - Read WHO/NREVSS data from a zipped CSV and insert into (or update) the database. 
- """ - - # database connection - u, p = secrets.db.epi - cnx = mysql.connector.connect(user=u, password=p, database='epidata') - rows1 = get_rows(cnx, CL_TABLE) - print('rows before: %d' % (rows1)) - insert = cnx.cursor() - - # load the data, ignoring empty rows - print('loading data from %s as issued on %d' % (filename, issue)) - rows = load_zipped_csv(filename, CL_SHEET) - print(' loaded %d rows' % len(rows)) - data = [get_clinical_data(row) for row in rows] - entries = [obj for obj in data if obj] - print(' found %d entries' % len(entries)) - - sql = ''' - INSERT INTO - `fluview_clinical` (`release_date`, `issue`, `epiweek`, `region`, `lag`, - `total_specimens`, `total_a`, `total_b`, `percent_positive`, `percent_a`, - `percent_b`) - VALUES - (%s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s) - ON DUPLICATE KEY UPDATE - `release_date` = least(`release_date`, %s), - `total_specimens` = %s, - `total_a` = %s, - `total_b` = %s, - `percent_positive` = %s, - `percent_a` = %s, - `percent_b` = %s - ''' - - # insert each row - insert = cnx.cursor() - for row in entries: - lag = delta_epiweeks(row['epiweek'], issue) - args = [ - row['total_specimens'], row['total_a'], row['total_b'], - row['percent_positive'], row['percent_a'], row['percent_b'] - ] - ins_args = [date, issue, row['epiweek'], row['location'], lag] + args - upd_args = [date] + args - insert.execute(sql, ins_args + upd_args) - - # cleanup - insert.close() - if test_mode: - print('test mode, not committing') - rows2 = rows1 - else: - cnx.commit() - rows2 = get_rows(cnx) - print('rows after: %d (added %d)' % (rows2, rows2 - rows1)) - cnx.close() + """ + Read WHO/NREVSS data from a zipped CSV and insert into (or update) the database. + """ + + # database connection + u, p = secrets.db.epi + cnx = mysql.connector.connect(user=u, password=p, database="epidata") + rows1 = get_rows(cnx, CL_TABLE) + print("rows before: %d" % (rows1)) + insert = cnx.cursor() + + # load the data, ignoring empty rows + print("loading data from %s as issued on %d" % (filename, issue)) + rows = load_zipped_csv(filename, CL_SHEET) + print(" loaded %d rows" % len(rows)) + data = [get_clinical_data(row) for row in rows] + entries = [obj for obj in data if obj] + print(" found %d entries" % len(entries)) + + sql = """ + INSERT INTO + `fluview_clinical` (`release_date`, `issue`, `epiweek`, `region`, `lag`, + `total_specimens`, `total_a`, `total_b`, `percent_positive`, `percent_a`, + `percent_b`) + VALUES + (%s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s) + ON DUPLICATE KEY UPDATE + `release_date` = least(`release_date`, %s), + `total_specimens` = %s, + `total_a` = %s, + `total_b` = %s, + `percent_positive` = %s, + `percent_a` = %s, + `percent_b` = %s + """ + + # insert each row + insert = cnx.cursor() + for row in entries: + lag = delta_epiweeks(row["epiweek"], issue) + args = [row["total_specimens"], row["total_a"], row["total_b"], row["percent_positive"], row["percent_a"], row["percent_b"]] + ins_args = [date, issue, row["epiweek"], row["location"], lag] + args + upd_args = [date] + args + insert.execute(sql, ins_args + upd_args) + + # cleanup + insert.close() + if test_mode: + print("test mode, not committing") + rows2 = rows1 + else: + cnx.commit() + rows2 = get_rows(cnx) + print("rows after: %d (added %d)" % (rows2, rows2 - rows1)) + cnx.close() + def update_from_file_public(issue, date, filename, test_mode=False): - """ - Read WHO/NREVSS data from a zipped CSV and insert into (or update) the database. 
- """ - - # database connection - u, p = secrets.db.epi - cnx = mysql.connector.connect(user=u, password=p, database='epidata') - rows1 = get_rows(cnx, PHL_TABLE) - print('rows before: %d' % (rows1)) - insert = cnx.cursor() - - # load the data, ignoring empty rows - print('loading data from %s as issued on %d' % (filename, issue)) - rows = load_zipped_csv(filename, PHL_SHEET) - print(' loaded %d rows' % len(rows)) - data = [get_public_data(row) for row in rows] - entries = [obj for obj in data if obj] - print(' found %d entries' % len(entries)) - - sql = ''' - INSERT INTO - `fluview_public` (`release_date`, `issue`, `epiweek`, `region`, `lag`, - `total_specimens`, `total_a_h1n1`, `total_a_h3`, `total_a_h3n2v`, - `total_a_no_sub`, `total_b`, `total_b_vic`, `total_b_yam`) - VALUES - (%s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s) - ON DUPLICATE KEY UPDATE - `release_date` = least(`release_date`, %s), - `total_specimens` = %s, - `total_a_h1n1` = %s, - `total_a_h3` = %s, - `total_a_h3n2v` = %s, - `total_a_no_sub` = %s, - `total_b` = %s, - `total_b_vic` = %s, - `total_b_yam` = %s - ''' - - # insert each row - insert = cnx.cursor() - for row in entries: - lag = delta_epiweeks(row['epiweek'], issue) - args = [ - row['total_specimens'], row['total_a_h1n1'], row['total_a_h3'], - row['total_a_h3n2v'], row['total_a_no_sub'], row['total_b'], - row['total_b_vic'], row['total_b_yam'] - ] - ins_args = [date, issue, row['epiweek'], row['location'], lag] + args - upd_args = [date] + args - insert.execute(sql, ins_args + upd_args) - - # cleanup - insert.close() - if test_mode: - print('test mode, not committing') - rows2 = rows1 - else: - cnx.commit() - rows2 = get_rows(cnx) - print('rows after: %d (added %d)' % (rows2, rows2 - rows1)) - cnx.close() + """ + Read WHO/NREVSS data from a zipped CSV and insert into (or update) the database. 
+ """ + + # database connection + u, p = secrets.db.epi + cnx = mysql.connector.connect(user=u, password=p, database="epidata") + rows1 = get_rows(cnx, PHL_TABLE) + print("rows before: %d" % (rows1)) + insert = cnx.cursor() + + # load the data, ignoring empty rows + print("loading data from %s as issued on %d" % (filename, issue)) + rows = load_zipped_csv(filename, PHL_SHEET) + print(" loaded %d rows" % len(rows)) + data = [get_public_data(row) for row in rows] + entries = [obj for obj in data if obj] + print(" found %d entries" % len(entries)) + + sql = """ + INSERT INTO + `fluview_public` (`release_date`, `issue`, `epiweek`, `region`, `lag`, + `total_specimens`, `total_a_h1n1`, `total_a_h3`, `total_a_h3n2v`, + `total_a_no_sub`, `total_b`, `total_b_vic`, `total_b_yam`) + VALUES + (%s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s) + ON DUPLICATE KEY UPDATE + `release_date` = least(`release_date`, %s), + `total_specimens` = %s, + `total_a_h1n1` = %s, + `total_a_h3` = %s, + `total_a_h3n2v` = %s, + `total_a_no_sub` = %s, + `total_b` = %s, + `total_b_vic` = %s, + `total_b_yam` = %s + """ + + # insert each row + insert = cnx.cursor() + for row in entries: + lag = delta_epiweeks(row["epiweek"], issue) + args = [row["total_specimens"], row["total_a_h1n1"], row["total_a_h3"], row["total_a_h3n2v"], row["total_a_no_sub"], row["total_b"], row["total_b_vic"], row["total_b_yam"]] + ins_args = [date, issue, row["epiweek"], row["location"], lag] + args + upd_args = [date] + args + insert.execute(sql, ins_args + upd_args) + + # cleanup + insert.close() + if test_mode: + print("test mode, not committing") + rows2 = rows1 + else: + cnx.commit() + rows2 = get_rows(cnx) + print("rows after: %d (added %d)" % (rows2, rows2 - rows1)) + cnx.close() + def update_from_file(issue, date, filename, test_mode=False): - """ - Read ILINet data from a zipped CSV and insert into (or update) the database. 
- """ - - # database connection - u, p = secrets.db.epi - cnx = mysql.connector.connect(user=u, password=p, database='epidata') - rows1 = get_rows(cnx) - print('rows before: %d' % (rows1)) - insert = cnx.cursor() - - # load the data, ignoring empty rows - print('loading data from %s as issued on %d' % (filename, issue)) - rows = load_zipped_csv(filename) - print(' loaded %d rows' % len(rows)) - data = [get_ilinet_data(row) for row in rows] - entries = [obj for obj in data if obj] - print(' found %d entries' % len(entries)) - - sql = ''' - INSERT INTO - `fluview` (`release_date`, `issue`, `epiweek`, `region`, `lag`, `num_ili`, - `num_patients`, `num_providers`, `wili`, `ili`, `num_age_0`, `num_age_1`, - `num_age_2`, `num_age_3`, `num_age_4`, `num_age_5`) - VALUES - (%s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s) - ON DUPLICATE KEY UPDATE - `release_date` = least(`release_date`, %s), - `num_ili` = %s, - `num_patients` = %s, - `num_providers` = %s, - `wili` = %s, - `ili` = %s, - `num_age_0` = coalesce(%s, `num_age_0`), - `num_age_1` = coalesce(%s, `num_age_1`), - `num_age_2` = coalesce(%s, `num_age_2`), - `num_age_3` = coalesce(%s, `num_age_3`), - `num_age_4` = coalesce(%s, `num_age_4`), - `num_age_5` = coalesce(%s, `num_age_5`) - ''' - - # insert each row - insert = cnx.cursor() - for row in entries: - lag = delta_epiweeks(row['epiweek'], issue) - args = [ - row['n_ili'], row['n_patients'], row['n_providers'], row['wili'], - row['ili'], row['age0'], row['age1'], row['age2'], row['age3'], - row['age4'], row['age5'] - ] - ins_args = [date, issue, row['epiweek'], row['location'], lag] + args - upd_args = [date] + args - insert.execute(sql, ins_args + upd_args) - - # cleanup - insert.close() - if test_mode: - print('test mode, not committing') - rows2 = rows1 - else: - cnx.commit() - rows2 = get_rows(cnx) - print('rows after: %d (added %d)' % (rows2, rows2 - rows1)) - cnx.close() + """ + Read ILINet data from a zipped CSV and insert into (or update) the database. 
+ """ + + # database connection + u, p = secrets.db.epi + cnx = mysql.connector.connect(user=u, password=p, database="epidata") + rows1 = get_rows(cnx) + print("rows before: %d" % (rows1)) + insert = cnx.cursor() + + # load the data, ignoring empty rows + print("loading data from %s as issued on %d" % (filename, issue)) + rows = load_zipped_csv(filename) + print(" loaded %d rows" % len(rows)) + data = [get_ilinet_data(row) for row in rows] + entries = [obj for obj in data if obj] + print(" found %d entries" % len(entries)) + + sql = """ + INSERT INTO + `fluview` (`release_date`, `issue`, `epiweek`, `region`, `lag`, `num_ili`, + `num_patients`, `num_providers`, `wili`, `ili`, `num_age_0`, `num_age_1`, + `num_age_2`, `num_age_3`, `num_age_4`, `num_age_5`) + VALUES + (%s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s) + ON DUPLICATE KEY UPDATE + `release_date` = least(`release_date`, %s), + `num_ili` = %s, + `num_patients` = %s, + `num_providers` = %s, + `wili` = %s, + `ili` = %s, + `num_age_0` = coalesce(%s, `num_age_0`), + `num_age_1` = coalesce(%s, `num_age_1`), + `num_age_2` = coalesce(%s, `num_age_2`), + `num_age_3` = coalesce(%s, `num_age_3`), + `num_age_4` = coalesce(%s, `num_age_4`), + `num_age_5` = coalesce(%s, `num_age_5`) + """ + + # insert each row + insert = cnx.cursor() + for row in entries: + lag = delta_epiweeks(row["epiweek"], issue) + args = [row["n_ili"], row["n_patients"], row["n_providers"], row["wili"], row["ili"], row["age0"], row["age1"], row["age2"], row["age3"], row["age4"], row["age5"]] + ins_args = [date, issue, row["epiweek"], row["location"], lag] + args + upd_args = [date] + args + insert.execute(sql, ins_args + upd_args) + + # cleanup + insert.close() + if test_mode: + print("test mode, not committing") + rows2 = rows1 + else: + cnx.commit() + rows2 = get_rows(cnx) + print("rows after: %d (added %d)" % (rows2, rows2 - rows1)) + cnx.close() + def main(): - # args and usage - parser = argparse.ArgumentParser() - parser.add_argument( - '--test', - action='store_true', - help='do dry run only, do not update the database' - ) - parser.add_argument( - '--file', - type=str, - help='load an existing zip file (otherwise fetch current data)' - ) - parser.add_argument( - '--issue', - type=int, - help='issue of the file (e.g. 
201740); used iff --file is given' - ) - args = parser.parse_args() - - if (args.file is None) != (args.issue is None): - raise Exception('--file and --issue must both be present or absent') - - date = datetime.datetime.now().strftime('%Y-%m-%d') - print('assuming release date is today, %s' % date) - - if args.file: - update_from_file(args.issue, date, args.file, test_mode=args.test) - update_from_file_clinical(args.issue, date, args.file, test_mode=args.test) - # TODO: header row has changed for public health lab data - # update_from_file_public(args.issue, date, args.file, test_mode=args.test) - else: - issue, files = fluview.save_latest(path='flu_data') - for filename in files: - update_from_file(issue, date, filename, test_mode=args.test) - update_from_file_clinical(issue, date, filename, test_mode=args.test) - # TODO: header row has changed for public health lab data - # update_from_file_public(issue, date, filename, test_mode=args.test) - -if __name__ == '__main__': - main() + # args and usage + parser = argparse.ArgumentParser() + parser.add_argument("--test", action="store_true", help="do dry run only, do not update the database") + parser.add_argument("--file", type=str, help="load an existing zip file (otherwise fetch current data)") + parser.add_argument("--issue", type=int, help="issue of the file (e.g. 201740); used iff --file is given") + args = parser.parse_args() + + if (args.file is None) != (args.issue is None): + raise Exception("--file and --issue must both be present or absent") + + date = datetime.datetime.now().strftime("%Y-%m-%d") + print("assuming release date is today, %s" % date) + + if args.file: + update_from_file(args.issue, date, args.file, test_mode=args.test) + update_from_file_clinical(args.issue, date, args.file, test_mode=args.test) + # TODO: header row has changed for public health lab data + # update_from_file_public(args.issue, date, args.file, test_mode=args.test) + else: + issue, files = fluview.save_latest(path="flu_data") + for filename in files: + update_from_file(issue, date, filename, test_mode=args.test) + update_from_file_clinical(issue, date, filename, test_mode=args.test) + # TODO: header row has changed for public health lab data + # update_from_file_public(issue, date, filename, test_mode=args.test) + + +if __name__ == "__main__": + main() diff --git a/src/acquisition/fluview/impute_missing_values.py b/src/acquisition/fluview/impute_missing_values.py index 7f9a23231..645daaba5 100644 --- a/src/acquisition/fluview/impute_missing_values.py +++ b/src/acquisition/fluview/impute_missing_values.py @@ -59,290 +59,281 @@ class Database: - """Database wrapper and abstraction layer.""" - - class Sql: - """Container for SQL constants.""" - - # Count the total number of imputed rows. - count_rows = ''' - SELECT - count(1) `num` - FROM - `fluview_imputed` - ''' - - # Find (issue, epiweek) pairs that exist in table `fluview` but not in - # table `fluview_imputed`. Note that only issues >= 201740 are selected - # because that's when CDC first started posting state-level ILINet data. - # This assumes that `fluview` is always missing at least one location. - find_missing_rows = ''' - SELECT - fv.`issue`, fv.`epiweek` - FROM ( + """Database wrapper and abstraction layer.""" + + class Sql: + """Container for SQL constants.""" + + # Count the total number of imputed rows. 
+ count_rows = """ SELECT - `issue`, `epiweek` + count(1) `num` FROM - `fluview` + `fluview_imputed` + """ + + # Find (issue, epiweek) pairs that exist in table `fluview` but not in + # table `fluview_imputed`. Note that only issues >= 201740 are selected + # because that's when CDC first started posting state-level ILINet data. + # This assumes that `fluview` is always missing at least one location. + find_missing_rows = """ + SELECT + fv.`issue`, fv.`epiweek` + FROM ( + SELECT + `issue`, `epiweek` + FROM + `fluview` + WHERE + `issue` >= 201740 + GROUP BY + `issue`, `epiweek` + ) fv + LEFT JOIN ( + SELECT + `issue`, `epiweek` + FROM + `fluview_imputed` + GROUP BY + `issue`, `epiweek` + ) fvi + ON + fvi.`issue` = fv.`issue` AND fvi.`epiweek` = fv.`epiweek` WHERE - `issue` >= 201740 - GROUP BY - `issue`, `epiweek` - ) fv - LEFT JOIN ( + fvi.`issue` IS NULL + """ + + # Read all location rows from the `fluview` table for a given issue and + # epiweek. + get_known_values = """ SELECT - `issue`, `epiweek` + `region`, `num_ili`, `num_patients`, `num_providers` FROM - `fluview_imputed` - GROUP BY - `issue`, `epiweek` - ) fvi - ON - fvi.`issue` = fv.`issue` AND fvi.`epiweek` = fv.`epiweek` - WHERE - fvi.`issue` IS NULL - ''' - - # Read all location rows from the `fluview` table for a given issue and - # epiweek. - get_known_values = ''' - SELECT - `region`, `num_ili`, `num_patients`, `num_providers` - FROM - `fluview` - WHERE - `issue` = %s AND `epiweek` = %s - ''' - - # Insert location rows into the `fluview_imputed` table for a given issue - # and epiweek. - add_imputed_values = ''' - INSERT INTO - `fluview_imputed` ( - `issue`, - `epiweek`, - `region`, - `lag`, - `num_ili`, - `num_patients`, - `num_providers`, - `ili` - ) - VALUES - (%s, %s, %s, %s, %s, %s, %s, %s) - ''' - - def connect(self): - """Connect to the database.""" - u, p = secrets.db.epi - self.cnx = mysql.connector.connect(user=u, password=p, database='epidata') - self.cur = self.cnx.cursor() - - def close(self, commit): - """ - Close the connection to the database, committing or rolling back changes as - indicated. - """ - self.cur.close() - if commit: - self.cnx.commit() - else: - print('test mode, not committing') - self.cnx.close() - - def count_rows(self): - """Count and return the number of rows in the `fluview_imputed` table.""" - self.cur.execute(Database.Sql.count_rows) - for (num,) in self.cur: - return num - - def find_missing_rows(self): - """ - Find rows that still have missing values. Each missing row is uniquely - identified by an (issue, epiweek, location) tuple. This function finds the - first two. - """ + `fluview` + WHERE + `issue` = %s AND `epiweek` = %s + """ + + # Insert location rows into the `fluview_imputed` table for a given issue + # and epiweek. + add_imputed_values = """ + INSERT INTO + `fluview_imputed` ( + `issue`, + `epiweek`, + `region`, + `lag`, + `num_ili`, + `num_patients`, + `num_providers`, + `ili` + ) + VALUES + (%s, %s, %s, %s, %s, %s, %s, %s) + """ + + def connect(self): + """Connect to the database.""" + u, p = secrets.db.epi + self.cnx = mysql.connector.connect(user=u, password=p, database="epidata") + self.cur = self.cnx.cursor() + + def close(self, commit): + """ + Close the connection to the database, committing or rolling back changes as + indicated. 
+ """ + self.cur.close() + if commit: + self.cnx.commit() + else: + print("test mode, not committing") + self.cnx.close() + + def count_rows(self): + """Count and return the number of rows in the `fluview_imputed` table.""" + self.cur.execute(Database.Sql.count_rows) + for (num,) in self.cur: + return num + + def find_missing_rows(self): + """ + Find rows that still have missing values. Each missing row is uniquely + identified by an (issue, epiweek, location) tuple. This function finds the + first two. + """ + + self.cur.execute(Database.Sql.find_missing_rows) + return [(issue, epiweek) for (issue, epiweek) in self.cur] + + def get_known_values(self, issue, epiweek): + """ + Fetch ILINet data for all locations available for the given issue and + epiweek. The returned value is a dict mapping from locations to ILI data. + """ + + self.cur.execute(Database.Sql.get_known_values, (issue, epiweek)) + return dict([(loc, (n_ili, n_pat, n_prov)) for (loc, n_ili, n_pat, n_prov) in self.cur]) + + def add_imputed_values(self, issue, epiweek, imputed): + """ + Store imputed ILINet data for the given locations on the given issue and + epiweek. The imputed value is a dict mapping from locations to ILI data. + """ + + for loc in imputed.keys(): + lag, n_ili, n_pat, n_prov, ili = imputed[loc] + args = (issue, epiweek, loc, lag, n_ili, n_pat, n_prov, ili) + self.cur.execute(Database.Sql.add_imputed_values, args) - self.cur.execute(Database.Sql.find_missing_rows) - return [(issue, epiweek) for (issue, epiweek) in self.cur] - def get_known_values(self, issue, epiweek): - """ - Fetch ILINet data for all locations available for the given issue and - epiweek. The returned value is a dict mapping from locations to ILI data. - """ +class StatespaceException(Exception): + """Used to indicate that imputation is not possible with the given inputs.""" - self.cur.execute(Database.Sql.get_known_values, (issue, epiweek)) - return dict([ - (loc, (n_ili, n_pat, n_prov)) - for - (loc, n_ili, n_pat, n_prov) - in self.cur - ]) - def add_imputed_values(self, issue, epiweek, imputed): +def get_location_graph(): """ - Store imputed ILINet data for the given locations on the given issue and - epiweek. The imputed value is a dict mapping from locations to ILI data. + Return a matrix where rows represent regions, columns represent atoms, and + each entry is a 1 if the region contains the atom, otherwise 0. The + corresponding lists of regions and atoms are also returned. """ - for loc in imputed.keys(): - lag, n_ili, n_pat, n_prov, ili = imputed[loc] - args = (issue, epiweek, loc, lag, n_ili, n_pat, n_prov, ili) - self.cur.execute(Database.Sql.add_imputed_values, args) - - -class StatespaceException(Exception): - """Used to indicate that imputation is not possible with the given inputs.""" - - -def get_location_graph(): - """ - Return a matrix where rows represent regions, columns represent atoms, and - each entry is a 1 if the region contains the atom, otherwise 0. The - corresponding lists of regions and atoms are also returned. 
- """ - - regions = sorted(Locations.region_list) - atoms = sorted(Locations.atom_list) - graph = np.zeros((len(regions), len(atoms))) - for i, r in enumerate(regions): - for a in Locations.region_map[r]: - j = atoms.index(a) - graph[i, j] = 1 - return graph, regions, atoms + regions = sorted(Locations.region_list) + atoms = sorted(Locations.atom_list) + graph = np.zeros((len(regions), len(atoms))) + for i, r in enumerate(regions): + for a in Locations.region_map[r]: + j = atoms.index(a) + graph[i, j] = 1 + return graph, regions, atoms def get_fusion_parameters(known_locations): - """ - Return a matrix that fuses known ILI values into unknown ILI values. The - corresponding lists of known and unknown locations are also returned. + """ + Return a matrix that fuses known ILI values into unknown ILI values. The + corresponding lists of known and unknown locations are also returned. - The goal is to infer ILI data in all locations, given ILI data in some - partial set of locations. This function takes a sensor fusion approach. + The goal is to infer ILI data in all locations, given ILI data in some + partial set of locations. This function takes a sensor fusion approach. - Let $z$ be a column vector of values in reported locations. Let $y$ be the - desired column vector of values in unreported locations. With matrices $H$ - (mapping from latent state to reported values), $W$ (mapping from latent - state to unreported values), and $R = I$ (covariance, which is identity): + Let $z$ be a column vector of values in reported locations. Let $y$ be the + desired column vector of values in unreported locations. With matrices $H$ + (mapping from latent state to reported values), $W$ (mapping from latent + state to unreported values), and $R = I$ (covariance, which is identity): - $y = W (H^T R^{-1} H)^{-1} H^T R^{-1} z$ - $y = W (H^T H)^{-1} H^T z$ + $y = W (H^T R^{-1} H)^{-1} H^T R^{-1} z$ + $y = W (H^T H)^{-1} H^T z$ - This is equavalent to OLS regression with an added translation from atomic - locations to missing locations. Unknown values are computed as a linear - combination of known values. - """ + This is equavalent to OLS regression with an added translation from atomic + locations to missing locations. Unknown values are computed as a linear + combination of known values. 
+ """ - graph, regions, atoms = get_location_graph() - is_known = np.array([r in known_locations for r in regions]) - is_unknown = np.logical_not(is_known) - if not np.any(is_known): - raise StatespaceException('no values are known') - if not np.any(is_unknown): - raise StatespaceException('no values are unknown') + graph, regions, atoms = get_location_graph() + is_known = np.array([r in known_locations for r in regions]) + is_unknown = np.logical_not(is_known) + if not np.any(is_known): + raise StatespaceException("no values are known") + if not np.any(is_unknown): + raise StatespaceException("no values are unknown") - H = graph[is_known, :] - W = graph[is_unknown, :] - if np.linalg.matrix_rank(H) != len(atoms): - raise StatespaceException('system is underdetermined') + H = graph[is_known, :] + W = graph[is_unknown, :] + if np.linalg.matrix_rank(H) != len(atoms): + raise StatespaceException("system is underdetermined") - HtH = np.dot(H.T, H) - HtH_inv = np.linalg.inv(HtH) - H_pseudo_inv = np.dot(HtH_inv, H.T) - fuser = np.dot(W, H_pseudo_inv) + HtH = np.dot(H.T, H) + HtH_inv = np.linalg.inv(HtH) + H_pseudo_inv = np.dot(HtH_inv, H.T) + fuser = np.dot(W, H_pseudo_inv) - locations = np.array(regions) - filter_locations = lambda selected: list(map(str, locations[selected])) - return fuser, filter_locations(is_known), filter_locations(is_unknown) + locations = np.array(regions) + filter_locations = lambda selected: list(map(str, locations[selected])) + return fuser, filter_locations(is_known), filter_locations(is_unknown) def get_lag_and_ili(issue, epiweek, num_ili, num_patients): - """ - Compute and return reporting lag and percent ILI from imputed ILINet data. - """ - lag = delta_epiweeks(epiweek, issue) - ili = 100.0 * (0 if num_patients == 0 else num_ili / num_patients) - return lag, ili + """ + Compute and return reporting lag and percent ILI from imputed ILINet data. + """ + lag = delta_epiweeks(epiweek, issue) + ili = 100.0 * (0 if num_patients == 0 else num_ili / num_patients) + return lag, ili def impute_missing_values(database, test_mode=False): - """ - Determine whether values are missing for any states and territories. If so, - impute them and store them in the database. - """ - - # database connection - database.connect() - rows1 = database.count_rows() - print('rows before: %d' % (rows1)) - - # iterate over missing epiweeks - missing_rows = database.find_missing_rows() - print('missing data for %d epiweeks' % len(missing_rows)) - for issue, epiweek in missing_rows: - print('i=%d e=%d' % (issue, epiweek)) - - # get known values from table `fluview` - known_values = database.get_known_values(issue, epiweek) - - # Unlike most other state-level data, which typically begins publicly on - # 2010w40, data for PR begins on 2013w40. Before this, there are no reports - # for PR. Here we assume that no report is equivalent to a report of all - # zeros (number of ILI, patients, and providers). That's mostly true, with - # the notable exception of wILI, but that's not relevant here. By assuming - # that PR reports zero on those weeks, it's possible to impute values for - # VI, which are otherwise not reported until 2015w40. 
- assume_pr_zero = epiweek < 201340 and 'pr' not in known_values - if assume_pr_zero: - known_values['pr'] = (0, 0, 0) - - # get the imputation matrix and lists of known and unknown locations - F, known, unknown = get_fusion_parameters(known_values.keys()) - - # finally, impute the missing values - z = np.array([known_values[k] for k in known]) - y = np.dot(F, z) - - # possibly also record the assumptions made for PR - if assume_pr_zero: - unknown.append('pr') - y = np.vstack((y, [known_values['pr']])) - - # add lag and percent ILI to the data for each imputed location - imputed_values = {} - for loc, values in zip(unknown, y): - n_ili, n_pat, n_prov = map(int, np.rint(values)) - lag, ili = get_lag_and_ili(issue, epiweek, n_ili, n_pat) - imputed_values[loc] = (lag, n_ili, n_pat, n_prov, ili) - print(' %s: %s' % (loc, str(imputed_values[loc]))) - - # save all imputed values in table `fluview_imputed` - database.add_imputed_values(issue, epiweek, imputed_values) - - # database cleanup - rows2 = database.count_rows() - print('rows after: %d (added %d)' % (rows2, rows2 - rows1)) - commit = not test_mode - database.close(commit) + """ + Determine whether values are missing for any states and territories. If so, + impute them and store them in the database. + """ + + # database connection + database.connect() + rows1 = database.count_rows() + print("rows before: %d" % (rows1)) + + # iterate over missing epiweeks + missing_rows = database.find_missing_rows() + print("missing data for %d epiweeks" % len(missing_rows)) + for issue, epiweek in missing_rows: + print("i=%d e=%d" % (issue, epiweek)) + + # get known values from table `fluview` + known_values = database.get_known_values(issue, epiweek) + + # Unlike most other state-level data, which typically begins publicly on + # 2010w40, data for PR begins on 2013w40. Before this, there are no reports + # for PR. Here we assume that no report is equivalent to a report of all + # zeros (number of ILI, patients, and providers). That's mostly true, with + # the notable exception of wILI, but that's not relevant here. By assuming + # that PR reports zero on those weeks, it's possible to impute values for + # VI, which are otherwise not reported until 2015w40. 
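# Illustrative sketch (not part of this diff): the fusion step a few lines below
# computes y = np.dot(F, z), where F = W (H^T H)^{-1} H^T is returned by
# get_fusion_parameters(). A toy case with two atoms and three known regions:
#
#   import numpy as np
#   H = np.array([[1., 0.], [0., 1.], [1., 1.]])  # known regions over the atoms
#   W = np.array([[1., 1.]])                      # one unknown region covering both atoms
#   z = np.array([3., 5., 8.])                    # values reported for the known regions
#   F = W @ np.linalg.inv(H.T @ H) @ H.T          # fusion matrix
#   y = F @ z                                     # -> array([8.]) for the unknown region
#
# The least-squares atom estimates are [3, 5], so the unknown region spanning
# both atoms is imputed as 8. The real code builds H and W from Locations.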
+ assume_pr_zero = epiweek < 201340 and "pr" not in known_values + if assume_pr_zero: + known_values["pr"] = (0, 0, 0) + + # get the imputation matrix and lists of known and unknown locations + F, known, unknown = get_fusion_parameters(known_values.keys()) + + # finally, impute the missing values + z = np.array([known_values[k] for k in known]) + y = np.dot(F, z) + + # possibly also record the assumptions made for PR + if assume_pr_zero: + unknown.append("pr") + y = np.vstack((y, [known_values["pr"]])) + + # add lag and percent ILI to the data for each imputed location + imputed_values = {} + for loc, values in zip(unknown, y): + n_ili, n_pat, n_prov = map(int, np.rint(values)) + lag, ili = get_lag_and_ili(issue, epiweek, n_ili, n_pat) + imputed_values[loc] = (lag, n_ili, n_pat, n_prov, ili) + print(" %s: %s" % (loc, str(imputed_values[loc]))) + + # save all imputed values in table `fluview_imputed` + database.add_imputed_values(issue, epiweek, imputed_values) + + # database cleanup + rows2 = database.count_rows() + print("rows after: %d (added %d)" % (rows2, rows2 - rows1)) + commit = not test_mode + database.close(commit) def get_argument_parser(): - """Set up command line arguments and usage.""" - parser = argparse.ArgumentParser() - parser.add_argument( - '--test', - action='store_true', - help='do dry run only, do not update the database' - ) - return parser + """Set up command line arguments and usage.""" + parser = argparse.ArgumentParser() + parser.add_argument("--test", action="store_true", help="do dry run only, do not update the database") + return parser def main(): - """Run this script from the command line.""" - args = get_argument_parser().parse_args() - impute_missing_values(Database(), test_mode=args.test) + """Run this script from the command line.""" + args = get_argument_parser().parse_args() + impute_missing_values(Database(), test_mode=args.test) -if __name__ == '__main__': - main() +if __name__ == "__main__": + main() diff --git a/src/acquisition/ght/ght_update.py b/src/acquisition/ght/ght_update.py index c1e9b8d94..1cd5c5693 100644 --- a/src/acquisition/ght/ght_update.py +++ b/src/acquisition/ght/ght_update.py @@ -1,4 +1,4 @@ -''' +""" =============== === Purpose === =============== @@ -63,7 +63,7 @@ * fixed multiple-word queries (surround with quotes) 2015-12-01 * Original version -''' +""" # standard library import argparse @@ -88,304 +88,304 @@ # 2010-04-19 and 2015-05-05 # see: https://www.google.com/trends/correlate TERMS = [ - '/m/0cycc', - 'influenza type a', - 'flu duration', - 'flu fever', - 'treating flu', - 'fever flu', - 'flu recovery', - 'braun thermoscan', - 'oscillococcinum', - 'treating the flu', - 'cold or flu', - 'flu versus cold', - 'flu remedies', - 'contagious flu', - 'type a influenza', - 'flu or cold', - 'duration of flu', - 'cold versus flu', - 'flu cough', - 'flu headache', - 'thermoscan', - 'influenza incubation period', - 'flu lasts', - 'length of flu', - 'flu stomach', - 'cold vs flu', - 'flu and fever', - 'getting over the flu', - 'influenza a', - 'treatment for flu', - 'flu length', - 'treatment for the flu', - 'influenza symptoms', - 'over the counter flu', - 'flu complications', - 'cold and flu symptoms', - 'influenza incubation', - 'treatment of flu', - 'human temperature', - 'low body', - 'flu contagious', - 'robitussin ac', - 'flu how long', - 'ear thermometer', - 'flu contagious period', - 'treat flu', - 'cough flu', - 'low body temperature', - 'expectorant', - 'flu and cold', - 'rapid flu', - 'flu vs. 
cold', - 'how to treat the flu', - 'how long does the flu last?', - 'viral pneumonia', - 'flu in kids', - 'type a flu', - 'influenza treatment', - 'fighting the flu', - 'flu relief', - 'treat the flu', - 'flu medicine', - 'dangerous fever', - 'what is influenza', - 'tussin', - 'low body temp', - 'flu care', - 'flu in infants', - 'flu dizziness', - 'feed a fever', - 'flu vs cold', - 'flu vomiting', - 'bacterial pneumonia', - 'flu activity', - 'flu chills', - 'anas barbariae', - 'flu germs', - 'tylenol cold', - 'how to get over the flu', - 'flu in children', - 'influenza a and b', - 'duration of the flu', - 'cold symptoms', - 'flu report', - 'rapid flu test', - 'flu relapse', - 'get over the flu', - 'flu during pregnancy', - 'flu recovery time', - 'cure for flu', - 'tamiflu and breastfeeding', - 'flu chest pain', - 'flu treatment', - 'flu nausea', - 'remedies for the flu', - 'tamiflu in pregnancy', - 'side effects of tamiflu', - 'how to treat flu', - 'viral bronchitis', - 'flu how long contagious', - 'flu remedy', + "/m/0cycc", + "influenza type a", + "flu duration", + "flu fever", + "treating flu", + "fever flu", + "flu recovery", + "braun thermoscan", + "oscillococcinum", + "treating the flu", + "cold or flu", + "flu versus cold", + "flu remedies", + "contagious flu", + "type a influenza", + "flu or cold", + "duration of flu", + "cold versus flu", + "flu cough", + "flu headache", + "thermoscan", + "influenza incubation period", + "flu lasts", + "length of flu", + "flu stomach", + "cold vs flu", + "flu and fever", + "getting over the flu", + "influenza a", + "treatment for flu", + "flu length", + "treatment for the flu", + "influenza symptoms", + "over the counter flu", + "flu complications", + "cold and flu symptoms", + "influenza incubation", + "treatment of flu", + "human temperature", + "low body", + "flu contagious", + "robitussin ac", + "flu how long", + "ear thermometer", + "flu contagious period", + "treat flu", + "cough flu", + "low body temperature", + "expectorant", + "flu and cold", + "rapid flu", + "flu vs. 
cold", + "how to treat the flu", + "how long does the flu last?", + "viral pneumonia", + "flu in kids", + "type a flu", + "influenza treatment", + "fighting the flu", + "flu relief", + "treat the flu", + "flu medicine", + "dangerous fever", + "what is influenza", + "tussin", + "low body temp", + "flu care", + "flu in infants", + "flu dizziness", + "feed a fever", + "flu vs cold", + "flu vomiting", + "bacterial pneumonia", + "flu activity", + "flu chills", + "anas barbariae", + "flu germs", + "tylenol cold", + "how to get over the flu", + "flu in children", + "influenza a and b", + "duration of the flu", + "cold symptoms", + "flu report", + "rapid flu test", + "flu relapse", + "get over the flu", + "flu during pregnancy", + "flu recovery time", + "cure for flu", + "tamiflu and breastfeeding", + "flu chest pain", + "flu treatment", + "flu nausea", + "remedies for the flu", + "tamiflu in pregnancy", + "side effects of tamiflu", + "how to treat flu", + "viral bronchitis", + "flu how long contagious", + "flu remedy", ] # a list of all US states, including DC and the US as a whole LOCATIONS = [ - 'US', - 'AL', - 'AK', - 'AZ', - 'AR', - 'CA', - 'CO', - 'CT', - 'DC', - 'DE', - 'FL', - 'GA', - 'HI', - 'ID', - 'IL', - 'IN', - 'IA', - 'KS', - 'KY', - 'LA', - 'ME', - 'MD', - 'MA', - 'MI', - 'MN', - 'MS', - 'MO', - 'MT', - 'NE', - 'NV', - 'NH', - 'NJ', - 'NM', - 'NY', - 'NC', - 'ND', - 'OH', - 'OK', - 'OR', - 'PA', - 'RI', - 'SC', - 'SD', - 'TN', - 'TX', - 'UT', - 'VT', - 'VA', - 'WA', - 'WV', - 'WI', - 'WY', + "US", + "AL", + "AK", + "AZ", + "AR", + "CA", + "CO", + "CT", + "DC", + "DE", + "FL", + "GA", + "HI", + "ID", + "IL", + "IN", + "IA", + "KS", + "KY", + "LA", + "ME", + "MD", + "MA", + "MI", + "MN", + "MS", + "MO", + "MT", + "NE", + "NV", + "NH", + "NJ", + "NM", + "NY", + "NC", + "ND", + "OH", + "OK", + "OR", + "PA", + "RI", + "SC", + "SD", + "TN", + "TX", + "UT", + "VT", + "VA", + "WA", + "WV", + "WI", + "WY", ] -def update(locations, terms, first=None, last=None, countries=['US']): - # connect to the database - u, p = secrets.db.epi - cnx = mysql.connector.connect(user=u, password=p, database='epidata') - cur = cnx.cursor() +def update(locations, terms, first=None, last=None, countries=["US"]): + # connect to the database + u, p = secrets.db.epi + cnx = mysql.connector.connect(user=u, password=p, database="epidata") + cur = cnx.cursor() - def get_num_rows(): - cur.execute('SELECT count(1) `num` FROM `ght`') - for (num,) in cur: - pass - return num + def get_num_rows(): + cur.execute("SELECT count(1) `num` FROM `ght`") + for (num,) in cur: + pass + return num - # check from 4 weeks preceeding the last week with data through this week - cur.execute('SELECT max(`epiweek`) `ew0`, yearweek(now(), 6) `ew1` FROM `ght`') - for (ew0, ew1) in cur: - ew0 = 200401 if ew0 is None else flu.add_epiweeks(ew0, -4) - ew0 = ew0 if first is None else first - ew1 = ew1 if last is None else last - print('Checking epiweeks between %d and %d...' % (ew0, ew1)) + # check from 4 weeks preceeding the last week with data through this week + cur.execute("SELECT max(`epiweek`) `ew0`, yearweek(now(), 6) `ew1` FROM `ght`") + for (ew0, ew1) in cur: + ew0 = 200401 if ew0 is None else flu.add_epiweeks(ew0, -4) + ew0 = ew0 if first is None else first + ew1 = ew1 if last is None else last + print("Checking epiweeks between %d and %d..." 
% (ew0, ew1)) - # keep track of how many rows were added - rows_before = get_num_rows() + # keep track of how many rows were added + rows_before = get_num_rows() - # check Google Trends for new and/or revised data - sql = ''' + # check Google Trends for new and/or revised data + sql = """ INSERT INTO `ght` (`query`, `location`, `epiweek`, `value`) VALUES (%s, %s, %s, %s) ON DUPLICATE KEY UPDATE `value` = %s - ''' - total_rows = 0 - ght = GHT(API_KEY) - for term in terms: - print(' [%s] using term' % term) - ll, cl = len(locations), len(countries) - for i in range(max(ll,cl)): - location = locations[i] if i < ll else locations[0] - country = countries[i] if i < cl else countries[0] - try: - #term2 = ('"%s"' % term) if ' ' in term else term - term2 = term - attempt = 0 - while True: - attempt += 1 - try: - result = ght.get_data(ew0, ew1, location, term2, country=country) - break - except Exception as ex: - if attempt >= 5: - raise ex - else: - delay = 2 ** attempt - print(' [%s|%s] caught exception (will retry in %ds):' % (term, location, delay), ex) - time.sleep(delay) - values = [p['value'] for p in result['data']['lines'][0]['points']] - ew = result['start_week'] - num_missing = 0 - for v in values: - # Default SQL location value for US country for backwards compatibility - # i.e. California's location is still stored as 'CA', - # and having location == 'US' is still stored as 'US' - sql_location = location if location != NO_LOCATION_STR else country - - # Change SQL location for non-US countries - if country != 'US': - # Underscore added to distinguish countries from 2-letter US states - sql_location = country + "_" - if location != NO_LOCATION_STR: - sql_location = sql_location + location - sql_data = (term, sql_location, ew, v, v) - cur.execute(sql, sql_data) - total_rows += 1 - if v == 0: - num_missing += 1 - #print(' [%s|%s|%d] missing value' % (term, location, ew)) - ew = flu.add_epiweeks(ew, 1) - if num_missing > 0: - print(' [%s|%s] missing %d/%d value(s)' % (term, location, num_missing, len(values))) - except Exception as ex: - print(' [%s|%s] caught exception (will NOT retry):' % (term, location), ex) - - # keep track of how many rows were added - rows_after = get_num_rows() - print('Inserted %d/%d row(s)'%(rows_after - rows_before, total_rows)) - - # cleanup - cur.close() - cnx.commit() - cnx.close() + """ + total_rows = 0 + ght = GHT(API_KEY) + for term in terms: + print(" [%s] using term" % term) + ll, cl = len(locations), len(countries) + for i in range(max(ll, cl)): + location = locations[i] if i < ll else locations[0] + country = countries[i] if i < cl else countries[0] + try: + # term2 = ('"%s"' % term) if ' ' in term else term + term2 = term + attempt = 0 + while True: + attempt += 1 + try: + result = ght.get_data(ew0, ew1, location, term2, country=country) + break + except Exception as ex: + if attempt >= 5: + raise ex + else: + delay = 2**attempt + print(" [%s|%s] caught exception (will retry in %ds):" % (term, location, delay), ex) + time.sleep(delay) + values = [p["value"] for p in result["data"]["lines"][0]["points"]] + ew = result["start_week"] + num_missing = 0 + for v in values: + # Default SQL location value for US country for backwards compatibility + # i.e. 
California's location is still stored as 'CA', + # and having location == 'US' is still stored as 'US' + sql_location = location if location != NO_LOCATION_STR else country + + # Change SQL location for non-US countries + if country != "US": + # Underscore added to distinguish countries from 2-letter US states + sql_location = country + "_" + if location != NO_LOCATION_STR: + sql_location = sql_location + location + sql_data = (term, sql_location, ew, v, v) + cur.execute(sql, sql_data) + total_rows += 1 + if v == 0: + num_missing += 1 + # print(' [%s|%s|%d] missing value' % (term, location, ew)) + ew = flu.add_epiweeks(ew, 1) + if num_missing > 0: + print(" [%s|%s] missing %d/%d value(s)" % (term, location, num_missing, len(values))) + except Exception as ex: + print(" [%s|%s] caught exception (will NOT retry):" % (term, location), ex) + + # keep track of how many rows were added + rows_after = get_num_rows() + print("Inserted %d/%d row(s)" % (rows_after - rows_before, total_rows)) + + # cleanup + cur.close() + cnx.commit() + cnx.close() def main(): - # args and usage - parser = argparse.ArgumentParser() - parser.add_argument('location', action='store', type=str, default=None, help='location(s) (ex: all; US; TX; CA,LA,WY)') - parser.add_argument('term', action='store', type=str, default=None, help='term/query/topic (ex: all; /m/0cycc; "flu fever")') - parser.add_argument('--first', '-f', default=None, type=int, help='first epiweek override') - parser.add_argument('--last', '-l', default=None, type=int, help='last epiweek override') - parser.add_argument('--country', '-c', default='US', type=str, help='location country (ex: US; BR)') - args = parser.parse_args() - - # sanity check - first, last = args.first, args.last - if first is not None: - flu.check_epiweek(first) - if last is not None: - flu.check_epiweek(last) - if first is not None and last is not None and first > last: - raise Exception('epiweeks in the wrong order') - - # decide what to update - if args.location.lower() == 'all': - locations = LOCATIONS - elif args.location.lower() == 'none': - locations = [NO_LOCATION_STR] - else: - locations = args.location.upper().split(',') - if args.term.lower() == 'all': - terms = TERMS - else: - terms = [args.term] - - # country argument - # Check that country follows ISO 1366 Alpha-2 code. - # See https://www.iso.org/obp/ui/#search. 
- countries = args.country.upper().split(',') - if not all(map(lambda x: len(x) == 2, countries)): - raise Exception('country name must be two letters (ISO 1366 Alpha-2)') - - # if length of locations and countries is > 1, need to be the same - if len(locations) > 1 and len(countries) > 1 and len(locations) != len(countries): - raise Exception('locations and countries must be length 1, or same length') - - # run the update - update(locations, terms, first, last, countries) - - -if __name__ == '__main__': - main() + # args and usage + parser = argparse.ArgumentParser() + parser.add_argument("location", action="store", type=str, default=None, help="location(s) (ex: all; US; TX; CA,LA,WY)") + parser.add_argument("term", action="store", type=str, default=None, help='term/query/topic (ex: all; /m/0cycc; "flu fever")') + parser.add_argument("--first", "-f", default=None, type=int, help="first epiweek override") + parser.add_argument("--last", "-l", default=None, type=int, help="last epiweek override") + parser.add_argument("--country", "-c", default="US", type=str, help="location country (ex: US; BR)") + args = parser.parse_args() + + # sanity check + first, last = args.first, args.last + if first is not None: + flu.check_epiweek(first) + if last is not None: + flu.check_epiweek(last) + if first is not None and last is not None and first > last: + raise Exception("epiweeks in the wrong order") + + # decide what to update + if args.location.lower() == "all": + locations = LOCATIONS + elif args.location.lower() == "none": + locations = [NO_LOCATION_STR] + else: + locations = args.location.upper().split(",") + if args.term.lower() == "all": + terms = TERMS + else: + terms = [args.term] + + # country argument + # Check that country follows ISO 1366 Alpha-2 code. + # See https://www.iso.org/obp/ui/#search. 
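# Clarifying aside (not part of this diff): the two-letter check below matches
# what are presumably ISO 3166-1 alpha-2 country codes (the page linked above is
# the ISO 3166 online browsing platform). For example, "--country US,BR" parses
# to countries == ["US", "BR"], and each entry passes the len(x) == 2 test.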
+ countries = args.country.upper().split(",") + if not all(map(lambda x: len(x) == 2, countries)): + raise Exception("country name must be two letters (ISO 1366 Alpha-2)") + + # if length of locations and countries is > 1, need to be the same + if len(locations) > 1 and len(countries) > 1 and len(locations) != len(countries): + raise Exception("locations and countries must be length 1, or same length") + + # run the update + update(locations, terms, first, last, countries) + + +if __name__ == "__main__": + main() diff --git a/src/acquisition/ght/google_health_trends.py b/src/acquisition/ght/google_health_trends.py index 66a11c227..29d2a5f8e 100644 --- a/src/acquisition/ght/google_health_trends.py +++ b/src/acquisition/ght/google_health_trends.py @@ -1,4 +1,4 @@ -''' +""" =============== === Purpose === =============== @@ -18,7 +18,7 @@ + sample command line usage + extract array of values from returned data * separated GHT class from ght_update.py -''' +""" # standard library import argparse @@ -31,109 +31,110 @@ from delphi.utils.epidate import EpiDate import delphi.utils.epiweek as flu -NO_LOCATION_STR = 'none' +NO_LOCATION_STR = "none" + class GHT: - # Google Trends API endpoint - DISCOVERY_URL = 'https://www.googleapis.com/discovery/v1/apis/trends/v1beta/rest' - - def __init__(self, key, delay=1): - self.service = build('trends', 'v1beta', developerKey=key, discoveryServiceUrl=GHT.DISCOVERY_URL) - self.delay = delay - - # converts a YYYYWW week into a YYYY-MM-DD date (using Sunday of the week) - @staticmethod - def _ew2date(ew): - # parse the epiweek - year, week = flu.split_epiweek(ew) - # get the date object (middle of the week; Wednesday) - date = EpiDate.from_epiweek(year, week) - # go to the first day of the week (Sunday) - date = date.add_days(-3) - # date as string - return str(date) - - # get data from Google APIs - # see: https://developers.google.com/apis-explorer/#p/trends/v1beta/trends.getTimelinesForHealth - def get_data(self, start_week, end_week, location, term, resolution='week', country='US'): - start_date = GHT._ew2date(start_week) - end_date = GHT._ew2date(end_week) - num_weeks = flu.delta_epiweeks(start_week, end_week) + 1 - - # getTimelinesForHealth parameters - params = { - 'terms': term, - 'time_startDate': start_date, - 'time_endDate': end_date, - 'timelineResolution': resolution, - } - # We have a special check for the US for backwards compatibility. - # i.e. if the country is 'US' AND the location is 'US', just put the geo-restriction for country. - # In contrast, another country might have a sub-region with initials 'US' and we want the region restriction instead. 
- if country == 'US': - if location == 'US' or location == NO_LOCATION_STR: - params['geoRestriction_country'] = 'US' - else: - params['geoRestriction_region'] = 'US-' + location - else: - if location == NO_LOCATION_STR: - params['geoRestriction_country'] = country - else: - params['geoRestriction_region'] = country + '-' + location - - # make the API call - data = self.service.getTimelinesForHealth(**params).execute() - - # extract the values - try: - values = [p['value'] for p in data['lines'][0]['points']] - except: - values = None - - # throttle request rate - time.sleep(self.delay) - - # return the results - return { - 'start_week': start_week, - 'end_week': end_week, - 'num_weeks': num_weeks, - 'location': location, - 'country' : country, - 'term': term, - 'resolution': resolution, - 'data': data, - 'values': values, - } + # Google Trends API endpoint + DISCOVERY_URL = "https://www.googleapis.com/discovery/v1/apis/trends/v1beta/rest" + + def __init__(self, key, delay=1): + self.service = build("trends", "v1beta", developerKey=key, discoveryServiceUrl=GHT.DISCOVERY_URL) + self.delay = delay + + # converts a YYYYWW week into a YYYY-MM-DD date (using Sunday of the week) + @staticmethod + def _ew2date(ew): + # parse the epiweek + year, week = flu.split_epiweek(ew) + # get the date object (middle of the week; Wednesday) + date = EpiDate.from_epiweek(year, week) + # go to the first day of the week (Sunday) + date = date.add_days(-3) + # date as string + return str(date) + + # get data from Google APIs + # see: https://developers.google.com/apis-explorer/#p/trends/v1beta/trends.getTimelinesForHealth + def get_data(self, start_week, end_week, location, term, resolution="week", country="US"): + start_date = GHT._ew2date(start_week) + end_date = GHT._ew2date(end_week) + num_weeks = flu.delta_epiweeks(start_week, end_week) + 1 + + # getTimelinesForHealth parameters + params = { + "terms": term, + "time_startDate": start_date, + "time_endDate": end_date, + "timelineResolution": resolution, + } + # We have a special check for the US for backwards compatibility. + # i.e. if the country is 'US' AND the location is 'US', just put the geo-restriction for country. + # In contrast, another country might have a sub-region with initials 'US' and we want the region restriction instead. 
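# Worked examples (illustrative, not part of this diff) of how the branch below
# resolves the geo restriction, given NO_LOCATION_STR == "none":
#   country="US", location="US"   -> geoRestriction_country = "US"
#   country="US", location="CA"   -> geoRestriction_region  = "US-CA"
#   country="BR", location="none" -> geoRestriction_country = "BR"
#   country="BR", location="SP"   -> geoRestriction_region  = "BR-SP"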
+ if country == "US": + if location == "US" or location == NO_LOCATION_STR: + params["geoRestriction_country"] = "US" + else: + params["geoRestriction_region"] = "US-" + location + else: + if location == NO_LOCATION_STR: + params["geoRestriction_country"] = country + else: + params["geoRestriction_region"] = country + "-" + location + + # make the API call + data = self.service.getTimelinesForHealth(**params).execute() + + # extract the values + try: + values = [p["value"] for p in data["lines"][0]["points"]] + except: + values = None + + # throttle request rate + time.sleep(self.delay) + + # return the results + return { + "start_week": start_week, + "end_week": end_week, + "num_weeks": num_weeks, + "location": location, + "country": country, + "term": term, + "resolution": resolution, + "data": data, + "values": values, + } def main(): - # args and usage - parser = argparse.ArgumentParser() - parser.add_argument('apikey', action='store', type=str, default=None, help='API key') - parser.add_argument('startweek', action='store', type=int, default=None, help='first week (ex: 201440)') - parser.add_argument('endweek', action='store', type=int, default=None, help='last week (ex: 201520)') - parser.add_argument('location', action='store', type=str, default=None, help='location (ex: US)') - parser.add_argument('term', action='store', type=str, default=None, help='term/query/topic (ex: /m/0cycc)') - args = parser.parse_args() - - # get the data - ght = GHT(args.apikey) - result = ght.get_data(args.startweek, args.endweek, args.location, args.term) - values = result['values'] - - # sanity check - expected_weeks = result['num_weeks'] - received_weeks = len([v for v in values if v is not None and type(v) == float and v >= 0]) - if expected_weeks != received_weeks: - raise Exception('expected %d weeks, received %d' % (expected_weeks, received_weeks)) - - # results - epiweeks = [ew for ew in flu.range_epiweeks(args.startweek, args.endweek, inclusive=True)] - for (epiweek, value) in zip(epiweeks, values): - print('%6d: %.3f' % (epiweek, value)) - - -if __name__ == '__main__': - main() + # args and usage + parser = argparse.ArgumentParser() + parser.add_argument("apikey", action="store", type=str, default=None, help="API key") + parser.add_argument("startweek", action="store", type=int, default=None, help="first week (ex: 201440)") + parser.add_argument("endweek", action="store", type=int, default=None, help="last week (ex: 201520)") + parser.add_argument("location", action="store", type=str, default=None, help="location (ex: US)") + parser.add_argument("term", action="store", type=str, default=None, help="term/query/topic (ex: /m/0cycc)") + args = parser.parse_args() + + # get the data + ght = GHT(args.apikey) + result = ght.get_data(args.startweek, args.endweek, args.location, args.term) + values = result["values"] + + # sanity check + expected_weeks = result["num_weeks"] + received_weeks = len([v for v in values if v is not None and type(v) == float and v >= 0]) + if expected_weeks != received_weeks: + raise Exception("expected %d weeks, received %d" % (expected_weeks, received_weeks)) + + # results + epiweeks = [ew for ew in flu.range_epiweeks(args.startweek, args.endweek, inclusive=True)] + for (epiweek, value) in zip(epiweeks, values): + print("%6d: %.3f" % (epiweek, value)) + + +if __name__ == "__main__": + main() diff --git a/src/acquisition/kcdc/kcdc_update.py b/src/acquisition/kcdc/kcdc_update.py index 70c167738..2b01bbcf9 100644 --- a/src/acquisition/kcdc/kcdc_update.py +++ 
b/src/acquisition/kcdc/kcdc_update.py @@ -42,12 +42,14 @@ from delphi.utils.epiweek import delta_epiweeks, range_epiweeks, add_epiweeks from delphi.utils.epidate import EpiDate + def ensure_tables_exist(): - (u,p) = secrets.db.epi - cnx = mysql.connector.connect(user=u,password=p,database='epidata') + (u, p) = secrets.db.epi + cnx = mysql.connector.connect(user=u, password=p, database="epidata") try: cursor = cnx.cursor() - cursor.execute(''' + cursor.execute( + """ CREATE TABLE IF NOT EXISTS `kcdc_ili` ( `id` INT(11) NOT NULL PRIMARY KEY AUTO_INCREMENT, `release_date` DATE NOT NULL, @@ -58,69 +60,71 @@ def ensure_tables_exist(): `ili` DOUBLE NOT NULL, UNIQUE KEY (`issue`, `epiweek`, `region`) ); - '''); + """ + ) cnx.commit() finally: cnx.close() + def safe_float(f): try: - return float(f.replace(',','')) + return float(f.replace(",", "")) except: return 0 + def safe_int(i): try: - return int(i.replace(',','')) + return int(i.replace(",", "")) except: return 0 -def get_rows(cnx, table='kcdc_ili'): - # Count and return the number of rows in the `kcdc_ili` table. - select = cnx.cursor() - select.execute('SELECT count(1) num FROM %s' % table) - for (num,) in select: - pass - select.close() - return num + +def get_rows(cnx, table="kcdc_ili"): + # Count and return the number of rows in the `kcdc_ili` table. + select = cnx.cursor() + select.execute("SELECT count(1) num FROM %s" % table) + for (num,) in select: + pass + select.close() + return num + def get_kcdc_data(): issue = EpiDate.today().get_ew() - last_season = issue//100 + (1 if issue % 100 > 35 else 0) - url = 'http://www.cdc.go.kr/npt/biz/npp/iss/influenzaListAjax.do' - params = { - 'icdNm': 'influenza', - 'startYear': '2004', # Started in 2004 - 'endYear': str(last_season) - } + last_season = issue // 100 + (1 if issue % 100 > 35 else 0) + url = "https://www.cdc.go.kr/npt/biz/npp/iss/influenzaListAjax.do" + params = {"icdNm": "influenza", "startYear": "2004", "endYear": str(last_season)} # Started in 2004 response = requests.post(url, params) datas = response.json() - data = datas['data'] + data = datas["data"] ews = [] ilis = [] ew1 = 200436 - for year in range(2004,last_season): - year_data = data[year-2004] + for year in range(2004, last_season): + year_data = data[year - 2004] if year > 2004: ew1 = ews[-1] + 1 - ili_yr = year_data["VALUE"].split('`') - ili_yr = [float(f) for f in ili_yr if f != ''] - ew2 = add_epiweeks(ew1,len(ili_yr)) - new_ews = list(range_epiweeks(ew1,ew2)) + ili_yr = year_data["VALUE"].split("`") + ili_yr = [float(f) for f in ili_yr if f != ""] + ew2 = add_epiweeks(ew1, len(ili_yr)) + new_ews = list(range_epiweeks(ew1, ew2)) for i in range(len(new_ews)): j = float(ili_yr[i]) ilis.append(j) ews.append(new_ews[i]) return ews, ilis + def update_from_data(ews, ilis, date, issue, test_mode=False): u, p = secrets.db.epi - cnx = mysql.connector.connect(user=u, password=p, database='epidata') + cnx = mysql.connector.connect(user=u, password=p, database="epidata") rows1 = get_rows(cnx) - print('rows before: %d' % (rows1)) + print("rows before: %d" % (rows1)) insert = cnx.cursor() - sql = ''' + sql = """ INSERT INTO `kcdc_ili` (`release_date`, `issue`, `epiweek`, `region`, `lag`, `ili`) @@ -129,15 +133,15 @@ def update_from_data(ews, ilis, date, issue, test_mode=False): ON DUPLICATE KEY UPDATE `release_date` = least(`release_date`, '%s'), `ili` = %s - ''' + """ for i in range(len(ews)): ew = ews[i] ili = ilis[i] lag = delta_epiweeks(ews[i], issue) - insert_args = [date,issue,ew,'ROK',lag,ili] - update_args = [date,ili] 
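The kcdc_ili upsert above builds its statement by %-interpolating the values straight into the SQL string (insert.execute(sql % tuple(insert_args + update_args))). As an aside, mysql.connector can bind the same values as parameters; the sketch below shows that alternative under the kcdc_ili schema shown above. The helper name is hypothetical and this is not part of the change in this diff, which only reformats the existing code.

# Sketch only: the same upsert with bound parameters instead of string interpolation.
import mysql.connector

def upsert_ili(cnx, date, issue, ew, lag, ili):
    # `upsert_ili` is a hypothetical helper, not part of this module
    sql = """
        INSERT INTO `kcdc_ili`
            (`release_date`, `issue`, `epiweek`, `region`, `lag`, `ili`)
        VALUES
            (%s, %s, %s, %s, %s, %s)
        ON DUPLICATE KEY UPDATE
            `release_date` = LEAST(`release_date`, %s),
            `ili` = %s
    """
    cur = cnx.cursor()
    # values are passed separately, so quoting/escaping is handled by the driver
    cur.execute(sql, (date, issue, ew, "ROK", lag, ili, date, ili))
    cur.close()

# usage (credentials assumed):
# cnx = mysql.connector.connect(user=u, password=p, database="epidata")
# upsert_ili(cnx, "2020-01-01", 202001, 201952, 1, 0.5)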
+ insert_args = [date, issue, ew, "ROK", lag, ili] + update_args = [date, ili] try: insert.execute(sql % tuple(insert_args + update_args)) except Exception: @@ -146,34 +150,31 @@ def update_from_data(ews, ilis, date, issue, test_mode=False): # cleanup insert.close() if test_mode: - print('test mode, not committing') + print("test mode, not committing") rows2 = rows1 else: cnx.commit() rows2 = get_rows(cnx) - print('rows after: %d (added %d)' % (rows2,rows2-rows1)) + print("rows after: %d (added %d)" % (rows2, rows2 - rows1)) cnx.close() + def main(): # args and usage parser = argparse.ArgumentParser() - parser.add_argument( - '--test', - action='store_true', - help='do dry run only, do not update the database' - ) + parser.add_argument("--test", action="store_true", help="do dry run only, do not update the database") args = parser.parse_args() - date = datetime.datetime.now().strftime('%Y-%m-%d') - print('assuming release date is today, %s' % date) + date = datetime.datetime.now().strftime("%Y-%m-%d") + print("assuming release date is today, %s" % date) issue = EpiDate.today().get_ew() ensure_tables_exist() - ews,ilis = get_kcdc_data() + ews, ilis = get_kcdc_data() update_from_data(ews, ilis, date, issue, test_mode=args.test) -if __name__ == '__main__': +if __name__ == "__main__": main() diff --git a/src/acquisition/nidss/taiwan_nidss.py b/src/acquisition/nidss/taiwan_nidss.py index 27da863e1..d55ddf7e5 100644 --- a/src/acquisition/nidss/taiwan_nidss.py +++ b/src/acquisition/nidss/taiwan_nidss.py @@ -4,7 +4,7 @@ =============== Scrapes weekly flu data from Taiwan's National Infectious Disease Statistics -System (NIDSS): http://nidss.cdc.gov.tw/en/ +System (NIDSS): https://nidss.cdc.gov.tw/en/ ================= @@ -37,233 +37,230 @@ class NIDSS: - """An API for scraping the NIDSS site.""" + """An API for scraping the NIDSS site.""" - # The page where the flu data is kept - FLU_URL = 'https://nidss.cdc.gov.tw/en/CDCWNH01.aspx?dc=wnh' + # The page where the flu data is kept + FLU_URL = "https://nidss.cdc.gov.tw/en/CDCWNH01.aspx?dc=wnh" - # Link to the dengue data - DENGUE_URL = 'http://nidss.cdc.gov.tw/Download/Weekly_Age_County_Gender_061.csv' + # Link to the dengue data + DENGUE_URL = "https://nidss.cdc.gov.tw/Download/Weekly_Age_County_Gender_061.csv" - # Translate location names to English - # https://en.wikipedia.org/wiki/List_of_administrative_divisions_of_Taiwan - _TRANSLATED = { - b'5Y2X5oqV57ij': 'Nantou_County', - b'5Y+w5Lit5biC': 'Taichung_City', - b'5Y+w5YyX5biC': 'Taipei_City', - b'5Y+w5Y2X5biC': 'Tainan_City', - b'5Y+w5p2x57ij': 'Taitung_County', - b'5ZiJ576p5biC': 'Chiayi_City', - b'5ZiJ576p57ij': 'Chiayi_County', - b'5Z+66ZqG5biC': 'Keelung_City', - b'5a6c6Jit57ij': 'Yilan_County', - b'5bGP5p2x57ij': 'Pingtung_County', - b'5b2w5YyW57ij': 'Changhua_County', - b'5paw5YyX5biC': 'New_Taipei_City', - b'5paw56u55biC': 'Hsinchu_City', - b'5paw56u557ij': 'Hsinchu_County', - b'5qGD5ZyS5biC': 'Taoyuan_City', - b'5r6O5rmW57ij': 'Penghu_County', - b'6Iqx6JOu57ij': 'Hualien_County', - b'6IuX5qCX57ij': 'Miaoli_County', - b'6YeR6ZaA57ij': 'Kinmen_County', - b'6Zuy5p6X57ij': 'Yunlin_County', - b'6auY6ZuE5biC': 'Kaohsiung_City', - b'6YCj5rGf57ij': 'Lienchiang_County', - } + # Translate location names to English + # https://en.wikipedia.org/wiki/List_of_administrative_divisions_of_Taiwan + _TRANSLATED = { + b"5Y2X5oqV57ij": "Nantou_County", + b"5Y+w5Lit5biC": "Taichung_City", + b"5Y+w5YyX5biC": "Taipei_City", + b"5Y+w5Y2X5biC": "Tainan_City", + b"5Y+w5p2x57ij": "Taitung_County", + 
b"5ZiJ576p5biC": "Chiayi_City", + b"5ZiJ576p57ij": "Chiayi_County", + b"5Z+66ZqG5biC": "Keelung_City", + b"5a6c6Jit57ij": "Yilan_County", + b"5bGP5p2x57ij": "Pingtung_County", + b"5b2w5YyW57ij": "Changhua_County", + b"5paw5YyX5biC": "New_Taipei_City", + b"5paw56u55biC": "Hsinchu_City", + b"5paw56u557ij": "Hsinchu_County", + b"5qGD5ZyS5biC": "Taoyuan_City", + b"5r6O5rmW57ij": "Penghu_County", + b"6Iqx6JOu57ij": "Hualien_County", + b"6IuX5qCX57ij": "Miaoli_County", + b"6YeR6ZaA57ij": "Kinmen_County", + b"6Zuy5p6X57ij": "Yunlin_County", + b"6auY6ZuE5biC": "Kaohsiung_City", + b"6YCj5rGf57ij": "Lienchiang_County", + } - # Map locations to regions - # https://en.wikipedia.org/wiki/List_of_administrative_divisions_of_Taiwan - # https://en.wikipedia.org/wiki/Regions_of_Taiwan#Hexchotomy - LOCATION_TO_REGION = { - # Taipei - 'Taipei_City': 'Taipei', - 'Keelung_City': 'Taipei', - 'New_Taipei_City': 'Taipei', - 'Yilan_County': 'Taipei', - 'Kinmen_County': 'Taipei', - 'Lienchiang_County': 'Taipei', - # Northern - 'Hsinchu_City': 'Northern', - 'Taoyuan_City': 'Northern', - 'Hsinchu_County': 'Northern', - 'Miaoli_County': 'Northern', - # Central - 'Taichung_City': 'Central', - 'Changhua_County': 'Central', - 'Nantou_County': 'Central', - # Southern - 'Tainan_City': 'Southern', - 'Chiayi_City': 'Southern', - 'Yunlin_County': 'Southern', - 'Chiayi_County': 'Southern', - # Kaoping - 'Kaohsiung_City': 'Kaoping', - 'Pingtung_County': 'Kaoping', - 'Penghu_County': 'Kaoping', - # Eastern - 'Hualien_County': 'Eastern', - 'Taitung_County': 'Eastern', - } + # Map locations to regions + # https://en.wikipedia.org/wiki/List_of_administrative_divisions_of_Taiwan + # https://en.wikipedia.org/wiki/Regions_of_Taiwan#Hexchotomy + LOCATION_TO_REGION = { + # Taipei + "Taipei_City": "Taipei", + "Keelung_City": "Taipei", + "New_Taipei_City": "Taipei", + "Yilan_County": "Taipei", + "Kinmen_County": "Taipei", + "Lienchiang_County": "Taipei", + # Northern + "Hsinchu_City": "Northern", + "Taoyuan_City": "Northern", + "Hsinchu_County": "Northern", + "Miaoli_County": "Northern", + # Central + "Taichung_City": "Central", + "Changhua_County": "Central", + "Nantou_County": "Central", + # Southern + "Tainan_City": "Southern", + "Chiayi_City": "Southern", + "Yunlin_County": "Southern", + "Chiayi_County": "Southern", + # Kaoping + "Kaohsiung_City": "Kaoping", + "Pingtung_County": "Kaoping", + "Penghu_County": "Kaoping", + # Eastern + "Hualien_County": "Eastern", + "Taitung_County": "Eastern", + } - @staticmethod - def _get_metadata(html): - issue_pattern = re.compile('^.*Latest available data: Week (\\d+), (\\d{4})\\..*$') - release_pattern = re.compile('^.*Data as of \\d+:\\d+:\\d+, (\\d{4})/(\\d{2})/(\\d{2})\\..*$') - issue, release = None, None - for line in html.split('\n'): - match = issue_pattern.match(line) - if match is not None: - year, week = int(match.group(2)), int(match.group(1)) - issue = year * 100 + week - match = release_pattern.match(line) - if match is not None: - year, month, day = int(match.group(1)), int(match.group(2)), int(match.group(3)) - release = '%04d-%02d-%02d' % (year, month, day) - if issue is None or release is None: - raise Exception('metadata not found') - return issue, release + @staticmethod + def _get_metadata(html): + issue_pattern = re.compile("^.*Latest available data: Week (\\d+), (\\d{4})\\..*$") + release_pattern = re.compile("^.*Data as of \\d+:\\d+:\\d+, (\\d{4})/(\\d{2})/(\\d{2})\\..*$") + issue, release = None, None + for line in html.split("\n"): + match = issue_pattern.match(line) + if 
match is not None: + year, week = int(match.group(2)), int(match.group(1)) + issue = year * 100 + week + match = release_pattern.match(line) + if match is not None: + year, month, day = int(match.group(1)), int(match.group(2)), int(match.group(3)) + release = "%04d-%02d-%02d" % (year, month, day) + if issue is None or release is None: + raise Exception("metadata not found") + return issue, release - @staticmethod - def _get_flu_data(html): - week_pattern = re.compile('^categories: \\[(.*)\\],$') - value_pattern = re.compile('^series: \\[(.*)\\],$') - data = {} - parsing_ili = True - for line in html.split('\n'): - line = line.strip() - match = week_pattern.match(line) - if match is not None: - weeks = [int(x[1:-1]) for x in match.group(1).split(',')] - for week in weeks: - check_epiweek(week) - if week not in data: - data[week] = {} - match = value_pattern.match(line) - if match is not None: - for item in match.group(1).split('},{'): - parts = item.replace('{', '').replace('}', '').strip().split(' ') - location = parts[1][1:-2] - def num(value): - if parsing_ili: - return float(value) - else: - if '.' in value: - raise Exception('expected type int for visits') - return int(value) - values = [num(x) for x in parts[3][1:-1].split(',')] - unit = 'ili' if parsing_ili else 'visits' - if len(weeks) != len(values): - raise Exception('len(weeks) != len(values)') - for week, value in zip(weeks, values): - if location not in data[week]: - data[week][location] = {} - data[week][location][unit] = value - parsing_ili = False - if len(data) == 0: - raise Exception('no data') - return data + @staticmethod + def _get_flu_data(html): + week_pattern = re.compile("^categories: \\[(.*)\\],$") + value_pattern = re.compile("^series: \\[(.*)\\],$") + data = {} + parsing_ili = True + for line in html.split("\n"): + line = line.strip() + match = week_pattern.match(line) + if match is not None: + weeks = [int(x[1:-1]) for x in match.group(1).split(",")] + for week in weeks: + check_epiweek(week) + if week not in data: + data[week] = {} + match = value_pattern.match(line) + if match is not None: + for item in match.group(1).split("},{"): + parts = item.replace("{", "").replace("}", "").strip().split(" ") + location = parts[1][1:-2] + + def num(value): + if parsing_ili: + return float(value) + else: + if "." 
in value: + raise Exception("expected type int for visits") + return int(value) - @staticmethod - def get_flu_data(): - # Fetch the flu page - response = requests.get(NIDSS.FLU_URL) - if response.status_code != 200: - raise Exception('request failed [%d]' % response.status_code) - html = response.text - # Parse metadata - latest_week, release_date = NIDSS._get_metadata(html) - # Parse flu data - data = NIDSS._get_flu_data(html) - # Return results indexed by week and location - return latest_week, release_date, data + values = [num(x) for x in parts[3][1:-1].split(",")] + unit = "ili" if parsing_ili else "visits" + if len(weeks) != len(values): + raise Exception("len(weeks) != len(values)") + for week, value in zip(weeks, values): + if location not in data[week]: + data[week][location] = {} + data[week][location][unit] = value + parsing_ili = False + if len(data) == 0: + raise Exception("no data") + return data - @staticmethod - def get_dengue_data(first_week, last_week): - # Check week order - if first_week > last_week: - first_week, last_week = last_week, first_week - # Bounds check - if first_week < 200301 or last_week < 200301: - raise Exception('week out of range') - # Initialize data by week and location (zeroes are not reported) - data = {} - for week in range_epiweeks(first_week, add_epiweeks(last_week, 1)): - data[week] = {} - for location in NIDSS.LOCATION_TO_REGION.keys(): - data[week][location] = 0 - # Download CSV - response = requests.get(NIDSS.DENGUE_URL) - if response.status_code != 200: - raise Exception('export Dengue failed [%d]' % response.status_code) - csv = response.content.decode('big5-tw') - # Parse the data - lines = [l.strip() for l in csv.split('\n')[1:] if l.strip() != ''] - for line in lines: - fields = line.split(',') - location_b64 = base64.b64encode(fields[3].encode('utf-8')) - location = NIDSS._TRANSLATED[location_b64] - # Fields currently unused: - # region = NIDSS.LOCATION_TO_REGION[location] - # imported_b64 = base64.b64encode(fields[6].encode('utf-8')) - # imported = imported_b64 == b'5piv' - # sex = fields[5] - # age = fields[7] - count = int(fields[8]) - year = int(fields[1]) - week = int(fields[2]) - # Week 53 was reported each year in 2003-2007 - if year < 2008 and year != 2003 and week > 52: - week = 52 - # Epiweek system change in 2009 - # See also: http://research.undefinedx.com/forum/index.php?topic=300.0 - if year == 2009: - week -= 1 - if week == 0: - year, week = 2008, 53 - epiweek = year * 100 + week - if epiweek < first_week or epiweek > last_week: - # Outside of the requested range - continue - if epiweek not in data or location not in data[epiweek]: - # Not a vaild U.S. 
epiweek - raise Exception('data missing %d-%s' % (epiweek, location)) - # Add the counts to the location on this epiweek - data[epiweek][location] += count - # Return results indexed by week and location - return data + @staticmethod + def get_flu_data(): + # Fetch the flu page + response = requests.get(NIDSS.FLU_URL) + if response.status_code != 200: + raise Exception("request failed [%d]" % response.status_code) + html = response.text + # Parse metadata + latest_week, release_date = NIDSS._get_metadata(html) + # Parse flu data + data = NIDSS._get_flu_data(html) + # Return results indexed by week and location + return latest_week, release_date, data + + @staticmethod + def get_dengue_data(first_week, last_week): + # Check week order + if first_week > last_week: + first_week, last_week = last_week, first_week + # Bounds check + if first_week < 200301 or last_week < 200301: + raise Exception("week out of range") + # Initialize data by week and location (zeroes are not reported) + data = {} + for week in range_epiweeks(first_week, add_epiweeks(last_week, 1)): + data[week] = {} + for location in NIDSS.LOCATION_TO_REGION.keys(): + data[week][location] = 0 + # Download CSV + response = requests.get(NIDSS.DENGUE_URL) + if response.status_code != 200: + raise Exception("export Dengue failed [%d]" % response.status_code) + csv = response.content.decode("big5-tw") + # Parse the data + lines = [l.strip() for l in csv.split("\n")[1:] if l.strip() != ""] + for line in lines: + fields = line.split(",") + location_b64 = base64.b64encode(fields[3].encode("utf-8")) + location = NIDSS._TRANSLATED[location_b64] + # Fields currently unused: + # region = NIDSS.LOCATION_TO_REGION[location] + # imported_b64 = base64.b64encode(fields[6].encode('utf-8')) + # imported = imported_b64 == b'5piv' + # sex = fields[5] + # age = fields[7] + count = int(fields[8]) + year = int(fields[1]) + week = int(fields[2]) + # Week 53 was reported each year in 2003-2007 + if year < 2008 and year != 2003 and week > 52: + week = 52 + # Epiweek system change in 2009 + # See also: https://research.undefinedx.com/forum/index.php?topic=300.0 + if year == 2009: + week -= 1 + if week == 0: + year, week = 2008, 53 + epiweek = year * 100 + week + if epiweek < first_week or epiweek > last_week: + # Outside of the requested range + continue + if epiweek not in data or location not in data[epiweek]: + # Not a vaild U.S. 
epiweek + raise Exception("data missing %d-%s" % (epiweek, location)) + # Add the counts to the location on this epiweek + data[epiweek][location] += count + # Return results indexed by week and location + return data def main(): - # Args and usage - parser = argparse.ArgumentParser() - parser.add_argument( - 'epiweek', - action='store', - type=int, - help='fetch data on this epiweek (ex: 201537)' - ) - args = parser.parse_args() - ew = args.epiweek + # Args and usage + parser = argparse.ArgumentParser() + parser.add_argument("epiweek", action="store", type=int, help="fetch data on this epiweek (ex: 201537)") + args = parser.parse_args() + ew = args.epiweek - # Get the data - latest_week, release_date, fdata = NIDSS.get_flu_data() - ddata = NIDSS.get_dengue_data(ew, ew) + # Get the data + latest_week, release_date, fdata = NIDSS.get_flu_data() + ddata = NIDSS.get_dengue_data(ew, ew) - # Print the results - print('*** Meta ***') - print('latest_week:', latest_week) - print('release_date:', release_date) - print('*** Flu ***') - for region in sorted(list(fdata[ew].keys())): - visits, ili = fdata[ew][region]['visits'], fdata[ew][region]['ili'] - print('region=%s | visits=%d | ili=%.3f' % (region, visits, ili)) - print('*** Dengue ***') - for location in sorted(list(ddata[ew].keys())): - region = NIDSS.LOCATION_TO_REGION[location] - count = ddata[ew][location] - print('location=%s | region=%s | count=%d' % (location, region, count)) + # Print the results + print("*** Meta ***") + print("latest_week:", latest_week) + print("release_date:", release_date) + print("*** Flu ***") + for region in sorted(list(fdata[ew].keys())): + visits, ili = fdata[ew][region]["visits"], fdata[ew][region]["ili"] + print("region=%s | visits=%d | ili=%.3f" % (region, visits, ili)) + print("*** Dengue ***") + for location in sorted(list(ddata[ew].keys())): + region = NIDSS.LOCATION_TO_REGION[location] + count = ddata[ew][location] + print("location=%s | region=%s | count=%d" % (location, region, count)) -if __name__ == '__main__': - main() +if __name__ == "__main__": + main() diff --git a/src/acquisition/nidss/taiwan_update.py b/src/acquisition/nidss/taiwan_update.py index 830a7738d..4ba8b1778 100644 --- a/src/acquisition/nidss/taiwan_update.py +++ b/src/acquisition/nidss/taiwan_update.py @@ -87,92 +87,86 @@ # Get a row count just to know how many new rows are inserted def get_rows(cnx): - select = cnx.cursor() - select.execute('SELECT count(1) num FROM nidss_flu') - for (num,) in select: - rows_flu = num - select.execute('SELECT count(1) num FROM nidss_dengue') - for (num,) in select: - rows_dengue = num - select.close() - return (rows_flu, rows_dengue) + select = cnx.cursor() + select.execute("SELECT count(1) num FROM nidss_flu") + for (num,) in select: + rows_flu = num + select.execute("SELECT count(1) num FROM nidss_dengue") + for (num,) in select: + rows_dengue = num + select.close() + return (rows_flu, rows_dengue) def update(test_mode=False): - # test mode - if test_mode: - print('test mode enabled: changes will not be saved') - - # Database connection - u, p = secrets.db.epi - cnx = mysql.connector.connect(user=u, password=p, database='epidata') - rows1 = get_rows(cnx) - print('rows before (flu): %d' % (rows1[0])) - print('rows before (dengue): %d' % (rows1[1])) - insert = cnx.cursor() - sql_flu = ''' - INSERT INTO - `nidss_flu` (`release_date`, `issue`, `epiweek`, `region`, `lag`, `visits`, `ili`) - VALUES - (%s, %s, %s, %s, %s, %s, %s) - ON DUPLICATE KEY UPDATE - `release_date` = least(`release_date`, %s), 
`visits` = %s, `ili` = %s - ''' - sql_dengue = ''' - INSERT INTO - `nidss_dengue` (`epiweek`, `location`, `region`, `count`) - VALUES - (%s, %s, %s, %s) - ON DUPLICATE KEY UPDATE - `count` = %s - ''' - - # Scrape flu data - current_week, release_date, data = NIDSS.get_flu_data() - for epiweek in sorted(list(data.keys())): - lag = delta_epiweeks(epiweek, current_week) - for region in data[epiweek].keys(): - visits, ili = data[epiweek][region]['visits'], data[epiweek][region]['ili'] - params1 = [release_date, current_week, epiweek, region, lag, visits, ili] - params2 = [release_date, visits, ili] - insert.execute(sql_flu, tuple(params1 + params2)) - - # Scrape dengue data from the past year - data = NIDSS.get_dengue_data(add_epiweeks(current_week, -51), current_week) - for epiweek in sorted(list(data.keys())): - for location in sorted(list(data[epiweek].keys())): - region = NIDSS.LOCATION_TO_REGION[location] - count = data[epiweek][location] - params = (epiweek, location, region, count, count) - insert.execute(sql_dengue, params) - - # Cleanup - insert.close() - rows2 = get_rows(cnx) - print('rows after (flu): %d (added %d)' % (rows2[0], rows2[0] - rows1[0])) - print('rows after (dengue): %d (added %d)' % (rows2[1], rows2[1] - rows1[1])) - if test_mode: - print('test mode: changes not commited') - else: - cnx.commit() - cnx.close() + # test mode + if test_mode: + print("test mode enabled: changes will not be saved") + + # Database connection + u, p = secrets.db.epi + cnx = mysql.connector.connect(user=u, password=p, database="epidata") + rows1 = get_rows(cnx) + print("rows before (flu): %d" % (rows1[0])) + print("rows before (dengue): %d" % (rows1[1])) + insert = cnx.cursor() + sql_flu = """ + INSERT INTO + `nidss_flu` (`release_date`, `issue`, `epiweek`, `region`, `lag`, `visits`, `ili`) + VALUES + (%s, %s, %s, %s, %s, %s, %s) + ON DUPLICATE KEY UPDATE + `release_date` = least(`release_date`, %s), `visits` = %s, `ili` = %s + """ + sql_dengue = """ + INSERT INTO + `nidss_dengue` (`epiweek`, `location`, `region`, `count`) + VALUES + (%s, %s, %s, %s) + ON DUPLICATE KEY UPDATE + `count` = %s + """ + + # Scrape flu data + current_week, release_date, data = NIDSS.get_flu_data() + for epiweek in sorted(list(data.keys())): + lag = delta_epiweeks(epiweek, current_week) + for region in data[epiweek].keys(): + visits, ili = data[epiweek][region]["visits"], data[epiweek][region]["ili"] + params1 = [release_date, current_week, epiweek, region, lag, visits, ili] + params2 = [release_date, visits, ili] + insert.execute(sql_flu, tuple(params1 + params2)) + + # Scrape dengue data from the past year + data = NIDSS.get_dengue_data(add_epiweeks(current_week, -51), current_week) + for epiweek in sorted(list(data.keys())): + for location in sorted(list(data[epiweek].keys())): + region = NIDSS.LOCATION_TO_REGION[location] + count = data[epiweek][location] + params = (epiweek, location, region, count, count) + insert.execute(sql_dengue, params) + + # Cleanup + insert.close() + rows2 = get_rows(cnx) + print("rows after (flu): %d (added %d)" % (rows2[0], rows2[0] - rows1[0])) + print("rows after (dengue): %d (added %d)" % (rows2[1], rows2[1] - rows1[1])) + if test_mode: + print("test mode: changes not commited") + else: + cnx.commit() + cnx.close() def main(): - # args and usage - parser = argparse.ArgumentParser() - parser.add_argument( - '--test', - '-t', - action='store_true', - default=False, - help='test mode, do not commit changes' - ) - args = parser.parse_args() - - # fetch and store NIDSS data - 
update(args.test) - - -if __name__ == '__main__': - main() + # args and usage + parser = argparse.ArgumentParser() + parser.add_argument("--test", "-t", action="store_true", default=False, help="test mode, do not commit changes") + args = parser.parse_args() + + # fetch and store NIDSS data + update(args.test) + + +if __name__ == "__main__": + main() diff --git a/src/acquisition/norostat/norostat_add_history.py b/src/acquisition/norostat/norostat_add_history.py index 64fd11ff7..05c29d69b 100644 --- a/src/acquisition/norostat/norostat_add_history.py +++ b/src/acquisition/norostat/norostat_add_history.py @@ -18,28 +18,31 @@ from . import norostat_raw - def main(): - norostat_sql.ensure_tables_exist() - snapshot_dir = os.path.expanduser("~/norostat_history/wayback/websites/www.cdc.gov/norovirus/reporting/norostat/data-table.html/") - snapshot_version_counter = collections.Counter() - for subdir in os.listdir(snapshot_dir): - if re.match(r'[0-9]+', subdir) is not None: - # appears to be snapshot dir - snapshot_version_counter[subdir] = 0 # register that loop found this snapshot directory - for norostat_capitalization in ["norostat","noroSTAT"]: - time.sleep(0.002) # ensure parse times are unique, assuming OS can accurately sleep and measure to ms precision - path = os.path.join(snapshot_dir,subdir,"norovirus","reporting",norostat_capitalization,"data-table.html") - if os.path.isfile(path): - print("Processing file ", path) - with open(path, 'r') as datatable_file: - content = datatable_file.read() - wide_raw = norostat_raw.parse_content_to_wide_raw(content) - long_raw = norostat_raw.melt_wide_raw_to_long_raw(wide_raw) - norostat_sql.record_long_raw(long_raw) - snapshot_version_counter[subdir] += 1 - print('Successfully uploaded the following snapshots, with the count indicating the number of data-table versions found inside each snapshot (expected to be 1, or maybe 2 if there was a change in capitalization; 0 indicates the NoroSTAT page was not found within a snapshot directory); just "Counter()" indicates no snapshot directories were found:', snapshot_version_counter) - norostat_sql.update_point() + norostat_sql.ensure_tables_exist() + snapshot_dir = os.path.expanduser("~/norostat_history/wayback/websites/www.cdc.gov/norovirus/reporting/norostat/data-table.html/") + snapshot_version_counter = collections.Counter() + for subdir in os.listdir(snapshot_dir): + if re.match(r"[0-9]+", subdir) is not None: + # appears to be snapshot dir + snapshot_version_counter[subdir] = 0 # register that loop found this snapshot directory + for norostat_capitalization in ["norostat", "noroSTAT"]: + time.sleep(0.002) # ensure parse times are unique, assuming OS can accurately sleep and measure to ms precision + path = os.path.join(snapshot_dir, subdir, "norovirus", "reporting", norostat_capitalization, "data-table.html") + if os.path.isfile(path): + print("Processing file ", path) + with open(path, "r") as datatable_file: + content = datatable_file.read() + wide_raw = norostat_raw.parse_content_to_wide_raw(content) + long_raw = norostat_raw.melt_wide_raw_to_long_raw(wide_raw) + norostat_sql.record_long_raw(long_raw) + snapshot_version_counter[subdir] += 1 + print( + 'Successfully uploaded the following snapshots, with the count indicating the number of data-table versions found inside each snapshot (expected to be 1, or maybe 2 if there was a change in capitalization; 0 indicates the NoroSTAT page was not found within a snapshot directory); just "Counter()" indicates no snapshot directories were found:', + 
snapshot_version_counter, + ) + norostat_sql.update_point() + -if __name__ == '__main__': - main() +if __name__ == "__main__": + main() diff --git a/src/acquisition/norostat/norostat_raw.py b/src/acquisition/norostat/norostat_raw.py index 582de9684..db7f5ace9 100644 --- a/src/acquisition/norostat/norostat_raw.py +++ b/src/acquisition/norostat/norostat_raw.py @@ -8,7 +8,6 @@ """ - # standard library import datetime import re @@ -22,91 +21,88 @@ # first party from .norostat_utils import * + def fetch_content(norostat_datatable_url="https://www.cdc.gov/norovirus/reporting/norostat/data-table.html"): - """Download NoroSTAT data-table. Returns the html content.""" - headers = { - 'User-Agent': 'delphibot/1.0 (+https://delphi.cmu.edu/)', - } - resp = requests.get(norostat_datatable_url, headers=headers) - expect_value_eq(resp.status_code, 200, - 'Wanted status code {}. Received: ') - expect_value_eq(resp.headers.get("Content-Type"), "text/html", - 'Expected Content-Type "{}"; Received ') - return resp.content + """Download NoroSTAT data-table. Returns the html content.""" + headers = { + "User-Agent": "delphibot/1.0 (+https://delphi.cmu.edu/)", + } + resp = requests.get(norostat_datatable_url, headers=headers) + expect_value_eq(resp.status_code, 200, "Wanted status code {}. Received: ") + expect_value_eq(resp.headers.get("Content-Type"), "text/html", 'Expected Content-Type "{}"; Received ') + return resp.content + def save_sample_content(content, f="sample_content.pickle"): - """Save the content from fetch_content into a pickle file for most testing (don't download unnecessarily).""" - with open(f, "wb") as handle: - pickle.dump(content, handle) + """Save the content from fetch_content into a pickle file for most testing (don't download unnecessarily).""" + with open(f, "wb") as handle: + pickle.dump(content, handle) + def load_sample_content(f="sample_content.pickle"): - """Load data from a past call to fetch_content from a pickle file for most testing (don't download unnecessarily).""" - with open(f, "rb") as handle: - content = pickle.load(handle) - return content + """Load data from a past call to fetch_content from a pickle file for most testing (don't download unnecessarily).""" + with open(f, "rb") as handle: + content = pickle.load(handle) + return content + def parse_content_to_wide_raw(content): - """Convert the html content for the data-table into a wide data frame, then stick it in a tuple along with the release_date, parse_time, and (constant) location.""" - parse_time = datetime.datetime.now() - html_root = lxml.html.fromstring(content) - # Extract the release date, a.k.a. dateModified, a.k.a. 
"Page last updated" date; ~Dec 2018 this is only available in a meta tag; previously, it was available in a visible span - dateModified_meta_elts = html_root.xpath('//meta[@property="cdc:last_updated"]') - dateModified_span_elts = html_root.xpath('//span[@itemprop="dateModified"]') - if len(dateModified_meta_elts) == 1: - [dateModified_meta_elt] = dateModified_meta_elts - dateModified = dateModified_meta_elt.attrib['content'] - elif len(dateModified_span_elts) == 1: - [dateModified_span_elt] = dateModified_span_elts - dateModified = dateModified_span_elt.text - else: - raise Exception("Could not find the expected number of dateModified meta or span tags.") - # FIXME check/enforce locale - release_date = datetime.datetime.strptime(dateModified, "%B %d, %Y").date() - # Check that table description still specifies suspected&confirmed norovirus - # outbreaks (insensitive to case of certain letters and allowing for both old - # "to the" and new "through the" text), then extract list of states from the - # description: - [description_elt] = html_root.xpath('''//p[ + """Convert the html content for the data-table into a wide data frame, then stick it in a tuple along with the release_date, parse_time, and (constant) location.""" + parse_time = datetime.datetime.now() + html_root = lxml.html.fromstring(content) + # Extract the release date, a.k.a. dateModified, a.k.a. "Page last updated" date; ~Dec 2018 this is only available in a meta tag; previously, it was available in a visible span + dateModified_meta_elts = html_root.xpath('//meta[@property="cdc:last_updated"]') + dateModified_span_elts = html_root.xpath('//span[@itemprop="dateModified"]') + if len(dateModified_meta_elts) == 1: + [dateModified_meta_elt] = dateModified_meta_elts + dateModified = dateModified_meta_elt.attrib["content"] + elif len(dateModified_span_elts) == 1: + [dateModified_span_elt] = dateModified_span_elts + dateModified = dateModified_span_elt.text + else: + raise Exception("Could not find the expected number of dateModified meta or span tags.") + # FIXME check/enforce locale + release_date = datetime.datetime.strptime(dateModified, "%B %d, %Y").date() + # Check that table description still specifies suspected&confirmed norovirus + # outbreaks (insensitive to case of certain letters and allowing for both old + # "to the" and new "through the" text), then extract list of states from the + # description: + [description_elt] = html_root.xpath( + """//p[ contains(translate(text(), "SCNORHD", "scnorhd"), "suspected and confirmed norovirus outbreaks reported by state health departments in") and ( contains(text(), "to the") or contains(text(), "through the") ) - ]''') - location = re.match(".*?[Dd]epartments in (.*?) (?:to)|(?:through) the.*$", description_elt.text).group(1) - # Attempt to find exactly 1 table (note: it would be nice to filter on the - # associated caption, but no such caption is present in earlier versions): - [table] = html_root.xpath('//table') - # Convert html table to DataFrame: - # Directly reading in the table with pd.read_html performs unwanted dtype - # inference, but reveals the column names: - [wide_raw_df_with_unwanted_conversions] = pd.read_html(lxml.html.tostring(table)) - # We want all columns to be string columns. However, there does not appear - # to be an option to disable dtype inference in pd.read_html. 
Hide all - # entries inside 1-tuple wrappers using pre-dtype-inference converters, - # then unpack afterward (the entries fed to the converters should already - # be strings, but "convert" them to strings just in case): - [wide_raw_df_with_wrappers] = pd.read_html( - lxml.html.tostring(table), - converters= {col: lambda entry: (str(entry),) - for col in wide_raw_df_with_unwanted_conversions.columns} - ) - # Unwrap entries: - wide_raw_df = wide_raw_df_with_wrappers.applymap(lambda wrapper: wrapper[0]) - # Check format: - expect_value_eq(wide_raw_df.columns[0], "Week", - 'Expected raw_colnames[0] to be "{}"; encountered ') - for colname in wide_raw_df.columns: - expect_result_eq(dtype_kind, wide_raw_df[colname].head(), "O", - 'Expected (head of) "%s" column to have dtype kind "{}"; instead had dtype kind & head '%(colname)) - # Pack up df with metadata: - wide_raw = (wide_raw_df, release_date, parse_time, location) - return wide_raw + ]""" + ) + location = re.match(".*?[Dd]epartments in (.*?) (?:to)|(?:through) the.*$", description_elt.text).group(1) + # Attempt to find exactly 1 table (note: it would be nice to filter on the + # associated caption, but no such caption is present in earlier versions): + [table] = html_root.xpath("//table") + # Convert html table to DataFrame: + # Directly reading in the table with pd.read_html performs unwanted dtype + # inference, but reveals the column names: + [wide_raw_df_with_unwanted_conversions] = pd.read_html(lxml.html.tostring(table)) + # We want all columns to be string columns. However, there does not appear + # to be an option to disable dtype inference in pd.read_html. Hide all + # entries inside 1-tuple wrappers using pre-dtype-inference converters, + # then unpack afterward (the entries fed to the converters should already + # be strings, but "convert" them to strings just in case): + [wide_raw_df_with_wrappers] = pd.read_html(lxml.html.tostring(table), converters={col: lambda entry: (str(entry),) for col in wide_raw_df_with_unwanted_conversions.columns}) + # Unwrap entries: + wide_raw_df = wide_raw_df_with_wrappers.applymap(lambda wrapper: wrapper[0]) + # Check format: + expect_value_eq(wide_raw_df.columns[0], "Week", 'Expected raw_colnames[0] to be "{}"; encountered ') + for colname in wide_raw_df.columns: + expect_result_eq(dtype_kind, wide_raw_df[colname].head(), "O", 'Expected (head of) "%s" column to have dtype kind "{}"; instead had dtype kind & head ' % (colname)) + # Pack up df with metadata: + wide_raw = (wide_raw_df, release_date, parse_time, location) + return wide_raw + def melt_wide_raw_to_long_raw(wide_raw): - (wide_raw_df, release_date, parse_time, location) = wide_raw - long_raw_df = wide_raw_df \ - .melt(id_vars=["Week"], var_name="measurement_type", value_name="value") \ - .rename(index=str, columns={"Week": "week"}) - long_raw = (long_raw_df, release_date, parse_time, location) - return long_raw + (wide_raw_df, release_date, parse_time, location) = wide_raw + long_raw_df = wide_raw_df.melt(id_vars=["Week"], var_name="measurement_type", value_name="value").rename(index=str, columns={"Week": "week"}) + long_raw = (long_raw_df, release_date, parse_time, location) + return long_raw diff --git a/src/acquisition/norostat/norostat_sql.py b/src/acquisition/norostat/norostat_sql.py index 168e275eb..4e9e1ffba 100644 --- a/src/acquisition/norostat/norostat_sql.py +++ b/src/acquisition/norostat/norostat_sql.py @@ -61,363 +61,430 @@ # if there is no such version, this table will not be created or used; uses # interned string id's + def 
ensure_tables_exist(): - (u, p) = secrets.db.epi - cnx = mysql.connector.connect(user=u, password=p, database='epidata') - try: - cursor = cnx.cursor() - cursor.execute(''' - CREATE TABLE IF NOT EXISTS `norostat_raw_datatable_version_list` ( - `release_date` DATE NOT NULL, - `parse_time` DATETIME(6) NOT NULL, - PRIMARY KEY (`release_date`, `parse_time`) - ); - ''') - cursor.execute(''' - CREATE TABLE IF NOT EXISTS `norostat_raw_datatable_measurement_type_pool` ( - `measurement_type_id` INT NOT NULL PRIMARY KEY AUTO_INCREMENT, - `measurement_type` NVARCHAR(255) NOT NULL UNIQUE KEY - ); - ''') - cursor.execute(''' - CREATE TABLE IF NOT EXISTS `norostat_raw_datatable_location_pool` ( - `location_id` INT NOT NULL PRIMARY KEY AUTO_INCREMENT, - `location` NVARCHAR(255) NOT NULL UNIQUE KEY - ); - ''') - cursor.execute(''' - CREATE TABLE IF NOT EXISTS `norostat_raw_datatable_week_pool` ( - `week_id` INT NOT NULL PRIMARY KEY AUTO_INCREMENT, - `week` NVARCHAR(255) NOT NULL UNIQUE KEY - ); - ''') - cursor.execute(''' - CREATE TABLE IF NOT EXISTS `norostat_raw_datatable_diffs` ( - `release_date` DATE NOT NULL, - `parse_time` DATETIME(6) NOT NULL, - `measurement_type_id` INT NOT NULL, - `location_id` INT NOT NULL, - `week_id` INT NOT NULL, - `new_value` NVARCHAR(255), -- allow NULL, with meaning "removed" - FOREIGN KEY (`release_date`,`parse_time`) REFERENCES `norostat_raw_datatable_version_list` (`release_date`,`parse_time`), - FOREIGN KEY (`measurement_type_id`) REFERENCES `norostat_raw_datatable_measurement_type_pool` (`measurement_type_id`), - FOREIGN KEY (`location_id`) REFERENCES `norostat_raw_datatable_location_pool` (`location_id`), - FOREIGN KEY (`week_id`) REFERENCES `norostat_raw_datatable_week_pool` (`week_id`), - UNIQUE KEY (`measurement_type_id`, `location_id`, `week_id`, `release_date`, `parse_time`, `new_value`), - PRIMARY KEY (`release_date`, `parse_time`, `measurement_type_id`, `location_id`, `week_id`) - -- (the indices here are larger than the data, but reducing the key - -- sizes and adding an id somehow seems to result in larger index sizes - -- somehow) - ); - ''') - cnx.commit() - finally: - cnx.close() + (u, p) = secrets.db.epi + cnx = mysql.connector.connect(user=u, password=p, database="epidata") + try: + cursor = cnx.cursor() + cursor.execute( + """ + CREATE TABLE IF NOT EXISTS `norostat_raw_datatable_version_list` ( + `release_date` DATE NOT NULL, + `parse_time` DATETIME(6) NOT NULL, + PRIMARY KEY (`release_date`, `parse_time`) + ); + """ + ) + cursor.execute( + """ + CREATE TABLE IF NOT EXISTS `norostat_raw_datatable_measurement_type_pool` ( + `measurement_type_id` INT NOT NULL PRIMARY KEY AUTO_INCREMENT, + `measurement_type` NVARCHAR(255) NOT NULL UNIQUE KEY + ); + """ + ) + cursor.execute( + """ + CREATE TABLE IF NOT EXISTS `norostat_raw_datatable_location_pool` ( + `location_id` INT NOT NULL PRIMARY KEY AUTO_INCREMENT, + `location` NVARCHAR(255) NOT NULL UNIQUE KEY + ); + """ + ) + cursor.execute( + """ + CREATE TABLE IF NOT EXISTS `norostat_raw_datatable_week_pool` ( + `week_id` INT NOT NULL PRIMARY KEY AUTO_INCREMENT, + `week` NVARCHAR(255) NOT NULL UNIQUE KEY + ); + """ + ) + cursor.execute( + """ + CREATE TABLE IF NOT EXISTS `norostat_raw_datatable_diffs` ( + `release_date` DATE NOT NULL, + `parse_time` DATETIME(6) NOT NULL, + `measurement_type_id` INT NOT NULL, + `location_id` INT NOT NULL, + `week_id` INT NOT NULL, + `new_value` NVARCHAR(255), -- allow NULL, with meaning "removed" + FOREIGN KEY (`release_date`,`parse_time`) REFERENCES 
`norostat_raw_datatable_version_list` (`release_date`,`parse_time`), + FOREIGN KEY (`measurement_type_id`) REFERENCES `norostat_raw_datatable_measurement_type_pool` (`measurement_type_id`), + FOREIGN KEY (`location_id`) REFERENCES `norostat_raw_datatable_location_pool` (`location_id`), + FOREIGN KEY (`week_id`) REFERENCES `norostat_raw_datatable_week_pool` (`week_id`), + UNIQUE KEY (`measurement_type_id`, `location_id`, `week_id`, `release_date`, `parse_time`, `new_value`), + PRIMARY KEY (`release_date`, `parse_time`, `measurement_type_id`, `location_id`, `week_id`) + -- (the indices here are larger than the data, but reducing the key + -- sizes and adding an id somehow seems to result in larger index sizes + -- somehow) + ); + """ + ) + cnx.commit() + finally: + cnx.close() + def dangerously_drop_all_norostat_tables(): - (u, p) = secrets.db.epi - cnx = mysql.connector.connect(user=u, password=p, database='epidata') - try: - cursor = cnx.cursor() - # Drop tables in reverse order (to avoid foreign key related errors): - cursor.execute(''' - DROP TABLE IF EXISTS `norostat_point_diffs`, - `norostat_point_version_list`, - `norostat_raw_datatable_diffs`, - `norostat_raw_datatable_week_pool`, - `norostat_raw_datatable_location_pool`, - `norostat_raw_datatable_measurement_type_pool`, - `norostat_raw_datatable_version_list`; - ''') - cnx.commit() # (might do nothing; each DROP commits itself anyway) - finally: - cnx.close() + (u, p) = secrets.db.epi + cnx = mysql.connector.connect(user=u, password=p, database="epidata") + try: + cursor = cnx.cursor() + # Drop tables in reverse order (to avoid foreign key related errors): + cursor.execute( + """ + DROP TABLE IF EXISTS `norostat_point_diffs`, + `norostat_point_version_list`, + `norostat_raw_datatable_diffs`, + `norostat_raw_datatable_week_pool`, + `norostat_raw_datatable_location_pool`, + `norostat_raw_datatable_measurement_type_pool`, + `norostat_raw_datatable_version_list`; + """ + ) + cnx.commit() # (might do nothing; each DROP commits itself anyway) + finally: + cnx.close() + def record_long_raw(long_raw): - (long_raw_df, release_date, parse_time, location) = long_raw - (u, p) = secrets.db.epi - cnx = mysql.connector.connect(user=u, password=p, database='epidata') - try: - cursor = cnx.cursor() - cnx.start_transaction(isolation_level='SERIALIZABLE') - # Create, populate `norostat_raw_datatable_parsed`: - cursor.execute(''' - CREATE TEMPORARY TABLE `norostat_raw_datatable_parsed` ( - `measurement_type` NVARCHAR(255) NOT NULL, - `location` NVARCHAR(255) NOT NULL, - `week` NVARCHAR(255) NOT NULL, - `value` NVARCHAR(255) NOT NULL, -- forbid NULL; has special external meaning (see above) - PRIMARY KEY (`measurement_type`, `location`, `week`) - ) ENGINE=MEMORY; - ''') - cursor.executemany(''' - INSERT INTO `norostat_raw_datatable_parsed` (`week`,`measurement_type`,`value`,`location`) - VALUES (%s, %s, %s, %s); - ''', [(week, measurement_type, value, location) for - (week, measurement_type, value) in long_raw_df[["week","measurement_type","value"]].astype(str).itertuples(index=False, name=None) - ]) - # Create, populate `norostat_raw_datatable_previous`: - cursor.execute(''' - CREATE TEMPORARY TABLE `norostat_raw_datatable_previous` ( - `measurement_type_id` INT NOT NULL, - `location_id` INT NOT NULL, - `week_id` INT NOT NULL, - `value` NVARCHAR(255) NOT NULL, -- forbid NULL; has special external meaning (see above) - -- would like but not allowed: FOREIGN KEY (`measurement_type_id`) REFERENCES `norostat_raw_datatable_measurement_type_pool` 
(`measurement_type_id`), - -- would like but not allowed: FOREIGN KEY (`location_id`) REFERENCES `norostat_raw_datatable_location_pool` (`location_id`), - -- would like but not allowed: FOREIGN KEY (`week_id`) REFERENCES `norostat_raw_datatable_week_pool` (`week_id`), - PRIMARY KEY (`measurement_type_id`, `location_id`, `week_id`) - ) ENGINE=MEMORY; - ''') - cursor.execute(''' - INSERT INTO `norostat_raw_datatable_previous` (`measurement_type_id`, `location_id`, `week_id`, `value`) - SELECT `latest`.`measurement_type_id`, `latest`.`location_id`, `latest`.`week_id`, `latest`.`new_value` - FROM `norostat_raw_datatable_diffs` AS `latest` - -- Get the latest `new_value` by "group" (measurement_type, location, week) - -- using the fact that there are no later measurements belonging to the - -- same group (find NULL entries in `later`.{release_date,parse_time} - -- in the LEFT JOIN below); if the latest `new_value` is NULL, don't - -- include it in the result; it means that the corresponding cell/entry - -- has been removed from the data-table: - LEFT JOIN ( - SELECT * FROM `norostat_raw_datatable_diffs` - WHERE (`release_date`,`parse_time`) <= (%s,%s) - ) `later` - ON `latest`.`measurement_type_id` = `later`.`measurement_type_id` AND - `latest`.`location_id` = `later`.`location_id` AND - `latest`.`week_id` = `later`.`week_id` AND - (`latest`.`release_date`, `latest`.`parse_time`) < - (`later`.`release_date`, `later`.`parse_time`) - WHERE (`latest`.`release_date`, `latest`.`parse_time`) <= (%s, %s) AND - `later`.`parse_time` IS NULL AND - `latest`.`new_value` IS NOT NULL; - ''', (release_date, parse_time, release_date, parse_time)) - # Find next recorded `release_date`, `parse_time` if any; create, populate - # `norostat_raw_datatable_next` if there is such a version: - cursor.execute(''' - SELECT `release_date`, `parse_time` - FROM `norostat_raw_datatable_version_list` - WHERE (`release_date`, `parse_time`) > (%s,%s) - ORDER BY `release_date`, `parse_time` - LIMIT 1 - ''', (release_date, parse_time)) - next_version_if_any = cursor.fetchall() - expect_result_in(len, next_version_if_any, (0,1), - 'Bug: expected next-version query to return a number of results in {}; instead have len & val ') - if len(next_version_if_any) != 0: - cursor.execute(''' - CREATE TEMPORARY TABLE `norostat_raw_datatable_next` ( - `measurement_type_id` INT NOT NULL, - `location_id` INT NOT NULL, - `week_id` INT NOT NULL, - `value` NVARCHAR(255) NOT NULL, -- forbid NULL; has special external meaning (see above) - -- would like but not allowed: FOREIGN KEY (`measurement_type_id`) REFERENCES `norostat_raw_datatable_measurement_type_pool` (`measurement_type_id`), - -- would like but not allowed: FOREIGN KEY (`location_id`) REFERENCES `norostat_raw_datatable_location_pool` (`location_id`), - -- would like but not allowed: FOREIGN KEY (`week_id`) REFERENCES `norostat_raw_datatable_week_pool` (`week_id`), - PRIMARY KEY (`measurement_type_id`, `location_id`, `week_id`) - ) ENGINE=MEMORY; - ''') - cursor.execute(''' - INSERT INTO `norostat_raw_datatable_next` (`measurement_type_id`, `location_id`, `week_id`, `value`) - SELECT `latest`.`measurement_type_id`, `latest`.`location_id`, `latest`.`week_id`, `latest`.`new_value` - FROM `norostat_raw_datatable_diffs` AS `latest` - -- Get the latest `new_value` by "group" (measurement_type, location, week) - -- using the fact that there are no later measurements belonging to the - -- same group (find NULL entries in `later`.{release_date,parse_time} - -- in the LEFT JOIN below); if the 
latest `new_value` is NULL, don't - -- include it in the result; it means that the corresponding cell/entry - -- has been removed from the data-table: - LEFT JOIN ( - SELECT * FROM `norostat_raw_datatable_diffs` - WHERE (`release_date`,`parse_time`) <= (%s, %s) - ) `later` - ON `latest`.`measurement_type_id` = `later`.`measurement_type_id` AND - `latest`.`location_id` = `later`.`location_id` AND - `latest`.`week_id` = `later`.`week_id` AND - (`latest`.`release_date`, `latest`.`parse_time`) < - (`later`.`release_date`, `later`.`parse_time`) - WHERE (`latest`.`release_date`, `latest`.`parse_time`) <= (%s, %s) AND - `later`.`parse_time` IS NULL AND - `latest`.`new_value` IS NOT NULL -- NULL means value was removed - ''', next_version_if_any[0]+next_version_if_any[0]) - # Register new version in version list: + (long_raw_df, release_date, parse_time, location) = long_raw + (u, p) = secrets.db.epi + cnx = mysql.connector.connect(user=u, password=p, database="epidata") try: - cursor.execute(''' - INSERT INTO `norostat_raw_datatable_version_list` (`release_date`, `parse_time`) - VALUES (%s, %s) - ''', (release_date, parse_time)) - except mysql.connector.errors.IntegrityError as e: - raise Exception(['Encountered an IntegrityError when updating the norostat_raw_datatable_version_list table; this probably indicates that a version with the same `release_date` and `parse_time` was already added to the database; parse_time has limited resolution, so this can happen from populating the database too quickly when there are duplicate release dates; original error: ', e]) - # Add any new measurement_type, location, or week strings to the associated - # string pools: - cursor.execute(''' - INSERT INTO `norostat_raw_datatable_measurement_type_pool` (`measurement_type`) - SELECT DISTINCT `measurement_type` - FROM `norostat_raw_datatable_parsed` - WHERE `measurement_type` NOT IN ( - SELECT `norostat_raw_datatable_measurement_type_pool`.`measurement_type` - FROM `norostat_raw_datatable_measurement_type_pool` - ); - ''') - cursor.execute(''' - INSERT INTO `norostat_raw_datatable_location_pool` (`location`) - SELECT DISTINCT `location` - FROM `norostat_raw_datatable_parsed` - WHERE `location` NOT IN ( - SELECT `norostat_raw_datatable_location_pool`.`location` - FROM `norostat_raw_datatable_location_pool` - ); - ''') - cursor.execute(''' - INSERT INTO `norostat_raw_datatable_week_pool` (`week`) - SELECT DISTINCT `week` - FROM `norostat_raw_datatable_parsed` - WHERE `week` NOT IN ( - SELECT `norostat_raw_datatable_week_pool`.`week` - FROM `norostat_raw_datatable_week_pool` - ); - ''') - # Record diff: [newly parsed version "minus" previous version] (first, - # record additions/updates, then record deletions): - cursor.execute(''' - INSERT INTO `norostat_raw_datatable_diffs` (`measurement_type_id`, `location_id`, `week_id`, `release_date`, `parse_time`, `new_value`) - SELECT `measurement_type_id`, `location_id`, `week_id`, %s, %s, `value` - FROM `norostat_raw_datatable_parsed` - LEFT JOIN `norostat_raw_datatable_measurement_type_pool` USING (`measurement_type`) - LEFT JOIN `norostat_raw_datatable_location_pool` USING (`location`) - LEFT JOIN `norostat_raw_datatable_week_pool` USING (`week`) - WHERE (`measurement_type_id`, `location_id`, `week_id`, `value`) NOT IN ( - SELECT `norostat_raw_datatable_previous`.`measurement_type_id`, - `norostat_raw_datatable_previous`.`location_id`, - `norostat_raw_datatable_previous`.`week_id`, - `norostat_raw_datatable_previous`.`value` - FROM `norostat_raw_datatable_previous` - ); - 
''', (release_date, parse_time)) - cursor.execute(''' - INSERT INTO `norostat_raw_datatable_diffs` (`measurement_type_id`, `location_id`, `week_id`, `release_date`, `parse_time`, `new_value`) - SELECT `measurement_type_id`, `location_id`, `week_id`, %s, %s, NULL - FROM `norostat_raw_datatable_previous` - WHERE (`measurement_type_id`, `location_id`, `week_id`) NOT IN ( - SELECT `norostat_raw_datatable_measurement_type_pool`.`measurement_type_id`, - `norostat_raw_datatable_location_pool`.`location_id`, - `norostat_raw_datatable_week_pool`.`week_id` - FROM `norostat_raw_datatable_parsed` - LEFT JOIN `norostat_raw_datatable_measurement_type_pool` USING (`measurement_type`) - LEFT JOIN `norostat_raw_datatable_location_pool` USING (`location`) - LEFT JOIN `norostat_raw_datatable_week_pool` USING (`week`) - ); - ''', (release_date, parse_time)) - # If there is an already-recorded next version, its diff is invalidated by - # the insertion of the newly parsed version; delete the [next version - # "minus" previous version] diff and record the [next version "minus" newly - # parsed] diff: - if len(next_version_if_any) != 0: - cursor.execute(''' - DELETE FROM `norostat_raw_datatable_diffs` - WHERE `release_date`=%s AND `parse_time`=%s; - ''', next_version_if_any[0]) - cursor.execute(''' - INSERT INTO `norostat_raw_datatable_diffs` (`measurement_type_id`, `location_id`, `week_id`, `release_date`, `parse_time`, `new_value`) - SELECT `measurement_type_id`, `location_id`, `week_id`, %s, %s, `value` - FROM `norostat_raw_datatable_next` - WHERE (`measurement_type_id`, `location_id`, `week_id`, `value`) NOT IN ( - SELECT - `norostat_raw_datatable_measurement_type_pool`.`measurement_type_id`, - `norostat_raw_datatable_location_pool`.`location_id`, - `norostat_raw_datatable_week_pool`.`week_id`, - `norostat_raw_datatable_parsed`.`value` + cursor = cnx.cursor() + cnx.start_transaction(isolation_level="SERIALIZABLE") + # Create, populate `norostat_raw_datatable_parsed`: + cursor.execute( + """ + CREATE TEMPORARY TABLE `norostat_raw_datatable_parsed` ( + `measurement_type` NVARCHAR(255) NOT NULL, + `location` NVARCHAR(255) NOT NULL, + `week` NVARCHAR(255) NOT NULL, + `value` NVARCHAR(255) NOT NULL, -- forbid NULL; has special external meaning (see above) + PRIMARY KEY (`measurement_type`, `location`, `week`) + ) ENGINE=MEMORY; + """ + ) + cursor.executemany( + """ + INSERT INTO `norostat_raw_datatable_parsed` (`week`,`measurement_type`,`value`,`location`) + VALUES (%s, %s, %s, %s); + """, + [(week, measurement_type, value, location) for (week, measurement_type, value) in long_raw_df[["week", "measurement_type", "value"]].astype(str).itertuples(index=False, name=None)], + ) + # Create, populate `norostat_raw_datatable_previous`: + cursor.execute( + """ + CREATE TEMPORARY TABLE `norostat_raw_datatable_previous` ( + `measurement_type_id` INT NOT NULL, + `location_id` INT NOT NULL, + `week_id` INT NOT NULL, + `value` NVARCHAR(255) NOT NULL, -- forbid NULL; has special external meaning (see above) + -- would like but not allowed: FOREIGN KEY (`measurement_type_id`) REFERENCES `norostat_raw_datatable_measurement_type_pool` (`measurement_type_id`), + -- would like but not allowed: FOREIGN KEY (`location_id`) REFERENCES `norostat_raw_datatable_location_pool` (`location_id`), + -- would like but not allowed: FOREIGN KEY (`week_id`) REFERENCES `norostat_raw_datatable_week_pool` (`week_id`), + PRIMARY KEY (`measurement_type_id`, `location_id`, `week_id`) + ) ENGINE=MEMORY; + """ + ) + cursor.execute( + """ + INSERT INTO 
`norostat_raw_datatable_previous` (`measurement_type_id`, `location_id`, `week_id`, `value`) + SELECT `latest`.`measurement_type_id`, `latest`.`location_id`, `latest`.`week_id`, `latest`.`new_value` + FROM `norostat_raw_datatable_diffs` AS `latest` + -- Get the latest `new_value` by "group" (measurement_type, location, week) + -- using the fact that there are no later measurements belonging to the + -- same group (find NULL entries in `later`.{release_date,parse_time} + -- in the LEFT JOIN below); if the latest `new_value` is NULL, don't + -- include it in the result; it means that the corresponding cell/entry + -- has been removed from the data-table: + LEFT JOIN ( + SELECT * FROM `norostat_raw_datatable_diffs` + WHERE (`release_date`,`parse_time`) <= (%s,%s) + ) `later` + ON `latest`.`measurement_type_id` = `later`.`measurement_type_id` AND + `latest`.`location_id` = `later`.`location_id` AND + `latest`.`week_id` = `later`.`week_id` AND + (`latest`.`release_date`, `latest`.`parse_time`) < + (`later`.`release_date`, `later`.`parse_time`) + WHERE (`latest`.`release_date`, `latest`.`parse_time`) <= (%s, %s) AND + `later`.`parse_time` IS NULL AND + `latest`.`new_value` IS NOT NULL; + """, + (release_date, parse_time, release_date, parse_time), + ) + # Find next recorded `release_date`, `parse_time` if any; create, populate + # `norostat_raw_datatable_next` if there is such a version: + cursor.execute( + """ + SELECT `release_date`, `parse_time` + FROM `norostat_raw_datatable_version_list` + WHERE (`release_date`, `parse_time`) > (%s,%s) + ORDER BY `release_date`, `parse_time` + LIMIT 1 + """, + (release_date, parse_time), + ) + next_version_if_any = cursor.fetchall() + expect_result_in(len, next_version_if_any, (0, 1), "Bug: expected next-version query to return a number of results in {}; instead have len & val ") + if len(next_version_if_any) != 0: + cursor.execute( + """ + CREATE TEMPORARY TABLE `norostat_raw_datatable_next` ( + `measurement_type_id` INT NOT NULL, + `location_id` INT NOT NULL, + `week_id` INT NOT NULL, + `value` NVARCHAR(255) NOT NULL, -- forbid NULL; has special external meaning (see above) + -- would like but not allowed: FOREIGN KEY (`measurement_type_id`) REFERENCES `norostat_raw_datatable_measurement_type_pool` (`measurement_type_id`), + -- would like but not allowed: FOREIGN KEY (`location_id`) REFERENCES `norostat_raw_datatable_location_pool` (`location_id`), + -- would like but not allowed: FOREIGN KEY (`week_id`) REFERENCES `norostat_raw_datatable_week_pool` (`week_id`), + PRIMARY KEY (`measurement_type_id`, `location_id`, `week_id`) + ) ENGINE=MEMORY; + """ + ) + cursor.execute( + """ + INSERT INTO `norostat_raw_datatable_next` (`measurement_type_id`, `location_id`, `week_id`, `value`) + SELECT `latest`.`measurement_type_id`, `latest`.`location_id`, `latest`.`week_id`, `latest`.`new_value` + FROM `norostat_raw_datatable_diffs` AS `latest` + -- Get the latest `new_value` by "group" (measurement_type, location, week) + -- using the fact that there are no later measurements belonging to the + -- same group (find NULL entries in `later`.{release_date,parse_time} + -- in the LEFT JOIN below); if the latest `new_value` is NULL, don't + -- include it in the result; it means that the corresponding cell/entry + -- has been removed from the data-table: + LEFT JOIN ( + SELECT * FROM `norostat_raw_datatable_diffs` + WHERE (`release_date`,`parse_time`) <= (%s, %s) + ) `later` + ON `latest`.`measurement_type_id` = `later`.`measurement_type_id` AND + `latest`.`location_id` = 
`later`.`location_id` AND + `latest`.`week_id` = `later`.`week_id` AND + (`latest`.`release_date`, `latest`.`parse_time`) < + (`later`.`release_date`, `later`.`parse_time`) + WHERE (`latest`.`release_date`, `latest`.`parse_time`) <= (%s, %s) AND + `later`.`parse_time` IS NULL AND + `latest`.`new_value` IS NOT NULL -- NULL means value was removed + """, + next_version_if_any[0] + next_version_if_any[0], + ) + # Register new version in version list: + try: + cursor.execute( + """ + INSERT INTO `norostat_raw_datatable_version_list` (`release_date`, `parse_time`) + VALUES (%s, %s) + """, + (release_date, parse_time), + ) + except mysql.connector.errors.IntegrityError as e: + raise Exception( + [ + "Encountered an IntegrityError when updating the norostat_raw_datatable_version_list table; this probably indicates that a version with the same `release_date` and `parse_time` was already added to the database; parse_time has limited resolution, so this can happen from populating the database too quickly when there are duplicate release dates; original error: ", + e, + ] + ) + # Add any new measurement_type, location, or week strings to the associated + # string pools: + cursor.execute( + """ + INSERT INTO `norostat_raw_datatable_measurement_type_pool` (`measurement_type`) + SELECT DISTINCT `measurement_type` + FROM `norostat_raw_datatable_parsed` + WHERE `measurement_type` NOT IN ( + SELECT `norostat_raw_datatable_measurement_type_pool`.`measurement_type` + FROM `norostat_raw_datatable_measurement_type_pool` + ); + """ + ) + cursor.execute( + """ + INSERT INTO `norostat_raw_datatable_location_pool` (`location`) + SELECT DISTINCT `location` + FROM `norostat_raw_datatable_parsed` + WHERE `location` NOT IN ( + SELECT `norostat_raw_datatable_location_pool`.`location` + FROM `norostat_raw_datatable_location_pool` + ); + """ + ) + cursor.execute( + """ + INSERT INTO `norostat_raw_datatable_week_pool` (`week`) + SELECT DISTINCT `week` + FROM `norostat_raw_datatable_parsed` + WHERE `week` NOT IN ( + SELECT `norostat_raw_datatable_week_pool`.`week` + FROM `norostat_raw_datatable_week_pool` + ); + """ + ) + # Record diff: [newly parsed version "minus" previous version] (first, + # record additions/updates, then record deletions): + cursor.execute( + """ + INSERT INTO `norostat_raw_datatable_diffs` (`measurement_type_id`, `location_id`, `week_id`, `release_date`, `parse_time`, `new_value`) + SELECT `measurement_type_id`, `location_id`, `week_id`, %s, %s, `value` FROM `norostat_raw_datatable_parsed` LEFT JOIN `norostat_raw_datatable_measurement_type_pool` USING (`measurement_type`) LEFT JOIN `norostat_raw_datatable_location_pool` USING (`location`) LEFT JOIN `norostat_raw_datatable_week_pool` USING (`week`) + WHERE (`measurement_type_id`, `location_id`, `week_id`, `value`) NOT IN ( + SELECT `norostat_raw_datatable_previous`.`measurement_type_id`, + `norostat_raw_datatable_previous`.`location_id`, + `norostat_raw_datatable_previous`.`week_id`, + `norostat_raw_datatable_previous`.`value` + FROM `norostat_raw_datatable_previous` + ); + """, + (release_date, parse_time), + ) + cursor.execute( + """ + INSERT INTO `norostat_raw_datatable_diffs` (`measurement_type_id`, `location_id`, `week_id`, `release_date`, `parse_time`, `new_value`) + SELECT `measurement_type_id`, `location_id`, `week_id`, %s, %s, NULL + FROM `norostat_raw_datatable_previous` + WHERE (`measurement_type_id`, `location_id`, `week_id`) NOT IN ( + SELECT `norostat_raw_datatable_measurement_type_pool`.`measurement_type_id`, + 
`norostat_raw_datatable_location_pool`.`location_id`, + `norostat_raw_datatable_week_pool`.`week_id` + FROM `norostat_raw_datatable_parsed` + LEFT JOIN `norostat_raw_datatable_measurement_type_pool` USING (`measurement_type`) + LEFT JOIN `norostat_raw_datatable_location_pool` USING (`location`) + LEFT JOIN `norostat_raw_datatable_week_pool` USING (`week`) + ); + """, + (release_date, parse_time), + ) + # If there is an already-recorded next version, its diff is invalidated by + # the insertion of the newly parsed version; delete the [next version + # "minus" previous version] diff and record the [next version "minus" newly + # parsed] diff: + if len(next_version_if_any) != 0: + cursor.execute( + """ + DELETE FROM `norostat_raw_datatable_diffs` + WHERE `release_date`=%s AND `parse_time`=%s; + """, + next_version_if_any[0], + ) + cursor.execute( + """ + INSERT INTO `norostat_raw_datatable_diffs` (`measurement_type_id`, `location_id`, `week_id`, `release_date`, `parse_time`, `new_value`) + SELECT `measurement_type_id`, `location_id`, `week_id`, %s, %s, `value` + FROM `norostat_raw_datatable_next` + WHERE (`measurement_type_id`, `location_id`, `week_id`, `value`) NOT IN ( + SELECT + `norostat_raw_datatable_measurement_type_pool`.`measurement_type_id`, + `norostat_raw_datatable_location_pool`.`location_id`, + `norostat_raw_datatable_week_pool`.`week_id`, + `norostat_raw_datatable_parsed`.`value` + FROM `norostat_raw_datatable_parsed` + LEFT JOIN `norostat_raw_datatable_measurement_type_pool` USING (`measurement_type`) + LEFT JOIN `norostat_raw_datatable_location_pool` USING (`location`) + LEFT JOIN `norostat_raw_datatable_week_pool` USING (`week`) + ); + """, + next_version_if_any[0], + ) + cursor.execute( + """ + INSERT INTO `norostat_raw_datatable_diffs` (`measurement_type_id`, `location_id`, `week_id`, `release_date`, `parse_time`, `new_value`) + SELECT `measurement_type_id`, `location_id`, `week_id`, %s, %s, NULL + FROM `norostat_raw_datatable_parsed` + LEFT JOIN `norostat_raw_datatable_measurement_type_pool` USING (`measurement_type`) + LEFT JOIN `norostat_raw_datatable_location_pool` USING (`location`) + LEFT JOIN `norostat_raw_datatable_week_pool` USING (`week`) + WHERE (`measurement_type_id`, `location_id`, `week_id`) NOT IN ( + SELECT `norostat_raw_datatable_next`.`measurement_type_id`, + `norostat_raw_datatable_next`.`location_id`, + `norostat_raw_datatable_next`.`week_id` + FROM `norostat_raw_datatable_next` + ); + """, + next_version_if_any[0], + ) + cursor.execute( + """ + CREATE TABLE IF NOT EXISTS `norostat_point_version_list` ( + `release_date` DATE NOT NULL, + `parse_time` DATETIME(6) NOT NULL, + FOREIGN KEY (`release_date`,`parse_time`) REFERENCES `norostat_raw_datatable_version_list` (`release_date`,`parse_time`), + PRIMARY KEY (`release_date`, `parse_time`) ); - ''', next_version_if_any[0]) - cursor.execute(''' - INSERT INTO `norostat_raw_datatable_diffs` (`measurement_type_id`, `location_id`, `week_id`, `release_date`, `parse_time`, `new_value`) - SELECT `measurement_type_id`, `location_id`, `week_id`, %s, %s, NULL - FROM `norostat_raw_datatable_parsed` - LEFT JOIN `norostat_raw_datatable_measurement_type_pool` USING (`measurement_type`) - LEFT JOIN `norostat_raw_datatable_location_pool` USING (`location`) - LEFT JOIN `norostat_raw_datatable_week_pool` USING (`week`) - WHERE (`measurement_type_id`, `location_id`, `week_id`) NOT IN ( - SELECT `norostat_raw_datatable_next`.`measurement_type_id`, - `norostat_raw_datatable_next`.`location_id`, - 
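The two INSERT statements above record the newly parsed version as a diff against the previous one: first cells that are new or changed (with their values), then cells that disappeared (as NULL). The same bookkeeping in plain Python, assuming previous and parsed are dicts keyed by the (measurement_type, location, week) triple; this is an illustrative sketch, not code from the patch:

def cell_diff(previous, parsed):
    # Returns {key: new_value}, with None marking cells removed in `parsed`.
    diff = {}
    for key, value in parsed.items():
        if previous.get(key) != value:  # added or changed cell
            diff[key] = value
    for key in previous:
        if key not in parsed:           # cell no longer present in the data-table
            diff[key] = None
    return diff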
`norostat_raw_datatable_next`.`week_id` - FROM `norostat_raw_datatable_next` + """ + ) + cursor.execute( + """ + CREATE TABLE IF NOT EXISTS `norostat_point_diffs` ( + `release_date` DATE NOT NULL, + `parse_time` datetime(6) NOT NULL, + `location_id` INT NOT NULL, + `epiweek` INT NOT NULL, + `new_value` NVARCHAR(255), -- allow NULL, with meaning "removed" + FOREIGN KEY (`release_date`,`parse_time`) REFERENCES `norostat_point_version_list` (`release_date`,`parse_time`), + FOREIGN KEY (`location_id`) REFERENCES norostat_raw_datatable_location_pool (`location_id`), + UNIQUE KEY (`location_id`, `epiweek`, `release_date`, `parse_time`, `new_value`), + PRIMARY KEY (`release_date`, `parse_time`, `location_id`, `epiweek`) ); - ''', next_version_if_any[0]) - cursor.execute(''' - CREATE TABLE IF NOT EXISTS `norostat_point_version_list` ( - `release_date` DATE NOT NULL, - `parse_time` DATETIME(6) NOT NULL, - FOREIGN KEY (`release_date`,`parse_time`) REFERENCES `norostat_raw_datatable_version_list` (`release_date`,`parse_time`), - PRIMARY KEY (`release_date`, `parse_time`) - ); - ''') - cursor.execute(''' - CREATE TABLE IF NOT EXISTS `norostat_point_diffs` ( - `release_date` DATE NOT NULL, - `parse_time` datetime(6) NOT NULL, - `location_id` INT NOT NULL, - `epiweek` INT NOT NULL, - `new_value` NVARCHAR(255), -- allow NULL, with meaning "removed" - FOREIGN KEY (`release_date`,`parse_time`) REFERENCES `norostat_point_version_list` (`release_date`,`parse_time`), - FOREIGN KEY (`location_id`) REFERENCES norostat_raw_datatable_location_pool (`location_id`), - UNIQUE KEY (`location_id`, `epiweek`, `release_date`, `parse_time`, `new_value`), - PRIMARY KEY (`release_date`, `parse_time`, `location_id`, `epiweek`) - ); - ''') - cnx.commit() # (might do nothing; each statement above takes effect and/or commits immediately) - finally: - cnx.close() + """ + ) + cnx.commit() # (might do nothing; each statement above takes effect and/or commits immediately) + finally: + cnx.close() + def update_point(): - (u, p) = secrets.db.epi - cnx = mysql.connector.connect(user=u, password=p, database='epidata') - try: - cursor = cnx.cursor() - cnx.start_transaction(isolation_level='serializable') - cursor.execute(''' - SELECT `release_date`, `parse_time`, `measurement_type`, `location_id`, `week`, `new_value` - FROM `norostat_raw_datatable_diffs` - LEFT JOIN `norostat_raw_datatable_measurement_type_pool` USING (`measurement_type_id`) - LEFT JOIN `norostat_raw_datatable_week_pool` USING (`week_id`) - WHERE (`release_date`, `parse_time`) NOT IN ( - SELECT `norostat_point_version_list`.`release_date`, - `norostat_point_version_list`.`parse_time` - FROM `norostat_point_version_list` - ); - ''') - raw_datatable_diff_selection = cursor.fetchall() - prog = re.compile(r"[0-9]+-[0-9]+$") - point_diff_insertion = [ - (release_date, parse_time, location_id, - season_db_to_epiweek(measurement_type, week), - int(new_value_str) if new_value_str is not None else None + (u, p) = secrets.db.epi + cnx = mysql.connector.connect(user=u, password=p, database="epidata") + try: + cursor = cnx.cursor() + cnx.start_transaction(isolation_level="serializable") + cursor.execute( + """ + SELECT `release_date`, `parse_time`, `measurement_type`, `location_id`, `week`, `new_value` + FROM `norostat_raw_datatable_diffs` + LEFT JOIN `norostat_raw_datatable_measurement_type_pool` USING (`measurement_type_id`) + LEFT JOIN `norostat_raw_datatable_week_pool` USING (`week_id`) + WHERE (`release_date`, `parse_time`) NOT IN ( + SELECT 
`norostat_point_version_list`.`release_date`, + `norostat_point_version_list`.`parse_time` + FROM `norostat_point_version_list` + ); + """ + ) + raw_datatable_diff_selection = cursor.fetchall() + prog = re.compile(r"[0-9]+-[0-9]+$") + point_diff_insertion = [ + (release_date, parse_time, location_id, season_db_to_epiweek(measurement_type, week), int(new_value_str) if new_value_str is not None else None) + for (release_date, parse_time, measurement_type, location_id, week, new_value_str) in raw_datatable_diff_selection + if prog.match(measurement_type) is not None and new_value_str != "" + ] + cursor.execute( + """ + INSERT INTO `norostat_point_version_list` (`release_date`, `parse_time`) + SELECT DISTINCT `release_date`, `parse_time` + FROM `norostat_raw_datatable_version_list` + WHERE (`release_date`, `parse_time`) NOT IN ( + SELECT `norostat_point_version_list`.`release_date`, + `norostat_point_version_list`.`parse_time` + FROM `norostat_point_version_list` + ); + """ ) - for (release_date, parse_time, measurement_type, location_id, week, new_value_str) - in raw_datatable_diff_selection - if prog.match(measurement_type) is not None and - new_value_str != "" - ] - cursor.execute(''' - INSERT INTO `norostat_point_version_list` (`release_date`, `parse_time`) - SELECT DISTINCT `release_date`, `parse_time` - FROM `norostat_raw_datatable_version_list` - WHERE (`release_date`, `parse_time`) NOT IN ( - SELECT `norostat_point_version_list`.`release_date`, - `norostat_point_version_list`.`parse_time` - FROM `norostat_point_version_list` - ); - ''') - cursor.executemany(''' - INSERT INTO `norostat_point_diffs` (`release_date`, `parse_time`, `location_id`, `epiweek`, `new_value`) - VALUES (%s, %s, %s, %s, %s) - ''', point_diff_insertion) - cnx.commit() - finally: - cnx.close() + cursor.executemany( + """ + INSERT INTO `norostat_point_diffs` (`release_date`, `parse_time`, `location_id`, `epiweek`, `new_value`) + VALUES (%s, %s, %s, %s, %s) + """, + point_diff_insertion, + ) + cnx.commit() + finally: + cnx.close() + # note there are more efficient ways to calculate diffs without forming ..._next table # todo give indices names diff --git a/src/acquisition/norostat/norostat_update.py b/src/acquisition/norostat/norostat_update.py index 4b0021dd5..3d0263ff1 100644 --- a/src/acquisition/norostat/norostat_update.py +++ b/src/acquisition/norostat/norostat_update.py @@ -14,15 +14,16 @@ def main(): - # Download the data: - # content = norostat_raw.load_sample_content() - content = norostat_raw.fetch_content() - # norostat_raw.save_sample_content(content) - wide_raw = norostat_raw.parse_content_to_wide_raw(content) - long_raw = norostat_raw.melt_wide_raw_to_long_raw(wide_raw) - norostat_sql.ensure_tables_exist() - norostat_sql.record_long_raw(long_raw) - norostat_sql.update_point() + # Download the data: + # content = norostat_raw.load_sample_content() + content = norostat_raw.fetch_content() + # norostat_raw.save_sample_content(content) + wide_raw = norostat_raw.parse_content_to_wide_raw(content) + long_raw = norostat_raw.melt_wide_raw_to_long_raw(wide_raw) + norostat_sql.ensure_tables_exist() + norostat_sql.record_long_raw(long_raw) + norostat_sql.update_point() -if __name__ == '__main__': - main() + +if __name__ == "__main__": + main() diff --git a/src/acquisition/norostat/norostat_utils.py b/src/acquisition/norostat/norostat_utils.py index a99a4dc96..1285e7867 100644 --- a/src/acquisition/norostat/norostat_utils.py +++ b/src/acquisition/norostat/norostat_utils.py @@ -7,38 +7,48 @@ # helper funs for 
checking expectations, throwing exceptions on violations: def expect_value_eq(encountered, expected, mismatch_format): - if encountered != expected: - raise Exception([mismatch_format.format(expected), encountered]) + if encountered != expected: + raise Exception([mismatch_format.format(expected), encountered]) + + def expect_result_eq(f, value, expected, mismatch_format): - result = f(value) - if result != expected: - raise Exception([mismatch_format.format(expected), result, value]) + result = f(value) + if result != expected: + raise Exception([mismatch_format.format(expected), result, value]) + + def expect_value_in(encountered, expected_candidates, mismatch_format): - if encountered not in expected_candidates: - raise Exception([mismatch_format.format(expected_candidates), encountered]) + if encountered not in expected_candidates: + raise Exception([mismatch_format.format(expected_candidates), encountered]) + + def expect_result_in(f, value, expected_candidates, mismatch_format): - result = f(value) - if result not in expected_candidates: - raise Exception([mismatch_format.format(expected_candidates), result, value]) + result = f(value) + if result not in expected_candidates: + raise Exception([mismatch_format.format(expected_candidates), result, value]) + + def expect_str_contains(encountered, regex, mismatch_format): - if re.search(regex, encountered) is None: - raise Exception([mismatch_format.format(regex), encountered]) + if re.search(regex, encountered) is None: + raise Exception([mismatch_format.format(regex), encountered]) + # helper fun used with expect_* funs to check value of .dtype.kind: def dtype_kind(numpy_like): - return numpy_like.dtype.kind + return numpy_like.dtype.kind + # helper fun used to convert season string ("YYYY-YY" or "YYYY-YYYY") and # "Week" string (strptime format "%d-%b") to the corresponding epiweek; assumes # by default that dates >= 1-Aug correspond to weeks of the first year: def season_db_to_epiweek(season_str, db_date_str, first_db_date_of_season_str="1-Aug"): - year_strs = season_str.split("-") - first_year = int(year_strs[0]) - second_year = first_year + 1 - # FIXME check/enforce locale - first_date_of_season = datetime.datetime.strptime(first_db_date_of_season_str+"-"+str(first_year), "%d-%b-%Y").date() - date_using_first_year = datetime.datetime.strptime(db_date_str+"-"+str(first_year), "%d-%b-%Y").date() - date_using_second_year = datetime.datetime.strptime(db_date_str+"-"+str(second_year), "%d-%b-%Y").date() - date = date_using_first_year if date_using_first_year >= first_date_of_season else date_using_second_year - epiweek = EpiDate(date.year, date.month, date.day).get_ew() - return epiweek + year_strs = season_str.split("-") + first_year = int(year_strs[0]) + second_year = first_year + 1 + # FIXME check/enforce locale + first_date_of_season = datetime.datetime.strptime(first_db_date_of_season_str + "-" + str(first_year), "%d-%b-%Y").date() + date_using_first_year = datetime.datetime.strptime(db_date_str + "-" + str(first_year), "%d-%b-%Y").date() + date_using_second_year = datetime.datetime.strptime(db_date_str + "-" + str(second_year), "%d-%b-%Y").date() + date = date_using_first_year if date_using_first_year >= first_date_of_season else date_using_second_year + epiweek = EpiDate(date.year, date.month, date.day).get_ew() + return epiweek diff --git a/src/acquisition/paho/paho_db_update.py b/src/acquisition/paho/paho_db_update.py index d07885f79..d463a915c 100644 --- a/src/acquisition/paho/paho_db_update.py +++ 
b/src/acquisition/paho/paho_db_update.py @@ -50,9 +50,8 @@ import csv import datetime import glob -import subprocess -import random from io import StringIO +import tempfile # third party import mysql.connector @@ -64,12 +63,14 @@ from delphi.utils.epiweek import delta_epiweeks, check_epiweek from delphi.utils.epidate import EpiDate + def ensure_tables_exist(): - (u,p) = secrets.db.epi - cnx = mysql.connector.connect(user=u,password=p,database='epidata') + (u, p) = secrets.db.epi + cnx = mysql.connector.connect(user=u, password=p, database="epidata") try: cursor = cnx.cursor() - cursor.execute(''' + cursor.execute( + """ CREATE TABLE IF NOT EXISTS `paho_dengue` ( `id` INT(11) NOT NULL PRIMARY KEY AUTO_INCREMENT, `release_date` DATE NOT NULL, @@ -85,35 +86,40 @@ def ensure_tables_exist(): `num_deaths` INT(11) NOT NULL, UNIQUE KEY (`issue`, `epiweek`, `region`) ); - '''); + """ + ) cnx.commit() finally: cnx.close() + def safe_float(f): try: - return float(f.replace(',','')) + return float(f.replace(",", "")) except: return 0 + def safe_int(i): try: - return int(i.replace(',','')) + return int(i.replace(",", "")) except: return 0 -def get_rows(cnx, table='paho_dengue'): - # Count and return the number of rows in the `fluview` table. - select = cnx.cursor() - select.execute('SELECT count(1) num FROM %s' % table) - for (num,) in select: - pass - select.close() - return num + +def get_rows(cnx, table="paho_dengue"): + # Count and return the number of rows in the `fluview` table. + select = cnx.cursor() + select.execute("SELECT count(1) num FROM %s" % table) + for (num,) in select: + pass + select.close() + return num + def get_paho_row(row): - if row[0] == "\ufeffIncidence Rate (c)" and row != "\ufeffIncidence Rate (c),(SD/D) x100 (e),CFR (f),ID,Country or Subregion,Deaths,EW,Confirmed,Epidemiological Week (a),Pop (no usar),Serotype,Severe Dengue (d),Total of Dengue Cases (b),Year,Population x 1000".split(","): - raise Exception('PAHO header row has changed') + if (row[0] == "\ufeffIncidence Rate (c)" and row != "\ufeffIncidence Rate (c),(SD/D) x100 (e),CFR (f),ID,Country or Subregion,Deaths,EW,Confirmed,Epidemiological Week (a),Pop (no usar),Serotype,Severe Dengue (d),Total of Dengue Cases (b),Year,Population x 1000".split(",")): + raise Exception("PAHO header row has changed") if len(row) == 1 or row[0] == "Incidence Rate (c)": # this is a header row return None @@ -128,23 +134,24 @@ def get_paho_row(row): except: return None try: - check_epiweek(safe_int(row[13])*100 + safe_int(row[8]), safe_int(row[13])*100 + safe_int(row[6])) + check_epiweek(safe_int(row[13]) * 100 + safe_int(row[8]), safe_int(row[13]) * 100 + safe_int(row[6])) except: return None return { - 'issue': safe_int(row[13])*100 + safe_int(row[6]), - 'epiweek': safe_int(row[13])*100 + safe_int(row[8]), - 'region': country, - 'total_pop': safe_int(row[14]), - 'serotype': row[10], - 'num_dengue': safe_int(row[12]), - 'incidence_rate': safe_float(row[0]), - 'num_severe': safe_int(row[11]), - 'num_deaths': safe_int(row[5]), - 'severe_ratio': safe_float(row[1]), - 'cfr': safe_float(row[2]) + "issue": safe_int(row[13]) * 100 + safe_int(row[6]), + "epiweek": safe_int(row[13]) * 100 + safe_int(row[8]), + "region": country, + "total_pop": safe_int(row[14]), + "serotype": row[10], + "num_dengue": safe_int(row[12]), + "incidence_rate": safe_float(row[0]), + "num_severe": safe_int(row[11]), + "num_deaths": safe_int(row[5]), + "severe_ratio": safe_float(row[1]), + "cfr": safe_float(row[2]), } + def update_from_file(issue, date, filename, 
test_mode=False): # Read PAHO data from CSV and insert into (or update) the database. @@ -156,23 +163,23 @@ def update_from_file(issue, date, filename, test_mode=False): # database connection u, p = secrets.db.epi - cnx = mysql.connector.connect(user=u, password=p, database='epidata') - rows1 = get_rows(cnx, 'paho_dengue') - print('rows before: %d' % (rows1)) + cnx = mysql.connector.connect(user=u, password=p, database="epidata") + rows1 = get_rows(cnx, "paho_dengue") + print("rows before: %d" % (rows1)) insert = cnx.cursor() # load the data, ignoring empty rows - print('loading data from %s as issued on %d' % (filename, issue)) - with open(filename,'r',encoding='utf-8') as f: + print("loading data from %s as issued on %d" % (filename, issue)) + with open(filename, "r", encoding="utf-8") as f: c = f.read() rows = [] - for l in csv.reader(StringIO(c), delimiter=','): + for l in csv.reader(StringIO(c), delimiter=","): rows.append(get_paho_row(l)) - print(' loaded %d rows' % len(rows)) + print(" loaded %d rows" % len(rows)) entries = [obj for obj in rows if obj] - print(' found %d entries' % len(entries)) + print(" found %d entries" % len(entries)) - sql = ''' + sql = """ INSERT INTO `paho_dengue` (`release_date`, `issue`, `epiweek`, `region`, `lag`, `total_pop`, `serotype`, `num_dengue`, `incidence_rate`, @@ -187,55 +194,43 @@ def update_from_file(issue, date, filename, test_mode=False): `incidence_rate` = %s, `num_severe` = %s, `num_deaths` = %s - ''' + """ for row in entries: - if row['issue'] > issue: # Issued in a week that hasn't happened yet + if row["issue"] > issue: # Issued in a week that hasn't happened yet continue - lag = delta_epiweeks(row['epiweek'], issue) - data_args = [row['total_pop'], row['serotype'], row['num_dengue'], - row['incidence_rate'], row['num_severe'], row['num_deaths']] + lag = delta_epiweeks(row["epiweek"], issue) + data_args = [row["total_pop"], row["serotype"], row["num_dengue"], row["incidence_rate"], row["num_severe"], row["num_deaths"]] - insert_args = [date,issue,row['epiweek'],row['region'],lag] + data_args + insert_args = [date, issue, row["epiweek"], row["region"], lag] + data_args update_args = [date] + data_args insert.execute(sql % tuple(insert_args + update_args)) # cleanup insert.close() if test_mode: - print('test mode, not committing') + print("test mode, not committing") rows2 = rows1 else: cnx.commit() rows2 = get_rows(cnx) - print('rows after: %d (added %d)' % (rows2,rows2-rows1)) + print("rows after: %d (added %d)" % (rows2, rows2 - rows1)) cnx.close() + def main(): # args and usage parser = argparse.ArgumentParser() - parser.add_argument( - '--test', - action='store_true', - help='do dry run only, do not update the database' - ) - parser.add_argument( - '--file', - type=str, - help='load an existing zip file (otherwise fetch current data)' - ) - parser.add_argument( - '--issue', - type=int, - help='issue of the file (e.g. 201740); used iff --file is given' - ) + parser.add_argument("--test", action="store_true", help="do dry run only, do not update the database") + parser.add_argument("--file", type=str, help="load an existing zip file (otherwise fetch current data)") + parser.add_argument("--issue", type=int, help="issue of the file (e.g. 
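Both `issue` and `epiweek` are packed as YYYYWW integers (year * 100 + week), and `lag` is the number of epiweeks between the week the data describes and the week it was issued. A toy example with invented values; delta_epiweeks is the delphi.utils.epiweek helper imported above:

# Report published in epiweek 40 of 2017, describing epiweek 38 of 2017:
issue = 2017 * 100 + 40    # 201740
epiweek = 2017 * 100 + 38  # 201738
# lag = delta_epiweeks(epiweek, issue) -> 2 (issued two epiweeks after the data week)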
201740); used iff --file is given") args = parser.parse_args() if (args.file is None) != (args.issue is None): - raise Exception('--file and --issue must both be present or absent') + raise Exception("--file and --issue must both be present or absent") - date = datetime.datetime.now().strftime('%Y-%m-%d') - print('assuming release date is today, %s' % date) + date = datetime.datetime.now().strftime("%Y-%m-%d") + print("assuming release date is today, %s" % date) if args.file: update_from_file(args.issue, date, args.file, test_mode=args.test) @@ -247,34 +242,31 @@ def main(): max_tries = 5 while flag < max_tries: flag = flag + 1 - tmp_dir = ''.join(random.choice('0123456789abcdefghijklmnopqrstuvwxyz') for i in range(8)) - tmp_dir = 'downloads_' + tmp_dir - subprocess.call(["mkdir",tmp_dir]) - # Use temporary directory to avoid data from different time - # downloaded to same folder - get_paho_data(dir=tmp_dir) - issue = EpiDate.today().get_ew() - # Check to make sure we downloaded a file for every week - issueset = set() - files = glob.glob('%s/*.csv' % tmp_dir) - for filename in files: - with open(filename,'r') as f: - _ = f.readline() - data = f.readline().split(',') - issueset.add(data[6]) - db_error = False - if len(issueset) >= 53: # Shouldn't be more than 53 + with tempfile.TemporaryDirectory() as tmp_dir: + # Use temporary directory to avoid data from different time + # downloaded to same folder + get_paho_data(dir=tmp_dir) + issue = EpiDate.today().get_ew() + # Check to make sure we downloaded a file for every week + issueset = set() + files = glob.glob(f"{tmp_dir}/*.csv") for filename in files: - try: - update_from_file(issue, date, filename, test_mode=args.test) - subprocess.call(["rm",filename]) - except: - db_error = True - subprocess.call(["rm","-r",tmp_dir]) - if not db_error: - break # Exit loop with success + with open(filename, "r") as f: + _ = f.readline() + data = f.readline().split(",") + issueset.add(data[6]) + db_error = False + if len(issueset) >= 53: # Shouldn't be more than 53 + for filename in files: + try: + update_from_file(issue, date, filename, test_mode=args.test) + except: + db_error = True + if not db_error: + break # Exit loop with success if flag >= max_tries: - print('WARNING: Database `paho_dengue` did not update successfully') + print("WARNING: Database `paho_dengue` did not update successfully") + -if __name__ == '__main__': +if __name__ == "__main__": main() diff --git a/src/acquisition/paho/paho_download.py b/src/acquisition/paho/paho_download.py index 60dd13ae8..149fdf374 100644 --- a/src/acquisition/paho/paho_download.py +++ b/src/acquisition/paho/paho_download.py @@ -1,4 +1,3 @@ - # IMPORTANT: This code is extremely unstable. # Slight changes to the PAHO website may render this script partially or entirely useless. @@ -15,42 +14,45 @@ headerheight = 0 + def wait_for(browser, css_selector, delay=10): try: WebDriverWait(browser, delay).until(EC.presence_of_element_located((By.CSS_SELECTOR, css_selector))) WebDriverWait(browser, delay).until(EC.element_to_be_clickable((By.CSS_SELECTOR, css_selector))) - print('Success Loading %s' % (css_selector)) + print("Success Loading %s" % (css_selector)) except TimeoutException: print("Loading %s took too much time!" 
% (css_selector)) - + + def find_and_click(browser, element): element.location_once_scrolled_into_view browser.switch_to.default_content() - browser.execute_script("window.scrollBy(0,-%d)"%headerheight) + browser.execute_script("window.scrollBy(0,-%d)" % headerheight) browser.switch_to.frame(browser.find_element_by_tag_name("iframe")) browser.switch_to.frame(browser.find_element_by_tag_name("iframe")) element.click() -def get_paho_data(offset=0, dir='downloads'): + +def get_paho_data(offset=0, dir="downloads"): opts = Options() opts.set_headless() assert opts.headless # Operating in headless mode fp = FirefoxProfile() - fp.set_preference("browser.download.folderList",2) - fp.set_preference("browser.download.manager.showWhenStarting",False) - fp.set_preference("browser.download.dir",os.path.abspath(dir)) - fp.set_preference("browser.helperApps.neverAsk.saveToDisk","text/csv") + fp.set_preference("browser.download.folderList", 2) + fp.set_preference("browser.download.manager.showWhenStarting", False) + fp.set_preference("browser.download.dir", os.path.abspath(dir)) + fp.set_preference("browser.helperApps.neverAsk.saveToDisk", "text/csv") - browser = Firefox(options=opts,firefox_profile=fp) - browser.get('http://www.paho.org/data/index.php/en/mnu-topics/indicadores-dengue-en/dengue-nacional-en/252-dengue-pais-ano-en.html?showall=&start=1') + browser = Firefox(options=opts, firefox_profile=fp) + browser.get("https://www.paho.org/data/index.php/en/mnu-topics/indicadores-dengue-en/dengue-nacional-en/252-dengue-pais-ano-en.html?showall=&start=1") tab1 = browser.window_handles[0] - browser.execute_script('''window.open("","_blank");''') + browser.execute_script("""window.open("","_blank");""") tab2 = browser.window_handles[1] browser.switch_to.window(tab1) - + curr_offset = offset - + wait_for(browser, "div.rt-top-inner", delay=30) header = browser.find_element_by_css_selector("div.rt-top-inner") global headerheight @@ -59,7 +61,7 @@ def get_paho_data(offset=0, dir='downloads'): # The actual content of the data of this webpage is within 2 iframes, so we need to navigate into them first browser.switch_to.frame(browser.find_element_by_tag_name("iframe")) browser.switch_to.frame(browser.find_element_by_tag_name("iframe")) - + # Locate the button that allows to download the table downloadoption = browser.find_elements_by_css_selector("div.tabToolbarButton.tab-widget.download")[0] find_and_click(browser, downloadoption) @@ -78,10 +80,13 @@ def get_paho_data(offset=0, dir='downloads'): # Extract session ID href = downloadbutton.get_attribute("href") startidx = href.index("sessions/") + len("sessions/") - endidx = href.index("/",startidx) + endidx = href.index("/", startidx) sessionid = href[startidx:endidx] - dataurl = "http://phip.paho.org/vizql/w/Casosdedengue_tben/v/ByLastAvailableEpiWeek/viewData/sessions/%s/views/18076444178507886853_9530488980060483892?maxrows=200&viz=%%7B%%22worksheet%%22:%%22W%%20By%%20Last%%20Available%%20EpiWeek%%22,%%22dashboard%%22:%%22By%%20Last%%20Available%%20Epi%%20Week%%22%%7D"%sessionid + dataurl = ( + "https://phip.paho.org/vizql/w/Casosdedengue_tben/v/ByLastAvailableEpiWeek/viewData/sessions/%s/views/18076444178507886853_9530488980060483892?maxrows=200&viz=%%7B%%22worksheet%%22:%%22W%%20By%%20Last%%20Available%%20EpiWeek%%22,%%22dashboard%%22:%%22By%%20Last%%20Available%%20Epi%%20Week%%22%%7D" + % sessionid + ) wait_for(browser, "div[data-tb-test-id='CancelBtn-Button']") @@ -107,27 +112,27 @@ def get_paho_data(offset=0, dir='downloads'): for i in range(offset): gp = 
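For context on the Firefox preferences used in this file: browser.download.folderList=2 tells Firefox to save into the custom browser.download.dir rather than the default download folder, and browser.helperApps.neverAsk.saveToDisk suppresses the save-file dialog for text/csv responses so each export lands on disk unattended. A condensed sketch of that setup (Selenium 3 API, mirroring the calls in this file; the helper name is illustrative):

import os
from selenium.webdriver import Firefox, FirefoxProfile
from selenium.webdriver.firefox.options import Options

def make_csv_download_browser(download_dir):
    opts = Options()
    opts.set_headless()  # run without a visible window
    fp = FirefoxProfile()
    fp.set_preference("browser.download.folderList", 2)  # use the custom dir below
    fp.set_preference("browser.download.manager.showWhenStarting", False)
    fp.set_preference("browser.download.dir", os.path.abspath(download_dir))
    fp.set_preference("browser.helperApps.neverAsk.saveToDisk", "text/csv")
    return Firefox(options=opts, firefox_profile=fp)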
browser.find_element_by_css_selector("div.wcGlassPane") - #print gp.is_enabled() - #print gp.is_selected() - #print gp.is_displayed() + # print gp.is_enabled() + # print gp.is_selected() + # print gp.is_displayed() try: WebDriverWait(browser, 10).until(EC.staleness_of(gp)) - print("Loaded next week % d" % (53-offset)) + print("Loaded next week % d" % (53 - offset)) except TimeoutException: - print("Loading next week %d took too much time!" % (53-offset)) + print("Loading next week %d took too much time!" % (53 - offset)) gp = browser.find_element_by_css_selector("div.wcGlassPane") - #print gp.is_enabled() - #print gp.is_selected() - #print gp.is_displayed() + # print gp.is_enabled() + # print gp.is_selected() + # print gp.is_displayed() x = browser.find_elements_by_css_selector("div.dijitReset.dijitSliderButtonContainer.dijitSliderButtonContainerH.tableauArrowDec")[0] find_and_click(browser, x) # Cycle through all weeks, downloading each week as a separate .csv # Theoretically, need to cycle 53 times, but in practice only 54 works, unsure why - for i in range(54-offset): + for i in range(54 - offset): # If something goes wrong for whatever reason, try from the beginning try: - print('Loading week %d' % (53-i)) + print("Loading week %d" % (53 - i)) # (Re-)load URL browser.switch_to.window(tab2) browser.get(dataurl) @@ -137,7 +142,7 @@ def get_paho_data(offset=0, dir='downloads'): full_data_tab = browser.find_elements_by_css_selector("li[id='tab-view-full-data']")[0] full_data_tab.click() - wait_for(browser, "a.csvLink") # Sometimes this fails but the button is successfully clicked anyway, not sure why + wait_for(browser, "a.csvLink") # Sometimes this fails but the button is successfully clicked anyway, not sure why # Actually download the data as a .csv (Will be downloaded to Firefox's default download destination) data_links = browser.find_elements_by_css_selector("a.csvLink") data_link = None @@ -155,10 +160,11 @@ def get_paho_data(offset=0, dir='downloads'): find_and_click(browser, x) curr_offset += 1 except Exception as e: - print('Got exception %s\nTrying again from week %d' % (e,53-offset)) + print("Got exception %s\nTrying again from week %d" % (e, 53 - offset)) browser.quit() get_paho_data(offset=curr_offset) browser.quit() -if __name__ == '__main__': - get_paho_data(dir='downloads/') + +if __name__ == "__main__": + get_paho_data(dir="downloads/") diff --git a/src/acquisition/quidel/quidel.py b/src/acquisition/quidel/quidel.py index a7c9a2918..f33cb3aef 100644 --- a/src/acquisition/quidel/quidel.py +++ b/src/acquisition/quidel/quidel.py @@ -1,4 +1,4 @@ -''' +""" =============== === Purpose === =============== @@ -15,7 +15,7 @@ * add end date, end week check 2017-12-02: * original version -''' +""" # standard library from collections import defaultdict @@ -35,148 +35,146 @@ import delphi.utils.epidate as ED from delphi.utils.geo.locations import Locations -def word_map(row,terms): - for (k,v) in terms.items(): - row = row.replace(k,v) + +def word_map(row, terms): + for (k, v) in terms.items(): + row = row.replace(k, v) return row -def date_less_than(d1,d2): - y1,m1,d1 = [int(x) for x in d1.split('-')] - y2,m2,d2 = [int(x) for x in d2.split('-')] - if y1*10000+m1*100+d10: shifted to future def date_to_epiweek(date, shift=0): - y,m,d = [int(x) for x in date.split('-')] + y, m, d = [int(x) for x in date.split("-")] - epidate = ED.EpiDate(y,m,d) + epidate = ED.EpiDate(y, m, d) epidate = epidate.add_days(shift) ew = epidate.get_ew() return ew + # convert measurment to time series format # 
startweek and endweek are inclusive -def measurement_to_ts(m,index,startweek=None,endweek=None): +def measurement_to_ts(m, index, startweek=None, endweek=None): if startweek is None: startweek = 0 if endweek is None: endweek = 999999 res = {} - for r,rdict in m.items(): - res[r]={} - for t,vals in rdict.items(): - if index>=len(vals): + for r, rdict in m.items(): + res[r] = {} + for t, vals in rdict.items(): + if index >= len(vals): raise Exception("Index is invalid") - if t>=startweek and t<=endweek: + if t >= startweek and t <= endweek: res[r][t] = vals[index] return res + class QuidelData: def __init__(self, raw_path, load_email=True): self.data_path = raw_path - self.excel_uptodate_path = join(raw_path,'excel/uptodate') - self.excel_history_path = join(raw_path,'excel/history') - self.csv_path = join(raw_path,'csv') - self.xlsx_uptodate_list = [ - f[:-5] for f in listdir(self.excel_uptodate_path) if isfile(join(self.excel_uptodate_path, f)) and f[-5:]=='.xlsx' - ] - self.xlsx_history_list = [ - f[:-5] for f in listdir(self.excel_history_path) if isfile(join(self.excel_history_path, f)) and f[-5:]=='.xlsx' - ] - self.csv_list = [f[:-4] for f in listdir(self.csv_path) if isfile(join(self.csv_path, f)) and f[-4:]=='.csv'] + self.excel_uptodate_path = join(raw_path, "excel/uptodate") + self.excel_history_path = join(raw_path, "excel/history") + self.csv_path = join(raw_path, "csv") + self.xlsx_uptodate_list = [f[:-5] for f in listdir(self.excel_uptodate_path) if isfile(join(self.excel_uptodate_path, f)) and f[-5:] == ".xlsx"] + self.xlsx_history_list = [f[:-5] for f in listdir(self.excel_history_path) if isfile(join(self.excel_history_path, f)) and f[-5:] == ".xlsx"] + self.csv_list = [f[:-4] for f in listdir(self.csv_path) if isfile(join(self.csv_path, f)) and f[-4:] == ".csv"] self.map_terms = { - ' FL 34637"':'FL', + ' FL 34637"': "FL", } # hardcoded parameters self.date_dim = 1 self.state_dim = 4 - self.fields = [ - 'sofia_ser','date','fac_id','city','state','zip','age', - 'fluA','fluB','fluAll','county','fac_type' - ] - self.fields_to_keep = ['fac_id','fluA','fluB','fluAll'] + self.fields = ["sofia_ser", "date", "fac_id", "city", "state", "zip", "age", "fluA", "fluB", "fluAll", "county", "fac_type"] + self.fields_to_keep = ["fac_id", "fluA", "fluB", "fluAll"] self.dims_to_keep = [self.fields.index(x) for x in self.fields_to_keep] if load_email: self.retrieve_excels() self.prepare_csv() def retrieve_excels(self): - detach_dir = self.excel_uptodate_path # directory where to save attachments (default: current) + detach_dir = self.excel_uptodate_path # directory where to save attachments (default: current) # connecting to the gmail imap server m = imaplib.IMAP4_SSL("imap.gmail.com") - m.login(secrets.quidel.email_addr,secrets.quidel.email_pwd) - m.select("INBOX") # here you a can choose a mail box like INBOX instead + m.login(secrets.quidel.email_addr, secrets.quidel.email_pwd) + m.select("INBOX") # here you a can choose a mail box like INBOX instead # use m.list() to get all the mailboxes - _, items = m.search(None, "ALL") # you could filter using the IMAP rules here (check http://www.example-code.com/csharp/imap-search-critera.asp) - items = items[0].split() # getting the mails id + _, items = m.search(None, "ALL") # you could filter using the IMAP rules here (check https://www.example-code.com/csharp/imap-search-critera.asp) + items = items[0].split() # getting the mails id # The emailids are ordered from past to now for emailid in items: - _, data = m.fetch(emailid, "(RFC822)") # 
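measurement_to_ts selects one of the eight weekly aggregates produced by prepare_measurements (by index) and restricts the result to an inclusive epiweek range. A toy call with invented numbers:

m = {
    "hhs1": {
        201740: [3.0, 1.0, 4.0, 10.0, 0.75, 0.25, 1.0, 2.5],
        201741: [2.0, 0.0, 2.0, 8.0, 0.50, 0.00, 0.5, 2.0],
    }
}
# Keep aggregate index 7 (the per-device record count; the index that
# quidel_update.py passes) for weeks 201740 through 201741:
ts = measurement_to_ts(m, 7, startweek=201740, endweek=201741)
# ts == {"hhs1": {201740: 2.5, 201741: 2.0}}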
fetching the mail, "`(RFC822)`" means "get the whole stuff", but you can ask for headers only, etc - email_body = data[0][1].decode('utf-8') # getting the mail content - mail = email.message_from_string(email_body) # parsing the mail content to get a mail object + _, data = m.fetch(emailid, "(RFC822)") # fetching the mail, "`(RFC822)`" means "get the whole stuff", but you can ask for headers only, etc + email_body = data[0][1].decode("utf-8") # getting the mail content + mail = email.message_from_string(email_body) # parsing the mail content to get a mail object - #Check if any attachments at all - if mail.get_content_maintype() != 'multipart': + # Check if any attachments at all + if mail.get_content_maintype() != "multipart": continue # we use walk to create a generator so we can iterate on the parts and forget about the recursive headach for part in mail.walk(): # multipart are just containers, so we skip them - if part.get_content_maintype() == 'multipart': + if part.get_content_maintype() == "multipart": continue # is this part an attachment ? - if part.get('Content-Disposition') is None: + if part.get("Content-Disposition") is None: continue filename = part.get_filename() # check duplicates - if filename[-5:]!='.xlsx' or filename[:-5] in self.xlsx_uptodate_list+self.xlsx_history_list: + if filename[-5:] != ".xlsx" or filename[:-5] in self.xlsx_uptodate_list + self.xlsx_history_list: continue self.xlsx_uptodate_list.append(filename[:-5]) att_path = os.path.join(detach_dir, filename) - #Check if its already there - if not os.path.isfile(att_path) : + # Check if its already there + if not os.path.isfile(att_path): # finally write the stuff - fp = open(att_path, 'wb') + fp = open(att_path, "wb") fp.write(part.get_payload(decode=True)) fp.close() def prepare_csv(self): - need_update=False + need_update = False for f in self.xlsx_uptodate_list: if f in self.csv_list: continue else: - need_update=True + need_update = True - date_regex = '\d{2}-\d{2}-\d{4}' - date_items = re.findall(date_regex,f) + date_regex = "\d{2}-\d{2}-\d{4}" + date_items = re.findall(date_regex, f) if date_items: - end_date = '-'.join(date_items[-1].split('-')[x] for x in [2,0,1]) + end_date = "-".join(date_items[-1].split("-")[x] for x in [2, 0, 1]) else: - print("End date not found in file name:"+f) + print("End date not found in file name:" + f) end_date = None - df_dict = pd.read_excel(join(self.excel_uptodate_path, f+'.xlsx'), sheet_name=None) - for (_,df) in df_dict.items(): - df = df.dropna(axis=0, how='all') - df['TestDate'] = df['TestDate'].apply(lambda x: x.strftime('%Y-%m-%d')) - df_filtered = df[df['TestDate']!=''] + df_dict = pd.read_excel(join(self.excel_uptodate_path, f + ".xlsx"), sheet_name=None) + for (_, df) in df_dict.items(): + df = df.dropna(axis=0, how="all") + df["TestDate"] = df["TestDate"].apply(lambda x: x.strftime("%Y-%m-%d")) + df_filtered = df[df["TestDate"] != ""] if end_date is not None: - df_filtered = df_filtered[df.apply(lambda x: date_less_than(end_date,x['TestDate'])!=1, axis=1)] - df_filtered.to_csv(join(self.csv_path, f+'.csv'), index=False, encoding='utf-8') - self.csv_list = [f[:-4] for f in listdir(self.csv_path) if isfile(join(self.csv_path, f)) and f[-4:]=='.csv'] + df_filtered = df_filtered[df.apply(lambda x: date_less_than(end_date, x["TestDate"]) != 1, axis=1)] + df_filtered.to_csv(join(self.csv_path, f + ".csv"), index=False, encoding="utf-8") + self.csv_list = [f[:-4] for f in listdir(self.csv_path) if isfile(join(self.csv_path, f)) and f[-4:] == ".csv"] self.need_update 
= need_update def load_csv(self, dims=None): @@ -186,12 +184,12 @@ def load_csv(self, dims=None): for f in self.csv_list: if f in self.xlsx_history_list: continue - rf = open(join(self.csv_path,f+'.csv')) + rf = open(join(self.csv_path, f + ".csv")) lines = rf.readlines() for l in lines[1:]: - l = word_map(l,self.map_terms) - row = l.strip().split(',') + l = word_map(l, self.map_terms) + row = l.strip().split(",") date = row[self.date_dim] state = row[self.state_dim] if state not in parsed_dict[date]: @@ -202,7 +200,7 @@ def load_csv(self, dims=None): # hardcoded aggregation function # output: [#unique_device,fluA,fluB,fluAll,total] - def prepare_measurements(self,data_dict,use_hhs=True,start_weekday=6): + def prepare_measurements(self, data_dict, use_hhs=True, start_weekday=6): buffer_dict = {} if use_hhs: region_list = Locations.hhs_list @@ -210,34 +208,33 @@ def prepare_measurements(self,data_dict,use_hhs=True,start_weekday=6): region_list = Locations.atom_list def get_hhs_region(atom): - for region in Locations.hhs_list: - if atom.lower() in Locations.hhs_map[region]: - return region - if atom.lower() == 'ny': - return 'hhs2' - return atom + for region in Locations.hhs_list: + if atom.lower() in Locations.hhs_map[region]: + return region + if atom.lower() == "ny": + return "hhs2" + return atom day_shift = 6 - start_weekday - time_map = lambda x:date_to_epiweek(x,shift=day_shift) - region_map = lambda x:get_hhs_region(x) \ - if use_hhs and x not in Locations.hhs_list else x # a bit hacky + time_map = lambda x: date_to_epiweek(x, shift=day_shift) + region_map = lambda x: get_hhs_region(x) if use_hhs and x not in Locations.hhs_list else x # a bit hacky end_date = sorted(data_dict.keys())[-1] # count the latest week in only if Thurs data is included - end_epiweek = date_to_epiweek(end_date,shift=-4) + end_epiweek = date_to_epiweek(end_date, shift=-4) # first pass: prepare device_id set device_dict = {} - for (date,daily_dict) in data_dict.items(): + for (date, daily_dict) in data_dict.items(): if not date: continue ew = time_map(date) - if ew == -1 or ew>end_epiweek: + if ew == -1 or ew > end_epiweek: continue if ew not in device_dict: - device_dict[ew]={} + device_dict[ew] = {} for r in region_list: device_dict[ew][r] = set() - for (state,rec_list) in daily_dict.items(): + for (state, rec_list) in daily_dict.items(): region = region_map(state) # get rid of non-US regions if region not in region_list: @@ -247,38 +244,40 @@ def get_hhs_region(atom): device_dict[ew][region].add(fac) # second pass: prepare all measurements - for (date,daily_dict) in data_dict.items(): + for (date, daily_dict) in data_dict.items(): ew = time_map(date) - if ew == -1 or ew>end_epiweek: + if ew == -1 or ew > end_epiweek: continue if ew not in buffer_dict: - buffer_dict[ew]={} + buffer_dict[ew] = {} for r in region_list: - buffer_dict[ew][r] = [0.0]*8 + buffer_dict[ew][r] = [0.0] * 8 - for (state,rec_list) in daily_dict.items(): + for (state, rec_list) in daily_dict.items(): region = region_map(state) # get rid of non-US regions if region not in region_list: continue for rec in rec_list: fac_num = float(len(device_dict[ew][region])) - buffer_dict[ew][region]= np.add( - buffer_dict[ew][region],[ - rec[1]=='positive', - rec[2]=='positive', - rec[3]=='positive', + buffer_dict[ew][region] = np.add( + buffer_dict[ew][region], + [ + rec[1] == "positive", + rec[2] == "positive", + rec[3] == "positive", 1.0, - float(rec[1]=='positive')/fac_num, - float(rec[2]=='positive')/fac_num, - float(rec[3]=='positive')/fac_num, - 
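Each record contributes an eight-element vector to buffer_dict[ew][region]: raw positives for fluA, fluB, and fluAll, a record count of 1, and the same four quantities divided by the number of devices reporting in that region and week. A plain-Python restatement of that contribution (illustrative sketch; rec follows the fields_to_keep layout fac_id, fluA, fluB, fluAll):

def record_contribution(rec, device_count):
    flu_a = float(rec[1] == "positive")
    flu_b = float(rec[2] == "positive")
    flu_all = float(rec[3] == "positive")
    return [
        flu_a, flu_b, flu_all, 1.0,                  # raw counts
        flu_a / device_count, flu_b / device_count,  # device-normalized
        flu_all / device_count, 1.0 / device_count,
    ]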
1.0/fac_num, - ]).tolist() + float(rec[1] == "positive") / fac_num, + float(rec[2] == "positive") / fac_num, + float(rec[3] == "positive") / fac_num, + 1.0 / fac_num, + ], + ).tolist() # switch two dims of dict result_dict = {} for r in region_list: - result_dict[r]={} - for (k,v) in buffer_dict.items(): - result_dict[r][k]=v[r] + result_dict[r] = {} + for (k, v) in buffer_dict.items(): + result_dict[r][k] = v[r] return result_dict diff --git a/src/acquisition/quidel/quidel_update.py b/src/acquisition/quidel/quidel_update.py index b6303533c..d8a93cc36 100644 --- a/src/acquisition/quidel/quidel_update.py +++ b/src/acquisition/quidel/quidel_update.py @@ -1,4 +1,4 @@ -''' +""" =============== === Purpose === =============== @@ -33,7 +33,7 @@ 2017-12-02: * original version -''' +""" # standard library import argparse @@ -49,106 +49,107 @@ from delphi.utils.geo.locations import Locations LOCATIONS = Locations.hhs_list -DATAPATH = '/home/automation/quidel_data' +DATAPATH = "/home/automation/quidel_data" + def update(locations, first=None, last=None, force_update=False, load_email=True): - # download and prepare data first - qd = quidel.QuidelData(DATAPATH,load_email) - if not qd.need_update and not force_update: - print('Data not updated, nothing needs change.') - return - - qd_data = qd.load_csv() - qd_measurements = qd.prepare_measurements(qd_data,start_weekday=4) - qd_ts = quidel.measurement_to_ts(qd_measurements,7,startweek=first,endweek=last) - # connect to the database - u, p = secrets.db.epi - cnx = mysql.connector.connect(user=u, password=p, database='epidata') - cur = cnx.cursor() - - def get_num_rows(): - cur.execute('SELECT count(1) `num` FROM `quidel`') - for (num,) in cur: - pass - return num - - # check from 4 weeks preceeding the last week with data through this week - cur.execute('SELECT max(`epiweek`) `ew0`, yearweek(now(), 6) `ew1` FROM `quidel`') - for (ew0, ew1) in cur: - ew0 = 200401 if ew0 is None else flu.add_epiweeks(ew0, -4) - ew0 = ew0 if first is None else first - ew1 = ew1 if last is None else last - print('Checking epiweeks between %d and %d...' % (ew0, ew1)) - - # keep track of how many rows were added - rows_before = get_num_rows() - - # check Quidel for new and/or revised data - sql = ''' + # download and prepare data first + qd = quidel.QuidelData(DATAPATH, load_email) + if not qd.need_update and not force_update: + print("Data not updated, nothing needs change.") + return + + qd_data = qd.load_csv() + qd_measurements = qd.prepare_measurements(qd_data, start_weekday=4) + qd_ts = quidel.measurement_to_ts(qd_measurements, 7, startweek=first, endweek=last) + # connect to the database + u, p = secrets.db.epi + cnx = mysql.connector.connect(user=u, password=p, database="epidata") + cur = cnx.cursor() + + def get_num_rows(): + cur.execute("SELECT count(1) `num` FROM `quidel`") + for (num,) in cur: + pass + return num + + # check from 4 weeks preceeding the last week with data through this week + cur.execute("SELECT max(`epiweek`) `ew0`, yearweek(now(), 6) `ew1` FROM `quidel`") + for (ew0, ew1) in cur: + ew0 = 200401 if ew0 is None else flu.add_epiweeks(ew0, -4) + ew0 = ew0 if first is None else first + ew1 = ew1 if last is None else last + print("Checking epiweeks between %d and %d..." 
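The scan window in update() starts four epiweeks before the most recent epiweek already stored (or at 200401 for an empty table) and runs through the current epiweek, so recently revised weeks are re-checked on every run. Invented example; flu is the delphi.utils epiweek module used above:

# If the newest stored epiweek is 201740, scanning starts at
# flu.add_epiweeks(201740, -4) == 201736.
# Note the upsert binds the value twice, once for the INSERT and once for
# the ON DUPLICATE KEY UPDATE clause:
#   sql_data = (location, ew, v, v)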
% (ew0, ew1)) + + # keep track of how many rows were added + rows_before = get_num_rows() + + # check Quidel for new and/or revised data + sql = """ INSERT INTO `quidel` (`location`, `epiweek`, `value`) VALUES (%s, %s, %s) ON DUPLICATE KEY UPDATE `value` = %s - ''' - - total_rows = 0 - - for location in locations: - if location not in qd_ts: - continue - ews = sorted(qd_ts[location].keys()) - num_missing = 0 - for ew in ews: - v = qd_ts[location][ew] - sql_data = (location, ew, v, v) - cur.execute(sql, sql_data) - total_rows += 1 - if v == 0: - num_missing += 1 - if num_missing > 0: - print(' [%s] missing %d/%d value(s)' % (location, num_missing, len(ews))) - - # keep track of how many rows were added - rows_after = get_num_rows() - print('Inserted %d/%d row(s)'%(rows_after - rows_before, total_rows)) - - # cleanup - cur.close() - cnx.commit() - cnx.close() + """ + + total_rows = 0 + + for location in locations: + if location not in qd_ts: + continue + ews = sorted(qd_ts[location].keys()) + num_missing = 0 + for ew in ews: + v = qd_ts[location][ew] + sql_data = (location, ew, v, v) + cur.execute(sql, sql_data) + total_rows += 1 + if v == 0: + num_missing += 1 + if num_missing > 0: + print(" [%s] missing %d/%d value(s)" % (location, num_missing, len(ews))) + + # keep track of how many rows were added + rows_after = get_num_rows() + print("Inserted %d/%d row(s)" % (rows_after - rows_before, total_rows)) + + # cleanup + cur.close() + cnx.commit() + cnx.close() def main(): - # args and usage - parser = argparse.ArgumentParser() - parser.add_argument('--location', action='store', type=str, default=None, help='location(s) (ex: all; any of hhs1-10)') - parser.add_argument('--first', '-f', default=None, type=int, help='first epiweek override') - parser.add_argument('--last', '-l', default=None, type=int, help='last epiweek override') - parser.add_argument('--force_update', '-u', action='store_true', help='force update db values') - parser.add_argument('--skip_email', '-s', action='store_true', help='skip email downloading step') - args = parser.parse_args() - - # sanity check - first, last, force_update, skip_email = args.first, args.last, args.force_update, args.skip_email - load_email = not skip_email - if first is not None: - flu.check_epiweek(first) - if last is not None: - flu.check_epiweek(last) - if first is not None and last is not None and first > last: - raise Exception('epiweeks in the wrong order') - - # decide what to update - if args.location.lower() == 'all': - locations = LOCATIONS - else: - locations = args.location.lower().split(',') - - # run the update - update(locations, first, last, force_update, load_email) - - -if __name__ == '__main__': - main() + # args and usage + parser = argparse.ArgumentParser() + parser.add_argument("--location", action="store", type=str, default=None, help="location(s) (ex: all; any of hhs1-10)") + parser.add_argument("--first", "-f", default=None, type=int, help="first epiweek override") + parser.add_argument("--last", "-l", default=None, type=int, help="last epiweek override") + parser.add_argument("--force_update", "-u", action="store_true", help="force update db values") + parser.add_argument("--skip_email", "-s", action="store_true", help="skip email downloading step") + args = parser.parse_args() + + # sanity check + first, last, force_update, skip_email = args.first, args.last, args.force_update, args.skip_email + load_email = not skip_email + if first is not None: + flu.check_epiweek(first) + if last is not None: + flu.check_epiweek(last) + 
if first is not None and last is not None and first > last: + raise Exception("epiweeks in the wrong order") + + # decide what to update + if args.location.lower() == "all": + locations = LOCATIONS + else: + locations = args.location.lower().split(",") + + # run the update + update(locations, first, last, force_update, load_email) + + +if __name__ == "__main__": + main() diff --git a/src/acquisition/twtr/healthtweets.py b/src/acquisition/twtr/healthtweets.py index 78eb2b3ec..763298cee 100644 --- a/src/acquisition/twtr/healthtweets.py +++ b/src/acquisition/twtr/healthtweets.py @@ -1,4 +1,4 @@ -''' +""" =============== === Purpose === =============== @@ -20,7 +20,7 @@ * Fetching daily values instead of weekly values 2015-03-?? * Original version -''' +""" # standard library import argparse @@ -36,132 +36,190 @@ class HealthTweets: - # mapping from state abbreviations to location codes used by healthtweets.org - STATE_CODES = {'AL': 3024, 'AK': 3025, 'AZ': 3026, 'AR': 3027, 'CA': 440, 'CO': 3029, 'CT': 3030, 'DE': 3031, 'DC': 3032, 'FL': 3033, 'GA': 3034, 'HI': 3035, 'ID': 3036, 'IL': 3037, 'IN': 3038, 'IA': 3039, 'KS': 3040, 'KY': 3041, 'LA': 2183, 'ME': 3043, 'MD': 3044, 'MA': 450, 'MI': 3046, 'MN': 3047, 'MS': 3048, 'MO': 3049, 'MT': 3050, 'NE': 3051, 'NV': 3052, 'NH': 3053, 'NJ': 478, 'NM': 2225, 'NY': 631, 'NC': 3057, 'ND': 3058, 'OH': 3059, 'OK': 3060, 'OR': 281, 'PA': 3062, 'RI': 3063, 'SC': 3064, 'SD': 3065, 'TN': 3066, 'TX': 3067, 'UT': 2272, 'VT': 3069, 'VA': 3070, 'WA': 3071, 'WV': 3072, 'WI': 3073, 'WY': 3074} - - def __init__(self, username, password, debug=False): - self.debug = debug - self.session = requests.Session() - # spoof a web browser - self.session.headers.update({ - 'User-Agent': 'Mozilla/5.0 (Windows NT 6.1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/41.0.2228.0 Safari/537.36', - }) - # get the login token - response = self._go('http://www.healthtweets.org/accounts/login') - token = self._get_token(response.text) - if self.debug: - print('token=%s'%(token)) - data = { - 'csrfmiddlewaretoken': token, - 'username': username, - 'password': password, - 'next': '/', + # mapping from state abbreviations to location codes used by healthtweets.org + STATE_CODES = { + "AL": 3024, + "AK": 3025, + "AZ": 3026, + "AR": 3027, + "CA": 440, + "CO": 3029, + "CT": 3030, + "DE": 3031, + "DC": 3032, + "FL": 3033, + "GA": 3034, + "HI": 3035, + "ID": 3036, + "IL": 3037, + "IN": 3038, + "IA": 3039, + "KS": 3040, + "KY": 3041, + "LA": 2183, + "ME": 3043, + "MD": 3044, + "MA": 450, + "MI": 3046, + "MN": 3047, + "MS": 3048, + "MO": 3049, + "MT": 3050, + "NE": 3051, + "NV": 3052, + "NH": 3053, + "NJ": 478, + "NM": 2225, + "NY": 631, + "NC": 3057, + "ND": 3058, + "OH": 3059, + "OK": 3060, + "OR": 281, + "PA": 3062, + "RI": 3063, + "SC": 3064, + "SD": 3065, + "TN": 3066, + "TX": 3067, + "UT": 2272, + "VT": 3069, + "VA": 3070, + "WA": 3071, + "WV": 3072, + "WI": 3073, + "WY": 3074, } - # login to the site - response = self._go('http://www.healthtweets.org/accounts/login', data=data) - if response.status_code != 200 or 'Your username and password' in response.text: - raise Exception('login failed') - - def get_values(self, state, date1, date2): - ''' - state: two-letter state abbreviation (see STATE_CODES) - date1: the first date in the range, inclusive (format: YYYY-MM-DD) - date2: the last date in the range, inclusive (format: YYYY-MM-DD) - returns a dictionary (by date) of number of flu tweets (num) and total tweets (total) - ''' - # get raw values (number of flu tweets) and normalized 
values (flu tweets as a percent of total tweets) - raw_values = self._get_values(state, date1, date2, False) - normalized_values = self._get_values(state, date1, date2, True) - values = {} - # save the raw number and calculate the total - for date in raw_values.keys(): - if normalized_values[date] == 0: - continue - values[date] = { - 'num': round(raw_values[date]), - 'total': round(100 * raw_values[date] / normalized_values[date]), - } - print(date, raw_values[date], normalized_values[date]) - return values - - def _get_values(self, state, date1, date2, normalized): - if state not in HealthTweets.STATE_CODES: - raise Exception('invalid state') - state_code = HealthTweets.STATE_CODES[state] - d1, d2 = datetime.strptime(date1, '%Y-%m-%d'), datetime.strptime(date2, '%Y-%m-%d') - s1, s2 = d1.strftime('%m%%2F%d%%2F%Y'), d2.strftime('%m%%2F%d%%2F%Y') - count_type = 'normalized' if normalized else 'raw' - url = 'http://www.healthtweets.org/trends/plot?resolution=Day&count_type=%s&dayNum=%d&from=%s&to=%s&plot1_disease=65&location_plot1=%d'%(count_type, (d2 - d1).days, s1, s2, state_code) - response = self._go('http://www.healthtweets.org/trends/plot?resolution=Day&count_type=%s&dayNum=%d&from=%s&to=%s&plot1_disease=65&location_plot1=%d'%(count_type, (d2 - d1).days, s1, s2, state_code)) - #print(state, date1, date2, normalized) - #print(url) - #print(response.status_code) - if response.status_code != 200: - raise Exception('plot status is ' + str(response.status_code) + ' (when was data last updated?)') - lines = [line.strip() for line in response.text.split('\n')] - data_line = [line for line in lines if line[:16] == 'var chartData = '] - if len(data_line) != 1: - raise Exception('lookup failed') - values = json.loads(data_line[0][16:-1]) - return dict([(datetime.strptime(v[0], '%m/%d/%Y').strftime('%Y-%m-%d'), float(v[1])) for v in values]) - - def check_state(self, state): - ''' - Sanity checks state code mapping. 
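Because the normalized series reports flu tweets as a percent of all tweets, get_values recovers the daily total as 100 * raw / normalized. A worked example with invented numbers:

raw = 18.0          # flu tweets counted that day
normalized = 0.045  # flu tweets as a percent of all tweets, i.e. 0.045%
total = round(100 * raw / normalized)  # 40000 tweets in total
num = round(raw)                       # 18 flu tweets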
- state: two-letter state abbreviation (see STATE_CODES) - returns the full state name associated with the state abbreviation - ''' - if state not in HealthTweets.STATE_CODES: - raise Exception('invalid state') - state_code = HealthTweets.STATE_CODES[state] - response = self._go('http://www.healthtweets.org/trends/plot?resolution=Day&count_type=normalized&dayNum=7&from=01%%2F01%%2F2015&to=01%%2F07%%2F2015&plot1_disease=65&location_plot1=%d'%(state_code)) - lines = [line.strip() for line in response.text.split('\n')] - data_line = [line for line in lines if line[:29] == 'var plotNames = ["Influenza ('] - if len(data_line) == 0: - raise Exception('check failed') - name = data_line[0][29:] - name = name.split('(')[0] - return name.strip() - - def _get_token(self, html): - page = PageParser.parse(html) - hidden = PageParser.filter_all(page, [('html',), ('body',), ('div',), ('div',), ('div',), ('form',), ('input',)]) - return hidden['attrs']['value'] - - def _go(self, url, method=None, referer=None, data=None): - if self.debug: - print('%s'%(url)) - if method is None: - if data is None: - method = self.session.get - else: - method = self.session.post - response = method(url, headers={'referer': referer}, data=data) - html = response.text - if self.debug: - for item in response.history: - print(' [%d to %s]'%(item.status_code, item.headers['Location'])) - print(' %d (%d bytes)'%(response.status_code, len(html))) - return response + + def __init__(self, username, password, debug=False): + self.debug = debug + self.session = requests.Session() + # spoof a web browser + self.session.headers.update( + { + "User-Agent": "Mozilla/5.0 (Windows NT 6.1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/41.0.2228.0 Safari/537.36", + } + ) + # get the login token + response = self._go("https://www.healthtweets.org/accounts/login") + token = self._get_token(response.text) + if self.debug: + print("token=%s" % (token)) + data = { + "csrfmiddlewaretoken": token, + "username": username, + "password": password, + "next": "/", + } + # login to the site + response = self._go("https://www.healthtweets.org/accounts/login", data=data) + if response.status_code != 200 or "Your username and password" in response.text: + raise Exception("login failed") + + def get_values(self, state, date1, date2): + """ + state: two-letter state abbreviation (see STATE_CODES) + date1: the first date in the range, inclusive (format: YYYY-MM-DD) + date2: the last date in the range, inclusive (format: YYYY-MM-DD) + returns a dictionary (by date) of number of flu tweets (num) and total tweets (total) + """ + # get raw values (number of flu tweets) and normalized values (flu tweets as a percent of total tweets) + raw_values = self._get_values(state, date1, date2, False) + normalized_values = self._get_values(state, date1, date2, True) + values = {} + # save the raw number and calculate the total + for date in raw_values.keys(): + if normalized_values[date] == 0: + continue + values[date] = { + "num": round(raw_values[date]), + "total": round(100 * raw_values[date] / normalized_values[date]), + } + print(date, raw_values[date], normalized_values[date]) + return values + + def _get_values(self, state, date1, date2, normalized): + if state not in HealthTweets.STATE_CODES: + raise Exception("invalid state") + state_code = HealthTweets.STATE_CODES[state] + d1, d2 = datetime.strptime(date1, "%Y-%m-%d"), datetime.strptime(date2, "%Y-%m-%d") + s1, s2 = d1.strftime("%m%%2F%d%%2F%Y"), d2.strftime("%m%%2F%d%%2F%Y") + count_type = "normalized" if 
normalized else "raw" + url = "https://www.healthtweets.org/trends/plot?resolution=Day&count_type=%s&dayNum=%d&from=%s&to=%s&plot1_disease=65&location_plot1=%d" % (count_type, (d2 - d1).days, s1, s2, state_code) + response = self._go( + "https://www.healthtweets.org/trends/plot?resolution=Day&count_type=%s&dayNum=%d&from=%s&to=%s&plot1_disease=65&location_plot1=%d" % (count_type, (d2 - d1).days, s1, s2, state_code) + ) + # print(state, date1, date2, normalized) + # print(url) + # print(response.status_code) + if response.status_code != 200: + raise Exception("plot status is " + str(response.status_code) + " (when was data last updated?)") + lines = [line.strip() for line in response.text.split("\n")] + data_line = [line for line in lines if line[:16] == "var chartData = "] + if len(data_line) != 1: + raise Exception("lookup failed") + values = json.loads(data_line[0][16:-1]) + return dict([(datetime.strptime(v[0], "%m/%d/%Y").strftime("%Y-%m-%d"), float(v[1])) for v in values]) + + def check_state(self, state): + """ + Sanity checks state code mapping. + state: two-letter state abbreviation (see STATE_CODES) + returns the full state name associated with the state abbreviation + """ + if state not in HealthTweets.STATE_CODES: + raise Exception("invalid state") + state_code = HealthTweets.STATE_CODES[state] + response = self._go( + "https://www.healthtweets.org/trends/plot?resolution=Day&count_type=normalized&dayNum=7&from=01%%2F01%%2F2015&to=01%%2F07%%2F2015&plot1_disease=65&location_plot1=%d" % (state_code) + ) + lines = [line.strip() for line in response.text.split("\n")] + data_line = [line for line in lines if line[:29] == 'var plotNames = ["Influenza ('] + if len(data_line) == 0: + raise Exception("check failed") + name = data_line[0][29:] + name = name.split("(")[0] + return name.strip() + + def _get_token(self, html): + page = PageParser.parse(html) + hidden = PageParser.filter_all(page, [("html",), ("body",), ("div",), ("div",), ("div",), ("form",), ("input",)]) + return hidden["attrs"]["value"] + + def _go(self, url, method=None, referer=None, data=None): + if self.debug: + print("%s" % (url)) + if method is None: + if data is None: + method = self.session.get + else: + method = self.session.post + response = method(url, headers={"referer": referer}, data=data) + html = response.text + if self.debug: + for item in response.history: + print(" [%d to %s]" % (item.status_code, item.headers["Location"])) + print(" %d (%d bytes)" % (response.status_code, len(html))) + return response def main(): - # args and usage - parser = argparse.ArgumentParser() - parser.add_argument('username', action='store', type=str, help='healthtweets.org username') - parser.add_argument('password', action='store', type=str, help='healthtweets.org password') - parser.add_argument('state', action='store', type=str, choices=list(HealthTweets.STATE_CODES.keys()), help='U.S. 
state (ex: TX)') - parser.add_argument('date1', action='store', type=str, help='first date, inclusive (ex: 2015-01-01)') - parser.add_argument('date2', action='store', type=str, help='last date, inclusive (ex: 2015-01-01)') - parser.add_argument('-d', '--debug', action='store_const', const=True, default=False, help='enable debug mode') - args = parser.parse_args() - - ht = HealthTweets(args.username, args.password, debug=args.debug) - values = ht.get_values(args.state, args.date1, args.date2) - print('Daily counts in %s from %s to %s:'%(ht.check_state(args.state), args.date1, args.date2)) - for date in sorted(list(values.keys())): - print('%s: num=%-4d total=%-5d (%.3f%%)'%(date, values[date]['num'], values[date]['total'], 100 * values[date]['num'] / values[date]['total'])) - - -if __name__ == '__main__': - main() + # args and usage + parser = argparse.ArgumentParser() + parser.add_argument("username", action="store", type=str, help="healthtweets.org username") + parser.add_argument("password", action="store", type=str, help="healthtweets.org password") + parser.add_argument("state", action="store", type=str, choices=list(HealthTweets.STATE_CODES.keys()), help="U.S. state (ex: TX)") + parser.add_argument("date1", action="store", type=str, help="first date, inclusive (ex: 2015-01-01)") + parser.add_argument("date2", action="store", type=str, help="last date, inclusive (ex: 2015-01-01)") + parser.add_argument("-d", "--debug", action="store_const", const=True, default=False, help="enable debug mode") + args = parser.parse_args() + + ht = HealthTweets(args.username, args.password, debug=args.debug) + values = ht.get_values(args.state, args.date1, args.date2) + print("Daily counts in %s from %s to %s:" % (ht.check_state(args.state), args.date1, args.date2)) + for date in sorted(list(values.keys())): + print("%s: num=%-4d total=%-5d (%.3f%%)" % (date, values[date]["num"], values[date]["total"], 100 * values[date]["num"] / values[date]["total"])) + + +if __name__ == "__main__": + main() diff --git a/src/acquisition/twtr/pageparser.py b/src/acquisition/twtr/pageparser.py index 5e9aaaea1..2b2183c89 100644 --- a/src/acquisition/twtr/pageparser.py +++ b/src/acquisition/twtr/pageparser.py @@ -5,74 +5,73 @@ class PageParser(HTMLParser): - ''' - This is an HTML parser! All of the hard work is done by the superclass - (which is a Python built-in). This class puts the HTML into a hierarchy - that's (hopefully) easier to work with than raw string parsing. - ''' + """ + This is an HTML parser! All of the hard work is done by the superclass + (which is a Python built-in). This class puts the HTML into a hierarchy + that's (hopefully) easier to work with than raw string parsing. 
+ """ - @staticmethod - def parse(html): - parser = PageParser() - parser.feed(html) - return parser.get_root_node() + @staticmethod + def parse(html): + parser = PageParser() + parser.feed(html) + return parser.get_root_node() - @staticmethod - def banlist(): - '''Commonly unclosed tags''' - return ('br', 'img', 'meta') + @staticmethod + def banlist(): + """Commonly unclosed tags""" + return ("br", "img", "meta") - @staticmethod - def new_node(type): - '''An empty node of the HTML tree''' - return {'type': type, 'attrs': {}, 'nodes': [], 'data': ''} + @staticmethod + def new_node(type): + """An empty node of the HTML tree""" + return {"type": type, "attrs": {}, "nodes": [], "data": ""} - @staticmethod - def filter_all(node, filters): - '''Applies all filters''' - for f in filters: - node = PageParser.filter(node, *f) - return node + @staticmethod + def filter_all(node, filters): + """Applies all filters""" + for f in filters: + node = PageParser.filter(node, *f) + return node - @staticmethod - def filter(node, type, index=0): - '''Finds a sub-node of the given type, specified by index''' - i = 0 - for node in node['nodes']: - if node['type'] == type: - if i == index: - return node - i += 1 - return None + @staticmethod + def filter(node, type, index=0): + """Finds a sub-node of the given type, specified by index""" + i = 0 + for node in node["nodes"]: + if node["type"] == type: + if i == index: + return node + i += 1 + return None - def __init__(self): - HTMLParser.__init__(self) - self.root = PageParser.new_node(None) - self.stack = [self.root] - self.indent = 0 + def __init__(self): + HTMLParser.__init__(self) + self.root = PageParser.new_node(None) + self.stack = [self.root] + self.indent = 0 - def get_root_node(self): - '''After parsing, returns the abstract root node (which contains the html node)''' - return self.root + def get_root_node(self): + """After parsing, returns the abstract root node (which contains the html node)""" + return self.root - def handle_starttag(self, tag, attrs): - '''Inherited - called when a start tag is found''' - if tag in PageParser.banlist(): - return - element = PageParser.new_node(tag) - for (k, v) in attrs: - element['attrs'][k] = v - self.stack[-1]['nodes'].append(element) - self.stack.append(element) + def handle_starttag(self, tag, attrs): + """Inherited - called when a start tag is found""" + if tag in PageParser.banlist(): + return + element = PageParser.new_node(tag) + for (k, v) in attrs: + element["attrs"][k] = v + self.stack[-1]["nodes"].append(element) + self.stack.append(element) - def handle_endtag(self, tag): - '''Inherited - called when an end tag is found''' - if tag in PageParser.banlist(): - return - self.stack.pop() + def handle_endtag(self, tag): + """Inherited - called when an end tag is found""" + if tag in PageParser.banlist(): + return + self.stack.pop() - - def handle_data(self, data): - '''Inherited - called when a data string is found''' - element = self.stack[-1] - element['data'] += data + def handle_data(self, data): + """Inherited - called when a data string is found""" + element = self.stack[-1] + element["data"] += data diff --git a/src/acquisition/twtr/twitter_update.py b/src/acquisition/twtr/twitter_update.py index 5c1f3f45b..b2e270c97 100644 --- a/src/acquisition/twtr/twitter_update.py +++ b/src/acquisition/twtr/twitter_update.py @@ -1,4 +1,4 @@ -''' +""" =============== === Purpose === =============== @@ -49,7 +49,7 @@ * Small documentation update 2015-05-22 * Original version -''' +""" # third party import 
mysql.connector @@ -60,46 +60,46 @@ def run(): - # connect to the database - u, p = secrets.db.epi - cnx = mysql.connector.connect(user=u, password=p, database='epidata') - cur = cnx.cursor() - - def get_num_rows(): - cur.execute('SELECT count(1) `num` FROM `twitter`') - for (num,) in cur: - pass - return num - - # check from 7 days preceeding the last date with data through yesterday (healthtweets.org 404's if today's date is part of the range) - cur.execute('SELECT date_sub(max(`date`), INTERVAL 7 DAY) `date1`, date_sub(date(now()), INTERVAL 1 DAY) `date2` FROM `twitter`') - for (date1, date2) in cur: - date1, date2 = date1.strftime('%Y-%m-%d'), date2.strftime('%Y-%m-%d') - print('Checking dates between %s and %s...'%(date1, date2)) - - # keep track of how many rows were added - rows_before = get_num_rows() - - # check healthtweets.org for new and/or revised data - ht = HealthTweets(*secrets.healthtweets.login) - sql = 'INSERT INTO `twitter` (`date`, `state`, `num`, `total`) VALUES (%s, %s, %s, %s) ON DUPLICATE KEY UPDATE `num` = %s, `total` = %s' - total_rows = 0 - for state in sorted(HealthTweets.STATE_CODES.keys()): - values = ht.get_values(state, date1, date2) - for date in sorted(list(values.keys())): - sql_data = (date, state, values[date]['num'], values[date]['total'], values[date]['num'], values[date]['total']) - cur.execute(sql, sql_data) - total_rows += 1 - - # keep track of how many rows were added - rows_after = get_num_rows() - print('Inserted %d/%d row(s)'%(rows_after - rows_before, total_rows)) - - # cleanup - cur.close() - cnx.commit() - cnx.close() - - -if __name__ == '__main__': - run() + # connect to the database + u, p = secrets.db.epi + cnx = mysql.connector.connect(user=u, password=p, database="epidata") + cur = cnx.cursor() + + def get_num_rows(): + cur.execute("SELECT count(1) `num` FROM `twitter`") + for (num,) in cur: + pass + return num + + # check from 7 days preceeding the last date with data through yesterday (healthtweets.org 404's if today's date is part of the range) + cur.execute("SELECT date_sub(max(`date`), INTERVAL 7 DAY) `date1`, date_sub(date(now()), INTERVAL 1 DAY) `date2` FROM `twitter`") + for (date1, date2) in cur: + date1, date2 = date1.strftime("%Y-%m-%d"), date2.strftime("%Y-%m-%d") + print("Checking dates between %s and %s..." % (date1, date2)) + + # keep track of how many rows were added + rows_before = get_num_rows() + + # check healthtweets.org for new and/or revised data + ht = HealthTweets(*secrets.healthtweets.login) + sql = "INSERT INTO `twitter` (`date`, `state`, `num`, `total`) VALUES (%s, %s, %s, %s) ON DUPLICATE KEY UPDATE `num` = %s, `total` = %s" + total_rows = 0 + for state in sorted(HealthTweets.STATE_CODES.keys()): + values = ht.get_values(state, date1, date2) + for date in sorted(list(values.keys())): + sql_data = (date, state, values[date]["num"], values[date]["total"], values[date]["num"], values[date]["total"]) + cur.execute(sql, sql_data) + total_rows += 1 + + # keep track of how many rows were added + rows_after = get_num_rows() + print("Inserted %d/%d row(s)" % (rows_after - rows_before, total_rows)) + + # cleanup + cur.close() + cnx.commit() + cnx.close() + + +if __name__ == "__main__": + run() diff --git a/src/acquisition/wiki/wiki.py b/src/acquisition/wiki/wiki.py index 602e21102..c57582918 100644 --- a/src/acquisition/wiki/wiki.py +++ b/src/acquisition/wiki/wiki.py @@ -1,112 +1,112 @@ """ -=============== -=== Purpose === -=============== - -Wrapper for the entire wiki data collection process: - 1. 
Uses wiki_update.py to fetch metadata for new access logs - 2. Uses wiki_download.py to download the access logs - 3. Uses wiki_extract.py to store article access counts - +=============== +=== Purpose === +=============== + +Wrapper for the entire wiki data collection process: + 1. Uses wiki_update.py to fetch metadata for new access logs + 2. Uses wiki_download.py to download the access logs + 3. Uses wiki_extract.py to store article access counts + See also: master.php - - -======================= -=== Data Dictionary === -======================= - -`wiki_raw` is a staging table where extracted access log data is stored for -further processing. When wiki_update.py finds a new log, it saves the name and -hash to this table, with a status of 0. This table is read by master.php, which -then hands out "jobs" (independently and in parallel) to wiki_download.py. -After wiki_download.py downloads the log and extracts the counts, it submits -the data (as JSON) to master.php, which then stores the "raw" JSON counts in -this table. -+----------+---------------+------+-----+---------+----------------+ -| Field | Type | Null | Key | Default | Extra | -+----------+---------------+------+-----+---------+----------------+ -| id | int(11) | NO | PRI | NULL | auto_increment | -| name | varchar(64) | NO | UNI | NULL | | -| hash | char(32) | NO | | NULL | | -| status | int(11) | NO | MUL | 0 | | -| size | int(11) | YES | | NULL | | -| datetime | datetime | YES | | NULL | | -| worker | varchar(256) | YES | | NULL | | -| elapsed | float | YES | | NULL | | -| data | varchar(2048) | YES | | NULL | | -+----------+---------------+------+-----+---------+----------------+ -id: unique identifier for each record -name: name of the access log -hash: md5 hash of the file, as reported by the dumps site (all zeroes if no - hash is provided) -status: the status of the job, using the following values: - 0: queued for download - 1: download in progress - 2: queued for extraction - 3: extracted to `wiki` table - (any negative value indicates failure) -size: the size, in bytes, of the downloaded file -datetime: the timestamp of the most recent status update -worker: name (user@hostname) of the machine working on the job -elapsed: time, in seconds, taken to complete the job -data: a JSON string containing counts for selected articles in the access log - -`wiki` is the table where access counts are stored (parsed from wiki_raw). The -"raw" JSON counts are parsed by wiki_extract.py and stored directly in this -table. -+----------+-------------+------+-----+---------+----------------+ -| Field | Type | Null | Key | Default | Extra | -+----------+-------------+------+-----+---------+----------------+ -| id | int(11) | NO | PRI | NULL | auto_increment | -| datetime | datetime | NO | MUL | NULL | | -| article | varchar(64) | NO | MUL | NULL | | -| count | int(11) | NO | | NULL | | -+----------+-------------+------+-----+---------+----------------+ -id: unique identifier for each record -datetime: UTC timestamp (rounded to the nearest hour) of article access -article: name of the article -count: number of times the article was accessed in the hour - -`wiki_meta` is a metadata table for this dataset. It contains pre-calculated -date and epiweeks fields, and more importantly, the total number of English -article hits (denominator) for each `datetime` in the `wiki` table. This table -is populated in parallel with `wiki` by the wiki_extract.py script. 
-+----------+----------+------+-----+---------+----------------+ -| Field | Type | Null | Key | Default | Extra | -+----------+----------+------+-----+---------+----------------+ -| id | int(11) | NO | PRI | NULL | auto_increment | -| datetime | datetime | NO | UNI | NULL | | -| date | date | NO | | NULL | | -| epiweek | int(11) | NO | | NULL | | -| total | int(11) | NO | | NULL | | -+----------+----------+------+-----+---------+----------------+ -id: unique identifier for each record -datetime: UTC timestamp (rounded to the nearest hour) of article access -date: the date portion of `datetime` -epiweek: the year and week containing `datetime` -total: total number of English article hits in the hour - - -================= -=== Changelog === -================= - + + +======================= +=== Data Dictionary === +======================= + +`wiki_raw` is a staging table where extracted access log data is stored for +further processing. When wiki_update.py finds a new log, it saves the name and +hash to this table, with a status of 0. This table is read by master.php, which +then hands out "jobs" (independently and in parallel) to wiki_download.py. +After wiki_download.py downloads the log and extracts the counts, it submits +the data (as JSON) to master.php, which then stores the "raw" JSON counts in +this table. ++----------+---------------+------+-----+---------+----------------+ +| Field | Type | Null | Key | Default | Extra | ++----------+---------------+------+-----+---------+----------------+ +| id | int(11) | NO | PRI | NULL | auto_increment | +| name | varchar(64) | NO | UNI | NULL | | +| hash | char(32) | NO | | NULL | | +| status | int(11) | NO | MUL | 0 | | +| size | int(11) | YES | | NULL | | +| datetime | datetime | YES | | NULL | | +| worker | varchar(256) | YES | | NULL | | +| elapsed | float | YES | | NULL | | +| data | varchar(2048) | YES | | NULL | | ++----------+---------------+------+-----+---------+----------------+ +id: unique identifier for each record +name: name of the access log +hash: md5 hash of the file, as reported by the dumps site (all zeroes if no + hash is provided) +status: the status of the job, using the following values: + 0: queued for download + 1: download in progress + 2: queued for extraction + 3: extracted to `wiki` table + (any negative value indicates failure) +size: the size, in bytes, of the downloaded file +datetime: the timestamp of the most recent status update +worker: name (user@hostname) of the machine working on the job +elapsed: time, in seconds, taken to complete the job +data: a JSON string containing counts for selected articles in the access log + +`wiki` is the table where access counts are stored (parsed from wiki_raw). The +"raw" JSON counts are parsed by wiki_extract.py and stored directly in this +table. ++----------+-------------+------+-----+---------+----------------+ +| Field | Type | Null | Key | Default | Extra | ++----------+-------------+------+-----+---------+----------------+ +| id | int(11) | NO | PRI | NULL | auto_increment | +| datetime | datetime | NO | MUL | NULL | | +| article | varchar(64) | NO | MUL | NULL | | +| count | int(11) | NO | | NULL | | ++----------+-------------+------+-----+---------+----------------+ +id: unique identifier for each record +datetime: UTC timestamp (rounded to the nearest hour) of article access +article: name of the article +count: number of times the article was accessed in the hour + +`wiki_meta` is a metadata table for this dataset. 
It contains pre-calculated +date and epiweeks fields, and more importantly, the total number of English +article hits (denominator) for each `datetime` in the `wiki` table. This table +is populated in parallel with `wiki` by the wiki_extract.py script. ++----------+----------+------+-----+---------+----------------+ +| Field | Type | Null | Key | Default | Extra | ++----------+----------+------+-----+---------+----------------+ +| id | int(11) | NO | PRI | NULL | auto_increment | +| datetime | datetime | NO | UNI | NULL | | +| date | date | NO | | NULL | | +| epiweek | int(11) | NO | | NULL | | +| total | int(11) | NO | | NULL | | ++----------+----------+------+-----+---------+----------------+ +id: unique identifier for each record +datetime: UTC timestamp (rounded to the nearest hour) of article access +date: the date portion of `datetime` +epiweek: the year and week containing `datetime` +total: total number of English article hits in the hour + + +================= +=== Changelog === +================= + 2017-02-24 * secrets and small improvements 2016-08-14 * Increased job limit (6 -> 12) (pageviews files are ~2x smaller) -2015-08-26 +2015-08-26 * Reduced job limit (8 -> 6) -2015-08-14 +2015-08-14 * Reduced job limit (10 -> 8) -2015-08-11 +2015-08-11 + New table `wiki_meta` -2015-05-22 +2015-05-22 * Updated status codes for `wiki_raw` table -2015-05-21 +2015-05-21 * Original version """ - + # first party from . import wiki_update from . import wiki_download @@ -115,31 +115,27 @@ def main(): - # step 1: find new access logs (aka "jobs") - print('looking for new jobs...') - try: - wiki_update.run() - except: - print('wiki_update failed') - - # step 2: run a few jobs - print('running jobs...') - try: - wiki_download.run( - secrets.wiki.hmac, - download_limit=1024 * 1024 * 1024, - job_limit=12 - ) - except: - print('wiki_download failed') - - # step 3: extract counts from the staging data - print('extracting counts...') - try: - wiki_extract.run(job_limit=100) - except: - print('wiki_extract failed') - - -if __name__ == '__main__': - main() + # step 1: find new access logs (aka "jobs") + print("looking for new jobs...") + try: + wiki_update.run() + except: + print("wiki_update failed") + + # step 2: run a few jobs + print("running jobs...") + try: + wiki_download.run(secrets.wiki.hmac, download_limit=1024 * 1024 * 1024, job_limit=12) + except: + print("wiki_download failed") + + # step 3: extract counts from the staging data + print("extracting counts...") + try: + wiki_extract.run(job_limit=100) + except: + print("wiki_extract failed") + + +if __name__ == "__main__": + main() diff --git a/src/acquisition/wiki/wiki_download.py b/src/acquisition/wiki/wiki_download.py index 1a01b7f8e..0737df6de 100644 --- a/src/acquisition/wiki/wiki_download.py +++ b/src/acquisition/wiki/wiki_download.py @@ -29,14 +29,15 @@ # python 2 and 3 from __future__ import print_function import sys + if sys.version_info.major == 2: - # python 2 libraries - from urllib import urlencode - from urllib2 import urlopen + # python 2 libraries + from urllib import urlencode + from urllib2 import urlopen else: - # python 3 libraries - from urllib.parse import urlencode - from urllib.request import urlopen + # python 3 libraries + from urllib.parse import urlencode + from urllib.request import urlopen # common libraries import argparse @@ -53,234 +54,233 @@ VERSION = 10 -MASTER_URL = 'https://delphi.cmu.edu/~automation/public/wiki/master.php' +MASTER_URL = "https://delphi.cmu.edu/~automation/public/wiki/master.php" + def 
text(data_string): - return str(data_string.decode('utf-8')) + return str(data_string.decode("utf-8")) def data(text_string): - if sys.version_info.major == 2: - return text_string - else: - return bytes(text_string, 'utf-8') + if sys.version_info.major == 2: + return text_string + else: + return bytes(text_string, "utf-8") def get_hmac_sha256(key, msg): - key_bytes, msg_bytes = key.encode('utf-8'), msg.encode('utf-8') - return hmac.new(key_bytes, msg_bytes, hashlib.sha256).hexdigest() + key_bytes, msg_bytes = key.encode("utf-8"), msg.encode("utf-8") + return hmac.new(key_bytes, msg_bytes, hashlib.sha256).hexdigest() def extract_article_counts(filename, language, articles, debug_mode): - """ - Support multiple languages ('en' | 'es' | 'pt') - Running time optimized to O(M), which means only need to scan the whole file once - :param filename: - :param language: Different languages such as 'en', 'es', and 'pt' - :param articles: - :param debug_mode: - :return: - """ - counts = {} - articles_set = set(map(lambda x: x.lower(), articles)) - total = 0 - with open(filename, "r", encoding="utf8") as f: - for line in f: - content = line.strip().split() - if len(content) != 4: - print('unexpected article format: {0}'.format(line)) - continue - article_title = content[1].lower() - article_count = int(content[2]) - if content[0] == language: - total += article_count - if content[0] == language and article_title in articles_set: - if debug_mode: - print("Find article {0}: {1}".format(article_title, line)) - counts[article_title] = article_count - if debug_mode: - print("Total number of counts for language {0} is {1}".format(language, total)) - counts['total'] = total - return counts + """ + Support multiple languages ('en' | 'es' | 'pt') + Running time optimized to O(M), which means only need to scan the whole file once + :param filename: + :param language: Different languages such as 'en', 'es', and 'pt' + :param articles: + :param debug_mode: + :return: + """ + counts = {} + articles_set = set(map(lambda x: x.lower(), articles)) + total = 0 + with open(filename, "r", encoding="utf8") as f: + for line in f: + content = line.strip().split() + if len(content) != 4: + print("unexpected article format: {0}".format(line)) + continue + article_title = content[1].lower() + article_count = int(content[2]) + if content[0] == language: + total += article_count + if content[0] == language and article_title in articles_set: + if debug_mode: + print("Find article {0}: {1}".format(article_title, line)) + counts[article_title] = article_count + if debug_mode: + print("Total number of counts for language {0} is {1}".format(language, total)) + counts["total"] = total + return counts def extract_article_counts_orig(articles, debug_mode): - """ - The original method which extracts article counts by shell command grep (only support en articles). - As it is difficult to deal with other languages (utf-8 encoding), we choose to use python read files. 
- Another things is that it is slower to go over the whole file once and once again, the time complexity is O(NM), - where N is the number of articles and M is the lines in the file - In our new implementation extract_article_counts(), the time complexity is O(M), and it can cope with utf8 encoding - :param articles: - :param debug_mode: - :return: - """ - counts = {} - for article in articles: - if debug_mode: - print(' %s' % (article)) - out = text( - subprocess.check_output('LC_ALL=C grep -a -i "^en %s " raw2 | cat' % (article.lower()), shell=True)).strip() - count = 0 - if len(out) > 0: - for line in out.split('\n'): - fields = line.split() - if len(fields) != 4: - print('unexpected article format: [%s]' % (line)) - else: - count += int(fields[2]) - # print ' %4d %s'%(count, article) - counts[article.lower()] = count + """ + The original method which extracts article counts by shell command grep (only support en articles). + As it is difficult to deal with other languages (utf-8 encoding), we choose to use python read files. + Another things is that it is slower to go over the whole file once and once again, the time complexity is O(NM), + where N is the number of articles and M is the lines in the file + In our new implementation extract_article_counts(), the time complexity is O(M), and it can cope with utf8 encoding + :param articles: + :param debug_mode: + :return: + """ + counts = {} + for article in articles: + if debug_mode: + print(" %s" % (article)) + out = text(subprocess.check_output('LC_ALL=C grep -a -i "^en %s " raw2 | cat' % (article.lower()), shell=True)).strip() + count = 0 + if len(out) > 0: + for line in out.split("\n"): + fields = line.split() + if len(fields) != 4: + print("unexpected article format: [%s]" % (line)) + else: + count += int(fields[2]) + # print ' %4d %s'%(count, article) + counts[article.lower()] = count + if debug_mode: + print(" %d" % (count)) + print("getting total count...") + out = text(subprocess.check_output('cat raw2 | LC_ALL=C grep -a -i "^en " | cut -d" " -f 3 | awk \'{s+=$1} END {printf "%.0f", s}\'', shell=True)) + total = int(out) if debug_mode: - print(' %d' % (count)) - print('getting total count...') - out = text(subprocess.check_output( - 'cat raw2 | LC_ALL=C grep -a -i "^en " | cut -d" " -f 3 | awk \'{s+=$1} END {printf "%.0f", s}\'', shell=True)) - total = int(out) - if debug_mode: - print(total) - counts['total'] = total - return counts + print(total) + counts["total"] = total + return counts def run(secret, download_limit=None, job_limit=None, sleep_time=1, job_type=0, debug_mode=False): - worker = text(subprocess.check_output("echo `whoami`@`hostname`", shell=True)).strip() - print('this is [%s]'%(worker)) - if debug_mode: - print('*** running in debug mode ***') - - total_download = 0 - passed_jobs = 0 - failed_jobs = 0 - while (download_limit is None or total_download < download_limit) and (job_limit is None or (passed_jobs + failed_jobs) < job_limit): - try: - time_start = datetime.datetime.now() - req = urlopen(MASTER_URL + '?get=x&type=%s'%(job_type)) - code = req.getcode() - if code != 200: - if code == 201: - print('no jobs available') - if download_limit is None and job_limit is None: - time.sleep(60) - continue - else: - print('nothing to do, exiting') - return - else: - raise Exception('server response code (get) was %d'%(code)) - # Make the code compatible with mac os system - if platform == "darwin": - job_content = text(req.readlines()[1]) - else: - job_content = text(req.readlines()[0]) - if job_content == 'no 
jobs': - print('no jobs available') - if download_limit is None and job_limit is None: - time.sleep(60) - continue - else: - print('nothing to do, exiting') - return - job = json.loads(job_content) - print('received job [%d|%s]'%(job['id'], job['name'])) - # updated parsing for pageviews - maybe use a regex in the future - #year, month = int(job['name'][11:15]), int(job['name'][15:17]) - year, month = int(job['name'][10:14]), int(job['name'][14:16]) - #print 'year=%d | month=%d'%(year, month) - url = 'https://dumps.wikimedia.org/other/pageviews/%d/%d-%02d/%s'%(year, year, month, job['name']) - print('downloading file [%s]...'%(url)) - subprocess.check_call('curl -s %s > raw.gz'%(url), shell=True) - print('checking file size...') - # Make the code cross-platfrom, so use python to get the size of the file - # size = int(text(subprocess.check_output('ls -l raw.gz | cut -d" " -f 5', shell=True))) - size = os.stat("raw.gz").st_size - if debug_mode: - print(size) - total_download += size - if job['hash'] != '00000000000000000000000000000000': - print('checking hash...') - out = text(subprocess.check_output('md5sum raw.gz', shell=True)) - result = out[0:32] - if result != job['hash']: - raise Exception('wrong hash [expected %s, got %s]'%(job['hash'], result)) - if debug_mode: - print(result) - print('decompressing...') - subprocess.check_call('gunzip -f raw.gz', shell=True) - #print 'converting case...' - #subprocess.check_call('cat raw | tr "[:upper:]" "[:lower:]" > raw2', shell=True) - #subprocess.check_call('rm raw', shell=True) - subprocess.check_call('mv raw raw2', shell=True) - print('extracting article counts...') - - # Use python to read the file and extract counts, if you want to use the original shell method, please use - counts = {} - for language in wiki_util.Articles.available_languages: - lang2articles = {'en': wiki_util.Articles.en_articles, 'es': wiki_util.Articles.es_articles, 'pt': wiki_util.Articles.pt_articles} - articles = lang2articles[language] - articles = sorted(articles) - if debug_mode: - print("Language is {0} and target articles are {1}".format(language, articles)) - temp_counts = extract_article_counts("raw2", language, articles, debug_mode) - counts[language] = temp_counts - - if not debug_mode: - print('deleting files...') - subprocess.check_call('rm raw2', shell=True) - print('saving results...') - time_stop = datetime.datetime.now() - result = { - 'id': job['id'], - 'size': size, - 'data': json.dumps(counts), - 'worker': worker, - 'elapsed': (time_stop - time_start).total_seconds(), - } - payload = json.dumps(result) - hmac_str = get_hmac_sha256(secret, payload) - if debug_mode: - print(' hmac: %s' % hmac_str) - post_data = urlencode({'put': payload, 'hmac': hmac_str}) - req = urlopen(MASTER_URL, data=data(post_data)) - code = req.getcode() - if code != 200: - raise Exception('server response code (put) was %d'%(code)) - print('done! 
(dl=%d)'%(total_download)) - passed_jobs += 1 - except Exception as ex: - print('***** Caught Exception: %s *****'%(str(ex))) - failed_jobs += 1 - time.sleep(30) - print('passed=%d | failed=%d | total=%d'%(passed_jobs, failed_jobs, passed_jobs + failed_jobs)) - time.sleep(sleep_time) - - if download_limit is not None and total_download >= download_limit: - print('download limit has been reached [%d >= %d]'%(total_download, download_limit)) - if job_limit is not None and (passed_jobs + failed_jobs) >= job_limit: - print('job limit has been reached [%d >= %d]'%(passed_jobs + failed_jobs, job_limit)) + worker = text(subprocess.check_output("echo `whoami`@`hostname`", shell=True)).strip() + print("this is [%s]" % (worker)) + if debug_mode: + print("*** running in debug mode ***") + + total_download = 0 + passed_jobs = 0 + failed_jobs = 0 + while (download_limit is None or total_download < download_limit) and (job_limit is None or (passed_jobs + failed_jobs) < job_limit): + try: + time_start = datetime.datetime.now() + req = urlopen(MASTER_URL + "?get=x&type=%s" % (job_type)) + code = req.getcode() + if code != 200: + if code == 201: + print("no jobs available") + if download_limit is None and job_limit is None: + time.sleep(60) + continue + else: + print("nothing to do, exiting") + return + else: + raise Exception("server response code (get) was %d" % (code)) + # Make the code compatible with mac os system + if platform == "darwin": + job_content = text(req.readlines()[1]) + else: + job_content = text(req.readlines()[0]) + if job_content == "no jobs": + print("no jobs available") + if download_limit is None and job_limit is None: + time.sleep(60) + continue + else: + print("nothing to do, exiting") + return + job = json.loads(job_content) + print("received job [%d|%s]" % (job["id"], job["name"])) + # updated parsing for pageviews - maybe use a regex in the future + # year, month = int(job['name'][11:15]), int(job['name'][15:17]) + year, month = int(job["name"][10:14]), int(job["name"][14:16]) + # print 'year=%d | month=%d'%(year, month) + url = "https://dumps.wikimedia.org/other/pageviews/%d/%d-%02d/%s" % (year, year, month, job["name"]) + print("downloading file [%s]..." % (url)) + subprocess.check_call("curl -s %s > raw.gz" % (url), shell=True) + print("checking file size...") + # Make the code cross-platfrom, so use python to get the size of the file + # size = int(text(subprocess.check_output('ls -l raw.gz | cut -d" " -f 5', shell=True))) + size = os.stat("raw.gz").st_size + if debug_mode: + print(size) + total_download += size + if job["hash"] != "00000000000000000000000000000000": + print("checking hash...") + out = text(subprocess.check_output("md5sum raw.gz", shell=True)) + result = out[0:32] + if result != job["hash"]: + raise Exception("wrong hash [expected %s, got %s]" % (job["hash"], result)) + if debug_mode: + print(result) + print("decompressing...") + subprocess.check_call("gunzip -f raw.gz", shell=True) + # print 'converting case...' 
+ # subprocess.check_call('cat raw | tr "[:upper:]" "[:lower:]" > raw2', shell=True) + # subprocess.check_call('rm raw', shell=True) + subprocess.check_call("mv raw raw2", shell=True) + print("extracting article counts...") + + # Use python to read the file and extract counts, if you want to use the original shell method, please use + counts = {} + for language in wiki_util.Articles.available_languages: + lang2articles = {"en": wiki_util.Articles.en_articles, "es": wiki_util.Articles.es_articles, "pt": wiki_util.Articles.pt_articles} + articles = lang2articles[language] + articles = sorted(articles) + if debug_mode: + print("Language is {0} and target articles are {1}".format(language, articles)) + temp_counts = extract_article_counts("raw2", language, articles, debug_mode) + counts[language] = temp_counts + + if not debug_mode: + print("deleting files...") + subprocess.check_call("rm raw2", shell=True) + print("saving results...") + time_stop = datetime.datetime.now() + result = { + "id": job["id"], + "size": size, + "data": json.dumps(counts), + "worker": worker, + "elapsed": (time_stop - time_start).total_seconds(), + } + payload = json.dumps(result) + hmac_str = get_hmac_sha256(secret, payload) + if debug_mode: + print(" hmac: %s" % hmac_str) + post_data = urlencode({"put": payload, "hmac": hmac_str}) + req = urlopen(MASTER_URL, data=data(post_data)) + code = req.getcode() + if code != 200: + raise Exception("server response code (put) was %d" % (code)) + print("done! (dl=%d)" % (total_download)) + passed_jobs += 1 + except Exception as ex: + print("***** Caught Exception: %s *****" % (str(ex))) + failed_jobs += 1 + time.sleep(30) + print("passed=%d | failed=%d | total=%d" % (passed_jobs, failed_jobs, passed_jobs + failed_jobs)) + time.sleep(sleep_time) + + if download_limit is not None and total_download >= download_limit: + print("download limit has been reached [%d >= %d]" % (total_download, download_limit)) + if job_limit is not None and (passed_jobs + failed_jobs) >= job_limit: + print("job limit has been reached [%d >= %d]" % (passed_jobs + failed_jobs, job_limit)) def main(): - # version info - print('version', VERSION) + # version info + print("version", VERSION) - # args and usage - parser = argparse.ArgumentParser() - parser.add_argument('secret', type=str, help='hmac secret key') - parser.add_argument('-b', '--blimit', action='store', type=int, default=None, help='download limit, in bytes') - parser.add_argument('-j', '--jlimit', action='store', type=int, default=None, help='job limit') - parser.add_argument('-s', '--sleep', action='store', type=int, default=1, help='seconds to sleep between each job') - parser.add_argument('-t', '--type', action='store', type=int, default=0, help='type of job') - parser.add_argument('-d', '--debug', action='store_const', const=True, default=False, help='enable debug mode') - args = parser.parse_args() + # args and usage + parser = argparse.ArgumentParser() + parser.add_argument("secret", type=str, help="hmac secret key") + parser.add_argument("-b", "--blimit", action="store", type=int, default=None, help="download limit, in bytes") + parser.add_argument("-j", "--jlimit", action="store", type=int, default=None, help="job limit") + parser.add_argument("-s", "--sleep", action="store", type=int, default=1, help="seconds to sleep between each job") + parser.add_argument("-t", "--type", action="store", type=int, default=0, help="type of job") + parser.add_argument("-d", "--debug", action="store_const", const=True, default=False, help="enable 
debug mode") + args = parser.parse_args() - # runtime options - secret, download_limit, job_limit, sleep_time, job_type, debug_mode = args.secret, args.blimit, args.jlimit, args.sleep, args.type, args.debug + # runtime options + secret, download_limit, job_limit, sleep_time, job_type, debug_mode = args.secret, args.blimit, args.jlimit, args.sleep, args.type, args.debug - # run - run(secret, download_limit, job_limit, sleep_time, job_type, debug_mode) + # run + run(secret, download_limit, job_limit, sleep_time, job_type, debug_mode) -if __name__ == '__main__': - main() +if __name__ == "__main__": + main() diff --git a/src/acquisition/wiki/wiki_extract.py b/src/acquisition/wiki/wiki_extract.py index 839d7d6dc..cdcc440a6 100644 --- a/src/acquisition/wiki/wiki_extract.py +++ b/src/acquisition/wiki/wiki_extract.py @@ -35,74 +35,80 @@ def floor_timestamp(timestamp): - return datetime(timestamp.year, timestamp.month, timestamp.day, timestamp.hour) + return datetime(timestamp.year, timestamp.month, timestamp.day, timestamp.hour) def ceil_timestamp(timestamp): - return floor_timestamp(timestamp) + timedelta(hours=1) + return floor_timestamp(timestamp) + timedelta(hours=1) def round_timestamp(timestamp): - before = floor_timestamp(timestamp) - after = ceil_timestamp(timestamp) - if (timestamp - before) < (after - timestamp): - return before - else: - return after + before = floor_timestamp(timestamp) + after = ceil_timestamp(timestamp) + if (timestamp - before) < (after - timestamp): + return before + else: + return after def get_timestamp(name): - # new parsing for pageviews compared to pagecounts - maybe switch to regex in the future - #return datetime(int(name[11:15]), int(name[15:17]), int(name[17:19]), int(name[20:22]), int(name[22:24]), int(name[24:26])) - return datetime(int(name[10:14]), int(name[14:16]), int(name[16:18]), int(name[19:21]), int(name[21:23]), int(name[23:25])) + # new parsing for pageviews compared to pagecounts - maybe switch to regex in the future + # return datetime(int(name[11:15]), int(name[15:17]), int(name[17:19]), int(name[20:22]), int(name[22:24]), int(name[24:26])) + return datetime(int(name[10:14]), int(name[14:16]), int(name[16:18]), int(name[19:21]), int(name[21:23]), int(name[23:25])) def run(job_limit=100): - # connect to the database - u, p = secrets.db.epi - cnx = mysql.connector.connect(user=u, password=p, database='epidata') - cur = cnx.cursor() - - # # Some preparation for utf-8, and it is a temporary trick solution. 
The real solution should change those char set and collation encoding to utf8 permanently - # cur.execute("SET NAMES utf8;") - # cur.execute("SET CHARACTER SET utf8;") - # # I print SHOW SESSION VARIABLES LIKE 'character\_set\_%'; and SHOW SESSION VARIABLES LIKE 'collation\_%'; on my local computer - # cur.execute("SET character_set_client=utf8mb4;") - # cur.execute("SET character_set_connection=utf8mb4;") - # cur.execute("SET character_set_database=utf8;") - # cur.execute("SET character_set_results=utf8mb4;") - # cur.execute("SET character_set_server=utf8;") - # cur.execute("SET collation_connection=utf8mb4_general_ci;") - # cur.execute("SET collation_database=utf8_general_ci;") - # cur.execute("SET collation_server=utf8_general_ci;") - - # find jobs that are queued for extraction - cur.execute('SELECT `id`, `name`, `data` FROM `wiki_raw` WHERE `status` = 2 ORDER BY `name` ASC LIMIT %s', (job_limit,)) - jobs = [] - for (id, name, data_str) in cur: - jobs.append((id, name, json.loads(data_str))) - print('Processing data from %d jobs'%(len(jobs))) - - # get the counts from the json object and insert into (or update) the database - # Notice that data_collect contains data with different languages - for (id, name, data_collect) in jobs: - print('processing job [%d|%s]...'%(id, name)) - timestamp = round_timestamp(get_timestamp(name)) - for language in data_collect.keys(): - data = data_collect[language] - for article in sorted(data.keys()): - count = data[article] - cur.execute('INSERT INTO `wiki` (`datetime`, `article`, `count`, `language`) VALUES (%s, %s, %s, %s) ON DUPLICATE KEY UPDATE `count` = `count` + %s', (str(timestamp), article.encode('utf-8').decode('latin-1'), count, language, count)) - if article == 'total': - cur.execute('INSERT INTO `wiki_meta` (`datetime`, `date`, `epiweek`, `total`, `language`) VALUES (%s, date(%s), yearweek(%s, 6), %s, %s) ON DUPLICATE KEY UPDATE `total` = `total` + %s', (str(timestamp), str(timestamp), str(timestamp), count, language, count)) - # update the job - cur.execute('UPDATE `wiki_raw` SET `status` = 3 WHERE `id` = %s', (id,)) - - # cleanup - cur.close() - cnx.commit() - cnx.close() - - -if __name__ == '__main__': - run() + # connect to the database + u, p = secrets.db.epi + cnx = mysql.connector.connect(user=u, password=p, database="epidata") + cur = cnx.cursor() + + # # Some preparation for utf-8, and it is a temporary trick solution. 
The real solution should change those char set and collation encoding to utf8 permanently + # cur.execute("SET NAMES utf8;") + # cur.execute("SET CHARACTER SET utf8;") + # # I print SHOW SESSION VARIABLES LIKE 'character\_set\_%'; and SHOW SESSION VARIABLES LIKE 'collation\_%'; on my local computer + # cur.execute("SET character_set_client=utf8mb4;") + # cur.execute("SET character_set_connection=utf8mb4;") + # cur.execute("SET character_set_database=utf8;") + # cur.execute("SET character_set_results=utf8mb4;") + # cur.execute("SET character_set_server=utf8;") + # cur.execute("SET collation_connection=utf8mb4_general_ci;") + # cur.execute("SET collation_database=utf8_general_ci;") + # cur.execute("SET collation_server=utf8_general_ci;") + + # find jobs that are queued for extraction + cur.execute("SELECT `id`, `name`, `data` FROM `wiki_raw` WHERE `status` = 2 ORDER BY `name` ASC LIMIT %s", (job_limit,)) + jobs = [] + for (id, name, data_str) in cur: + jobs.append((id, name, json.loads(data_str))) + print("Processing data from %d jobs" % (len(jobs))) + + # get the counts from the json object and insert into (or update) the database + # Notice that data_collect contains data with different languages + for (id, name, data_collect) in jobs: + print("processing job [%d|%s]..." % (id, name)) + timestamp = round_timestamp(get_timestamp(name)) + for language in data_collect.keys(): + data = data_collect[language] + for article in sorted(data.keys()): + count = data[article] + cur.execute( + "INSERT INTO `wiki` (`datetime`, `article`, `count`, `language`) VALUES (%s, %s, %s, %s) ON DUPLICATE KEY UPDATE `count` = `count` + %s", + (str(timestamp), article.encode("utf-8").decode("latin-1"), count, language, count), + ) + if article == "total": + cur.execute( + "INSERT INTO `wiki_meta` (`datetime`, `date`, `epiweek`, `total`, `language`) VALUES (%s, date(%s), yearweek(%s, 6), %s, %s) ON DUPLICATE KEY UPDATE `total` = `total` + %s", + (str(timestamp), str(timestamp), str(timestamp), count, language, count), + ) + # update the job + cur.execute("UPDATE `wiki_raw` SET `status` = 3 WHERE `id` = %s", (id,)) + + # cleanup + cur.close() + cnx.commit() + cnx.close() + + +if __name__ == "__main__": + run() diff --git a/src/acquisition/wiki/wiki_update.py b/src/acquisition/wiki/wiki_update.py index 411544810..773b9351d 100644 --- a/src/acquisition/wiki/wiki_update.py +++ b/src/acquisition/wiki/wiki_update.py @@ -32,87 +32,87 @@ def floor_timestamp(timestamp): - return datetime(timestamp.year, timestamp.month, timestamp.day, timestamp.hour) + return datetime(timestamp.year, timestamp.month, timestamp.day, timestamp.hour) def ceil_timestamp(timestamp): - return floor_timestamp(timestamp) + timedelta(hours=1) + return floor_timestamp(timestamp) + timedelta(hours=1) def round_timestamp(timestamp): - before = floor_timestamp(timestamp) - after = ceil_timestamp(timestamp) - if (timestamp - before) < (after - timestamp): - return before - else: - return after + before = floor_timestamp(timestamp) + after = ceil_timestamp(timestamp) + if (timestamp - before) < (after - timestamp): + return before + else: + return after def get_timestamp(name): - # If the program is cold start (there are no previous names in the table, and the name will be None) - if name is None: - curr = datetime.now() - return datetime(curr.year, curr.month, curr.day, curr.hour, curr.minute, curr.second) - # new parsing for pageviews compared to pagecounts - maybe switch to regex in the future - #return datetime(int(name[11:15]), int(name[15:17]), 
int(name[17:19]), int(name[20:22]), int(name[22:24]), int(name[24:26])) - return datetime(int(name[10:14]), int(name[14:16]), int(name[16:18]), int(name[19:21]), int(name[21:23]), int(name[23:25])) + # If the program is cold start (there are no previous names in the table, and the name will be None) + if name is None: + curr = datetime.now() + return datetime(curr.year, curr.month, curr.day, curr.hour, curr.minute, curr.second) + # new parsing for pageviews compared to pagecounts - maybe switch to regex in the future + # return datetime(int(name[11:15]), int(name[15:17]), int(name[17:19]), int(name[20:22]), int(name[22:24]), int(name[24:26])) + return datetime(int(name[10:14]), int(name[14:16]), int(name[16:18]), int(name[19:21]), int(name[21:23]), int(name[23:25])) def get_manifest(year, month, optional=False): - # unlike pagecounts-raw, pageviews doesn't provide hashes - #url = 'https://dumps.wikimedia.org/other/pagecounts-raw/%d/%d-%02d/md5sums.txt'%(year, year, month) - url = 'https://dumps.wikimedia.org/other/pageviews/%d/%d-%02d/' % (year, year, month) - print('Checking manifest at %s...'%(url)) - response = requests.get(url) - if response.status_code == 200: - #manifest = [line.strip().split() for line in response.text.split('\n') if 'pagecounts' in line] - manifest = [('00000000000000000000000000000000', line[9:37]) for line in response.text.split('\n') if ' max_name: - new_logs[name] = hash - print(' New job: %s [%s]'%(name, hash)) - print('Found %d new job(s)'%(len(new_logs))) - - # store metadata for new jobs - for name in sorted(new_logs.keys()): - cur.execute('INSERT INTO `wiki_raw` (`name`, `hash`) VALUES (%s, %s)', (name, new_logs[name])) - - # cleanup - cur.close() - cnx.commit() - cnx.close() - - -if __name__ == '__main__': - run() + # connect to the database + u, p = secrets.db.epi + cnx = mysql.connector.connect(user=u, password=p, database="epidata") + cur = cnx.cursor() + + # get the most recent job in wiki_raw + # luckily, "pageviews" is lexicographically greater than "pagecounts-raw" + cur.execute("SELECT max(`name`) FROM `wiki_raw`") + for (max_name,) in cur: + pass + print("Last known file: %s" % (max_name)) + timestamp = get_timestamp(max_name) + + # crawl dumps.wikimedia.org to find more recent access logs + t1, t2 = floor_timestamp(timestamp), ceil_timestamp(timestamp) + manifest = get_manifest(t1.year, t1.month, optional=False) + if t2.month != t1.month: + manifest += get_manifest(t2.year, t2.month, optional=True) + + # find access logs newer than the most recent job + new_logs = {} + for (hash, name) in manifest: + if max_name is None or name > max_name: + new_logs[name] = hash + print(" New job: %s [%s]" % (name, hash)) + print("Found %d new job(s)" % (len(new_logs))) + + # store metadata for new jobs + for name in sorted(new_logs.keys()): + cur.execute("INSERT INTO `wiki_raw` (`name`, `hash`) VALUES (%s, %s)", (name, new_logs[name])) + + # cleanup + cur.close() + cnx.commit() + cnx.close() + + +if __name__ == "__main__": + run() diff --git a/src/acquisition/wiki/wiki_util.py b/src/acquisition/wiki/wiki_util.py index ed3c743bc..55bf3e2ca 100644 --- a/src/acquisition/wiki/wiki_util.py +++ b/src/acquisition/wiki/wiki_util.py @@ -1,159 +1,156 @@ - - - class Articles: # Notice that all languages must be two chars, because that `language` column in table `wiki` is CHAR(2) - available_languages = ['en', 'es', 'pt'] + available_languages = ["en", "es", "pt"] en_articles_flu = [ - 'Influenza_B_virus', - 'Influenza_A_virus', - 'Human_flu', - 'Influenzavirus_C', - 
'Oseltamivir', - 'Influenza', - 'Influenzavirus_A', - 'Influenza_A_virus_subtype_H1N1', - 'Zanamivir', - 'Influenza-like_illness', - 'Common_cold', - 'Sore_throat', - 'Flu_season', - 'Chills', - 'Fever', - 'Influenza_A_virus_subtype_H2N2', - 'Swine_influenza', - 'Shivering', - 'Canine_influenza', - 'Influenza_A_virus_subtype_H3N2', - 'Neuraminidase_inhibitor', - 'Influenza_pandemic', - 'Viral_pneumonia', - 'Influenza_prevention', - 'Influenza_A_virus_subtype_H1N2', - 'Rhinorrhea', - 'Orthomyxoviridae', - 'Nasal_congestion', - 'Gastroenteritis', - 'Rimantadine', - 'Paracetamol', - 'Amantadine', - 'Viral_neuraminidase', - 'Headache', - 'Influenza_vaccine', - 'Vomiting', - 'Cough', - 'Influenza_A_virus_subtype_H5N1', - 'Nausea', - 'Avian_influenza', - 'Influenza_A_virus_subtype_H7N9', - 'Influenza_A_virus_subtype_H10N7', - 'Influenza_A_virus_subtype_H9N2', - 'Hemagglutinin_(influenza)', - 'Influenza_A_virus_subtype_H7N7', - 'Fatigue_(medical)', - 'Myalgia', - 'Influenza_A_virus_subtype_H7N3', - 'Malaise', - 'Equine_influenza', - 'Cat_flu', - 'Influenza_A_virus_subtype_H3N8', - 'Antiviral_drugs', - 'Influenza_A_virus_subtype_H7N2', + "Influenza_B_virus", + "Influenza_A_virus", + "Human_flu", + "Influenzavirus_C", + "Oseltamivir", + "Influenza", + "Influenzavirus_A", + "Influenza_A_virus_subtype_H1N1", + "Zanamivir", + "Influenza-like_illness", + "Common_cold", + "Sore_throat", + "Flu_season", + "Chills", + "Fever", + "Influenza_A_virus_subtype_H2N2", + "Swine_influenza", + "Shivering", + "Canine_influenza", + "Influenza_A_virus_subtype_H3N2", + "Neuraminidase_inhibitor", + "Influenza_pandemic", + "Viral_pneumonia", + "Influenza_prevention", + "Influenza_A_virus_subtype_H1N2", + "Rhinorrhea", + "Orthomyxoviridae", + "Nasal_congestion", + "Gastroenteritis", + "Rimantadine", + "Paracetamol", + "Amantadine", + "Viral_neuraminidase", + "Headache", + "Influenza_vaccine", + "Vomiting", + "Cough", + "Influenza_A_virus_subtype_H5N1", + "Nausea", + "Avian_influenza", + "Influenza_A_virus_subtype_H7N9", + "Influenza_A_virus_subtype_H10N7", + "Influenza_A_virus_subtype_H9N2", + "Hemagglutinin_(influenza)", + "Influenza_A_virus_subtype_H7N7", + "Fatigue_(medical)", + "Myalgia", + "Influenza_A_virus_subtype_H7N3", + "Malaise", + "Equine_influenza", + "Cat_flu", + "Influenza_A_virus_subtype_H3N8", + "Antiviral_drugs", + "Influenza_A_virus_subtype_H7N2", ] en_articles_noro = [ - 'Norovirus', - 'Diarrhea', - 'Dehydration', - 'Gastroenteritis', - 'Vomiting', - 'Abdominal_pain', - 'Nausea', - 'Foodborne_illness', - 'Rotavirus', - 'Fecal–oral_route', - 'Intravenous_therapy', - 'Oral_rehydration_therapy', - 'Shellfish', - 'Caliciviridae', - 'Leaky_scanning', + "Norovirus", + "Diarrhea", + "Dehydration", + "Gastroenteritis", + "Vomiting", + "Abdominal_pain", + "Nausea", + "Foodborne_illness", + "Rotavirus", + "Fecal–oral_route", + "Intravenous_therapy", + "Oral_rehydration_therapy", + "Shellfish", + "Caliciviridae", + "Leaky_scanning", ] en_articles_dengue = [ - 'Dengue_fever', - 'Dengue_virus', - 'Aedes', - 'Aedes_aegypti', - 'Dengue_vaccine', - 'Mosquito', - 'Mosquito-borne_disease', - 'Blood_transfusion', - 'Paracetamol', - 'Fever', - 'Headache', - 'Rhinitis', - 'Flavivirus', - 'Exanthem', - 'Myalgia', - 'Arthralgia', - 'Thrombocytopenia', - 'Hematuria', - 'Nosebleed', - 'Petechia', - 'Nausea', - 'Vomiting', - 'Diarrhea', + "Dengue_fever", + "Dengue_virus", + "Aedes", + "Aedes_aegypti", + "Dengue_vaccine", + "Mosquito", + "Mosquito-borne_disease", + "Blood_transfusion", + "Paracetamol", + "Fever", + "Headache", 
+ "Rhinitis", + "Flavivirus", + "Exanthem", + "Myalgia", + "Arthralgia", + "Thrombocytopenia", + "Hematuria", + "Nosebleed", + "Petechia", + "Nausea", + "Vomiting", + "Diarrhea", ] en_articles = list(set(en_articles_flu + en_articles_noro + en_articles_dengue)) es_articles = [ - 'Dengue', - 'Virus_dengue', - 'Aedes', - 'Aedes_aegypti', - 'Culicidae', - 'Transfusión_de_sangre', - 'Paracetamol', - 'Fiebre', - 'Cefalea', - 'Coriza', - 'Flavivirus', - 'Exantema', - 'Mosquito', - 'Mialgia', - 'Artralgia', - 'Trombocitopenia', - 'Hematuria', - 'Epistaxis', - 'Petequia', - 'Náusea', - 'Vómito', - 'Diarrea', + "Dengue", + "Virus_dengue", + "Aedes", + "Aedes_aegypti", + "Culicidae", + "Transfusión_de_sangre", + "Paracetamol", + "Fiebre", + "Cefalea", + "Coriza", + "Flavivirus", + "Exantema", + "Mosquito", + "Mialgia", + "Artralgia", + "Trombocitopenia", + "Hematuria", + "Epistaxis", + "Petequia", + "Náusea", + "Vómito", + "Diarrea", ] pt_articles = [ - 'Dengue', - 'Vírus_da_dengue', - 'Aedes', - 'Aedes_aegypti', - 'Culicidae', - 'Transfusão_de_sangue', - 'Paracetamol', - 'Febre', - 'Cefaleia', - 'Coriza', - 'Flavivírus', - 'Exantema', - 'Mialgia', - 'Artralgia', - 'Trombocitopenia', - 'Hematúria', - 'Epistaxe', - 'Petéquia', - 'Náusea', - 'Vômito', - 'Diarreia', + "Dengue", + "Vírus_da_dengue", + "Aedes", + "Aedes_aegypti", + "Culicidae", + "Transfusão_de_sangue", + "Paracetamol", + "Febre", + "Cefaleia", + "Coriza", + "Flavivírus", + "Exantema", + "Mialgia", + "Artralgia", + "Trombocitopenia", + "Hematúria", + "Epistaxe", + "Petéquia", + "Náusea", + "Vômito", + "Diarreia", ]