From 1b254dc9be7823860f4f872ffc248ab35c22676d Mon Sep 17 00:00:00 2001 From: Nat DeFries <42820733+nmdefries@users.noreply.github.com> Date: Mon, 17 Jul 2023 13:58:51 -0400 Subject: [PATCH 01/21] remove dsew dir and ansible template params --- ...dsew_community_profile-params-prod.json.j2 | 53 - dsew_community_profile/.gitignore | 1 - dsew_community_profile/.pylintrc | 22 - dsew_community_profile/DETAILS.md | 133 --- dsew_community_profile/Makefile | 30 - dsew_community_profile/README.md | 86 -- dsew_community_profile/REVIEW.md | 38 - dsew_community_profile/cache/.gitignore | 0 .../delphi_dsew_community_profile/__init__.py | 13 - .../delphi_dsew_community_profile/__main__.py | 12 - .../constants.py | 118 --- .../delphi_dsew_community_profile/pull.py | 909 ------------------ .../delphi_dsew_community_profile/run.py | 127 --- dsew_community_profile/input_cache/.gitignore | 1 - dsew_community_profile/params.json.template | 45 - dsew_community_profile/setup.py | 30 - dsew_community_profile/static/.gitignore | 0 .../tests/params.json.template | 34 - dsew_community_profile/tests/test_pull.py | 612 ------------ dsew_community_profile/version.cfg | 1 - 20 files changed, 2265 deletions(-) delete mode 100644 ansible/templates/dsew_community_profile-params-prod.json.j2 delete mode 100644 dsew_community_profile/.gitignore delete mode 100644 dsew_community_profile/.pylintrc delete mode 100644 dsew_community_profile/DETAILS.md delete mode 100644 dsew_community_profile/Makefile delete mode 100644 dsew_community_profile/README.md delete mode 100644 dsew_community_profile/REVIEW.md delete mode 100644 dsew_community_profile/cache/.gitignore delete mode 100644 dsew_community_profile/delphi_dsew_community_profile/__init__.py delete mode 100644 dsew_community_profile/delphi_dsew_community_profile/__main__.py delete mode 100644 dsew_community_profile/delphi_dsew_community_profile/constants.py delete mode 100644 dsew_community_profile/delphi_dsew_community_profile/pull.py delete mode 100644 dsew_community_profile/delphi_dsew_community_profile/run.py delete mode 100644 dsew_community_profile/input_cache/.gitignore delete mode 100644 dsew_community_profile/params.json.template delete mode 100644 dsew_community_profile/setup.py delete mode 100644 dsew_community_profile/static/.gitignore delete mode 100644 dsew_community_profile/tests/params.json.template delete mode 100644 dsew_community_profile/tests/test_pull.py delete mode 100644 dsew_community_profile/version.cfg diff --git a/ansible/templates/dsew_community_profile-params-prod.json.j2 b/ansible/templates/dsew_community_profile-params-prod.json.j2 deleted file mode 100644 index f8777c510..000000000 --- a/ansible/templates/dsew_community_profile-params-prod.json.j2 +++ /dev/null @@ -1,53 +0,0 @@ -{ - "common": { - "export_dir": "/common/covidcast/receiving/dsew-cpr", - "log_filename": "/var/log/indicators/dsew_cpr.log" - }, - "indicator": { - "input_cache": "./input_cache", - "reports": "new", - "export_signals": [ - "confirmed covid-19 admissions", - "doses administered", - "booster doses administered", - "fully vaccinated", - "booster dose since", - "positivity" - ] - }, - "validation": { - "common": { - "data_source": "dsew-cpr", - "api_credentials": "{{ validation_api_key }}", - "span_length": 14, - "min_expected_lag": {"all": "3"}, - "max_expected_lag": {"all": "9"}, - "dry_run": true, - "suppressed_errors": [] - }, - "static": { - "minimum_sample_size": 0, - "missing_se_allowed": true, - "missing_sample_size_allowed": true - }, - "dynamic": { - 
"ref_window_size": 7, - "smoothed_signals": [ - "naats_positivity_7dav", - "confirmed_admissions_covid_1d_prop_7dav", - "confirmed_admissions_covid_1d_7dav", - "doses_admin_7dav", - "booster_doses_admin_7dav" - ] - } - }, - "archive": { - "aws_credentials": { - "aws_access_key_id": "{{ delphi_aws_access_key_id }}", - "aws_secret_access_key": "{{ delphi_aws_secret_access_key }}" - }, - "bucket_name": "delphi-covidcast-indicator-output", - "cache_dir": "./cache", - "indicator_prefix": "delphi_dsew_community_profile" - } -} diff --git a/dsew_community_profile/.gitignore b/dsew_community_profile/.gitignore deleted file mode 100644 index fec731824..000000000 --- a/dsew_community_profile/.gitignore +++ /dev/null @@ -1 +0,0 @@ -input_cache/* diff --git a/dsew_community_profile/.pylintrc b/dsew_community_profile/.pylintrc deleted file mode 100644 index f30837c7e..000000000 --- a/dsew_community_profile/.pylintrc +++ /dev/null @@ -1,22 +0,0 @@ - -[MESSAGES CONTROL] - -disable=logging-format-interpolation, - too-many-locals, - too-many-arguments, - # Allow pytest functions to be part of a class. - no-self-use, - # Allow pytest classes to have one test. - too-few-public-methods - -[BASIC] - -# Allow arbitrarily short-named variables. -variable-rgx=[a-z_][a-z0-9_]* -argument-rgx=[a-z_][a-z0-9_]* -attr-rgx=[a-z_][a-z0-9_]* - -[DESIGN] - -# Don't complain about pytest "unused" arguments. -ignored-argument-names=(_.*|run_as_module) \ No newline at end of file diff --git a/dsew_community_profile/DETAILS.md b/dsew_community_profile/DETAILS.md deleted file mode 100644 index 56816ee06..000000000 --- a/dsew_community_profile/DETAILS.md +++ /dev/null @@ -1,133 +0,0 @@ -# Dataset layout - -The Data Strategy and Execution Workgroup (DSEW) publishes a Community Profile -Report each weekday, comprising a pair of files: an Excel workbook (.xlsx) and a -PDF which shows select metrics from the workbook as time series charts and -choropleth maps. These files are listed as attachments on the healthdata.gov -site: - -https://healthdata.gov/Health/COVID-19-Community-Profile-Report/gqxm-d9w9 - -Each Excel file attachment has a filename. The filename contains a date, -presumably the publish date. The attachment also has an alphanumeric -assetId. Both the filename and the assetId are required for downloading the -file. Whether this means that updated versions of a particular file may be -uploaded by DSEW at later times is not known. The attachment does not explicitly -list an upload timestamp. To be safe, we cache our downloads using both the -assetId and the filename. - -# Workbook layout - -Each Excel file is a workbook with multiple sheets. The exemplar file used in -writing this indicator is "Community Profile Report 20211102.xlsx". 
The sheets -include: - -- User Notes: Instructions for using the workbook -- Overview: US National figures for the last 5 weeks, plus monthly peaks back to - April 2020 -- Regions*: Figures for FEMA regions (double-checked: they match HHS regions - except that FEMA 2 does not include Palau while HHS 2 does) -- States*: Figures for US states and territories -- CBSAs*: Figures for US Census Block Statistical Areas -- Counties*: Figures for US counties -- Weekly Transmission Categories: Lists of high, substantial, and moderate - transmission states and territories -- National Peaks: Monthly national peaks back to April 2020 -- National Historic: Daily national figures back to January 22 2020 -- Data Notes: Source and methods information for all metrics -- Color Thresholds: Color-coding is used extensively in all sheets; these are - the keys - -The starred sheets above have nearly-identical column layouts, and together -cover the county, MSA, state, and HHS geographical levels used in -covidcast. Rather than aggregate them ourselves and risk a mismatch, this -indicator lifts these geographical aggregations directly from the corresponding -sheets of the workbook. - -GeoMapper _is_ used to generate national figures from -state, due to architectural differences between the starred sheets and the -Overview sheet. If we discover that our nation-level figures differ too much -from those listed in the Overview sheet, we can add dedicated parsing for the -Overview sheet and remove GeoMapper from this indicator altogether. - -# Sheet layout - -## Headers - -Each starred sheet has two rows of headers. The first row uses merged cells to -group several columns together under a single "overheader". This overheader -often includes the reference period for that group of columns, such as: - -- CASES/DEATHS: LAST WEEK (October 26-November 1) -- TESTING: LAST WEEK (October 24-30, Test Volume October 20-26) -- TESTING: PREVIOUS WEEK (October 17-23, Test Volume October 13-19) - -Overheaders have changed periodically since the first report. For example, the -"TESTING: LAST WEEK" overheader above has also appeared as "VIRAL (RT-PCR) LAB -TESTING: LAST WEEK", with and without a separate reference date for Test -Volume. All known overheader forms are checked in test_pull.py. - -The second row contains a header for each column. The headers uniquely identify -each column included in the sheet. Column headers include spaces, and typically -specify both the metric and the reference period over which it was calculated, -such as: - -- Total NAATs - last 7 days (may be an underestimate due to delayed reporting) -- NAAT positivity rate - previous 7 days (may be an underestimate due to delayed - reporting) - -Columns headers have also changed periodically since the first report. For -example, the "Total NAATs - last 7 days" header above has also appeared as -"Total RT-PCR diagnostic tests - last 7 days". - -## Contents - -Each starred sheet contains test positivity and total test volume figures for -two reference periods, "last [week]" and "previous [week]". In some reports, the -reference periods for test positivity and total test volume are the same; in -others, they are different, such that the report contains figures for four -distinct reference periods, two for each metric we extract. - -# Time series conversions and parsing notes - -## Reference date - -The reference period in the overheader never includes the year. 
We guess the -reference year by picking the same year as the publish date (i.e., the date -extracted from the filename), and if the reference month is greater than the -publish month, subtract 1 from the reference year. This adequately covers the -December-January boundary. - -We select as reference date the end date of the reference period for each -metric. Reference periods are always 7 days, so this indicator produces -seven-day averages. We divide the total testing volume by seven and leave the -test positivity alone. - -## Geo ID - -The Counties sheet lists FIPS codes numerically, such that FIPS with a leading -zero only have four digits. We fix this by zero-filling to five characters. - -MSAs are a subset of CBSAs. We fix this by selecting only CBSAs with type -"Metropolitan". - -Most of the starred sheets have the geo id as the first non-index column. The -Region sheet has no such column. We fix this by generating the HHS ids from the -index column instead. - -## Combining multiple reports - -Each report file generates two reference dates for each metric, up to four -reference dates total. Since it's not clear whether new versions of past files -are ever made available, the default mode (params.indicator.reports="new") -fetches any files that are not already in the input cache, then combines the -results into a single data frame before exporting. This will generate correct -behavior should (for instance) a previously-downloaded file get a new assetId. - -For the initial run on an empty input cache, and for runs configured to process -a range of reports (using params.indicator.reports=YYYY-mm-dd--YYYY-mm-dd), this -indicator makes no distinction between figures that came from different -reports. That may not be what you want. If the covidcast issue date needs to -match the date on the report filename, then the indicator must instead be run -repeatedly, with equal start and end dates, keeping the output of each run -separate. diff --git a/dsew_community_profile/Makefile b/dsew_community_profile/Makefile deleted file mode 100644 index bdea33afd..000000000 --- a/dsew_community_profile/Makefile +++ /dev/null @@ -1,30 +0,0 @@ -.PHONY = venv, lint, test, clean - -dir = $(shell find ./delphi_* -name __init__.py | grep -o 'delphi_[_[:alnum:]]*') - -venv: - python3.8 -m venv env - -install: venv - . env/bin/activate; \ - pip install wheel ; \ - pip install -e ../_delphi_utils_python ;\ - pip install -e . - -install-ci: venv - . env/bin/activate; \ - pip install wheel ; \ - pip install ../_delphi_utils_python ;\ - pip install . - -lint: - . env/bin/activate; pylint $(dir) - . env/bin/activate; pydocstyle $(dir) - -test: - . env/bin/activate ;\ - (cd tests && ../env/bin/pytest --cov=$(dir) --cov-report=term-missing) - -clean: - rm -rf env - rm -f params.json diff --git a/dsew_community_profile/README.md b/dsew_community_profile/README.md deleted file mode 100644 index dc8e1ef09..000000000 --- a/dsew_community_profile/README.md +++ /dev/null @@ -1,86 +0,0 @@ -# COVID-19 Community Profile Report - -The Data Strategy and Execution Workgroup (DSEW) publishes a Community Profile -Report each weekday at this location: - -https://healthdata.gov/Health/COVID-19-Community-Profile-Report/gqxm-d9w9 - -This indicator extracts COVID-19 test figures from these reports. - -Indicator-specific parameters: - -* `input_cache`: a directory where Excel (.xlsx) files downloaded from - healthdata.gov will be stored for posterity. 
Each file is 3.3 MB in size, so - we expect this directory to require ~1GB of disk space for each year of - operation. -* `reports`: {new | all | YYYY-mm-dd--YYYY-mm-dd} a string indicating which - reports to export. The default, "new", downloads and exports only reports not - already found in the input cache. The "all" setting exports data for all - available reports, downloading them to the input cache if necessary. The date - range setting refers to the date listed in the filename for the report, - presumably the publish date. Only reports named with a date within the - specified range (inclusive) will be downloaded to the input cache if necessary - and exported. -* `export_start_date`: a YYYY-mm-dd string indicating the first date to export. -* `export_end_date`: a YYYY-mm-dd string indicating the final date to export. -* `export_signals`: list of string keys from constants.SIGNALS indicating which - signals to export - -## Running the Indicator - -The indicator is run by directly executing the Python module contained in this -directory. The safest way to do this is to create a virtual environment, -installed the common DELPHI tools, and then install the module and its -dependencies. To do this, run the following command from this directory: - -``` -make install -``` - -This command will install the package in editable mode, so you can make changes that -will automatically propagate to the installed package. - -All of the user-changable parameters are stored in `params.json`. To execute -the module and produce the output datasets (by default, in `receiving`), run -the following: - -``` -env/bin/python -m delphi_dsew_community_profile -``` - -If you want to enter the virtual environment in your shell, -you can run `source env/bin/activate`. Run `deactivate` to leave the virtual environment. - -Once you are finished, you can remove the virtual environment and -params file with the following: - -``` -make clean -``` - -## Testing the code - -To run static tests of the code style, run the following command: - -``` -make lint -``` - -Unit tests are also included in the module. To execute these, run the following -command from this directory: - -``` -make test -``` - -To run individual tests, run the following: - -``` -(cd tests && ../env/bin/pytest .py --cov=delphi_dsew_community_profile --cov-report=term-missing) -``` - -The output will show the number of unit tests that passed and failed, along -with the percentage of code covered by the tests. - -None of the linting or unit tests should fail, and the code lines that are not covered by unit tests should be small and -should not include critical sub-routines. diff --git a/dsew_community_profile/REVIEW.md b/dsew_community_profile/REVIEW.md deleted file mode 100644 index 03f87b17a..000000000 --- a/dsew_community_profile/REVIEW.md +++ /dev/null @@ -1,38 +0,0 @@ -## Code Review (Python) - -A code review of this module should include a careful look at the code and the -output. To assist in the process, but certainly not in replace of it, please -check the following items. 
- -**Documentation** - -- [ ] the README.md file template is filled out and currently accurate; it is -possible to load and test the code using only the instructions given -- [ ] minimal docstrings (one line describing what the function does) are -included for all functions; full docstrings describing the inputs and expected -outputs should be given for non-trivial functions - -**Structure** - -- [ ] code should pass lint checks (`make lint`) -- [ ] any required metadata files are checked into the repository and placed -within the directory `static` -- [ ] any intermediate files that are created and stored by the module should -be placed in the directory `cache` -- [ ] final expected output files to be uploaded to the API are placed in the -`receiving` directory; output files should not be committed to the respository -- [ ] all options and API keys are passed through the file `params.json` -- [ ] template parameter file (`params.json.template`) is checked into the -code; no personal (i.e., usernames) or private (i.e., API keys) information is -included in this template file - -**Testing** - -- [ ] module can be installed in a new virtual environment (`make install`) -- [ ] reasonably high level of unit test coverage covering all of the main logic -of the code (e.g., missing coverage for raised errors that do not currently seem -possible to reach are okay; missing coverage for options that will be needed are -not) -- [ ] all unit tests run without errors (`make test`) -- [ ] indicator directory has been added to GitHub CI -(`covidcast-indicators/.github/workflows/python-ci.yml`) diff --git a/dsew_community_profile/cache/.gitignore b/dsew_community_profile/cache/.gitignore deleted file mode 100644 index e69de29bb..000000000 diff --git a/dsew_community_profile/delphi_dsew_community_profile/__init__.py b/dsew_community_profile/delphi_dsew_community_profile/__init__.py deleted file mode 100644 index 52a507259..000000000 --- a/dsew_community_profile/delphi_dsew_community_profile/__init__.py +++ /dev/null @@ -1,13 +0,0 @@ -# -*- coding: utf-8 -*- -"""Module to pull and clean indicators from the XXXXX source. - -This file defines the functions that are made public by the module. As the -module is intended to be executed though the main method, these are primarily -for testing. -""" - -from __future__ import absolute_import - -from . import run - -__version__ = "0.1.0" diff --git a/dsew_community_profile/delphi_dsew_community_profile/__main__.py b/dsew_community_profile/delphi_dsew_community_profile/__main__.py deleted file mode 100644 index ab5a749dc..000000000 --- a/dsew_community_profile/delphi_dsew_community_profile/__main__.py +++ /dev/null @@ -1,12 +0,0 @@ -# -*- coding: utf-8 -*- -"""Call the function run_module when executed. - -This file indicates that calling the module (`python -m delphi_dsew_community_profile`) will -call the function `run_module` found within the run.py file. There should be -no need to change this template. 
-""" - -from delphi_utils import read_params -from .run import run_module # pragma: no cover - -run_module(read_params()) # pragma: no cover diff --git a/dsew_community_profile/delphi_dsew_community_profile/constants.py b/dsew_community_profile/delphi_dsew_community_profile/constants.py deleted file mode 100644 index 632bfc2ae..000000000 --- a/dsew_community_profile/delphi_dsew_community_profile/constants.py +++ /dev/null @@ -1,118 +0,0 @@ -"""Registry for variations.""" -from collections.abc import Callable as function -from dataclasses import dataclass - -URL_PREFIX = "https://healthdata.gov/api/views/gqxm-d9w9" -DOWNLOAD_ATTACHMENT = URL_PREFIX + "/files/{assetId}?download=true&filename={filename}" -DOWNLOAD_LISTING = URL_PREFIX + ".json" - -INTERP_LENGTH = 5 - -@dataclass -class Transform: - """Transformation filters for interpreting a particular sheet in the workbook.""" - - name: str = None - level: str = None - row_filter: function = None - geo_id_select: function = None - geo_id_apply: function = None - -T_FIRST = lambda df: df[df.columns[0]] -TRANSFORMS = { - t.name: t for t in [ - Transform( - name="Regions", - level="hhs", - geo_id_select=lambda df: df.index.to_series(), - geo_id_apply=lambda x: x.replace("Region ", "") - ), - Transform( - name="States", - level="state", - geo_id_select=T_FIRST, - geo_id_apply=lambda x: x.lower() - ), - Transform( - name="CBSAs", - level="msa", - row_filter=lambda df: df['CBSA type'] == "Metropolitan", - geo_id_select=T_FIRST, - geo_id_apply=lambda x: f"{x}" - ), - Transform( - name="Counties", - level="county", - geo_id_select=T_FIRST, - geo_id_apply=lambda x: f"{x:05}" - ) - ]} - -# key: signal id, string pattern used to find column to report as signal -# is_rate: originating signal is a percentage (e.g. test positivity) -# is_cumulative: originating signal is cumulative (e.g. vaccine doses ever administered) -# api_name: name to use in API -# make_prop: report originating signal as-is and per 100k population -# api_prop_name: name to use in API for proportion signal -SIGNALS = { - "total": { - "is_rate" : False, - "api_name": "covid_naat_num_7dav", - "make_prop": False, - "is_cumulative" : False - }, - "positivity": { - "is_rate" : True, - "api_name": "covid_naat_pct_positive_7dav", - "make_prop": False, - "is_cumulative" : False - }, - "confirmed covid-19 admissions": { - "is_rate" : False, - "api_name": "confirmed_admissions_covid_1d_7dav", - "make_prop": True, - "api_prop_name": "confirmed_admissions_covid_1d_prop_7dav", - "is_cumulative" : False - }, - "fully vaccinated": { - "is_rate" : False, - "api_name": "people_full_vaccinated", - "make_prop": False, - "is_cumulative" : True - }, - "booster dose since": { - "is_rate" : False, - "api_name": "people_booster_doses", - "make_prop": False, - "is_cumulative" : True - }, - "booster doses administered": { - "is_rate" : False, - "api_name": "booster_doses_admin_7dav", - "make_prop": False, - "is_cumulative" : False - }, - "doses administered": { - "is_rate" : False, - "api_name": "doses_admin_7dav", - "make_prop": False, - "is_cumulative" : False - } -} - -COUNTS_7D_SIGNALS = {key for key, value in SIGNALS.items() \ - if not((value["is_rate"]) or (value["is_cumulative"]))} - -def make_signal_name(key, is_prop=False): - """Convert a signal key to the corresponding signal name for the API. - - Note, this function gets called twice with the same `key` for signals that support - population-proportion ("prop") variants. 
- """ - if is_prop: - return SIGNALS[key]["api_prop_name"] - return SIGNALS[key]["api_name"] - -NEWLINE = "\n" -IS_PROP = True -NOT_PROP = False diff --git a/dsew_community_profile/delphi_dsew_community_profile/pull.py b/dsew_community_profile/delphi_dsew_community_profile/pull.py deleted file mode 100644 index e9b8d24a1..000000000 --- a/dsew_community_profile/delphi_dsew_community_profile/pull.py +++ /dev/null @@ -1,909 +0,0 @@ -# -*- coding: utf-8 -*- -"""Functions to call when downloading data.""" -from dataclasses import dataclass -import datetime -import os -import re -from typing import Dict, Tuple -from urllib.parse import quote_plus as quote_as_url - -import pandas as pd -import numpy as np -import requests - -from delphi_utils.geomap import GeoMapper - -from .constants import ( - TRANSFORMS, SIGNALS, COUNTS_7D_SIGNALS, NEWLINE, - IS_PROP, NOT_PROP, - DOWNLOAD_ATTACHMENT, DOWNLOAD_LISTING, - INTERP_LENGTH -) - -DataDict = Dict[Tuple[str, str, bool], pd.DataFrame] - -# YYYYMMDD -# example: "Community Profile Report 20211104.xlsx" -RE_DATE_FROM_FILENAME = re.compile(r'.*([0-9]{4})([0-9]{2})([0-9]{2}).*xlsx') - -# example: "TESTING: LAST WEEK (October 24-30, Test Volume October 20-26)" -# example: "TESTING: PREVIOUS WEEK (October 17-23, Test Volume October 13-19)" -DATE_EXP = r'(?:([A-Za-z]*) )?([0-9]{1,2})' -DATE_RANGE_EXP = f"{DATE_EXP}-{DATE_EXP}" -RE_DATE_FROM_TEST_HEADER = re.compile( - rf'.*TESTING: (.*) WEEK \({DATE_RANGE_EXP}(?:, Test Volume ({DATE_RANGE_EXP}))? *\)' -) - -# example: "HOSPITAL UTILIZATION: LAST WEEK (January 2-8)" -RE_DATE_FROM_HOSP_HEADER = re.compile( - rf'HOSPITAL UTILIZATION: (.*) WEEK \({DATE_RANGE_EXP}\)' -) - -# example: "COVID-19 VACCINATION DATA: LAST WEEK (January 5-11)" -RE_DATE_FROM_VAC_HEADER_WEEK= re.compile( - rf'COVID-19 VACCINATION DATA: (.*) WEEK \({DATE_RANGE_EXP}\)' -) - -# example: 'COVID-19 VACCINATION DATA: CUMULATIVE (January 11)' -RE_DATE_FROM_VAC_HEADER_CUMULATIVE= re.compile( - rf'COVID-19 VACCINATION DATA: CUMULATIVE (.*)\({DATE_EXP}\)' -) - -# example: "NAAT positivity rate - last 7 days (may be an underestimate due to delayed reporting)" -# example: "Total NAATs - last 7 days (may be an underestimate due to delayed reporting)" -RE_COLUMN_FROM_HEADER = re.compile('- (.*) 7 days') - -@dataclass -class DatasetTimes: - """Collect reference dates for a column.""" - - column: str - positivity_reference_date: datetime.date - total_reference_date: datetime.date - hosp_reference_date: datetime.date - vac_reference_date: datetime.date - cumulative_vac_reference_date: datetime.date - - @staticmethod - def from_header(header, publish_date): - """Convert reference dates in overheader to DatasetTimes.""" - positivity_reference_date = None - total_reference_date = None - hosp_reference_date = None - vac_reference_date = None - cumulative_vac_reference_date= None - def as_date(sub_result, is_single_date): - if is_single_date: - month = sub_result[0] - day = sub_result[1] - month_numeric = datetime.datetime.strptime(month, "%B").month - else: - month = sub_result[2] if sub_result[2] else sub_result[0] - assert month, f"Bad month in header: {header}\nsub_result: {sub_result}" - month_numeric = datetime.datetime.strptime(month, "%B").month - day = sub_result[3] - year = publish_date.year - # year boundary - if month_numeric > publish_date.month: - year -= 1 - return datetime.datetime.strptime(f"{year}-{month}-{day}", "%Y-%B-%d").date() - - if RE_DATE_FROM_TEST_HEADER.match(header): - findall_result = RE_DATE_FROM_TEST_HEADER.findall(header)[0] - 
column = findall_result[0].lower() - positivity_reference_date = as_date(findall_result[1:5], False) - if findall_result[6]: - # Reports published starting 2021-03-17 specify different reference - # dates for positivity and total test volume - total_reference_date = as_date(findall_result[6:10], False) - else: - total_reference_date = positivity_reference_date - elif RE_DATE_FROM_HOSP_HEADER.match(header): - findall_result = RE_DATE_FROM_HOSP_HEADER.findall(header)[0] - column = findall_result[0].lower() - hosp_reference_date = as_date(findall_result[1:5], False) - elif RE_DATE_FROM_VAC_HEADER_WEEK.match(header): - findall_result = RE_DATE_FROM_VAC_HEADER_WEEK.findall(header)[0] - column = findall_result[0].lower() - vac_reference_date = as_date(findall_result[1:5], False) - elif RE_DATE_FROM_VAC_HEADER_CUMULATIVE.match(header): - findall_result = RE_DATE_FROM_VAC_HEADER_CUMULATIVE.findall(header)[0] - column = findall_result[0].lower() - cumulative_vac_reference_date = as_date(findall_result[1:], True) - else: - raise ValueError(f"Couldn't find reference date in header '{header}'") - return DatasetTimes(column, positivity_reference_date, - total_reference_date, hosp_reference_date, - cumulative_vac_reference_date, vac_reference_date) - def __getitem__(self, key): - """Use DatasetTimes like a dictionary.""" - ref_list = list(SIGNALS.keys()) - if key.lower()=="positivity": - return self.positivity_reference_date - if key.lower()=="total": - return self.total_reference_date - if key.lower()=="confirmed covid-19 admissions": - return self.hosp_reference_date - if key.lower() in ["doses administered","booster doses administered"]: - return self.cumulative_vac_reference_date - if key.lower() in ["fully vaccinated","booster dose since"]: - return self.vac_reference_date - raise ValueError( - f"Bad reference date type request '{key}'; " + \ - "need one of: " + " ,".join(ref_list) - ) - def __setitem__(self, key, newvalue): - """Use DatasetTimes like a dictionary.""" - ref_list = list(SIGNALS.keys()) - if key.lower()=="positivity": - self.positivity_reference_date = newvalue - if key.lower()=="total": - self.total_reference_date = newvalue - if key.lower()=="confirmed covid-19 admissions": - self.hosp_reference_date = newvalue - if key.lower() in ["doses administered","booster doses administered"]: - self.cumulative_vac_reference_date = newvalue - if key.lower() in ["fully vaccinated","booster dose since"]: - self.vac_reference_date = newvalue - if key.lower() not in ref_list: - raise ValueError( - f"Bad reference date type request '{key}'; " + \ - "need one of: " + " ,".join(ref_list) - ) - def __eq__(self, other): - """Check equality by value.""" - return isinstance(other, DatasetTimes) and \ - other.column == self.column and \ - other.positivity_reference_date == self.positivity_reference_date and \ - other.total_reference_date == self.total_reference_date - -class Dataset: - """All data extracted from a single report file.""" - - def __init__(self, config, sheets=TRANSFORMS.keys(), logger=None): - """Create a new Dataset instance. - - Download and cache the requested report file. - - Parse the file into data frames at multiple geo levels. 
- """ - self.publish_date = self.parse_publish_date(config['filename']) - self.url = DOWNLOAD_ATTACHMENT.format( - assetId=config['assetId'], - filename=quote_as_url(config['filename']) - ) - if not os.path.exists(config['cached_filename']): - if logger: - logger.info("Downloading file", filename=config['cached_filename']) - resp = requests.get(self.url) - with open(config['cached_filename'], 'wb') as f: - f.write(resp.content) - - self.workbook = pd.ExcelFile(config['cached_filename']) - - self.dfs = {} - self.times = {} - for si in sheets: - assert si in TRANSFORMS, f"Bad sheet requested: {si}" - if logger: - logger.info("Building dfs", - sheet=f"{si}", - filename=config['cached_filename']) - sheet = TRANSFORMS[si] - self._parse_times_for_sheet(sheet) - self._parse_sheet(sheet) - - @staticmethod - def parse_publish_date(report_filename): - """Extract publish date from filename.""" - return datetime.date( - *[int(x) for x in RE_DATE_FROM_FILENAME.findall(report_filename)[0]] - ) - @staticmethod - def skip_overheader(header): - """Ignore irrelevant overheaders.""" - # include "TESTING: [LAST|PREVIOUS] WEEK (October 24-30, Test Volume October 20-26)" - # include "VIRAL (RT-PCR) LAB TESTING: [LAST|PREVIOUS] WEEK (August 24-30, ..." - # include "HOSPITAL UTILIZATION: LAST WEEK (January 2-8)" - return not (isinstance(header, str) and \ - (((header.startswith("TESTING:") or \ - header.startswith("VIRAL (RT-PCR) LAB TESTING:") or \ - header.startswith("HOSPITAL UTILIZATION: ")) and \ - # exclude "TESTING: % CHANGE FROM PREVIOUS WEEK" \ - # exclude "TESTING: DEMOGRAPHIC DATA" \ - # exclude "HOSPITAL UTILIZATION: CHANGE FROM PREVIOUS WEEK" \ - # exclude "HOSPITAL UTILIZATION: DEMOGRAPHIC DATA" \ - header.find("WEEK (") > 0) or \ - # include "COVID-19 VACCINATION DATA: CUMULATIVE (January 25)" - # include "COVID-19 VACCINATION DATA: LAST WEEK (January 25-31)" - (header.startswith("COVID-19 VACCINATION DATA: CUMULATIVE") or - header.startswith("COVID-19 VACCINATION DATA: LAST WEEK") \ - ))) - - - def _parse_times_for_sheet(self, sheet): - """Record reference dates for this sheet.""" - # grab reference dates from overheaders - overheaders = pd.read_excel( - self.workbook, sheet_name=sheet.name, - header=None, - nrows=1 - ).values.flatten().tolist() - for h in overheaders: - if self.skip_overheader(h): - continue - - dt = DatasetTimes.from_header(h, self.publish_date) - if dt.column in self.times: - # Items that are not None should be the same between sheets. - # Fill None items with the newly calculated version of the - # field from dt. - for sig in SIGNALS: - if self.times[dt.column][sig] is not None and dt[sig] is not None: - assert self.times[dt.column][sig] == dt[sig], \ - f"Conflicting reference date from {sheet.name} {dt[sig]}" + \ - f"vs previous {self.times[dt.column][sig]}" - elif self.times[dt.column][sig] is None: - self.times[dt.column][sig] = dt[sig] - else: - self.times[dt.column] = dt - - if self.publish_date <= datetime.date(2021, 1, 11): - # No vaccination data available, so we only have hospitalization and testing overheaders - assert len(self.times) == 2, \ - f"No times extracted from overheaders:\n{NEWLINE.join(str(s) for s in overheaders)}" - else: - assert len(self.times) == 3, \ - f"No times extracted from overheaders:\n{NEWLINE.join(str(s) for s in overheaders)}" - - @staticmethod - def retain_header(header): - """Ignore irrelevant headers.""" - return all([ - # include "Total NAATs - [last|previous] 7 days ..." 
- # include "Total RT-PCR diagnostic tests - [last|previous] 7 days ..." - # include "NAAT positivity rate - [last|previous] 7 days ..." - # include "Viral (RT-PCR) lab test positivity rate - [last|previous] 7 days ..." - # include "Booster doses administered - [last|previous] 7 days ..." - # include "Doses administered - [last|previous] 7 days ..." - (header.startswith("Total NAATs") or - header.startswith("NAAT positivity rate") or - header.startswith("Total RT-PCR") or - header.startswith("Viral (RT-PCR)") or - header.startswith("Booster") or - header.startswith("Doses administered -") - ), - # exclude "NAAT positivity rate - absolute change ..." - header.find("7 days") > 0, - # exclude "NAAT positivity rate - last 7 days - ages <5" - header.find(" ages") < 0, - ]) or ( - # include "Confirmed COVID-19 admissions - last 7 days" - # exclude "Confirmed COVID-19 admissions - percent change" - # exclude "Confirmed COVID-19 admissions - last 7 days - ages <18" - # exclude "Confirmed COVID-19 admissions - last 7 days - age unknown" - # exclude "Confirmed COVID-19 admissions per 100 inpatient beds - last 7 days" - # exclude "Confirmed COVID-19 admissions per 100k - last 7 days" - header == "Confirmed COVID-19 admissions - last 7 days" - ) or all([ - # include "People who are fully vaccinated" - # include "People who have received a booster dose since August 13, 2021" - header.startswith("People who"), - # exclude "People who are fully vaccinated as % of total population" - # exclude "People who have received a booster dose as % of fully vaccinated population" - header.find("%") < 0, - # exclude "People who are fully vaccinated - ages 5-11" ... - # exclude "People who have received a booster dose - ages 65+" ... - header.find(" age") < 0, - # exclude "People who are fully vaccinated - 12-17" ... - header.find("-") < 0, - ]) or all([ - # include "People with a completed primary series" - header.startswith("People with a completed primary series"), - # exclude "People with a completed primary series as % of adult population" - header.find("%") < 0, - # exclude "People with a completed primary series - ages 65+" - header.find(" age") < 0, - # exclude "People with a completed primary series - 12-17" ... - header.find("-") < 0, - ]) or all([ - # include "People with full course administered" - header.startswith("People with full course"), - # exclude "People with full course administered as % of adult population" - header.find("%") < 0, - ]) - def _parse_sheet(self, sheet): - """Extract data frame for this sheet.""" - df = pd.read_excel( - self.workbook, - sheet_name=sheet.name, - header=1, - index_col=0, - ) - if sheet.row_filter: - df = df.loc[sheet.row_filter(df)] - - - def select_fn(h): - """Allow for default to the 7-day in the name of the dataframe column.""" - try: - return (RE_COLUMN_FROM_HEADER.findall(h)[0], h, h.lower()) - except IndexError: - return ("", h, h.lower()) - - select = [ - select_fn(h) - for h in list(df.columns) - if self.retain_header(h) - ] - - for sig in SIGNALS: - ## Check if field is known to be missing - # Hospital admissions not available at the county or CBSA level prior to Jan 8, 2021. - is_hosp_adm_before_jan8 = (sheet.level == "msa" or sheet.level == "county") \ - and self.publish_date < datetime.date(2021, 1, 8) \ - and sig == "confirmed covid-19 admissions" - # Booster data not available before November 1 2021. 
- is_booster_before_nov1 = self.publish_date < datetime.date(2021, 11, 1) \ - and (sig in ["booster dose since", "booster doses administered"]) - # Booster and weekly doses administered not available below the state level. - is_booster_below_state = ((sheet.level != "hhs" and sheet.level != "state") \ - and (sig in ["doses administered", \ - "booster doses administered", "booster dose since"])) - # Weekly doses administered not available on or before Apr 29, 2021. - is_dose_admin_apr29 = self.publish_date <= datetime.date(2021, 4, 29) \ - and sig == "doses administered" - # People fully vaccinated not available on or before Apr 11, 2021 at the CBSA level. - is_fully_vax_msa_before_apr11 = (sheet.level == "msa" or sheet.level == "county") \ - and self.publish_date <= datetime.date(2021, 4, 11) \ - and sig == "fully vaccinated" - # People fully vaccinated not available before Jan 15, 2021 at any geo level. - is_fully_vax_before_jan14 = self.publish_date <= datetime.date(2021, 1, 14) \ - and sig == "fully vaccinated" - - if any([is_hosp_adm_before_jan8, - is_booster_before_nov1, - is_booster_below_state, - is_dose_admin_apr29, - is_fully_vax_msa_before_apr11, - is_fully_vax_before_jan14 - ]): - self.dfs[(sheet.level, sig, NOT_PROP)] = pd.DataFrame( - columns = ["geo_id", "timestamp", "val", \ - "se", "sample_size", "publish_date"] - ) - continue - - sig_select = [s for s in select if s[-1].find(sig) >= 0] - # The name of the cumulative vaccination was changed after 03/09/2021 - # when J&J vaccines were added. - # fully vacinated signal was renamed again on 01/12/2023 - if (sig == "fully vaccinated") and (len(sig_select) == 0): - # Read these headers if "fully vaccinated" not found in source data - other_sigs = [ - "people with a completed primary series", - "people with full course administered" - ] - sig_select = [s for s in select if s[-1] in other_sigs] - - # Since "doses administered" is a substring of another desired header, - # "booster doses administered", we need to more strictly check if "doses administered" - # occurs at the beginning of a header to find the correct match. 
- if sig == "doses administered": - sig_select = [s for s in select if s[-1].startswith(sig)] - assert len(sig_select) > 0, \ - f"No {sig} in any of {select}\n\nAll headers:\n{NEWLINE.join(list(df.columns))}" - - self.dfs[(sheet.level, sig, NOT_PROP)] = pd.concat([ - pd.DataFrame({ - "geo_id": sheet.geo_id_select(df).apply(sheet.geo_id_apply), - "timestamp": pd.to_datetime(self.times[si[0]][sig]), - "val": df[si[-2]], - "se": None, - "sample_size": None, - "publish_date": self.publish_date - }) - for si in sig_select - ]) - - for sig in COUNTS_7D_SIGNALS: - assert (sheet.level, sig, NOT_PROP) in self.dfs.keys() - self.dfs[(sheet.level, sig, NOT_PROP)]["val"] /= 7 # 7-day total -> 7-day average - -def as_cached_filename(params, config): - """Formulate a filename to uniquely identify this report in the input cache.""" - # eg "Community Profile Report 20220128.xlsx" - # but delimiters vary; don't get tripped up if they do something wacky like - # Community.Profile.Report.20220128.xlsx - name, _, ext = config['filename'].rpartition(".") - return os.path.join( - params['indicator']['input_cache'], - f"{name}--{config['assetId']}.{ext}" - ) - -def fetch_listing(params): - """Generate the list of report files to process.""" - export_start_date = params['indicator'].get( - 'export_start_date', datetime.datetime.utcfromtimestamp(0).date() - ) - - listing = requests.get(DOWNLOAD_LISTING).json()['metadata']['attachments'] - # drop the pdf files - listing = [ - dict( - el, - cached_filename=as_cached_filename(params, el), - publish_date=Dataset.parse_publish_date(el['filename']) - ) - for el in listing if el['filename'].endswith("xlsx") - ] - - def check_valid_publish_date(x): - return x['publish_date'] >= export_start_date - - if params['indicator']['reports'] == 'new': - # drop files we already have in the input cache - keep = [ - el for el in listing - if not os.path.exists(el['cached_filename']) and check_valid_publish_date(el) - ] - elif params['indicator']['reports'].find("--") > 0: - # drop files outside the specified publish-date range - start_str, _, end_str = params['indicator']['reports'].partition("--") - start_date = datetime.datetime.strptime(start_str, "%Y-%m-%d").date() - end_date = datetime.datetime.strptime(end_str, "%Y-%m-%d").date() - keep = [ - el for el in listing - if (start_date <= el['publish_date'] <= end_date) and check_valid_publish_date(el) - ] - elif params['indicator']['reports'] == 'all': - keep = [ - el for el in listing if check_valid_publish_date(el) - ] - else: - raise ValueError("params['indicator']['reports'] is set to" \ - + f" {params['indicator']['reports']}, which isn't 'new', 'all', or a date range.") - - return extend_listing_for_interp(keep, listing) - -def extend_listing_for_interp(keep, listing): - """Grab additional files from the full listing for interpolation if needed. - - Selects files based purely on publish_date, so may include duplicates where - multiple reports for a single publish_date are available. - - Parameters: - - keep: list of reports desired in the final output - - listing: complete list of reports available from healthdata.gov - - Returns: list of reports including keep and additional files needed for - interpolation. 
- """ - publish_date_keeplist = set() - for el in keep: - # starts at 0 so includes keep publish_dates - for i in range(INTERP_LENGTH): - publish_date_keeplist.add(el['publish_date'] - datetime.timedelta(days=i)) - keep = [el for el in listing if el['publish_date'] in publish_date_keeplist] - return keep - -def download_and_parse(listing, logger): - """Convert a list of report files into Dataset instances.""" - datasets = {} - for item in listing: - d = Dataset(item, logger=logger) - for sig, df in d.dfs.items(): - if sig not in datasets: - datasets[sig] = [] - datasets[sig].append(df) - return datasets - -def nation_from_state(df, sig, geomapper): - """Compute nation level from state df.""" - if df.empty: - return df - if SIGNALS[sig]["is_rate"]: # true if sig is a rate - df = geomapper.add_population_column(df, "state_id") \ - .rename(columns={"population":"weight"}) - - norm_denom = df.groupby("timestamp").agg(norm_denom=("weight", "sum")) - df = df.join( - norm_denom, on="timestamp", how="left" - ).assign( - weight=lambda x: x.weight / x.norm_denom - ).drop( - "norm_denom", axis=1 - ) - # The filter in `fetch_new_reports` to keep most recent publish date - # gurantees that we'll only see one unique publish date per timestamp - # here, so just keep the first obs of each group. - publish_date_by_ts = df.groupby( - ["timestamp"] - )["publish_date"].first( - ).reset_index( - ) - df = geomapper.replace_geocode( - df.drop("publish_date", axis=1), - 'state_id', - 'nation', - new_col="geo_id" - ) - df["se"] = None - df["sample_size"] = None - # Recreate publish_date column - df = pd.merge(df, publish_date_by_ts, on="timestamp", how="left") - - return df - -def keep_latest_report(df, sig): - """Keep data associated with most recent report for each timestamp.""" - df = df.groupby( - "timestamp" - ).apply( - lambda x: x[x["publish_date"] == x["publish_date"].max()] - ).drop_duplicates( - ) - - if not df.empty: - df = df.reset_index(drop=True) - assert all(df.groupby( - ["timestamp", "geo_id"] - ).size( - ).reset_index( - drop=True - ) == 1), f"Duplicate rows in {sig} indicate that one or" \ - + " more reports were published multiple times and the copies differ" - - return df - -def fetch_new_reports(params, logger=None): - """Retrieve, compute, and collate all data we haven't seen yet.""" - listing = fetch_listing(params) - - # download and parse individual reports - datasets = download_and_parse(listing, logger) - # collect like signals together, keeping most recent publish date - ret = {} - - for key, lst in datasets.items(): - (_, sig, _) = key - latest_key_df = pd.concat(lst) - if sig in ("total", "positivity"): - latest_key_df = pd.concat(apply_thres_change_date( - keep_latest_report, - latest_key_df, - [sig] * 2 - )) - else: - latest_key_df = keep_latest_report(latest_key_df, sig) - - if not latest_key_df.empty: - ret[key] = latest_key_df - - # add nation from state - geomapper = GeoMapper() - for sig in SIGNALS: - state_key = ("state", sig, NOT_PROP) - if state_key not in ret: - continue - - if sig in ("total", "positivity"): - nation_df = pd.concat(apply_thres_change_date( - nation_from_state, - ret[state_key].rename(columns={"geo_id": "state_id"}), - [sig] * 2, - [geomapper] * 2 - )) - else: - nation_df = nation_from_state( - ret[state_key].rename(columns={"geo_id": "state_id"}), - sig, - geomapper - ) - ret[("nation", sig, NOT_PROP)] = nation_df - - for key, df in ret.copy().items(): - (geo, sig, prop) = key - - if sig == "positivity": - # Combine with test volume using publish date. 
- total_key = (geo, "total", prop) - ret[key] = unify_testing_sigs( - df, ret[total_key] - ).drop( - "publish_date", axis=1 - ) - - # No longer need "total" signal. - del ret[total_key] - elif sig != "total": - # If signal is not test volume or test positivity, we don't need - # publish date. - df = df.drop("publish_date", axis=1) - ret[key] = df - - if SIGNALS[sig]["make_prop"]: - ret[(geo, sig, IS_PROP)] = generate_prop_signal(df, geo, geomapper) - - ret = interpolate_missing_values(ret) - - return ret - -def interpolate_missing_values(dfs: DataDict) -> DataDict: - """Interpolates each signal in the dictionary of dfs.""" - interpolate_df = dict() - for key, df in dfs.items(): - # Here we exclude the 'positivity' signal from interpolation. This is a temporary fix. - # https://github.com/cmu-delphi/covidcast-indicators/issues/1576 - _, sig, _ = key - if sig == "positivity": - reindexed_group_df = df.set_index(["geo_id", "timestamp"]).sort_index().reset_index() - interpolate_df[key] = reindexed_group_df[~reindexed_group_df.val.isna()] - continue - - geo_dfs = [] - for geo, group_df in df.groupby("geo_id"): - reindexed_group_df = group_df.set_index("timestamp").reindex( - pd.date_range(group_df.timestamp.min(), group_df.timestamp.max()) - ) - reindexed_group_df["geo_id"] = geo - if "val" in reindexed_group_df.columns and not reindexed_group_df["val"].isna().all(): - reindexed_group_df["val"] = ( - reindexed_group_df["val"] - .astype(float) - .interpolate(method="linear", limit_area="inside") - ) - if "se" in reindexed_group_df.columns: - reindexed_group_df["se"] = ( - reindexed_group_df["se"] - .astype(float) - .interpolate(method="linear", limit_area="inside") - ) - if ( - "sample_size" in reindexed_group_df.columns - and not reindexed_group_df["sample_size"].isna().all() - ): - reindexed_group_df["sample_size"] = ( - reindexed_group_df["sample_size"] - .astype(float) - .interpolate(method="linear", limit_area="inside") - ) - if "publish_date" in reindexed_group_df.columns: - reindexed_group_df["publish_date"] = reindexed_group_df["publish_date"].fillna( - method="bfill" - ) - reindexed_group_df = reindexed_group_df[~reindexed_group_df.val.isna()] - geo_dfs.append(reindexed_group_df) - interpolate_df[key] = ( - pd.concat(geo_dfs) - .reset_index() - .rename(columns={"index": "timestamp"}) - .set_index(["geo_id", "timestamp"]) - .sort_index() - .reset_index() - ) - return interpolate_df - -def generate_prop_signal(df, geo, geo_mapper): - """Transform base df into a proportion (per 100k population).""" - if geo == "state": - geo = "state_id" - if geo == "county": - geo = "fips" - - # Add population data - if geo == "msa": - map_df = geo_mapper.get_crosswalk("fips", geo) - map_df = geo_mapper.add_population_column( - map_df, "fips" - ).drop( - "fips", axis=1 - ).groupby( - geo - ).sum( - numeric_only=True - ).reset_index( - ) - df = pd.merge(df, map_df, left_on="geo_id", right_on=geo, how="inner") - else: - df = geo_mapper.add_population_column(df, geo, geocode_col="geo_id") - - df["val"] = df["val"] / df["population"] * 100000 - df.drop(["population", geo], axis=1, inplace=True) - - return df - -def unify_testing_sigs(positivity_df, volume_df): - """ - Drop any observations with a sample size of 5 or less. Generate standard errors. - - This combines test positivity and testing volume into a single signal, - where testing volume *from the same spreadsheet/publish date* (NOT the - same reference date) is used as the sample size for test positivity. 
- - Total testing volume is typically provided for a 7-day period about 4 days - before the test positivity period. Since the CPR is only published on - weekdays, test positivity and test volume are only available for the same - reported dates 3 times a week. We have chosen to censor 7dav test - positivity based on the 7dav test volume provided in the same originating - spreadsheet, corresponding to a period ~4 days earlier. - - This approach makes the signals maximally available (5 days per week) with - low latency. It avoids complications of having to process multiple - spreadsheets each day, and the fact that test positivity and test volume - are not available for all the same reference dates. - - Discussion of decision and alternatives (Delphi-internal share drive): - https://docs.google.com/document/d/1MoIimdM_8OwG4SygoeQ9QEVZzIuDl339_a0xoYa6vuA/edit# - - """ - # Check that we have positivity *and* volume for each publishdate+geo, and - # that they have the same number of timestamps. - pos_count_ts = positivity_df.groupby( - ["publish_date", "geo_id"] - ).agg( - num_obs=("timestamp", "count"), - num_unique_obs=("timestamp", "nunique") - ) - vol_count_ts = volume_df.groupby( - ["publish_date", "geo_id"] - ).agg( - num_obs=("timestamp", "count"), - num_unique_obs=("timestamp", "nunique") - ) - merged = pos_count_ts.merge( - vol_count_ts, - on=["geo_id", "publish_date"], - how="outer", - indicator=True - ) - assert all( - merged["_merge"] == "both" - ) and all( - merged.num_obs_x == merged.num_obs_y - ) and all( - merged.num_unique_obs_x == merged.num_unique_obs_y - ), \ - "Each publish date-geo value combination should be available for both " + \ - "test positivity and test volume, and have the same number of timestamps available." - assert len(positivity_df.index) == len(volume_df.index), \ - "Test positivity and volume data have different numbers of observations." - expected_rows = len(positivity_df.index) - - volume_df = add_max_ts_col(volume_df)[ - ["geo_id", "publish_date", "val", "is_max_group_ts"] - ].rename( - columns={"val":"sample_size"} - ) - col_order = list(positivity_df.columns) - positivity_df = add_max_ts_col(positivity_df).drop(["sample_size"], axis=1) - - # Combine test positivity and test volume, maintaining "this week" and - # "previous week" status. Perform outer join here so that we can later - # check if any observations did not have a match. - df = pd.merge( - positivity_df, volume_df, - on=["publish_date", "geo_id", "is_max_group_ts"], - how="outer", - indicator=True - ).drop( - ["is_max_group_ts"], axis=1 - ) - - # Check that every volume observation was matched with a positivity observation. - assert (len(df.index) == expected_rows) and all(df["_merge"] == "both"), \ - "Some observations in the test positivity data were not matched with test volume data." - - # Drop everything with 5 or fewer total tests. - df = df.loc[df.sample_size > 5] - - # Generate stderr. - df = df.assign( - se=std_err(df) - ).drop( - ["_merge"], - axis=1 - ) - - return df[col_order] - -def add_max_ts_col(df): - """ - Add column to differentiate timestamps for a given publish date-geo combo. - - Each publish date is associated with up to two timestamps for test volume - and test positivity. The older timestamp corresponds to data from the - "previous week"; the newer timestamp corresponds to the "last week". 
- - Since test volume and test positivity timestamps don't match exactly, we - can't use them to merge the two signals together, but we still need a way - to uniquely identify observations to avoid duplicating observations during - the join. This new column, which is analagous to the "last/previous week" - classification, is used to merge on. - """ - assert_df = df.groupby( - ["publish_date", "geo_id"] - ).agg( - num_obs=("timestamp", "count"), - num_unique_obs=("timestamp", "nunique") - ) - assert all( - assert_df.num_obs <= 2 - ) and all( - assert_df.num_obs == assert_df.num_unique_obs - ), "Testing signals should have up to two timestamps per publish date-geo level " + \ - "combination. Each timestamp should be unique." - - max_ts_by_group = df.groupby( - ["publish_date", "geo_id"], as_index=False - )["timestamp"].max( - ).rename( - columns={"timestamp":"max_timestamp"} - ) - df = pd.merge( - df, max_ts_by_group, - on=["publish_date", "geo_id"], - how="outer" - ).assign( - is_max_group_ts=lambda df: df["timestamp"] == df["max_timestamp"] - ).drop( - ["max_timestamp"], axis=1 - ) - - return df - -def std_err(df): - """ - Find Standard Error of a binomial proportion. - - Assumes input sample_size are all > 0. - - Parameters - ---------- - df: pd.DataFrame - Columns: val, sample_size, ... - - Returns - ------- - pd.Series - Standard error of the positivity rate of PCR-specimen tests. - """ - assert all(df.sample_size > 0), "Sample sizes must be greater than 0" - p = df.val - n = df.sample_size - return np.sqrt(p * (1 - p) / n) - -def apply_thres_change_date(apply_fn, df, *apply_fn_args): - """ - Apply a function separately to data before and after the test volume change date. - - The test volume change date is when test volume and test positivity - started being reported for different reference dates within the same - report. This first occurred on 2021-03-17. - - Parameters - ---------- - apply_fn: function - function to apply to data before and after the test volume change date - df: pd.DataFrame - Columns: val, sample_size, ... - apply_fn_args: tuple of lists - variable number of additional arguments to pass to the `apply_fn`. - Each additional argument should be a list of length 2. The first - element of each list will be passed to the `apply_fn` when processing - pre-change date data; the second element will be used for the - post-change date data. - - Returns - ------- - map object - Iterator with two entries, one for the "before" data and one for the "after" data. - """ - change_date = datetime.date(2021, 3, 17) - list_of_dfs = [df[df.publish_date < change_date], df[df.publish_date >= change_date]] - - for arg_field in apply_fn_args: - assert len(arg_field) == 2, "Extra arguments must be iterables with " + \ - "length 2, the same as the number of dfs to process" - - return map(apply_fn, list_of_dfs, *apply_fn_args) diff --git a/dsew_community_profile/delphi_dsew_community_profile/run.py b/dsew_community_profile/delphi_dsew_community_profile/run.py deleted file mode 100644 index 6de1443a7..000000000 --- a/dsew_community_profile/delphi_dsew_community_profile/run.py +++ /dev/null @@ -1,127 +0,0 @@ -# -*- coding: utf-8 -*- -"""Functions to call when running the indicator. - -This module should contain a function called `run_module`, that is executed when -the module is run with `python -m delphi_dsew_community_profile`. -`run_module`'s lone argument should be a nested dictionary of parameters loaded -from the params.json file. 
We expect the `params` to have the following -structure: - - - "common": - - "export_dir": str, directory to which the results are exported - - "log_filename": (optional) str, path to log file - - "indicator": (optional) - - Any other indicator-specific settings -""" -from datetime import datetime -import time - -from delphi_utils import get_structured_logger -from delphi_utils.export import create_export_csv -import pandas as pd -import covidcast - -from .constants import make_signal_name, SIGNALS -from .pull import fetch_new_reports - - -def run_module(params): - """ - Run the indicator. - - Arguments - -------- - params: Dict[str, Any] - Nested dictionary of parameters. - """ - start_time = time.time() - logger = get_structured_logger( - __name__, filename=params["common"].get("log_filename"), - log_exceptions=params["common"].get("log_exceptions", True)) - def replace_date_param(p): - if p in params["indicator"]: - if params["indicator"][p] is None: - del params["indicator"][p] - else: - date_param = datetime.strptime(params["indicator"][p], "%Y-%m-%d").date() - params["indicator"][p] = date_param - replace_date_param("export_start_date") - replace_date_param("export_end_date") - export_params = { - 'start_date': params["indicator"].get("export_start_date", None), - 'end_date': params["indicator"].get("export_end_date", None) - } - export_params = { - k: pd.to_datetime(v) if v is not None else v - for k, v in export_params.items() - } - - run_stats = [] - dfs = fetch_new_reports(params, logger) - for key, df in dfs.items(): - (geo, sig, is_prop) = key - if sig not in params["indicator"]["export_signals"]: - continue - dates = create_export_csv( - df, - params['common']['export_dir'], - geo, - make_signal_name(sig, is_prop), - **export_params - ) - if len(dates)>0: - run_stats.append((max(dates), len(dates))) - - ## If any requested signal is not in metadata, generate it for all dates. - # - # Only do so if params.reports is set to "new". If set to "all", the - # previous fetch_new_reports + CSV loop will already have generated the full - # history for new signals. If params.reports is set to a specific date - # range, that request overrides automated backfill. - if params['indicator']['reports'] == 'new': - # Fetch metadata to check how recent signals are - metadata = covidcast.metadata() - sensor_names = { - SIGNALS[key][name_field]: key - for key in params["indicator"]["export_signals"] - for name_field in ["api_name", "api_prop_name"] - if name_field in SIGNALS[key].keys() - } - - # Filter to only those we currently want to produce - cpr_metadata = metadata[(metadata.data_source == "dsew-cpr") & - (metadata.signal.isin(sensor_names.keys()))] - - new_signals = set(sensor_names.keys()).difference(set(cpr_metadata.signal)) - if new_signals: - # If any signal not in metadata yet, we need to backfill its full - # history. 
- params['indicator']['reports'] = 'all' - params['indicator']['export_signals'] = {sensor_names[key] for key in new_signals} - - dfs = fetch_new_reports(params, logger) - for key, df in dfs.items(): - (geo, sig, is_prop) = key - if sig not in params["indicator"]["export_signals"]: - continue - dates = create_export_csv( - df, - params['common']['export_dir'], - geo, - make_signal_name(sig, is_prop), - **export_params - ) - if len(dates)>0: - run_stats.append((max(dates), len(dates))) - - ## log this indicator run - elapsed_time_in_seconds = round(time.time() - start_time, 2) - min_max_date = run_stats and min(s[0] for s in run_stats) - csv_export_count = sum(s[-1] for s in run_stats) - max_lag_in_days = min_max_date and (datetime.now() - min_max_date).days - formatted_min_max_date = min_max_date and min_max_date.strftime("%Y-%m-%d") - logger.info("Completed indicator run", - elapsed_time_in_seconds = elapsed_time_in_seconds, - csv_export_count = csv_export_count, - max_lag_in_days = max_lag_in_days, - oldest_final_export_date = formatted_min_max_date) diff --git a/dsew_community_profile/input_cache/.gitignore b/dsew_community_profile/input_cache/.gitignore deleted file mode 100644 index 7c1222033..000000000 --- a/dsew_community_profile/input_cache/.gitignore +++ /dev/null @@ -1 +0,0 @@ -*.xlsx diff --git a/dsew_community_profile/params.json.template b/dsew_community_profile/params.json.template deleted file mode 100644 index 1fcb75ee7..000000000 --- a/dsew_community_profile/params.json.template +++ /dev/null @@ -1,45 +0,0 @@ -{ - "common": { - "export_dir": "./receiving", - "log_filename": "dsew_cpr.log" - }, - "indicator": { - "input_cache": "./input_cache", - "reports": "new", - "export_start_date": null, - "export_end_date": null, - "export_signals": [ - "confirmed covid-19 admissions", - "positivity", - "doses administered", - "booster doses administered", - "fully vaccinated", - "booster dose since" - ] - }, - "validation": { - "common": { - "data_source": "dsew_cpr", - "span_length": 14, - "min_expected_lag": {"all": "3"}, - "max_expected_lag": {"all": "9"}, - "dry_run": true, - "suppressed_errors": [] - }, - "static": { - "minimum_sample_size": 0, - "missing_se_allowed": true, - "missing_sample_size_allowed": true - }, - "dynamic": { - "ref_window_size": 7, - "smoothed_signals": [ - "naats_positivity_7dav", - "confirmed_admissions_covid_1d_prop_7dav", - "confirmed_admissions_covid_1d_7dav", - "doses_admin_7dav", - "booster_doses_admin_7dav" - ] - } - } -} diff --git a/dsew_community_profile/setup.py b/dsew_community_profile/setup.py deleted file mode 100644 index fb5f9d4a9..000000000 --- a/dsew_community_profile/setup.py +++ /dev/null @@ -1,30 +0,0 @@ -from setuptools import setup -from setuptools import find_packages - -required = [ - "numpy", - "openpyxl", - "pandas", - "pydocstyle", - "pytest", - "pytest-cov", - "pylint==2.8.3", - "delphi-utils", - "covidcast" -] - -setup( - name="delphi_dsew_community_profile", - version="0.1.0", - description="Indicator tracking specimen test results and hospital admissions published in the COVID-19 Community Profile Report by the Data Strategy and Execution Workgroup", - author="", - author_email="", - url="https://github.com/cmu-delphi/covidcast-indicators", - install_requires=required, - classifiers=[ - "Development Status :: 5 - Production/Stable", - "Intended Audience :: Developers", - "Programming Language :: Python :: 3.8", - ], - packages=find_packages(), -) diff --git a/dsew_community_profile/static/.gitignore 
b/dsew_community_profile/static/.gitignore deleted file mode 100644 index e69de29bb..000000000 diff --git a/dsew_community_profile/tests/params.json.template b/dsew_community_profile/tests/params.json.template deleted file mode 100644 index 645bd253f..000000000 --- a/dsew_community_profile/tests/params.json.template +++ /dev/null @@ -1,34 +0,0 @@ -{ - "common": { - "export_dir": "./receiving", - "log_filename": "dsew_cpr.log" - }, - "indicator": { - "input_cache": "./input_cache", - "reports": "new" - }, - "validation": { - "common": { - "data_source": "dsew_cpr", - "span_length": 14, - "min_expected_lag": {"all": "5"}, - "max_expected_lag": {"all": "9"}, - "dry_run": true, - "suppressed_errors": [] - }, - "static": { - "minimum_sample_size": 0, - "missing_se_allowed": true, - "missing_sample_size_allowed": true - }, - "dynamic": { - "ref_window_size": 7, - "smoothed_signals": [ - "naats_total_7dav", - "naats_positivity_7dav", - "confirmed_admissions_covid_1d_prop_7dav", - "confirmed_admissions_covid_1d_7dav" - ] - } - } -} diff --git a/dsew_community_profile/tests/test_pull.py b/dsew_community_profile/tests/test_pull.py deleted file mode 100644 index e968df4d7..000000000 --- a/dsew_community_profile/tests/test_pull.py +++ /dev/null @@ -1,612 +0,0 @@ -from collections import namedtuple -from dataclasses import dataclass -from datetime import date, datetime, timedelta -from itertools import chain -from typing import Any, Dict, List, Union -import pandas as pd -from pandas.testing import assert_frame_equal -import numpy as np -import pytest -from unittest.mock import patch, Mock - -from delphi_utils.geomap import GeoMapper - -from delphi_dsew_community_profile.pull import ( - DatasetTimes, Dataset, - fetch_listing, nation_from_state, generate_prop_signal, - std_err, add_max_ts_col, unify_testing_sigs, interpolate_missing_values, - extend_listing_for_interp -) - - -example = namedtuple("example", "given expected") - -def _assert_frame_equal(df1, df2, index_cols: List[str] = None): - # Ensure same columns present. - assert set(df1.columns) == set(df2.columns) - # Ensure same column order. - df1 = df1[df1.columns] - df2 = df2[df1.columns] - # Ensure same row order by using a common index and sorting. 
- df1 = df1.set_index(index_cols).sort_index() - df2 = df2.set_index(index_cols).sort_index() - return assert_frame_equal(df1, df2) - -def _set_df_dtypes(df: pd.DataFrame, dtypes: Dict[str, Any]) -> pd.DataFrame: - df = df.copy() - for k, v in dtypes.items(): - if k in df.columns: - df[k] = df[k].astype(v) - return df - - -class TestPull: - def test_DatasetTimes(self): - examples = [ - example(DatasetTimes("xyzzy", date(2021, 10, 30), date(2021, 10, 20), date(2021, 10, 22), date(2021, 10, 23), date(2021, 10, 24)), - DatasetTimes("xyzzy", date(2021, 10, 30), date(2021, 10, 20), date(2021, 10, 22), date(2021, 10, 23), date(2021, 10, 24))), - ] - for ex in examples: - assert ex.given == ex.expected, "Equality" - - dt = DatasetTimes("xyzzy", date(2021, 10, 30), date(2021, 10, 20), date(2021, 10, 22), date(2021, 10, 23), date(2021, 10, 24)) - assert dt["positivity"] == date(2021, 10, 30), "positivity" - assert dt["total"] == date(2021, 10, 20), "total" - assert dt["confirmed covid-19 admissions"] == date(2021, 10, 22), "confirmed covid-19 admissions" - assert dt["doses administered"] == date(2021, 10, 24), "doses administered" - assert dt["fully vaccinated"] == date(2021, 10, 23), "fully vaccinated" - with pytest.raises(ValueError): - dt["xyzzy"] - - def test_DatasetTimes_from_header(self): - examples = [ - example("TESTING: LAST WEEK (October 24-30, Test Volume October 20-26)", - DatasetTimes("last", date(2021, 10, 30), date(2021, 10, 26), None, None, None)), - example("TESTING: PREVIOUS WEEK (October 24-30, Test Volume October 20-26)", - DatasetTimes("previous", date(2021, 10, 30), date(2021, 10, 26), None, None, None)), - example("TESTING: LAST WEEK (October 24-November 30, Test Volume October 20-26)", - DatasetTimes("last", date(2021, 11, 30), date(2021, 10, 26), None, None, None)), - example("VIRAL (RT-PCR) LAB TESTING: LAST WEEK (June 7-13, Test Volume June 3-9 )", - DatasetTimes("last", date(2021, 6, 13), date(2021, 6, 9), None, None, None)), - example("VIRAL (RT-PCR) LAB TESTING: LAST WEEK (March 7-13)", - DatasetTimes("last", date(2021, 3, 13), date(2021, 3, 13), None, None, None)), - example("HOSPITAL UTILIZATION: LAST WEEK (June 2-8)", - DatasetTimes("last", None, None, date(2021, 6, 8), None, None)), - example("HOSPITAL UTILIZATION: LAST WEEK (June 28-July 8)", - DatasetTimes("last", None, None, date(2021, 7, 8), None, None)), - example("COVID-19 VACCINATION DATA: CUMULATIVE (January 25)", - DatasetTimes("", None, None, None, date(2021, 1, 25), None)), - example("COVID-19 VACCINATION DATA: LAST WEEK (January 25-31)", - DatasetTimes("last", None, None, None, None, date(2021, 1, 25))) - ] - for ex in examples: - assert DatasetTimes.from_header(ex.given, date(2021, 12, 31)) == ex.expected, ex.given - - # test year boundary - examples = [ - example("TESTING: LAST WEEK (October 24-30, Test Volume October 20-26)", - DatasetTimes("last", date(2020, 10, 30), date(2020, 10, 26), None, None, None)), - ] - for ex in examples: - assert DatasetTimes.from_header(ex.given, date(2021, 1, 1)) == ex.expected, ex.given - - def test_Dataset_skip_overheader(self): - examples = [ - example("TESTING: LAST WEEK (October 24-30, Test Volume October 20-26)", - False), - example("TESTING: PREVIOUS WEEK (October 17-23, Test Volume October 13-19)", - False), - example("VIRAL (RT-PCR) LAB TESTING: LAST WEEK (August 24-30, Test Volume August 20-26)", - False), - example("VIRAL (RT-PCR) LAB TESTING: PREVIOUS WEEK (August 17-23, Test Volume August 13-19)", - False), - example("TESTING: % CHANGE FROM PREVIOUS WEEK", 
- True), - example("VIRAL (RT-PCR) LAB TESTING: % CHANGE FROM PREVIOUS WEEK", - True), - example("TESTING: DEMOGRAPHIC DATA", - True), - example("HOSPITAL UTILIZATION: LAST WEEK (January 2-8)", - False), - example("HOSPITAL UTILIZATION: CHANGE FROM PREVIOUS WEEK", - True), - example("HOSPITAL UTILIZATION: DEMOGRAPHIC DATA", - True), - example("COVID-19 VACCINATION DATA: CUMULATIVE (January 25)", - False), - example("COVID-19 VACCINATION DATA: LAST WEEK (January 25-31)", - False), - example("COVID-19 VACCINATION DATA: DEMOGRAPHIC DATA", - True) - ] - for ex in examples: - assert Dataset.skip_overheader(ex.given) == ex.expected, ex.given - def test_Dataset_retain_header(self): - examples = [ - example("Total NAATs - last 7 days (may be an underestimate due to delayed reporting)", - True), - example("Total NAATs - previous 7 days (may be an underestimate due to delayed reporting)", - True), - example("NAAT positivity rate - last 7 days (may be an underestimate due to delayed reporting)", - True), - example("NAAT positivity rate - previous 7 days (may be an underestimate due to delayed reporting)", - True), - example("NAAT positivity rate - absolute change (may be an underestimate due to delayed reporting)", - False), - example("NAAT positivity rate - last 7 days - ages <5", - False), - example("Total RT-PCR diagnostic tests - last 7 days (may be an underestimate due to delayed reporting)", - True), - example("Viral (RT-PCR) lab test positivity rate - last 7 days (may be an underestimate due to delayed reporting)", - True), - example("RT-PCR tests per 100k - last 7 days (may be an underestimate due to delayed reporting)", - False), - example("Confirmed COVID-19 admissions - last 7 days", - True), - example("Confirmed COVID-19 admissions - percent change", - False), - example("Confirmed COVID-19 admissions - last 7 days - ages <18", - False), - example("Confirmed COVID-19 admissions - last 7 days - age unknown", - False), - example("Confirmed COVID-19 admissions per 100 inpatient beds - last 7 days", - False), - example("People who are fully vaccinated", - True), - example("People who are fully vaccinated - ages 5-11", - False), - example("People who are fully vaccinated as % of total population", - False), - example("People with a completed primary series", - True), - example("People with a completed primary series - ages 5-11", - False), - example("People with a completed primary series as % of total population", - False), - example("People with full course", - True), - example("People with full course as % of total population", - False) - ] - for ex in examples: - assert Dataset.retain_header(ex.given) == ex.expected, ex.given - - def test_Dataset_parse_sheet(self): - # TODO - pass - - def test_fetch_listing(self): - inst = namedtuple("attachment", "assetId filename publish cache") - instances = list(chain(*[ - [ - inst(f"{i}", f"2021010{i}.xlsx", date(2021, 1, i), f"2021010{i}--{i}.xlsx"), - inst(f"p{i}", f"2021010{i}.pdf", date(2021, 1, i), f"2021010{i}--p{i}.pdf"), - ] - for i in [1, 2, 3, 4, 5] - ])) - - # Solution from https://stackoverflow.com/questions/15753390/ - #how-can-i-mock-requests-and-the-response - def mocked_requests_get(*args, **kwargs): - class MockResponse: - def __init__(self, json_data): - self.json_data = json_data - - def json(self): - return self.json_data - - return MockResponse({ - 'metadata': { - 'attachments': [ - {"assetId": i.assetId, "filename": i.filename} - for i in instances - ] - } - } - ) - - def as_listing(instance): - return { - "assetId": 
instance.assetId, - "filename": instance.filename, - "cached_filename": instance.cache, - "publish_date": instance.publish - } - ex = example( - {'indicator':{'reports':'new', 'input_cache':''}}, - [ - as_listing(instance) - for i, instance in filter(lambda x: x[0]%2 == 0, enumerate(instances)) - ] - ) - - with patch('requests.get', side_effect=mocked_requests_get): - with patch('os.path.exists', return_value=False): - for actual, expected in zip(fetch_listing(ex.given), ex.expected): - assert actual == expected - - with patch('os.path.exists', return_value=True): - assert fetch_listing(ex.given) == [] - - def test_nation_from_state(self): - geomapper = GeoMapper() - state_pop = geomapper.get_crosswalk("state_id", "pop") - - test_df = pd.DataFrame({ - 'state_id': ['pa', 'wv'], - 'timestamp': [datetime(year=2020, month=1, day=1)]*2, - 'val': [15., 150.], - 'se': [None, None], - 'sample_size': [None, None], - 'publish_date': [datetime(year=2020, month=1, day=1)]*2,}) - - pa_pop = int(state_pop.loc[state_pop.state_id == "pa", "pop"].iloc[0]) - wv_pop = int(state_pop.loc[state_pop.state_id == "wv", "pop"].iloc[0]) - tot_pop = pa_pop + wv_pop - - assert True, nation_from_state( - test_df.copy(), - "total", - geomapper - ) - pd.testing.assert_frame_equal( - nation_from_state( - test_df.copy(), - "total", - geomapper - ), - pd.DataFrame({ - 'geo_id': ['us'], - 'timestamp': [datetime(year=2020, month=1, day=1)], - 'val': [15. + 150.], - 'se': [None], - 'sample_size': [None], - 'publish_date': [datetime(year=2020, month=1, day=1)],}), - check_like=True - ) - - pd.testing.assert_frame_equal( - nation_from_state( - test_df.copy(), - "positivity", - geomapper - ), - pd.DataFrame({ - 'geo_id': ['us'], - 'timestamp': [datetime(year=2020, month=1, day=1)], - 'val': [15*pa_pop/tot_pop + 150*wv_pop/tot_pop], - 'se': [None], - 'sample_size': [None], - 'publish_date': [datetime(year=2020, month=1, day=1)],}), - check_like=True - ) - - def test_generate_prop_signal_msa(self): - geomapper = GeoMapper() - county_pop = geomapper.get_crosswalk("fips", "pop") - county_msa = geomapper.get_crosswalk("fips", "msa") - msa_pop = county_pop.merge( - county_msa, on="fips", how="inner" - ).groupby( - "msa" - ).sum( - numeric_only=True - ).reset_index( - ) - - test_df = pd.DataFrame({ - 'geo_id': ['35620', '31080'], - 'timestamp': [datetime(year=2020, month=1, day=1)]*2, - 'val': [15., 150.], - 'se': [None, None], - 'sample_size': [None, None],}) - - nyc_pop = int(msa_pop.loc[msa_pop.msa == "35620", "pop"].iloc[0]) - la_pop = int(msa_pop.loc[msa_pop.msa == "31080", "pop"].iloc[0]) - - expected_df = pd.DataFrame({ - 'geo_id': ['35620', '31080'], - 'timestamp': [datetime(year=2020, month=1, day=1)]*2, - 'val': [15. / nyc_pop * 100000, 150. 
/ la_pop * 100000], - 'se': [None, None], - 'sample_size': [None, None],}) - - pd.testing.assert_frame_equal( - generate_prop_signal( - test_df.copy(), - "msa", - geomapper - ), - expected_df, - check_like=True - ) - def test_generate_prop_signal_non_msa(self): - geomapper = GeoMapper() - - geos = { - "state": { - "code_name": "state_id", - "geo_names": ['pa', 'wv'] - }, - "county": { - "code_name": "fips", - "geo_names": ['36061', '06037'] - }, - # nation uses the same logic path so no need to test separately - "hhs": { - "code_name": "hhs", - "geo_names": ["1", "4"] - } - } - - for geo, settings in geos.items(): - geo_pop = geomapper.get_crosswalk(settings["code_name"], "pop") - - test_df = pd.DataFrame({ - 'geo_id': settings["geo_names"], - 'timestamp': [datetime(year=2020, month=1, day=1)]*2, - 'val': [15., 150.], - 'se': [None, None], - 'sample_size': [None, None],}) - - pop1 = int(geo_pop.loc[geo_pop[settings["code_name"]] == settings["geo_names"][0], "pop"].iloc[0]) - pop2 = int(geo_pop.loc[geo_pop[settings["code_name"]] == settings["geo_names"][1], "pop"].iloc[0]) - - expected_df = pd.DataFrame({ - 'geo_id': settings["geo_names"], - 'timestamp': [datetime(year=2020, month=1, day=1)]*2, - 'val': [15. / pop1 * 100000, 150. / pop2 * 100000], - 'se': [None, None], - 'sample_size': [None, None],}) - - pd.testing.assert_frame_equal( - generate_prop_signal( - test_df.copy(), - geo, - geomapper - ), - expected_df, - check_like=True - ) - - def test_unify_testing_sigs(self): - positivity_df = pd.DataFrame({ - 'geo_id': ["ca", "ca", "fl", "fl"], - 'timestamp': [datetime(2021, 10, 27), datetime(2021, 10, 20)]*2, - 'val': [0.2, 0.34, 0.7, 0.01], - 'se': [None] * 4, - 'sample_size': [None] * 4, - 'publish_date': [datetime(2021, 10, 30)]*4, - }) - base_volume_df = pd.DataFrame({ - 'geo_id': ["ca", "ca", "fl", "fl"], - 'timestamp': [datetime(2021, 10, 23), datetime(2021, 10, 16)]*2, - 'val': [None] * 4, - 'se': [None] * 4, - 'sample_size': [None] * 4, - 'publish_date': [datetime(2021, 10, 30)]*4, - }) - - examples = [ - example( - [positivity_df, base_volume_df.assign(val = [101, 102, 103, 104])], - positivity_df.assign( - sample_size = [101, 102, 103, 104], - se = lambda df: np.sqrt(df.val * (1 - df.val) / df.sample_size) - ) - ), # No filtering - example( - [positivity_df, base_volume_df.assign(val = [110, 111, 112, 113]).iloc[::-1]], - positivity_df.assign( - sample_size = [110, 111, 112, 113], - se = lambda df: np.sqrt(df.val * (1 - df.val) / df.sample_size) - ) - ), # No filtering, volume df in reversed order - example( - [positivity_df, base_volume_df.assign(val = [100, 5, 1, 6])], - positivity_df.assign( - sample_size = [100, 5, 1, 6] - ).iloc[[0, 3]].assign( - se = lambda df: np.sqrt(df.val * (1 - df.val) / df.sample_size) - ) - ) - ] - for ex in examples: - pd.testing.assert_frame_equal(unify_testing_sigs(ex.given[0], ex.given[1]), ex.expected) - - with pytest.raises(AssertionError): - # Inputs have different numbers of rows. 
- unify_testing_sigs(positivity_df, positivity_df.head(n=1)) - - def test_add_max_ts_col(self): - input_df = pd.DataFrame({ - 'geo_id': ["ca", "ca", "fl", "fl"], - 'timestamp': [datetime(2021, 10, 27), datetime(2021, 10, 20)]*2, - 'val': [1, 2, 3, 4], - 'se': [None] * 4, - 'sample_size': [None] * 4, - 'publish_date': [datetime(2021, 10, 30)]*4, - }) - examples = [ - example(input_df, input_df.assign(is_max_group_ts = [True, False, True, False])), - ] - for ex in examples: - pd.testing.assert_frame_equal(add_max_ts_col(ex.given), ex.expected) - - with pytest.raises(AssertionError): - # Input df has 2 timestamps per geo id-publish date combination, but not 2 unique timestamps. - add_max_ts_col( - pd.DataFrame({ - 'geo_id': ["ca", "ca", "fl", "fl"], - 'timestamp': [datetime(2021, 10, 27)] * 4, - 'val': [1, 2, 3, 4], - 'se': [None] * 4, - 'sample_size': [None] * 4, - 'publish_date': [datetime(2021, 10, 30)] * 4, - }) - ) - with pytest.raises(AssertionError): - # Input df has more than 2 timestamps per geo id-publish date combination. - add_max_ts_col( - pd.DataFrame({ - 'geo_id': ["ca", "ca", "ca", "fl", "fl", "fl"], - 'timestamp': [datetime(2021, 10, 27)] * 6, - 'val': [1, 2, 3, 4, 5, 6], - 'se': [None] * 6, - 'sample_size': [None] * 6, - 'publish_date': [datetime(2021, 10, 30)] * 6, - }) - ) - - try: - # Input df has fewer than 2 timestamps per geo id-publish date - # combination. This should not raise an exception. - add_max_ts_col( - pd.DataFrame({ - 'geo_id': ["ca", "fl"], - 'timestamp': [datetime(2021, 10, 27)] * 2, - 'val': [1, 2], - 'se': [None] * 2, - 'sample_size': [None] * 2, - 'publish_date': [datetime(2021, 10, 30)] * 2, - }) - ) - except AssertionError as e: - assert False, f"'add_max_ts_col' raised exception: {e}" - - try: - # Input df has 2 unique timestamps per geo id-publish date - # combination. This should not raise an exception. - add_max_ts_col( - pd.DataFrame({ - 'geo_id': ["ca", "ca", "fl", "fl"], - 'timestamp': [datetime(2021, 10, 27), datetime(2021, 10, 20)] * 2, - 'val': [1, 2, 3, 4], - 'se': [None] * 4, - 'sample_size': [None] * 4, - 'publish_date': [datetime(2021, 10, 30)] * 4, - }) - ) - except AssertionError as e: - assert False, f"'add_max_ts_col' raised exception: {e}" - - def test_std_err(self): - df = pd.DataFrame({ - "val": [0, 0.5, 0.4, 0.3, 0.2, 0.1], - "sample_size": [2, 2, 5, 10, 20, 50] - }) - - expected_se = np.sqrt(df.val * (1 - df.val) / df.sample_size) - se = std_err(df) - - assert (se >= 0).all() - assert not np.isnan(se).any() - assert not np.isinf(se).any() - assert np.allclose(se, expected_se, equal_nan=True) - with pytest.raises(AssertionError): - std_err( - pd.DataFrame({ - "val": [0, 0.5, 0.4, 0.3, 0.2, 0.1], - "sample_size": [2, 2, 5, 10, 20, 0] - }) - ) - - def test_interpolation(self): - DTYPES = {"geo_id": str, "timestamp": "datetime64[ns]", "val": float, "se": float, "sample_size": float, "publish_date": "datetime64[ns]"} - line = lambda x: 3 * x + 5 - - sig1 = _set_df_dtypes(pd.DataFrame({ - "geo_id": "1", - "timestamp": pd.date_range("2022-01-01", "2022-01-10"), - "val": [line(i) for i in range(2, 12)], - "se": [line(i) for i in range(1, 11)], - "sample_size": [line(i) for i in range(0, 10)], - "publish_date": pd.to_datetime("2022-01-10") - }), dtypes=DTYPES) - # A linear signal missing two days which should be filled exactly by the linear interpolation. 
- missing_sig1 = sig1[(sig1.timestamp <= "2022-01-05") | (sig1.timestamp >= "2022-01-08")] - - sig2 = sig1.copy() - sig2["geo_id"] = "2" - # A linear signal missing everything but the end points, should be filled exactly by linear interpolation. - missing_sig2 = sig2[(sig2.timestamp == "2022-01-01") | (sig2.timestamp == "2022-01-10")] - - sig3 = _set_df_dtypes(pd.DataFrame({ - "geo_id": "3", - "timestamp": pd.date_range("2022-01-01", "2022-01-10"), - "val": None, - "se": [line(i) for i in range(1, 11)], - "sample_size": [line(i) for i in range(0, 10)], - "publish_date": pd.to_datetime("2022-01-10") - }), dtypes=DTYPES) - # A signal missing everything, should be dropped since it's all NAs. - missing_sig3 = sig3[(sig3.timestamp <= "2022-01-05") | (sig3.timestamp >= "2022-01-08")] - - sig4 = _set_df_dtypes(pd.DataFrame({ - "geo_id": "4", - "timestamp": pd.date_range("2022-01-01", "2022-01-10"), - "val": [None] * 9 + [10.0], - "se": [line(i) for i in range(1, 11)], - "sample_size": [line(i) for i in range(0, 10)], - "publish_date": pd.to_datetime("2022-01-10") - }), dtypes=DTYPES) - # A signal missing everything except for one point, should output a reduced range without NAs. - missing_sig4 = sig4[(sig4.timestamp <= "2022-01-05") | (sig4.timestamp >= "2022-01-08")] - - missing_dfs = [missing_sig1, missing_sig2, missing_sig3, missing_sig4] - interpolated_dfs1 = interpolate_missing_values({("src", "sig", False): pd.concat(missing_dfs)}) - expected_dfs = pd.concat([sig1, sig2, sig4.loc[9:]]) - _assert_frame_equal(interpolated_dfs1[("src", "sig", False)], expected_dfs, index_cols=["geo_id", "timestamp"]) - - def test_interpolation_object_type(self): - DTYPES = {"geo_id": str, "timestamp": "datetime64[ns]", "val": float, "se": float, "sample_size": float, "publish_date": "datetime64[ns]"} - line = lambda x: 3 * x + 5 - - sig1 = _set_df_dtypes(pd.DataFrame({ - "geo_id": "1", - "timestamp": pd.date_range("2022-01-01", "2022-01-10"), - "val": [line(i) for i in range(2, 12)], - "se": [line(i) for i in range(1, 11)], - "sample_size": [line(i) for i in range(0, 10)], - "publish_date": pd.to_datetime("2022-01-10") - }), dtypes=DTYPES) - # A linear signal missing two days which should be filled exactly by the linear interpolation. 
- missing_sig1 = sig1[(sig1.timestamp <= "2022-01-05") | (sig1.timestamp >= "2022-01-08")] - # set all columns to object type to simulate the miscast we sometimes see when combining dfs - missing_sig1 = _set_df_dtypes(missing_sig1, {key: object for key in DTYPES.keys()}) - - interpolated_dfs1 = interpolate_missing_values({("src", "sig", False): missing_sig1}) - expected_dfs = pd.concat([sig1]) - _assert_frame_equal(interpolated_dfs1[("src", "sig", False)], expected_dfs, index_cols=["geo_id", "timestamp"]) - - @patch("delphi_dsew_community_profile.pull.INTERP_LENGTH", 2) - def test_extend_listing(self): - listing = [ - {"publish_date": date(2020, 1, 20) - timedelta(days=i)} - for i in range(20) - ] - examples = [ - # single range - example( - [{"publish_date": date(2020, 1, 20)}], - [{"publish_date": date(2020, 1, 20)}, {"publish_date": date(2020, 1, 19)}] - ), - # disjoint ranges - example( - [{"publish_date": date(2020, 1, 20)}, {"publish_date": date(2020, 1, 10)}], - [{"publish_date": date(2020, 1, 20)}, {"publish_date": date(2020, 1, 19)}, - {"publish_date": date(2020, 1, 10)}, {"publish_date": date(2020, 1, 9)}] - ), - # conjoined ranges - example( - [{"publish_date": date(2020, 1, 20)}, {"publish_date": date(2020, 1, 19)}], - [{"publish_date": date(2020, 1, 20)}, {"publish_date": date(2020, 1, 19)}, {"publish_date": date(2020, 1, 18)}] - ), - # empty keep list - example( - [], - [] - ) - ] - for ex in examples: - assert extend_listing_for_interp(ex.given, listing) == ex.expected, ex.given diff --git a/dsew_community_profile/version.cfg b/dsew_community_profile/version.cfg deleted file mode 100644 index ae19058ed..000000000 --- a/dsew_community_profile/version.cfg +++ /dev/null @@ -1 +0,0 @@ -current_version = 0.3.42 From 243c7d5be7c566b046ec4c32ad1d5c7c01608dcf Mon Sep 17 00:00:00 2001 From: Nat DeFries <42820733+nmdefries@users.noreply.github.com> Date: Mon, 17 Jul 2023 14:00:55 -0400 Subject: [PATCH 02/21] remove dsew from workflows --- .github/workflows/create-release.yml | 2 +- .github/workflows/python-ci.yml | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/workflows/create-release.yml b/.github/workflows/create-release.yml index 55e7c0782..5a56a6d1d 100644 --- a/.github/workflows/create-release.yml +++ b/.github/workflows/create-release.yml @@ -61,7 +61,7 @@ jobs: bump2version --list ${{ github.event.inputs.versionName }} | grep ^new_version | sed -r s,"^.*=",, - name: Copy version to indicator directory run: | - indicator_list=("changehc" "claims_hosp" "doctor_visits" "dsew_community_profile" "google_symptoms" "hhs_hosp" "nchs_mortality" "nowcast" "quidel_covidtest" "sir_complainsalot") + indicator_list=("changehc" "claims_hosp" "doctor_visits" "google_symptoms" "hhs_hosp" "nchs_mortality" "nowcast" "quidel_covidtest" "sir_complainsalot") for path in ${indicator_list[@]}; do echo "current_version = ${{ steps.indicators.outputs.version }}" > $path/version.cfg done diff --git a/.github/workflows/python-ci.yml b/.github/workflows/python-ci.yml index 4d376ea5b..7e09009a9 100644 --- a/.github/workflows/python-ci.yml +++ b/.github/workflows/python-ci.yml @@ -16,7 +16,7 @@ jobs: if: github.event.pull_request.draft == false strategy: matrix: - packages: [_delphi_utils_python, changehc, claims_hosp, doctor_visits, dsew_community_profile, google_symptoms, hhs_hosp, nchs_mortality, nowcast, quidel_covidtest, sir_complainsalot] + packages: [_delphi_utils_python, changehc, claims_hosp, doctor_visits, google_symptoms, hhs_hosp, nchs_mortality, nowcast, 
quidel_covidtest, sir_complainsalot] defaults: run: working-directory: ${{ matrix.packages }} From d035d811cda441f5384bd495b0c7ea8482f16e89 Mon Sep 17 00:00:00 2001 From: Nat DeFries <42820733+nmdefries@users.noreply.github.com> Date: Mon, 17 Jul 2023 14:01:44 -0400 Subject: [PATCH 03/21] remove dsew from Jenkinsfile --- Jenkinsfile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Jenkinsfile b/Jenkinsfile index 44cf0d1be..b4fb46aa3 100644 --- a/Jenkinsfile +++ b/Jenkinsfile @@ -10,7 +10,7 @@ - TODO: #527 Get this list automatically from python-ci.yml at runtime. */ -def indicator_list = ["backfill_corrections", "changehc", "claims_hosp", "google_symptoms", "hhs_hosp", "nchs_mortality", "quidel_covidtest", "sir_complainsalot", "dsew_community_profile", "doctor_visits"] +def indicator_list = ["backfill_corrections", "changehc", "claims_hosp", "google_symptoms", "hhs_hosp", "nchs_mortality", "quidel_covidtest", "sir_complainsalot", "doctor_visits"] def build_package_main = [:] def build_package_prod = [:] def deploy_staging = [:] From 3412cc924e18aa9854ad4152f02738455de7e610 Mon Sep 17 00:00:00 2001 From: Nat DeFries <42820733+nmdefries@users.noreply.github.com> Date: Thu, 20 Jul 2023 14:49:39 -0400 Subject: [PATCH 04/21] put max allowed threads in constant --- .../delphi_utils/validator/datafetcher.py | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/_delphi_utils_python/delphi_utils/validator/datafetcher.py b/_delphi_utils_python/delphi_utils/validator/datafetcher.py index 9e302d822..7e5fafbd6 100644 --- a/_delphi_utils_python/delphi_utils/validator/datafetcher.py +++ b/_delphi_utils_python/delphi_utils/validator/datafetcher.py @@ -220,11 +220,13 @@ def get_one_api_df(data_source, min_date, max_date, dict_lock.release() -def threaded_api_calls(data_source, min_date, max_date, geo_signal_combos, n_threads=32): +MAX_ALLOWED_THREADS = 32 + +def threaded_api_calls(data_source, min_date, max_date, geo_signal_combos, n_threads=MAX_ALLOWED_THREADS): """Get data from API for all geo-signal combinations in a threaded way.""" - if n_threads > 32: - n_threads = 32 - print("Warning: Don't run more than 32 threads at once due " + if n_threads > MAX_ALLOWED_THREADS: + n_threads = MAX_ALLOWED_THREADS + warnings.warn(f"Warning: Don't run more than {MAX_ALLOWED_THREADS} threads at once due " + "to API resource limitations") output_dict = dict() From 1158857d879c39f1f38ae3ebf13b3816515abe5c Mon Sep 17 00:00:00 2001 From: Nat DeFries <42820733+nmdefries@users.noreply.github.com> Date: Thu, 20 Jul 2023 15:31:59 -0400 Subject: [PATCH 05/21] add None-type check and error for api ref data --- .../delphi_utils/validator/datafetcher.py | 12 +++++------- 1 file changed, 5 insertions(+), 7 deletions(-) diff --git a/_delphi_utils_python/delphi_utils/validator/datafetcher.py b/_delphi_utils_python/delphi_utils/validator/datafetcher.py index 7e5fafbd6..27ef3ecbf 100644 --- a/_delphi_utils_python/delphi_utils/validator/datafetcher.py +++ b/_delphi_utils_python/delphi_utils/validator/datafetcher.py @@ -166,14 +166,12 @@ def fetch_api_reference(data_source, start_date, end_date, geo_type, signal_type api_df = covidcast.signal( data_source, signal_type, start_date, end_date, geo_type) + error_context = f"when fetching reference data from {start_date} to {end_date} " +\ + f"for data source: {data_source}, signal type: {signal_type}, geo type: {geo_type}" + if api_df is None: + raise APIDataFetchError("Error: no API data was returned " + error_context) if not isinstance(api_df, 
pd.DataFrame): - custom_msg = "Error fetching data from " + str(start_date) + \ - " to " + str(end_date) + \ - " for data source: " + data_source + \ - ", signal type: " + signal_type + \ - ", geo type: " + geo_type - - raise APIDataFetchError(custom_msg) + raise APIDataFetchError("Error: API return value was not a dataframe " + error_context) column_names = ["geo_id", "val", "se", "sample_size", "time_value"] From 8636c0b0bf22a26d00d33fb2968cfaf51edebddb Mon Sep 17 00:00:00 2001 From: Nat DeFries <42820733+nmdefries@users.noreply.github.com> Date: Thu, 20 Jul 2023 15:41:20 -0400 Subject: [PATCH 06/21] move api key fetch and set up to dynamic.validate This lets the `meta` and `signal` calls share the same authentication init. We also don't need to do a second `read_params` in `get_geo_signal_combos`; instead pass the API key as an arg. --- .../delphi_utils/validator/datafetcher.py | 7 ++----- _delphi_utils_python/delphi_utils/validator/dynamic.py | 9 ++++++++- 2 files changed, 10 insertions(+), 6 deletions(-) diff --git a/_delphi_utils_python/delphi_utils/validator/datafetcher.py b/_delphi_utils_python/delphi_utils/validator/datafetcher.py index 27ef3ecbf..db27498d3 100644 --- a/_delphi_utils_python/delphi_utils/validator/datafetcher.py +++ b/_delphi_utils_python/delphi_utils/validator/datafetcher.py @@ -10,7 +10,6 @@ import pandas as pd import numpy as np import covidcast -from .. import read_params from .errors import APIDataFetchError, ValidationFailure FILENAME_REGEX = re.compile( @@ -103,15 +102,13 @@ def load_csv(path): }) -def get_geo_signal_combos(data_source): +def get_geo_signal_combos(data_source, api_key): """ Get list of geo type-signal type combinations that we expect to see. Cross references based on combinations reported available by COVIDcast metadata. 
""" - params = read_params() - assert "validation" in params - api_key = ("epidata", params["validation"]["common"]["api_credentials"]) + api_key = ("epidata", api_key) # Maps data_source name with what's in the API, lists used in case of multiple names meta_response = requests.get("https://api.covidcast.cmu.edu/epidata/covidcast/meta", auth=api_key) diff --git a/_delphi_utils_python/delphi_utils/validator/dynamic.py b/_delphi_utils_python/delphi_utils/validator/dynamic.py index ea846e03d..deb65b129 100644 --- a/_delphi_utils_python/delphi_utils/validator/dynamic.py +++ b/_delphi_utils_python/delphi_utils/validator/dynamic.py @@ -5,6 +5,7 @@ import re import pandas as pd import numpy as np +import covidcast from .errors import ValidationFailure from .datafetcher import get_geo_signal_combos, threaded_api_calls from .utils import relative_difference_by_min, TimeWindow, lag_converter @@ -20,6 +21,8 @@ class Parameters: # data source name, one of # https://cmu-delphi.github.io/delphi-epidata/api/covidcast_signals.html data_source: str + # COVIDcast API key + api_key: str # span of time over which to perform checks time_window: TimeWindow # date that this df_to_test was generated; typically 1 day after the last date in df_to_test @@ -48,6 +51,7 @@ def __init__(self, params): self.params = self.Parameters( data_source=common_params["data_source"], + api_key = params["common"]["api_credentials"], time_window=TimeWindow.from_params(common_params["end_date"], common_params["span_length"]), generation_date=date.today(), @@ -74,8 +78,11 @@ def validate(self, all_frames, report): # Get 14 days prior to the earliest list date outlier_lookbehind = timedelta(days=14) + # Authenticate API + covidcast.use_api_key(self.params.api_key) + # Get all expected combinations of geo_type and signal. 
- geo_signal_combos = get_geo_signal_combos(self.params.data_source) + geo_signal_combos = get_geo_signal_combos(self.params.data_source, api_key = self.params.api_key) all_api_df = threaded_api_calls(self.params.data_source, self.params.time_window.start_date - outlier_lookbehind, From 3596d36559d687d7084128e26afffe3243784a91 Mon Sep 17 00:00:00 2001 From: Nat DeFries <42820733+nmdefries@users.noreply.github.com> Date: Thu, 20 Jul 2023 18:11:32 -0400 Subject: [PATCH 07/21] linting --- _delphi_utils_python/delphi_utils/validator/datafetcher.py | 4 +++- _delphi_utils_python/delphi_utils/validator/dynamic.py | 5 +++-- 2 files changed, 6 insertions(+), 3 deletions(-) diff --git a/_delphi_utils_python/delphi_utils/validator/datafetcher.py b/_delphi_utils_python/delphi_utils/validator/datafetcher.py index db27498d3..ae29c90dd 100644 --- a/_delphi_utils_python/delphi_utils/validator/datafetcher.py +++ b/_delphi_utils_python/delphi_utils/validator/datafetcher.py @@ -165,6 +165,7 @@ def fetch_api_reference(data_source, start_date, end_date, geo_type, signal_type error_context = f"when fetching reference data from {start_date} to {end_date} " +\ f"for data source: {data_source}, signal type: {signal_type}, geo type: {geo_type}" + if api_df is None: raise APIDataFetchError("Error: no API data was returned " + error_context) if not isinstance(api_df, pd.DataFrame): @@ -217,7 +218,8 @@ def get_one_api_df(data_source, min_date, max_date, MAX_ALLOWED_THREADS = 32 -def threaded_api_calls(data_source, min_date, max_date, geo_signal_combos, n_threads=MAX_ALLOWED_THREADS): +def threaded_api_calls(data_source, min_date, max_date, + geo_signal_combos, n_threads=MAX_ALLOWED_THREADS): """Get data from API for all geo-signal combinations in a threaded way.""" if n_threads > MAX_ALLOWED_THREADS: n_threads = MAX_ALLOWED_THREADS diff --git a/_delphi_utils_python/delphi_utils/validator/dynamic.py b/_delphi_utils_python/delphi_utils/validator/dynamic.py index deb65b129..2931c65af 100644 --- a/_delphi_utils_python/delphi_utils/validator/dynamic.py +++ b/_delphi_utils_python/delphi_utils/validator/dynamic.py @@ -15,7 +15,7 @@ class DynamicValidator: """Class for validation of static properties of individual datasets.""" @dataclass - class Parameters: + class Parameters: # pylint: disable=too-many-instance-attributes """Configuration parameters.""" # data source name, one of @@ -82,7 +82,8 @@ def validate(self, all_frames, report): covidcast.use_api_key(self.params.api_key) # Get all expected combinations of geo_type and signal. 
- geo_signal_combos = get_geo_signal_combos(self.params.data_source, api_key = self.params.api_key) + geo_signal_combos = get_geo_signal_combos(self.params.data_source, + api_key = self.params.api_key) all_api_df = threaded_api_calls(self.params.data_source, self.params.time_window.start_date - outlier_lookbehind, From 8f0ab2bd4b6b4c8b44f143aebebe49051c85bc2e Mon Sep 17 00:00:00 2001 From: Nat DeFries <42820733+nmdefries@users.noreply.github.com> Date: Fri, 21 Jul 2023 10:49:34 -0400 Subject: [PATCH 08/21] pull api key from common_params --- _delphi_utils_python/delphi_utils/validator/dynamic.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/_delphi_utils_python/delphi_utils/validator/dynamic.py b/_delphi_utils_python/delphi_utils/validator/dynamic.py index 2931c65af..4911628ee 100644 --- a/_delphi_utils_python/delphi_utils/validator/dynamic.py +++ b/_delphi_utils_python/delphi_utils/validator/dynamic.py @@ -51,7 +51,7 @@ def __init__(self, params): self.params = self.Parameters( data_source=common_params["data_source"], - api_key = params["common"]["api_credentials"], + api_key = common_params["api_credentials"], time_window=TimeWindow.from_params(common_params["end_date"], common_params["span_length"]), generation_date=date.today(), From fd8dc3ca10c52c0a0a59d459e2f05e41ea1d3934 Mon Sep 17 00:00:00 2001 From: Nat DeFries <42820733+nmdefries@users.noreply.github.com> Date: Fri, 21 Jul 2023 10:49:43 -0400 Subject: [PATCH 09/21] update tests --- _delphi_utils_python/tests/validator/test_datafetcher.py | 9 +++++---- _delphi_utils_python/tests/validator/test_dynamic.py | 3 ++- 2 files changed, 7 insertions(+), 5 deletions(-) diff --git a/_delphi_utils_python/tests/validator/test_datafetcher.py b/_delphi_utils_python/tests/validator/test_datafetcher.py index 2d8a4e1c1..6b3dfa574 100644 --- a/_delphi_utils_python/tests/validator/test_datafetcher.py +++ b/_delphi_utils_python/tests/validator/test_datafetcher.py @@ -54,7 +54,7 @@ def raise_for_status(self): def test_bad_api_key(self, **kwargs): kwargs["mock_requests"].get("https://api.covidcast.cmu.edu/epidata/covidcast/meta", status_code=429) with pytest.raises(HTTPError): - get_geo_signal_combos("chng") + get_geo_signal_combos("chng", api_key="") @mock.patch('requests.get', side_effect=mocked_requests_get) @mock.patch("covidcast.metadata") @@ -78,11 +78,11 @@ def test_get_geo_signal_combos(self, mock_metadata, mock_get): "hrr", "msa", "msa", "state"] }) - assert set(get_geo_signal_combos("chng")) == set( + assert set(get_geo_signal_combos("chng", api_key="")) == set( [("state", "smoothed_outpatient_cli"), ("state", "smoothed_outpatient_covid"), ("county", "smoothed_outpatient_covid")]) - assert set(get_geo_signal_combos("covid-act-now")) == set( + assert set(get_geo_signal_combos("covid-act-now", api_key="")) == set( [("hrr", "pcr_specimen_positivity_rate"), ("msa", "pcr_specimen_positivity_rate"), ("msa", "pcr_specimen_total_tests")]) @@ -138,7 +138,8 @@ def mock_signal_return_fn(unused_data_source, signal_type, unused_start_date, ("state", "b"): ValidationFailure("api_data_fetch_error", geo_type="state", signal="b", - message="Error fetching data from 2020-03-10 " + message="Error: no API data was returned when " + "fetching reference data from 2020-03-10 " "to 2020-06-10 for data source: " "source, signal type: b, geo type: state") } diff --git a/_delphi_utils_python/tests/validator/test_dynamic.py b/_delphi_utils_python/tests/validator/test_dynamic.py index ce5a1bf54..c1e39af8e 100644 --- 
a/_delphi_utils_python/tests/validator/test_dynamic.py +++ b/_delphi_utils_python/tests/validator/test_dynamic.py @@ -11,7 +11,8 @@ class TestReferencePadding: "common": { "data_source": "", "span_length": 1, - "end_date": "2020-09-02" + "end_date": "2020-09-02", + "api_credentials": "" } } From 1eb571978ed8a01ab008a2163914d4e775c3be3c Mon Sep 17 00:00:00 2001 From: Nat DeFries <42820733+nmdefries@users.noreply.github.com> Date: Fri, 21 Jul 2023 10:57:08 -0400 Subject: [PATCH 10/21] add empty api credentials to all test params --- .../tests/validator/test_dynamic.py | 15 ++++++++++----- .../tests/validator/test_validator.py | 12 ++++++++---- 2 files changed, 18 insertions(+), 9 deletions(-) diff --git a/_delphi_utils_python/tests/validator/test_dynamic.py b/_delphi_utils_python/tests/validator/test_dynamic.py index c1e39af8e..07a50bdf1 100644 --- a/_delphi_utils_python/tests/validator/test_dynamic.py +++ b/_delphi_utils_python/tests/validator/test_dynamic.py @@ -82,7 +82,8 @@ class TestCheckRapidChange: "common": { "data_source": "", "span_length": 1, - "end_date": "2020-09-02" + "end_date": "2020-09-02", + "api_credentials": "" } } @@ -115,7 +116,8 @@ class TestCheckNaVals: "common": { "data_source": "", "span_length": 14, - "end_date": "2020-09-02" + "end_date": "2020-09-02", + "api_credentials": "" } } def test_missing(self): @@ -138,7 +140,8 @@ class TestCheckAvgValDiffs: "common": { "data_source": "", "span_length": 1, - "end_date": "2020-09-02" + "end_date": "2020-09-02", + "api_credentials": "" } } @@ -279,7 +282,8 @@ class TestDataOutlier: "common": { "data_source": "", "span_length": 1, - "end_date": "2020-09-02" + "end_date": "2020-09-02", + "api_credentials": "" } } pd.set_option("display.max_rows", None, "display.max_columns", None) @@ -472,7 +476,8 @@ class TestDateComparison: "common": { "data_source": "", "span_length": 1, - "end_date": "2020-09-02" + "end_date": "2020-09-02", + "api_credentials": "" } } diff --git a/_delphi_utils_python/tests/validator/test_validator.py b/_delphi_utils_python/tests/validator/test_validator.py index dc28aa42e..c302b57aa 100644 --- a/_delphi_utils_python/tests/validator/test_validator.py +++ b/_delphi_utils_python/tests/validator/test_validator.py @@ -15,7 +15,8 @@ def test_default_settings(self): "common": { "data_source": "", "span_length": 0, - "end_date": "2020-09-01" + "end_date": "2020-09-01", + "api_credentials": "" } } } @@ -46,7 +47,8 @@ def test_suppressed_errors(self): "signal": "b"}, {"check_name":"c", "date": None, - "geo_type": "d"}] + "geo_type": "d"}], + "api_credentials": "" } } } @@ -76,7 +78,8 @@ def test_incorrect_suppressed_errors(self): "date": None, "geo_type": "d"}, {"check_name": "a", - "fake": "b"}] + "fake": "b"}], + "api_credentials": "" } } }) @@ -98,7 +101,8 @@ def test_incorrect_suppressed_errors(self): {"check_name":"c", "date": None, "geo_type": "d"}, - ["ab"]] + ["ab"]], + "api_credentials": "" } } }) From 40c92ecc9d0b7915f760e637dd519bf61789da63 Mon Sep 17 00:00:00 2001 From: nmdefries <42820733+nmdefries@users.noreply.github.com> Date: Fri, 21 Jul 2023 11:14:38 -0400 Subject: [PATCH 11/21] alert about actual # of threads being used Co-authored-by: Katie Mazaitis --- _delphi_utils_python/delphi_utils/validator/datafetcher.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/_delphi_utils_python/delphi_utils/validator/datafetcher.py b/_delphi_utils_python/delphi_utils/validator/datafetcher.py index ae29c90dd..045d67585 100644 --- a/_delphi_utils_python/delphi_utils/validator/datafetcher.py +++ 
b/_delphi_utils_python/delphi_utils/validator/datafetcher.py @@ -223,7 +223,7 @@ def threaded_api_calls(data_source, min_date, max_date, """Get data from API for all geo-signal combinations in a threaded way.""" if n_threads > MAX_ALLOWED_THREADS: n_threads = MAX_ALLOWED_THREADS - warnings.warn(f"Warning: Don't run more than {MAX_ALLOWED_THREADS} threads at once due " + warnings.warn(f"Warning: instead of requested thread count, using only {MAX_ALLOWED_THREADS} threads due " + "to API resource limitations") output_dict = dict() From c81824e0ec48cf7113d76b81c37fcd508acda3dc Mon Sep 17 00:00:00 2001 From: Nat DeFries <42820733+nmdefries@users.noreply.github.com> Date: Fri, 21 Jul 2023 12:19:10 -0400 Subject: [PATCH 12/21] linting --- _delphi_utils_python/delphi_utils/validator/datafetcher.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/_delphi_utils_python/delphi_utils/validator/datafetcher.py b/_delphi_utils_python/delphi_utils/validator/datafetcher.py index 045d67585..39c8555cf 100644 --- a/_delphi_utils_python/delphi_utils/validator/datafetcher.py +++ b/_delphi_utils_python/delphi_utils/validator/datafetcher.py @@ -223,8 +223,8 @@ def threaded_api_calls(data_source, min_date, max_date, """Get data from API for all geo-signal combinations in a threaded way.""" if n_threads > MAX_ALLOWED_THREADS: n_threads = MAX_ALLOWED_THREADS - warnings.warn(f"Warning: instead of requested thread count, using only {MAX_ALLOWED_THREADS} threads due " - + "to API resource limitations") + warnings.warn("Warning: instead of requested thread count, using " + \ + f"only {MAX_ALLOWED_THREADS} threads due to API resource limitations") output_dict = dict() dict_lock = threading.Lock() From 5dbd2b035cd5e9d3b300b59d5daddf73a822ad7f Mon Sep 17 00:00:00 2001 From: Katie Mazaitis Date: Fri, 21 Jul 2023 13:14:26 -0400 Subject: [PATCH 13/21] Update data_quality issue template: katie -> nolan --- .github/ISSUE_TEMPLATE/data_quality_issue.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/ISSUE_TEMPLATE/data_quality_issue.md b/.github/ISSUE_TEMPLATE/data_quality_issue.md index 769adde01..4d4ad2874 100644 --- a/.github/ISSUE_TEMPLATE/data_quality_issue.md +++ b/.github/ISSUE_TEMPLATE/data_quality_issue.md @@ -3,7 +3,7 @@ name: Data quality issue about: Missing data, weird data, broken data title: '' labels: 'data quality' -assignees: 'krivard' +assignees: 'nolangormley' --- **Actual Behavior:** From 07320038ce6f766d8161e6e1f7742fbd07dd1fe4 Mon Sep 17 00:00:00 2001 From: Katie Mazaitis Date: Fri, 21 Jul 2023 13:15:18 -0400 Subject: [PATCH 14/21] Update create-release workflow: katie -> george --- .github/workflows/create-release.yml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/workflows/create-release.yml b/.github/workflows/create-release.yml index 55e7c0782..ba5cacf0f 100644 --- a/.github/workflows/create-release.yml +++ b/.github/workflows/create-release.yml @@ -72,8 +72,8 @@ jobs: base: prod title: Release covidcast-indicators ${{ steps.indicators.outputs.version }} labels: chore - reviewers: krivard - assignees: krivard + reviewers: melange396 + assignees: melange396 body: | Releasing: * covidcast-indicators ${{ steps.indicators.outputs.version }} From f2503e5b2c3ced3c66348d31210dba010e12a023 Mon Sep 17 00:00:00 2001 From: Katie Mazaitis Date: Fri, 21 Jul 2023 13:15:53 -0400 Subject: [PATCH 15/21] Update publish-release workflow: katie -> george --- .github/workflows/publish-release.yml | 4 ++-- 1 file changed, 2 insertions(+), 2 
deletions(-) diff --git a/.github/workflows/publish-release.yml b/.github/workflows/publish-release.yml index 630e44c44..a9f1ccae9 100644 --- a/.github/workflows/publish-release.yml +++ b/.github/workflows/publish-release.yml @@ -120,7 +120,7 @@ jobs: base: main title: 'chore: sync prod->main' labels: chore - reviewers: krivard - assignees: krivard + reviewers: melange396 + assignees: melange396 body: | Syncing Prod->Main. From 0f24afb9f720d80f3500cd8bf40665f3e6f99d5b Mon Sep 17 00:00:00 2001 From: Katie Mazaitis Date: Fri, 21 Jul 2023 13:17:10 -0400 Subject: [PATCH 16/21] Delete deprecated delphi-utils workflow --- .github/workflows/release-delphi-utils.yml | 108 --------------------- 1 file changed, 108 deletions(-) delete mode 100644 .github/workflows/release-delphi-utils.yml diff --git a/.github/workflows/release-delphi-utils.yml b/.github/workflows/release-delphi-utils.yml deleted file mode 100644 index 609bb012c..000000000 --- a/.github/workflows/release-delphi-utils.yml +++ /dev/null @@ -1,108 +0,0 @@ -name: (Deprecated) Release Delphi Utils - -on: - push: - branches: - - not_prod - paths: - - '_delphi_utils_python/**' - workflow_dispatch: - -jobs: - correct_repository: - runs-on: ubuntu-latest - steps: - - name: fail on fork - if: github.repository_owner != 'cmu-delphi' - run: exit 1 - - create_delphi_utils_release: - needs: correct_repository - runs-on: ubuntu-latest - defaults: - run: - working-directory: _delphi_utils_python - steps: - - name: Check out code - uses: actions/checkout@v2 - with: - ssh-key: ${{ secrets.CMU_DELPHI_DEPLOY_MACHINE_SSH }} - - name: Set up Python 3.8 - uses: actions/setup-python@v2 - with: - python-version: 3.8 - - name: Extract version - id: extract_version - run: | - python -m pip install bump2version - echo -n "::set-output name=version::" - bump2version --dry-run --list patch | grep ^current_version | sed -r s,"^.*=",, - - name: Tag version - run: | - git tag delphi-utils/v${{ steps.extract_version.outputs.version }} - git push --tags - outputs: - version: ${{ steps.extract_version.outputs.version }} - - upload_pypi: - needs: create_delphi_utils_release - runs-on: ubuntu-latest - defaults: - run: - working-directory: _delphi_utils_python - steps: - - name: Check out code - uses: actions/checkout@v2 - - name: Set up Python 3.8 - uses: actions/setup-python@v2 - with: - python-version: 3.8 - - name: Install testing dependencies - run: | - python -m pip install --upgrade pip - pip install pylint pytest pydocstyle wheel twine - - name: Install - run: | - make install - - name: Release - run: | - make release - - uses: actions/upload-artifact@v2 - with: - name: delphi_utils - path: _delphi_utils_python/dist/*.tar.gz - - name: Publish a Python distribution to PyPI - uses: pypa/gh-action-pypi-publish@release/v1 - with: - user: __token__ - password: ${{ secrets.DELPHI_PYPI_PROD_TOKEN }} - packages_dir: _delphi_utils_python/dist/ - skip_existing: true - # repository_url: https://test.pypi.org/legacy/ - - sync_main: - needs: correct_repository - runs-on: ubuntu-latest - steps: - - name: Check out code - uses: actions/checkout@v2 - with: - ref: main - ssh-key: ${{ secrets.CMU_DELPHI_DEPLOY_MACHINE_SSH }} - - name: Reset main branch - run: | - git fetch origin prod:prod - git reset --hard prod - - name: Create pull request into main - uses: peter-evans/create-pull-request@v3 - with: - token: ${{ secrets.CMU_DELPHI_DEPLOY_MACHINE_PAT }} - branch: bot/sync-prod-main - commit-message: 'chore: sync prod-main' - base: main - title: 'chore: sync prod->main' - labels: 
chore - reviewers: krivard - assignees: krivard - body: | - Syncing Prod->Main. From 64bde444d386bae069d1a17a63cabe283ae76702 Mon Sep 17 00:00:00 2001 From: Katie Mazaitis Date: Fri, 21 Jul 2023 13:17:41 -0400 Subject: [PATCH 17/21] Delete deprecated delphi-utils release workflow --- .../workflows/create-delphi-utils-release.yml | 48 ------------------- 1 file changed, 48 deletions(-) delete mode 100644 .github/workflows/create-delphi-utils-release.yml diff --git a/.github/workflows/create-delphi-utils-release.yml b/.github/workflows/create-delphi-utils-release.yml deleted file mode 100644 index 4d35b2253..000000000 --- a/.github/workflows/create-delphi-utils-release.yml +++ /dev/null @@ -1,48 +0,0 @@ -name: (Deprecated) Create Delphi Utils Release - -on: - workflow_dispatch: - inputs: - versionName: - description: 'Semantic Version Number (i.e., 5.5.0 or patch, minor, major, prepatch, preminor, premajor, prerelease)' - required: true - default: patch - -jobs: - create_release: - runs-on: ubuntu-latest - defaults: - run: - working-directory: _delphi_utils_python - steps: - - name: Check out code - uses: actions/checkout@v2 - with: - ref: prod - ssh-key: ${{ secrets.CMU_DELPHI_DEPLOY_MACHINE_SSH }} - - name: Reset prod branch - run: | - git fetch origin main:main - git reset --hard main - - name: Set up Python 3.8 - uses: actions/setup-python@v2 - with: - python-version: 3.8 - - name: Change version number - id: version - run: | - python -m pip install bump2version - echo -n "::set-output name=next_tag::" - bump2version --list ${{ github.event.inputs.versionName }} | grep ^new_version | sed -r s,"^.*=",, - - name: Create pull request into prod - uses: peter-evans/create-pull-request@v3 - with: - branch: release/${{ steps.version.outputs.next_tag }} - commit-message: 'chore: release ${{ steps.version.outputs.next_tag }}' - base: prod - title: Release ${{ steps.version.outputs.next_tag }} - labels: chore - reviewers: krivard - assignees: krivard - body: | - Releasing ${{ steps.version.outputs.next_tag }}. 
From 885e6ce0182cac16fc0b923c392854bd42163cb9 Mon Sep 17 00:00:00 2001 From: Nat DeFries <42820733+nmdefries@users.noreply.github.com> Date: Fri, 21 Jul 2023 16:48:40 -0400 Subject: [PATCH 18/21] increase hhs expected lag settings by 1 week --- ansible/templates/hhs_hosp-params-prod.json.j2 | 4 ++-- ansible/templates/sir_complainsalot-params-prod.json.j2 | 2 +- hhs_hosp/params.json.template | 4 ++-- sir_complainsalot/params.json.template | 2 +- 4 files changed, 6 insertions(+), 6 deletions(-) diff --git a/ansible/templates/hhs_hosp-params-prod.json.j2 b/ansible/templates/hhs_hosp-params-prod.json.j2 index fbdffa8f9..55b11555c 100644 --- a/ansible/templates/hhs_hosp-params-prod.json.j2 +++ b/ansible/templates/hhs_hosp-params-prod.json.j2 @@ -8,8 +8,8 @@ "data_source": "hhs", "api_credentials": "{{ validation_api_key }}", "span_length": 14, - "min_expected_lag": {"all": "1"}, - "max_expected_lag": {"all": "7"}, + "min_expected_lag": {"all": "8"}, + "max_expected_lag": {"all": "14"}, "dry_run": true, "suppressed_errors": [] }, diff --git a/ansible/templates/sir_complainsalot-params-prod.json.j2 b/ansible/templates/sir_complainsalot-params-prod.json.j2 index f2c3ee68e..d32428b3e 100644 --- a/ansible/templates/sir_complainsalot-params-prod.json.j2 +++ b/ansible/templates/sir_complainsalot-params-prod.json.j2 @@ -48,7 +48,7 @@ "maintainers": [] }, "hhs": { - "max_age":8, + "max_age":15, "maintainers": [] } } diff --git a/hhs_hosp/params.json.template b/hhs_hosp/params.json.template index e72e0bb86..c2004f935 100644 --- a/hhs_hosp/params.json.template +++ b/hhs_hosp/params.json.template @@ -7,8 +7,8 @@ "common": { "data_source": "hhs", "span_length": 14, - "min_expected_lag": {"all": "1"}, - "max_expected_lag": {"all": "7"}, + "min_expected_lag": {"all": "8"}, + "max_expected_lag": {"all": "14"}, "dry_run": true, "suppressed_errors": [] }, diff --git a/sir_complainsalot/params.json.template b/sir_complainsalot/params.json.template index b6c7f885f..058069efb 100644 --- a/sir_complainsalot/params.json.template +++ b/sir_complainsalot/params.json.template @@ -47,7 +47,7 @@ "maintainers": [] }, "hhs": { - "max_age":8, + "max_age":15, "maintainers": [] } } From a6a9c2b86fafd4dff43c120b8ae4c66a6292af81 Mon Sep 17 00:00:00 2001 From: Delphi Deploy Bot Date: Wed, 26 Jul 2023 15:10:52 +0000 Subject: [PATCH 19/21] chore: bump delphi_utils to 0.3.19 --- _delphi_utils_python/.bumpversion.cfg | 2 +- _delphi_utils_python/delphi_utils/__init__.py | 2 +- _delphi_utils_python/setup.py | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/_delphi_utils_python/.bumpversion.cfg b/_delphi_utils_python/.bumpversion.cfg index a54d86d02..25f567b85 100644 --- a/_delphi_utils_python/.bumpversion.cfg +++ b/_delphi_utils_python/.bumpversion.cfg @@ -1,5 +1,5 @@ [bumpversion] -current_version = 0.3.18 +current_version = 0.3.19 commit = True message = chore: bump delphi_utils to {new_version} tag = False diff --git a/_delphi_utils_python/delphi_utils/__init__.py b/_delphi_utils_python/delphi_utils/__init__.py index 00259dc5a..cb3316671 100644 --- a/_delphi_utils_python/delphi_utils/__init__.py +++ b/_delphi_utils_python/delphi_utils/__init__.py @@ -15,4 +15,4 @@ from .nancodes import Nans from .weekday import Weekday -__version__ = "0.3.18" +__version__ = "0.3.19" diff --git a/_delphi_utils_python/setup.py b/_delphi_utils_python/setup.py index 1668a34f6..cac08e1f9 100644 --- a/_delphi_utils_python/setup.py +++ b/_delphi_utils_python/setup.py @@ -27,7 +27,7 @@ setup( name="delphi_utils", - version="0.3.18", + 
version="0.3.19", description="Shared Utility Functions for Indicators", long_description=long_description, long_description_content_type="text/markdown", From f59b90b4a8cb70c0c83ddb0c0c3fb2430c291982 Mon Sep 17 00:00:00 2001 From: Delphi Deploy Bot Date: Wed, 26 Jul 2023 15:10:53 +0000 Subject: [PATCH 20/21] chore: bump covidcast-indicators to 0.3.44 --- .bumpversion.cfg | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.bumpversion.cfg b/.bumpversion.cfg index 8b614a846..6b0da0bdb 100644 --- a/.bumpversion.cfg +++ b/.bumpversion.cfg @@ -1,5 +1,5 @@ [bumpversion] -current_version = 0.3.43 +current_version = 0.3.44 commit = True message = chore: bump covidcast-indicators to {new_version} tag = False From c9d2147280bc4ffc6cb955d95227e5f474d11906 Mon Sep 17 00:00:00 2001 From: rzats Date: Wed, 26 Jul 2023 15:10:53 +0000 Subject: [PATCH 21/21] [create-pull-request] automated change --- changehc/version.cfg | 2 +- claims_hosp/version.cfg | 2 +- doctor_visits/version.cfg | 2 +- google_symptoms/version.cfg | 2 +- hhs_hosp/version.cfg | 2 +- nchs_mortality/version.cfg | 2 +- nowcast/version.cfg | 2 +- quidel_covidtest/version.cfg | 2 +- sir_complainsalot/version.cfg | 2 +- 9 files changed, 9 insertions(+), 9 deletions(-) diff --git a/changehc/version.cfg b/changehc/version.cfg index 8d2332969..aa45f9083 100644 --- a/changehc/version.cfg +++ b/changehc/version.cfg @@ -1 +1 @@ -current_version = 0.3.43 +current_version = 0.3.44 diff --git a/claims_hosp/version.cfg b/claims_hosp/version.cfg index 8d2332969..aa45f9083 100644 --- a/claims_hosp/version.cfg +++ b/claims_hosp/version.cfg @@ -1 +1 @@ -current_version = 0.3.43 +current_version = 0.3.44 diff --git a/doctor_visits/version.cfg b/doctor_visits/version.cfg index 8d2332969..aa45f9083 100644 --- a/doctor_visits/version.cfg +++ b/doctor_visits/version.cfg @@ -1 +1 @@ -current_version = 0.3.43 +current_version = 0.3.44 diff --git a/google_symptoms/version.cfg b/google_symptoms/version.cfg index 8d2332969..aa45f9083 100644 --- a/google_symptoms/version.cfg +++ b/google_symptoms/version.cfg @@ -1 +1 @@ -current_version = 0.3.43 +current_version = 0.3.44 diff --git a/hhs_hosp/version.cfg b/hhs_hosp/version.cfg index 8d2332969..aa45f9083 100644 --- a/hhs_hosp/version.cfg +++ b/hhs_hosp/version.cfg @@ -1 +1 @@ -current_version = 0.3.43 +current_version = 0.3.44 diff --git a/nchs_mortality/version.cfg b/nchs_mortality/version.cfg index 8d2332969..aa45f9083 100644 --- a/nchs_mortality/version.cfg +++ b/nchs_mortality/version.cfg @@ -1 +1 @@ -current_version = 0.3.43 +current_version = 0.3.44 diff --git a/nowcast/version.cfg b/nowcast/version.cfg index 8d2332969..aa45f9083 100644 --- a/nowcast/version.cfg +++ b/nowcast/version.cfg @@ -1 +1 @@ -current_version = 0.3.43 +current_version = 0.3.44 diff --git a/quidel_covidtest/version.cfg b/quidel_covidtest/version.cfg index 8d2332969..aa45f9083 100644 --- a/quidel_covidtest/version.cfg +++ b/quidel_covidtest/version.cfg @@ -1 +1 @@ -current_version = 0.3.43 +current_version = 0.3.44 diff --git a/sir_complainsalot/version.cfg b/sir_complainsalot/version.cfg index 8d2332969..aa45f9083 100644 --- a/sir_complainsalot/version.cfg +++ b/sir_complainsalot/version.cfg @@ -1 +1 @@ -current_version = 0.3.43 +current_version = 0.3.44