From 1b254dc9be7823860f4f872ffc248ab35c22676d Mon Sep 17 00:00:00 2001 From: Nat DeFries <42820733+nmdefries@users.noreply.github.com> Date: Mon, 17 Jul 2023 13:58:51 -0400 Subject: [PATCH 01/21] remove dsew dir and ansible template params --- ...dsew_community_profile-params-prod.json.j2 | 53 - dsew_community_profile/.gitignore | 1 - dsew_community_profile/.pylintrc | 22 - dsew_community_profile/DETAILS.md | 133 --- dsew_community_profile/Makefile | 30 - dsew_community_profile/README.md | 86 -- dsew_community_profile/REVIEW.md | 38 - dsew_community_profile/cache/.gitignore | 0 .../delphi_dsew_community_profile/__init__.py | 13 - .../delphi_dsew_community_profile/__main__.py | 12 - .../constants.py | 118 --- .../delphi_dsew_community_profile/pull.py | 909 ------------------ .../delphi_dsew_community_profile/run.py | 127 --- dsew_community_profile/input_cache/.gitignore | 1 - dsew_community_profile/params.json.template | 45 - dsew_community_profile/setup.py | 30 - dsew_community_profile/static/.gitignore | 0 .../tests/params.json.template | 34 - dsew_community_profile/tests/test_pull.py | 612 ------------ dsew_community_profile/version.cfg | 1 - 20 files changed, 2265 deletions(-) delete mode 100644 ansible/templates/dsew_community_profile-params-prod.json.j2 delete mode 100644 dsew_community_profile/.gitignore delete mode 100644 dsew_community_profile/.pylintrc delete mode 100644 dsew_community_profile/DETAILS.md delete mode 100644 dsew_community_profile/Makefile delete mode 100644 dsew_community_profile/README.md delete mode 100644 dsew_community_profile/REVIEW.md delete mode 100644 dsew_community_profile/cache/.gitignore delete mode 100644 dsew_community_profile/delphi_dsew_community_profile/__init__.py delete mode 100644 dsew_community_profile/delphi_dsew_community_profile/__main__.py delete mode 100644 dsew_community_profile/delphi_dsew_community_profile/constants.py delete mode 100644 dsew_community_profile/delphi_dsew_community_profile/pull.py delete mode 100644 dsew_community_profile/delphi_dsew_community_profile/run.py delete mode 100644 dsew_community_profile/input_cache/.gitignore delete mode 100644 dsew_community_profile/params.json.template delete mode 100644 dsew_community_profile/setup.py delete mode 100644 dsew_community_profile/static/.gitignore delete mode 100644 dsew_community_profile/tests/params.json.template delete mode 100644 dsew_community_profile/tests/test_pull.py delete mode 100644 dsew_community_profile/version.cfg diff --git a/ansible/templates/dsew_community_profile-params-prod.json.j2 b/ansible/templates/dsew_community_profile-params-prod.json.j2 deleted file mode 100644 index f8777c510..000000000 --- a/ansible/templates/dsew_community_profile-params-prod.json.j2 +++ /dev/null @@ -1,53 +0,0 @@ -{ - "common": { - "export_dir": "/common/covidcast/receiving/dsew-cpr", - "log_filename": "/var/log/indicators/dsew_cpr.log" - }, - "indicator": { - "input_cache": "./input_cache", - "reports": "new", - "export_signals": [ - "confirmed covid-19 admissions", - "doses administered", - "booster doses administered", - "fully vaccinated", - "booster dose since", - "positivity" - ] - }, - "validation": { - "common": { - "data_source": "dsew-cpr", - "api_credentials": "{{ validation_api_key }}", - "span_length": 14, - "min_expected_lag": {"all": "3"}, - "max_expected_lag": {"all": "9"}, - "dry_run": true, - "suppressed_errors": [] - }, - "static": { - "minimum_sample_size": 0, - "missing_se_allowed": true, - "missing_sample_size_allowed": true - }, - "dynamic": { - 
"ref_window_size": 7, - "smoothed_signals": [ - "naats_positivity_7dav", - "confirmed_admissions_covid_1d_prop_7dav", - "confirmed_admissions_covid_1d_7dav", - "doses_admin_7dav", - "booster_doses_admin_7dav" - ] - } - }, - "archive": { - "aws_credentials": { - "aws_access_key_id": "{{ delphi_aws_access_key_id }}", - "aws_secret_access_key": "{{ delphi_aws_secret_access_key }}" - }, - "bucket_name": "delphi-covidcast-indicator-output", - "cache_dir": "./cache", - "indicator_prefix": "delphi_dsew_community_profile" - } -} diff --git a/dsew_community_profile/.gitignore b/dsew_community_profile/.gitignore deleted file mode 100644 index fec731824..000000000 --- a/dsew_community_profile/.gitignore +++ /dev/null @@ -1 +0,0 @@ -input_cache/* diff --git a/dsew_community_profile/.pylintrc b/dsew_community_profile/.pylintrc deleted file mode 100644 index f30837c7e..000000000 --- a/dsew_community_profile/.pylintrc +++ /dev/null @@ -1,22 +0,0 @@ - -[MESSAGES CONTROL] - -disable=logging-format-interpolation, - too-many-locals, - too-many-arguments, - # Allow pytest functions to be part of a class. - no-self-use, - # Allow pytest classes to have one test. - too-few-public-methods - -[BASIC] - -# Allow arbitrarily short-named variables. -variable-rgx=[a-z_][a-z0-9_]* -argument-rgx=[a-z_][a-z0-9_]* -attr-rgx=[a-z_][a-z0-9_]* - -[DESIGN] - -# Don't complain about pytest "unused" arguments. -ignored-argument-names=(_.*|run_as_module) \ No newline at end of file diff --git a/dsew_community_profile/DETAILS.md b/dsew_community_profile/DETAILS.md deleted file mode 100644 index 56816ee06..000000000 --- a/dsew_community_profile/DETAILS.md +++ /dev/null @@ -1,133 +0,0 @@ -# Dataset layout - -The Data Strategy and Execution Workgroup (DSEW) publishes a Community Profile -Report each weekday, comprising a pair of files: an Excel workbook (.xlsx) and a -PDF which shows select metrics from the workbook as time series charts and -choropleth maps. These files are listed as attachments on the healthdata.gov -site: - -https://healthdata.gov/Health/COVID-19-Community-Profile-Report/gqxm-d9w9 - -Each Excel file attachment has a filename. The filename contains a date, -presumably the publish date. The attachment also has an alphanumeric -assetId. Both the filename and the assetId are required for downloading the -file. Whether this means that updated versions of a particular file may be -uploaded by DSEW at later times is not known. The attachment does not explicitly -list an upload timestamp. To be safe, we cache our downloads using both the -assetId and the filename. - -# Workbook layout - -Each Excel file is a workbook with multiple sheets. The exemplar file used in -writing this indicator is "Community Profile Report 20211102.xlsx". 
The sheets -include: - -- User Notes: Instructions for using the workbook -- Overview: US National figures for the last 5 weeks, plus monthly peaks back to - April 2020 -- Regions*: Figures for FEMA regions (double-checked: they match HHS regions - except that FEMA 2 does not include Palau while HHS 2 does) -- States*: Figures for US states and territories -- CBSAs*: Figures for US Census Block Statistical Areas -- Counties*: Figures for US counties -- Weekly Transmission Categories: Lists of high, substantial, and moderate - transmission states and territories -- National Peaks: Monthly national peaks back to April 2020 -- National Historic: Daily national figures back to January 22 2020 -- Data Notes: Source and methods information for all metrics -- Color Thresholds: Color-coding is used extensively in all sheets; these are - the keys - -The starred sheets above have nearly-identical column layouts, and together -cover the county, MSA, state, and HHS geographical levels used in -covidcast. Rather than aggregate them ourselves and risk a mismatch, this -indicator lifts these geographical aggregations directly from the corresponding -sheets of the workbook. - -GeoMapper _is_ used to generate national figures from -state, due to architectural differences between the starred sheets and the -Overview sheet. If we discover that our nation-level figures differ too much -from those listed in the Overview sheet, we can add dedicated parsing for the -Overview sheet and remove GeoMapper from this indicator altogether. - -# Sheet layout - -## Headers - -Each starred sheet has two rows of headers. The first row uses merged cells to -group several columns together under a single "overheader". This overheader -often includes the reference period for that group of columns, such as: - -- CASES/DEATHS: LAST WEEK (October 26-November 1) -- TESTING: LAST WEEK (October 24-30, Test Volume October 20-26) -- TESTING: PREVIOUS WEEK (October 17-23, Test Volume October 13-19) - -Overheaders have changed periodically since the first report. For example, the -"TESTING: LAST WEEK" overheader above has also appeared as "VIRAL (RT-PCR) LAB -TESTING: LAST WEEK", with and without a separate reference date for Test -Volume. All known overheader forms are checked in test_pull.py. - -The second row contains a header for each column. The headers uniquely identify -each column included in the sheet. Column headers include spaces, and typically -specify both the metric and the reference period over which it was calculated, -such as: - -- Total NAATs - last 7 days (may be an underestimate due to delayed reporting) -- NAAT positivity rate - previous 7 days (may be an underestimate due to delayed - reporting) - -Columns headers have also changed periodically since the first report. For -example, the "Total NAATs - last 7 days" header above has also appeared as -"Total RT-PCR diagnostic tests - last 7 days". - -## Contents - -Each starred sheet contains test positivity and total test volume figures for -two reference periods, "last [week]" and "previous [week]". In some reports, the -reference periods for test positivity and total test volume are the same; in -others, they are different, such that the report contains figures for four -distinct reference periods, two for each metric we extract. - -# Time series conversions and parsing notes - -## Reference date - -The reference period in the overheader never includes the year. 
We guess the -reference year by picking the same year as the publish date (i.e., the date -extracted from the filename), and if the reference month is greater than the -publish month, subtract 1 from the reference year. This adequately covers the -December-January boundary. - -We select as reference date the end date of the reference period for each -metric. Reference periods are always 7 days, so this indicator produces -seven-day averages. We divide the total testing volume by seven and leave the -test positivity alone. - -## Geo ID - -The Counties sheet lists FIPS codes numerically, such that FIPS with a leading -zero only have four digits. We fix this by zero-filling to five characters. - -MSAs are a subset of CBSAs. We fix this by selecting only CBSAs with type -"Metropolitan". - -Most of the starred sheets have the geo id as the first non-index column. The -Region sheet has no such column. We fix this by generating the HHS ids from the -index column instead. - -## Combining multiple reports - -Each report file generates two reference dates for each metric, up to four -reference dates total. Since it's not clear whether new versions of past files -are ever made available, the default mode (params.indicator.reports="new") -fetches any files that are not already in the input cache, then combines the -results into a single data frame before exporting. This will generate correct -behavior should (for instance) a previously-downloaded file get a new assetId. - -For the initial run on an empty input cache, and for runs configured to process -a range of reports (using params.indicator.reports=YYYY-mm-dd--YYYY-mm-dd), this -indicator makes no distinction between figures that came from different -reports. That may not be what you want. If the covidcast issue date needs to -match the date on the report filename, then the indicator must instead be run -repeatedly, with equal start and end dates, keeping the output of each run -separate. diff --git a/dsew_community_profile/Makefile b/dsew_community_profile/Makefile deleted file mode 100644 index bdea33afd..000000000 --- a/dsew_community_profile/Makefile +++ /dev/null @@ -1,30 +0,0 @@ -.PHONY = venv, lint, test, clean - -dir = $(shell find ./delphi_* -name __init__.py | grep -o 'delphi_[_[:alnum:]]*') - -venv: - python3.8 -m venv env - -install: venv - . env/bin/activate; \ - pip install wheel ; \ - pip install -e ../_delphi_utils_python ;\ - pip install -e . - -install-ci: venv - . env/bin/activate; \ - pip install wheel ; \ - pip install ../_delphi_utils_python ;\ - pip install . - -lint: - . env/bin/activate; pylint $(dir) - . env/bin/activate; pydocstyle $(dir) - -test: - . env/bin/activate ;\ - (cd tests && ../env/bin/pytest --cov=$(dir) --cov-report=term-missing) - -clean: - rm -rf env - rm -f params.json diff --git a/dsew_community_profile/README.md b/dsew_community_profile/README.md deleted file mode 100644 index dc8e1ef09..000000000 --- a/dsew_community_profile/README.md +++ /dev/null @@ -1,86 +0,0 @@ -# COVID-19 Community Profile Report - -The Data Strategy and Execution Workgroup (DSEW) publishes a Community Profile -Report each weekday at this location: - -https://healthdata.gov/Health/COVID-19-Community-Profile-Report/gqxm-d9w9 - -This indicator extracts COVID-19 test figures from these reports. - -Indicator-specific parameters: - -* `input_cache`: a directory where Excel (.xlsx) files downloaded from - healthdata.gov will be stored for posterity. 
Each file is 3.3 MB in size, so - we expect this directory to require ~1GB of disk space for each year of - operation. -* `reports`: {new | all | YYYY-mm-dd--YYYY-mm-dd} a string indicating which - reports to export. The default, "new", downloads and exports only reports not - already found in the input cache. The "all" setting exports data for all - available reports, downloading them to the input cache if necessary. The date - range setting refers to the date listed in the filename for the report, - presumably the publish date. Only reports named with a date within the - specified range (inclusive) will be downloaded to the input cache if necessary - and exported. -* `export_start_date`: a YYYY-mm-dd string indicating the first date to export. -* `export_end_date`: a YYYY-mm-dd string indicating the final date to export. -* `export_signals`: list of string keys from constants.SIGNALS indicating which - signals to export - -## Running the Indicator - -The indicator is run by directly executing the Python module contained in this -directory. The safest way to do this is to create a virtual environment, -installed the common DELPHI tools, and then install the module and its -dependencies. To do this, run the following command from this directory: - -``` -make install -``` - -This command will install the package in editable mode, so you can make changes that -will automatically propagate to the installed package. - -All of the user-changable parameters are stored in `params.json`. To execute -the module and produce the output datasets (by default, in `receiving`), run -the following: - -``` -env/bin/python -m delphi_dsew_community_profile -``` - -If you want to enter the virtual environment in your shell, -you can run `source env/bin/activate`. Run `deactivate` to leave the virtual environment. - -Once you are finished, you can remove the virtual environment and -params file with the following: - -``` -make clean -``` - -## Testing the code - -To run static tests of the code style, run the following command: - -``` -make lint -``` - -Unit tests are also included in the module. To execute these, run the following -command from this directory: - -``` -make test -``` - -To run individual tests, run the following: - -``` -(cd tests && ../env/bin/pytest .py --cov=delphi_dsew_community_profile --cov-report=term-missing) -``` - -The output will show the number of unit tests that passed and failed, along -with the percentage of code covered by the tests. - -None of the linting or unit tests should fail, and the code lines that are not covered by unit tests should be small and -should not include critical sub-routines. diff --git a/dsew_community_profile/REVIEW.md b/dsew_community_profile/REVIEW.md deleted file mode 100644 index 03f87b17a..000000000 --- a/dsew_community_profile/REVIEW.md +++ /dev/null @@ -1,38 +0,0 @@ -## Code Review (Python) - -A code review of this module should include a careful look at the code and the -output. To assist in the process, but certainly not in replace of it, please -check the following items. 
- -**Documentation** - -- [ ] the README.md file template is filled out and currently accurate; it is -possible to load and test the code using only the instructions given -- [ ] minimal docstrings (one line describing what the function does) are -included for all functions; full docstrings describing the inputs and expected -outputs should be given for non-trivial functions - -**Structure** - -- [ ] code should pass lint checks (`make lint`) -- [ ] any required metadata files are checked into the repository and placed -within the directory `static` -- [ ] any intermediate files that are created and stored by the module should -be placed in the directory `cache` -- [ ] final expected output files to be uploaded to the API are placed in the -`receiving` directory; output files should not be committed to the respository -- [ ] all options and API keys are passed through the file `params.json` -- [ ] template parameter file (`params.json.template`) is checked into the -code; no personal (i.e., usernames) or private (i.e., API keys) information is -included in this template file - -**Testing** - -- [ ] module can be installed in a new virtual environment (`make install`) -- [ ] reasonably high level of unit test coverage covering all of the main logic -of the code (e.g., missing coverage for raised errors that do not currently seem -possible to reach are okay; missing coverage for options that will be needed are -not) -- [ ] all unit tests run without errors (`make test`) -- [ ] indicator directory has been added to GitHub CI -(`covidcast-indicators/.github/workflows/python-ci.yml`) diff --git a/dsew_community_profile/cache/.gitignore b/dsew_community_profile/cache/.gitignore deleted file mode 100644 index e69de29bb..000000000 diff --git a/dsew_community_profile/delphi_dsew_community_profile/__init__.py b/dsew_community_profile/delphi_dsew_community_profile/__init__.py deleted file mode 100644 index 52a507259..000000000 --- a/dsew_community_profile/delphi_dsew_community_profile/__init__.py +++ /dev/null @@ -1,13 +0,0 @@ -# -*- coding: utf-8 -*- -"""Module to pull and clean indicators from the XXXXX source. - -This file defines the functions that are made public by the module. As the -module is intended to be executed though the main method, these are primarily -for testing. -""" - -from __future__ import absolute_import - -from . import run - -__version__ = "0.1.0" diff --git a/dsew_community_profile/delphi_dsew_community_profile/__main__.py b/dsew_community_profile/delphi_dsew_community_profile/__main__.py deleted file mode 100644 index ab5a749dc..000000000 --- a/dsew_community_profile/delphi_dsew_community_profile/__main__.py +++ /dev/null @@ -1,12 +0,0 @@ -# -*- coding: utf-8 -*- -"""Call the function run_module when executed. - -This file indicates that calling the module (`python -m delphi_dsew_community_profile`) will -call the function `run_module` found within the run.py file. There should be -no need to change this template. 
-""" - -from delphi_utils import read_params -from .run import run_module # pragma: no cover - -run_module(read_params()) # pragma: no cover diff --git a/dsew_community_profile/delphi_dsew_community_profile/constants.py b/dsew_community_profile/delphi_dsew_community_profile/constants.py deleted file mode 100644 index 632bfc2ae..000000000 --- a/dsew_community_profile/delphi_dsew_community_profile/constants.py +++ /dev/null @@ -1,118 +0,0 @@ -"""Registry for variations.""" -from collections.abc import Callable as function -from dataclasses import dataclass - -URL_PREFIX = "https://healthdata.gov/api/views/gqxm-d9w9" -DOWNLOAD_ATTACHMENT = URL_PREFIX + "/files/{assetId}?download=true&filename={filename}" -DOWNLOAD_LISTING = URL_PREFIX + ".json" - -INTERP_LENGTH = 5 - -@dataclass -class Transform: - """Transformation filters for interpreting a particular sheet in the workbook.""" - - name: str = None - level: str = None - row_filter: function = None - geo_id_select: function = None - geo_id_apply: function = None - -T_FIRST = lambda df: df[df.columns[0]] -TRANSFORMS = { - t.name: t for t in [ - Transform( - name="Regions", - level="hhs", - geo_id_select=lambda df: df.index.to_series(), - geo_id_apply=lambda x: x.replace("Region ", "") - ), - Transform( - name="States", - level="state", - geo_id_select=T_FIRST, - geo_id_apply=lambda x: x.lower() - ), - Transform( - name="CBSAs", - level="msa", - row_filter=lambda df: df['CBSA type'] == "Metropolitan", - geo_id_select=T_FIRST, - geo_id_apply=lambda x: f"{x}" - ), - Transform( - name="Counties", - level="county", - geo_id_select=T_FIRST, - geo_id_apply=lambda x: f"{x:05}" - ) - ]} - -# key: signal id, string pattern used to find column to report as signal -# is_rate: originating signal is a percentage (e.g. test positivity) -# is_cumulative: originating signal is cumulative (e.g. vaccine doses ever administered) -# api_name: name to use in API -# make_prop: report originating signal as-is and per 100k population -# api_prop_name: name to use in API for proportion signal -SIGNALS = { - "total": { - "is_rate" : False, - "api_name": "covid_naat_num_7dav", - "make_prop": False, - "is_cumulative" : False - }, - "positivity": { - "is_rate" : True, - "api_name": "covid_naat_pct_positive_7dav", - "make_prop": False, - "is_cumulative" : False - }, - "confirmed covid-19 admissions": { - "is_rate" : False, - "api_name": "confirmed_admissions_covid_1d_7dav", - "make_prop": True, - "api_prop_name": "confirmed_admissions_covid_1d_prop_7dav", - "is_cumulative" : False - }, - "fully vaccinated": { - "is_rate" : False, - "api_name": "people_full_vaccinated", - "make_prop": False, - "is_cumulative" : True - }, - "booster dose since": { - "is_rate" : False, - "api_name": "people_booster_doses", - "make_prop": False, - "is_cumulative" : True - }, - "booster doses administered": { - "is_rate" : False, - "api_name": "booster_doses_admin_7dav", - "make_prop": False, - "is_cumulative" : False - }, - "doses administered": { - "is_rate" : False, - "api_name": "doses_admin_7dav", - "make_prop": False, - "is_cumulative" : False - } -} - -COUNTS_7D_SIGNALS = {key for key, value in SIGNALS.items() \ - if not((value["is_rate"]) or (value["is_cumulative"]))} - -def make_signal_name(key, is_prop=False): - """Convert a signal key to the corresponding signal name for the API. - - Note, this function gets called twice with the same `key` for signals that support - population-proportion ("prop") variants. 
- """ - if is_prop: - return SIGNALS[key]["api_prop_name"] - return SIGNALS[key]["api_name"] - -NEWLINE = "\n" -IS_PROP = True -NOT_PROP = False diff --git a/dsew_community_profile/delphi_dsew_community_profile/pull.py b/dsew_community_profile/delphi_dsew_community_profile/pull.py deleted file mode 100644 index e9b8d24a1..000000000 --- a/dsew_community_profile/delphi_dsew_community_profile/pull.py +++ /dev/null @@ -1,909 +0,0 @@ -# -*- coding: utf-8 -*- -"""Functions to call when downloading data.""" -from dataclasses import dataclass -import datetime -import os -import re -from typing import Dict, Tuple -from urllib.parse import quote_plus as quote_as_url - -import pandas as pd -import numpy as np -import requests - -from delphi_utils.geomap import GeoMapper - -from .constants import ( - TRANSFORMS, SIGNALS, COUNTS_7D_SIGNALS, NEWLINE, - IS_PROP, NOT_PROP, - DOWNLOAD_ATTACHMENT, DOWNLOAD_LISTING, - INTERP_LENGTH -) - -DataDict = Dict[Tuple[str, str, bool], pd.DataFrame] - -# YYYYMMDD -# example: "Community Profile Report 20211104.xlsx" -RE_DATE_FROM_FILENAME = re.compile(r'.*([0-9]{4})([0-9]{2})([0-9]{2}).*xlsx') - -# example: "TESTING: LAST WEEK (October 24-30, Test Volume October 20-26)" -# example: "TESTING: PREVIOUS WEEK (October 17-23, Test Volume October 13-19)" -DATE_EXP = r'(?:([A-Za-z]*) )?([0-9]{1,2})' -DATE_RANGE_EXP = f"{DATE_EXP}-{DATE_EXP}" -RE_DATE_FROM_TEST_HEADER = re.compile( - rf'.*TESTING: (.*) WEEK \({DATE_RANGE_EXP}(?:, Test Volume ({DATE_RANGE_EXP}))? *\)' -) - -# example: "HOSPITAL UTILIZATION: LAST WEEK (January 2-8)" -RE_DATE_FROM_HOSP_HEADER = re.compile( - rf'HOSPITAL UTILIZATION: (.*) WEEK \({DATE_RANGE_EXP}\)' -) - -# example: "COVID-19 VACCINATION DATA: LAST WEEK (January 5-11)" -RE_DATE_FROM_VAC_HEADER_WEEK= re.compile( - rf'COVID-19 VACCINATION DATA: (.*) WEEK \({DATE_RANGE_EXP}\)' -) - -# example: 'COVID-19 VACCINATION DATA: CUMULATIVE (January 11)' -RE_DATE_FROM_VAC_HEADER_CUMULATIVE= re.compile( - rf'COVID-19 VACCINATION DATA: CUMULATIVE (.*)\({DATE_EXP}\)' -) - -# example: "NAAT positivity rate - last 7 days (may be an underestimate due to delayed reporting)" -# example: "Total NAATs - last 7 days (may be an underestimate due to delayed reporting)" -RE_COLUMN_FROM_HEADER = re.compile('- (.*) 7 days') - -@dataclass -class DatasetTimes: - """Collect reference dates for a column.""" - - column: str - positivity_reference_date: datetime.date - total_reference_date: datetime.date - hosp_reference_date: datetime.date - vac_reference_date: datetime.date - cumulative_vac_reference_date: datetime.date - - @staticmethod - def from_header(header, publish_date): - """Convert reference dates in overheader to DatasetTimes.""" - positivity_reference_date = None - total_reference_date = None - hosp_reference_date = None - vac_reference_date = None - cumulative_vac_reference_date= None - def as_date(sub_result, is_single_date): - if is_single_date: - month = sub_result[0] - day = sub_result[1] - month_numeric = datetime.datetime.strptime(month, "%B").month - else: - month = sub_result[2] if sub_result[2] else sub_result[0] - assert month, f"Bad month in header: {header}\nsub_result: {sub_result}" - month_numeric = datetime.datetime.strptime(month, "%B").month - day = sub_result[3] - year = publish_date.year - # year boundary - if month_numeric > publish_date.month: - year -= 1 - return datetime.datetime.strptime(f"{year}-{month}-{day}", "%Y-%B-%d").date() - - if RE_DATE_FROM_TEST_HEADER.match(header): - findall_result = RE_DATE_FROM_TEST_HEADER.findall(header)[0] - 
column = findall_result[0].lower() - positivity_reference_date = as_date(findall_result[1:5], False) - if findall_result[6]: - # Reports published starting 2021-03-17 specify different reference - # dates for positivity and total test volume - total_reference_date = as_date(findall_result[6:10], False) - else: - total_reference_date = positivity_reference_date - elif RE_DATE_FROM_HOSP_HEADER.match(header): - findall_result = RE_DATE_FROM_HOSP_HEADER.findall(header)[0] - column = findall_result[0].lower() - hosp_reference_date = as_date(findall_result[1:5], False) - elif RE_DATE_FROM_VAC_HEADER_WEEK.match(header): - findall_result = RE_DATE_FROM_VAC_HEADER_WEEK.findall(header)[0] - column = findall_result[0].lower() - vac_reference_date = as_date(findall_result[1:5], False) - elif RE_DATE_FROM_VAC_HEADER_CUMULATIVE.match(header): - findall_result = RE_DATE_FROM_VAC_HEADER_CUMULATIVE.findall(header)[0] - column = findall_result[0].lower() - cumulative_vac_reference_date = as_date(findall_result[1:], True) - else: - raise ValueError(f"Couldn't find reference date in header '{header}'") - return DatasetTimes(column, positivity_reference_date, - total_reference_date, hosp_reference_date, - cumulative_vac_reference_date, vac_reference_date) - def __getitem__(self, key): - """Use DatasetTimes like a dictionary.""" - ref_list = list(SIGNALS.keys()) - if key.lower()=="positivity": - return self.positivity_reference_date - if key.lower()=="total": - return self.total_reference_date - if key.lower()=="confirmed covid-19 admissions": - return self.hosp_reference_date - if key.lower() in ["doses administered","booster doses administered"]: - return self.cumulative_vac_reference_date - if key.lower() in ["fully vaccinated","booster dose since"]: - return self.vac_reference_date - raise ValueError( - f"Bad reference date type request '{key}'; " + \ - "need one of: " + " ,".join(ref_list) - ) - def __setitem__(self, key, newvalue): - """Use DatasetTimes like a dictionary.""" - ref_list = list(SIGNALS.keys()) - if key.lower()=="positivity": - self.positivity_reference_date = newvalue - if key.lower()=="total": - self.total_reference_date = newvalue - if key.lower()=="confirmed covid-19 admissions": - self.hosp_reference_date = newvalue - if key.lower() in ["doses administered","booster doses administered"]: - self.cumulative_vac_reference_date = newvalue - if key.lower() in ["fully vaccinated","booster dose since"]: - self.vac_reference_date = newvalue - if key.lower() not in ref_list: - raise ValueError( - f"Bad reference date type request '{key}'; " + \ - "need one of: " + " ,".join(ref_list) - ) - def __eq__(self, other): - """Check equality by value.""" - return isinstance(other, DatasetTimes) and \ - other.column == self.column and \ - other.positivity_reference_date == self.positivity_reference_date and \ - other.total_reference_date == self.total_reference_date - -class Dataset: - """All data extracted from a single report file.""" - - def __init__(self, config, sheets=TRANSFORMS.keys(), logger=None): - """Create a new Dataset instance. - - Download and cache the requested report file. - - Parse the file into data frames at multiple geo levels. 
- """ - self.publish_date = self.parse_publish_date(config['filename']) - self.url = DOWNLOAD_ATTACHMENT.format( - assetId=config['assetId'], - filename=quote_as_url(config['filename']) - ) - if not os.path.exists(config['cached_filename']): - if logger: - logger.info("Downloading file", filename=config['cached_filename']) - resp = requests.get(self.url) - with open(config['cached_filename'], 'wb') as f: - f.write(resp.content) - - self.workbook = pd.ExcelFile(config['cached_filename']) - - self.dfs = {} - self.times = {} - for si in sheets: - assert si in TRANSFORMS, f"Bad sheet requested: {si}" - if logger: - logger.info("Building dfs", - sheet=f"{si}", - filename=config['cached_filename']) - sheet = TRANSFORMS[si] - self._parse_times_for_sheet(sheet) - self._parse_sheet(sheet) - - @staticmethod - def parse_publish_date(report_filename): - """Extract publish date from filename.""" - return datetime.date( - *[int(x) for x in RE_DATE_FROM_FILENAME.findall(report_filename)[0]] - ) - @staticmethod - def skip_overheader(header): - """Ignore irrelevant overheaders.""" - # include "TESTING: [LAST|PREVIOUS] WEEK (October 24-30, Test Volume October 20-26)" - # include "VIRAL (RT-PCR) LAB TESTING: [LAST|PREVIOUS] WEEK (August 24-30, ..." - # include "HOSPITAL UTILIZATION: LAST WEEK (January 2-8)" - return not (isinstance(header, str) and \ - (((header.startswith("TESTING:") or \ - header.startswith("VIRAL (RT-PCR) LAB TESTING:") or \ - header.startswith("HOSPITAL UTILIZATION: ")) and \ - # exclude "TESTING: % CHANGE FROM PREVIOUS WEEK" \ - # exclude "TESTING: DEMOGRAPHIC DATA" \ - # exclude "HOSPITAL UTILIZATION: CHANGE FROM PREVIOUS WEEK" \ - # exclude "HOSPITAL UTILIZATION: DEMOGRAPHIC DATA" \ - header.find("WEEK (") > 0) or \ - # include "COVID-19 VACCINATION DATA: CUMULATIVE (January 25)" - # include "COVID-19 VACCINATION DATA: LAST WEEK (January 25-31)" - (header.startswith("COVID-19 VACCINATION DATA: CUMULATIVE") or - header.startswith("COVID-19 VACCINATION DATA: LAST WEEK") \ - ))) - - - def _parse_times_for_sheet(self, sheet): - """Record reference dates for this sheet.""" - # grab reference dates from overheaders - overheaders = pd.read_excel( - self.workbook, sheet_name=sheet.name, - header=None, - nrows=1 - ).values.flatten().tolist() - for h in overheaders: - if self.skip_overheader(h): - continue - - dt = DatasetTimes.from_header(h, self.publish_date) - if dt.column in self.times: - # Items that are not None should be the same between sheets. - # Fill None items with the newly calculated version of the - # field from dt. - for sig in SIGNALS: - if self.times[dt.column][sig] is not None and dt[sig] is not None: - assert self.times[dt.column][sig] == dt[sig], \ - f"Conflicting reference date from {sheet.name} {dt[sig]}" + \ - f"vs previous {self.times[dt.column][sig]}" - elif self.times[dt.column][sig] is None: - self.times[dt.column][sig] = dt[sig] - else: - self.times[dt.column] = dt - - if self.publish_date <= datetime.date(2021, 1, 11): - # No vaccination data available, so we only have hospitalization and testing overheaders - assert len(self.times) == 2, \ - f"No times extracted from overheaders:\n{NEWLINE.join(str(s) for s in overheaders)}" - else: - assert len(self.times) == 3, \ - f"No times extracted from overheaders:\n{NEWLINE.join(str(s) for s in overheaders)}" - - @staticmethod - def retain_header(header): - """Ignore irrelevant headers.""" - return all([ - # include "Total NAATs - [last|previous] 7 days ..." 
- # include "Total RT-PCR diagnostic tests - [last|previous] 7 days ..." - # include "NAAT positivity rate - [last|previous] 7 days ..." - # include "Viral (RT-PCR) lab test positivity rate - [last|previous] 7 days ..." - # include "Booster doses administered - [last|previous] 7 days ..." - # include "Doses administered - [last|previous] 7 days ..." - (header.startswith("Total NAATs") or - header.startswith("NAAT positivity rate") or - header.startswith("Total RT-PCR") or - header.startswith("Viral (RT-PCR)") or - header.startswith("Booster") or - header.startswith("Doses administered -") - ), - # exclude "NAAT positivity rate - absolute change ..." - header.find("7 days") > 0, - # exclude "NAAT positivity rate - last 7 days - ages <5" - header.find(" ages") < 0, - ]) or ( - # include "Confirmed COVID-19 admissions - last 7 days" - # exclude "Confirmed COVID-19 admissions - percent change" - # exclude "Confirmed COVID-19 admissions - last 7 days - ages <18" - # exclude "Confirmed COVID-19 admissions - last 7 days - age unknown" - # exclude "Confirmed COVID-19 admissions per 100 inpatient beds - last 7 days" - # exclude "Confirmed COVID-19 admissions per 100k - last 7 days" - header == "Confirmed COVID-19 admissions - last 7 days" - ) or all([ - # include "People who are fully vaccinated" - # include "People who have received a booster dose since August 13, 2021" - header.startswith("People who"), - # exclude "People who are fully vaccinated as % of total population" - # exclude "People who have received a booster dose as % of fully vaccinated population" - header.find("%") < 0, - # exclude "People who are fully vaccinated - ages 5-11" ... - # exclude "People who have received a booster dose - ages 65+" ... - header.find(" age") < 0, - # exclude "People who are fully vaccinated - 12-17" ... - header.find("-") < 0, - ]) or all([ - # include "People with a completed primary series" - header.startswith("People with a completed primary series"), - # exclude "People with a completed primary series as % of adult population" - header.find("%") < 0, - # exclude "People with a completed primary series - ages 65+" - header.find(" age") < 0, - # exclude "People with a completed primary series - 12-17" ... - header.find("-") < 0, - ]) or all([ - # include "People with full course administered" - header.startswith("People with full course"), - # exclude "People with full course administered as % of adult population" - header.find("%") < 0, - ]) - def _parse_sheet(self, sheet): - """Extract data frame for this sheet.""" - df = pd.read_excel( - self.workbook, - sheet_name=sheet.name, - header=1, - index_col=0, - ) - if sheet.row_filter: - df = df.loc[sheet.row_filter(df)] - - - def select_fn(h): - """Allow for default to the 7-day in the name of the dataframe column.""" - try: - return (RE_COLUMN_FROM_HEADER.findall(h)[0], h, h.lower()) - except IndexError: - return ("", h, h.lower()) - - select = [ - select_fn(h) - for h in list(df.columns) - if self.retain_header(h) - ] - - for sig in SIGNALS: - ## Check if field is known to be missing - # Hospital admissions not available at the county or CBSA level prior to Jan 8, 2021. - is_hosp_adm_before_jan8 = (sheet.level == "msa" or sheet.level == "county") \ - and self.publish_date < datetime.date(2021, 1, 8) \ - and sig == "confirmed covid-19 admissions" - # Booster data not available before November 1 2021. 
- is_booster_before_nov1 = self.publish_date < datetime.date(2021, 11, 1) \ - and (sig in ["booster dose since", "booster doses administered"]) - # Booster and weekly doses administered not available below the state level. - is_booster_below_state = ((sheet.level != "hhs" and sheet.level != "state") \ - and (sig in ["doses administered", \ - "booster doses administered", "booster dose since"])) - # Weekly doses administered not available on or before Apr 29, 2021. - is_dose_admin_apr29 = self.publish_date <= datetime.date(2021, 4, 29) \ - and sig == "doses administered" - # People fully vaccinated not available on or before Apr 11, 2021 at the CBSA level. - is_fully_vax_msa_before_apr11 = (sheet.level == "msa" or sheet.level == "county") \ - and self.publish_date <= datetime.date(2021, 4, 11) \ - and sig == "fully vaccinated" - # People fully vaccinated not available before Jan 15, 2021 at any geo level. - is_fully_vax_before_jan14 = self.publish_date <= datetime.date(2021, 1, 14) \ - and sig == "fully vaccinated" - - if any([is_hosp_adm_before_jan8, - is_booster_before_nov1, - is_booster_below_state, - is_dose_admin_apr29, - is_fully_vax_msa_before_apr11, - is_fully_vax_before_jan14 - ]): - self.dfs[(sheet.level, sig, NOT_PROP)] = pd.DataFrame( - columns = ["geo_id", "timestamp", "val", \ - "se", "sample_size", "publish_date"] - ) - continue - - sig_select = [s for s in select if s[-1].find(sig) >= 0] - # The name of the cumulative vaccination was changed after 03/09/2021 - # when J&J vaccines were added. - # fully vacinated signal was renamed again on 01/12/2023 - if (sig == "fully vaccinated") and (len(sig_select) == 0): - # Read these headers if "fully vaccinated" not found in source data - other_sigs = [ - "people with a completed primary series", - "people with full course administered" - ] - sig_select = [s for s in select if s[-1] in other_sigs] - - # Since "doses administered" is a substring of another desired header, - # "booster doses administered", we need to more strictly check if "doses administered" - # occurs at the beginning of a header to find the correct match. 
- if sig == "doses administered": - sig_select = [s for s in select if s[-1].startswith(sig)] - assert len(sig_select) > 0, \ - f"No {sig} in any of {select}\n\nAll headers:\n{NEWLINE.join(list(df.columns))}" - - self.dfs[(sheet.level, sig, NOT_PROP)] = pd.concat([ - pd.DataFrame({ - "geo_id": sheet.geo_id_select(df).apply(sheet.geo_id_apply), - "timestamp": pd.to_datetime(self.times[si[0]][sig]), - "val": df[si[-2]], - "se": None, - "sample_size": None, - "publish_date": self.publish_date - }) - for si in sig_select - ]) - - for sig in COUNTS_7D_SIGNALS: - assert (sheet.level, sig, NOT_PROP) in self.dfs.keys() - self.dfs[(sheet.level, sig, NOT_PROP)]["val"] /= 7 # 7-day total -> 7-day average - -def as_cached_filename(params, config): - """Formulate a filename to uniquely identify this report in the input cache.""" - # eg "Community Profile Report 20220128.xlsx" - # but delimiters vary; don't get tripped up if they do something wacky like - # Community.Profile.Report.20220128.xlsx - name, _, ext = config['filename'].rpartition(".") - return os.path.join( - params['indicator']['input_cache'], - f"{name}--{config['assetId']}.{ext}" - ) - -def fetch_listing(params): - """Generate the list of report files to process.""" - export_start_date = params['indicator'].get( - 'export_start_date', datetime.datetime.utcfromtimestamp(0).date() - ) - - listing = requests.get(DOWNLOAD_LISTING).json()['metadata']['attachments'] - # drop the pdf files - listing = [ - dict( - el, - cached_filename=as_cached_filename(params, el), - publish_date=Dataset.parse_publish_date(el['filename']) - ) - for el in listing if el['filename'].endswith("xlsx") - ] - - def check_valid_publish_date(x): - return x['publish_date'] >= export_start_date - - if params['indicator']['reports'] == 'new': - # drop files we already have in the input cache - keep = [ - el for el in listing - if not os.path.exists(el['cached_filename']) and check_valid_publish_date(el) - ] - elif params['indicator']['reports'].find("--") > 0: - # drop files outside the specified publish-date range - start_str, _, end_str = params['indicator']['reports'].partition("--") - start_date = datetime.datetime.strptime(start_str, "%Y-%m-%d").date() - end_date = datetime.datetime.strptime(end_str, "%Y-%m-%d").date() - keep = [ - el for el in listing - if (start_date <= el['publish_date'] <= end_date) and check_valid_publish_date(el) - ] - elif params['indicator']['reports'] == 'all': - keep = [ - el for el in listing if check_valid_publish_date(el) - ] - else: - raise ValueError("params['indicator']['reports'] is set to" \ - + f" {params['indicator']['reports']}, which isn't 'new', 'all', or a date range.") - - return extend_listing_for_interp(keep, listing) - -def extend_listing_for_interp(keep, listing): - """Grab additional files from the full listing for interpolation if needed. - - Selects files based purely on publish_date, so may include duplicates where - multiple reports for a single publish_date are available. - - Parameters: - - keep: list of reports desired in the final output - - listing: complete list of reports available from healthdata.gov - - Returns: list of reports including keep and additional files needed for - interpolation. 
- """ - publish_date_keeplist = set() - for el in keep: - # starts at 0 so includes keep publish_dates - for i in range(INTERP_LENGTH): - publish_date_keeplist.add(el['publish_date'] - datetime.timedelta(days=i)) - keep = [el for el in listing if el['publish_date'] in publish_date_keeplist] - return keep - -def download_and_parse(listing, logger): - """Convert a list of report files into Dataset instances.""" - datasets = {} - for item in listing: - d = Dataset(item, logger=logger) - for sig, df in d.dfs.items(): - if sig not in datasets: - datasets[sig] = [] - datasets[sig].append(df) - return datasets - -def nation_from_state(df, sig, geomapper): - """Compute nation level from state df.""" - if df.empty: - return df - if SIGNALS[sig]["is_rate"]: # true if sig is a rate - df = geomapper.add_population_column(df, "state_id") \ - .rename(columns={"population":"weight"}) - - norm_denom = df.groupby("timestamp").agg(norm_denom=("weight", "sum")) - df = df.join( - norm_denom, on="timestamp", how="left" - ).assign( - weight=lambda x: x.weight / x.norm_denom - ).drop( - "norm_denom", axis=1 - ) - # The filter in `fetch_new_reports` to keep most recent publish date - # gurantees that we'll only see one unique publish date per timestamp - # here, so just keep the first obs of each group. - publish_date_by_ts = df.groupby( - ["timestamp"] - )["publish_date"].first( - ).reset_index( - ) - df = geomapper.replace_geocode( - df.drop("publish_date", axis=1), - 'state_id', - 'nation', - new_col="geo_id" - ) - df["se"] = None - df["sample_size"] = None - # Recreate publish_date column - df = pd.merge(df, publish_date_by_ts, on="timestamp", how="left") - - return df - -def keep_latest_report(df, sig): - """Keep data associated with most recent report for each timestamp.""" - df = df.groupby( - "timestamp" - ).apply( - lambda x: x[x["publish_date"] == x["publish_date"].max()] - ).drop_duplicates( - ) - - if not df.empty: - df = df.reset_index(drop=True) - assert all(df.groupby( - ["timestamp", "geo_id"] - ).size( - ).reset_index( - drop=True - ) == 1), f"Duplicate rows in {sig} indicate that one or" \ - + " more reports were published multiple times and the copies differ" - - return df - -def fetch_new_reports(params, logger=None): - """Retrieve, compute, and collate all data we haven't seen yet.""" - listing = fetch_listing(params) - - # download and parse individual reports - datasets = download_and_parse(listing, logger) - # collect like signals together, keeping most recent publish date - ret = {} - - for key, lst in datasets.items(): - (_, sig, _) = key - latest_key_df = pd.concat(lst) - if sig in ("total", "positivity"): - latest_key_df = pd.concat(apply_thres_change_date( - keep_latest_report, - latest_key_df, - [sig] * 2 - )) - else: - latest_key_df = keep_latest_report(latest_key_df, sig) - - if not latest_key_df.empty: - ret[key] = latest_key_df - - # add nation from state - geomapper = GeoMapper() - for sig in SIGNALS: - state_key = ("state", sig, NOT_PROP) - if state_key not in ret: - continue - - if sig in ("total", "positivity"): - nation_df = pd.concat(apply_thres_change_date( - nation_from_state, - ret[state_key].rename(columns={"geo_id": "state_id"}), - [sig] * 2, - [geomapper] * 2 - )) - else: - nation_df = nation_from_state( - ret[state_key].rename(columns={"geo_id": "state_id"}), - sig, - geomapper - ) - ret[("nation", sig, NOT_PROP)] = nation_df - - for key, df in ret.copy().items(): - (geo, sig, prop) = key - - if sig == "positivity": - # Combine with test volume using publish date. 
- total_key = (geo, "total", prop) - ret[key] = unify_testing_sigs( - df, ret[total_key] - ).drop( - "publish_date", axis=1 - ) - - # No longer need "total" signal. - del ret[total_key] - elif sig != "total": - # If signal is not test volume or test positivity, we don't need - # publish date. - df = df.drop("publish_date", axis=1) - ret[key] = df - - if SIGNALS[sig]["make_prop"]: - ret[(geo, sig, IS_PROP)] = generate_prop_signal(df, geo, geomapper) - - ret = interpolate_missing_values(ret) - - return ret - -def interpolate_missing_values(dfs: DataDict) -> DataDict: - """Interpolates each signal in the dictionary of dfs.""" - interpolate_df = dict() - for key, df in dfs.items(): - # Here we exclude the 'positivity' signal from interpolation. This is a temporary fix. - # https://github.com/cmu-delphi/covidcast-indicators/issues/1576 - _, sig, _ = key - if sig == "positivity": - reindexed_group_df = df.set_index(["geo_id", "timestamp"]).sort_index().reset_index() - interpolate_df[key] = reindexed_group_df[~reindexed_group_df.val.isna()] - continue - - geo_dfs = [] - for geo, group_df in df.groupby("geo_id"): - reindexed_group_df = group_df.set_index("timestamp").reindex( - pd.date_range(group_df.timestamp.min(), group_df.timestamp.max()) - ) - reindexed_group_df["geo_id"] = geo - if "val" in reindexed_group_df.columns and not reindexed_group_df["val"].isna().all(): - reindexed_group_df["val"] = ( - reindexed_group_df["val"] - .astype(float) - .interpolate(method="linear", limit_area="inside") - ) - if "se" in reindexed_group_df.columns: - reindexed_group_df["se"] = ( - reindexed_group_df["se"] - .astype(float) - .interpolate(method="linear", limit_area="inside") - ) - if ( - "sample_size" in reindexed_group_df.columns - and not reindexed_group_df["sample_size"].isna().all() - ): - reindexed_group_df["sample_size"] = ( - reindexed_group_df["sample_size"] - .astype(float) - .interpolate(method="linear", limit_area="inside") - ) - if "publish_date" in reindexed_group_df.columns: - reindexed_group_df["publish_date"] = reindexed_group_df["publish_date"].fillna( - method="bfill" - ) - reindexed_group_df = reindexed_group_df[~reindexed_group_df.val.isna()] - geo_dfs.append(reindexed_group_df) - interpolate_df[key] = ( - pd.concat(geo_dfs) - .reset_index() - .rename(columns={"index": "timestamp"}) - .set_index(["geo_id", "timestamp"]) - .sort_index() - .reset_index() - ) - return interpolate_df - -def generate_prop_signal(df, geo, geo_mapper): - """Transform base df into a proportion (per 100k population).""" - if geo == "state": - geo = "state_id" - if geo == "county": - geo = "fips" - - # Add population data - if geo == "msa": - map_df = geo_mapper.get_crosswalk("fips", geo) - map_df = geo_mapper.add_population_column( - map_df, "fips" - ).drop( - "fips", axis=1 - ).groupby( - geo - ).sum( - numeric_only=True - ).reset_index( - ) - df = pd.merge(df, map_df, left_on="geo_id", right_on=geo, how="inner") - else: - df = geo_mapper.add_population_column(df, geo, geocode_col="geo_id") - - df["val"] = df["val"] / df["population"] * 100000 - df.drop(["population", geo], axis=1, inplace=True) - - return df - -def unify_testing_sigs(positivity_df, volume_df): - """ - Drop any observations with a sample size of 5 or less. Generate standard errors. - - This combines test positivity and testing volume into a single signal, - where testing volume *from the same spreadsheet/publish date* (NOT the - same reference date) is used as the sample size for test positivity. 
- - Total testing volume is typically provided for a 7-day period about 4 days - before the test positivity period. Since the CPR is only published on - weekdays, test positivity and test volume are only available for the same - reported dates 3 times a week. We have chosen to censor 7dav test - positivity based on the 7dav test volume provided in the same originating - spreadsheet, corresponding to a period ~4 days earlier. - - This approach makes the signals maximally available (5 days per week) with - low latency. It avoids complications of having to process multiple - spreadsheets each day, and the fact that test positivity and test volume - are not available for all the same reference dates. - - Discussion of decision and alternatives (Delphi-internal share drive): - https://docs.google.com/document/d/1MoIimdM_8OwG4SygoeQ9QEVZzIuDl339_a0xoYa6vuA/edit# - - """ - # Check that we have positivity *and* volume for each publishdate+geo, and - # that they have the same number of timestamps. - pos_count_ts = positivity_df.groupby( - ["publish_date", "geo_id"] - ).agg( - num_obs=("timestamp", "count"), - num_unique_obs=("timestamp", "nunique") - ) - vol_count_ts = volume_df.groupby( - ["publish_date", "geo_id"] - ).agg( - num_obs=("timestamp", "count"), - num_unique_obs=("timestamp", "nunique") - ) - merged = pos_count_ts.merge( - vol_count_ts, - on=["geo_id", "publish_date"], - how="outer", - indicator=True - ) - assert all( - merged["_merge"] == "both" - ) and all( - merged.num_obs_x == merged.num_obs_y - ) and all( - merged.num_unique_obs_x == merged.num_unique_obs_y - ), \ - "Each publish date-geo value combination should be available for both " + \ - "test positivity and test volume, and have the same number of timestamps available." - assert len(positivity_df.index) == len(volume_df.index), \ - "Test positivity and volume data have different numbers of observations." - expected_rows = len(positivity_df.index) - - volume_df = add_max_ts_col(volume_df)[ - ["geo_id", "publish_date", "val", "is_max_group_ts"] - ].rename( - columns={"val":"sample_size"} - ) - col_order = list(positivity_df.columns) - positivity_df = add_max_ts_col(positivity_df).drop(["sample_size"], axis=1) - - # Combine test positivity and test volume, maintaining "this week" and - # "previous week" status. Perform outer join here so that we can later - # check if any observations did not have a match. - df = pd.merge( - positivity_df, volume_df, - on=["publish_date", "geo_id", "is_max_group_ts"], - how="outer", - indicator=True - ).drop( - ["is_max_group_ts"], axis=1 - ) - - # Check that every volume observation was matched with a positivity observation. - assert (len(df.index) == expected_rows) and all(df["_merge"] == "both"), \ - "Some observations in the test positivity data were not matched with test volume data." - - # Drop everything with 5 or fewer total tests. - df = df.loc[df.sample_size > 5] - - # Generate stderr. - df = df.assign( - se=std_err(df) - ).drop( - ["_merge"], - axis=1 - ) - - return df[col_order] - -def add_max_ts_col(df): - """ - Add column to differentiate timestamps for a given publish date-geo combo. - - Each publish date is associated with up to two timestamps for test volume - and test positivity. The older timestamp corresponds to data from the - "previous week"; the newer timestamp corresponds to the "last week". 
- - Since test volume and test positivity timestamps don't match exactly, we - can't use them to merge the two signals together, but we still need a way - to uniquely identify observations to avoid duplicating observations during - the join. This new column, which is analagous to the "last/previous week" - classification, is used to merge on. - """ - assert_df = df.groupby( - ["publish_date", "geo_id"] - ).agg( - num_obs=("timestamp", "count"), - num_unique_obs=("timestamp", "nunique") - ) - assert all( - assert_df.num_obs <= 2 - ) and all( - assert_df.num_obs == assert_df.num_unique_obs - ), "Testing signals should have up to two timestamps per publish date-geo level " + \ - "combination. Each timestamp should be unique." - - max_ts_by_group = df.groupby( - ["publish_date", "geo_id"], as_index=False - )["timestamp"].max( - ).rename( - columns={"timestamp":"max_timestamp"} - ) - df = pd.merge( - df, max_ts_by_group, - on=["publish_date", "geo_id"], - how="outer" - ).assign( - is_max_group_ts=lambda df: df["timestamp"] == df["max_timestamp"] - ).drop( - ["max_timestamp"], axis=1 - ) - - return df - -def std_err(df): - """ - Find Standard Error of a binomial proportion. - - Assumes input sample_size are all > 0. - - Parameters - ---------- - df: pd.DataFrame - Columns: val, sample_size, ... - - Returns - ------- - pd.Series - Standard error of the positivity rate of PCR-specimen tests. - """ - assert all(df.sample_size > 0), "Sample sizes must be greater than 0" - p = df.val - n = df.sample_size - return np.sqrt(p * (1 - p) / n) - -def apply_thres_change_date(apply_fn, df, *apply_fn_args): - """ - Apply a function separately to data before and after the test volume change date. - - The test volume change date is when test volume and test positivity - started being reported for different reference dates within the same - report. This first occurred on 2021-03-17. - - Parameters - ---------- - apply_fn: function - function to apply to data before and after the test volume change date - df: pd.DataFrame - Columns: val, sample_size, ... - apply_fn_args: tuple of lists - variable number of additional arguments to pass to the `apply_fn`. - Each additional argument should be a list of length 2. The first - element of each list will be passed to the `apply_fn` when processing - pre-change date data; the second element will be used for the - post-change date data. - - Returns - ------- - map object - Iterator with two entries, one for the "before" data and one for the "after" data. - """ - change_date = datetime.date(2021, 3, 17) - list_of_dfs = [df[df.publish_date < change_date], df[df.publish_date >= change_date]] - - for arg_field in apply_fn_args: - assert len(arg_field) == 2, "Extra arguments must be iterables with " + \ - "length 2, the same as the number of dfs to process" - - return map(apply_fn, list_of_dfs, *apply_fn_args) diff --git a/dsew_community_profile/delphi_dsew_community_profile/run.py b/dsew_community_profile/delphi_dsew_community_profile/run.py deleted file mode 100644 index 6de1443a7..000000000 --- a/dsew_community_profile/delphi_dsew_community_profile/run.py +++ /dev/null @@ -1,127 +0,0 @@ -# -*- coding: utf-8 -*- -"""Functions to call when running the indicator. - -This module should contain a function called `run_module`, that is executed when -the module is run with `python -m delphi_dsew_community_profile`. -`run_module`'s lone argument should be a nested dictionary of parameters loaded -from the params.json file. 
We expect the `params` to have the following -structure: - - - "common": - - "export_dir": str, directory to which the results are exported - - "log_filename": (optional) str, path to log file - - "indicator": (optional) - - Any other indicator-specific settings -""" -from datetime import datetime -import time - -from delphi_utils import get_structured_logger -from delphi_utils.export import create_export_csv -import pandas as pd -import covidcast - -from .constants import make_signal_name, SIGNALS -from .pull import fetch_new_reports - - -def run_module(params): - """ - Run the indicator. - - Arguments - -------- - params: Dict[str, Any] - Nested dictionary of parameters. - """ - start_time = time.time() - logger = get_structured_logger( - __name__, filename=params["common"].get("log_filename"), - log_exceptions=params["common"].get("log_exceptions", True)) - def replace_date_param(p): - if p in params["indicator"]: - if params["indicator"][p] is None: - del params["indicator"][p] - else: - date_param = datetime.strptime(params["indicator"][p], "%Y-%m-%d").date() - params["indicator"][p] = date_param - replace_date_param("export_start_date") - replace_date_param("export_end_date") - export_params = { - 'start_date': params["indicator"].get("export_start_date", None), - 'end_date': params["indicator"].get("export_end_date", None) - } - export_params = { - k: pd.to_datetime(v) if v is not None else v - for k, v in export_params.items() - } - - run_stats = [] - dfs = fetch_new_reports(params, logger) - for key, df in dfs.items(): - (geo, sig, is_prop) = key - if sig not in params["indicator"]["export_signals"]: - continue - dates = create_export_csv( - df, - params['common']['export_dir'], - geo, - make_signal_name(sig, is_prop), - **export_params - ) - if len(dates)>0: - run_stats.append((max(dates), len(dates))) - - ## If any requested signal is not in metadata, generate it for all dates. - # - # Only do so if params.reports is set to "new". If set to "all", the - # previous fetch_new_reports + CSV loop will already have generated the full - # history for new signals. If params.reports is set to a specific date - # range, that request overrides automated backfill. - if params['indicator']['reports'] == 'new': - # Fetch metadata to check how recent signals are - metadata = covidcast.metadata() - sensor_names = { - SIGNALS[key][name_field]: key - for key in params["indicator"]["export_signals"] - for name_field in ["api_name", "api_prop_name"] - if name_field in SIGNALS[key].keys() - } - - # Filter to only those we currently want to produce - cpr_metadata = metadata[(metadata.data_source == "dsew-cpr") & - (metadata.signal.isin(sensor_names.keys()))] - - new_signals = set(sensor_names.keys()).difference(set(cpr_metadata.signal)) - if new_signals: - # If any signal not in metadata yet, we need to backfill its full - # history. 
- params['indicator']['reports'] = 'all' - params['indicator']['export_signals'] = {sensor_names[key] for key in new_signals} - - dfs = fetch_new_reports(params, logger) - for key, df in dfs.items(): - (geo, sig, is_prop) = key - if sig not in params["indicator"]["export_signals"]: - continue - dates = create_export_csv( - df, - params['common']['export_dir'], - geo, - make_signal_name(sig, is_prop), - **export_params - ) - if len(dates)>0: - run_stats.append((max(dates), len(dates))) - - ## log this indicator run - elapsed_time_in_seconds = round(time.time() - start_time, 2) - min_max_date = run_stats and min(s[0] for s in run_stats) - csv_export_count = sum(s[-1] for s in run_stats) - max_lag_in_days = min_max_date and (datetime.now() - min_max_date).days - formatted_min_max_date = min_max_date and min_max_date.strftime("%Y-%m-%d") - logger.info("Completed indicator run", - elapsed_time_in_seconds = elapsed_time_in_seconds, - csv_export_count = csv_export_count, - max_lag_in_days = max_lag_in_days, - oldest_final_export_date = formatted_min_max_date) diff --git a/dsew_community_profile/input_cache/.gitignore b/dsew_community_profile/input_cache/.gitignore deleted file mode 100644 index 7c1222033..000000000 --- a/dsew_community_profile/input_cache/.gitignore +++ /dev/null @@ -1 +0,0 @@ -*.xlsx diff --git a/dsew_community_profile/params.json.template b/dsew_community_profile/params.json.template deleted file mode 100644 index 1fcb75ee7..000000000 --- a/dsew_community_profile/params.json.template +++ /dev/null @@ -1,45 +0,0 @@ -{ - "common": { - "export_dir": "./receiving", - "log_filename": "dsew_cpr.log" - }, - "indicator": { - "input_cache": "./input_cache", - "reports": "new", - "export_start_date": null, - "export_end_date": null, - "export_signals": [ - "confirmed covid-19 admissions", - "positivity", - "doses administered", - "booster doses administered", - "fully vaccinated", - "booster dose since" - ] - }, - "validation": { - "common": { - "data_source": "dsew_cpr", - "span_length": 14, - "min_expected_lag": {"all": "3"}, - "max_expected_lag": {"all": "9"}, - "dry_run": true, - "suppressed_errors": [] - }, - "static": { - "minimum_sample_size": 0, - "missing_se_allowed": true, - "missing_sample_size_allowed": true - }, - "dynamic": { - "ref_window_size": 7, - "smoothed_signals": [ - "naats_positivity_7dav", - "confirmed_admissions_covid_1d_prop_7dav", - "confirmed_admissions_covid_1d_7dav", - "doses_admin_7dav", - "booster_doses_admin_7dav" - ] - } - } -} diff --git a/dsew_community_profile/setup.py b/dsew_community_profile/setup.py deleted file mode 100644 index fb5f9d4a9..000000000 --- a/dsew_community_profile/setup.py +++ /dev/null @@ -1,30 +0,0 @@ -from setuptools import setup -from setuptools import find_packages - -required = [ - "numpy", - "openpyxl", - "pandas", - "pydocstyle", - "pytest", - "pytest-cov", - "pylint==2.8.3", - "delphi-utils", - "covidcast" -] - -setup( - name="delphi_dsew_community_profile", - version="0.1.0", - description="Indicator tracking specimen test results and hospital admissions published in the COVID-19 Community Profile Report by the Data Strategy and Execution Workgroup", - author="", - author_email="", - url="https://github.com/cmu-delphi/covidcast-indicators", - install_requires=required, - classifiers=[ - "Development Status :: 5 - Production/Stable", - "Intended Audience :: Developers", - "Programming Language :: Python :: 3.8", - ], - packages=find_packages(), -) diff --git a/dsew_community_profile/static/.gitignore 
b/dsew_community_profile/static/.gitignore deleted file mode 100644 index e69de29bb..000000000 diff --git a/dsew_community_profile/tests/params.json.template b/dsew_community_profile/tests/params.json.template deleted file mode 100644 index 645bd253f..000000000 --- a/dsew_community_profile/tests/params.json.template +++ /dev/null @@ -1,34 +0,0 @@ -{ - "common": { - "export_dir": "./receiving", - "log_filename": "dsew_cpr.log" - }, - "indicator": { - "input_cache": "./input_cache", - "reports": "new" - }, - "validation": { - "common": { - "data_source": "dsew_cpr", - "span_length": 14, - "min_expected_lag": {"all": "5"}, - "max_expected_lag": {"all": "9"}, - "dry_run": true, - "suppressed_errors": [] - }, - "static": { - "minimum_sample_size": 0, - "missing_se_allowed": true, - "missing_sample_size_allowed": true - }, - "dynamic": { - "ref_window_size": 7, - "smoothed_signals": [ - "naats_total_7dav", - "naats_positivity_7dav", - "confirmed_admissions_covid_1d_prop_7dav", - "confirmed_admissions_covid_1d_7dav" - ] - } - } -} diff --git a/dsew_community_profile/tests/test_pull.py b/dsew_community_profile/tests/test_pull.py deleted file mode 100644 index e968df4d7..000000000 --- a/dsew_community_profile/tests/test_pull.py +++ /dev/null @@ -1,612 +0,0 @@ -from collections import namedtuple -from dataclasses import dataclass -from datetime import date, datetime, timedelta -from itertools import chain -from typing import Any, Dict, List, Union -import pandas as pd -from pandas.testing import assert_frame_equal -import numpy as np -import pytest -from unittest.mock import patch, Mock - -from delphi_utils.geomap import GeoMapper - -from delphi_dsew_community_profile.pull import ( - DatasetTimes, Dataset, - fetch_listing, nation_from_state, generate_prop_signal, - std_err, add_max_ts_col, unify_testing_sigs, interpolate_missing_values, - extend_listing_for_interp -) - - -example = namedtuple("example", "given expected") - -def _assert_frame_equal(df1, df2, index_cols: List[str] = None): - # Ensure same columns present. - assert set(df1.columns) == set(df2.columns) - # Ensure same column order. - df1 = df1[df1.columns] - df2 = df2[df1.columns] - # Ensure same row order by using a common index and sorting. 
- df1 = df1.set_index(index_cols).sort_index() - df2 = df2.set_index(index_cols).sort_index() - return assert_frame_equal(df1, df2) - -def _set_df_dtypes(df: pd.DataFrame, dtypes: Dict[str, Any]) -> pd.DataFrame: - df = df.copy() - for k, v in dtypes.items(): - if k in df.columns: - df[k] = df[k].astype(v) - return df - - -class TestPull: - def test_DatasetTimes(self): - examples = [ - example(DatasetTimes("xyzzy", date(2021, 10, 30), date(2021, 10, 20), date(2021, 10, 22), date(2021, 10, 23), date(2021, 10, 24)), - DatasetTimes("xyzzy", date(2021, 10, 30), date(2021, 10, 20), date(2021, 10, 22), date(2021, 10, 23), date(2021, 10, 24))), - ] - for ex in examples: - assert ex.given == ex.expected, "Equality" - - dt = DatasetTimes("xyzzy", date(2021, 10, 30), date(2021, 10, 20), date(2021, 10, 22), date(2021, 10, 23), date(2021, 10, 24)) - assert dt["positivity"] == date(2021, 10, 30), "positivity" - assert dt["total"] == date(2021, 10, 20), "total" - assert dt["confirmed covid-19 admissions"] == date(2021, 10, 22), "confirmed covid-19 admissions" - assert dt["doses administered"] == date(2021, 10, 24), "doses administered" - assert dt["fully vaccinated"] == date(2021, 10, 23), "fully vaccinated" - with pytest.raises(ValueError): - dt["xyzzy"] - - def test_DatasetTimes_from_header(self): - examples = [ - example("TESTING: LAST WEEK (October 24-30, Test Volume October 20-26)", - DatasetTimes("last", date(2021, 10, 30), date(2021, 10, 26), None, None, None)), - example("TESTING: PREVIOUS WEEK (October 24-30, Test Volume October 20-26)", - DatasetTimes("previous", date(2021, 10, 30), date(2021, 10, 26), None, None, None)), - example("TESTING: LAST WEEK (October 24-November 30, Test Volume October 20-26)", - DatasetTimes("last", date(2021, 11, 30), date(2021, 10, 26), None, None, None)), - example("VIRAL (RT-PCR) LAB TESTING: LAST WEEK (June 7-13, Test Volume June 3-9 )", - DatasetTimes("last", date(2021, 6, 13), date(2021, 6, 9), None, None, None)), - example("VIRAL (RT-PCR) LAB TESTING: LAST WEEK (March 7-13)", - DatasetTimes("last", date(2021, 3, 13), date(2021, 3, 13), None, None, None)), - example("HOSPITAL UTILIZATION: LAST WEEK (June 2-8)", - DatasetTimes("last", None, None, date(2021, 6, 8), None, None)), - example("HOSPITAL UTILIZATION: LAST WEEK (June 28-July 8)", - DatasetTimes("last", None, None, date(2021, 7, 8), None, None)), - example("COVID-19 VACCINATION DATA: CUMULATIVE (January 25)", - DatasetTimes("", None, None, None, date(2021, 1, 25), None)), - example("COVID-19 VACCINATION DATA: LAST WEEK (January 25-31)", - DatasetTimes("last", None, None, None, None, date(2021, 1, 25))) - ] - for ex in examples: - assert DatasetTimes.from_header(ex.given, date(2021, 12, 31)) == ex.expected, ex.given - - # test year boundary - examples = [ - example("TESTING: LAST WEEK (October 24-30, Test Volume October 20-26)", - DatasetTimes("last", date(2020, 10, 30), date(2020, 10, 26), None, None, None)), - ] - for ex in examples: - assert DatasetTimes.from_header(ex.given, date(2021, 1, 1)) == ex.expected, ex.given - - def test_Dataset_skip_overheader(self): - examples = [ - example("TESTING: LAST WEEK (October 24-30, Test Volume October 20-26)", - False), - example("TESTING: PREVIOUS WEEK (October 17-23, Test Volume October 13-19)", - False), - example("VIRAL (RT-PCR) LAB TESTING: LAST WEEK (August 24-30, Test Volume August 20-26)", - False), - example("VIRAL (RT-PCR) LAB TESTING: PREVIOUS WEEK (August 17-23, Test Volume August 13-19)", - False), - example("TESTING: % CHANGE FROM PREVIOUS WEEK", 
- True), - example("VIRAL (RT-PCR) LAB TESTING: % CHANGE FROM PREVIOUS WEEK", - True), - example("TESTING: DEMOGRAPHIC DATA", - True), - example("HOSPITAL UTILIZATION: LAST WEEK (January 2-8)", - False), - example("HOSPITAL UTILIZATION: CHANGE FROM PREVIOUS WEEK", - True), - example("HOSPITAL UTILIZATION: DEMOGRAPHIC DATA", - True), - example("COVID-19 VACCINATION DATA: CUMULATIVE (January 25)", - False), - example("COVID-19 VACCINATION DATA: LAST WEEK (January 25-31)", - False), - example("COVID-19 VACCINATION DATA: DEMOGRAPHIC DATA", - True) - ] - for ex in examples: - assert Dataset.skip_overheader(ex.given) == ex.expected, ex.given - def test_Dataset_retain_header(self): - examples = [ - example("Total NAATs - last 7 days (may be an underestimate due to delayed reporting)", - True), - example("Total NAATs - previous 7 days (may be an underestimate due to delayed reporting)", - True), - example("NAAT positivity rate - last 7 days (may be an underestimate due to delayed reporting)", - True), - example("NAAT positivity rate - previous 7 days (may be an underestimate due to delayed reporting)", - True), - example("NAAT positivity rate - absolute change (may be an underestimate due to delayed reporting)", - False), - example("NAAT positivity rate - last 7 days - ages <5", - False), - example("Total RT-PCR diagnostic tests - last 7 days (may be an underestimate due to delayed reporting)", - True), - example("Viral (RT-PCR) lab test positivity rate - last 7 days (may be an underestimate due to delayed reporting)", - True), - example("RT-PCR tests per 100k - last 7 days (may be an underestimate due to delayed reporting)", - False), - example("Confirmed COVID-19 admissions - last 7 days", - True), - example("Confirmed COVID-19 admissions - percent change", - False), - example("Confirmed COVID-19 admissions - last 7 days - ages <18", - False), - example("Confirmed COVID-19 admissions - last 7 days - age unknown", - False), - example("Confirmed COVID-19 admissions per 100 inpatient beds - last 7 days", - False), - example("People who are fully vaccinated", - True), - example("People who are fully vaccinated - ages 5-11", - False), - example("People who are fully vaccinated as % of total population", - False), - example("People with a completed primary series", - True), - example("People with a completed primary series - ages 5-11", - False), - example("People with a completed primary series as % of total population", - False), - example("People with full course", - True), - example("People with full course as % of total population", - False) - ] - for ex in examples: - assert Dataset.retain_header(ex.given) == ex.expected, ex.given - - def test_Dataset_parse_sheet(self): - # TODO - pass - - def test_fetch_listing(self): - inst = namedtuple("attachment", "assetId filename publish cache") - instances = list(chain(*[ - [ - inst(f"{i}", f"2021010{i}.xlsx", date(2021, 1, i), f"2021010{i}--{i}.xlsx"), - inst(f"p{i}", f"2021010{i}.pdf", date(2021, 1, i), f"2021010{i}--p{i}.pdf"), - ] - for i in [1, 2, 3, 4, 5] - ])) - - # Solution from https://stackoverflow.com/questions/15753390/ - #how-can-i-mock-requests-and-the-response - def mocked_requests_get(*args, **kwargs): - class MockResponse: - def __init__(self, json_data): - self.json_data = json_data - - def json(self): - return self.json_data - - return MockResponse({ - 'metadata': { - 'attachments': [ - {"assetId": i.assetId, "filename": i.filename} - for i in instances - ] - } - } - ) - - def as_listing(instance): - return { - "assetId": 
instance.assetId, - "filename": instance.filename, - "cached_filename": instance.cache, - "publish_date": instance.publish - } - ex = example( - {'indicator':{'reports':'new', 'input_cache':''}}, - [ - as_listing(instance) - for i, instance in filter(lambda x: x[0]%2 == 0, enumerate(instances)) - ] - ) - - with patch('requests.get', side_effect=mocked_requests_get): - with patch('os.path.exists', return_value=False): - for actual, expected in zip(fetch_listing(ex.given), ex.expected): - assert actual == expected - - with patch('os.path.exists', return_value=True): - assert fetch_listing(ex.given) == [] - - def test_nation_from_state(self): - geomapper = GeoMapper() - state_pop = geomapper.get_crosswalk("state_id", "pop") - - test_df = pd.DataFrame({ - 'state_id': ['pa', 'wv'], - 'timestamp': [datetime(year=2020, month=1, day=1)]*2, - 'val': [15., 150.], - 'se': [None, None], - 'sample_size': [None, None], - 'publish_date': [datetime(year=2020, month=1, day=1)]*2,}) - - pa_pop = int(state_pop.loc[state_pop.state_id == "pa", "pop"].iloc[0]) - wv_pop = int(state_pop.loc[state_pop.state_id == "wv", "pop"].iloc[0]) - tot_pop = pa_pop + wv_pop - - assert True, nation_from_state( - test_df.copy(), - "total", - geomapper - ) - pd.testing.assert_frame_equal( - nation_from_state( - test_df.copy(), - "total", - geomapper - ), - pd.DataFrame({ - 'geo_id': ['us'], - 'timestamp': [datetime(year=2020, month=1, day=1)], - 'val': [15. + 150.], - 'se': [None], - 'sample_size': [None], - 'publish_date': [datetime(year=2020, month=1, day=1)],}), - check_like=True - ) - - pd.testing.assert_frame_equal( - nation_from_state( - test_df.copy(), - "positivity", - geomapper - ), - pd.DataFrame({ - 'geo_id': ['us'], - 'timestamp': [datetime(year=2020, month=1, day=1)], - 'val': [15*pa_pop/tot_pop + 150*wv_pop/tot_pop], - 'se': [None], - 'sample_size': [None], - 'publish_date': [datetime(year=2020, month=1, day=1)],}), - check_like=True - ) - - def test_generate_prop_signal_msa(self): - geomapper = GeoMapper() - county_pop = geomapper.get_crosswalk("fips", "pop") - county_msa = geomapper.get_crosswalk("fips", "msa") - msa_pop = county_pop.merge( - county_msa, on="fips", how="inner" - ).groupby( - "msa" - ).sum( - numeric_only=True - ).reset_index( - ) - - test_df = pd.DataFrame({ - 'geo_id': ['35620', '31080'], - 'timestamp': [datetime(year=2020, month=1, day=1)]*2, - 'val': [15., 150.], - 'se': [None, None], - 'sample_size': [None, None],}) - - nyc_pop = int(msa_pop.loc[msa_pop.msa == "35620", "pop"].iloc[0]) - la_pop = int(msa_pop.loc[msa_pop.msa == "31080", "pop"].iloc[0]) - - expected_df = pd.DataFrame({ - 'geo_id': ['35620', '31080'], - 'timestamp': [datetime(year=2020, month=1, day=1)]*2, - 'val': [15. / nyc_pop * 100000, 150. 
/ la_pop * 100000], - 'se': [None, None], - 'sample_size': [None, None],}) - - pd.testing.assert_frame_equal( - generate_prop_signal( - test_df.copy(), - "msa", - geomapper - ), - expected_df, - check_like=True - ) - def test_generate_prop_signal_non_msa(self): - geomapper = GeoMapper() - - geos = { - "state": { - "code_name": "state_id", - "geo_names": ['pa', 'wv'] - }, - "county": { - "code_name": "fips", - "geo_names": ['36061', '06037'] - }, - # nation uses the same logic path so no need to test separately - "hhs": { - "code_name": "hhs", - "geo_names": ["1", "4"] - } - } - - for geo, settings in geos.items(): - geo_pop = geomapper.get_crosswalk(settings["code_name"], "pop") - - test_df = pd.DataFrame({ - 'geo_id': settings["geo_names"], - 'timestamp': [datetime(year=2020, month=1, day=1)]*2, - 'val': [15., 150.], - 'se': [None, None], - 'sample_size': [None, None],}) - - pop1 = int(geo_pop.loc[geo_pop[settings["code_name"]] == settings["geo_names"][0], "pop"].iloc[0]) - pop2 = int(geo_pop.loc[geo_pop[settings["code_name"]] == settings["geo_names"][1], "pop"].iloc[0]) - - expected_df = pd.DataFrame({ - 'geo_id': settings["geo_names"], - 'timestamp': [datetime(year=2020, month=1, day=1)]*2, - 'val': [15. / pop1 * 100000, 150. / pop2 * 100000], - 'se': [None, None], - 'sample_size': [None, None],}) - - pd.testing.assert_frame_equal( - generate_prop_signal( - test_df.copy(), - geo, - geomapper - ), - expected_df, - check_like=True - ) - - def test_unify_testing_sigs(self): - positivity_df = pd.DataFrame({ - 'geo_id': ["ca", "ca", "fl", "fl"], - 'timestamp': [datetime(2021, 10, 27), datetime(2021, 10, 20)]*2, - 'val': [0.2, 0.34, 0.7, 0.01], - 'se': [None] * 4, - 'sample_size': [None] * 4, - 'publish_date': [datetime(2021, 10, 30)]*4, - }) - base_volume_df = pd.DataFrame({ - 'geo_id': ["ca", "ca", "fl", "fl"], - 'timestamp': [datetime(2021, 10, 23), datetime(2021, 10, 16)]*2, - 'val': [None] * 4, - 'se': [None] * 4, - 'sample_size': [None] * 4, - 'publish_date': [datetime(2021, 10, 30)]*4, - }) - - examples = [ - example( - [positivity_df, base_volume_df.assign(val = [101, 102, 103, 104])], - positivity_df.assign( - sample_size = [101, 102, 103, 104], - se = lambda df: np.sqrt(df.val * (1 - df.val) / df.sample_size) - ) - ), # No filtering - example( - [positivity_df, base_volume_df.assign(val = [110, 111, 112, 113]).iloc[::-1]], - positivity_df.assign( - sample_size = [110, 111, 112, 113], - se = lambda df: np.sqrt(df.val * (1 - df.val) / df.sample_size) - ) - ), # No filtering, volume df in reversed order - example( - [positivity_df, base_volume_df.assign(val = [100, 5, 1, 6])], - positivity_df.assign( - sample_size = [100, 5, 1, 6] - ).iloc[[0, 3]].assign( - se = lambda df: np.sqrt(df.val * (1 - df.val) / df.sample_size) - ) - ) - ] - for ex in examples: - pd.testing.assert_frame_equal(unify_testing_sigs(ex.given[0], ex.given[1]), ex.expected) - - with pytest.raises(AssertionError): - # Inputs have different numbers of rows. 
- unify_testing_sigs(positivity_df, positivity_df.head(n=1)) - - def test_add_max_ts_col(self): - input_df = pd.DataFrame({ - 'geo_id': ["ca", "ca", "fl", "fl"], - 'timestamp': [datetime(2021, 10, 27), datetime(2021, 10, 20)]*2, - 'val': [1, 2, 3, 4], - 'se': [None] * 4, - 'sample_size': [None] * 4, - 'publish_date': [datetime(2021, 10, 30)]*4, - }) - examples = [ - example(input_df, input_df.assign(is_max_group_ts = [True, False, True, False])), - ] - for ex in examples: - pd.testing.assert_frame_equal(add_max_ts_col(ex.given), ex.expected) - - with pytest.raises(AssertionError): - # Input df has 2 timestamps per geo id-publish date combination, but not 2 unique timestamps. - add_max_ts_col( - pd.DataFrame({ - 'geo_id': ["ca", "ca", "fl", "fl"], - 'timestamp': [datetime(2021, 10, 27)] * 4, - 'val': [1, 2, 3, 4], - 'se': [None] * 4, - 'sample_size': [None] * 4, - 'publish_date': [datetime(2021, 10, 30)] * 4, - }) - ) - with pytest.raises(AssertionError): - # Input df has more than 2 timestamps per geo id-publish date combination. - add_max_ts_col( - pd.DataFrame({ - 'geo_id': ["ca", "ca", "ca", "fl", "fl", "fl"], - 'timestamp': [datetime(2021, 10, 27)] * 6, - 'val': [1, 2, 3, 4, 5, 6], - 'se': [None] * 6, - 'sample_size': [None] * 6, - 'publish_date': [datetime(2021, 10, 30)] * 6, - }) - ) - - try: - # Input df has fewer than 2 timestamps per geo id-publish date - # combination. This should not raise an exception. - add_max_ts_col( - pd.DataFrame({ - 'geo_id': ["ca", "fl"], - 'timestamp': [datetime(2021, 10, 27)] * 2, - 'val': [1, 2], - 'se': [None] * 2, - 'sample_size': [None] * 2, - 'publish_date': [datetime(2021, 10, 30)] * 2, - }) - ) - except AssertionError as e: - assert False, f"'add_max_ts_col' raised exception: {e}" - - try: - # Input df has 2 unique timestamps per geo id-publish date - # combination. This should not raise an exception. - add_max_ts_col( - pd.DataFrame({ - 'geo_id': ["ca", "ca", "fl", "fl"], - 'timestamp': [datetime(2021, 10, 27), datetime(2021, 10, 20)] * 2, - 'val': [1, 2, 3, 4], - 'se': [None] * 4, - 'sample_size': [None] * 4, - 'publish_date': [datetime(2021, 10, 30)] * 4, - }) - ) - except AssertionError as e: - assert False, f"'add_max_ts_col' raised exception: {e}" - - def test_std_err(self): - df = pd.DataFrame({ - "val": [0, 0.5, 0.4, 0.3, 0.2, 0.1], - "sample_size": [2, 2, 5, 10, 20, 50] - }) - - expected_se = np.sqrt(df.val * (1 - df.val) / df.sample_size) - se = std_err(df) - - assert (se >= 0).all() - assert not np.isnan(se).any() - assert not np.isinf(se).any() - assert np.allclose(se, expected_se, equal_nan=True) - with pytest.raises(AssertionError): - std_err( - pd.DataFrame({ - "val": [0, 0.5, 0.4, 0.3, 0.2, 0.1], - "sample_size": [2, 2, 5, 10, 20, 0] - }) - ) - - def test_interpolation(self): - DTYPES = {"geo_id": str, "timestamp": "datetime64[ns]", "val": float, "se": float, "sample_size": float, "publish_date": "datetime64[ns]"} - line = lambda x: 3 * x + 5 - - sig1 = _set_df_dtypes(pd.DataFrame({ - "geo_id": "1", - "timestamp": pd.date_range("2022-01-01", "2022-01-10"), - "val": [line(i) for i in range(2, 12)], - "se": [line(i) for i in range(1, 11)], - "sample_size": [line(i) for i in range(0, 10)], - "publish_date": pd.to_datetime("2022-01-10") - }), dtypes=DTYPES) - # A linear signal missing two days which should be filled exactly by the linear interpolation. 
- missing_sig1 = sig1[(sig1.timestamp <= "2022-01-05") | (sig1.timestamp >= "2022-01-08")] - - sig2 = sig1.copy() - sig2["geo_id"] = "2" - # A linear signal missing everything but the end points, should be filled exactly by linear interpolation. - missing_sig2 = sig2[(sig2.timestamp == "2022-01-01") | (sig2.timestamp == "2022-01-10")] - - sig3 = _set_df_dtypes(pd.DataFrame({ - "geo_id": "3", - "timestamp": pd.date_range("2022-01-01", "2022-01-10"), - "val": None, - "se": [line(i) for i in range(1, 11)], - "sample_size": [line(i) for i in range(0, 10)], - "publish_date": pd.to_datetime("2022-01-10") - }), dtypes=DTYPES) - # A signal missing everything, should be dropped since it's all NAs. - missing_sig3 = sig3[(sig3.timestamp <= "2022-01-05") | (sig3.timestamp >= "2022-01-08")] - - sig4 = _set_df_dtypes(pd.DataFrame({ - "geo_id": "4", - "timestamp": pd.date_range("2022-01-01", "2022-01-10"), - "val": [None] * 9 + [10.0], - "se": [line(i) for i in range(1, 11)], - "sample_size": [line(i) for i in range(0, 10)], - "publish_date": pd.to_datetime("2022-01-10") - }), dtypes=DTYPES) - # A signal missing everything except for one point, should output a reduced range without NAs. - missing_sig4 = sig4[(sig4.timestamp <= "2022-01-05") | (sig4.timestamp >= "2022-01-08")] - - missing_dfs = [missing_sig1, missing_sig2, missing_sig3, missing_sig4] - interpolated_dfs1 = interpolate_missing_values({("src", "sig", False): pd.concat(missing_dfs)}) - expected_dfs = pd.concat([sig1, sig2, sig4.loc[9:]]) - _assert_frame_equal(interpolated_dfs1[("src", "sig", False)], expected_dfs, index_cols=["geo_id", "timestamp"]) - - def test_interpolation_object_type(self): - DTYPES = {"geo_id": str, "timestamp": "datetime64[ns]", "val": float, "se": float, "sample_size": float, "publish_date": "datetime64[ns]"} - line = lambda x: 3 * x + 5 - - sig1 = _set_df_dtypes(pd.DataFrame({ - "geo_id": "1", - "timestamp": pd.date_range("2022-01-01", "2022-01-10"), - "val": [line(i) for i in range(2, 12)], - "se": [line(i) for i in range(1, 11)], - "sample_size": [line(i) for i in range(0, 10)], - "publish_date": pd.to_datetime("2022-01-10") - }), dtypes=DTYPES) - # A linear signal missing two days which should be filled exactly by the linear interpolation. 
- missing_sig1 = sig1[(sig1.timestamp <= "2022-01-05") | (sig1.timestamp >= "2022-01-08")] - # set all columns to object type to simulate the miscast we sometimes see when combining dfs - missing_sig1 = _set_df_dtypes(missing_sig1, {key: object for key in DTYPES.keys()}) - - interpolated_dfs1 = interpolate_missing_values({("src", "sig", False): missing_sig1}) - expected_dfs = pd.concat([sig1]) - _assert_frame_equal(interpolated_dfs1[("src", "sig", False)], expected_dfs, index_cols=["geo_id", "timestamp"]) - - @patch("delphi_dsew_community_profile.pull.INTERP_LENGTH", 2) - def test_extend_listing(self): - listing = [ - {"publish_date": date(2020, 1, 20) - timedelta(days=i)} - for i in range(20) - ] - examples = [ - # single range - example( - [{"publish_date": date(2020, 1, 20)}], - [{"publish_date": date(2020, 1, 20)}, {"publish_date": date(2020, 1, 19)}] - ), - # disjoint ranges - example( - [{"publish_date": date(2020, 1, 20)}, {"publish_date": date(2020, 1, 10)}], - [{"publish_date": date(2020, 1, 20)}, {"publish_date": date(2020, 1, 19)}, - {"publish_date": date(2020, 1, 10)}, {"publish_date": date(2020, 1, 9)}] - ), - # conjoined ranges - example( - [{"publish_date": date(2020, 1, 20)}, {"publish_date": date(2020, 1, 19)}], - [{"publish_date": date(2020, 1, 20)}, {"publish_date": date(2020, 1, 19)}, {"publish_date": date(2020, 1, 18)}] - ), - # empty keep list - example( - [], - [] - ) - ] - for ex in examples: - assert extend_listing_for_interp(ex.given, listing) == ex.expected, ex.given diff --git a/dsew_community_profile/version.cfg b/dsew_community_profile/version.cfg deleted file mode 100644 index ae19058ed..000000000 --- a/dsew_community_profile/version.cfg +++ /dev/null @@ -1 +0,0 @@ -current_version = 0.3.42 From 243c7d5be7c566b046ec4c32ad1d5c7c01608dcf Mon Sep 17 00:00:00 2001 From: Nat DeFries <42820733+nmdefries@users.noreply.github.com> Date: Mon, 17 Jul 2023 14:00:55 -0400 Subject: [PATCH 02/21] remove dsew from workflows --- .github/workflows/create-release.yml | 2 +- .github/workflows/python-ci.yml | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/workflows/create-release.yml b/.github/workflows/create-release.yml index 55e7c0782..5a56a6d1d 100644 --- a/.github/workflows/create-release.yml +++ b/.github/workflows/create-release.yml @@ -61,7 +61,7 @@ jobs: bump2version --list ${{ github.event.inputs.versionName }} | grep ^new_version | sed -r s,"^.*=",, - name: Copy version to indicator directory run: | - indicator_list=("changehc" "claims_hosp" "doctor_visits" "dsew_community_profile" "google_symptoms" "hhs_hosp" "nchs_mortality" "nowcast" "quidel_covidtest" "sir_complainsalot") + indicator_list=("changehc" "claims_hosp" "doctor_visits" "google_symptoms" "hhs_hosp" "nchs_mortality" "nowcast" "quidel_covidtest" "sir_complainsalot") for path in ${indicator_list[@]}; do echo "current_version = ${{ steps.indicators.outputs.version }}" > $path/version.cfg done diff --git a/.github/workflows/python-ci.yml b/.github/workflows/python-ci.yml index 4d376ea5b..7e09009a9 100644 --- a/.github/workflows/python-ci.yml +++ b/.github/workflows/python-ci.yml @@ -16,7 +16,7 @@ jobs: if: github.event.pull_request.draft == false strategy: matrix: - packages: [_delphi_utils_python, changehc, claims_hosp, doctor_visits, dsew_community_profile, google_symptoms, hhs_hosp, nchs_mortality, nowcast, quidel_covidtest, sir_complainsalot] + packages: [_delphi_utils_python, changehc, claims_hosp, doctor_visits, google_symptoms, hhs_hosp, nchs_mortality, nowcast, 
quidel_covidtest, sir_complainsalot] defaults: run: working-directory: ${{ matrix.packages }} From d035d811cda441f5384bd495b0c7ea8482f16e89 Mon Sep 17 00:00:00 2001 From: Nat DeFries <42820733+nmdefries@users.noreply.github.com> Date: Mon, 17 Jul 2023 14:01:44 -0400 Subject: [PATCH 03/21] remove dsew from Jenkinsfile --- Jenkinsfile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Jenkinsfile b/Jenkinsfile index 44cf0d1be..b4fb46aa3 100644 --- a/Jenkinsfile +++ b/Jenkinsfile @@ -10,7 +10,7 @@ - TODO: #527 Get this list automatically from python-ci.yml at runtime. */ -def indicator_list = ["backfill_corrections", "changehc", "claims_hosp", "google_symptoms", "hhs_hosp", "nchs_mortality", "quidel_covidtest", "sir_complainsalot", "dsew_community_profile", "doctor_visits"] +def indicator_list = ["backfill_corrections", "changehc", "claims_hosp", "google_symptoms", "hhs_hosp", "nchs_mortality", "quidel_covidtest", "sir_complainsalot", "doctor_visits"] def build_package_main = [:] def build_package_prod = [:] def deploy_staging = [:] From 3412cc924e18aa9854ad4152f02738455de7e610 Mon Sep 17 00:00:00 2001 From: Nat DeFries <42820733+nmdefries@users.noreply.github.com> Date: Thu, 20 Jul 2023 14:49:39 -0400 Subject: [PATCH 04/21] put max allowed threads in constant --- .../delphi_utils/validator/datafetcher.py | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/_delphi_utils_python/delphi_utils/validator/datafetcher.py b/_delphi_utils_python/delphi_utils/validator/datafetcher.py index 9e302d822..7e5fafbd6 100644 --- a/_delphi_utils_python/delphi_utils/validator/datafetcher.py +++ b/_delphi_utils_python/delphi_utils/validator/datafetcher.py @@ -220,11 +220,13 @@ def get_one_api_df(data_source, min_date, max_date, dict_lock.release() -def threaded_api_calls(data_source, min_date, max_date, geo_signal_combos, n_threads=32): +MAX_ALLOWED_THREADS = 32 + +def threaded_api_calls(data_source, min_date, max_date, geo_signal_combos, n_threads=MAX_ALLOWED_THREADS): """Get data from API for all geo-signal combinations in a threaded way.""" - if n_threads > 32: - n_threads = 32 - print("Warning: Don't run more than 32 threads at once due " + if n_threads > MAX_ALLOWED_THREADS: + n_threads = MAX_ALLOWED_THREADS + warnings.warn(f"Warning: Don't run more than {MAX_ALLOWED_THREADS} threads at once due " + "to API resource limitations") output_dict = dict() From 1158857d879c39f1f38ae3ebf13b3816515abe5c Mon Sep 17 00:00:00 2001 From: Nat DeFries <42820733+nmdefries@users.noreply.github.com> Date: Thu, 20 Jul 2023 15:31:59 -0400 Subject: [PATCH 05/21] add None-type check and error for api ref data --- .../delphi_utils/validator/datafetcher.py | 12 +++++------- 1 file changed, 5 insertions(+), 7 deletions(-) diff --git a/_delphi_utils_python/delphi_utils/validator/datafetcher.py b/_delphi_utils_python/delphi_utils/validator/datafetcher.py index 7e5fafbd6..27ef3ecbf 100644 --- a/_delphi_utils_python/delphi_utils/validator/datafetcher.py +++ b/_delphi_utils_python/delphi_utils/validator/datafetcher.py @@ -166,14 +166,12 @@ def fetch_api_reference(data_source, start_date, end_date, geo_type, signal_type api_df = covidcast.signal( data_source, signal_type, start_date, end_date, geo_type) + error_context = f"when fetching reference data from {start_date} to {end_date} " +\ + f"for data source: {data_source}, signal type: {signal_type}, geo type: {geo_type}" + if api_df is None: + raise APIDataFetchError("Error: no API data was returned " + error_context) if not isinstance(api_df, 
pd.DataFrame): - custom_msg = "Error fetching data from " + str(start_date) + \ - " to " + str(end_date) + \ - " for data source: " + data_source + \ - ", signal type: " + signal_type + \ - ", geo type: " + geo_type - - raise APIDataFetchError(custom_msg) + raise APIDataFetchError("Error: API return value was not a dataframe " + error_context) column_names = ["geo_id", "val", "se", "sample_size", "time_value"] From 8636c0b0bf22a26d00d33fb2968cfaf51edebddb Mon Sep 17 00:00:00 2001 From: Nat DeFries <42820733+nmdefries@users.noreply.github.com> Date: Thu, 20 Jul 2023 15:41:20 -0400 Subject: [PATCH 06/21] move api key fetch and set up to dynamic.validate This lets the `meta` and `signal` calls share the same authentication init. We also don't need to do a second `read_params` in `get_geo_signal_combos`; instead pass the API key as an arg. --- .../delphi_utils/validator/datafetcher.py | 7 ++----- _delphi_utils_python/delphi_utils/validator/dynamic.py | 9 ++++++++- 2 files changed, 10 insertions(+), 6 deletions(-) diff --git a/_delphi_utils_python/delphi_utils/validator/datafetcher.py b/_delphi_utils_python/delphi_utils/validator/datafetcher.py index 27ef3ecbf..db27498d3 100644 --- a/_delphi_utils_python/delphi_utils/validator/datafetcher.py +++ b/_delphi_utils_python/delphi_utils/validator/datafetcher.py @@ -10,7 +10,6 @@ import pandas as pd import numpy as np import covidcast -from .. import read_params from .errors import APIDataFetchError, ValidationFailure FILENAME_REGEX = re.compile( @@ -103,15 +102,13 @@ def load_csv(path): }) -def get_geo_signal_combos(data_source): +def get_geo_signal_combos(data_source, api_key): """ Get list of geo type-signal type combinations that we expect to see. Cross references based on combinations reported available by COVIDcast metadata. 
""" - params = read_params() - assert "validation" in params - api_key = ("epidata", params["validation"]["common"]["api_credentials"]) + api_key = ("epidata", api_key) # Maps data_source name with what's in the API, lists used in case of multiple names meta_response = requests.get("https://api.covidcast.cmu.edu/epidata/covidcast/meta", auth=api_key) diff --git a/_delphi_utils_python/delphi_utils/validator/dynamic.py b/_delphi_utils_python/delphi_utils/validator/dynamic.py index ea846e03d..deb65b129 100644 --- a/_delphi_utils_python/delphi_utils/validator/dynamic.py +++ b/_delphi_utils_python/delphi_utils/validator/dynamic.py @@ -5,6 +5,7 @@ import re import pandas as pd import numpy as np +import covidcast from .errors import ValidationFailure from .datafetcher import get_geo_signal_combos, threaded_api_calls from .utils import relative_difference_by_min, TimeWindow, lag_converter @@ -20,6 +21,8 @@ class Parameters: # data source name, one of # https://cmu-delphi.github.io/delphi-epidata/api/covidcast_signals.html data_source: str + # COVIDcast API key + api_key: str # span of time over which to perform checks time_window: TimeWindow # date that this df_to_test was generated; typically 1 day after the last date in df_to_test @@ -48,6 +51,7 @@ def __init__(self, params): self.params = self.Parameters( data_source=common_params["data_source"], + api_key = params["common"]["api_credentials"], time_window=TimeWindow.from_params(common_params["end_date"], common_params["span_length"]), generation_date=date.today(), @@ -74,8 +78,11 @@ def validate(self, all_frames, report): # Get 14 days prior to the earliest list date outlier_lookbehind = timedelta(days=14) + # Authenticate API + covidcast.use_api_key(self.params.api_key) + # Get all expected combinations of geo_type and signal. 
- geo_signal_combos = get_geo_signal_combos(self.params.data_source) + geo_signal_combos = get_geo_signal_combos(self.params.data_source, api_key = self.params.api_key) all_api_df = threaded_api_calls(self.params.data_source, self.params.time_window.start_date - outlier_lookbehind, From 3596d36559d687d7084128e26afffe3243784a91 Mon Sep 17 00:00:00 2001 From: Nat DeFries <42820733+nmdefries@users.noreply.github.com> Date: Thu, 20 Jul 2023 18:11:32 -0400 Subject: [PATCH 07/21] linting --- _delphi_utils_python/delphi_utils/validator/datafetcher.py | 4 +++- _delphi_utils_python/delphi_utils/validator/dynamic.py | 5 +++-- 2 files changed, 6 insertions(+), 3 deletions(-) diff --git a/_delphi_utils_python/delphi_utils/validator/datafetcher.py b/_delphi_utils_python/delphi_utils/validator/datafetcher.py index db27498d3..ae29c90dd 100644 --- a/_delphi_utils_python/delphi_utils/validator/datafetcher.py +++ b/_delphi_utils_python/delphi_utils/validator/datafetcher.py @@ -165,6 +165,7 @@ def fetch_api_reference(data_source, start_date, end_date, geo_type, signal_type error_context = f"when fetching reference data from {start_date} to {end_date} " +\ f"for data source: {data_source}, signal type: {signal_type}, geo type: {geo_type}" + if api_df is None: raise APIDataFetchError("Error: no API data was returned " + error_context) if not isinstance(api_df, pd.DataFrame): @@ -217,7 +218,8 @@ def get_one_api_df(data_source, min_date, max_date, MAX_ALLOWED_THREADS = 32 -def threaded_api_calls(data_source, min_date, max_date, geo_signal_combos, n_threads=MAX_ALLOWED_THREADS): +def threaded_api_calls(data_source, min_date, max_date, + geo_signal_combos, n_threads=MAX_ALLOWED_THREADS): """Get data from API for all geo-signal combinations in a threaded way.""" if n_threads > MAX_ALLOWED_THREADS: n_threads = MAX_ALLOWED_THREADS diff --git a/_delphi_utils_python/delphi_utils/validator/dynamic.py b/_delphi_utils_python/delphi_utils/validator/dynamic.py index deb65b129..2931c65af 100644 --- a/_delphi_utils_python/delphi_utils/validator/dynamic.py +++ b/_delphi_utils_python/delphi_utils/validator/dynamic.py @@ -15,7 +15,7 @@ class DynamicValidator: """Class for validation of static properties of individual datasets.""" @dataclass - class Parameters: + class Parameters: # pylint: disable=too-many-instance-attributes """Configuration parameters.""" # data source name, one of @@ -82,7 +82,8 @@ def validate(self, all_frames, report): covidcast.use_api_key(self.params.api_key) # Get all expected combinations of geo_type and signal. 
- geo_signal_combos = get_geo_signal_combos(self.params.data_source, api_key = self.params.api_key) + geo_signal_combos = get_geo_signal_combos(self.params.data_source, + api_key = self.params.api_key) all_api_df = threaded_api_calls(self.params.data_source, self.params.time_window.start_date - outlier_lookbehind, From 8f0ab2bd4b6b4c8b44f143aebebe49051c85bc2e Mon Sep 17 00:00:00 2001 From: Nat DeFries <42820733+nmdefries@users.noreply.github.com> Date: Fri, 21 Jul 2023 10:49:34 -0400 Subject: [PATCH 08/21] pull api key from common_params --- _delphi_utils_python/delphi_utils/validator/dynamic.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/_delphi_utils_python/delphi_utils/validator/dynamic.py b/_delphi_utils_python/delphi_utils/validator/dynamic.py index 2931c65af..4911628ee 100644 --- a/_delphi_utils_python/delphi_utils/validator/dynamic.py +++ b/_delphi_utils_python/delphi_utils/validator/dynamic.py @@ -51,7 +51,7 @@ def __init__(self, params): self.params = self.Parameters( data_source=common_params["data_source"], - api_key = params["common"]["api_credentials"], + api_key = common_params["api_credentials"], time_window=TimeWindow.from_params(common_params["end_date"], common_params["span_length"]), generation_date=date.today(), From fd8dc3ca10c52c0a0a59d459e2f05e41ea1d3934 Mon Sep 17 00:00:00 2001 From: Nat DeFries <42820733+nmdefries@users.noreply.github.com> Date: Fri, 21 Jul 2023 10:49:43 -0400 Subject: [PATCH 09/21] update tests --- _delphi_utils_python/tests/validator/test_datafetcher.py | 9 +++++---- _delphi_utils_python/tests/validator/test_dynamic.py | 3 ++- 2 files changed, 7 insertions(+), 5 deletions(-) diff --git a/_delphi_utils_python/tests/validator/test_datafetcher.py b/_delphi_utils_python/tests/validator/test_datafetcher.py index 2d8a4e1c1..6b3dfa574 100644 --- a/_delphi_utils_python/tests/validator/test_datafetcher.py +++ b/_delphi_utils_python/tests/validator/test_datafetcher.py @@ -54,7 +54,7 @@ def raise_for_status(self): def test_bad_api_key(self, **kwargs): kwargs["mock_requests"].get("https://api.covidcast.cmu.edu/epidata/covidcast/meta", status_code=429) with pytest.raises(HTTPError): - get_geo_signal_combos("chng") + get_geo_signal_combos("chng", api_key="") @mock.patch('requests.get', side_effect=mocked_requests_get) @mock.patch("covidcast.metadata") @@ -78,11 +78,11 @@ def test_get_geo_signal_combos(self, mock_metadata, mock_get): "hrr", "msa", "msa", "state"] }) - assert set(get_geo_signal_combos("chng")) == set( + assert set(get_geo_signal_combos("chng", api_key="")) == set( [("state", "smoothed_outpatient_cli"), ("state", "smoothed_outpatient_covid"), ("county", "smoothed_outpatient_covid")]) - assert set(get_geo_signal_combos("covid-act-now")) == set( + assert set(get_geo_signal_combos("covid-act-now", api_key="")) == set( [("hrr", "pcr_specimen_positivity_rate"), ("msa", "pcr_specimen_positivity_rate"), ("msa", "pcr_specimen_total_tests")]) @@ -138,7 +138,8 @@ def mock_signal_return_fn(unused_data_source, signal_type, unused_start_date, ("state", "b"): ValidationFailure("api_data_fetch_error", geo_type="state", signal="b", - message="Error fetching data from 2020-03-10 " + message="Error: no API data was returned when " + "fetching reference data from 2020-03-10 " "to 2020-06-10 for data source: " "source, signal type: b, geo type: state") } diff --git a/_delphi_utils_python/tests/validator/test_dynamic.py b/_delphi_utils_python/tests/validator/test_dynamic.py index ce5a1bf54..c1e39af8e 100644 --- 
a/_delphi_utils_python/tests/validator/test_dynamic.py +++ b/_delphi_utils_python/tests/validator/test_dynamic.py @@ -11,7 +11,8 @@ class TestReferencePadding: "common": { "data_source": "", "span_length": 1, - "end_date": "2020-09-02" + "end_date": "2020-09-02", + "api_credentials": "" } } From 1eb571978ed8a01ab008a2163914d4e775c3be3c Mon Sep 17 00:00:00 2001 From: Nat DeFries <42820733+nmdefries@users.noreply.github.com> Date: Fri, 21 Jul 2023 10:57:08 -0400 Subject: [PATCH 10/21] add empty api credentials to all test params --- .../tests/validator/test_dynamic.py | 15 ++++++++++----- .../tests/validator/test_validator.py | 12 ++++++++---- 2 files changed, 18 insertions(+), 9 deletions(-) diff --git a/_delphi_utils_python/tests/validator/test_dynamic.py b/_delphi_utils_python/tests/validator/test_dynamic.py index c1e39af8e..07a50bdf1 100644 --- a/_delphi_utils_python/tests/validator/test_dynamic.py +++ b/_delphi_utils_python/tests/validator/test_dynamic.py @@ -82,7 +82,8 @@ class TestCheckRapidChange: "common": { "data_source": "", "span_length": 1, - "end_date": "2020-09-02" + "end_date": "2020-09-02", + "api_credentials": "" } } @@ -115,7 +116,8 @@ class TestCheckNaVals: "common": { "data_source": "", "span_length": 14, - "end_date": "2020-09-02" + "end_date": "2020-09-02", + "api_credentials": "" } } def test_missing(self): @@ -138,7 +140,8 @@ class TestCheckAvgValDiffs: "common": { "data_source": "", "span_length": 1, - "end_date": "2020-09-02" + "end_date": "2020-09-02", + "api_credentials": "" } } @@ -279,7 +282,8 @@ class TestDataOutlier: "common": { "data_source": "", "span_length": 1, - "end_date": "2020-09-02" + "end_date": "2020-09-02", + "api_credentials": "" } } pd.set_option("display.max_rows", None, "display.max_columns", None) @@ -472,7 +476,8 @@ class TestDateComparison: "common": { "data_source": "", "span_length": 1, - "end_date": "2020-09-02" + "end_date": "2020-09-02", + "api_credentials": "" } } diff --git a/_delphi_utils_python/tests/validator/test_validator.py b/_delphi_utils_python/tests/validator/test_validator.py index dc28aa42e..c302b57aa 100644 --- a/_delphi_utils_python/tests/validator/test_validator.py +++ b/_delphi_utils_python/tests/validator/test_validator.py @@ -15,7 +15,8 @@ def test_default_settings(self): "common": { "data_source": "", "span_length": 0, - "end_date": "2020-09-01" + "end_date": "2020-09-01", + "api_credentials": "" } } } @@ -46,7 +47,8 @@ def test_suppressed_errors(self): "signal": "b"}, {"check_name":"c", "date": None, - "geo_type": "d"}] + "geo_type": "d"}], + "api_credentials": "" } } } @@ -76,7 +78,8 @@ def test_incorrect_suppressed_errors(self): "date": None, "geo_type": "d"}, {"check_name": "a", - "fake": "b"}] + "fake": "b"}], + "api_credentials": "" } } }) @@ -98,7 +101,8 @@ def test_incorrect_suppressed_errors(self): {"check_name":"c", "date": None, "geo_type": "d"}, - ["ab"]] + ["ab"]], + "api_credentials": "" } } }) From 40c92ecc9d0b7915f760e637dd519bf61789da63 Mon Sep 17 00:00:00 2001 From: nmdefries <42820733+nmdefries@users.noreply.github.com> Date: Fri, 21 Jul 2023 11:14:38 -0400 Subject: [PATCH 11/21] alert about actual # of threads being used Co-authored-by: Katie Mazaitis --- _delphi_utils_python/delphi_utils/validator/datafetcher.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/_delphi_utils_python/delphi_utils/validator/datafetcher.py b/_delphi_utils_python/delphi_utils/validator/datafetcher.py index ae29c90dd..045d67585 100644 --- a/_delphi_utils_python/delphi_utils/validator/datafetcher.py +++ 
b/_delphi_utils_python/delphi_utils/validator/datafetcher.py @@ -223,7 +223,7 @@ def threaded_api_calls(data_source, min_date, max_date, """Get data from API for all geo-signal combinations in a threaded way.""" if n_threads > MAX_ALLOWED_THREADS: n_threads = MAX_ALLOWED_THREADS - warnings.warn(f"Warning: Don't run more than {MAX_ALLOWED_THREADS} threads at once due " + warnings.warn(f"Warning: instead of requested thread count, using only {MAX_ALLOWED_THREADS} threads due " + "to API resource limitations") output_dict = dict() From c81824e0ec48cf7113d76b81c37fcd508acda3dc Mon Sep 17 00:00:00 2001 From: Nat DeFries <42820733+nmdefries@users.noreply.github.com> Date: Fri, 21 Jul 2023 12:19:10 -0400 Subject: [PATCH 12/21] linting --- _delphi_utils_python/delphi_utils/validator/datafetcher.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/_delphi_utils_python/delphi_utils/validator/datafetcher.py b/_delphi_utils_python/delphi_utils/validator/datafetcher.py index 045d67585..39c8555cf 100644 --- a/_delphi_utils_python/delphi_utils/validator/datafetcher.py +++ b/_delphi_utils_python/delphi_utils/validator/datafetcher.py @@ -223,8 +223,8 @@ def threaded_api_calls(data_source, min_date, max_date, """Get data from API for all geo-signal combinations in a threaded way.""" if n_threads > MAX_ALLOWED_THREADS: n_threads = MAX_ALLOWED_THREADS - warnings.warn(f"Warning: instead of requested thread count, using only {MAX_ALLOWED_THREADS} threads due " - + "to API resource limitations") + warnings.warn("Warning: instead of requested thread count, using " + \ + f"only {MAX_ALLOWED_THREADS} threads due to API resource limitations") output_dict = dict() dict_lock = threading.Lock() From 5dbd2b035cd5e9d3b300b59d5daddf73a822ad7f Mon Sep 17 00:00:00 2001 From: Katie Mazaitis Date: Fri, 21 Jul 2023 13:14:26 -0400 Subject: [PATCH 13/21] Update data_quality issue template: katie -> nolan --- .github/ISSUE_TEMPLATE/data_quality_issue.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/ISSUE_TEMPLATE/data_quality_issue.md b/.github/ISSUE_TEMPLATE/data_quality_issue.md index 769adde01..4d4ad2874 100644 --- a/.github/ISSUE_TEMPLATE/data_quality_issue.md +++ b/.github/ISSUE_TEMPLATE/data_quality_issue.md @@ -3,7 +3,7 @@ name: Data quality issue about: Missing data, weird data, broken data title: '' labels: 'data quality' -assignees: 'krivard' +assignees: 'nolangormley' --- **Actual Behavior:** From 07320038ce6f766d8161e6e1f7742fbd07dd1fe4 Mon Sep 17 00:00:00 2001 From: Katie Mazaitis Date: Fri, 21 Jul 2023 13:15:18 -0400 Subject: [PATCH 14/21] Update create-release workflow: katie -> george --- .github/workflows/create-release.yml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/workflows/create-release.yml b/.github/workflows/create-release.yml index 55e7c0782..ba5cacf0f 100644 --- a/.github/workflows/create-release.yml +++ b/.github/workflows/create-release.yml @@ -72,8 +72,8 @@ jobs: base: prod title: Release covidcast-indicators ${{ steps.indicators.outputs.version }} labels: chore - reviewers: krivard - assignees: krivard + reviewers: melange396 + assignees: melange396 body: | Releasing: * covidcast-indicators ${{ steps.indicators.outputs.version }} From f2503e5b2c3ced3c66348d31210dba010e12a023 Mon Sep 17 00:00:00 2001 From: Katie Mazaitis Date: Fri, 21 Jul 2023 13:15:53 -0400 Subject: [PATCH 15/21] Update publish-release workflow: katie -> george --- .github/workflows/publish-release.yml | 4 ++-- 1 file changed, 2 insertions(+), 2 
deletions(-) diff --git a/.github/workflows/publish-release.yml b/.github/workflows/publish-release.yml index 630e44c44..a9f1ccae9 100644 --- a/.github/workflows/publish-release.yml +++ b/.github/workflows/publish-release.yml @@ -120,7 +120,7 @@ jobs: base: main title: 'chore: sync prod->main' labels: chore - reviewers: krivard - assignees: krivard + reviewers: melange396 + assignees: melange396 body: | Syncing Prod->Main. From 0f24afb9f720d80f3500cd8bf40665f3e6f99d5b Mon Sep 17 00:00:00 2001 From: Katie Mazaitis Date: Fri, 21 Jul 2023 13:17:10 -0400 Subject: [PATCH 16/21] Delete deprecated delphi-utils workflow --- .github/workflows/release-delphi-utils.yml | 108 --------------------- 1 file changed, 108 deletions(-) delete mode 100644 .github/workflows/release-delphi-utils.yml diff --git a/.github/workflows/release-delphi-utils.yml b/.github/workflows/release-delphi-utils.yml deleted file mode 100644 index 609bb012c..000000000 --- a/.github/workflows/release-delphi-utils.yml +++ /dev/null @@ -1,108 +0,0 @@ -name: (Deprecated) Release Delphi Utils - -on: - push: - branches: - - not_prod - paths: - - '_delphi_utils_python/**' - workflow_dispatch: - -jobs: - correct_repository: - runs-on: ubuntu-latest - steps: - - name: fail on fork - if: github.repository_owner != 'cmu-delphi' - run: exit 1 - - create_delphi_utils_release: - needs: correct_repository - runs-on: ubuntu-latest - defaults: - run: - working-directory: _delphi_utils_python - steps: - - name: Check out code - uses: actions/checkout@v2 - with: - ssh-key: ${{ secrets.CMU_DELPHI_DEPLOY_MACHINE_SSH }} - - name: Set up Python 3.8 - uses: actions/setup-python@v2 - with: - python-version: 3.8 - - name: Extract version - id: extract_version - run: | - python -m pip install bump2version - echo -n "::set-output name=version::" - bump2version --dry-run --list patch | grep ^current_version | sed -r s,"^.*=",, - - name: Tag version - run: | - git tag delphi-utils/v${{ steps.extract_version.outputs.version }} - git push --tags - outputs: - version: ${{ steps.extract_version.outputs.version }} - - upload_pypi: - needs: create_delphi_utils_release - runs-on: ubuntu-latest - defaults: - run: - working-directory: _delphi_utils_python - steps: - - name: Check out code - uses: actions/checkout@v2 - - name: Set up Python 3.8 - uses: actions/setup-python@v2 - with: - python-version: 3.8 - - name: Install testing dependencies - run: | - python -m pip install --upgrade pip - pip install pylint pytest pydocstyle wheel twine - - name: Install - run: | - make install - - name: Release - run: | - make release - - uses: actions/upload-artifact@v2 - with: - name: delphi_utils - path: _delphi_utils_python/dist/*.tar.gz - - name: Publish a Python distribution to PyPI - uses: pypa/gh-action-pypi-publish@release/v1 - with: - user: __token__ - password: ${{ secrets.DELPHI_PYPI_PROD_TOKEN }} - packages_dir: _delphi_utils_python/dist/ - skip_existing: true - # repository_url: https://test.pypi.org/legacy/ - - sync_main: - needs: correct_repository - runs-on: ubuntu-latest - steps: - - name: Check out code - uses: actions/checkout@v2 - with: - ref: main - ssh-key: ${{ secrets.CMU_DELPHI_DEPLOY_MACHINE_SSH }} - - name: Reset main branch - run: | - git fetch origin prod:prod - git reset --hard prod - - name: Create pull request into main - uses: peter-evans/create-pull-request@v3 - with: - token: ${{ secrets.CMU_DELPHI_DEPLOY_MACHINE_PAT }} - branch: bot/sync-prod-main - commit-message: 'chore: sync prod-main' - base: main - title: 'chore: sync prod->main' - labels: 
chore - reviewers: krivard - assignees: krivard - body: | - Syncing Prod->Main. From 64bde444d386bae069d1a17a63cabe283ae76702 Mon Sep 17 00:00:00 2001 From: Katie Mazaitis Date: Fri, 21 Jul 2023 13:17:41 -0400 Subject: [PATCH 17/21] Delete deprecated delphi-utils release workflow --- .../workflows/create-delphi-utils-release.yml | 48 ------------------- 1 file changed, 48 deletions(-) delete mode 100644 .github/workflows/create-delphi-utils-release.yml diff --git a/.github/workflows/create-delphi-utils-release.yml b/.github/workflows/create-delphi-utils-release.yml deleted file mode 100644 index 4d35b2253..000000000 --- a/.github/workflows/create-delphi-utils-release.yml +++ /dev/null @@ -1,48 +0,0 @@ -name: (Deprecated) Create Delphi Utils Release - -on: - workflow_dispatch: - inputs: - versionName: - description: 'Semantic Version Number (i.e., 5.5.0 or patch, minor, major, prepatch, preminor, premajor, prerelease)' - required: true - default: patch - -jobs: - create_release: - runs-on: ubuntu-latest - defaults: - run: - working-directory: _delphi_utils_python - steps: - - name: Check out code - uses: actions/checkout@v2 - with: - ref: prod - ssh-key: ${{ secrets.CMU_DELPHI_DEPLOY_MACHINE_SSH }} - - name: Reset prod branch - run: | - git fetch origin main:main - git reset --hard main - - name: Set up Python 3.8 - uses: actions/setup-python@v2 - with: - python-version: 3.8 - - name: Change version number - id: version - run: | - python -m pip install bump2version - echo -n "::set-output name=next_tag::" - bump2version --list ${{ github.event.inputs.versionName }} | grep ^new_version | sed -r s,"^.*=",, - - name: Create pull request into prod - uses: peter-evans/create-pull-request@v3 - with: - branch: release/${{ steps.version.outputs.next_tag }} - commit-message: 'chore: release ${{ steps.version.outputs.next_tag }}' - base: prod - title: Release ${{ steps.version.outputs.next_tag }} - labels: chore - reviewers: krivard - assignees: krivard - body: | - Releasing ${{ steps.version.outputs.next_tag }}. 
From 885e6ce0182cac16fc0b923c392854bd42163cb9 Mon Sep 17 00:00:00 2001 From: Nat DeFries <42820733+nmdefries@users.noreply.github.com> Date: Fri, 21 Jul 2023 16:48:40 -0400 Subject: [PATCH 18/21] increase hhs expected lag settings by 1 week --- ansible/templates/hhs_hosp-params-prod.json.j2 | 4 ++-- ansible/templates/sir_complainsalot-params-prod.json.j2 | 2 +- hhs_hosp/params.json.template | 4 ++-- sir_complainsalot/params.json.template | 2 +- 4 files changed, 6 insertions(+), 6 deletions(-) diff --git a/ansible/templates/hhs_hosp-params-prod.json.j2 b/ansible/templates/hhs_hosp-params-prod.json.j2 index fbdffa8f9..55b11555c 100644 --- a/ansible/templates/hhs_hosp-params-prod.json.j2 +++ b/ansible/templates/hhs_hosp-params-prod.json.j2 @@ -8,8 +8,8 @@ "data_source": "hhs", "api_credentials": "{{ validation_api_key }}", "span_length": 14, - "min_expected_lag": {"all": "1"}, - "max_expected_lag": {"all": "7"}, + "min_expected_lag": {"all": "8"}, + "max_expected_lag": {"all": "14"}, "dry_run": true, "suppressed_errors": [] }, diff --git a/ansible/templates/sir_complainsalot-params-prod.json.j2 b/ansible/templates/sir_complainsalot-params-prod.json.j2 index f2c3ee68e..d32428b3e 100644 --- a/ansible/templates/sir_complainsalot-params-prod.json.j2 +++ b/ansible/templates/sir_complainsalot-params-prod.json.j2 @@ -48,7 +48,7 @@ "maintainers": [] }, "hhs": { - "max_age":8, + "max_age":15, "maintainers": [] } } diff --git a/hhs_hosp/params.json.template b/hhs_hosp/params.json.template index e72e0bb86..c2004f935 100644 --- a/hhs_hosp/params.json.template +++ b/hhs_hosp/params.json.template @@ -7,8 +7,8 @@ "common": { "data_source": "hhs", "span_length": 14, - "min_expected_lag": {"all": "1"}, - "max_expected_lag": {"all": "7"}, + "min_expected_lag": {"all": "8"}, + "max_expected_lag": {"all": "14"}, "dry_run": true, "suppressed_errors": [] }, diff --git a/sir_complainsalot/params.json.template b/sir_complainsalot/params.json.template index b6c7f885f..058069efb 100644 --- a/sir_complainsalot/params.json.template +++ b/sir_complainsalot/params.json.template @@ -47,7 +47,7 @@ "maintainers": [] }, "hhs": { - "max_age":8, + "max_age":15, "maintainers": [] } } From a6a9c2b86fafd4dff43c120b8ae4c66a6292af81 Mon Sep 17 00:00:00 2001 From: Delphi Deploy Bot Date: Wed, 26 Jul 2023 15:10:52 +0000 Subject: [PATCH 19/21] chore: bump delphi_utils to 0.3.19 --- _delphi_utils_python/.bumpversion.cfg | 2 +- _delphi_utils_python/delphi_utils/__init__.py | 2 +- _delphi_utils_python/setup.py | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/_delphi_utils_python/.bumpversion.cfg b/_delphi_utils_python/.bumpversion.cfg index a54d86d02..25f567b85 100644 --- a/_delphi_utils_python/.bumpversion.cfg +++ b/_delphi_utils_python/.bumpversion.cfg @@ -1,5 +1,5 @@ [bumpversion] -current_version = 0.3.18 +current_version = 0.3.19 commit = True message = chore: bump delphi_utils to {new_version} tag = False diff --git a/_delphi_utils_python/delphi_utils/__init__.py b/_delphi_utils_python/delphi_utils/__init__.py index 00259dc5a..cb3316671 100644 --- a/_delphi_utils_python/delphi_utils/__init__.py +++ b/_delphi_utils_python/delphi_utils/__init__.py @@ -15,4 +15,4 @@ from .nancodes import Nans from .weekday import Weekday -__version__ = "0.3.18" +__version__ = "0.3.19" diff --git a/_delphi_utils_python/setup.py b/_delphi_utils_python/setup.py index 1668a34f6..cac08e1f9 100644 --- a/_delphi_utils_python/setup.py +++ b/_delphi_utils_python/setup.py @@ -27,7 +27,7 @@ setup( name="delphi_utils", - version="0.3.18", + 
version="0.3.19", description="Shared Utility Functions for Indicators", long_description=long_description, long_description_content_type="text/markdown", From f59b90b4a8cb70c0c83ddb0c0c3fb2430c291982 Mon Sep 17 00:00:00 2001 From: Delphi Deploy Bot Date: Wed, 26 Jul 2023 15:10:53 +0000 Subject: [PATCH 20/21] chore: bump covidcast-indicators to 0.3.44 --- .bumpversion.cfg | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.bumpversion.cfg b/.bumpversion.cfg index 8b614a846..6b0da0bdb 100644 --- a/.bumpversion.cfg +++ b/.bumpversion.cfg @@ -1,5 +1,5 @@ [bumpversion] -current_version = 0.3.43 +current_version = 0.3.44 commit = True message = chore: bump covidcast-indicators to {new_version} tag = False From c9d2147280bc4ffc6cb955d95227e5f474d11906 Mon Sep 17 00:00:00 2001 From: rzats Date: Wed, 26 Jul 2023 15:10:53 +0000 Subject: [PATCH 21/21] [create-pull-request] automated change --- changehc/version.cfg | 2 +- claims_hosp/version.cfg | 2 +- doctor_visits/version.cfg | 2 +- google_symptoms/version.cfg | 2 +- hhs_hosp/version.cfg | 2 +- nchs_mortality/version.cfg | 2 +- nowcast/version.cfg | 2 +- quidel_covidtest/version.cfg | 2 +- sir_complainsalot/version.cfg | 2 +- 9 files changed, 9 insertions(+), 9 deletions(-) diff --git a/changehc/version.cfg b/changehc/version.cfg index 8d2332969..aa45f9083 100644 --- a/changehc/version.cfg +++ b/changehc/version.cfg @@ -1 +1 @@ -current_version = 0.3.43 +current_version = 0.3.44 diff --git a/claims_hosp/version.cfg b/claims_hosp/version.cfg index 8d2332969..aa45f9083 100644 --- a/claims_hosp/version.cfg +++ b/claims_hosp/version.cfg @@ -1 +1 @@ -current_version = 0.3.43 +current_version = 0.3.44 diff --git a/doctor_visits/version.cfg b/doctor_visits/version.cfg index 8d2332969..aa45f9083 100644 --- a/doctor_visits/version.cfg +++ b/doctor_visits/version.cfg @@ -1 +1 @@ -current_version = 0.3.43 +current_version = 0.3.44 diff --git a/google_symptoms/version.cfg b/google_symptoms/version.cfg index 8d2332969..aa45f9083 100644 --- a/google_symptoms/version.cfg +++ b/google_symptoms/version.cfg @@ -1 +1 @@ -current_version = 0.3.43 +current_version = 0.3.44 diff --git a/hhs_hosp/version.cfg b/hhs_hosp/version.cfg index 8d2332969..aa45f9083 100644 --- a/hhs_hosp/version.cfg +++ b/hhs_hosp/version.cfg @@ -1 +1 @@ -current_version = 0.3.43 +current_version = 0.3.44 diff --git a/nchs_mortality/version.cfg b/nchs_mortality/version.cfg index 8d2332969..aa45f9083 100644 --- a/nchs_mortality/version.cfg +++ b/nchs_mortality/version.cfg @@ -1 +1 @@ -current_version = 0.3.43 +current_version = 0.3.44 diff --git a/nowcast/version.cfg b/nowcast/version.cfg index 8d2332969..aa45f9083 100644 --- a/nowcast/version.cfg +++ b/nowcast/version.cfg @@ -1 +1 @@ -current_version = 0.3.43 +current_version = 0.3.44 diff --git a/quidel_covidtest/version.cfg b/quidel_covidtest/version.cfg index 8d2332969..aa45f9083 100644 --- a/quidel_covidtest/version.cfg +++ b/quidel_covidtest/version.cfg @@ -1 +1 @@ -current_version = 0.3.43 +current_version = 0.3.44 diff --git a/sir_complainsalot/version.cfg b/sir_complainsalot/version.cfg index 8d2332969..aa45f9083 100644 --- a/sir_complainsalot/version.cfg +++ b/sir_complainsalot/version.cfg @@ -1 +1 @@ -current_version = 0.3.43 +current_version = 0.3.44