diff --git a/.github/workflows/python-ci.yml b/.github/workflows/python-ci.yml index d0411e0bc..02c38a562 100644 --- a/.github/workflows/python-ci.yml +++ b/.github/workflows/python-ci.yml @@ -16,7 +16,7 @@ jobs: if: github.event.pull_request.draft == false strategy: matrix: - packages: [_delphi_utils_python, changehc, claims_hosp, doctor_visits, dsew_community_profile, google_symptoms, hhs_hosp, hhs_facilities, jhu, nchs_mortality, nowcast, quidel_covidtest, sir_complainsalot] + packages: [_delphi_utils_python, changehc, claims_hosp, doctor_visits, dsew_community_profile, google_symptoms, hhs_hosp, jhu, nchs_mortality, nowcast, quidel_covidtest, sir_complainsalot] defaults: run: working-directory: ${{ matrix.packages }} diff --git a/hhs_facilities/.pylintrc b/hhs_facilities/.pylintrc deleted file mode 100644 index f30837c7e..000000000 --- a/hhs_facilities/.pylintrc +++ /dev/null @@ -1,22 +0,0 @@ - -[MESSAGES CONTROL] - -disable=logging-format-interpolation, - too-many-locals, - too-many-arguments, - # Allow pytest functions to be part of a class. - no-self-use, - # Allow pytest classes to have one test. - too-few-public-methods - -[BASIC] - -# Allow arbitrarily short-named variables. -variable-rgx=[a-z_][a-z0-9_]* -argument-rgx=[a-z_][a-z0-9_]* -attr-rgx=[a-z_][a-z0-9_]* - -[DESIGN] - -# Don't complain about pytest "unused" arguments. -ignored-argument-names=(_.*|run_as_module) \ No newline at end of file diff --git a/hhs_facilities/Makefile b/hhs_facilities/Makefile deleted file mode 100644 index bc88f1fec..000000000 --- a/hhs_facilities/Makefile +++ /dev/null @@ -1,29 +0,0 @@ -.PHONY = venv, lint, test, clean - -dir = $(shell find ./delphi_* -name __init__.py | grep -o 'delphi_[_[:alnum:]]*' | head -1) -venv: - python3.8 -m venv env - -install: venv - . env/bin/activate; \ - pip install wheel ; \ - pip install -e ../_delphi_utils_python ;\ - pip install -e . - -install-ci: venv - . env/bin/activate; \ - pip install wheel ; \ - pip install ../_delphi_utils_python ;\ - pip install . - -lint: - . env/bin/activate; pylint $(dir) - . env/bin/activate; pydocstyle $(dir) - -test: - . env/bin/activate ;\ - (cd tests && ../env/bin/pytest --cov=$(dir) --cov-report=term-missing) - -clean: - rm -rf env - rm -f params.json diff --git a/hhs_facilities/README.md b/hhs_facilities/README.md deleted file mode 100644 index 24d4ba147..000000000 --- a/hhs_facilities/README.md +++ /dev/null @@ -1,63 +0,0 @@ -# HHS Faciltities - -This indicator reports weekly hospitalization usage. It reports the TBD. - - -## Running the Indicator - -The indicator is run by directly executing the Python module contained in this -directory. The safest way to do this is to create a virtual environment, -installed the common DELPHI tools, and then install the module and its -dependencies. To do this, run the following command from this directory: - -``` -make install -``` - -This command will install the package in editable mode, so you can make changes that -will automatically propagate to the installed package. - -All of the user-changable parameters are stored in `params.json`. To execute -the module and produce the output datasets (by default, in `receiving`), run -the following: - -``` -env/bin/python -m delphi_hhs_facilities -``` - -If you want to enter the virtual environment in your shell, -you can run `source env/bin/activate`. Run `deactivate` to leave the virtual environment. - -Once you are finished, you can remove the virtual environment and -params file with the following: - -``` -make clean -``` - -## Testing the code - -To run static tests of the code style, run the following command: - -``` -make lint -``` - -Unit tests are also included in the module. To execute these, run the following -command from this directory: - -``` -make test -``` - -To run individual tests, run the following: - -``` -(cd tests && ../env/bin/pytest .py --cov=delphi_hhs_facilities --cov-report=term-missing) -``` - -The output will show the number of unit tests that passed and failed, along -with the percentage of code covered by the tests. - -None of the linting or unit tests should fail, and the code lines that are not covered by unit tests should be small and -should not include critical sub-routines. diff --git a/hhs_facilities/REVIEW.md b/hhs_facilities/REVIEW.md deleted file mode 100644 index 93a5a6579..000000000 --- a/hhs_facilities/REVIEW.md +++ /dev/null @@ -1,39 +0,0 @@ -## Code Review (Python) - -A code review of this module should include a careful look at the code and the -output. To assist in the process, but certainly not in replace of it, please -check the following items. - -**Documentation** - -- [ ] the README.md file template is filled out and currently accurate; it is -possible to load and test the code using only the instructions given -- [ ] minimal docstrings (one line describing what the function does) are -included for all functions; full docstrings describing the inputs and expected -outputs should be given for non-trivial functions - -**Structure** - -- [ ] code should use 4 spaces for indentation; other style decisions are -flexible, but be consistent within a module -- [ ] any required metadata files are checked into the repository and placed -within the directory `static` -- [ ] any intermediate files that are created and stored by the module should -be placed in the directory `cache` -- [ ] final expected output files to be uploaded to the API are placed in the -`receiving` directory; output files should not be committed to the respository -- [ ] all options and API keys are passed through the file `params.json` -- [ ] template parameter file (`params.json.template`) is checked into the -code; no personal (i.e., usernames) or private (i.e., API keys) information is -included in this template file - -**Testing** - -- [ ] module can be installed in a new virtual environment -- [ ] pylint with the default `.pylint` settings run over the module produces -minimal warnings; warnings that do exist have been confirmed as false positives -- [ ] reasonably high level of unit test coverage covering all of the main logic -of the code (e.g., missing coverage for raised errors that do not currently seem -possible to reach are okay; missing coverage for options that will be needed are -not) -- [ ] all unit tests run without errors diff --git a/hhs_facilities/cache/.gitignore b/hhs_facilities/cache/.gitignore deleted file mode 100644 index e69de29bb..000000000 diff --git a/hhs_facilities/delphi_hhs_facilities/__init__.py b/hhs_facilities/delphi_hhs_facilities/__init__.py deleted file mode 100644 index e9c397bc5..000000000 --- a/hhs_facilities/delphi_hhs_facilities/__init__.py +++ /dev/null @@ -1,13 +0,0 @@ -# -*- coding: utf-8 -*- -"""Module to pull and clean indicators from the HHS facilities source. - -This file defines the functions that are made public by the module. As the -module is intended to be executed though the main method, these are primarily -for testing. -""" - -from __future__ import absolute_import - -from . import run - -__version__ = "0.1.0" diff --git a/hhs_facilities/delphi_hhs_facilities/__main__.py b/hhs_facilities/delphi_hhs_facilities/__main__.py deleted file mode 100644 index 2f335e4ba..000000000 --- a/hhs_facilities/delphi_hhs_facilities/__main__.py +++ /dev/null @@ -1,11 +0,0 @@ -# -*- coding: utf-8 -*- -"""Call the function run_module when executed. - -This file indicates that calling the module (`python -m delphi_hhs_facilities`) will -call the function `run_module` found within the run.py file. There should be -no need to change this template. -""" -from delphi_utils import read_params -from .run import run_module # pragma: no cover - -run_module(read_params()) # pragma: no cover diff --git a/hhs_facilities/delphi_hhs_facilities/constants.py b/hhs_facilities/delphi_hhs_facilities/constants.py deleted file mode 100644 index 25b7ed951..000000000 --- a/hhs_facilities/delphi_hhs_facilities/constants.py +++ /dev/null @@ -1,37 +0,0 @@ -"""Registry for signals and geographies to process.""" -from numpy import nan -from .generate_signals import sum_cols - -NAN_VALUES = { - None: nan, - -999999: 1.5, # -999,999 represents the data range [0-3], so we use the range mean - -999999.0: 1.5 -} - -CONFIRMED_ADMISSIONS = "confirmed_admissions_7d" -CONFIRMED_SUSPECTED_ADMISSIONS = "sum_confirmed_suspected_admissions_7d" - -SIGNALS = [ - # (name, columns to use, operation, date offset) - - (CONFIRMED_ADMISSIONS, - ["previous_day_admission_adult_covid_confirmed_7_day_sum", - "previous_day_admission_pediatric_covid_confirmed_7_day_sum"], - sum_cols, - -1), - - (CONFIRMED_SUSPECTED_ADMISSIONS, - ["previous_day_admission_adult_covid_confirmed_7_day_sum", - "previous_day_admission_pediatric_covid_confirmed_7_day_sum", - "previous_day_admission_adult_covid_suspected_7_day_sum", - "previous_day_admission_pediatric_covid_suspected_7_day_sum"], - sum_cols, - -1), -] - -GEO_RESOLUTIONS = [ - "county", - "msa", - "state", - "hrr" -] diff --git a/hhs_facilities/delphi_hhs_facilities/generate_signals.py b/hhs_facilities/delphi_hhs_facilities/generate_signals.py deleted file mode 100644 index 65ee91692..000000000 --- a/hhs_facilities/delphi_hhs_facilities/generate_signals.py +++ /dev/null @@ -1,73 +0,0 @@ -"""Functions for generating signals.""" - -from typing import Callable - -import pandas as pd -import numpy as np - -from delphi_utils import Nans - - -def add_nancodes(df): - """Add nancodes to a signal dataframe.""" - # Default missingness codes - df["missing_val"] = Nans.NOT_MISSING - df["missing_se"] = Nans.NOT_APPLICABLE - df["missing_sample_size"] = Nans.NOT_APPLICABLE - - # Mark any remaining nans with unknown - remaining_nans_mask = df["val"].isnull() - df.loc[remaining_nans_mask, "missing_val"] = Nans.OTHER - return df - -def generate_signal(df: pd.DataFrame, - input_cols: list, - signal_func: Callable, - date_offset: int) -> pd.DataFrame: - """ - Generate a signal DataFrame derived from an input DataFrame. - - Applies the provided function on the columns specified, and then aggregates by geo and time. - - Parameters - ---------- - df: pd.DataFrame - Input DataFrame containing columns specified in `input_cols`. - input_cols: list of strings - List of column names to pass to `signal_func`. - signal_func: function - Function which takes in a list of Series and produces a signal Series. - date_offset: integer - Number of days to add to the timestamp. This is used because some of the columns are - "previous_day_" and require us adding -1 days to represent the right timespan. - - Returns - ------- - Signal DataFrame that is ready for `create_export_csv`. - """ - df_cols = [df[i] for i in input_cols] - df["val"] = signal_func(df_cols) - df["timestamp"] = df["timestamp"] + pd.Timedelta(days=date_offset) - df = df.groupby(["timestamp", "geo_id"], as_index=False).sum(min_count=1) - df["se"] = df["sample_size"] = np.nan - df = add_nancodes(df) - export_columns = [ - "timestamp", "geo_id", "val", "se", "sample_size", - "missing_val", "missing_se", "missing_sample_size"] - return df[export_columns] - - -def sum_cols(cols: list) -> pd.Series: - """ - Sum a list of Series, requiring 1 non-nan value per row sum. - - Parameters - ---------- - cols: list of Series - List of Series to sum. - - Returns - ------- - Series of summed inputs. - """ - return pd.concat(cols, axis=1).sum(axis=1, min_count=1) diff --git a/hhs_facilities/delphi_hhs_facilities/geo.py b/hhs_facilities/delphi_hhs_facilities/geo.py deleted file mode 100644 index d39188c46..000000000 --- a/hhs_facilities/delphi_hhs_facilities/geo.py +++ /dev/null @@ -1,79 +0,0 @@ -"""Functions for mapping geographic regions.""" - -import pandas as pd -from numpy import dtype -from delphi_utils.geomap import GeoMapper - - -def convert_geo(df: pd.DataFrame, geo: str, gmpr: GeoMapper) -> pd.DataFrame: - """ - Map a DataFrame to desired regions. - - The HHS facility level data contains columns for zip, state, and fips. For state and fips, we - use them as given. For all other geos, we map from zip (the smallest of the regions) to the - desired geo. - - Parameters - ---------- - df: pd.DataFrame - Input DataFrame containing zip, state, and fips columns. - geo: - Desired new geographic resolution. - gmpr: - GeoMapper object. - - Returns - ------- - DataFrame containing new geography column `geo_id` in the `geo` resolution. - """ - if geo == "county": - output_df = df.copy() - output_df["geo_id"] = output_df["fips_code"] - elif geo == "state": - output_df = df.copy() - output_df["geo_id"] = output_df["state"] - elif geo == "hrr": # use zip for HRR since zips nest within HRR while FIPS split across HRRs. - output_df = gmpr.add_geocode(df, "zip", geo) - output_df["geo_id"] = output_df[geo] - else: - output_df = gmpr.add_geocode(df, "fips", geo, from_col="fips_code") - output_df["geo_id"] = output_df[geo] - return output_df - - -def fill_missing_fips(df: pd.DataFrame, gmpr: GeoMapper) -> pd.DataFrame: - """ - Fill in missing FIPS code if zip is present. - - Maps rows that have the FIPS missing but zip present. The rest of the rows, - including those where both FIPS and zip are nan, are kept as is and appended back at the end. - Rows with a zip which fail to map to a FIPS are also kept so that column totals remain equal. - This means that column sums before and after imputation should be identical, and any dropping - of values is handled by downstream geomapping. - - TODO #636 Generalize this function to geomapper. - - Parameters - ---------- - df: pd.DataFrame - Input DataFrame containing zip and fips columns. - gmpr: - GeoMapper object. - - Returns - ------- - DataFrame with missing FIPS imputed with zip. - """ - mask = pd.isna(df["fips_code"]) & ~pd.isna(df["zip"]) - no_fips = df[mask] - fips_present = df[~mask] - no_data_cols = [c for c in df.columns if df[c].dtypes not in (dtype("int64"), dtype("float64"))] - data_cols = list(set(df.columns) - set(no_data_cols)) - added_fips = gmpr.add_geocode(no_fips, "zip", "fips", dropna=False) - added_fips["fips_code"] = added_fips["fips"] - # set weight of unmapped zips to 1 to they don't zero out all the values when multiplied - added_fips.weight.fillna(1, inplace=True) - added_fips[data_cols] = added_fips[data_cols].multiply(added_fips["weight"], axis=0) - fips_filled = added_fips.groupby(no_data_cols, dropna=False, as_index=False).sum(min_count=1) - fips_filled.drop(columns="weight", inplace=True) - return pd.concat([fips_present, fips_filled]).reset_index(drop=True) diff --git a/hhs_facilities/delphi_hhs_facilities/pull.py b/hhs_facilities/delphi_hhs_facilities/pull.py deleted file mode 100644 index ff4303b6c..000000000 --- a/hhs_facilities/delphi_hhs_facilities/pull.py +++ /dev/null @@ -1,58 +0,0 @@ -"""Functions for mapping geographic regions.""" - -from datetime import date - -import pandas as pd -from delphi_utils.geomap import GeoMapper -from delphi_epidata import Epidata - -from .constants import NAN_VALUES - - -def pull_data_iteratively(states: set, dates: dict) -> list: - """ - Pull Epidata API for a set of states and dates. - - To avoid Epidata API row limits, does not grab all values at once. Instead, it loops through - each state and pulls all data for 10 hospitals at a time. - - Parameters - ---------- - states: set - Set of state codes (2 letter lowercase abbreviation) to get data for. - dates: dict - Dict of 'from' and 'to' dates output by Epidata.range(). - - Returns - ------- - List of dictionaries. Concatenation of all the response['epidata'] lists. - """ - responses = [] - for state in states: - lookup_response = Epidata.covid_hosp_facility_lookup(state) - state_hospital_ids = [i["hospital_pk"] for i in lookup_response.get("epidata", [])] - for i in range(0, len(state_hospital_ids), 50): - response = Epidata.covid_hosp_facility(state_hospital_ids[i:i+50], dates) - if response["result"] == 2: - raise Exception(f"Bad result from Epidata: {response['message']}") - responses += response.get("epidata", []) - if len(responses) == 0: - raise Exception("No results found.") - return responses - - -def pull_data() -> pd.DataFrame: - """ - Pull HHS data from Epidata API for all states and dates and convert to a DataFrame. - - Returns - ------- - DataFrame of HHS data. - """ - today = int(date.today().strftime("%Y%m%d")) - past_reference_day = int(date(2020, 1, 1).strftime("%Y%m%d")) # first available date in DB - all_states = GeoMapper().get_geo_values("state_id") - responses = pull_data_iteratively(all_states, Epidata.range(past_reference_day, today)) - all_columns = pd.DataFrame(responses).replace(NAN_VALUES) - all_columns["timestamp"] = pd.to_datetime(all_columns["collection_week"], format="%Y%m%d") - return all_columns diff --git a/hhs_facilities/delphi_hhs_facilities/run.py b/hhs_facilities/delphi_hhs_facilities/run.py deleted file mode 100644 index 43d3a9bdd..000000000 --- a/hhs_facilities/delphi_hhs_facilities/run.py +++ /dev/null @@ -1,57 +0,0 @@ -# -*- coding: utf-8 -*- -"""Main function to run the HHS facilities module. Run with `python -m delphi_hhs_facilities`.""" -from datetime import datetime -import time - -from itertools import product - -from delphi_utils.export import create_export_csv -from delphi_utils.geomap import GeoMapper -from delphi_utils import get_structured_logger - -from .constants import GEO_RESOLUTIONS, SIGNALS -from .generate_signals import generate_signal -from .geo import convert_geo, fill_missing_fips -from .pull import pull_data - - -def run_module(params) -> None: - """ - Run entire hhs_facilities indicator. - - Parameters - ---------- - params - Dictionary containing indicator configuration. Expected to have the following structure: - - "common": - - "export_dir": str, directory to write output - """ - start_time = time.time() - logger = get_structured_logger( - __name__, filename=params["common"].get("log_filename"), - log_exceptions=params["common"].get("log_exceptions", True)) - - raw_df = pull_data() - gmpr = GeoMapper() - filled_fips_df = fill_missing_fips(raw_df, gmpr) - stats = [] - for geo, (sig_name, sig_cols, sig_func, sig_offset) in product(GEO_RESOLUTIONS, SIGNALS): - logger.info("Generating signal and exporting to CSV", - geo_res = geo, - signal_name = sig_name) - mapped_df = convert_geo(filled_fips_df, geo, gmpr) - output_df = generate_signal(mapped_df, sig_cols, sig_func, sig_offset) - dates = create_export_csv(output_df, params["common"]["export_dir"], geo, sig_name) - if len(dates) > 0: - stats.append((max(dates), len(dates))) - - elapsed_time_in_seconds = round(time.time() - start_time, 2) - min_max_date = stats and min(s[0] for s in stats) - csv_export_count = sum(s[-1] for s in stats) - max_lag_in_days = min_max_date and (datetime.now() - min_max_date).days - formatted_min_max_date = min_max_date and min_max_date.strftime("%Y-%m-%d") - logger.info("Completed indicator run", - elapsed_time_in_seconds = elapsed_time_in_seconds, - csv_export_count = csv_export_count, - max_lag_in_days = max_lag_in_days, - oldest_final_export_date = formatted_min_max_date) diff --git a/hhs_facilities/params.json.template b/hhs_facilities/params.json.template deleted file mode 100644 index 81b54a566..000000000 --- a/hhs_facilities/params.json.template +++ /dev/null @@ -1,5 +0,0 @@ -{ - "common": { - "export_dir": "./receiving" - } -} diff --git a/hhs_facilities/receiving/.gitignore b/hhs_facilities/receiving/.gitignore deleted file mode 100644 index e69de29bb..000000000 diff --git a/hhs_facilities/setup.py b/hhs_facilities/setup.py deleted file mode 100644 index 4b26fc84e..000000000 --- a/hhs_facilities/setup.py +++ /dev/null @@ -1,30 +0,0 @@ -from setuptools import setup -from setuptools import find_packages - -required = [ - "numpy", - "pandas", - "pydocstyle", - "pytest", - "pytest-cov", - "pylint==2.8.3", - "delphi-utils", - "covidcast", - "delphi-epidata" -] - -setup( - name="delphi_hhs_facilities", - version="0.1.0", - description="HHS hospital level indicators", - author="", - author_email="", - url="https://github.com/cmu-delphi/covidcast-indicators", - install_requires=required, - classifiers=[ - "Development Status :: 5 - Production/Stable", - "Intended Audience :: Developers", - "Programming Language :: Python :: 3.8", - ], - packages=find_packages(), -) diff --git a/hhs_facilities/static/.gitignore b/hhs_facilities/static/.gitignore deleted file mode 100644 index e69de29bb..000000000 diff --git a/hhs_facilities/tests/expected/.gitkeep b/hhs_facilities/tests/expected/.gitkeep deleted file mode 100644 index e69de29bb..000000000 diff --git a/hhs_facilities/tests/expected/20200131_county_confirmed_admissions_7d.csv b/hhs_facilities/tests/expected/20200131_county_confirmed_admissions_7d.csv deleted file mode 100644 index 0745fb95b..000000000 --- a/hhs_facilities/tests/expected/20200131_county_confirmed_admissions_7d.csv +++ /dev/null @@ -1,4 +0,0 @@ -geo_id,val,se,sample_size,missing_val,missing_se,missing_sample_size -25013,33.00000000,NA,NA,0,1,1 -72001,76.56462040,NA,NA,0,1,1 -72141,0.43537960,NA,NA,0,1,1 diff --git a/hhs_facilities/tests/expected/20200131_county_sum_confirmed_suspected_admissions_7d.csv b/hhs_facilities/tests/expected/20200131_county_sum_confirmed_suspected_admissions_7d.csv deleted file mode 100644 index b3fb890f6..000000000 --- a/hhs_facilities/tests/expected/20200131_county_sum_confirmed_suspected_admissions_7d.csv +++ /dev/null @@ -1,4 +0,0 @@ -geo_id,val,se,sample_size,missing_val,missing_se,missing_sample_size -25013,98.00000000,NA,NA,0,1,1 -72001,161.08400650,NA,NA,0,1,1 -72141,0.91599350,NA,NA,0,1,1 diff --git a/hhs_facilities/tests/expected/20200131_hrr_confirmed_admissions_7d.csv b/hhs_facilities/tests/expected/20200131_hrr_confirmed_admissions_7d.csv deleted file mode 100644 index 9bcb23dfd..000000000 --- a/hhs_facilities/tests/expected/20200131_hrr_confirmed_admissions_7d.csv +++ /dev/null @@ -1,2 +0,0 @@ -geo_id,val,se,sample_size,missing_val,missing_se,missing_sample_size -230,33.00000000,NA,NA,0,1,1 diff --git a/hhs_facilities/tests/expected/20200131_hrr_sum_confirmed_suspected_admissions_7d.csv b/hhs_facilities/tests/expected/20200131_hrr_sum_confirmed_suspected_admissions_7d.csv deleted file mode 100644 index 954b8d9eb..000000000 --- a/hhs_facilities/tests/expected/20200131_hrr_sum_confirmed_suspected_admissions_7d.csv +++ /dev/null @@ -1,2 +0,0 @@ -geo_id,val,se,sample_size,missing_val,missing_se,missing_sample_size -230,98.00000000,NA,NA,0,1,1 diff --git a/hhs_facilities/tests/expected/20200131_msa_confirmed_admissions_7d.csv b/hhs_facilities/tests/expected/20200131_msa_confirmed_admissions_7d.csv deleted file mode 100644 index ff49eb1bf..000000000 --- a/hhs_facilities/tests/expected/20200131_msa_confirmed_admissions_7d.csv +++ /dev/null @@ -1,4 +0,0 @@ -geo_id,val,se,sample_size,missing_val,missing_se,missing_sample_size -10380,0.43537960,NA,NA,0,1,1 -38660,76.56462040,NA,NA,0,1,1 -44140,33.00000000,NA,NA,0,1,1 diff --git a/hhs_facilities/tests/expected/20200131_msa_sum_confirmed_suspected_admissions_7d.csv b/hhs_facilities/tests/expected/20200131_msa_sum_confirmed_suspected_admissions_7d.csv deleted file mode 100644 index 5825ae148..000000000 --- a/hhs_facilities/tests/expected/20200131_msa_sum_confirmed_suspected_admissions_7d.csv +++ /dev/null @@ -1,4 +0,0 @@ -geo_id,val,se,sample_size,missing_val,missing_se,missing_sample_size -10380,0.91599350,NA,NA,0,1,1 -38660,161.08400650,NA,NA,0,1,1 -44140,98.00000000,NA,NA,0,1,1 diff --git a/hhs_facilities/tests/expected/20200131_state_confirmed_admissions_7d.csv b/hhs_facilities/tests/expected/20200131_state_confirmed_admissions_7d.csv deleted file mode 100644 index bcbf93e6a..000000000 --- a/hhs_facilities/tests/expected/20200131_state_confirmed_admissions_7d.csv +++ /dev/null @@ -1,3 +0,0 @@ -geo_id,val,se,sample_size,missing_val,missing_se,missing_sample_size -AL,33.00000000,NA,NA,0,1,1 -PR,33.00000000,NA,NA,0,1,1 diff --git a/hhs_facilities/tests/expected/20200131_state_sum_confirmed_suspected_admissions_7d.csv b/hhs_facilities/tests/expected/20200131_state_sum_confirmed_suspected_admissions_7d.csv deleted file mode 100644 index 601f55748..000000000 --- a/hhs_facilities/tests/expected/20200131_state_sum_confirmed_suspected_admissions_7d.csv +++ /dev/null @@ -1,3 +0,0 @@ -geo_id,val,se,sample_size,missing_val,missing_se,missing_sample_size -AL,98.00000000,NA,NA,0,1,1 -PR,48.00000000,NA,NA,0,1,1 diff --git a/hhs_facilities/tests/params.json.template b/hhs_facilities/tests/params.json.template deleted file mode 100644 index 81b54a566..000000000 --- a/hhs_facilities/tests/params.json.template +++ /dev/null @@ -1,5 +0,0 @@ -{ - "common": { - "export_dir": "./receiving" - } -} diff --git a/hhs_facilities/tests/test_generate_signals.py b/hhs_facilities/tests/test_generate_signals.py deleted file mode 100644 index ef2ec42d1..000000000 --- a/hhs_facilities/tests/test_generate_signals.py +++ /dev/null @@ -1,45 +0,0 @@ -"""Tests for running the signal generation functions.""" - -import pandas as pd -import numpy as np - -from delphi_hhs_facilities.generate_signals import generate_signal, sum_cols -from delphi_utils import Nans - -class TestGenerateSignals: - - def test_generate_signals(self): - test_input = pd.DataFrame( - {"a": [1, 2, 3, 4, np.nan], - "b": [2, np.nan, 4, 6, np.nan], - "geo_id": ["x", "x", "x", "y", "z"], - "timestamp": [pd.Timestamp("20200201"), - pd.Timestamp("20200201"), - pd.Timestamp("20200202"), - pd.Timestamp("20200203"), - pd.Timestamp("20200204")] - }) - test_output = generate_signal(test_input, ["a", "b"], sum_cols, -1) - expected = pd.DataFrame( - {"timestamp": [pd.Timestamp("20200131"), - pd.Timestamp("20200201"), - pd.Timestamp("20200202"), - pd.Timestamp("20200203")], - "geo_id": ["x", "x", "y", "z"], - "val": [5., 7., 10., np.nan], - "se": [np.nan]*4, - "sample_size": [np.nan]*4, - "missing_val": [Nans.NOT_MISSING] * 3 + [Nans.OTHER], - "missing_se": [Nans.NOT_APPLICABLE] * 4, - "missing_sample_size": [Nans.NOT_APPLICABLE] * 4, - }) - pd.testing.assert_frame_equal(test_output, expected) - - def test_sum_cols(self): - test_input = [ - pd.Series([1, 2, 3, np.nan, np.nan]), - pd.Series([np.nan, 3, 6, 9, np.nan]) - ] - test_output = sum_cols(test_input) - expected = pd.Series([1, 5, 9, 9, np.nan]) - pd.testing.assert_series_equal(test_output, expected) diff --git a/hhs_facilities/tests/test_geo.py b/hhs_facilities/tests/test_geo.py deleted file mode 100644 index 7882a57f5..000000000 --- a/hhs_facilities/tests/test_geo.py +++ /dev/null @@ -1,79 +0,0 @@ -"""Tests for running the geo conversion functions.""" - -import pandas as pd -import numpy as np - -from delphi_utils.geomap import GeoMapper -from delphi_hhs_facilities.geo import convert_geo, fill_missing_fips - - -class TestGeo: - - def test_convert_geo(self): - gmpr = GeoMapper() - test_input = pd.DataFrame( - {"state": ["test"], - "fips_code": ["01001"], - "zip": ["01001"], - }) - test_state_output = convert_geo(test_input, "state", gmpr) - pd.testing.assert_series_equal( - test_state_output.geo_id, pd.Series(["test"]), check_names=False - ) - test_county_output = convert_geo(test_input, "county", gmpr) - pd.testing.assert_series_equal( - test_county_output.geo_id, pd.Series(["01001"]), check_names=False - ) - test_msa_output = convert_geo(test_input, "msa", gmpr) - pd.testing.assert_series_equal( - test_msa_output.geo_id, pd.Series(["33860"]), check_names=False - ) - test_hrr_output = convert_geo(test_input, "hrr", gmpr) - pd.testing.assert_series_equal( - test_hrr_output.geo_id, pd.Series(["230"]), check_names=False - ) - - def test_fill_missing_fips(self): - gmpr = GeoMapper() - test_input = pd.DataFrame( - {"hospital_pk": ["test", "test2", "test3"], - "fips_code": ["fakefips", np.nan, np.nan], - "zip": ["01001", "01001", "00601"], - "val1": [1.0, 5.0, 10.0], - "val2": [2.0, 25.0, 210.0] - }) - expected = pd.DataFrame( - {"hospital_pk": ["test", "test2", "test3", "test3"], - "fips_code": ["fakefips", "25013", "72001", "72141"], - "zip": ["01001", "01001", "00601", "00601"], - "val1": [1.0, 5.0, 0.994345718901454*10, 0.005654281098546042*10], - "val2": [2.0, 25.0, 0.994345718901454*210.0, 0.005654281098546042*210.0] - }) - pd.testing.assert_frame_equal(fill_missing_fips(test_input, gmpr), expected) - - # test all nans stay as nan - test_input = pd.DataFrame( - {"hospital_pk": ["test", "test2", "test3"], - "fips_code": ["fakefips", np.nan, np.nan], - "zip": ["01001", "01001", "00601"], - "val1": [1.0, 5.0, np.nan], - "val2": [2.0, 25.0, 210.0] - }) - expected = pd.DataFrame( - {"hospital_pk": ["test", "test2", "test3", "test3"], - "fips_code": ["fakefips", "25013", "72001", "72141"], - "zip": ["01001", "01001", "00601", "00601"], - "val1": [1.0, 5.0, np.nan, np.nan], - "val2": [2.0, 25.0, 0.994345718901454*210.0, 0.005654281098546042*210.0] - }) - pd.testing.assert_frame_equal(fill_missing_fips(test_input, gmpr), expected) - - # test that populated fips or both nan is no-op - test_input_no_missing = pd.DataFrame( - {"hospital_pk": ["test", "test2", "test3", "test4"], - "fips_code": ["fakefips", "testfips", "pseudofips", np.nan], - "zip": ["01001", "01001", "00601", np.nan], - "val": [1.0, 5.0, 10.0, 0.0] - }) - pd.testing.assert_frame_equal(fill_missing_fips(test_input_no_missing, gmpr), - test_input_no_missing) diff --git a/hhs_facilities/tests/test_pull.py b/hhs_facilities/tests/test_pull.py deleted file mode 100644 index 9ea3e3a00..000000000 --- a/hhs_facilities/tests/test_pull.py +++ /dev/null @@ -1,72 +0,0 @@ -"""Tests for running the geo conversion functions.""" - -from unittest.mock import patch - -import pytest -import pandas as pd -import numpy as np - -from delphi_hhs_facilities.pull import pull_data_iteratively, pull_data -from delphi_hhs_facilities.constants import NAN_VALUES - -class TestPull: - - @patch("delphi_hhs_facilities.pull.Epidata.covid_hosp_facility") - @patch("delphi_hhs_facilities.pull.Epidata.covid_hosp_facility_lookup") - def test_pull_data_iteratively(self, covid_hosp_facility_lookup, covid_hosp_facility): - covid_hosp_facility_lookup.return_value = { - "result": 1, - "epidata": [{"hospital_pk": "020001"}, {"hospital_pk": "020006"}], - "message": "success" - } - mock_epidata = [{"collection_week": 20201204, - "total_beds_7_day_sum": 2360, - "all_adult_hospital_beds_7_day_sum": -999999, - "inpatient_beds_7_day_avg": -999999.0, - "total_icu_beds_7_day_avg": np.nan, - "total_staffed_adult_icu_beds_7_day_avg": 32.4}, - {"collection_week": 20201204, - "total_beds_7_day_sum": 1000, - "all_adult_hospital_beds_7_day_sum": 1917, - "inpatient_beds_7_day_avg": 330.6, - "total_icu_beds_7_day_avg": 76.7, - "total_staffed_adult_icu_beds_7_day_avg": 12.1}] - covid_hosp_facility.return_value = { - "result": 1, - "epidata": mock_epidata, - "message": "success"} - output = pull_data_iteratively({"state1", "state2"}, {"from": "test", "to": "date"}) - assert output == mock_epidata * 2 # x2 because there were 2 states that were looped through - - # test failure cases - covid_hosp_facility.return_value = {"result": 2, - "message": "test"} - with pytest.raises(Exception) as exc: - pull_data_iteratively({"state1", "state2"}, {"from": "test", "to": "date"}) - assert "Bad result from Epidata" in str(exc) - covid_hosp_facility_lookup.return_value = {"result": 2, "message": "lookup fail"} - with pytest.raises(Exception) as exc: - pull_data_iteratively({"state1", "state2"}, {"from": "test", "to": "date"}) - assert "No results found" in str(exc) - - @patch("delphi_hhs_facilities.pull.pull_data_iteratively") - def test_pull_data(self, pull_data_iteratively): - pull_data_iteratively.return_value = [{"collection_week": 20201204, - "total_beds_7_day_sum": 2360, - "all_adult_hospital_beds_7_day_sum": -999999, - "inpatient_beds_7_day_avg": -999999.0, - "total_icu_beds_7_day_avg": np.nan, - "total_staffed_adult_icu_beds_7_day_avg": 32.4}] - output = pull_data() - assert output.shape == (1, 7) # 1 mock row, 6 mock + 1 new timestamp column - # verify nans cast properly and timestamp added - pd.testing.assert_frame_equal( - output, - pd.DataFrame({"collection_week": [20201204], - "total_beds_7_day_sum": [2360], - "all_adult_hospital_beds_7_day_sum": [NAN_VALUES[-999999]], - "inpatient_beds_7_day_avg": [NAN_VALUES[-999999]], - "total_icu_beds_7_day_avg": [np.nan], - "total_staffed_adult_icu_beds_7_day_avg": [32.4], - "timestamp": [pd.Timestamp("2020-12-04")]}), - check_names=False) diff --git a/hhs_facilities/tests/test_run.py b/hhs_facilities/tests/test_run.py deleted file mode 100644 index 276c21883..000000000 --- a/hhs_facilities/tests/test_run.py +++ /dev/null @@ -1,39 +0,0 @@ -"""Tests for running the geo conversion functions.""" -from unittest.mock import patch -import tempfile -import os -from itertools import product - -import pandas as pd -import numpy as np - -from delphi_hhs_facilities.run import run_module -from delphi_hhs_facilities.constants import GEO_RESOLUTIONS, SIGNALS - - -class TestRun: - - @patch("delphi_hhs_facilities.run.pull_data") - def test_run_module(self, pull_data): - pull_data.return_value = pd.DataFrame({ - "timestamp": [pd.Timestamp("20200201")]*4, - "fips_code": ["25013", "25013", np.nan, np.nan], - "zip": ["01001", "01001", "00601", "00601"], - "state": ["AL", "AL", "PR", np.nan], - "previous_day_admission_adult_covid_confirmed_7_day_sum": [1., 2., 3., 4.], - "previous_day_admission_pediatric_covid_confirmed_7_day_sum": [10., 20., 30., 40.], - "previous_day_admission_adult_covid_suspected_7_day_sum": [0, 50, 0, 50], - "previous_day_admission_pediatric_covid_suspected_7_day_sum": [5., 10., 15., 20.] - }) - with tempfile.TemporaryDirectory() as tmp: - # when adding tests for new signals, change tmp to './expected' to generate new expected files. - # tests will fail but the files will be created. - params = {"common": {"export_dir": tmp}} - run_module(params) - expected_files = ["_".join(["20200131", geo, sig[0]]) + ".csv" for geo, sig - in product(GEO_RESOLUTIONS, SIGNALS)] - assert sorted(os.listdir(tmp)) == sorted(expected_files) - for f in expected_files: - out_df = pd.read_csv(os.path.join(tmp, f)) - expected_df = pd.read_csv(os.path.join("expected", f)) - pd.testing.assert_frame_equal(out_df, expected_df)