diff --git a/.bumpversion.cfg b/.bumpversion.cfg index bed993158..ab8f53582 100644 --- a/.bumpversion.cfg +++ b/.bumpversion.cfg @@ -1,5 +1,5 @@ [bumpversion] -current_version = 0.3.60 +current_version = 0.3.61 commit = True message = chore: bump covidcast-indicators to {new_version} tag = False diff --git a/ansible/templates/sir_complainsalot-params-prod.json.j2 b/ansible/templates/sir_complainsalot-params-prod.json.j2 index 0dc66608d..7bb2d179a 100644 --- a/ansible/templates/sir_complainsalot-params-prod.json.j2 +++ b/ansible/templates/sir_complainsalot-params-prod.json.j2 @@ -44,13 +44,7 @@ }, "nssp": { "max_age":19, - "maintainers": [], - "retired-signals": [ - "pct_ed_visits_combined_2023rvr", - "pct_ed_visits_covid_2023rvr", - "pct_ed_visits_influenza_2023rvr", - "pct_ed_visits_rsv_2023rvr" - ] + "maintainers": [] }, "nhsn": { "max_age":19, diff --git a/changehc/version.cfg b/changehc/version.cfg index de7422054..e1a8912ac 100644 --- a/changehc/version.cfg +++ b/changehc/version.cfg @@ -1 +1 @@ -current_version = 0.3.60 +current_version = 0.3.61 diff --git a/claims_hosp/version.cfg b/claims_hosp/version.cfg index de7422054..e1a8912ac 100644 --- a/claims_hosp/version.cfg +++ b/claims_hosp/version.cfg @@ -1 +1 @@ -current_version = 0.3.60 +current_version = 0.3.61 diff --git a/doctor_visits/version.cfg b/doctor_visits/version.cfg index de7422054..e1a8912ac 100644 --- a/doctor_visits/version.cfg +++ b/doctor_visits/version.cfg @@ -1 +1 @@ -current_version = 0.3.60 +current_version = 0.3.61 diff --git a/google_symptoms/version.cfg b/google_symptoms/version.cfg index de7422054..e1a8912ac 100644 --- a/google_symptoms/version.cfg +++ b/google_symptoms/version.cfg @@ -1 +1 @@ -current_version = 0.3.60 +current_version = 0.3.61 diff --git a/hhs_hosp/version.cfg b/hhs_hosp/version.cfg index de7422054..e1a8912ac 100644 --- a/hhs_hosp/version.cfg +++ b/hhs_hosp/version.cfg @@ -1 +1 @@ -current_version = 0.3.60 +current_version = 0.3.61 diff --git a/nchs_mortality/version.cfg b/nchs_mortality/version.cfg index de7422054..e1a8912ac 100644 --- a/nchs_mortality/version.cfg +++ b/nchs_mortality/version.cfg @@ -1 +1 @@ -current_version = 0.3.60 +current_version = 0.3.61 diff --git a/nhsn/delphi_nhsn/constants.py b/nhsn/delphi_nhsn/constants.py index d51241b4f..da6216322 100644 --- a/nhsn/delphi_nhsn/constants.py +++ b/nhsn/delphi_nhsn/constants.py @@ -6,29 +6,59 @@ PRELIM_DATASET_ID = "mpgq-jmmr" # column name from socrata -TOTAL_ADMISSION_COVID_API = "totalconfc19newadm" -TOTAL_ADMISSION_FLU_API = "totalconfflunewadm" +TOTAL_ADMISSION_COVID_COL = "totalconfc19newadm" +TOTAL_ADMISSION_FLU_COL = "totalconfflunewadm" +TOTAL_ADMISSION_RSV_COL = "totalconfrsvnewadm" +NUM_HOSP_REPORTING_COVID_COL = "totalconfc19newadmhosprep" +NUM_HOSP_REPORTING_FLU_COL = "totalconfflunewadmhosprep" +NUM_HOSP_REPORTING_RSV_COL = "totalconfrsvnewadmhosprep" +# signal name +TOTAL_ADMISSION_COVID = "confirmed_admissions_covid_ew" +TOTAL_ADMISSION_FLU = "confirmed_admissions_flu_ew" +TOTAL_ADMISSION_RSV = "confirmed_admissions_rsv_ew" +NUM_HOSP_REPORTING_COVID = "hosprep_confirmed_admissions_covid_ew" +NUM_HOSP_REPORTING_FLU = "hosprep_confirmed_admissions_flu_ew" +NUM_HOSP_REPORTING_RSV = "hosprep_confirmed_admissions_rsv_ew" SIGNALS_MAP = { - "confirmed_admissions_covid_ew": TOTAL_ADMISSION_COVID_API, - "confirmed_admissions_flu_ew": TOTAL_ADMISSION_FLU_API, + TOTAL_ADMISSION_COVID: TOTAL_ADMISSION_COVID_COL, + TOTAL_ADMISSION_FLU: TOTAL_ADMISSION_FLU_COL, + TOTAL_ADMISSION_RSV: TOTAL_ADMISSION_RSV_COL, + NUM_HOSP_REPORTING_COVID: NUM_HOSP_REPORTING_COVID_COL, + NUM_HOSP_REPORTING_FLU: NUM_HOSP_REPORTING_FLU_COL, + NUM_HOSP_REPORTING_RSV: NUM_HOSP_REPORTING_RSV_COL, } TYPE_DICT = { "timestamp": "datetime64[ns]", "geo_id": str, - "confirmed_admissions_covid_ew": float, - "confirmed_admissions_flu_ew": float, + TOTAL_ADMISSION_COVID: float, + TOTAL_ADMISSION_FLU: float, + TOTAL_ADMISSION_RSV: float, + NUM_HOSP_REPORTING_COVID: float, + NUM_HOSP_REPORTING_FLU: float, + NUM_HOSP_REPORTING_RSV: float, } # signal mapping for secondary, preliminary source +# made copy incase things would diverge + PRELIM_SIGNALS_MAP = { - "confirmed_admissions_covid_ew_prelim": TOTAL_ADMISSION_COVID_API, - "confirmed_admissions_flu_ew_prelim": TOTAL_ADMISSION_FLU_API, + f"{TOTAL_ADMISSION_COVID}_prelim": TOTAL_ADMISSION_COVID_COL, + f"{TOTAL_ADMISSION_FLU}_prelim": TOTAL_ADMISSION_FLU_COL, + f"{TOTAL_ADMISSION_RSV}_prelim": TOTAL_ADMISSION_RSV_COL, + f"{NUM_HOSP_REPORTING_COVID}_prelim": NUM_HOSP_REPORTING_COVID_COL, + f"{NUM_HOSP_REPORTING_FLU}_prelim": NUM_HOSP_REPORTING_FLU_COL, + f"{NUM_HOSP_REPORTING_RSV}_prelim": NUM_HOSP_REPORTING_RSV_COL, } + PRELIM_TYPE_DICT = { "timestamp": "datetime64[ns]", "geo_id": str, - "confirmed_admissions_covid_ew_prelim": float, - "confirmed_admissions_flu_ew_prelim": float, + f"{TOTAL_ADMISSION_COVID}_prelim": float, + f"{TOTAL_ADMISSION_FLU}_prelim": float, + f"{TOTAL_ADMISSION_RSV}_prelim": float, + f"{NUM_HOSP_REPORTING_COVID}_prelim": float, + f"{NUM_HOSP_REPORTING_FLU}_prelim": float, + f"{NUM_HOSP_REPORTING_RSV}_prelim": float, } diff --git a/nhsn/delphi_nhsn/pull.py b/nhsn/delphi_nhsn/pull.py index 7377ef958..31164770e 100644 --- a/nhsn/delphi_nhsn/pull.py +++ b/nhsn/delphi_nhsn/pull.py @@ -1,8 +1,13 @@ # -*- coding: utf-8 -*- """Functions for pulling NSSP ER data.""" +import copy import logging +import random +import time +from datetime import datetime, timedelta from pathlib import Path from typing import Optional +from urllib.error import HTTPError import pandas as pd from delphi_utils import create_backup_csv @@ -11,20 +16,77 @@ from .constants import MAIN_DATASET_ID, PRELIM_DATASET_ID, PRELIM_SIGNALS_MAP, PRELIM_TYPE_DICT, SIGNALS_MAP, TYPE_DICT -def pull_data(socrata_token: str, dataset_id: str): +def check_last_updated(socrata_token, dataset_id, logger): + """ + Check last updated timestamp to determine if data should be pulled or not. + + Note -- if the call to the API fails, the behavior is to treat the data as stale, + as possibly having duplicate is preferable to missing data + + Parameters + ---------- + socrata_token + dataset_id + logger + + Returns bool + ------- + + """ + recently_updated_source = True + try: + client = Socrata("data.cdc.gov", socrata_token) + response = client.get_metadata(dataset_id) + + updated_timestamp = datetime.utcfromtimestamp(int(response["rowsUpdatedAt"])) + now = datetime.utcnow() + recently_updated_source = (now - updated_timestamp) < timedelta(days=1) + + prelim_prefix = "Preliminary " if dataset_id == PRELIM_DATASET_ID else "" + if recently_updated_source: + logger.info( + f"{prelim_prefix}NHSN data was recently updated; Pulling data", updated_timestamp=updated_timestamp + ) + else: + logger.info(f"{prelim_prefix}NHSN data is stale; Skipping", updated_timestamp=updated_timestamp) + # pylint: disable=W0703 + except Exception as e: + logger.info("error while processing socrata metadata; treating data as stale", error=str(e)) + return recently_updated_source + + +def pull_data(socrata_token: str, dataset_id: str, backup_dir: str, logger): """Pull data from Socrata API.""" client = Socrata("data.cdc.gov", socrata_token) + logger.info( + f"Pulling {'main' if dataset_id == MAIN_DATASET_ID else 'preliminary'} data from Socrata API", + dataset_id=dataset_id, + ) results = [] offset = 0 limit = 50000 # maximum limit allowed by SODA 2.0 - while True: + # retry logic for 500 error + try: page = client.get(dataset_id, limit=limit, offset=offset) - if not page: - break # exit the loop if no more results + except HTTPError as err: + if err.code == 503: + time.sleep(2 + random.randint(0, 1000) / 1000.0) + page = client.get(dataset_id, limit=limit, offset=offset) + else: + logger.info("Error pulling data from Socrata API", error=str(err)) + raise err + + while len(page) > 0: results.extend(page) offset += limit + page = client.get(dataset_id, limit=limit, offset=offset) - df = pd.DataFrame.from_records(results) + if results: + df = pd.DataFrame.from_records(results) + sensor = "prelim" if dataset_id == PRELIM_DATASET_ID else None + create_backup_csv(df, backup_dir, False, sensor=sensor, logger=logger) + else: + df = pd.DataFrame() return df @@ -62,6 +124,7 @@ def pull_nhsn_data( backup_dir: str, custom_run: bool, issue_date: Optional[str], + preliminary: bool = False, logger: Optional[logging.Logger] = None, ): """Pull the latest NHSN hospital admission data, and conforms it into a dataset. @@ -79,6 +142,10 @@ def pull_nhsn_data( Directory to which to save raw backup data custom_run: bool Flag indicating if the current run is a patch. If so, don't save any data to disk + preliminary: bool + Flag indicating if the grabbing main or preliminary data + issue_date: + date to indicate which backup file to pull for patching logger: Optional[logging.Logger] logger object @@ -87,83 +154,39 @@ def pull_nhsn_data( pd.DataFrame Dataframe as described above. """ + dataset_id = PRELIM_DATASET_ID if preliminary else MAIN_DATASET_ID # Pull data from Socrata API df = ( - pull_data(socrata_token, dataset_id=MAIN_DATASET_ID) + pull_data(socrata_token, dataset_id, backup_dir, logger) if not custom_run - else pull_data_from_file(backup_dir, issue_date, logger, prelim_flag=False) + else pull_data_from_file(backup_dir, issue_date, logger, prelim_flag=preliminary) ) - keep_columns = list(TYPE_DICT.keys()) - - if not df.empty: - create_backup_csv(df, backup_dir, custom_run, logger=logger) - - df = df.rename(columns={"weekendingdate": "timestamp", "jurisdiction": "geo_id"}) - - for signal, col_name in SIGNALS_MAP.items(): - df[signal] = df[col_name] + recently_updated = True if custom_run else check_last_updated(socrata_token, dataset_id, logger) - df = df[keep_columns] - df["geo_id"] = df["geo_id"].str.lower() - df.loc[df["geo_id"] == "usa", "geo_id"] = "us" - df = df.astype(TYPE_DICT) - else: - df = pd.DataFrame(columns=keep_columns) + type_dict = PRELIM_TYPE_DICT if preliminary else TYPE_DICT + keep_columns = list(type_dict.keys()) + filtered_type_dict = copy.deepcopy(type_dict) - return df - - -def pull_preliminary_nhsn_data( - socrata_token: str, - backup_dir: str, - custom_run: bool, - issue_date: Optional[str], - logger: Optional[logging.Logger] = None, -): - """Pull the latest preliminary NHSN hospital admission data, and conforms it into a dataset. - - The output dataset has: - - - Each row corresponds to a single observation - - Each row additionally has columns for the signals in SIGNALS - - Parameters - ---------- - socrata_token: str - My App Token for pulling the NHSN data - backup_dir: str - Directory to which to save raw backup data - custom_run: bool - Flag indicating if the current run is a patch. If so, don't save any data to disk - logger: Optional[logging.Logger] - logger object - - Returns - ------- - pd.DataFrame - Dataframe as described above. - """ - df = ( - pull_data(socrata_token, dataset_id=PRELIM_DATASET_ID) - if not custom_run - else pull_data_from_file(backup_dir, issue_date, logger, prelim_flag=True) - ) - - keep_columns = list(PRELIM_TYPE_DICT.keys()) - - if not df.empty: - create_backup_csv(df, backup_dir, custom_run, sensor="prelim", logger=logger) + signal_map = PRELIM_SIGNALS_MAP if preliminary else SIGNALS_MAP + if not df.empty and recently_updated: df = df.rename(columns={"weekendingdate": "timestamp", "jurisdiction": "geo_id"}) - for signal, col_name in PRELIM_SIGNALS_MAP.items(): - df[signal] = df[col_name] + for signal, col_name in signal_map.items(): + # older backups don't have certain columns + try: + df[signal] = df[col_name] + except KeyError: + logger.info("column not available in data", col_name=col_name) + keep_columns.remove(signal) + del filtered_type_dict[signal] df = df[keep_columns] - df = df.astype(PRELIM_TYPE_DICT) df["geo_id"] = df["geo_id"].str.lower() df.loc[df["geo_id"] == "usa", "geo_id"] = "us" + + df = df.astype(filtered_type_dict) else: df = pd.DataFrame(columns=keep_columns) diff --git a/nhsn/delphi_nhsn/run.py b/nhsn/delphi_nhsn/run.py index 92e24bbda..ad9be2384 100644 --- a/nhsn/delphi_nhsn/run.py +++ b/nhsn/delphi_nhsn/run.py @@ -14,15 +14,17 @@ unpublished signals are. See `delphi_utils.add_prefix()` - Any other indicator-specific settings """ +import re import time from datetime import date, datetime, timedelta +from itertools import product import numpy as np from delphi_utils import GeoMapper, get_structured_logger from delphi_utils.export import create_export_csv from .constants import GEOS, PRELIM_SIGNALS_MAP, SIGNALS_MAP -from .pull import pull_nhsn_data, pull_preliminary_nhsn_data +from .pull import pull_nhsn_data def run_module(params, logger=None): @@ -54,21 +56,25 @@ def run_module(params, logger=None): export_start_date = export_start_date.strftime("%Y-%m-%d") nhsn_df = pull_nhsn_data(socrata_token, backup_dir, custom_run=custom_run, issue_date=issue_date, logger=logger) - preliminary_nhsn_df = pull_preliminary_nhsn_data( - socrata_token, backup_dir, custom_run=custom_run, issue_date=issue_date, logger=logger + preliminary_nhsn_df = pull_nhsn_data( + socrata_token, backup_dir, custom_run=custom_run, issue_date=issue_date, logger=logger, preliminary=True ) geo_mapper = GeoMapper() - signal_df_dict = {signal: nhsn_df for signal in SIGNALS_MAP} - # some of the source backups do not include for preliminary data TODO remove after first patch + signal_df_dict = dict() + if not nhsn_df.empty: + signal_df_dict.update({signal: nhsn_df for signal in SIGNALS_MAP}) + # some of the source backups do not include for preliminary data if not preliminary_nhsn_df.empty: signal_df_dict.update({signal: preliminary_nhsn_df for signal in PRELIM_SIGNALS_MAP}) - for signal, df_pull in signal_df_dict.items(): - for geo in GEOS: - df = df_pull.copy() + for geo, signals_df in product(GEOS, signal_df_dict.items()): + signal, df_pull = signals_df + df = df_pull.copy() + try: df = df[["timestamp", "geo_id", signal]] df.rename({signal: "val"}, axis=1, inplace=True) + if geo == "nation": df = df[df["geo_id"] == "us"] elif geo == "hhs": @@ -86,6 +92,7 @@ def run_module(params, logger=None): df["se"] = np.nan df["sample_size"] = np.nan + dates = create_export_csv( df, geo_res=geo, @@ -96,6 +103,14 @@ def run_module(params, logger=None): ) if len(dates) > 0: run_stats.append((max(dates), len(dates))) + # some signal columns are unavailable for patching. + except KeyError as e: + missing_signal = re.search(r"'([^']*)'", str(e)).group(1) + full_signal_list = list(SIGNALS_MAP.keys()) + list(PRELIM_SIGNALS_MAP.keys()) + if missing_signal in full_signal_list: + logger.info("signal not available in data", signal=missing_signal) + else: + raise RuntimeError("Column(s) that shouldn't be missing is missing") from e elapsed_time_in_seconds = round(time.time() - start_time, 2) min_max_date = run_stats and min(s[0] for s in run_stats) diff --git a/nhsn/tests/conftest.py b/nhsn/tests/conftest.py index b89946a02..b321f1236 100644 --- a/nhsn/tests/conftest.py +++ b/nhsn/tests/conftest.py @@ -1,5 +1,6 @@ import copy import json +import time from unittest.mock import patch import pytest @@ -60,7 +61,8 @@ def params_w_patch(params): @pytest.fixture(scope="function") def run_as_module(params): - with patch('sodapy.Socrata.get') as mock_get: + with patch('sodapy.Socrata.get') as mock_get, \ + patch('sodapy.Socrata.get_metadata') as mock_get_metadata: def side_effect(*args, **kwargs): if kwargs['offset'] == 0: if "ua7e-t2fy" in args[0]: @@ -70,5 +72,6 @@ def side_effect(*args, **kwargs): else: return [] mock_get.side_effect = side_effect + mock_get_metadata.return_value = {"rowsUpdatedAt": time.time()} run_module(params) diff --git a/nhsn/tests/patch_dir/.gitignore b/nhsn/tests/patch_dir/.gitignore new file mode 100644 index 000000000..e69de29bb diff --git a/nhsn/tests/test_data/20241119.csv.gz b/nhsn/tests/test_data/20241119.csv.gz new file mode 100644 index 000000000..57c9bafb1 Binary files /dev/null and b/nhsn/tests/test_data/20241119.csv.gz differ diff --git a/nhsn/tests/test_data/20241212.csv.gz b/nhsn/tests/test_data/20241212.csv.gz index 26f91c200..088469c65 100644 Binary files a/nhsn/tests/test_data/20241212.csv.gz and b/nhsn/tests/test_data/20241212.csv.gz differ diff --git a/nhsn/tests/test_data/20241212_prelim.csv.gz b/nhsn/tests/test_data/20241212_prelim.csv.gz index 9ef690301..f1afd8ce0 100644 Binary files a/nhsn/tests/test_data/20241212_prelim.csv.gz and b/nhsn/tests/test_data/20241212_prelim.csv.gz differ diff --git a/nhsn/tests/test_data/expected_df.csv b/nhsn/tests/test_data/expected_df.csv new file mode 100644 index 000000000..556a65b07 --- /dev/null +++ b/nhsn/tests/test_data/expected_df.csv @@ -0,0 +1,21 @@ +timestamp,geo_id,confirmed_admissions_covid_ew,confirmed_admissions_flu_ew,confirmed_admissions_rsv_ew,hosprep_confirmed_admissions_covid_ew,hosprep_confirmed_admissions_flu_ew,hosprep_confirmed_admissions_rsv_ew +2021-08-21,md,53.0,2.0,0.0,13.0,13.0,1.0 +2021-08-21,co,852.0,0.0,,92.0,78.0,0.0 +2021-08-21,us,10384.0,6049.0,84.0,5426.0,5426.0,469.0 +2021-08-28,co,835.0,1.0,,92.0,78.0,0.0 +2021-08-28,us,94596.0,262.0,,5391.0,4397.0,0.0 +2021-09-04,co,1000.0,3.0,,92.0,78.0,0.0 +2021-09-04,us,93241.0,282.0,,5392.0,4396.0,0.0 +2021-09-11,co,982.0,2.0,,92.0,78.0,0.0 +2021-09-11,us,88162.0,247.0,,5391.0,4377.0,0.0 +2021-09-18,co,955.0,0.0,,92.0,78.0,0.0 +2021-09-18,us,79169.0,261.0,,5394.0,4362.0,0.0 +2021-09-25,co,993.0,0.0,,92.0,78.0,0.0 +2021-09-25,us,67740.0,234.0,,5393.0,4368.0,0.0 +2021-10-02,co,970.0,0.0,,92.0,78.0,0.0 +2021-10-02,us,58076.0,253.0,,5395.0,4391.0,0.0 +2021-10-09,co,1079.0,1.0,,92.0,78.0,0.0 +2021-10-09,us,51744.0,341.0,,5396.0,4379.0,0.0 +2021-10-16,co,1231.0,0.0,,92.0,78.0,0.0 +2021-10-16,us,45978.0,266.0,,5394.0,4307.0,0.0 +2021-10-16,region 1,45978.0,266.0,,5394.0,4307.0,0.0 diff --git a/nhsn/tests/test_data/expected_df_prelim.csv b/nhsn/tests/test_data/expected_df_prelim.csv new file mode 100644 index 000000000..7a7fb367d --- /dev/null +++ b/nhsn/tests/test_data/expected_df_prelim.csv @@ -0,0 +1,20 @@ +timestamp,geo_id,confirmed_admissions_covid_ew_prelim,confirmed_admissions_flu_ew_prelim,confirmed_admissions_rsv_ew_prelim,hosprep_confirmed_admissions_covid_ew_prelim,hosprep_confirmed_admissions_flu_ew_prelim,hosprep_confirmed_admissions_rsv_ew_prelim +2021-08-21,mi,269.0,523.0,1.0,152.0,152.0,4.0 +2021-08-21,co,852.0,0.0,,92.0,78.0,0.0 +2021-08-21,us,8946.0,5576.0,61.0,5422.0,5422.0,485.0 +2021-08-28,co,835.0,1.0,,92.0,78.0,0.0 +2021-08-28,us,94596.0,262.0,,5391.0,4397.0,0.0 +2021-09-04,co,1000.0,3.0,,92.0,78.0,0.0 +2021-09-04,us,93241.0,282.0,,5392.0,4396.0,0.0 +2021-09-11,co,982.0,2.0,,92.0,78.0,0.0 +2021-09-11,us,88162.0,247.0,,5391.0,4377.0,0.0 +2021-09-18,co,955.0,0.0,,92.0,78.0,0.0 +2021-09-18,us,79169.0,261.0,,5394.0,4362.0,0.0 +2021-09-25,co,993.0,0.0,,92.0,78.0,0.0 +2021-09-25,us,67740.0,234.0,,5393.0,4368.0,0.0 +2021-10-02,co,970.0,0.0,,92.0,78.0,0.0 +2021-10-02,us,58076.0,253.0,,5395.0,4391.0,0.0 +2021-10-09,co,1079.0,1.0,,92.0,78.0,0.0 +2021-10-09,us,51744.0,341.0,,5396.0,4379.0,0.0 +2021-10-16,co,1231.0,0.0,,92.0,78.0,0.0 +2021-10-16,us,45978.0,266.0,,5394.0,4307.0,0.0 diff --git a/nhsn/tests/test_data/page.json b/nhsn/tests/test_data/page.json index 5d4eda759..4c56160ca 100644 --- a/nhsn/tests/test_data/page.json +++ b/nhsn/tests/test_data/page.json @@ -1,4 +1,161 @@ -[ +[{ + "weekendingdate": "2021-08-21T00:00:00.000", + "jurisdiction": "MD", + "numinptbeds": "2972.43", + "numinptbedsadult": "2582.14", + "numinptbedsped": "195.86", + "numinptbedsocc": "2339.14", + "numinptbedsoccadult": "2056.86", + "numinptbedsoccped": "154.43", + "numicubeds": "329.86", + "numicubedsadult": "289.43", + "numicubedsped": "0.0", + "numicubedsocc": "247.29", + "numicubedsoccadult": "226.86", + "numicubedsoccped": "0.29", + "numconfc19hosppatsadult": "36.5", + "numconfc19hosppatsped": "1.71", + "totalconfc19hosppats": "38.21", + "numconffluhosppatsadult": "0.0", + "numconffluhosppatsped": "0.0", + "totalconffluhosppats": "0.86", + "numconfrsvhosppatsadult": "0.0", + "numconfrsvhosppatsped": "0.0", + "totalconfrsvhosppats": "0.0", + "numconfc19icupatsadult": "3.43", + "numconfc19icupatsped": "0.71", + "totalconfc19icupats": "4.14", + "numconffluicupatsadult": "0.0", + "numconffluicupatsped": "0.0", + "totalconffluicupats": "0.0", + "numconfrsvicupatsadult": "0.0", + "numconfrsvicupatsped": "0.0", + "totalconfrsvicupats": "0.0", + "numconfc19newadmped0to4": "1.0", + "numconfc19newadmped5to17": "0.0", + "totalconfc19newadmped": "1.0", + "numconfc19newadmadult18to49": "6.0", + "totalconfc19newadmadult": "52.0", + "numconfc19newadmunk": "0.0", + "totalconfc19newadm": "53.0", + "totalconfflunewadmped": "0.0", + "totalconfflunewadmadult": "0.0", + "totalconfflunewadm": "2.0", + "totalconfrsvnewadmped": "0.0", + "totalconfrsvnewadmadult": "0.0", + "totalconfrsvnewadm": "0.0", + "pctinptbedsocc": "0.7869", + "pctconfc19inptbeds": "0.0129", + "pctconffluinptbeds": "0.0003", + "pctconfrsvinptbeds": "0.0", + "pcticubedsocc": "0.7497", + "pctconfc19icubeds": "0.0126", + "pctconffluicubeds": "0.0", + "pctconfrsvicubeds": "0.0", + "pctconfc19newadmadult": "0.9811", + "pctconfc19newadmped": "0.0189", + "pctconfflunewadmadult": "0.0", + "pctconfflunewadmped": "0.0", + "numinptbedshosprep": "13", + "numinptbedsocchosprep": "13", + "numicubedshosprep": "13", + "numicubedsocchosprep": "13", + "totalconfc19hosppatshosprep": "13", + "totalconffluhosppatshosprep": "13", + "totalconfrsvhosppatshosprep": "1", + "totalconfc19icupatshosprep": "13", + "totalconffluicupatshosprep": "13", + "totalconfrsvicupatshosprep": "1", + "totalconfc19newadmpedhosprep": "12", + "totalconfc19newadmadulthosprep": "13", + "totalconfc19newadmhosprep": "13", + "totalconfflunewadmpedhosprep": "1", + "totalconfflunewadmadulthosprep": "1", + "totalconfflunewadmhosprep": "13", + "totalconfrsvnewadmpedhosprep": "1", + "totalconfrsvnewadmadulthosprep": "1", + "totalconfrsvnewadmhosprep": "1", + "pctinptbedsocchosprep": "13", + "pcticubedsocchosprep": "13", + "pctconfc19inptbedshosprep": "13", + "pctconffluinptbedshosprep": "13", + "pctconfrsvinptbedshosprep": "1", + "pctconfc19icubedshosprep": "13", + "pctconffluicubedshosprep": "13", + "pctconfrsvicubedshosprep": "1", + "numinptbedsperchosprep": "0.2549", + "numinptbedsoccperchosprep": "0.2549", + "numicubedsperchosprep": "0.2549", + "numicubedsoccperchosprep": "0.2549", + "totalconfc19hosppatsperc": "0.2549", + "totalconffluhosppatsperc": "0.2549", + "totalconfrsvhosppatsperc": "0.0196", + "totalconfc19icupatsperchosprep": "0.2549", + "totalconffluicupatsperchosprep": "0.2549", + "totalconfrsvicupatsperchosprep": "0.0196", + "totalconfc19newadmpedper": "0.2353", + "totalconfc19newadmadultp": "0.2549", + "totalconfc19newadmperchosprep": "0.2549", + "totalconfflunewadmpedper": "0.0196", + "totalconfflunewadmadultp": "0.0196", + "totalconfflunewadmperchosprep": "0.2549", + "totalconfrsvnewadmpedper": "0.0196", + "totalconfrsvnewadmadultp": "0.0196", + "totalconfrsvnewadmperchosprep": "0.0196", + "pctinptbedsoccperchosprep": "0.2549", + "pcticubedsoccperchosprep": "0.2549", + "pctconfc19inptbedsperchosprep": "0.2549", + "pctconffluinptbedsperchosprep": "0.2549", + "pctconfrsvinptbedsperchosprep": "0.0196", + "pctconfc19icubedsperchosprep": "0.2549", + "pctconffluicubedsperchosprep": "0.2549", + "pctconfrsvicubedsperchosprep": "0.0196", + "numinptbedsperchosprepabschg": "0.0196", + "numinptbedsoccperchospre": "0.0196", + "numicubedsperchosprepabschg": "0.0196", + "numicubedsoccperchosprepabschg": "0.0196", + "totalconfc19hosppatsperc_1": "0.0196", + "totalconffluhosppatsperc_1": "0.0196", + "totalconfrsvhosppatsperc_1": "0.0", + "totalconfc19icupatsperch": "0.0196", + "totalconffluicupatsperch": "0.0196", + "totalconfrsvicupatsperch": "0.0", + "totalconfc19newadmpedper_1": "0.0", + "totalconfc19newadmadultp_1": "0.0196", + "totalconfc19newadmpercho": "0.0196", + "totalconfflunewadmpedper_1": "0.0", + "totalconfflunewadmadultp_1": "0.0", + "totalconfflunewadmpercho": "0.0196", + "totalconfrsvnewadmpedper_1": "0.0", + "totalconfrsvnewadmadultp_1": "0.0", + "totalconfrsvnewadmpercho": "0.0", + "pctinptbedsoccperchospre": "0.0196", + "pcticubedsoccperchosprepabschg": "0.0196", + "pctconfc19inptbedspercho": "0.0196", + "pctconffluinptbedspercho": "0.0196", + "pctconfrsvinptbedspercho": "0.0", + "pctconfc19icubedsperchos": "0.0196", + "pctconffluicubedsperchos": "0.0196", + "pctconfrsvicubedsperchos": "0.0", + "numconfc19newadmped0to4per100k": "0.29", + "numconfc19newadmped5to17per100k": "0.0", + "totalconfc19newadmpedper100k": "0.07", + "numconfc19newadmadult18to49per100k": "0.24", + "totalconfc19newadmadultper100k": "1.09", + "totalconfc19newadmper100k": "0.86", + "totalconfflunewadmpedper100k": "0.0", + "totalconfflunewadmadultper100k": "0.0", + "totalconfflunewadmper100k": "0.03", + "totalconfrsvnewadmpedper100k": "0.0", + "totalconfrsvnewadmadultper100k": "0.0", + "totalconfrsvnewadmper100k": "0.0", + "totalconfc19newadmperchosprepabove80pct": "0", + "totalconfc19newadmperchosprepabove90pct": "0", + "totalconfflunewadmperchosprepabove80pct": "0", + "totalconfflunewadmperchosprepabove90pct": "0", + "totalconfrsvnewadmperchosprepabove80pct": "0", + "totalconfrsvnewadmperchosprepabove90pct": "0" + }, { "weekendingdate": "2021-08-21T00:00:00.000", "jurisdiction": "CO", @@ -116,125 +273,166 @@ "pctconfc19icubedsperchos": "0.0", "pctconffluicubedsperchos": "0.0", "pctconfrsvicubedsperchos": "0.0" - }, - { - "weekendingdate": "2021-08-21T00:00:00.000", - "jurisdiction": "USA", - "numinptbeds": "750150.99", - "numinptbedsadult": "679540.51", - "numinptbedsped": "51335.02", - "numinptbedsocc": "572345.09", - "numinptbedsoccadult": "526399.17", - "numinptbedsoccped": "32810.36", - "numicubeds": "120768.79", - "numicubedsadult": "91030.4", - "numicubedsped": "21419.35", - "numicubedsocc": "89853.28", - "numicubedsoccadult": "70595.31", - "numicubedsoccped": "14112.02", - "numconfc19hosppatsadult": "92782.24", - "numconfc19hosppatsped": "1268.64", - "totalconfc19hosppats": "94050.88", - "totalconffluhosppats": "292.67", - "numconfc19icupatsadult": "24692.91", - "totalconfc19icupats": "24692.91", - "totalconffluicupats": "45.57", - "totalconfc19newadmped": "2170.0", - "numconfc19newadmadult18to49": "27365.0", - "totalconfc19newadmadult": "90776.0", - "numconfc19newadmunk": "4827.0", - "totalconfc19newadm": "92946.0", - "totalconfflunewadm": "280.0", - "pctinptbedsocc": "0.763", - "pctconfc19inptbeds": "0.1254", - "pctconffluinptbeds": "0.0004", - "pcticubedsocc": "0.744", - "pctconfc19icubeds": "0.2045", - "pctconffluicubeds": "0.0004", - "pctconfc19newadmadult": "0.9767", - "pctconfc19newadmped": "0.0233", - "numinptbedshosprep": "5396", - "numinptbedsocchosprep": "5396", - "numicubedshosprep": "5396", - "numicubedsocchosprep": "5396", - "totalconfc19hosppatshosprep": "5393", - "totalconffluhosppatshosprep": "4417", - "totalconfrsvhosppatshosprep": "0", - "totalconfc19icupatshosprep": "5393", - "totalconffluicupatshosprep": "4415", - "totalconfrsvicupatshosprep": "0", - "totalconfc19newadmpedhosprep": "5276", - "totalconfc19newadmadulthosprep": "5392", - "totalconfc19newadmhosprep": "5392", - "totalconfflunewadmpedhosprep": "0", - "totalconfflunewadmadulthosprep": "0", - "totalconfflunewadmhosprep": "4415", - "totalconfrsvnewadmpedhosprep": "0", - "totalconfrsvnewadmadulthosprep": "0", - "totalconfrsvnewadmhosprep": "0", - "pctinptbedsocchosprep": "5396", - "pcticubedsocchosprep": "5396", - "pctconfc19inptbedshosprep": "5393", - "pctconffluinptbedshosprep": "4417", - "pctconfrsvinptbedshosprep": "0", - "pctconfc19icubedshosprep": "5393", - "pctconffluicubedshosprep": "4415", - "pctconfrsvicubedshosprep": "0", - "numinptbedsperchosprep": "0.9492", - "numinptbedsoccperchosprep": "0.9492", - "numicubedsperchosprep": "0.9492", - "numicubedsoccperchosprep": "0.9492", - "totalconfc19hosppatsperc": "0.9486", - "totalconffluhosppatsperc": "0.777", - "totalconfrsvhosppatsperc": "0.0", - "totalconfc19icupatsperchosprep": "0.9486", - "totalconffluicupatsperchosprep": "0.7766", - "totalconfrsvicupatsperchosprep": "0.0", - "totalconfc19newadmpedper": "0.9281", - "totalconfc19newadmadultp": "0.9485", - "totalconfc19newadmperchosprep": "94.85", - "totalconfflunewadmpedper": "0.0", - "totalconfflunewadmadultp": "0.0", - "totalconfflunewadmperchosprep": "77.66", - "totalconfrsvnewadmpedper": "0.0", - "totalconfrsvnewadmadultp": "0.0", - "totalconfrsvnewadmperchosprep": "0.0", - "pctinptbedsoccperchosprep": "0.9492", - "pcticubedsoccperchosprep": "0.9492", - "pctconfc19inptbedsperchosprep": "0.9486", - "pctconffluinptbedsperchosprep": "0.777", - "pctconfrsvinptbedsperchosprep": "0.0", - "pctconfc19icubedsperchosprep": "0.9486", - "pctconffluicubedsperchosprep": "0.7766", - "pctconfrsvicubedsperchosprep": "0.0", - "numinptbedsperchosprepabschg": "0.07", - "numinptbedsoccperchospre": "0.07", - "numicubedsperchosprepabschg": "0.07", - "numicubedsoccperchosprepabschg": "0.07", - "totalconfc19hosppatsperc_1": "0.07", - "totalconffluhosppatsperc_1": "0.14", - "totalconfrsvhosppatsperc_1": "0.0", - "totalconfc19icupatsperch": "0.07", - "totalconffluicupatsperch": "0.19", - "totalconfrsvicupatsperch": "0.0", - "totalconfc19newadmpedper_1": "0.07", - "totalconfc19newadmadultp_1": "0.07", - "totalconfc19newadmpercho": "0.07", - "totalconfflunewadmpedper_1": "0.0", - "totalconfflunewadmadultp_1": "0.0", - "totalconfflunewadmpercho": "0.16", - "totalconfrsvnewadmpedper_1": "0.0", - "totalconfrsvnewadmadultp_1": "0.0", - "totalconfrsvnewadmpercho": "0.0", - "pctinptbedsoccperchospre": "0.0007", - "pcticubedsoccperchosprepabschg": "0.0007", - "pctconfc19inptbedspercho": "0.0007", - "pctconffluinptbedspercho": "0.0014", - "pctconfrsvinptbedspercho": "0.0", - "pctconfc19icubedsperchos": "0.0007", - "pctconffluicubedsperchos": "0.0019", - "pctconfrsvicubedsperchos": "0.0" - }, + },{ + "weekendingdate": "2021-08-21T00:00:00.000", + "jurisdiction": "USA", + "numinptbeds": "691631.77", + "numinptbedsadult": "626133.24", + "numinptbedsped": "37667.52", + "numinptbedsocc": "525785.42", + "numinptbedsoccadult": "479767.94", + "numinptbedsoccped": "26370.0", + "numicubeds": "98235.73", + "numicubedsadult": "76084.07", + "numicubedsped": "8987.7", + "numicubedsocc": "69971.29", + "numicubedsoccadult": "55500.93", + "numicubedsoccped": "6279.0", + "numconfc19hosppatsadult": "8413.3", + "numconfc19hosppatsped": "392.43", + "totalconfc19hosppats": "8805.73", + "numconffluhosppatsadult": "131.21", + "numconffluhosppatsped": "21.74", + "totalconffluhosppats": "4869.7", + "numconfrsvhosppatsadult": "55.84", + "numconfrsvhosppatsped": "17.13", + "totalconfrsvhosppats": "72.96", + "numconfc19icupatsadult": "1209.86", + "numconfc19icupatsped": "81.13", + "totalconfc19icupats": "1290.99", + "numconffluicupatsadult": "12.76", + "numconffluicupatsped": "3.0", + "totalconffluicupats": "812.94", + "numconfrsvicupatsadult": "4.86", + "numconfrsvicupatsped": "2.14", + "totalconfrsvicupats": "7.0", + "numconfc19newadmped0to4": "277.0", + "numconfc19newadmped5to17": "190.0", + "totalconfc19newadmped": "533.0", + "numconfc19newadmadult18to49": "1260.0", + "totalconfc19newadmadult": "9851.0", + "numconfc19newadmunk": "337.0", + "totalconfc19newadm": "10384.0", + "totalconfflunewadmped": "19.0", + "totalconfflunewadmadult": "140.0", + "totalconfflunewadm": "6049.0", + "totalconfrsvnewadmped": "35.0", + "totalconfrsvnewadmadult": "49.0", + "totalconfrsvnewadm": "84.0", + "pctinptbedsocc": "0.7602", + "pctconfc19inptbeds": "0.0127", + "pctconffluinptbeds": "0.007", + "pctconfrsvinptbeds": "0.0001", + "pcticubedsocc": "0.7123", + "pctconfc19icubeds": "0.0131", + "pctconffluicubeds": "0.0083", + "pctconfrsvicubeds": "0.0001", + "pctconfc19newadmadult": "0.9487", + "pctconfc19newadmped": "0.0513", + "pctconfflunewadmadult": "0.0231", + "pctconfflunewadmped": "0.0031", + "pctconfrsvnewadmadult": "0.5833", + "pctconfrsvnewadmped": "0.4167", + "numinptbedshosprep": "5426", + "numinptbedsocchosprep": "5426", + "numicubedshosprep": "5426", + "numicubedsocchosprep": "5426", + "totalconfc19hosppatshosprep": "5426", + "totalconffluhosppatshosprep": "5426", + "totalconfrsvhosppatshosprep": "470", + "totalconfc19icupatshosprep": "5426", + "totalconffluicupatshosprep": "5426", + "totalconfrsvicupatshosprep": "469", + "totalconfc19newadmpedhosprep": "5304", + "totalconfc19newadmadulthosprep": "5426", + "totalconfc19newadmhosprep": "5426", + "totalconfflunewadmpedhosprep": "287", + "totalconfflunewadmadulthosprep": "292", + "totalconfflunewadmhosprep": "5426", + "totalconfrsvnewadmpedhosprep": "465", + "totalconfrsvnewadmadulthosprep": "467", + "totalconfrsvnewadmhosprep": "469", + "pctinptbedsocchosprep": "5426", + "pcticubedsocchosprep": "5426", + "pctconfc19inptbedshosprep": "5426", + "pctconffluinptbedshosprep": "5426", + "pctconfrsvinptbedshosprep": "470", + "pctconfc19icubedshosprep": "5426", + "pctconffluicubedshosprep": "5426", + "pctconfrsvicubedshosprep": "469", + "numinptbedsperchosprep": "0.9489", + "numinptbedsoccperchosprep": "0.9489", + "numicubedsperchosprep": "0.9489", + "numicubedsoccperchosprep": "0.9489", + "totalconfc19hosppatsperc": "0.9489", + "totalconffluhosppatsperc": "0.9489", + "totalconfrsvhosppatsperc": "0.0822", + "totalconfc19icupatsperchosprep": "0.9489", + "totalconffluicupatsperchosprep": "0.9489", + "totalconfrsvicupatsperchosprep": "0.082", + "totalconfc19newadmpedper": "0.9276", + "totalconfc19newadmadultp": "0.9489", + "totalconfc19newadmperchosprep": "0.9489", + "totalconfflunewadmpedper": "0.0502", + "totalconfflunewadmadultp": "0.0511", + "totalconfflunewadmperchosprep": "0.9489", + "totalconfrsvnewadmpedper": "0.0813", + "totalconfrsvnewadmadultp": "0.0817", + "totalconfrsvnewadmperchosprep": "0.082", + "pctinptbedsoccperchosprep": "0.9489", + "pcticubedsoccperchosprep": "0.9489", + "pctconfc19inptbedsperchosprep": "0.9489", + "pctconffluinptbedsperchosprep": "0.9489", + "pctconfrsvinptbedsperchosprep": "0.0822", + "pctconfc19icubedsperchosprep": "0.9489", + "pctconffluicubedsperchosprep": "0.9489", + "pctconfrsvicubedsperchosprep": "0.082", + "numinptbedsperchosprepabschg": "-0.0014", + "numinptbedsoccperchospre": "-0.0014", + "numicubedsperchosprepabschg": "-0.0014", + "numicubedsoccperchosprepabschg": "-0.0014", + "totalconfc19hosppatsperc_1": "-0.0014", + "totalconffluhosppatsperc_1": "-0.0014", + "totalconfrsvhosppatsperc_1": "0.0003", + "totalconfc19icupatsperch": "-0.0014", + "totalconffluicupatsperch": "-0.0014", + "totalconfrsvicupatsperch": "0.0002", + "totalconfc19newadmpedper_1": "-0.0012", + "totalconfc19newadmadultp_1": "-0.0014", + "totalconfc19newadmpercho": "-0.0014", + "totalconfflunewadmpedper_1": "0.0", + "totalconfflunewadmadultp_1": "0.0003", + "totalconfflunewadmpercho": "-0.0014", + "totalconfrsvnewadmpedper_1": "0.0002", + "totalconfrsvnewadmadultp_1": "0.0", + "totalconfrsvnewadmpercho": "0.0", + "pctinptbedsoccperchospre": "-0.0014", + "pcticubedsoccperchosprepabschg": "-0.0014", + "pctconfc19inptbedspercho": "-0.0014", + "pctconffluinptbedspercho": "-0.0014", + "pctconfrsvinptbedspercho": "0.0003", + "pctconfc19icubedsperchos": "-0.0014", + "pctconffluicubedsperchos": "-0.0014", + "pctconfrsvicubedsperchos": "0.0002", + "numconfc19newadmped0to4per100k": "1.49", + "numconfc19newadmped5to17per100k": "0.35", + "totalconfc19newadmpedper100k": "0.73", + "numconfc19newadmadult18to49per100k": "0.9", + "totalconfc19newadmadultper100k": "3.73", + "totalconfc19newadmper100k": "3.08", + "totalconfflunewadmpedper100k": "0.03", + "totalconfflunewadmadultper100k": "0.05", + "totalconfflunewadmper100k": "1.79", + "totalconfrsvnewadmpedper100k": "0.05", + "totalconfrsvnewadmadultper100k": "0.02", + "totalconfrsvnewadmper100k": "0.02", + "totalconfc19newadmperchosprepabove80pct": "1", + "totalconfc19newadmperchosprepabove90pct": "1", + "totalconfflunewadmperchosprepabove80pct": "1", + "totalconfflunewadmperchosprepabove90pct": "1", + "totalconfrsvnewadmperchosprepabove80pct": "0", + "totalconfrsvnewadmperchosprepabove90pct": "0" + }, { "weekendingdate": "2021-08-28T00:00:00.000", "jurisdiction": "CO", diff --git a/nhsn/tests/test_data/prelim_page.json b/nhsn/tests/test_data/prelim_page.json index 374639f14..da902995a 100644 --- a/nhsn/tests/test_data/prelim_page.json +++ b/nhsn/tests/test_data/prelim_page.json @@ -1,4 +1,163 @@ -[ +[{ + "weekendingdate": "2021-08-21T00:00:00.000", + "jurisdiction": "MI", + "numinptbeds": "20658.33", + "numinptbedsadult": "19048.19", + "numinptbedsped": "930.14", + "numinptbedsocc": "15875.6", + "numinptbedsoccadult": "14604.12", + "numinptbedsoccped": "608.76", + "numicubeds": "2870.62", + "numicubedsadult": "2265.48", + "numicubedsped": "175.71", + "numicubedsocc": "2153.88", + "numicubedsoccadult": "1733.88", + "numicubedsoccped": "138.86", + "numconfc19hosppatsadult": "249.76", + "numconfc19hosppatsped": "18.0", + "totalconfc19hosppats": "267.76", + "numconffluhosppatsadult": "0.29", + "numconffluhosppatsped": "0.0", + "totalconffluhosppats": "434.24", + "numconfrsvhosppatsadult": "1.14", + "numconfrsvhosppatsped": "0.0", + "totalconfrsvhosppats": "1.14", + "numconfc19icupatsadult": "31.71", + "numconfc19icupatsped": "1.86", + "totalconfc19icupats": "33.57", + "numconffluicupatsadult": "0.0", + "numconffluicupatsped": "0.0", + "totalconffluicupats": "44.29", + "numconfrsvicupatsadult": "0.0", + "numconfrsvicupatsped": "0.0", + "totalconfrsvicupats": "0.0", + "numconfc19newadmped0to4": "12.0", + "numconfc19newadmped5to17": "2.0", + "totalconfc19newadmped": "16.0", + "numconfc19newadmadult18to49": "32.0", + "totalconfc19newadmadult": "253.0", + "numconfc19newadmunk": "6.0", + "totalconfc19newadm": "269.0", + "totalconfflunewadmped": "0.0", + "totalconfflunewadmadult": "1.0", + "totalconfflunewadm": "523.0", + "totalconfrsvnewadmped": "0.0", + "totalconfrsvnewadmadult": "1.0", + "totalconfrsvnewadm": "1.0", + "pctinptbedsocc": "0.7685", + "pctconfc19inptbeds": "0.013", + "pctconffluinptbeds": "0.021", + "pctconfrsvinptbeds": "0.0001", + "pcticubedsocc": "0.7503", + "pctconfc19icubeds": "0.0117", + "pctconffluicubeds": "0.0154", + "pctconfrsvicubeds": "0.0", + "pctconfc19newadmadult": "0.9405", + "pctconfc19newadmped": "0.0595", + "pctconfflunewadmadult": "0.0019", + "pctconfflunewadmped": "0.0", + "pctconfrsvnewadmadult": "1.0", + "pctconfrsvnewadmped": "0.0", + "numinptbedshosprep": "152", + "numinptbedsocchosprep": "152", + "numicubedshosprep": "152", + "numicubedsocchosprep": "152", + "totalconfc19hosppatshosprep": "152", + "totalconffluhosppatshosprep": "152", + "totalconfrsvhosppatshosprep": "4", + "totalconfc19icupatshosprep": "152", + "totalconffluicupatshosprep": "152", + "totalconfrsvicupatshosprep": "4", + "totalconfc19newadmpedhosprep": "148", + "totalconfc19newadmadulthosprep": "152", + "totalconfc19newadmhosprep": "152", + "totalconfflunewadmpedhosprep": "4", + "totalconfflunewadmadulthosprep": "4", + "totalconfflunewadmhosprep": "152", + "totalconfrsvnewadmpedhosprep": "4", + "totalconfrsvnewadmadulthosprep": "4", + "totalconfrsvnewadmhosprep": "4", + "pctinptbedsocchosprep": "152", + "pcticubedsocchosprep": "152", + "pctconfc19inptbedshosprep": "152", + "pctconffluinptbedshosprep": "152", + "pctconfrsvinptbedshosprep": "4", + "pctconfc19icubedshosprep": "152", + "pctconffluicubedshosprep": "152", + "pctconfrsvicubedshosprep": "4", + "numinptbedsperchosprep": "0.9744", + "numinptbedsoccperchosprep": "0.9744", + "numicubedsperchosprep": "0.9744", + "numicubedsoccperchosprep": "0.9744", + "totalconfc19hosppatsperc": "0.9744", + "totalconffluhosppatsperc": "0.9744", + "totalconfrsvhosppatsperc": "0.0256", + "totalconfc19icupatsperchosprep": "0.9744", + "totalconffluicupatsperchosprep": "0.9744", + "totalconfrsvicupatsperchosprep": "0.0256", + "totalconfc19newadmpedper": "0.9487", + "totalconfc19newadmadultp": "0.9744", + "totalconfc19newadmperchosprep": "0.9744", + "totalconfflunewadmpedper": "0.0256", + "totalconfflunewadmadultp": "0.0256", + "totalconfflunewadmperchosprep": "0.9744", + "totalconfrsvnewadmpedper": "0.0256", + "totalconfrsvnewadmadultp": "0.0256", + "totalconfrsvnewadmperchosprep": "0.0256", + "pctinptbedsoccperchosprep": "0.9744", + "pcticubedsoccperchosprep": "0.9744", + "pctconfc19inptbedsperchosprep": "0.9744", + "pctconffluinptbedsperchosprep": "0.9744", + "pctconfrsvinptbedsperchosprep": "0.0256", + "pctconfc19icubedsperchosprep": "0.9744", + "pctconffluicubedsperchosprep": "0.9744", + "pctconfrsvicubedsperchosprep": "0.0256", + "numinptbedsperchosprepabschg": "0.0", + "numinptbedsoccperchospre": "0.0", + "numicubedsperchosprepabschg": "0.0", + "numicubedsoccperchosprepabschg": "0.0", + "totalconfc19hosppatsperc_1": "0.0", + "totalconffluhosppatsperc_1": "0.0", + "totalconfrsvhosppatsperc_1": "0.0064", + "totalconfc19icupatsperch": "0.0", + "totalconffluicupatsperch": "0.0", + "totalconfrsvicupatsperch": "0.0064", + "totalconfc19newadmpedper_1": "0.0", + "totalconfc19newadmadultp_1": "0.0", + "totalconfc19newadmpercho": "0.0", + "totalconfflunewadmpedper_1": "0.0", + "totalconfflunewadmadultp_1": "0.0", + "totalconfflunewadmpercho": "0.0", + "totalconfrsvnewadmpedper_1": "0.0064", + "totalconfrsvnewadmadultp_1": "0.0064", + "totalconfrsvnewadmpercho": "0.0064", + "pctinptbedsoccperchospre": "0.0", + "pcticubedsoccperchosprepabschg": "0.0", + "pctconfc19inptbedspercho": "0.0", + "pctconffluinptbedspercho": "0.0", + "pctconfrsvinptbedspercho": "0.0064", + "pctconfc19icubedsperchos": "0.0", + "pctconffluicubedsperchos": "0.0", + "pctconfrsvicubedsperchos": "0.0064", + "numconfc19newadmped0to4per100k": "2.26", + "numconfc19newadmped5to17per100k": "0.13", + "totalconfc19newadmpedper100k": "0.76", + "numconfc19newadmadult18to49per100k": "0.79", + "totalconfc19newadmadultper100k": "3.19", + "totalconfc19newadmper100k": "2.68", + "totalconfflunewadmpedper100k": "0.0", + "totalconfflunewadmadultper100k": "0.01", + "totalconfflunewadmper100k": "5.21", + "totalconfrsvnewadmpedper100k": "0.0", + "totalconfrsvnewadmadultper100k": "0.01", + "totalconfrsvnewadmper100k": "0.01", + "totalconfc19newadmperchosprepabove80pct": "1", + "totalconfc19newadmperchosprepabove90pct": "1", + "totalconfflunewadmperchosprepabove80pct": "1", + "totalconfflunewadmperchosprepabove90pct": "1", + "totalconfrsvnewadmperchosprepabove80pct": "0", + "totalconfrsvnewadmperchosprepabove90pct": "0" + }, { "weekendingdate": "2021-08-21T00:00:00.000", "jurisdiction": "CO", @@ -117,124 +276,166 @@ "pctconffluicubedsperchos": "0.0", "pctconfrsvicubedsperchos": "0.0" }, - { - "weekendingdate": "2021-08-21T00:00:00.000", - "jurisdiction": "USA", - "numinptbeds": "750150.99", - "numinptbedsadult": "679540.51", - "numinptbedsped": "51335.02", - "numinptbedsocc": "572345.09", - "numinptbedsoccadult": "526399.17", - "numinptbedsoccped": "32810.36", - "numicubeds": "120768.79", - "numicubedsadult": "91030.4", - "numicubedsped": "21419.35", - "numicubedsocc": "89853.28", - "numicubedsoccadult": "70595.31", - "numicubedsoccped": "14112.02", - "numconfc19hosppatsadult": "92782.24", - "numconfc19hosppatsped": "1268.64", - "totalconfc19hosppats": "94050.88", - "totalconffluhosppats": "292.67", - "numconfc19icupatsadult": "24692.91", - "totalconfc19icupats": "24692.91", - "totalconffluicupats": "45.57", - "totalconfc19newadmped": "2170.0", - "numconfc19newadmadult18to49": "27365.0", - "totalconfc19newadmadult": "90776.0", - "numconfc19newadmunk": "4827.0", - "totalconfc19newadm": "92946.0", - "totalconfflunewadm": "280.0", - "pctinptbedsocc": "0.763", - "pctconfc19inptbeds": "0.1254", - "pctconffluinptbeds": "0.0004", - "pcticubedsocc": "0.744", - "pctconfc19icubeds": "0.2045", - "pctconffluicubeds": "0.0004", - "pctconfc19newadmadult": "0.9767", - "pctconfc19newadmped": "0.0233", - "numinptbedshosprep": "5396", - "numinptbedsocchosprep": "5396", - "numicubedshosprep": "5396", - "numicubedsocchosprep": "5396", - "totalconfc19hosppatshosprep": "5393", - "totalconffluhosppatshosprep": "4417", - "totalconfrsvhosppatshosprep": "0", - "totalconfc19icupatshosprep": "5393", - "totalconffluicupatshosprep": "4415", - "totalconfrsvicupatshosprep": "0", - "totalconfc19newadmpedhosprep": "5276", - "totalconfc19newadmadulthosprep": "5392", - "totalconfc19newadmhosprep": "5392", - "totalconfflunewadmpedhosprep": "0", - "totalconfflunewadmadulthosprep": "0", - "totalconfflunewadmhosprep": "4415", - "totalconfrsvnewadmpedhosprep": "0", - "totalconfrsvnewadmadulthosprep": "0", - "totalconfrsvnewadmhosprep": "0", - "pctinptbedsocchosprep": "5396", - "pcticubedsocchosprep": "5396", - "pctconfc19inptbedshosprep": "5393", - "pctconffluinptbedshosprep": "4417", - "pctconfrsvinptbedshosprep": "0", - "pctconfc19icubedshosprep": "5393", - "pctconffluicubedshosprep": "4415", - "pctconfrsvicubedshosprep": "0", - "numinptbedsperchosprep": "0.9477", - "numinptbedsoccperchosprep": "0.9477", - "numicubedsperchosprep": "0.9477", - "numicubedsoccperchosprep": "0.9477", - "totalconfc19hosppatsperc": "0.9471", - "totalconffluhosppatsperc": "0.7757", - "totalconfrsvhosppatsperc": "0.0", - "totalconfc19icupatsperchosprep": "0.9471", - "totalconffluicupatsperchosprep": "0.7754", - "totalconfrsvicupatsperchosprep": "0.0", - "totalconfc19newadmpedper": "0.9266", - "totalconfc19newadmadultp": "0.947", - "totalconfc19newadmperchosprep": "0.947", - "totalconfflunewadmpedper": "0.0", - "totalconfflunewadmadultp": "0.0", - "totalconfflunewadmperchosprep": "0.7754", - "totalconfrsvnewadmpedper": "0.0", - "totalconfrsvnewadmadultp": "0.0", - "totalconfrsvnewadmperchosprep": "0.0", - "pctinptbedsoccperchosprep": "0.9477", - "pcticubedsoccperchosprep": "0.9477", - "pctconfc19inptbedsperchosprep": "0.9471", - "pctconffluinptbedsperchosprep": "0.7757", - "pctconfrsvinptbedsperchosprep": "0.0", - "pctconfc19icubedsperchosprep": "0.9471", - "pctconffluicubedsperchosprep": "0.7754", - "pctconfrsvicubedsperchosprep": "0.0", - "numinptbedsperchosprepabschg": "0.0007", - "numinptbedsoccperchospre": "0.0007", - "numicubedsperchosprepabschg": "0.0007", - "numicubedsoccperchosprepabschg": "0.0007", - "totalconfc19hosppatsperc_1": "0.0007", - "totalconffluhosppatsperc_1": "0.0014", - "totalconfrsvhosppatsperc_1": "0.0", - "totalconfc19icupatsperch": "0.0007", - "totalconffluicupatsperch": "0.0019", - "totalconfrsvicupatsperch": "0.0", - "totalconfc19newadmpedper_1": "0.0007", - "totalconfc19newadmadultp_1": "0.0007", - "totalconfc19newadmpercho": "0.0007", - "totalconfflunewadmpedper_1": "0.0", - "totalconfflunewadmadultp_1": "0.0", - "totalconfflunewadmpercho": "0.0016", - "totalconfrsvnewadmpedper_1": "0.0", - "totalconfrsvnewadmadultp_1": "0.0", - "totalconfrsvnewadmpercho": "0.0", - "pctinptbedsoccperchospre": "0.0007", - "pcticubedsoccperchosprepabschg": "0.0007", - "pctconfc19inptbedspercho": "0.0007", - "pctconffluinptbedspercho": "0.0014", - "pctconfrsvinptbedspercho": "0.0", - "pctconfc19icubedsperchos": "0.0007", - "pctconffluicubedsperchos": "0.0019", - "pctconfrsvicubedsperchos": "0.0" - }, +{ + "weekendingdate": "2021-08-21T00:00:00.000", + "jurisdiction": "USA", + "numinptbeds": "689557.7", + "numinptbedsadult": "624334.25", + "numinptbedsped": "37418.05", + "numinptbedsocc": "519563.99", + "numinptbedsoccadult": "474624.08", + "numinptbedsoccped": "25990.81", + "numicubeds": "97890.16", + "numicubedsadult": "75856.67", + "numicubedsped": "8897.66", + "numicubedsocc": "69095.72", + "numicubedsoccadult": "54758.31", + "numicubedsoccped": "6174.95", + "numconfc19hosppatsadult": "7243.55", + "numconfc19hosppatsped": "353.7", + "totalconfc19hosppats": "7597.25", + "numconffluhosppatsadult": "127.4", + "numconffluhosppatsped": "21.54", + "totalconffluhosppats": "4452.11", + "numconfrsvhosppatsadult": "50.19", + "numconfrsvhosppatsped": "17.26", + "totalconfrsvhosppats": "67.45", + "numconfc19icupatsadult": "1031.2", + "numconfc19icupatsped": "69.71", + "totalconfc19icupats": "1100.9", + "numconffluicupatsadult": "18.02", + "numconffluicupatsped": "1.86", + "totalconffluicupats": "726.08", + "numconfrsvicupatsadult": "3.57", + "numconfrsvicupatsped": "3.29", + "totalconfrsvicupats": "6.86", + "numconfc19newadmped0to4": "265.0", + "numconfc19newadmped5to17": "156.0", + "totalconfc19newadmped": "496.0", + "numconfc19newadmadult18to49": "1132.0", + "totalconfc19newadmadult": "8450.0", + "numconfc19newadmunk": "321.0", + "totalconfc19newadm": "8946.0", + "totalconfflunewadmped": "14.0", + "totalconfflunewadmadult": "156.0", + "totalconfflunewadm": "5576.0", + "totalconfrsvnewadmped": "23.0", + "totalconfrsvnewadmadult": "38.0", + "totalconfrsvnewadm": "61.0", + "pctinptbedsocc": "0.7535", + "pctconfc19inptbeds": "0.011", + "pctconffluinptbeds": "0.0065", + "pctconfrsvinptbeds": "0.0001", + "pcticubedsocc": "0.7058", + "pctconfc19icubeds": "0.0112", + "pctconffluicubeds": "0.0074", + "pctconfrsvicubeds": "0.0001", + "pctconfc19newadmadult": "0.9446", + "pctconfc19newadmped": "0.0554", + "pctconfflunewadmadult": "0.028", + "pctconfflunewadmped": "0.0025", + "pctconfrsvnewadmadult": "0.623", + "pctconfrsvnewadmped": "0.377", + "numinptbedshosprep": "5422", + "numinptbedsocchosprep": "5422", + "numicubedshosprep": "5422", + "numicubedsocchosprep": "5422", + "totalconfc19hosppatshosprep": "5422", + "totalconffluhosppatshosprep": "5422", + "totalconfrsvhosppatshosprep": "488", + "totalconfc19icupatshosprep": "5422", + "totalconffluicupatshosprep": "5422", + "totalconfrsvicupatshosprep": "487", + "totalconfc19newadmpedhosprep": "5300", + "totalconfc19newadmadulthosprep": "5422", + "totalconfc19newadmhosprep": "5422", + "totalconfflunewadmpedhosprep": "324", + "totalconfflunewadmadulthosprep": "326", + "totalconfflunewadmhosprep": "5422", + "totalconfrsvnewadmpedhosprep": "483", + "totalconfrsvnewadmadulthosprep": "483", + "totalconfrsvnewadmhosprep": "485", + "pctinptbedsocchosprep": "5422", + "pcticubedsocchosprep": "5422", + "pctconfc19inptbedshosprep": "5422", + "pctconffluinptbedshosprep": "5422", + "pctconfrsvinptbedshosprep": "488", + "pctconfc19icubedshosprep": "5422", + "pctconffluicubedshosprep": "5422", + "pctconfrsvicubedshosprep": "487", + "numinptbedsperchosprep": "0.9486", + "numinptbedsoccperchosprep": "0.9486", + "numicubedsperchosprep": "0.9486", + "numicubedsoccperchosprep": "0.9486", + "totalconfc19hosppatsperc": "0.9486", + "totalconffluhosppatsperc": "0.9486", + "totalconfrsvhosppatsperc": "0.0854", + "totalconfc19icupatsperchosprep": "0.9486", + "totalconffluicupatsperchosprep": "0.9486", + "totalconfrsvicupatsperchosprep": "0.0852", + "totalconfc19newadmpedper": "0.9272", + "totalconfc19newadmadultp": "0.9486", + "totalconfc19newadmperchosprep": "0.9486", + "totalconfflunewadmpedper": "0.0567", + "totalconfflunewadmadultp": "0.057", + "totalconfflunewadmperchosprep": "0.9486", + "totalconfrsvnewadmpedper": "0.0845", + "totalconfrsvnewadmadultp": "0.0845", + "totalconfrsvnewadmperchosprep": "0.0848", + "pctinptbedsoccperchosprep": "0.9486", + "pcticubedsoccperchosprep": "0.9486", + "pctconfc19inptbedsperchosprep": "0.9486", + "pctconffluinptbedsperchosprep": "0.9486", + "pctconfrsvinptbedsperchosprep": "0.0854", + "pctconfc19icubedsperchosprep": "0.9486", + "pctconffluicubedsperchosprep": "0.9486", + "pctconfrsvicubedsperchosprep": "0.0852", + "numinptbedsperchosprepabschg": "-0.0007", + "numinptbedsoccperchospre": "-0.0007", + "numicubedsperchosprepabschg": "-0.0007", + "numicubedsoccperchosprepabschg": "-0.0007", + "totalconfc19hosppatsperc_1": "-0.0007", + "totalconffluhosppatsperc_1": "-0.0007", + "totalconfrsvhosppatsperc_1": "0.0031", + "totalconfc19icupatsperch": "-0.0007", + "totalconffluicupatsperch": "-0.0007", + "totalconfrsvicupatsperch": "0.0031", + "totalconfc19newadmpedper_1": "-0.0007", + "totalconfc19newadmadultp_1": "-0.0007", + "totalconfc19newadmpercho": "-0.0007", + "totalconfflunewadmpedper_1": "0.0065", + "totalconfflunewadmadultp_1": "0.0059", + "totalconfflunewadmpercho": "-0.0007", + "totalconfrsvnewadmpedper_1": "0.0031", + "totalconfrsvnewadmadultp_1": "0.0028", + "totalconfrsvnewadmpercho": "0.0028", + "pctinptbedsoccperchospre": "-0.0007", + "pcticubedsoccperchosprepabschg": "-0.0007", + "pctconfc19inptbedspercho": "-0.0007", + "pctconffluinptbedspercho": "-0.0007", + "pctconfrsvinptbedspercho": "0.0031", + "pctconfc19icubedsperchos": "-0.0007", + "pctconffluicubedsperchos": "-0.0007", + "pctconfrsvicubedsperchos": "0.0031", + "numconfc19newadmped0to4per100k": "1.42", + "numconfc19newadmped5to17per100k": "0.28", + "totalconfc19newadmpedper100k": "0.67", + "numconfc19newadmadult18to49per100k": "0.8", + "totalconfc19newadmadultper100k": "3.2", + "totalconfc19newadmper100k": "2.65", + "totalconfflunewadmpedper100k": "0.02", + "totalconfflunewadmadultper100k": "0.06", + "totalconfflunewadmper100k": "1.65", + "totalconfrsvnewadmpedper100k": "0.03", + "totalconfrsvnewadmadultper100k": "0.01", + "totalconfrsvnewadmper100k": "0.02", + "totalconfc19newadmperchosprepabove80pct": "1", + "totalconfc19newadmperchosprepabove90pct": "1", + "totalconfflunewadmperchosprepabove80pct": "1", + "totalconfflunewadmperchosprepabove90pct": "1", + "totalconfrsvnewadmperchosprepabove80pct": "0", + "totalconfrsvnewadmperchosprepabove90pct": "0" + }, { "weekendingdate": "2021-08-28T00:00:00.000", "jurisdiction": "CO", diff --git a/nhsn/tests/test_patch.py b/nhsn/tests/test_patch.py index 066ef4736..10b74c26d 100644 --- a/nhsn/tests/test_patch.py +++ b/nhsn/tests/test_patch.py @@ -1,17 +1,19 @@ -import glob import os from collections import defaultdict from pathlib import Path import shutil from unittest.mock import patch as mock_patch - +import re import pandas as pd from datetime import datetime, timedelta +import pytest from epiweeks import Week from delphi_nhsn.patch import filter_source_files, patch -from delphi_nhsn.constants import TOTAL_ADMISSION_COVID_API, TOTAL_ADMISSION_FLU_API +from delphi_nhsn.constants import TOTAL_ADMISSION_COVID_COL, TOTAL_ADMISSION_FLU_COL, \ + NUM_HOSP_REPORTING_FLU_COL, NUM_HOSP_REPORTING_COVID_COL, GEOS, TOTAL_ADMISSION_COVID, TOTAL_ADMISSION_FLU, \ + NUM_HOSP_REPORTING_RSV_COL, TOTAL_ADMISSION_RSV_COL from conftest import TEST_DATA, PRELIM_TEST_DATA, TEST_DIR class TestPatch: @@ -85,17 +87,25 @@ def generate_test_source_files(self): custom_filename = f"{TEST_DIR}/backups/{date}.csv.gz" custom_filename_prelim = f"{TEST_DIR}/backups/{date}_prelim.csv.gz" test_data = pd.DataFrame(TEST_DATA) - test_data[TOTAL_ADMISSION_COVID_API] = int(date) - test_data[TOTAL_ADMISSION_FLU_API] = int(date) + test_data[TOTAL_ADMISSION_COVID_COL] = int(date) + test_data[TOTAL_ADMISSION_FLU_COL] = int(date) + test_data[TOTAL_ADMISSION_RSV_COL] = int(date) + test_data[NUM_HOSP_REPORTING_COVID_COL] = int(date) + test_data[NUM_HOSP_REPORTING_FLU_COL] = int(date) + test_data[NUM_HOSP_REPORTING_RSV_COL] = int(date) test_prelim_data = pd.DataFrame(PRELIM_TEST_DATA) - test_prelim_data[TOTAL_ADMISSION_COVID_API] = int(date) - test_prelim_data[TOTAL_ADMISSION_FLU_API] = int(date) - - test_data = test_data.head(2) + test_prelim_data[TOTAL_ADMISSION_COVID_COL] = int(date) + test_prelim_data[TOTAL_ADMISSION_FLU_COL] = int(date) + test_prelim_data[TOTAL_ADMISSION_RSV_COL] = int(date) + test_prelim_data[NUM_HOSP_REPORTING_COVID_COL] = int(date) + test_prelim_data[NUM_HOSP_REPORTING_FLU_COL] = int(date) + test_prelim_data[NUM_HOSP_REPORTING_RSV_COL] = int(date) + + test_data = test_data.head(3) test_data.to_csv( custom_filename, index=False, na_rep="NA", compression="gzip" ) - test_prelim_data = test_data.head(2) + test_prelim_data = test_data.head(3) test_prelim_data.to_csv( custom_filename_prelim, index=False, na_rep="NA", compression="gzip" ) @@ -108,14 +118,15 @@ def test_patch(self, params_w_patch): file_list, prelim_file_list = self.generate_test_source_files() patch(params_w_patch) - for issue_path in Path(f"{TEST_DIR}/patch_dir").glob("*"): + for issue_path in Path(f"{TEST_DIR}/patch_dir").glob("issue*"): issue_dt_str = issue_path.name.replace("issue_", "") for file in Path(issue_path / "nhsn").iterdir(): df = pd.read_csv(file) assert issue_dt_str == str(int(df["val"][0])) # clean up - shutil.rmtree(f"{TEST_DIR}/patch_dir") + for file in Path(f"{TEST_DIR}/patch_dir").glob("issue*"): + shutil.rmtree(file) for file in file_list: os.remove(file) @@ -123,6 +134,23 @@ def test_patch(self, params_w_patch): for file in prelim_file_list: os.remove(file) + def test_patch_incomplete_file(self, params_w_patch): + os.makedirs(params_w_patch["patch"]["patch_dir"], exist_ok=True) + issue_date = "20241119" + existing_signals = [TOTAL_ADMISSION_COVID, TOTAL_ADMISSION_FLU] + backup_dir = params_w_patch.get("common").get("backup_dir") + shutil.copy(f"{TEST_DIR}/test_data/{issue_date}.csv.gz", backup_dir) + + with mock_patch("delphi_nhsn.patch.read_params", return_value=params_w_patch): + patch(params_w_patch) + + files = list(Path(f"{TEST_DIR}/patch_dir/issue_{issue_date}/nhsn").glob("*.csv")) + dates = set([re.search(r"\d{6}", file.name).group() for file in files]) + assert len(files) == len(GEOS) * len(existing_signals) * len(dates) + # clean up + for file in Path(f"{TEST_DIR}/patch_dir").glob("issue*"): + shutil.rmtree(file) + diff --git a/nhsn/tests/test_pull.py b/nhsn/tests/test_pull.py index daa3acd92..f8e27868b 100644 --- a/nhsn/tests/test_pull.py +++ b/nhsn/tests/test_pull.py @@ -1,24 +1,37 @@ import glob +import time from unittest.mock import patch, MagicMock import os import pytest - +from urllib.error import HTTPError import pandas as pd from delphi_nhsn.pull import ( pull_nhsn_data, pull_data, - pull_preliminary_nhsn_data, pull_data_from_file + pull_data_from_file, + check_last_updated ) -from delphi_nhsn.constants import SIGNALS_MAP, PRELIM_SIGNALS_MAP +from delphi_nhsn.constants import TYPE_DICT, PRELIM_TYPE_DICT, PRELIM_DATASET_ID, MAIN_DATASET_ID from delphi_utils import get_structured_logger from conftest import TEST_DATA, PRELIM_TEST_DATA, TEST_DIR -DATASETS = [{"id":"ua7e-t2fy", - "test_data": TEST_DATA}, - {"id":"mpgq-jmmr", - "test_data":PRELIM_TEST_DATA} +DATASETS = [{"id":MAIN_DATASET_ID, + "test_data": TEST_DATA, + "msg_prefix": "", + "prelim_flag": False, + "expected_data": f"{TEST_DIR}/test_data/expected_df.csv", + "type_dict": TYPE_DICT, + }, + + {"id":PRELIM_DATASET_ID, + "test_data":PRELIM_TEST_DATA, + "msg_prefix": "Preliminary ", + "prelim_flag": True, + "expected_data": f"{TEST_DIR}/test_data/expected_df_prelim.csv", + "type_dict": PRELIM_TYPE_DICT, + } ] @@ -27,13 +40,17 @@ class TestPullNHSNData: @pytest.mark.parametrize('dataset', DATASETS, ids=["data", "prelim_data"]) def test_socrata_call(self, mock_socrata, dataset, params): test_token = params["indicator"]["socrata_token"] + backup_dir = f"{TEST_DIR}/test_data" + logger = get_structured_logger() # Mock Socrata client and its get method mock_client = MagicMock() mock_socrata.return_value = mock_client - mock_client.get.side_effect = [[]] + # testing retry behavior + http_error = HTTPError(url="", hdrs="", fp="", msg="Service Temporarily Unavailable",code=503) + mock_client.get.side_effect = [http_error,[]] - pull_data(test_token, dataset["id"]) + pull_data(test_token, dataset["id"], backup_dir, logger) # Check that Socrata client was initialized with correct arguments mock_socrata.assert_called_once_with("data.cdc.gov", test_token) @@ -41,137 +58,121 @@ def test_socrata_call(self, mock_socrata, dataset, params): # Check that get method was called with correct arguments mock_client.get.assert_any_call(dataset["id"], limit=50000, offset=0) - def test_pull_from_file(self, caplog, params_w_patch): + @pytest.mark.parametrize('dataset', DATASETS, ids=["data", "prelim_data"]) + def test_pull_from_file(self, caplog, dataset, params_w_patch): backup_dir = f"{TEST_DIR}/test_data" issue_date = params_w_patch["patch"]["issue_date"] logger = get_structured_logger() - + prelim_flag = dataset["prelim_flag"] # Load test data - expected_data = pd.DataFrame(TEST_DATA) + expected_data = pd.DataFrame(dataset["test_data"]) + + df = pull_data_from_file(backup_dir, issue_date, logger=logger, prelim_flag=prelim_flag) - df = pull_data_from_file(backup_dir, issue_date, logger=logger) - df = df.astype('str') - expected_data = expected_data.astype('str') + # expected_data reads from dictionary and defaults all the columns as object data types + # compared to the method which pd.read_csv somewhat interprets numerical data types + expected_data = expected_data.astype(df.dtypes.to_dict()) + # expected_data = expected_data.astype('str') assert "Pulling data from file" in caplog.text pd.testing.assert_frame_equal(expected_data, df) - def test_pull_from_file_prelim(self, caplog, params_w_patch): - backup_dir = f"{TEST_DIR}/test_data" - issue_date = params_w_patch["patch"]["issue_date"] + @patch("delphi_nhsn.pull.Socrata") + @patch("delphi_nhsn.pull.create_backup_csv") + @pytest.mark.parametrize('dataset', DATASETS, ids=["data", "prelim_data"]) + def test_pull_nhsn_data_output(self, mock_create_backup, mock_socrata, dataset, caplog, params): + now = time.time() + # Mock Socrata client and its get method + mock_client = MagicMock() + mock_socrata.return_value = mock_client + mock_client.get.side_effect = [dataset["test_data"],[]] + mock_client.get_metadata.return_value = {"rowsUpdatedAt": now} + + backup_dir = params["common"]["backup_dir"] + test_token = params["indicator"]["socrata_token"] + custom_run = params["common"]["custom_run"] logger = get_structured_logger() + expected_df = pd.read_csv(dataset["expected_data"]) + + result = pull_nhsn_data(test_token, backup_dir, custom_run, issue_date=None, logger=logger, preliminary=dataset["prelim_flag"]) + mock_create_backup.assert_called_once() + + expected_columns = set(expected_df.columns) + assert set(result.columns) == expected_columns + + for column in list(result.columns): + # some states don't report confirmed admissions rsv + if column == "confirmed_admissions_rsv_ew" and not dataset["prelim_flag"]: + continue + if column == "confirmed_admissions_rsv_ew_prelim" and dataset["prelim_flag"]: + continue + assert result[column].notnull().all(), f"{column} has rogue NaN" + + expected_df = expected_df.astype(dataset["type_dict"]) + + pd.testing.assert_frame_equal(expected_df, result) + + + @patch("delphi_nhsn.pull.Socrata") + @pytest.mark.parametrize('dataset', DATASETS, ids=["data", "prelim_data"]) + def test_pull_nhsn_data_backup(self, mock_socrata, dataset, caplog, params): + now = time.time() + # Mock Socrata client and its get method + mock_client = MagicMock() + mock_socrata.return_value = mock_client + mock_client.get.side_effect = [dataset["test_data"], []] + + mock_client.get_metadata.return_value = {"rowsUpdatedAt": now} + + today = pd.Timestamp.today().strftime("%Y%m%d") + backup_dir = params["common"]["backup_dir"] + custom_run = params["common"]["custom_run"] + test_token = params["indicator"]["socrata_token"] + # Load test data - expected_data = pd.DataFrame(PRELIM_TEST_DATA) + expected_data = pd.DataFrame(dataset["test_data"]) - df = pull_data_from_file(backup_dir, issue_date, logger=logger, prelim_flag=True) - df = df.astype('str') - expected_data = expected_data.astype('str') + logger = get_structured_logger() + # Call function with test token + pull_nhsn_data(test_token, backup_dir, custom_run, issue_date=None, logger=logger, preliminary=dataset["prelim_flag"]) - assert "Pulling data from file" in caplog.text - pd.testing.assert_frame_equal(expected_data, df) + # Check logger used: + assert "Backup file created" in caplog.text + + # Check that backup file was created + backup_files = glob.glob(f"{backup_dir}/{today}*") + assert len(backup_files) == 2, "Backup file was not created" + + for backup_file in backup_files: + if backup_file.endswith(".csv.gz"): + dtypes = expected_data.dtypes.to_dict() + actual_data = pd.read_csv(backup_file, dtype=dtypes) + else: + actual_data = pd.read_parquet(backup_file) + pd.testing.assert_frame_equal(expected_data, actual_data) + + # clean up + for file in backup_files: + os.remove(file) + + + @pytest.mark.parametrize('dataset', DATASETS, ids=["data", "prelim_data"]) + @pytest.mark.parametrize("updatedAt", [time.time(), time.time() - 172800], ids=["updated", "stale"]) + @patch("delphi_nhsn.pull.Socrata") + def test_check_last_updated(self, mock_socrata, dataset, updatedAt, caplog): + mock_client = MagicMock() + mock_socrata.return_value = mock_client + mock_client.get_metadata.return_value = {"rowsUpdatedAt": updatedAt } + logger = get_structured_logger() + + check_last_updated(mock_client, dataset["id"], logger) + + # Check that get method was called with correct arguments + now = time.time() + if now - updatedAt < 60: + assert f"{dataset['msg_prefix']}NHSN data was recently updated; Pulling data" in caplog.text + else: + stale_msg = f"{dataset['msg_prefix']}NHSN data is stale; Skipping" + assert stale_msg in caplog.text - def test_pull_nhsn_data_output(self, caplog, params): - with patch('sodapy.Socrata.get') as mock_get: - mock_get.side_effect = [TEST_DATA, []] - backup_dir = params["common"]["backup_dir"] - test_token = params["indicator"]["socrata_token"] - custom_run = params["common"]["custom_run"] - - logger = get_structured_logger() - - result = pull_nhsn_data(test_token, backup_dir, custom_run, issue_date=None, logger=logger) - - # Check result - assert result["timestamp"].notnull().all(), "timestamp has rogue NaN" - assert result["geo_id"].notnull().all(), "geography has rogue NaN" - - # Check for each signal in SIGNALS - for signal in SIGNALS_MAP.keys(): - assert result[signal].notnull().all(), f"{signal} has rogue NaN" - def test_pull_nhsn_data_backup(self, caplog, params): - with patch('sodapy.Socrata.get') as mock_get: - mock_get.side_effect = [TEST_DATA, []] - - today = pd.Timestamp.today().strftime("%Y%m%d") - backup_dir = params["common"]["backup_dir"] - custom_run = params["common"]["custom_run"] - test_token = params["indicator"]["socrata_token"] - - # Load test data - expected_data = pd.DataFrame(TEST_DATA) - - logger = get_structured_logger() - # Call function with test token - pull_nhsn_data(test_token, backup_dir, custom_run, issue_date=None, logger=logger) - - # Check logger used: - assert "Backup file created" in caplog.text - - # Check that backup file was created - backup_files = glob.glob(f"{backup_dir}/{today}*") - assert len(backup_files) == 2, "Backup file was not created" - - for backup_file in backup_files: - if backup_file.endswith(".csv.gz"): - dtypes = expected_data.dtypes.to_dict() - actual_data = pd.read_csv(backup_file, dtype=dtypes) - else: - actual_data = pd.read_parquet(backup_file) - pd.testing.assert_frame_equal(expected_data, actual_data) - - # clean up - for file in backup_files: - os.remove(file) - def test_pull_prelim_nhsn_data_output(self, caplog, params): - with patch('sodapy.Socrata.get') as mock_get: - mock_get.side_effect = [PRELIM_TEST_DATA, []] - backup_dir = params["common"]["backup_dir"] - test_token = params["indicator"]["socrata_token"] - custom_run = params["common"]["custom_run"] - - logger = get_structured_logger() - - result = pull_preliminary_nhsn_data(test_token, backup_dir, custom_run, issue_date=None, logger=logger) - - # Check result - assert result["timestamp"].notnull().all(), "timestamp has rogue NaN" - assert result["geo_id"].notnull().all(), "geography has rogue NaN" - - # Check for each signal in SIGNALS - for signal in PRELIM_SIGNALS_MAP.keys(): - assert result[signal].notnull().all(), f"{signal} has rogue NaN" - def test_pull_prelim_nhsn_data_backup(self, caplog, params): - with patch('sodapy.Socrata.get') as mock_get: - mock_get.side_effect = [PRELIM_TEST_DATA, []] - - today = pd.Timestamp.today().strftime("%Y%m%d") - backup_dir = params["common"]["backup_dir"] - custom_run = params["common"]["custom_run"] - test_token = params["indicator"]["socrata_token"] - - # Load test data - expected_data = pd.DataFrame(PRELIM_TEST_DATA) - - logger = get_structured_logger() - # Call function with test token - pull_preliminary_nhsn_data(test_token, backup_dir, custom_run, issue_date=None, logger=logger) - - # Check logger used: - assert "Backup file created" in caplog.text - - # Check that backup file was created - backup_files = glob.glob(f"{backup_dir}/{today}*") - assert len(backup_files) == 2, "Backup file was not created" - - for backup_file in backup_files: - if backup_file.endswith(".csv.gz"): - dtypes = expected_data.dtypes.to_dict() - actual_data = pd.read_csv(backup_file, dtype=dtypes) - else: - actual_data = pd.read_parquet(backup_file) - pd.testing.assert_frame_equal(expected_data, actual_data) - - # clean up - for file in backup_files: - os.remove(file) \ No newline at end of file diff --git a/nssp/delphi_nssp/run.py b/nssp/delphi_nssp/run.py index 55547610c..5ac2380a5 100644 --- a/nssp/delphi_nssp/run.py +++ b/nssp/delphi_nssp/run.py @@ -34,6 +34,7 @@ from .constants import AUXILIARY_COLS, CSV_COLS, GEOS, SIGNALS from .pull import pull_nssp_data + def add_needed_columns(df, col_names=None): """Short util to add expected columns not found in the dataset.""" if col_names is None: diff --git a/nssp/version.cfg b/nssp/version.cfg index de7422054..e1a8912ac 100644 --- a/nssp/version.cfg +++ b/nssp/version.cfg @@ -1 +1 @@ -current_version = 0.3.60 +current_version = 0.3.61 diff --git a/quidel_covidtest/version.cfg b/quidel_covidtest/version.cfg index de7422054..e1a8912ac 100644 --- a/quidel_covidtest/version.cfg +++ b/quidel_covidtest/version.cfg @@ -1 +1 @@ -current_version = 0.3.60 +current_version = 0.3.61 diff --git a/sir_complainsalot/params.json.template b/sir_complainsalot/params.json.template index cf784774f..64c4bee17 100644 --- a/sir_complainsalot/params.json.template +++ b/sir_complainsalot/params.json.template @@ -44,13 +44,7 @@ }, "nssp": { "max_age":19, - "maintainers": [], - "retired-signals": [ - "pct_ed_visits_combined_2023rvr", - "pct_ed_visits_covid_2023rvr", - "pct_ed_visits_influenza_2023rvr", - "pct_ed_visits_rsv_2023rvr" - ] + "maintainers": [] } } } diff --git a/sir_complainsalot/version.cfg b/sir_complainsalot/version.cfg index de7422054..e1a8912ac 100644 --- a/sir_complainsalot/version.cfg +++ b/sir_complainsalot/version.cfg @@ -1 +1 @@ -current_version = 0.3.60 +current_version = 0.3.61