From 8fccf03e5c40c45b4aaf42946f8e4fbac7c3931d Mon Sep 17 00:00:00 2001 From: minhkhul Date: Tue, 1 Apr 2025 19:22:47 -0400 Subject: [PATCH 01/14] delete combined signals + adjust tests --- nssp/delphi_nssp/constants.py | 2 -- nssp/tests/test_patch.py | 12 ++++++------ 2 files changed, 6 insertions(+), 8 deletions(-) diff --git a/nssp/delphi_nssp/constants.py b/nssp/delphi_nssp/constants.py index 4aa5695d8..fde27c3d0 100644 --- a/nssp/delphi_nssp/constants.py +++ b/nssp/delphi_nssp/constants.py @@ -15,11 +15,9 @@ "percent_visits_covid": "pct_ed_visits_covid", "percent_visits_influenza": "pct_ed_visits_influenza", "percent_visits_rsv": "pct_ed_visits_rsv", - "percent_visits_combined": "pct_ed_visits_combined", "percent_visits_smoothed_covid": "smoothed_pct_ed_visits_covid", "percent_visits_smoothed_1": "smoothed_pct_ed_visits_influenza", "percent_visits_smoothed_rsv": "smoothed_pct_ed_visits_rsv", - "percent_visits_smoothed": "smoothed_pct_ed_visits_combined", } SIGNALS = [val for (key, val) in SIGNALS_MAP.items()] diff --git a/nssp/tests/test_patch.py b/nssp/tests/test_patch.py index fb40e8d2b..c73385d75 100644 --- a/nssp/tests/test_patch.py +++ b/nssp/tests/test_patch.py @@ -259,14 +259,14 @@ def test_full_patch_code(self, mock_read_params, mock_get_structured_logger): # Make sure issue_20210103 has latest weekly data (data from 20210109 instead of 20210108) df_20210108 = pd.read_csv('source_dir/20210108.csv.gz') - df_20210108_nation_combined = df_20210108['percent_visits_combined'].iloc[0] + df_20210108_nation_covid = df_20210108['percent_visits_covid'].iloc[0] df_20210109 = pd.read_csv('source_dir/20210109.csv.gz') - df_20210109_nation_combined = df_20210109['percent_visits_combined'].iloc[0] - assert df_20210108_nation_combined != df_20210109_nation_combined + df_20210109_nation_covid = df_20210109['percent_visits_covid'].iloc[0] + assert df_20210108_nation_covid != df_20210109_nation_covid - df_issue_20210103 = pd.read_csv('patch_dir/issue_20210103/nssp/weekly_202040_nation_pct_ed_visits_combined.csv') - df_issue_20210103_nation_combined = df_issue_20210103['val'].iloc[0] - assert df_20210109_nation_combined == df_issue_20210103_nation_combined + df_issue_20210103 = pd.read_csv('patch_dir/issue_20210103/nssp/weekly_202040_nation_pct_ed_visits_covid.csv') + df_issue_20210103_nation_covid = df_issue_20210103['val'].iloc[0] + assert df_20210109_nation_covid == df_issue_20210103_nation_covid # Clean up the created directories after the test shutil.rmtree(mock_read_params.return_value["patch"]["patch_dir"]) \ No newline at end of file From 4a9554c2ec23bd15e745e142df0847ee407c6c7a Mon Sep 17 00:00:00 2001 From: minhkhul Date: Wed, 2 Apr 2025 10:17:56 -0400 Subject: [PATCH 02/14] Revert "delete combined signals + adjust tests" This reverts commit 8fccf03e5c40c45b4aaf42946f8e4fbac7c3931d. --- nssp/delphi_nssp/constants.py | 2 ++ nssp/tests/test_patch.py | 12 ++++++------ 2 files changed, 8 insertions(+), 6 deletions(-) diff --git a/nssp/delphi_nssp/constants.py b/nssp/delphi_nssp/constants.py index fde27c3d0..4aa5695d8 100644 --- a/nssp/delphi_nssp/constants.py +++ b/nssp/delphi_nssp/constants.py @@ -15,9 +15,11 @@ "percent_visits_covid": "pct_ed_visits_covid", "percent_visits_influenza": "pct_ed_visits_influenza", "percent_visits_rsv": "pct_ed_visits_rsv", + "percent_visits_combined": "pct_ed_visits_combined", "percent_visits_smoothed_covid": "smoothed_pct_ed_visits_covid", "percent_visits_smoothed_1": "smoothed_pct_ed_visits_influenza", "percent_visits_smoothed_rsv": "smoothed_pct_ed_visits_rsv", + "percent_visits_smoothed": "smoothed_pct_ed_visits_combined", } SIGNALS = [val for (key, val) in SIGNALS_MAP.items()] diff --git a/nssp/tests/test_patch.py b/nssp/tests/test_patch.py index c73385d75..fb40e8d2b 100644 --- a/nssp/tests/test_patch.py +++ b/nssp/tests/test_patch.py @@ -259,14 +259,14 @@ def test_full_patch_code(self, mock_read_params, mock_get_structured_logger): # Make sure issue_20210103 has latest weekly data (data from 20210109 instead of 20210108) df_20210108 = pd.read_csv('source_dir/20210108.csv.gz') - df_20210108_nation_covid = df_20210108['percent_visits_covid'].iloc[0] + df_20210108_nation_combined = df_20210108['percent_visits_combined'].iloc[0] df_20210109 = pd.read_csv('source_dir/20210109.csv.gz') - df_20210109_nation_covid = df_20210109['percent_visits_covid'].iloc[0] - assert df_20210108_nation_covid != df_20210109_nation_covid + df_20210109_nation_combined = df_20210109['percent_visits_combined'].iloc[0] + assert df_20210108_nation_combined != df_20210109_nation_combined - df_issue_20210103 = pd.read_csv('patch_dir/issue_20210103/nssp/weekly_202040_nation_pct_ed_visits_covid.csv') - df_issue_20210103_nation_covid = df_issue_20210103['val'].iloc[0] - assert df_20210109_nation_covid == df_issue_20210103_nation_covid + df_issue_20210103 = pd.read_csv('patch_dir/issue_20210103/nssp/weekly_202040_nation_pct_ed_visits_combined.csv') + df_issue_20210103_nation_combined = df_issue_20210103['val'].iloc[0] + assert df_20210109_nation_combined == df_issue_20210103_nation_combined # Clean up the created directories after the test shutil.rmtree(mock_read_params.return_value["patch"]["patch_dir"]) \ No newline at end of file From 6c2bba8ab61b446a181c9688f97b888f3be09458 Mon Sep 17 00:00:00 2001 From: minhkhul Date: Wed, 2 Apr 2025 15:21:36 -0400 Subject: [PATCH 03/14] no null in csv + adjust tests accordingly --- nssp/delphi_nssp/run.py | 6 ++++- nssp/tests/test_data/page.json | 36 ++++++++++++++++++++++++++ nssp/tests/test_data/page_100_hrr.json | 36 ++++++++++++++++++++++++++ nssp/tests/test_pull.py | 4 --- nssp/tests/test_run.py | 3 +++ 5 files changed, 80 insertions(+), 5 deletions(-) diff --git a/nssp/delphi_nssp/run.py b/nssp/delphi_nssp/run.py index 82a96e5ed..bcc0f69a6 100644 --- a/nssp/delphi_nssp/run.py +++ b/nssp/delphi_nssp/run.py @@ -105,8 +105,9 @@ def run_module(params, logger=None): logger.warning("No primary source data pulled", issue_date=issue_date) break for geo in GEOS: - df = df_pull.copy() + df = df_pull.copy(deep=True) df["val"] = df[signal] + df = df.dropna(subset=["val"]) logger.info("Generating signal and exporting to CSV", geo_type=geo, signal=signal) if geo == "nation": df = df[df["geography"] == "United States"] @@ -140,6 +141,9 @@ def run_module(params, logger=None): else: df = df[df["county"] != "All"] df["geo_id"] = df["fips"] + if df.empty: + logger.info("No data for this signal and geo type combination", geo_type=geo, signal=signal) + continue # add se, sample_size, and na codes missing_cols = set(CSV_COLS) - set(df.columns) df = add_needed_columns(df, col_names=list(missing_cols)) diff --git a/nssp/tests/test_data/page.json b/nssp/tests/test_data/page.json index e80d2817f..692c67619 100644 --- a/nssp/tests/test_data/page.json +++ b/nssp/tests/test_data/page.json @@ -196,5 +196,41 @@ "fips": "8101", "trend_source": "HSA", "buildnumber": "2025-02-28" + }, + { + "_comment":"This record is for testing the case where all signals data is NA for a county", + "week_end":"2022-10-15T00:00:00.000", + "geography":"Colorado", + "county":"Chaffee", + "ed_trends_covid":"Data Unavailable", + "ed_trends_influenza":"Data Unavailable", + "ed_trends_rsv":"Data Unavailable", + "hsa":"Chaffee, CO - Lake, CO", + "hsa_counties":"Chaffee, Lake", + "hsa_nci_id":"786", + "fips":"8015", + "trend_source":"HSA", + "buildnumber":"2025-02-28" + }, + { + "_comment":"This record is for testing the case where some signal data (combined signals) is NA for a county", + "week_end":"2022-10-15T00:00:00.000", + "geography":"Colorado", + "county":"Arapahoe", + "percent_visits_covid": "1", + "percent_visits_influenza": "1", + "percent_visits_rsv": "1", + "percent_visits_smoothed_covid": "1", + "percent_visits_smoothed_1": "1", + "percent_visits_smoothed_rsv": "1", + "ed_trends_covid":"Decreasing", + "ed_trends_influenza":"Decreasing", + "ed_trends_rsv":"Decreasing", + "hsa":"Denver (Denver), CO - Jefferson, CO", + "hsa_counties":"Adams, Arapahoe, Clear Creek, Denver, Douglas, Elbert, Gilpin, Grand, Jefferson, Park, Summit", + "hsa_nci_id":"688", + "fips":"8005", + "trend_source":"HSA", + "buildnumber":"2025-03-28" } ] diff --git a/nssp/tests/test_data/page_100_hrr.json b/nssp/tests/test_data/page_100_hrr.json index 34ae27b35..cf8527c1b 100644 --- a/nssp/tests/test_data/page_100_hrr.json +++ b/nssp/tests/test_data/page_100_hrr.json @@ -196,5 +196,41 @@ "fips": "8101", "trend_source": "HSA", "buildnumber": "2025-02-28" + }, + { + "_comment":"This record is for testing the case where all signals data is NA for a county", + "week_end":"2022-10-15T00:00:00.000", + "geography":"Colorado", + "county":"Chaffee", + "ed_trends_covid":"Data Unavailable", + "ed_trends_influenza":"Data Unavailable", + "ed_trends_rsv":"Data Unavailable", + "hsa":"Chaffee, CO - Lake, CO", + "hsa_counties":"Chaffee, Lake", + "hsa_nci_id":"786", + "fips":"8015", + "trend_source":"HSA", + "buildnumber":"2025-02-28" + }, + { + "_comment":"This record is for testing the case where some signal data (combined signals) is NA for a county", + "week_end":"2022-10-15T00:00:00.000", + "geography":"Colorado", + "county":"Arapahoe", + "percent_visits_covid": "100", + "percent_visits_influenza": "100", + "percent_visits_rsv": "100", + "percent_visits_smoothed_covid": "100", + "percent_visits_smoothed_1": "100", + "percent_visits_smoothed_rsv": "100", + "ed_trends_covid":"Decreasing", + "ed_trends_influenza":"Decreasing", + "ed_trends_rsv":"Decreasing", + "hsa":"Denver (Denver), CO - Jefferson, CO", + "hsa_counties":"Adams, Arapahoe, Clear Creek, Denver, Douglas, Elbert, Gilpin, Grand, Jefferson, Park, Summit", + "hsa_nci_id":"688", + "fips":"8005", + "trend_source":"HSA", + "buildnumber":"2025-03-28" } ] \ No newline at end of file diff --git a/nssp/tests/test_pull.py b/nssp/tests/test_pull.py index 541384d6d..e70594528 100644 --- a/nssp/tests/test_pull.py +++ b/nssp/tests/test_pull.py @@ -90,9 +90,5 @@ def test_normal_pull_nssp_data(self, mock_socrata, params, caplog): assert result["fips"].notnull().all(), "fips has rogue NaN" assert result["fips"].apply(lambda x: isinstance(x, str) and len(x) != 4).all(), "fips formatting should always be 5 digits; include leading zeros if aplicable" - # Check for each signal in SIGNALS - for signal in SIGNALS: - assert result[signal].notnull().all(), f"{signal} has rogue NaN" - for file in backup_files: os.remove(file) diff --git a/nssp/tests/test_run.py b/nssp/tests/test_run.py index c24a76af4..2c269958d 100644 --- a/nssp/tests/test_run.py +++ b/nssp/tests/test_run.py @@ -68,6 +68,9 @@ def test_output_files_exist(self, params, run_as_module): ] assert set(expected_columns).issubset(set(df.columns.values)) + #Verify that there's no NA/empty values in the val columns + assert not df["val"].isnull().any() + for file in Path(export_dir).glob("*.csv"): os.remove(file) From a1211b397b6fba3905303b832816e3b919457143 Mon Sep 17 00:00:00 2001 From: minhkhul Date: Wed, 2 Apr 2025 16:19:44 -0400 Subject: [PATCH 04/14] simplify remove rows with missing values --- nssp/delphi_nssp/run.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/nssp/delphi_nssp/run.py b/nssp/delphi_nssp/run.py index bcc0f69a6..ee3bee53b 100644 --- a/nssp/delphi_nssp/run.py +++ b/nssp/delphi_nssp/run.py @@ -107,7 +107,6 @@ def run_module(params, logger=None): for geo in GEOS: df = df_pull.copy(deep=True) df["val"] = df[signal] - df = df.dropna(subset=["val"]) logger.info("Generating signal and exporting to CSV", geo_type=geo, signal=signal) if geo == "nation": df = df[df["geography"] == "United States"] @@ -141,13 +140,14 @@ def run_module(params, logger=None): else: df = df[df["county"] != "All"] df["geo_id"] = df["fips"] - if df.empty: - logger.info("No data for this signal and geo type combination", geo_type=geo, signal=signal) - continue # add se, sample_size, and na codes missing_cols = set(CSV_COLS) - set(df.columns) df = add_needed_columns(df, col_names=list(missing_cols)) df_csv = df[CSV_COLS + ["timestamp"]] + df_csv = df_csv[df_csv["val"].notnull()] + if df_csv.empty: + logger.warning("No data for signal and geo combination", signal=signal, geo=geo) + continue # actual export dates = create_export_csv( df_csv, From 9c34aa88adb43deb6e52f7152c46dce244e35b4b Mon Sep 17 00:00:00 2001 From: minhkhul Date: Wed, 2 Apr 2025 16:36:29 -0400 Subject: [PATCH 05/14] add comments --- nssp/delphi_nssp/run.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/nssp/delphi_nssp/run.py b/nssp/delphi_nssp/run.py index ee3bee53b..a390a57a2 100644 --- a/nssp/delphi_nssp/run.py +++ b/nssp/delphi_nssp/run.py @@ -144,10 +144,13 @@ def run_module(params, logger=None): missing_cols = set(CSV_COLS) - set(df.columns) df = add_needed_columns(df, col_names=list(missing_cols)) df_csv = df[CSV_COLS + ["timestamp"]] + + # remove rows with missing values df_csv = df_csv[df_csv["val"].notnull()] if df_csv.empty: logger.warning("No data for signal and geo combination", signal=signal, geo=geo) continue + # actual export dates = create_export_csv( df_csv, From 3ee0ae4b3e9e64bc7c5fcbf5ce9d7d6bdb1cfcc1 Mon Sep 17 00:00:00 2001 From: minhkhul Date: Wed, 2 Apr 2025 17:22:42 -0400 Subject: [PATCH 06/14] lint --- nssp/delphi_nssp/run.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/nssp/delphi_nssp/run.py b/nssp/delphi_nssp/run.py index a390a57a2..89c8a802a 100644 --- a/nssp/delphi_nssp/run.py +++ b/nssp/delphi_nssp/run.py @@ -150,7 +150,7 @@ def run_module(params, logger=None): if df_csv.empty: logger.warning("No data for signal and geo combination", signal=signal, geo=geo) continue - + # actual export dates = create_export_csv( df_csv, From bd3ad702da45365ed912430f7661c7c5907a7084 Mon Sep 17 00:00:00 2001 From: minhkhul <118945681+minhkhul@users.noreply.github.com> Date: Tue, 8 Apr 2025 16:09:36 -0400 Subject: [PATCH 07/14] remove unnecessary deep=true in run.py --- nssp/delphi_nssp/run.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/nssp/delphi_nssp/run.py b/nssp/delphi_nssp/run.py index 89c8a802a..d4e167d66 100644 --- a/nssp/delphi_nssp/run.py +++ b/nssp/delphi_nssp/run.py @@ -105,7 +105,7 @@ def run_module(params, logger=None): logger.warning("No primary source data pulled", issue_date=issue_date) break for geo in GEOS: - df = df_pull.copy(deep=True) + df = df_pull.copy() df["val"] = df[signal] logger.info("Generating signal and exporting to CSV", geo_type=geo, signal=signal) if geo == "nation": From bb0c62acb0dfd9f9682baa148fbf33a89c90f5d1 Mon Sep 17 00:00:00 2001 From: minhkhul Date: Wed, 9 Apr 2025 17:55:26 -0400 Subject: [PATCH 08/14] add test --- nssp/tests/conftest.py | 34 +++++++++++--- nssp/tests/test_data/page.json | 2 - nssp/tests/test_data/page_100_hrr.json | 2 - nssp/tests/test_run.py | 61 +++++++++++++++----------- 4 files changed, 64 insertions(+), 35 deletions(-) diff --git a/nssp/tests/conftest.py b/nssp/tests/conftest.py index c308b6e6e..69c7e9852 100644 --- a/nssp/tests/conftest.py +++ b/nssp/tests/conftest.py @@ -1,13 +1,11 @@ import copy import json -import time -from unittest.mock import patch, MagicMock - -import pytest from pathlib import Path +from unittest.mock import patch -from delphi_nssp.run import run_module +import pytest from delphi_nssp.constants import DATASET_ID +from delphi_nssp.run import run_module TEST_DIR = Path(__file__).parent @@ -20,6 +18,9 @@ with open(f"{TEST_DIR}/test_data/page_100_hrr.json", "r") as f: HRR_TEST_DATA = json.load(f) +with open(f"{TEST_DIR}/test_data/page_no_data.json", "r") as f: + EMPTY_TEST_DATA = json.load(f) + @pytest.fixture(scope="session") def params(): params = { @@ -99,3 +100,26 @@ def side_effect(*args, **kwargs): mock_get.side_effect = side_effect run_module(params) +@pytest.fixture(scope="function") +def run_as_module_empty(params): + """ + Fixture to use EMPTY_TEST_DATA when testing run_module. + + This fixture patches socrara to return the predefined test + data where relevent data is empty. + """ + + def _run_as_module_empty(): + with patch("sodapy.Socrata.get") as mock_get: + + def side_effect(*args, **kwargs): + if kwargs["offset"] == 0: + if DATASET_ID in args[0]: + return EMPTY_TEST_DATA + else: + return [] + + mock_get.side_effect = side_effect + run_module(params) + + return _run_as_module_empty diff --git a/nssp/tests/test_data/page.json b/nssp/tests/test_data/page.json index 692c67619..185fb9635 100644 --- a/nssp/tests/test_data/page.json +++ b/nssp/tests/test_data/page.json @@ -198,7 +198,6 @@ "buildnumber": "2025-02-28" }, { - "_comment":"This record is for testing the case where all signals data is NA for a county", "week_end":"2022-10-15T00:00:00.000", "geography":"Colorado", "county":"Chaffee", @@ -213,7 +212,6 @@ "buildnumber":"2025-02-28" }, { - "_comment":"This record is for testing the case where some signal data (combined signals) is NA for a county", "week_end":"2022-10-15T00:00:00.000", "geography":"Colorado", "county":"Arapahoe", diff --git a/nssp/tests/test_data/page_100_hrr.json b/nssp/tests/test_data/page_100_hrr.json index cf8527c1b..3f8b723b9 100644 --- a/nssp/tests/test_data/page_100_hrr.json +++ b/nssp/tests/test_data/page_100_hrr.json @@ -198,7 +198,6 @@ "buildnumber": "2025-02-28" }, { - "_comment":"This record is for testing the case where all signals data is NA for a county", "week_end":"2022-10-15T00:00:00.000", "geography":"Colorado", "county":"Chaffee", @@ -213,7 +212,6 @@ "buildnumber":"2025-02-28" }, { - "_comment":"This record is for testing the case where some signal data (combined signals) is NA for a county", "week_end":"2022-10-15T00:00:00.000", "geography":"Colorado", "county":"Arapahoe", diff --git a/nssp/tests/test_run.py b/nssp/tests/test_run.py index 2c269958d..315ee37e3 100644 --- a/nssp/tests/test_run.py +++ b/nssp/tests/test_run.py @@ -1,23 +1,25 @@ import glob -from datetime import datetime, date -import json -from pathlib import Path -from unittest.mock import patch -import tempfile +import logging import os -import time -from datetime import datetime +from pathlib import Path import numpy as np import pandas as pd +from delphi_nssp.constants import GEOS, SIGNALS_MAP +from delphi_nssp.run import add_needed_columns from epiweeks import Week -from pandas.testing import assert_frame_equal -from delphi_nssp.constants import GEOS, SIGNALS, SIGNALS_MAP, DATASET_ID -from delphi_nssp.run import ( - add_needed_columns -) +def remove_backup_and_receiving(params): + export_dir = params["common"]["export_dir"] + for file in Path(export_dir).glob("*.csv"): + os.remove(file) + + today = pd.Timestamp.today().strftime("%Y%m%d") + backup_dir = glob.glob(f"{Path(params['common']['backup_dir'])}/{today}*") + for file in backup_dir: + os.remove(file) + class TestRun: def test_add_needed_columns(self): df = pd.DataFrame({"geo_id": ["us"], "val": [1]}) @@ -68,16 +70,10 @@ def test_output_files_exist(self, params, run_as_module): ] assert set(expected_columns).issubset(set(df.columns.values)) - #Verify that there's no NA/empty values in the val columns + # Verify that there's no NA/empty values in the val columns assert not df["val"].isnull().any() - for file in Path(export_dir).glob("*.csv"): - os.remove(file) - - today = pd.Timestamp.today().strftime("%Y%m%d") - backup_dir = glob.glob(f"{Path(params['common']['backup_dir'])}/{today}*") - for file in backup_dir: - os.remove(file) + remove_backup_and_receiving(params) def test_valid_hrr(self, run_as_module_hrr, params): export_dir = params["common"]["export_dir"] @@ -88,10 +84,23 @@ def test_valid_hrr(self, run_as_module_hrr, params): df = pd.read_csv(f) assert (df.val == 100).all() - for file in Path(export_dir).glob("*.csv"): - os.remove(file) + remove_backup_and_receiving(params) + + def test_empty_data(self, run_as_module_empty, params, caplog): + """ + Tests correct handling when there is a geo and signal combination that has no data. + """ + + caplog.set_level(logging.WARNING) + run_as_module_empty() + assert "No data for signal and geo combination" in caplog.text + + export_dir = params["common"]["export_dir"] + csv_files = [f for f in Path(export_dir).glob("*.csv")] + + # Since only one national entry in page_no_data.json with numeric data, + # while the two counties have no numeric fields, + # there should be no county, hrr, hhs, or msa files. + assert not any(geo in f.name for geo in ["county", "hrr", "hhs", "msa"] for f in csv_files) - today = pd.Timestamp.today().strftime("%Y%m%d") - backup_dir = glob.glob(f"{Path(params['common']['backup_dir'])}/{today}*") - for file in backup_dir: - os.remove(file) + remove_backup_and_receiving(params) From 0674e32e79d85e54613147a43b406794665f38dc Mon Sep 17 00:00:00 2001 From: minhkhul Date: Wed, 9 Apr 2025 17:59:07 -0400 Subject: [PATCH 09/14] add page_no_data.json --- nssp/tests/test_data/page_no_data.json | 52 ++++++++++++++++++++++++++ 1 file changed, 52 insertions(+) create mode 100644 nssp/tests/test_data/page_no_data.json diff --git a/nssp/tests/test_data/page_no_data.json b/nssp/tests/test_data/page_no_data.json new file mode 100644 index 000000000..05f3ea530 --- /dev/null +++ b/nssp/tests/test_data/page_no_data.json @@ -0,0 +1,52 @@ +[ + { + "week_end":"2022-10-15T00:00:00.000", + "geography":"United States", + "county":"All", + "percent_visits_combined":"2.0", + "percent_visits_covid":"1.63", + "percent_visits_influenza":"0.17", + "percent_visits_rsv":"0.21", + "percent_visits_smoothed":"1.78", + "percent_visits_smoothed_covid":"1.54", + "percent_visits_smoothed_1":"0.12", + "percent_visits_smoothed_rsv":"0.12", + "ed_trends_covid":"Decreasing", + "ed_trends_influenza":"No Change", + "ed_trends_rsv":"Increasing", + "hsa":"All", + "hsa_counties":"All", + "hsa_nci_id":"All", + "fips":"0", + "trend_source":"United States", + "buildnumber":"2025-02-08" + }, + { + "week_end":"2022-10-15T00:00:00.000", + "geography":"Colorado", + "county":"Chaffee", + "ed_trends_covid":"Data Unavailable", + "ed_trends_influenza":"Data Unavailable", + "ed_trends_rsv":"Data Unavailable", + "hsa":"Chaffee, CO - Lake, CO", + "hsa_counties":"Chaffee, Lake", + "hsa_nci_id":"786", + "fips":"8015", + "trend_source":"HSA", + "buildnumber":"2025-02-28" + }, + { + "week_end":"2022-10-15T00:00:00.000", + "geography":"Colorado", + "county":"Arapahoe", + "ed_trends_covid":"Data Unavailable", + "ed_trends_influenza":"Data Unavailable", + "ed_trends_rsv":"Data Unavailable", + "hsa":"Denver (Denver), CO - Jefferson, CO", + "hsa_counties":"Adams, Arapahoe, Clear Creek, Denver, Douglas, Elbert, Gilpin, Grand, Jefferson, Park, Summit", + "hsa_nci_id":"688", + "fips":"8005", + "trend_source":"HSA", + "buildnumber":"2025-03-28" + } +] \ No newline at end of file From 7929b48872b540c2d6b36d519bb5a7e15bb89b7a Mon Sep 17 00:00:00 2001 From: minhkhul Date: Thu, 24 Apr 2025 19:14:18 -0400 Subject: [PATCH 10/14] remove set caplog level to warning + rename test_output_files --- nssp/tests/test_run.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/nssp/tests/test_run.py b/nssp/tests/test_run.py index 315ee37e3..905104585 100644 --- a/nssp/tests/test_run.py +++ b/nssp/tests/test_run.py @@ -44,7 +44,7 @@ def generate_week_file_prefix(self, dates): ] return date_prefix - def test_output_files_exist(self, params, run_as_module): + def test_output_files(self, params, run_as_module): export_dir = params["common"]["export_dir"] csv_files = [f.name for f in Path(export_dir).glob("*.csv")] @@ -91,7 +91,6 @@ def test_empty_data(self, run_as_module_empty, params, caplog): Tests correct handling when there is a geo and signal combination that has no data. """ - caplog.set_level(logging.WARNING) run_as_module_empty() assert "No data for signal and geo combination" in caplog.text From ec826cb48873ce9c83dbbe194e7d10f1229a914b Mon Sep 17 00:00:00 2001 From: minhkhul Date: Fri, 25 Apr 2025 13:56:38 -0400 Subject: [PATCH 11/14] add nation assert --- nssp/tests/test_run.py | 1 + 1 file changed, 1 insertion(+) diff --git a/nssp/tests/test_run.py b/nssp/tests/test_run.py index 905104585..ffd667418 100644 --- a/nssp/tests/test_run.py +++ b/nssp/tests/test_run.py @@ -101,5 +101,6 @@ def test_empty_data(self, run_as_module_empty, params, caplog): # while the two counties have no numeric fields, # there should be no county, hrr, hhs, or msa files. assert not any(geo in f.name for geo in ["county", "hrr", "hhs", "msa"] for f in csv_files) + assert all("nation" in f.name for f in csv_files) remove_backup_and_receiving(params) From 8c51c50716ad37a85297921c1c4831bd7898a61f Mon Sep 17 00:00:00 2001 From: minhkhul Date: Thu, 8 May 2025 01:38:50 -0400 Subject: [PATCH 12/14] simplify test_empty_data --- nssp/tests/conftest.py | 24 ------------------------ nssp/tests/test_run.py | 17 ++++++++++++----- 2 files changed, 12 insertions(+), 29 deletions(-) diff --git a/nssp/tests/conftest.py b/nssp/tests/conftest.py index 69c7e9852..46da7af10 100644 --- a/nssp/tests/conftest.py +++ b/nssp/tests/conftest.py @@ -99,27 +99,3 @@ def side_effect(*args, **kwargs): return [] mock_get.side_effect = side_effect run_module(params) - -@pytest.fixture(scope="function") -def run_as_module_empty(params): - """ - Fixture to use EMPTY_TEST_DATA when testing run_module. - - This fixture patches socrara to return the predefined test - data where relevent data is empty. - """ - - def _run_as_module_empty(): - with patch("sodapy.Socrata.get") as mock_get: - - def side_effect(*args, **kwargs): - if kwargs["offset"] == 0: - if DATASET_ID in args[0]: - return EMPTY_TEST_DATA - else: - return [] - - mock_get.side_effect = side_effect - run_module(params) - - return _run_as_module_empty diff --git a/nssp/tests/test_run.py b/nssp/tests/test_run.py index ffd667418..3e03e55a8 100644 --- a/nssp/tests/test_run.py +++ b/nssp/tests/test_run.py @@ -2,13 +2,15 @@ import logging import os from pathlib import Path - +import json +from unittest.mock import patch import numpy as np import pandas as pd -from delphi_nssp.constants import GEOS, SIGNALS_MAP -from delphi_nssp.run import add_needed_columns +from delphi_nssp.constants import GEOS, SIGNALS_MAP, DATASET_ID +from delphi_nssp.run import add_needed_columns, run_module from epiweeks import Week +TEST_DIR = Path(__file__).parent def remove_backup_and_receiving(params): export_dir = params["common"]["export_dir"] @@ -86,12 +88,17 @@ def test_valid_hrr(self, run_as_module_hrr, params): remove_backup_and_receiving(params) - def test_empty_data(self, run_as_module_empty, params, caplog): + @patch("sodapy.Socrata.get") + def test_empty_data(self, mock_get, params, caplog): """ Tests correct handling when there is a geo and signal combination that has no data. """ - run_as_module_empty() + with open(f"{TEST_DIR}/test_data/page_no_data.json", "r") as f: + EMPTY_TEST_DATA = json.load(f) + mock_get.side_effect = [EMPTY_TEST_DATA, []] + run_module(params) + assert "No data for signal and geo combination" in caplog.text export_dir = params["common"]["export_dir"] From 0635eaec14c3d9953fd4bd25f478c38af19bcc8e Mon Sep 17 00:00:00 2001 From: minhkhul Date: Mon, 12 May 2025 18:21:01 -0400 Subject: [PATCH 13/14] revert conftest.py --- nssp/tests/conftest.py | 13 ++++++------- 1 file changed, 6 insertions(+), 7 deletions(-) diff --git a/nssp/tests/conftest.py b/nssp/tests/conftest.py index 46da7af10..b88dc838a 100644 --- a/nssp/tests/conftest.py +++ b/nssp/tests/conftest.py @@ -1,11 +1,13 @@ import copy import json -from pathlib import Path -from unittest.mock import patch +import time +from unittest.mock import patch, MagicMock import pytest -from delphi_nssp.constants import DATASET_ID +from pathlib import Path + from delphi_nssp.run import run_module +from delphi_nssp.constants import DATASET_ID TEST_DIR = Path(__file__).parent @@ -18,9 +20,6 @@ with open(f"{TEST_DIR}/test_data/page_100_hrr.json", "r") as f: HRR_TEST_DATA = json.load(f) -with open(f"{TEST_DIR}/test_data/page_no_data.json", "r") as f: - EMPTY_TEST_DATA = json.load(f) - @pytest.fixture(scope="session") def params(): params = { @@ -98,4 +97,4 @@ def side_effect(*args, **kwargs): else: return [] mock_get.side_effect = side_effect - run_module(params) + run_module(params) \ No newline at end of file From 8cec78fb2085598f23a896b41762f3540a9b5449 Mon Sep 17 00:00:00 2001 From: minhkhul Date: Mon, 12 May 2025 19:32:50 -0400 Subject: [PATCH 14/14] fully revert conftest.py --- nssp/tests/conftest.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/nssp/tests/conftest.py b/nssp/tests/conftest.py index b88dc838a..c308b6e6e 100644 --- a/nssp/tests/conftest.py +++ b/nssp/tests/conftest.py @@ -97,4 +97,5 @@ def side_effect(*args, **kwargs): else: return [] mock_get.side_effect = side_effect - run_module(params) \ No newline at end of file + run_module(params) +