Skip to content

2130 deal with large number of null values in nssp data #2141

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
Merged
7 changes: 7 additions & 0 deletions nssp/delphi_nssp/run.py
Original file line number Diff line number Diff line change
Expand Up @@ -144,6 +144,13 @@ def run_module(params, logger=None):
missing_cols = set(CSV_COLS) - set(df.columns)
df = add_needed_columns(df, col_names=list(missing_cols))
df_csv = df[CSV_COLS + ["timestamp"]]

# remove rows with missing values
df_csv = df_csv[df_csv["val"].notnull()]
if df_csv.empty:
logger.warning("No data for signal and geo combination", signal=signal, geo=geo)
continue

# actual export
dates = create_export_csv(
df_csv,
Expand Down
34 changes: 29 additions & 5 deletions nssp/tests/conftest.py
Original file line number Diff line number Diff line change
@@ -1,13 +1,11 @@
import copy
import json
import time
from unittest.mock import patch, MagicMock

import pytest
from pathlib import Path
from unittest.mock import patch

from delphi_nssp.run import run_module
import pytest
from delphi_nssp.constants import DATASET_ID
from delphi_nssp.run import run_module

TEST_DIR = Path(__file__).parent

Expand All @@ -20,6 +18,9 @@
with open(f"{TEST_DIR}/test_data/page_100_hrr.json", "r") as f:
HRR_TEST_DATA = json.load(f)

with open(f"{TEST_DIR}/test_data/page_no_data.json", "r") as f:
EMPTY_TEST_DATA = json.load(f)

@pytest.fixture(scope="session")
def params():
params = {
Expand Down Expand Up @@ -99,3 +100,26 @@ def side_effect(*args, **kwargs):
mock_get.side_effect = side_effect
run_module(params)

@pytest.fixture(scope="function")
def run_as_module_empty(params):
    """
    Fixture that runs run_module against EMPTY_TEST_DATA.

    Patches sodapy's Socrata.get so the pipeline receives the predefined
    test payload in which the relevant numeric fields are empty.
    """

    def _run_as_module_empty():
        with patch("sodapy.Socrata.get") as mock_get:

            def side_effect(*args, **kwargs):
                # First page: serve the empty-value fixture for the NSSP
                # dataset; any other dataset gets no rows.
                if kwargs["offset"] == 0:
                    return EMPTY_TEST_DATA if DATASET_ID in args[0] else []
                # NOTE(review): nonzero offsets fall through to an implicit
                # None return — presumably treated as end-of-data by the
                # puller; confirm against the pagination logic.

            mock_get.side_effect = side_effect
            run_module(params)

    return _run_as_module_empty
34 changes: 34 additions & 0 deletions nssp/tests/test_data/page.json
Original file line number Diff line number Diff line change
Expand Up @@ -196,5 +196,39 @@
"fips": "8101",
"trend_source": "HSA",
"buildnumber": "2025-02-28"
},
{
"week_end":"2022-10-15T00:00:00.000",
"geography":"Colorado",
"county":"Chaffee",
"ed_trends_covid":"Data Unavailable",
"ed_trends_influenza":"Data Unavailable",
"ed_trends_rsv":"Data Unavailable",
"hsa":"Chaffee, CO - Lake, CO",
"hsa_counties":"Chaffee, Lake",
"hsa_nci_id":"786",
"fips":"8015",
"trend_source":"HSA",
"buildnumber":"2025-02-28"
},
{
"week_end":"2022-10-15T00:00:00.000",
"geography":"Colorado",
"county":"Arapahoe",
"percent_visits_covid": "1",
"percent_visits_influenza": "1",
"percent_visits_rsv": "1",
"percent_visits_smoothed_covid": "1",
"percent_visits_smoothed_1": "1",
"percent_visits_smoothed_rsv": "1",
"ed_trends_covid":"Decreasing",
"ed_trends_influenza":"Decreasing",
"ed_trends_rsv":"Decreasing",
"hsa":"Denver (Denver), CO - Jefferson, CO",
"hsa_counties":"Adams, Arapahoe, Clear Creek, Denver, Douglas, Elbert, Gilpin, Grand, Jefferson, Park, Summit",
"hsa_nci_id":"688",
"fips":"8005",
"trend_source":"HSA",
"buildnumber":"2025-03-28"
}
]
34 changes: 34 additions & 0 deletions nssp/tests/test_data/page_100_hrr.json
Original file line number Diff line number Diff line change
Expand Up @@ -196,5 +196,39 @@
"fips": "8101",
"trend_source": "HSA",
"buildnumber": "2025-02-28"
},
{
"week_end":"2022-10-15T00:00:00.000",
"geography":"Colorado",
"county":"Chaffee",
"ed_trends_covid":"Data Unavailable",
"ed_trends_influenza":"Data Unavailable",
"ed_trends_rsv":"Data Unavailable",
"hsa":"Chaffee, CO - Lake, CO",
"hsa_counties":"Chaffee, Lake",
"hsa_nci_id":"786",
"fips":"8015",
"trend_source":"HSA",
"buildnumber":"2025-02-28"
},
{
"week_end":"2022-10-15T00:00:00.000",
"geography":"Colorado",
"county":"Arapahoe",
"percent_visits_covid": "100",
"percent_visits_influenza": "100",
"percent_visits_rsv": "100",
"percent_visits_smoothed_covid": "100",
"percent_visits_smoothed_1": "100",
"percent_visits_smoothed_rsv": "100",
"ed_trends_covid":"Decreasing",
"ed_trends_influenza":"Decreasing",
"ed_trends_rsv":"Decreasing",
"hsa":"Denver (Denver), CO - Jefferson, CO",
"hsa_counties":"Adams, Arapahoe, Clear Creek, Denver, Douglas, Elbert, Gilpin, Grand, Jefferson, Park, Summit",
"hsa_nci_id":"688",
"fips":"8005",
"trend_source":"HSA",
"buildnumber":"2025-03-28"
}
]
52 changes: 52 additions & 0 deletions nssp/tests/test_data/page_no_data.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,52 @@
[
{
"week_end":"2022-10-15T00:00:00.000",
"geography":"United States",
"county":"All",
"percent_visits_combined":"2.0",
"percent_visits_covid":"1.63",
"percent_visits_influenza":"0.17",
"percent_visits_rsv":"0.21",
"percent_visits_smoothed":"1.78",
"percent_visits_smoothed_covid":"1.54",
"percent_visits_smoothed_1":"0.12",
"percent_visits_smoothed_rsv":"0.12",
"ed_trends_covid":"Decreasing",
"ed_trends_influenza":"No Change",
"ed_trends_rsv":"Increasing",
"hsa":"All",
"hsa_counties":"All",
"hsa_nci_id":"All",
"fips":"0",
"trend_source":"United States",
"buildnumber":"2025-02-08"
},
{
"week_end":"2022-10-15T00:00:00.000",
"geography":"Colorado",
"county":"Chaffee",
"ed_trends_covid":"Data Unavailable",
"ed_trends_influenza":"Data Unavailable",
"ed_trends_rsv":"Data Unavailable",
"hsa":"Chaffee, CO - Lake, CO",
"hsa_counties":"Chaffee, Lake",
"hsa_nci_id":"786",
"fips":"8015",
"trend_source":"HSA",
"buildnumber":"2025-02-28"
},
{
"week_end":"2022-10-15T00:00:00.000",
"geography":"Colorado",
"county":"Arapahoe",
"ed_trends_covid":"Data Unavailable",
"ed_trends_influenza":"Data Unavailable",
"ed_trends_rsv":"Data Unavailable",
"hsa":"Denver (Denver), CO - Jefferson, CO",
"hsa_counties":"Adams, Arapahoe, Clear Creek, Denver, Douglas, Elbert, Gilpin, Grand, Jefferson, Park, Summit",
"hsa_nci_id":"688",
"fips":"8005",
"trend_source":"HSA",
"buildnumber":"2025-03-28"
}
]
4 changes: 0 additions & 4 deletions nssp/tests/test_pull.py
Original file line number Diff line number Diff line change
Expand Up @@ -90,9 +90,5 @@ def test_normal_pull_nssp_data(self, mock_socrata, params, caplog):
assert result["fips"].notnull().all(), "fips has rogue NaN"
assert result["fips"].apply(lambda x: isinstance(x, str) and len(x) != 4).all(), "fips formatting should always be 5 digits; include leading zeros if aplicable"

# Check for each signal in SIGNALS
for signal in SIGNALS:
assert result[signal].notnull().all(), f"{signal} has rogue NaN"

for file in backup_files:
os.remove(file)
60 changes: 36 additions & 24 deletions nssp/tests/test_run.py
Original file line number Diff line number Diff line change
@@ -1,23 +1,25 @@
import glob
from datetime import datetime, date
import json
from pathlib import Path
from unittest.mock import patch
import tempfile
import logging
import os
import time
from datetime import datetime
from pathlib import Path

import numpy as np
import pandas as pd
from delphi_nssp.constants import GEOS, SIGNALS_MAP
from delphi_nssp.run import add_needed_columns
from epiweeks import Week
from pandas.testing import assert_frame_equal
from delphi_nssp.constants import GEOS, SIGNALS, SIGNALS_MAP, DATASET_ID
from delphi_nssp.run import (
add_needed_columns
)


def remove_backup_and_receiving(params):
    """Delete test artifacts: exported CSVs and today's backup files.

    Removes every ``*.csv`` under ``params["common"]["export_dir"]`` and
    every file in ``params["common"]["backup_dir"]`` whose name starts
    with today's ``YYYYMMDD`` date stamp.
    """
    common = params["common"]

    export_path = Path(common["export_dir"])
    for csv_file in export_path.glob("*.csv"):
        os.remove(csv_file)

    # Backups are prefixed with the run date, so only today's files match.
    date_prefix = pd.Timestamp.today().strftime("%Y%m%d")
    for backup_file in glob.glob(f"{Path(common['backup_dir'])}/{date_prefix}*"):
        os.remove(backup_file)

class TestRun:
def test_add_needed_columns(self):
df = pd.DataFrame({"geo_id": ["us"], "val": [1]})
Expand Down Expand Up @@ -68,13 +70,10 @@ def test_output_files_exist(self, params, run_as_module):
]
assert set(expected_columns).issubset(set(df.columns.values))

for file in Path(export_dir).glob("*.csv"):
os.remove(file)
# Verify that there's no NA/empty values in the val columns
assert not df["val"].isnull().any()

today = pd.Timestamp.today().strftime("%Y%m%d")
backup_dir = glob.glob(f"{Path(params['common']['backup_dir'])}/{today}*")
for file in backup_dir:
os.remove(file)
remove_backup_and_receiving(params)

def test_valid_hrr(self, run_as_module_hrr, params):
export_dir = params["common"]["export_dir"]
Expand All @@ -85,10 +84,23 @@ def test_valid_hrr(self, run_as_module_hrr, params):
df = pd.read_csv(f)
assert (df.val == 100).all()

for file in Path(export_dir).glob("*.csv"):
os.remove(file)
remove_backup_and_receiving(params)

def test_empty_data(self, run_as_module_empty, params, caplog):
"""
Tests correct handling when there is a geo and signal combination that has no data.
"""

caplog.set_level(logging.WARNING)
run_as_module_empty()
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

If you put `caplog` before `run_as_module_empty` in the method's argument list, I don't think you will need the encapsulated definition of `_run_as_module_empty` (and then you won't need to execute it explicitly here).

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@minhkhul and I had a chat — this suggestion doesn't work, possibly because of how fixtures behave in general or how we are using them.

assert "No data for signal and geo combination" in caplog.text

export_dir = params["common"]["export_dir"]
csv_files = [f for f in Path(export_dir).glob("*.csv")]

# Since only one national entry in page_no_data.json with numeric data,
# while the two counties have no numeric fields,
# there should be no county, hrr, hhs, or msa files.
assert not any(geo in f.name for geo in ["county", "hrr", "hhs", "msa"] for f in csv_files)

today = pd.Timestamp.today().strftime("%Y%m%d")
backup_dir = glob.glob(f"{Path(params['common']['backup_dir'])}/{today}*")
for file in backup_dir:
os.remove(file)
remove_backup_and_receiving(params)
Loading