From 8fccf03e5c40c45b4aaf42946f8e4fbac7c3931d Mon Sep 17 00:00:00 2001
From: minhkhul <minhkhul@andrew.cmu.edu>
Date: Tue, 1 Apr 2025 19:22:47 -0400
Subject: [PATCH 01/14] delete combined signals + adjust tests

---
 nssp/delphi_nssp/constants.py |  2 --
 nssp/tests/test_patch.py      | 12 ++++++------
 2 files changed, 6 insertions(+), 8 deletions(-)

diff --git a/nssp/delphi_nssp/constants.py b/nssp/delphi_nssp/constants.py
index 4aa5695d8..fde27c3d0 100644
--- a/nssp/delphi_nssp/constants.py
+++ b/nssp/delphi_nssp/constants.py
@@ -15,11 +15,9 @@
     "percent_visits_covid": "pct_ed_visits_covid",
     "percent_visits_influenza": "pct_ed_visits_influenza",
     "percent_visits_rsv": "pct_ed_visits_rsv",
-    "percent_visits_combined": "pct_ed_visits_combined",
     "percent_visits_smoothed_covid": "smoothed_pct_ed_visits_covid",
     "percent_visits_smoothed_1": "smoothed_pct_ed_visits_influenza",
     "percent_visits_smoothed_rsv": "smoothed_pct_ed_visits_rsv",
-    "percent_visits_smoothed": "smoothed_pct_ed_visits_combined",
 }
 
 SIGNALS = [val for (key, val) in SIGNALS_MAP.items()]
diff --git a/nssp/tests/test_patch.py b/nssp/tests/test_patch.py
index fb40e8d2b..c73385d75 100644
--- a/nssp/tests/test_patch.py
+++ b/nssp/tests/test_patch.py
@@ -259,14 +259,14 @@ def test_full_patch_code(self, mock_read_params, mock_get_structured_logger):
 
         # Make sure issue_20210103 has latest weekly data (data from 20210109 instead of 20210108)
         df_20210108 = pd.read_csv('source_dir/20210108.csv.gz')
-        df_20210108_nation_combined = df_20210108['percent_visits_combined'].iloc[0]
+        df_20210108_nation_covid = df_20210108['percent_visits_covid'].iloc[0]
         df_20210109 = pd.read_csv('source_dir/20210109.csv.gz')
-        df_20210109_nation_combined = df_20210109['percent_visits_combined'].iloc[0]
-        assert df_20210108_nation_combined != df_20210109_nation_combined
+        df_20210109_nation_covid = df_20210109['percent_visits_covid'].iloc[0]
+        assert df_20210108_nation_covid != df_20210109_nation_covid
 
-        df_issue_20210103 = pd.read_csv('patch_dir/issue_20210103/nssp/weekly_202040_nation_pct_ed_visits_combined.csv')
-        df_issue_20210103_nation_combined = df_issue_20210103['val'].iloc[0]
-        assert df_20210109_nation_combined == df_issue_20210103_nation_combined
+        df_issue_20210103 = pd.read_csv('patch_dir/issue_20210103/nssp/weekly_202040_nation_pct_ed_visits_covid.csv')
+        df_issue_20210103_nation_covid = df_issue_20210103['val'].iloc[0]
+        assert df_20210109_nation_covid == df_issue_20210103_nation_covid
 
         # Clean up the created directories after the test
         shutil.rmtree(mock_read_params.return_value["patch"]["patch_dir"])
\ No newline at end of file

From 4a9554c2ec23bd15e745e142df0847ee407c6c7a Mon Sep 17 00:00:00 2001
From: minhkhul <minhkhul@andrew.cmu.edu>
Date: Wed, 2 Apr 2025 10:17:56 -0400
Subject: [PATCH 02/14] Revert "delete combined signals + adjust tests"

This reverts commit 8fccf03e5c40c45b4aaf42946f8e4fbac7c3931d.
---
 nssp/delphi_nssp/constants.py |  2 ++
 nssp/tests/test_patch.py      | 12 ++++++------
 2 files changed, 8 insertions(+), 6 deletions(-)

diff --git a/nssp/delphi_nssp/constants.py b/nssp/delphi_nssp/constants.py
index fde27c3d0..4aa5695d8 100644
--- a/nssp/delphi_nssp/constants.py
+++ b/nssp/delphi_nssp/constants.py
@@ -15,9 +15,11 @@
     "percent_visits_covid": "pct_ed_visits_covid",
     "percent_visits_influenza": "pct_ed_visits_influenza",
     "percent_visits_rsv": "pct_ed_visits_rsv",
+    "percent_visits_combined": "pct_ed_visits_combined",
     "percent_visits_smoothed_covid": "smoothed_pct_ed_visits_covid",
     "percent_visits_smoothed_1": "smoothed_pct_ed_visits_influenza",
     "percent_visits_smoothed_rsv": "smoothed_pct_ed_visits_rsv",
+    "percent_visits_smoothed": "smoothed_pct_ed_visits_combined",
 }
 
 SIGNALS = [val for (key, val) in SIGNALS_MAP.items()]
diff --git a/nssp/tests/test_patch.py b/nssp/tests/test_patch.py
index c73385d75..fb40e8d2b 100644
--- a/nssp/tests/test_patch.py
+++ b/nssp/tests/test_patch.py
@@ -259,14 +259,14 @@ def test_full_patch_code(self, mock_read_params, mock_get_structured_logger):
 
         # Make sure issue_20210103 has latest weekly data (data from 20210109 instead of 20210108)
         df_20210108 = pd.read_csv('source_dir/20210108.csv.gz')
-        df_20210108_nation_covid = df_20210108['percent_visits_covid'].iloc[0]
+        df_20210108_nation_combined = df_20210108['percent_visits_combined'].iloc[0]
         df_20210109 = pd.read_csv('source_dir/20210109.csv.gz')
-        df_20210109_nation_covid = df_20210109['percent_visits_covid'].iloc[0]
-        assert df_20210108_nation_covid != df_20210109_nation_covid
+        df_20210109_nation_combined = df_20210109['percent_visits_combined'].iloc[0]
+        assert df_20210108_nation_combined != df_20210109_nation_combined
 
-        df_issue_20210103 = pd.read_csv('patch_dir/issue_20210103/nssp/weekly_202040_nation_pct_ed_visits_covid.csv')
-        df_issue_20210103_nation_covid = df_issue_20210103['val'].iloc[0]
-        assert df_20210109_nation_covid == df_issue_20210103_nation_covid
+        df_issue_20210103 = pd.read_csv('patch_dir/issue_20210103/nssp/weekly_202040_nation_pct_ed_visits_combined.csv')
+        df_issue_20210103_nation_combined = df_issue_20210103['val'].iloc[0]
+        assert df_20210109_nation_combined == df_issue_20210103_nation_combined
 
         # Clean up the created directories after the test
         shutil.rmtree(mock_read_params.return_value["patch"]["patch_dir"])
\ No newline at end of file

From 6c2bba8ab61b446a181c9688f97b888f3be09458 Mon Sep 17 00:00:00 2001
From: minhkhul <minhkhul@andrew.cmu.edu>
Date: Wed, 2 Apr 2025 15:21:36 -0400
Subject: [PATCH 03/14] no null in csv + adjust tests accordingly

---
 nssp/delphi_nssp/run.py                |  6 ++++-
 nssp/tests/test_data/page.json         | 36 ++++++++++++++++++++++++++
 nssp/tests/test_data/page_100_hrr.json | 36 ++++++++++++++++++++++++++
 nssp/tests/test_pull.py                |  4 ---
 nssp/tests/test_run.py                 |  3 +++
 5 files changed, 80 insertions(+), 5 deletions(-)

diff --git a/nssp/delphi_nssp/run.py b/nssp/delphi_nssp/run.py
index 82a96e5ed..bcc0f69a6 100644
--- a/nssp/delphi_nssp/run.py
+++ b/nssp/delphi_nssp/run.py
@@ -105,8 +105,9 @@ def run_module(params, logger=None):
             logger.warning("No primary source data pulled", issue_date=issue_date)
             break
         for geo in GEOS:
-            df = df_pull.copy()
+            df = df_pull.copy(deep=True)
             df["val"] = df[signal]
+            df = df.dropna(subset=["val"])
             logger.info("Generating signal and exporting to CSV", geo_type=geo, signal=signal)
             if geo == "nation":
                 df = df[df["geography"] == "United States"]
@@ -140,6 +141,9 @@ def run_module(params, logger=None):
             else:
                 df = df[df["county"] != "All"]
                 df["geo_id"] = df["fips"]
+            if df.empty:
+                logger.info("No data for this signal and geo type combination", geo_type=geo, signal=signal)
+                continue
             # add se, sample_size, and na codes
             missing_cols = set(CSV_COLS) - set(df.columns)
             df = add_needed_columns(df, col_names=list(missing_cols))
diff --git a/nssp/tests/test_data/page.json b/nssp/tests/test_data/page.json
index e80d2817f..692c67619 100644
--- a/nssp/tests/test_data/page.json
+++ b/nssp/tests/test_data/page.json
@@ -196,5 +196,41 @@
         "fips": "8101",
         "trend_source": "HSA",
         "buildnumber": "2025-02-28"
+    },
+    {
+        "_comment":"This record is for testing the case where all signals data is NA for a county",
+        "week_end":"2022-10-15T00:00:00.000",
+        "geography":"Colorado",
+        "county":"Chaffee",
+        "ed_trends_covid":"Data Unavailable",
+        "ed_trends_influenza":"Data Unavailable",
+        "ed_trends_rsv":"Data Unavailable",
+        "hsa":"Chaffee, CO - Lake, CO",
+        "hsa_counties":"Chaffee, Lake",
+        "hsa_nci_id":"786",
+        "fips":"8015",
+        "trend_source":"HSA",
+        "buildnumber":"2025-02-28"
+    },
+    {
+        "_comment":"This record is for testing the case where some signal data (combined signals) is NA for a county",
+        "week_end":"2022-10-15T00:00:00.000",
+        "geography":"Colorado",
+        "county":"Arapahoe",
+        "percent_visits_covid": "1",
+        "percent_visits_influenza": "1",
+        "percent_visits_rsv": "1",
+        "percent_visits_smoothed_covid": "1",
+        "percent_visits_smoothed_1": "1",
+        "percent_visits_smoothed_rsv": "1",
+        "ed_trends_covid":"Decreasing",
+        "ed_trends_influenza":"Decreasing",
+        "ed_trends_rsv":"Decreasing",
+        "hsa":"Denver (Denver), CO - Jefferson, CO",
+        "hsa_counties":"Adams, Arapahoe, Clear Creek, Denver, Douglas, Elbert, Gilpin, Grand, Jefferson, Park, Summit",
+        "hsa_nci_id":"688",
+        "fips":"8005",
+        "trend_source":"HSA",
+        "buildnumber":"2025-03-28"
     }
 ]
diff --git a/nssp/tests/test_data/page_100_hrr.json b/nssp/tests/test_data/page_100_hrr.json
index 34ae27b35..cf8527c1b 100644
--- a/nssp/tests/test_data/page_100_hrr.json
+++ b/nssp/tests/test_data/page_100_hrr.json
@@ -196,5 +196,41 @@
         "fips": "8101",
         "trend_source": "HSA",
         "buildnumber": "2025-02-28"
+    },
+    {
+        "_comment":"This record is for testing the case where all signals data is NA for a county",
+        "week_end":"2022-10-15T00:00:00.000",
+        "geography":"Colorado",
+        "county":"Chaffee",
+        "ed_trends_covid":"Data Unavailable",
+        "ed_trends_influenza":"Data Unavailable",
+        "ed_trends_rsv":"Data Unavailable",
+        "hsa":"Chaffee, CO - Lake, CO",
+        "hsa_counties":"Chaffee, Lake",
+        "hsa_nci_id":"786",
+        "fips":"8015",
+        "trend_source":"HSA",
+        "buildnumber":"2025-02-28"
+    },
+    {
+        "_comment":"This record is for testing the case where some signal data (combined signals) is NA for a county",
+        "week_end":"2022-10-15T00:00:00.000",
+        "geography":"Colorado",
+        "county":"Arapahoe",
+        "percent_visits_covid": "100",
+        "percent_visits_influenza": "100",
+        "percent_visits_rsv": "100",
+        "percent_visits_smoothed_covid": "100",
+        "percent_visits_smoothed_1": "100",
+        "percent_visits_smoothed_rsv": "100",
+        "ed_trends_covid":"Decreasing",
+        "ed_trends_influenza":"Decreasing",
+        "ed_trends_rsv":"Decreasing",
+        "hsa":"Denver (Denver), CO - Jefferson, CO",
+        "hsa_counties":"Adams, Arapahoe, Clear Creek, Denver, Douglas, Elbert, Gilpin, Grand, Jefferson, Park, Summit",
+        "hsa_nci_id":"688",
+        "fips":"8005",
+        "trend_source":"HSA",
+        "buildnumber":"2025-03-28"
     }
 ]
\ No newline at end of file
diff --git a/nssp/tests/test_pull.py b/nssp/tests/test_pull.py
index 541384d6d..e70594528 100644
--- a/nssp/tests/test_pull.py
+++ b/nssp/tests/test_pull.py
@@ -90,9 +90,5 @@ def test_normal_pull_nssp_data(self, mock_socrata, params, caplog):
         assert result["fips"].notnull().all(), "fips has rogue NaN"
         assert result["fips"].apply(lambda x: isinstance(x, str) and len(x) != 4).all(), "fips formatting should always be 5 digits; include leading zeros if aplicable"
 
-        # Check for each signal in SIGNALS
-        for signal in SIGNALS:
-            assert result[signal].notnull().all(), f"{signal} has rogue NaN"
-
         for file in backup_files:
             os.remove(file)
diff --git a/nssp/tests/test_run.py b/nssp/tests/test_run.py
index c24a76af4..2c269958d 100644
--- a/nssp/tests/test_run.py
+++ b/nssp/tests/test_run.py
@@ -68,6 +68,9 @@ def test_output_files_exist(self, params, run_as_module):
             ]
             assert set(expected_columns).issubset(set(df.columns.values))
 
+            #Verify that there's no NA/empty values in the val columns
+            assert not df["val"].isnull().any()
+
         for file in Path(export_dir).glob("*.csv"):
             os.remove(file)
 

From a1211b397b6fba3905303b832816e3b919457143 Mon Sep 17 00:00:00 2001
From: minhkhul <minhkhul@andrew.cmu.edu>
Date: Wed, 2 Apr 2025 16:19:44 -0400
Subject: [PATCH 04/14] simplify remove rows with missing values

---
 nssp/delphi_nssp/run.py | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/nssp/delphi_nssp/run.py b/nssp/delphi_nssp/run.py
index bcc0f69a6..ee3bee53b 100644
--- a/nssp/delphi_nssp/run.py
+++ b/nssp/delphi_nssp/run.py
@@ -107,7 +107,6 @@ def run_module(params, logger=None):
         for geo in GEOS:
             df = df_pull.copy(deep=True)
             df["val"] = df[signal]
-            df = df.dropna(subset=["val"])
             logger.info("Generating signal and exporting to CSV", geo_type=geo, signal=signal)
             if geo == "nation":
                 df = df[df["geography"] == "United States"]
@@ -141,13 +140,14 @@ def run_module(params, logger=None):
             else:
                 df = df[df["county"] != "All"]
                 df["geo_id"] = df["fips"]
-            if df.empty:
-                logger.info("No data for this signal and geo type combination", geo_type=geo, signal=signal)
-                continue
             # add se, sample_size, and na codes
             missing_cols = set(CSV_COLS) - set(df.columns)
             df = add_needed_columns(df, col_names=list(missing_cols))
             df_csv = df[CSV_COLS + ["timestamp"]]
+            df_csv = df_csv[df_csv["val"].notnull()]
+            if df_csv.empty:
+                logger.warning("No data for signal and geo combination", signal=signal, geo=geo)
+                continue
             # actual export
             dates = create_export_csv(
                 df_csv,

From 9c34aa88adb43deb6e52f7152c46dce244e35b4b Mon Sep 17 00:00:00 2001
From: minhkhul <minhkhul@andrew.cmu.edu>
Date: Wed, 2 Apr 2025 16:36:29 -0400
Subject: [PATCH 05/14] add comments

---
 nssp/delphi_nssp/run.py | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/nssp/delphi_nssp/run.py b/nssp/delphi_nssp/run.py
index ee3bee53b..a390a57a2 100644
--- a/nssp/delphi_nssp/run.py
+++ b/nssp/delphi_nssp/run.py
@@ -144,10 +144,13 @@ def run_module(params, logger=None):
             missing_cols = set(CSV_COLS) - set(df.columns)
             df = add_needed_columns(df, col_names=list(missing_cols))
             df_csv = df[CSV_COLS + ["timestamp"]]
+
+            # remove rows with missing values
             df_csv = df_csv[df_csv["val"].notnull()]
             if df_csv.empty:
                 logger.warning("No data for signal and geo combination", signal=signal, geo=geo)
                 continue
+            
             # actual export
             dates = create_export_csv(
                 df_csv,

From 3ee0ae4b3e9e64bc7c5fcbf5ce9d7d6bdb1cfcc1 Mon Sep 17 00:00:00 2001
From: minhkhul <minhkhul@andrew.cmu.edu>
Date: Wed, 2 Apr 2025 17:22:42 -0400
Subject: [PATCH 06/14] lint

---
 nssp/delphi_nssp/run.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/nssp/delphi_nssp/run.py b/nssp/delphi_nssp/run.py
index a390a57a2..89c8a802a 100644
--- a/nssp/delphi_nssp/run.py
+++ b/nssp/delphi_nssp/run.py
@@ -150,7 +150,7 @@ def run_module(params, logger=None):
             if df_csv.empty:
                 logger.warning("No data for signal and geo combination", signal=signal, geo=geo)
                 continue
-            
+
             # actual export
             dates = create_export_csv(
                 df_csv,

From bd3ad702da45365ed912430f7661c7c5907a7084 Mon Sep 17 00:00:00 2001
From: minhkhul <118945681+minhkhul@users.noreply.github.com>
Date: Tue, 8 Apr 2025 16:09:36 -0400
Subject: [PATCH 07/14] remove unnecessary deep=true in run.py

---
 nssp/delphi_nssp/run.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/nssp/delphi_nssp/run.py b/nssp/delphi_nssp/run.py
index 89c8a802a..d4e167d66 100644
--- a/nssp/delphi_nssp/run.py
+++ b/nssp/delphi_nssp/run.py
@@ -105,7 +105,7 @@ def run_module(params, logger=None):
             logger.warning("No primary source data pulled", issue_date=issue_date)
             break
         for geo in GEOS:
-            df = df_pull.copy(deep=True)
+            df = df_pull.copy()
             df["val"] = df[signal]
             logger.info("Generating signal and exporting to CSV", geo_type=geo, signal=signal)
             if geo == "nation":

From bb0c62acb0dfd9f9682baa148fbf33a89c90f5d1 Mon Sep 17 00:00:00 2001
From: minhkhul <minhkhul@andrew.cmu.edu>
Date: Wed, 9 Apr 2025 17:55:26 -0400
Subject: [PATCH 08/14] add test

---
 nssp/tests/conftest.py                 | 34 +++++++++++---
 nssp/tests/test_data/page.json         |  2 -
 nssp/tests/test_data/page_100_hrr.json |  2 -
 nssp/tests/test_run.py                 | 61 +++++++++++++++-----------
 4 files changed, 64 insertions(+), 35 deletions(-)

diff --git a/nssp/tests/conftest.py b/nssp/tests/conftest.py
index c308b6e6e..69c7e9852 100644
--- a/nssp/tests/conftest.py
+++ b/nssp/tests/conftest.py
@@ -1,13 +1,11 @@
 import copy
 import json
-import time
-from unittest.mock import patch, MagicMock
-
-import pytest
 from pathlib import Path
+from unittest.mock import patch
 
-from delphi_nssp.run import run_module
+import pytest
 from delphi_nssp.constants import DATASET_ID
+from delphi_nssp.run import run_module
 
 TEST_DIR = Path(__file__).parent
 
@@ -20,6 +18,9 @@
 with open(f"{TEST_DIR}/test_data/page_100_hrr.json", "r") as f:
     HRR_TEST_DATA = json.load(f)
 
+with open(f"{TEST_DIR}/test_data/page_no_data.json", "r") as f:
+    EMPTY_TEST_DATA = json.load(f)
+
 @pytest.fixture(scope="session")
 def params():
     params = {
@@ -99,3 +100,26 @@ def side_effect(*args, **kwargs):
         mock_get.side_effect = side_effect
         run_module(params)
 
+@pytest.fixture(scope="function")
+def run_as_module_empty(params):
+    """
+    Fixture to use EMPTY_TEST_DATA when testing run_module.
+
+    This fixture patches socrara to return the predefined test
+    data where relevent data is empty.
+    """
+
+    def _run_as_module_empty():
+        with patch("sodapy.Socrata.get") as mock_get:
+
+            def side_effect(*args, **kwargs):
+                if kwargs["offset"] == 0:
+                    if DATASET_ID in args[0]:
+                        return EMPTY_TEST_DATA
+                else:
+                    return []
+
+            mock_get.side_effect = side_effect
+            run_module(params)
+
+    return _run_as_module_empty
diff --git a/nssp/tests/test_data/page.json b/nssp/tests/test_data/page.json
index 692c67619..185fb9635 100644
--- a/nssp/tests/test_data/page.json
+++ b/nssp/tests/test_data/page.json
@@ -198,7 +198,6 @@
         "buildnumber": "2025-02-28"
     },
     {
-        "_comment":"This record is for testing the case where all signals data is NA for a county",
         "week_end":"2022-10-15T00:00:00.000",
         "geography":"Colorado",
         "county":"Chaffee",
@@ -213,7 +212,6 @@
         "buildnumber":"2025-02-28"
     },
     {
-        "_comment":"This record is for testing the case where some signal data (combined signals) is NA for a county",
         "week_end":"2022-10-15T00:00:00.000",
         "geography":"Colorado",
         "county":"Arapahoe",
diff --git a/nssp/tests/test_data/page_100_hrr.json b/nssp/tests/test_data/page_100_hrr.json
index cf8527c1b..3f8b723b9 100644
--- a/nssp/tests/test_data/page_100_hrr.json
+++ b/nssp/tests/test_data/page_100_hrr.json
@@ -198,7 +198,6 @@
         "buildnumber": "2025-02-28"
     },
     {
-        "_comment":"This record is for testing the case where all signals data is NA for a county",
         "week_end":"2022-10-15T00:00:00.000",
         "geography":"Colorado",
         "county":"Chaffee",
@@ -213,7 +212,6 @@
         "buildnumber":"2025-02-28"
     },
     {
-        "_comment":"This record is for testing the case where some signal data (combined signals) is NA for a county",
         "week_end":"2022-10-15T00:00:00.000",
         "geography":"Colorado",
         "county":"Arapahoe",
diff --git a/nssp/tests/test_run.py b/nssp/tests/test_run.py
index 2c269958d..315ee37e3 100644
--- a/nssp/tests/test_run.py
+++ b/nssp/tests/test_run.py
@@ -1,23 +1,25 @@
 import glob
-from datetime import datetime, date
-import json
-from pathlib import Path
-from unittest.mock import patch
-import tempfile
+import logging
 import os
-import time
-from datetime import datetime
+from pathlib import Path
 
 import numpy as np
 import pandas as pd
+from delphi_nssp.constants import GEOS, SIGNALS_MAP
+from delphi_nssp.run import add_needed_columns
 from epiweeks import Week
-from pandas.testing import assert_frame_equal
-from delphi_nssp.constants import GEOS, SIGNALS, SIGNALS_MAP, DATASET_ID
-from delphi_nssp.run import (
-    add_needed_columns
-)
 
 
+def remove_backup_and_receiving(params):
+    export_dir = params["common"]["export_dir"]
+    for file in Path(export_dir).glob("*.csv"):
+        os.remove(file)
+
+    today = pd.Timestamp.today().strftime("%Y%m%d")
+    backup_dir = glob.glob(f"{Path(params['common']['backup_dir'])}/{today}*")
+    for file in backup_dir:
+        os.remove(file)
+
 class TestRun:
     def test_add_needed_columns(self):
         df = pd.DataFrame({"geo_id": ["us"], "val": [1]})
@@ -68,16 +70,10 @@ def test_output_files_exist(self, params, run_as_module):
             ]
             assert set(expected_columns).issubset(set(df.columns.values))
 
-            #Verify that there's no NA/empty values in the val columns
+            # Verify that there's no NA/empty values in the val columns
             assert not df["val"].isnull().any()
 
-        for file in Path(export_dir).glob("*.csv"):
-            os.remove(file)
-
-        today = pd.Timestamp.today().strftime("%Y%m%d")
-        backup_dir = glob.glob(f"{Path(params['common']['backup_dir'])}/{today}*")
-        for file in backup_dir:
-            os.remove(file)
+        remove_backup_and_receiving(params)
 
     def test_valid_hrr(self, run_as_module_hrr, params):
         export_dir = params["common"]["export_dir"]
@@ -88,10 +84,23 @@ def test_valid_hrr(self, run_as_module_hrr, params):
             df = pd.read_csv(f)
             assert (df.val == 100).all()
 
-        for file in Path(export_dir).glob("*.csv"):
-            os.remove(file)
+        remove_backup_and_receiving(params)
+
+    def test_empty_data(self, run_as_module_empty, params, caplog):
+        """
+        Tests correct handling when there is a geo and signal combination that has no data.
+        """
+
+        caplog.set_level(logging.WARNING)
+        run_as_module_empty()
+        assert "No data for signal and geo combination" in caplog.text
+
+        export_dir = params["common"]["export_dir"]
+        csv_files = [f for f in Path(export_dir).glob("*.csv")]
+
+        # Since page_no_data.json has only one national entry with numeric data,
+        # while the two counties have no numeric fields, 
+        # there should be no county, hrr, hhs, or msa files.
+        assert not any(geo in f.name for geo in ["county", "hrr", "hhs", "msa"] for f in csv_files)
 
-        today = pd.Timestamp.today().strftime("%Y%m%d")
-        backup_dir = glob.glob(f"{Path(params['common']['backup_dir'])}/{today}*")
-        for file in backup_dir:
-            os.remove(file)
+        remove_backup_and_receiving(params)

From 0674e32e79d85e54613147a43b406794665f38dc Mon Sep 17 00:00:00 2001
From: minhkhul <minhkhul@andrew.cmu.edu>
Date: Wed, 9 Apr 2025 17:59:07 -0400
Subject: [PATCH 09/14] add page_no_data.json

---
 nssp/tests/test_data/page_no_data.json | 52 ++++++++++++++++++++++++++
 1 file changed, 52 insertions(+)
 create mode 100644 nssp/tests/test_data/page_no_data.json

diff --git a/nssp/tests/test_data/page_no_data.json b/nssp/tests/test_data/page_no_data.json
new file mode 100644
index 000000000..05f3ea530
--- /dev/null
+++ b/nssp/tests/test_data/page_no_data.json
@@ -0,0 +1,52 @@
+[
+    {
+        "week_end":"2022-10-15T00:00:00.000",
+        "geography":"United States",
+        "county":"All",
+        "percent_visits_combined":"2.0",
+        "percent_visits_covid":"1.63",
+        "percent_visits_influenza":"0.17",
+        "percent_visits_rsv":"0.21",
+        "percent_visits_smoothed":"1.78",
+        "percent_visits_smoothed_covid":"1.54",
+        "percent_visits_smoothed_1":"0.12",
+        "percent_visits_smoothed_rsv":"0.12",
+        "ed_trends_covid":"Decreasing",
+        "ed_trends_influenza":"No Change",
+        "ed_trends_rsv":"Increasing",
+        "hsa":"All",
+        "hsa_counties":"All",
+        "hsa_nci_id":"All",
+        "fips":"0",
+        "trend_source":"United States",
+        "buildnumber":"2025-02-08"
+    },
+    {
+        "week_end":"2022-10-15T00:00:00.000",
+        "geography":"Colorado",
+        "county":"Chaffee",
+        "ed_trends_covid":"Data Unavailable",
+        "ed_trends_influenza":"Data Unavailable",
+        "ed_trends_rsv":"Data Unavailable",
+        "hsa":"Chaffee, CO - Lake, CO",
+        "hsa_counties":"Chaffee, Lake",
+        "hsa_nci_id":"786",
+        "fips":"8015",
+        "trend_source":"HSA",
+        "buildnumber":"2025-02-28"
+    },
+    {
+        "week_end":"2022-10-15T00:00:00.000",
+        "geography":"Colorado",
+        "county":"Arapahoe",
+        "ed_trends_covid":"Data Unavailable",
+        "ed_trends_influenza":"Data Unavailable",
+        "ed_trends_rsv":"Data Unavailable",
+        "hsa":"Denver (Denver), CO - Jefferson, CO",
+        "hsa_counties":"Adams, Arapahoe, Clear Creek, Denver, Douglas, Elbert, Gilpin, Grand, Jefferson, Park, Summit",
+        "hsa_nci_id":"688",
+        "fips":"8005",
+        "trend_source":"HSA",
+        "buildnumber":"2025-03-28"
+    }
+]
\ No newline at end of file

From 7929b48872b540c2d6b36d519bb5a7e15bb89b7a Mon Sep 17 00:00:00 2001
From: minhkhul <minhkhul@andrew.cmu.edu>
Date: Thu, 24 Apr 2025 19:14:18 -0400
Subject: [PATCH 10/14] remove set caplog level to warning + rename
 test_output_files

---
 nssp/tests/test_run.py | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/nssp/tests/test_run.py b/nssp/tests/test_run.py
index 315ee37e3..905104585 100644
--- a/nssp/tests/test_run.py
+++ b/nssp/tests/test_run.py
@@ -44,7 +44,7 @@ def generate_week_file_prefix(self, dates):
         ]
         return date_prefix
 
-    def test_output_files_exist(self, params, run_as_module):
+    def test_output_files(self, params, run_as_module):
         export_dir = params["common"]["export_dir"]
         csv_files = [f.name for f in Path(export_dir).glob("*.csv")]
 
@@ -91,7 +91,6 @@ def test_empty_data(self, run_as_module_empty, params, caplog):
         Tests correct handling when there is a geo and signal combination that has no data.
         """
 
-        caplog.set_level(logging.WARNING)
         run_as_module_empty()
         assert "No data for signal and geo combination" in caplog.text
 

From ec826cb48873ce9c83dbbe194e7d10f1229a914b Mon Sep 17 00:00:00 2001
From: minhkhul <minhkhul@andrew.cmu.edu>
Date: Fri, 25 Apr 2025 13:56:38 -0400
Subject: [PATCH 11/14] add nation assert

---
 nssp/tests/test_run.py | 1 +
 1 file changed, 1 insertion(+)

diff --git a/nssp/tests/test_run.py b/nssp/tests/test_run.py
index 905104585..ffd667418 100644
--- a/nssp/tests/test_run.py
+++ b/nssp/tests/test_run.py
@@ -101,5 +101,6 @@ def test_empty_data(self, run_as_module_empty, params, caplog):
         # while the two counties have no numeric fields, 
         # there should be no county, hrr, hhs, or msa files.
         assert not any(geo in f.name for geo in ["county", "hrr", "hhs", "msa"] for f in csv_files)
+        assert all("nation" in f.name for f in csv_files)
 
         remove_backup_and_receiving(params)

From 8c51c50716ad37a85297921c1c4831bd7898a61f Mon Sep 17 00:00:00 2001
From: minhkhul <minhkhul@andrew.cmu.edu>
Date: Thu, 8 May 2025 01:38:50 -0400
Subject: [PATCH 12/14] simplify test_empty_data

---
 nssp/tests/conftest.py | 24 ------------------------
 nssp/tests/test_run.py | 17 ++++++++++++-----
 2 files changed, 12 insertions(+), 29 deletions(-)

diff --git a/nssp/tests/conftest.py b/nssp/tests/conftest.py
index 69c7e9852..46da7af10 100644
--- a/nssp/tests/conftest.py
+++ b/nssp/tests/conftest.py
@@ -99,27 +99,3 @@ def side_effect(*args, **kwargs):
                 return []
         mock_get.side_effect = side_effect
         run_module(params)
-
-@pytest.fixture(scope="function")
-def run_as_module_empty(params):
-    """
-    Fixture to use EMPTY_TEST_DATA when testing run_module.
-
-    This fixture patches socrara to return the predefined test
-    data where relevent data is empty.
-    """
-
-    def _run_as_module_empty():
-        with patch("sodapy.Socrata.get") as mock_get:
-
-            def side_effect(*args, **kwargs):
-                if kwargs["offset"] == 0:
-                    if DATASET_ID in args[0]:
-                        return EMPTY_TEST_DATA
-                else:
-                    return []
-
-            mock_get.side_effect = side_effect
-            run_module(params)
-
-    return _run_as_module_empty
diff --git a/nssp/tests/test_run.py b/nssp/tests/test_run.py
index ffd667418..3e03e55a8 100644
--- a/nssp/tests/test_run.py
+++ b/nssp/tests/test_run.py
@@ -2,13 +2,15 @@
 import logging
 import os
 from pathlib import Path
-
+import json
+from unittest.mock import patch
 import numpy as np
 import pandas as pd
-from delphi_nssp.constants import GEOS, SIGNALS_MAP
-from delphi_nssp.run import add_needed_columns
+from delphi_nssp.constants import GEOS, SIGNALS_MAP, DATASET_ID
+from delphi_nssp.run import add_needed_columns, run_module
 from epiweeks import Week
 
+TEST_DIR = Path(__file__).parent
 
 def remove_backup_and_receiving(params):
     export_dir = params["common"]["export_dir"]
@@ -86,12 +88,17 @@ def test_valid_hrr(self, run_as_module_hrr, params):
 
         remove_backup_and_receiving(params)
 
-    def test_empty_data(self, run_as_module_empty, params, caplog):
+    @patch("sodapy.Socrata.get")
+    def test_empty_data(self, mock_get, params, caplog):
         """
         Tests correct handling when there is a geo and signal combination that has no data.
         """
 
-        run_as_module_empty()
+        with open(f"{TEST_DIR}/test_data/page_no_data.json", "r") as f:
+            EMPTY_TEST_DATA = json.load(f)
+        mock_get.side_effect = [EMPTY_TEST_DATA, []]
+        run_module(params)
+
         assert "No data for signal and geo combination" in caplog.text
 
         export_dir = params["common"]["export_dir"]

From 0635eaec14c3d9953fd4bd25f478c38af19bcc8e Mon Sep 17 00:00:00 2001
From: minhkhul <minhkhul@andrew.cmu.edu>
Date: Mon, 12 May 2025 18:21:01 -0400
Subject: [PATCH 13/14] revert conftest.py

---
 nssp/tests/conftest.py | 13 ++++++-------
 1 file changed, 6 insertions(+), 7 deletions(-)

diff --git a/nssp/tests/conftest.py b/nssp/tests/conftest.py
index 46da7af10..b88dc838a 100644
--- a/nssp/tests/conftest.py
+++ b/nssp/tests/conftest.py
@@ -1,11 +1,13 @@
 import copy
 import json
-from pathlib import Path
-from unittest.mock import patch
+import time
+from unittest.mock import patch, MagicMock
 
 import pytest
-from delphi_nssp.constants import DATASET_ID
+from pathlib import Path
+
 from delphi_nssp.run import run_module
+from delphi_nssp.constants import DATASET_ID
 
 TEST_DIR = Path(__file__).parent
 
@@ -18,9 +20,6 @@
 with open(f"{TEST_DIR}/test_data/page_100_hrr.json", "r") as f:
     HRR_TEST_DATA = json.load(f)
 
-with open(f"{TEST_DIR}/test_data/page_no_data.json", "r") as f:
-    EMPTY_TEST_DATA = json.load(f)
-
 @pytest.fixture(scope="session")
 def params():
     params = {
@@ -98,4 +97,4 @@ def side_effect(*args, **kwargs):
             else:
                 return []
         mock_get.side_effect = side_effect
-        run_module(params)
+        run_module(params)
\ No newline at end of file

From 8cec78fb2085598f23a896b41762f3540a9b5449 Mon Sep 17 00:00:00 2001
From: minhkhul <minhkhul@andrew.cmu.edu>
Date: Mon, 12 May 2025 19:32:50 -0400
Subject: [PATCH 14/14] fully revert conftest.py

---
 nssp/tests/conftest.py | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/nssp/tests/conftest.py b/nssp/tests/conftest.py
index b88dc838a..c308b6e6e 100644
--- a/nssp/tests/conftest.py
+++ b/nssp/tests/conftest.py
@@ -97,4 +97,5 @@ def side_effect(*args, **kwargs):
             else:
                 return []
         mock_get.side_effect = side_effect
-        run_module(params)
\ No newline at end of file
+        run_module(params)
+