diff --git a/.bumpversion.cfg b/.bumpversion.cfg index 2e07908ff..afb32eb67 100644 --- a/.bumpversion.cfg +++ b/.bumpversion.cfg @@ -1,5 +1,5 @@ [bumpversion] -current_version = 0.3.62 +current_version = 0.3.63 commit = True message = chore: bump covidcast-indicators to {new_version} tag = False diff --git a/.github/workflows/json-check.yml b/.github/workflows/json-check.yml index 33245f0bd..4dad5efc7 100644 --- a/.github/workflows/json-check.yml +++ b/.github/workflows/json-check.yml @@ -11,7 +11,7 @@ on: jobs: build: - runs-on: ubuntu-20.04 + runs-on: ubuntu-latest if: github.event.pull_request.draft == false steps: - uses: actions/checkout@v2 diff --git a/.github/workflows/python-ci.yml b/.github/workflows/python-ci.yml index 498a9555c..a9bac40dc 100644 --- a/.github/workflows/python-ci.yml +++ b/.github/workflows/python-ci.yml @@ -12,7 +12,7 @@ on: jobs: build: - runs-on: ubuntu-20.04 + runs-on: ubuntu-latest if: github.event.pull_request.draft == false strategy: matrix: diff --git a/changehc/version.cfg b/changehc/version.cfg index 7d0fcb724..5436c4101 100644 --- a/changehc/version.cfg +++ b/changehc/version.cfg @@ -1 +1 @@ -current_version = 0.3.62 +current_version = 0.3.63 diff --git a/claims_hosp/version.cfg b/claims_hosp/version.cfg index 7d0fcb724..5436c4101 100644 --- a/claims_hosp/version.cfg +++ b/claims_hosp/version.cfg @@ -1 +1 @@ -current_version = 0.3.62 +current_version = 0.3.63 diff --git a/doctor_visits/version.cfg b/doctor_visits/version.cfg index 7d0fcb724..5436c4101 100644 --- a/doctor_visits/version.cfg +++ b/doctor_visits/version.cfg @@ -1 +1 @@ -current_version = 0.3.62 +current_version = 0.3.63 diff --git a/google_symptoms/version.cfg b/google_symptoms/version.cfg index 7d0fcb724..5436c4101 100644 --- a/google_symptoms/version.cfg +++ b/google_symptoms/version.cfg @@ -1 +1 @@ -current_version = 0.3.62 +current_version = 0.3.63 diff --git a/hhs_hosp/version.cfg b/hhs_hosp/version.cfg index 7d0fcb724..5436c4101 100644 --- a/hhs_hosp/version.cfg +++ b/hhs_hosp/version.cfg @@ -1 +1 @@ -current_version = 0.3.62 +current_version = 0.3.63 diff --git a/nchs_mortality/version.cfg b/nchs_mortality/version.cfg index 7d0fcb724..5436c4101 100644 --- a/nchs_mortality/version.cfg +++ b/nchs_mortality/version.cfg @@ -1 +1 @@ -current_version = 0.3.62 +current_version = 0.3.63 diff --git a/nssp/delphi_nssp/run.py b/nssp/delphi_nssp/run.py index 82a96e5ed..d4e167d66 100644 --- a/nssp/delphi_nssp/run.py +++ b/nssp/delphi_nssp/run.py @@ -144,6 +144,13 @@ def run_module(params, logger=None): missing_cols = set(CSV_COLS) - set(df.columns) df = add_needed_columns(df, col_names=list(missing_cols)) df_csv = df[CSV_COLS + ["timestamp"]] + + # remove rows with missing values + df_csv = df_csv[df_csv["val"].notnull()] + if df_csv.empty: + logger.warning("No data for signal and geo combination", signal=signal, geo=geo) + continue + # actual export dates = create_export_csv( df_csv, diff --git a/nssp/tests/test_data/page.json b/nssp/tests/test_data/page.json index e80d2817f..185fb9635 100644 --- a/nssp/tests/test_data/page.json +++ b/nssp/tests/test_data/page.json @@ -196,5 +196,39 @@ "fips": "8101", "trend_source": "HSA", "buildnumber": "2025-02-28" + }, + { + "week_end":"2022-10-15T00:00:00.000", + "geography":"Colorado", + "county":"Chaffee", + "ed_trends_covid":"Data Unavailable", + "ed_trends_influenza":"Data Unavailable", + "ed_trends_rsv":"Data Unavailable", + "hsa":"Chaffee, CO - Lake, CO", + "hsa_counties":"Chaffee, Lake", + "hsa_nci_id":"786", + "fips":"8015", + "trend_source":"HSA", + "buildnumber":"2025-02-28" + }, + { + "week_end":"2022-10-15T00:00:00.000", + "geography":"Colorado", + "county":"Arapahoe", + "percent_visits_covid": "1", + "percent_visits_influenza": "1", + "percent_visits_rsv": "1", + "percent_visits_smoothed_covid": "1", + "percent_visits_smoothed_1": "1", + "percent_visits_smoothed_rsv": "1", + "ed_trends_covid":"Decreasing", + "ed_trends_influenza":"Decreasing", + "ed_trends_rsv":"Decreasing", + "hsa":"Denver (Denver), CO - Jefferson, CO", + "hsa_counties":"Adams, Arapahoe, Clear Creek, Denver, Douglas, Elbert, Gilpin, Grand, Jefferson, Park, Summit", + "hsa_nci_id":"688", + "fips":"8005", + "trend_source":"HSA", + "buildnumber":"2025-03-28" } ] diff --git a/nssp/tests/test_data/page_100_hrr.json b/nssp/tests/test_data/page_100_hrr.json index 34ae27b35..3f8b723b9 100644 --- a/nssp/tests/test_data/page_100_hrr.json +++ b/nssp/tests/test_data/page_100_hrr.json @@ -196,5 +196,39 @@ "fips": "8101", "trend_source": "HSA", "buildnumber": "2025-02-28" + }, + { + "week_end":"2022-10-15T00:00:00.000", + "geography":"Colorado", + "county":"Chaffee", + "ed_trends_covid":"Data Unavailable", + "ed_trends_influenza":"Data Unavailable", + "ed_trends_rsv":"Data Unavailable", + "hsa":"Chaffee, CO - Lake, CO", + "hsa_counties":"Chaffee, Lake", + "hsa_nci_id":"786", + "fips":"8015", + "trend_source":"HSA", + "buildnumber":"2025-02-28" + }, + { + "week_end":"2022-10-15T00:00:00.000", + "geography":"Colorado", + "county":"Arapahoe", + "percent_visits_covid": "100", + "percent_visits_influenza": "100", + "percent_visits_rsv": "100", + "percent_visits_smoothed_covid": "100", + "percent_visits_smoothed_1": "100", + "percent_visits_smoothed_rsv": "100", + "ed_trends_covid":"Decreasing", + "ed_trends_influenza":"Decreasing", + "ed_trends_rsv":"Decreasing", + "hsa":"Denver (Denver), CO - Jefferson, CO", + "hsa_counties":"Adams, Arapahoe, Clear Creek, Denver, Douglas, Elbert, Gilpin, Grand, Jefferson, Park, Summit", + "hsa_nci_id":"688", + "fips":"8005", + "trend_source":"HSA", + "buildnumber":"2025-03-28" } ] \ No newline at end of file diff --git a/nssp/tests/test_data/page_no_data.json b/nssp/tests/test_data/page_no_data.json new file mode 100644 index 000000000..05f3ea530 --- /dev/null +++ b/nssp/tests/test_data/page_no_data.json @@ -0,0 +1,52 @@ +[ + { + "week_end":"2022-10-15T00:00:00.000", + "geography":"United States", + "county":"All", + "percent_visits_combined":"2.0", + "percent_visits_covid":"1.63", + "percent_visits_influenza":"0.17", + "percent_visits_rsv":"0.21", + "percent_visits_smoothed":"1.78", + "percent_visits_smoothed_covid":"1.54", + "percent_visits_smoothed_1":"0.12", + "percent_visits_smoothed_rsv":"0.12", + "ed_trends_covid":"Decreasing", + "ed_trends_influenza":"No Change", + "ed_trends_rsv":"Increasing", + "hsa":"All", + "hsa_counties":"All", + "hsa_nci_id":"All", + "fips":"0", + "trend_source":"United States", + "buildnumber":"2025-02-08" + }, + { + "week_end":"2022-10-15T00:00:00.000", + "geography":"Colorado", + "county":"Chaffee", + "ed_trends_covid":"Data Unavailable", + "ed_trends_influenza":"Data Unavailable", + "ed_trends_rsv":"Data Unavailable", + "hsa":"Chaffee, CO - Lake, CO", + "hsa_counties":"Chaffee, Lake", + "hsa_nci_id":"786", + "fips":"8015", + "trend_source":"HSA", + "buildnumber":"2025-02-28" + }, + { + "week_end":"2022-10-15T00:00:00.000", + "geography":"Colorado", + "county":"Arapahoe", + "ed_trends_covid":"Data Unavailable", + "ed_trends_influenza":"Data Unavailable", + "ed_trends_rsv":"Data Unavailable", + "hsa":"Denver (Denver), CO - Jefferson, CO", + "hsa_counties":"Adams, Arapahoe, Clear Creek, Denver, Douglas, Elbert, Gilpin, Grand, Jefferson, Park, Summit", + "hsa_nci_id":"688", + "fips":"8005", + "trend_source":"HSA", + "buildnumber":"2025-03-28" + } +] \ No newline at end of file diff --git a/nssp/tests/test_pull.py b/nssp/tests/test_pull.py index 541384d6d..e70594528 100644 --- a/nssp/tests/test_pull.py +++ b/nssp/tests/test_pull.py @@ -90,9 +90,5 @@ def test_normal_pull_nssp_data(self, mock_socrata, params, caplog): assert result["fips"].notnull().all(), "fips has rogue NaN" assert result["fips"].apply(lambda x: isinstance(x, str) and len(x) != 4).all(), "fips formatting should always be 5 digits; include leading zeros if aplicable" - # Check for each signal in SIGNALS - for signal in SIGNALS: - assert result[signal].notnull().all(), f"{signal} has rogue NaN" - for file in backup_files: os.remove(file) diff --git a/nssp/tests/test_run.py b/nssp/tests/test_run.py index c24a76af4..3e03e55a8 100644 --- a/nssp/tests/test_run.py +++ b/nssp/tests/test_run.py @@ -1,22 +1,26 @@ import glob -from datetime import datetime, date -import json +import logging +import os from pathlib import Path +import json from unittest.mock import patch -import tempfile -import os -import time -from datetime import datetime - import numpy as np import pandas as pd +from delphi_nssp.constants import GEOS, SIGNALS_MAP, DATASET_ID +from delphi_nssp.run import add_needed_columns, run_module from epiweeks import Week -from pandas.testing import assert_frame_equal -from delphi_nssp.constants import GEOS, SIGNALS, SIGNALS_MAP, DATASET_ID -from delphi_nssp.run import ( - add_needed_columns -) +TEST_DIR = Path(__file__).parent + +def remove_backup_and_receiving(params): + export_dir = params["common"]["export_dir"] + for file in Path(export_dir).glob("*.csv"): + os.remove(file) + + today = pd.Timestamp.today().strftime("%Y%m%d") + backup_dir = glob.glob(f"{Path(params['common']['backup_dir'])}/{today}*") + for file in backup_dir: + os.remove(file) class TestRun: def test_add_needed_columns(self): @@ -42,7 +46,7 @@ def generate_week_file_prefix(self, dates): ] return date_prefix - def test_output_files_exist(self, params, run_as_module): + def test_output_files(self, params, run_as_module): export_dir = params["common"]["export_dir"] csv_files = [f.name for f in Path(export_dir).glob("*.csv")] @@ -68,13 +72,10 @@ def test_output_files_exist(self, params, run_as_module): ] assert set(expected_columns).issubset(set(df.columns.values)) - for file in Path(export_dir).glob("*.csv"): - os.remove(file) + # Verify that there's no NA/empty values in the val columns + assert not df["val"].isnull().any() - today = pd.Timestamp.today().strftime("%Y%m%d") - backup_dir = glob.glob(f"{Path(params['common']['backup_dir'])}/{today}*") - for file in backup_dir: - os.remove(file) + remove_backup_and_receiving(params) def test_valid_hrr(self, run_as_module_hrr, params): export_dir = params["common"]["export_dir"] @@ -85,10 +86,28 @@ def test_valid_hrr(self, run_as_module_hrr, params): df = pd.read_csv(f) assert (df.val == 100).all() - for file in Path(export_dir).glob("*.csv"): - os.remove(file) + remove_backup_and_receiving(params) + + @patch("sodapy.Socrata.get") + def test_empty_data(self, mock_get, params, caplog): + """ + Tests correct handling when there is a geo and signal combination that has no data. + """ + + with open(f"{TEST_DIR}/test_data/page_no_data.json", "r") as f: + EMPTY_TEST_DATA = json.load(f) + mock_get.side_effect = [EMPTY_TEST_DATA, []] + run_module(params) + + assert "No data for signal and geo combination" in caplog.text + + export_dir = params["common"]["export_dir"] + csv_files = [f for f in Path(export_dir).glob("*.csv")] + + # Since only one national entry in page_no_data.json with numeric data, + # while the two counties have no numeric fields, + # there should be no county, hrr, hhs, or msa files. + assert not any(geo in f.name for geo in ["county", "hrr", "hhs", "msa"] for f in csv_files) + assert all("nation" in f.name for f in csv_files) - today = pd.Timestamp.today().strftime("%Y%m%d") - backup_dir = glob.glob(f"{Path(params['common']['backup_dir'])}/{today}*") - for file in backup_dir: - os.remove(file) + remove_backup_and_receiving(params) diff --git a/nssp/version.cfg b/nssp/version.cfg index 7d0fcb724..5436c4101 100644 --- a/nssp/version.cfg +++ b/nssp/version.cfg @@ -1 +1 @@ -current_version = 0.3.62 +current_version = 0.3.63 diff --git a/quidel_covidtest/version.cfg b/quidel_covidtest/version.cfg index 7d0fcb724..5436c4101 100644 --- a/quidel_covidtest/version.cfg +++ b/quidel_covidtest/version.cfg @@ -1 +1 @@ -current_version = 0.3.62 +current_version = 0.3.63 diff --git a/sir_complainsalot/version.cfg b/sir_complainsalot/version.cfg index 7d0fcb724..5436c4101 100644 --- a/sir_complainsalot/version.cfg +++ b/sir_complainsalot/version.cfg @@ -1 +1 @@ -current_version = 0.3.62 +current_version = 0.3.63