From 59f5c2244b41f38e6abd426c6928bd3459111451 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E7=84=B6=E7=84=B6?= <52230092+zhuoran-cheng@users.noreply.github.com> Date: Fri, 20 Aug 2021 13:31:09 -0400 Subject: [PATCH] NCHS data available at HHS, nation level --- .../delphi_nchs_mortality/constants.py | 6 +++++- nchs_mortality/delphi_nchs_mortality/run.py | 7 ++++--- nchs_mortality/tests/test_run.py | 20 +++++++++++++++---- 3 files changed, 25 insertions(+), 8 deletions(-) diff --git a/nchs_mortality/delphi_nchs_mortality/constants.py b/nchs_mortality/delphi_nchs_mortality/constants.py index 164b84307..783227369 100644 --- a/nchs_mortality/delphi_nchs_mortality/constants.py +++ b/nchs_mortality/delphi_nchs_mortality/constants.py @@ -25,7 +25,11 @@ "prop" ] INCIDENCE_BASE = 100000 -GEO_RES = "state" +GEO_RES = [ + "nation", + "hhs", + "state" +] # this is necessary as a delimiter in the f-string expressions we use to # construct detailed error reports diff --git a/nchs_mortality/delphi_nchs_mortality/run.py b/nchs_mortality/delphi_nchs_mortality/run.py index fa0226fcb..57b9da95c 100644 --- a/nchs_mortality/delphi_nchs_mortality/run.py +++ b/nchs_mortality/delphi_nchs_mortality/run.py @@ -7,6 +7,7 @@ import time from datetime import datetime, date, timedelta from typing import Dict, Any +from itertools import product import numpy as np from delphi_utils import S3ArchiveDiffer, get_structured_logger @@ -60,7 +61,7 @@ def run_module(params: Dict[str, Any]): stats = [] df_pull = pull_nchs_mortality_data(token, test_file) - for metric in METRICS: + for metric,geo in product(METRICS,GEO_RES): if metric == 'percent_of_expected_deaths': print(metric) df = df_pull.copy() @@ -71,7 +72,7 @@ def run_module(params: Dict[str, Any]): sensor_name = "_".join([SENSOR_NAME_MAP[metric]]) dates = export_csv( df, - geo_name=GEO_RES, + geo_name=geo, export_dir=daily_export_dir, start_date=datetime.strptime(export_start_date, "%Y-%m-%d"), sensor=sensor_name, @@ -92,7 +93,7 @@ def run_module(params: Dict[str, Any]): sensor_name = "_".join([SENSOR_NAME_MAP[metric], sensor]) dates = export_csv( df, - geo_name=GEO_RES, + geo_name=geo, export_dir=daily_export_dir, start_date=datetime.strptime(export_start_date, "%Y-%m-%d"), sensor=sensor_name, diff --git a/nchs_mortality/tests/test_run.py b/nchs_mortality/tests/test_run.py index 36dba6698..e8f6f7a0c 100644 --- a/nchs_mortality/tests/test_run.py +++ b/nchs_mortality/tests/test_run.py @@ -36,17 +36,29 @@ def test_output_files_exist(self, run_as_module, date): 'deaths_pneumonia_or_flu_or_covid_incidence'] sensors = ["num", "prop"] - expected_files = [] + expected_files_nation = [] + expected_files_state=[] + expected_files_hhs=[] for d in dates: for metric in metrics: if metric == "deaths_percent_of_expected": - expected_files += ["weekly_" + d + "_state_" \ + expected_files_nation += ["weekly_" + d + "_nation_" \ + + metric + ".csv"] + expected_files_state += ["weekly_" + d + "_state_" \ + + metric + ".csv"] + expected_files_hhs += ["weekly_" + d + "_hhs_" \ + metric + ".csv"] else: for sensor in sensors: - expected_files += ["weekly_" + d + "_state_" \ + expected_files_nation += ["weekly_" + d + "_nation_" \ + + metric + "_" + sensor + ".csv"] + expected_files_state += ["weekly_" + d + "_state_" \ + + metric + "_" + sensor + ".csv"] + expected_files_hhs += ["weekly_" + d + "_hhs_" \ + metric + "_" + sensor + ".csv"] - assert set(expected_files).issubset(set(csv_files)) + assert set(expected_files_nation).issubset(set(csv_files)) + assert set(expected_files_state).issubset(set(csv_files)) + assert set(expected_files_hhs).issubset(set(csv_files)) @pytest.mark.parametrize("date", ["2020-09-14", "2020-09-18"]) def test_output_file_format(self, run_as_module, date):