Skip to content

Add NAN code support to NCHS Mortality #1016

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 2 commits into from
Oct 25, 2021
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 3 additions & 0 deletions nchs_mortality/.gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,9 @@ params.json

# Do not commit output files
receiving/*.csv
daily_receiving/*.csv
cache/*.csv
daily_cache/*.csv

# Do not commit test files
tests/receiving/*.csv
Expand Down
20 changes: 17 additions & 3 deletions nchs_mortality/delphi_nchs_mortality/run.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,14 +9,26 @@
from typing import Dict, Any

import numpy as np
from delphi_utils import S3ArchiveDiffer, get_structured_logger, create_export_csv
from delphi_utils import S3ArchiveDiffer, get_structured_logger, create_export_csv, Nans

from .archive_diffs import arch_diffs
from .constants import (METRICS, SENSOR_NAME_MAP,
SENSORS, INCIDENCE_BASE, GEO_RES)
from .pull import pull_nchs_mortality_data


def add_nancodes(df):
    """Attach missingness-code columns to ``df`` and return it.

    Three columns are written onto the frame that is passed in:
    ``missing_val``, ``missing_se`` and ``missing_sample_size``.
    ``missing_val`` starts out as ``Nans.NOT_MISSING`` and the other two
    as ``Nans.NOT_APPLICABLE``. Any row whose ``val`` is null then has its
    ``missing_val`` overwritten with ``Nans.OTHER``.

    The frame is modified in place; the same object is returned.
    """
    # Baseline code for each missingness column, applied to every row
    default_codes = {
        "missing_val": Nans.NOT_MISSING,
        "missing_se": Nans.NOT_APPLICABLE,
        "missing_sample_size": Nans.NOT_APPLICABLE,
    }
    for column_name, code in default_codes.items():
        df[column_name] = code

    # Rows with a null value get the catch-all OTHER code
    df.loc[df["val"].isnull(), "missing_val"] = Nans.OTHER
    return df

def run_module(params: Dict[str, Any]):
"""Run module for processing NCHS mortality data.

Expand Down Expand Up @@ -67,7 +79,8 @@ def run_module(params: Dict[str, Any]):
df["val"] = df[metric]
df["se"] = np.nan
df["sample_size"] = np.nan
df = df[~df["val"].isnull()]
df = add_nancodes(df)
# df = df[~df["val"].isnull()]
sensor_name = "_".join([SENSOR_NAME_MAP[metric]])
dates = create_export_csv(
df,
Expand All @@ -91,7 +104,8 @@ def run_module(params: Dict[str, Any]):
df["val"] = df[metric] / df["population"] * INCIDENCE_BASE
df["se"] = np.nan
df["sample_size"] = np.nan
df = df[~df["val"].isnull()]
df = add_nancodes(df)
# df = df[~df["val"].isnull()]
sensor_name = "_".join([SENSOR_NAME_MAP[metric], sensor])
dates = create_export_csv(
df,
Expand Down
6 changes: 5 additions & 1 deletion nchs_mortality/tests/test_run.py
Original file line number Diff line number Diff line change
Expand Up @@ -60,4 +60,8 @@ def test_output_file_format(self, run_as_module, date):
df = pd.read_csv(
join(output_folder, "weekly_202026_state_deaths_covid_incidence_prop.csv")
)
assert (df.columns.values == ["geo_id", "val", "se", "sample_size"]).all()
expected_columns = [
"geo_id", "val", "se", "sample_size",
"missing_val", "missing_se", "missing_sample_size"
]
assert (df.columns.values == expected_columns).all()