Skip to content

Fix linting for nchs #454

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 3 commits into from
Nov 6, 2020
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
26 changes: 13 additions & 13 deletions nchs_mortality/delphi_nchs_mortality/pull.py
Original file line number Diff line number Diff line change
Expand Up @@ -33,12 +33,12 @@ def pull_nchs_mortality_data(token: str, map_df: pd.DataFrame, test_mode: str):
Dataframe as described above.
"""
# Constants
KEEP_COLUMNS = ['covid_deaths', 'total_deaths',
keep_columns = ['covid_deaths', 'total_deaths',
'percent_of_expected_deaths', 'pneumonia_deaths',
'pneumonia_and_covid_deaths', 'influenza_deaths',
'pneumonia_influenza_or_covid_19_deaths']
TYPE_DICT = {key: float for key in KEEP_COLUMNS}
TYPE_DICT["timestamp"] = 'datetime64[ns]'
type_dict = {key: float for key in keep_columns}
type_dict["timestamp"] = 'datetime64[ns]'

if test_mode == "":
# Pull data from Socrata API
Expand All @@ -52,18 +52,18 @@ def pull_nchs_mortality_data(token: str, map_df: pd.DataFrame, test_mode: str):
# Check missing start_week == end_week
try:
assert sum(df["timestamp"] != df["end_week"]) == 0
except AssertionError:
except AssertionError as exc:
raise ValueError(
"end_week is not always the same as start_week, check the raw file"
)
) from exc

try:
df = df.astype(TYPE_DICT)
except KeyError:
df = df.astype(type_dict)
except KeyError as exc:
raise ValueError("Expected column(s) missed, The dataset "
"schema may have changed. Please investigate and "
"amend the code.")
"schema may have changed. Please investigate and "
"amend the code.") from exc

df = df[df["state"] != "United States"]
df.loc[df["state"] == "New York City", "state"] = "New York"

Expand Down Expand Up @@ -91,7 +91,7 @@ def pull_nchs_mortality_data(token: str, map_df: pd.DataFrame, test_mode: str):
)

# Add population info
KEEP_COLUMNS.extend(["timestamp", "geo_id", "population"])
df = df.merge(map_df, on="state")[KEEP_COLUMNS]
keep_columns.extend(["timestamp", "geo_id", "population"])
df = df.merge(map_df, on="state")[keep_columns]

return df
10 changes: 5 additions & 5 deletions nchs_mortality/delphi_nchs_mortality/run.py
Original file line number Diff line number Diff line change
Expand Up @@ -27,9 +27,9 @@
"prop"
]
INCIDENCE_BASE = 100000
geo_res = "state"
GEO_RES = "state"

def run_module():
def run_module(): # pylint: disable=too-many-branches,too-many-statements
"""Run module for processing NCHS mortality data."""
params = read_params()
export_start_date = params["export_start_date"]
Expand Down Expand Up @@ -65,7 +65,7 @@ def run_module():
sensor_name = "_".join(["wip", metric])
export_csv(
df,
geo_name=geo_res,
geo_name=GEO_RES,
export_dir=daily_export_dir,
start_date=datetime.strptime(export_start_date, "%Y-%m-%d"),
sensor=sensor_name,
Expand All @@ -82,7 +82,7 @@ def run_module():
sensor_name = "_".join(["wip", metric, sensor])
export_csv(
df,
geo_name=geo_res,
geo_name=GEO_RES,
export_dir=daily_export_dir,
start_date=datetime.strptime(export_start_date, "%Y-%m-%d"),
sensor=sensor_name,
Expand All @@ -104,7 +104,7 @@ def run_module():
params["aws_credentials"])

# Don't update cache from S3 (has daily files); only simulate an update_cache() call
weekly_arch_diff._cache_updated = True
weekly_arch_diff._cache_updated = True # pylint: disable=protected-access

# Diff exports, and make incremental versions
_, common_diffs, new_files = weekly_arch_diff.diff_exports()
Expand Down