
Commit d445846

Merge pull request #475 from cmu-delphi/fix-nchs-linting
Fix nchs linting
2 parents e75ff69 + 94f75d5

File tree: 4 files changed, +113 -78 lines

nchs_mortality/delphi_nchs_mortality/__main__.py

Lines changed: 1 addition & 1 deletion
@@ -1,7 +1,7 @@
 # -*- coding: utf-8 -*-
 """Call the function run_module when executed.
 
-This file indicates that calling the module (`python -m MODULE_NAME`) will
+This file indicates that calling the module (`python -m delphi_nchs_mortality`) will
 call the function `run_module` found within the run.py file. There should be
 no need to change this template.
 """
nchs_mortality/delphi_nchs_mortality/archive_diffs.py

Lines changed: 85 additions & 0 deletions

@@ -0,0 +1,85 @@
+# -*- coding: utf-8 -*-
+"""Function for diffing and archiving."""
+
+from os import remove, listdir
+from os.path import join
+from shutil import copy
+from datetime import datetime
+
+from delphi_utils import S3ArchiveDiffer
+
+def arch_diffs(params, daily_arch_diff):
+    """
+    We check for updates to NCHS mortality data every weekday, as it is
+    reported by NCHS, and stash these daily updates on S3, but not in our API.
+    On a weekly level (on Mondays), we additionally upload the changes to the
+    data made over the past week (due to backfill) to our API.
+
+    Parameters:
+    -----------
+    params: dict
+        Read from params.json
+    daily_arch_diff: S3ArchiveDiffer
+        Used to store and update cache
+    """
+
+    export_dir = params["export_dir"]
+    daily_export_dir = params["daily_export_dir"]
+    cache_dir = params["cache_dir"]
+
+    # Weekly run of archive utility on Monday
+    # - Does not upload to S3, that is handled by daily run of archive utility
+    # - Exports issues into receiving for the API
+    if datetime.today().weekday() == 0:
+        # Copy today's raw output to receiving
+        for output_file in listdir(daily_export_dir):
+            copy(
+                join(daily_export_dir, output_file),
+                join(export_dir, output_file))
+
+        weekly_arch_diff = S3ArchiveDiffer(
+            cache_dir, export_dir,
+            params["bucket_name"], "nchs_mortality",
+            params["aws_credentials"])
+
+        # Don't update cache from S3 (it has daily files), only simulate an update_cache() call
+        weekly_arch_diff._cache_updated = True  # pylint: disable=protected-access
+
+        # Diff exports, and make incremental versions
+        _, common_diffs, new_files = weekly_arch_diff.diff_exports()
+
+        # Archive changed and new files only
+        to_archive = [f for f, diff in common_diffs.items() if diff is not None]
+        to_archive += new_files
+        _, fails = weekly_arch_diff.archive_exports(to_archive, update_s3=False)
+
+        # Filter existing exports to exclude those that failed to archive
+        succ_common_diffs = {f: diff for f, diff in common_diffs.items() if f not in fails}
+        weekly_arch_diff.filter_exports(succ_common_diffs)
+
+        # Report failures: someone should probably look at them
+        for exported_file in fails:
+            print(f"Failed to archive (weekly) '{exported_file}'")
+
+    # Daily run of archiving utility
+    # - Uploads changed files to S3
+    # - Does not export any issues into receiving
+
+    # Diff exports, and make incremental versions
+    _, common_diffs, new_files = daily_arch_diff.diff_exports()
+
+    # Archive changed and new files only
+    to_archive = [f for f, diff in common_diffs.items() if diff is not None]
+    to_archive += new_files
+    _, fails = daily_arch_diff.archive_exports(to_archive)
+
+    # Daily output not needed anymore, remove them
+    for exported_file in new_files:
+        remove(exported_file)
+    for exported_file, diff_file in common_diffs.items():
+        remove(exported_file)
+        remove(diff_file)
+
+    # Report failures: someone should probably look at them
+    for exported_file in fails:
+        print(f"Failed to archive (daily) '{exported_file}'")
nchs_mortality/delphi_nchs_mortality/constants.py

Lines changed: 22 additions & 0 deletions

@@ -0,0 +1,22 @@
+"""Registry for constants"""
+# global constants
+METRICS = [
+    "covid_deaths", "total_deaths", "percent_of_expected_deaths",
+    "pneumonia_deaths", "pneumonia_and_covid_deaths", "influenza_deaths",
+    "pneumonia_influenza_or_covid_19_deaths"
+]
+SENSOR_NAME_MAP = {
+    "covid_deaths": "deaths_covid_incidence",
+    "total_deaths": "deaths_allcause_incidence",
+    "percent_of_expected_deaths": "deaths_percent_of_expected",
+    "pneumonia_deaths": "deaths_pneumonia_notflu_incidence",
+    "pneumonia_and_covid_deaths": "deaths_covid_and_pneumonia_notflu_incidence",
+    "influenza_deaths": "deaths_flu_incidence",
+    "pneumonia_influenza_or_covid_19_deaths": "deaths_pneumonia_or_flu_or_covid_incidence"
+}
+SENSORS = [
+    "num",
+    "prop"
+]
+INCIDENCE_BASE = 100000
+GEO_RES = "state"
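To illustrate what these constants encode (illustrative only, not part of the commit; the num-to-prop conversion shown is the conventional reading of INCIDENCE_BASE):

from delphi_nchs_mortality.constants import SENSOR_NAME_MAP, SENSORS, INCIDENCE_BASE

# An exported signal name pairs a mapped metric name with a sensor suffix:
print(f"{SENSOR_NAME_MAP['covid_deaths']}_{SENSORS[0]}")  # deaths_covid_incidence_num

# A "prop" sensor rescales a count to a rate per INCIDENCE_BASE (100,000) people:
num_deaths = 25          # hypothetical weekly count for one state
population = 5_000_000   # hypothetical state population
print(num_deaths / population * INCIDENCE_BASE)  # 0.5 per 100,000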

nchs_mortality/delphi_nchs_mortality/run.py

Lines changed: 5 additions & 77 deletions
@@ -2,41 +2,20 @@
 """Functions to call when running the function.
 
 This module should contain a function called `run_module`, that is executed
-when the module is run with `python -m MODULE_NAME`.
+when the module is run with `python -m delphi_nchs_mortality`.
 """
 from datetime import datetime, date, timedelta
 from os.path import join
-from os import remove, listdir
-from shutil import copy
 
 import numpy as np
 import pandas as pd
 from delphi_utils import read_params, S3ArchiveDiffer
 
 from .pull import pull_nchs_mortality_data
 from .export import export_csv
-
-# global constants
-METRICS = [
-    'covid_deaths', 'total_deaths', 'percent_of_expected_deaths',
-    'pneumonia_deaths', 'pneumonia_and_covid_deaths', 'influenza_deaths',
-    'pneumonia_influenza_or_covid_19_deaths'
-]
-SENSOR_NAME_MAP = {
-    "covid_deaths": "deaths_covid_incidence",
-    "total_deaths": "deaths_allcause_incidence",
-    "percent_of_expected_deaths": "deaths_percent_of_expected",
-    "pneumonia_deaths": "deaths_pneumonia_notflu_incidence",
-    "pneumonia_and_covid_deaths": "deaths_covid_and_pneumonia_notflu_incidence",
-    "influenza_deaths": "deaths_flu_incidence",
-    "pneumonia_influenza_or_covid_19_deaths": "deaths_pneumonia_or_flu_or_covid_incidence"
-}
-SENSORS = [
-    "num",
-    "prop"
-]
-INCIDENCE_BASE = 100000
-GEO_RES = "state"
+from .archive_diffs import arch_diffs
+from .constants import (METRICS, SENSOR_NAME_MAP,
+                        SENSORS, INCIDENCE_BASE, GEO_RES)
 
 def run_module(): # pylint: disable=too-many-branches,too-many-statements
     """Run module for processing NCHS mortality data."""
@@ -46,9 +25,7 @@ def run_module(): # pylint: disable=too-many-branches,too-many-statements
     export_start_date = date.today() - timedelta(
         days=date.today().weekday() + 2)
     export_start_date = export_start_date.strftime('%Y-%m-%d')
-    export_dir = params["export_dir"]
     daily_export_dir = params["daily_export_dir"]
-    cache_dir = params["cache_dir"]
     daily_cache_dir = params["daily_cache_dir"]
     static_file_dir = params["static_file_dir"]
     token = params["token"]
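An aside on the `export_start_date` arithmetic kept in this hunk: subtracting `weekday() + 2` days always lands on the Saturday before the current week, regardless of which weekday the job runs. A worked check (the date is hypothetical):

from datetime import date, timedelta

today = date(2020, 10, 26)  # a Monday, so today.weekday() == 0
start = today - timedelta(days=today.weekday() + 2)
print(start.strftime('%Y-%m-%d'))  # 2020-10-24, the preceding Saturday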
@@ -100,56 +77,7 @@ def run_module(): # pylint: disable=too-many-branches,too-many-statements
     # Weekly run of archive utility on Monday
     # - Does not upload to S3, that is handled by daily run of archive utility
     # - Exports issues into receiving for the API
-    if datetime.today().weekday() == 0:
-        # Copy todays raw output to receiving
-        for output_file in listdir(daily_export_dir):
-            copy(
-                join(daily_export_dir, output_file),
-                join(export_dir, output_file))
-
-        weekly_arch_diff = S3ArchiveDiffer(
-            cache_dir, export_dir,
-            params["bucket_name"], "nchs_mortality",
-            params["aws_credentials"])
-
-        # Dont update cache from S3 (has daily files), only simulate a update_cache() call
-        weekly_arch_diff._cache_updated = True # pylint: disable=protected-access
-
-        # Diff exports, and make incremental versions
-        _, common_diffs, new_files = weekly_arch_diff.diff_exports()
-
-        # Archive changed and new files only
-        to_archive = [f for f, diff in common_diffs.items() if diff is not None]
-        to_archive += new_files
-        _, fails = weekly_arch_diff.archive_exports(to_archive, update_s3=False)
-
-        # Filter existing exports to exclude those that failed to archive
-        succ_common_diffs = {f: diff for f, diff in common_diffs.items() if f not in fails}
-        weekly_arch_diff.filter_exports(succ_common_diffs)
-
-        # Report failures: someone should probably look at them
-        for exported_file in fails:
-            print(f"Failed to archive (weekly) '{exported_file}'")
-
     # Daily run of archiving utility
     # - Uploads changed files to S3
     # - Does not export any issues into receiving
-
-    # Diff exports, and make incremental versions
-    _, common_diffs, new_files = daily_arch_diff.diff_exports()
-
-    # Archive changed and new files only
-    to_archive = [f for f, diff in common_diffs.items() if diff is not None]
-    to_archive += new_files
-    _, fails = daily_arch_diff.archive_exports(to_archive)
-
-    # Daily output not needed anymore, remove them
-    for exported_file in new_files:
-        remove(exported_file)
-    for exported_file, diff_file in common_diffs.items():
-        remove(exported_file)
-        remove(diff_file)
-
-    # Report failures: someone should probably look at them
-    for exported_file in fails:
-        print(f"Failed to archive (daily) '{exported_file}'")
+    arch_diffs(params, daily_arch_diff)
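Since the refactor only moves code, a quick smoke test of the new module layout is enough to catch wiring mistakes; everything referenced below comes from this diff:

from delphi_nchs_mortality.constants import METRICS, SENSORS, INCIDENCE_BASE, GEO_RES
from delphi_nchs_mortality.archive_diffs import arch_diffs

assert "covid_deaths" in METRICS
assert SENSORS == ["num", "prop"]
assert INCIDENCE_BASE == 100000 and GEO_RES == "state"
assert callable(arch_diffs)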
