Commit 831f554

Merge branch 'nancodes' into nans_safegraph_patterns

2 parents: 1df3553 + 801f04c

253 files changed: +17552, −600216 lines

.github/workflows/python-ci.yml

Lines changed: 1 addition & 2 deletions

@@ -16,7 +16,7 @@ jobs:
     if: github.event.pull_request.draft == false
     strategy:
       matrix:
-        packages: [_delphi_utils_python, cdc_covidnet, changehc, claims_hosp, combo_cases_and_deaths, covid_act_now, google_symptoms, hhs_hosp, hhs_facilities, jhu, nchs_mortality, nowcast, quidel, quidel_covidtest, safegraph, safegraph_patterns, usafacts]
+        packages: [_delphi_utils_python, cdc_covidnet, changehc, claims_hosp, combo_cases_and_deaths, covid_act_now, doctor_visits, google_symptoms, hhs_hosp, hhs_facilities, jhu, nchs_mortality, nowcast, quidel, quidel_covidtest, safegraph, safegraph_patterns, usafacts]
     defaults:
       run:
         working-directory: ${{ matrix.packages }}
@@ -34,7 +34,6 @@ jobs:
      run: |
        make install
    - name: Lint
-      if: ${{ matrix.packages != 'claims_hosp' }}
      run: |
        make lint
    - name: Test

_delphi_utils_python/Makefile

Lines changed: 3 additions & 5 deletions

@@ -1,7 +1,5 @@
 .PHONY = venv, lint, test, clean
 
-dir = $(shell find ./delphi_* -name __init__.py | grep -o 'delphi_[_[:alnum:]]*')
-
 venv:
 	python3.8 -m venv env
 
@@ -11,12 +9,12 @@ install: venv
 	pip install -e .
 
 lint:
-	. env/bin/activate; pylint $(dir)
-	. env/bin/activate; pydocstyle $(dir)
+	. env/bin/activate; pylint delphi_utils
+	. env/bin/activate; pydocstyle delphi_utils
 
 test:
 	. env/bin/activate ;\
-	(cd tests && ../env/bin/pytest --cov=$(dir) --cov-report=term-missing)
+	(cd tests && ../env/bin/pytest --cov=delphi_utils --cov-report=term-missing)
 
 clean:
 	rm -rf env

_delphi_utils_python/delphi_utils/__init__.py

Lines changed: 1 addition & 1 deletion

@@ -12,6 +12,6 @@
 from .geomap import GeoMapper
 from .smooth import Smoother
 from .signal import add_prefix
-from .nancodes import NAN_CODES
+from .nancodes import Nans
 
 __version__ = "0.1.0"

_delphi_utils_python/delphi_utils/archive.py

Lines changed: 16 additions & 4 deletions

@@ -641,12 +641,24 @@ def update_cache(self):
                         help="Commit message for `archive_type` = 'git'")
     args = parser.parse_args()
     params = read_params()
+
+    # Autodetect whether parameters have been factored hierarchically or not
+    # See https://github.com/cmu-delphi/covidcast-indicators/issues/847
+    # Once all indicators have their parameters factored in to "common", "indicator", "validation",
+    # and "archive", this code will be obsolete.
+    if "archive" in params:
+        archive_params = params["archive"]
+        common_params = params["common"]
+    else:
+        archive_params = params
+        common_params = params
+
     run_module(args.archive_type,
-               params["cache_dir"],
-               params["export_dir"],
-               aws_credentials=params.get("aws_credentials", {}),
+               archive_params["cache_dir"],
+               common_params["export_dir"],
+               aws_credentials=archive_params.get("aws_credentials", {}),
                branch_name=args.branch_name,
-               bucket_name=params.get("bucket_name", ""),
+               bucket_name=archive_params.get("bucket_name", ""),
                commit_message=args.commit_message,
                commit_partial_success=args.commit_partial_success,
                indicator_prefix=args.indicator_prefix,
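
Note: for context, here is a minimal sketch of the two params.json layouts this autodetection accepts, written as Python dicts as read_params() would return them. The paths, bucket name, and credential value are hypothetical placeholders, not taken from the commit.

# Hierarchically factored layout (issue #847): archive settings live under
# an "archive" key, shared settings under "common".
params_factored = {
    "common": {"export_dir": "./receiving"},                # hypothetical path
    "archive": {
        "cache_dir": "./cache",                             # hypothetical path
        "bucket_name": "example-bucket",                    # hypothetical name
        "aws_credentials": {"aws_access_key_id": "..."},
    },
}

# Legacy flat layout: every key sits at the top level, so the same dict
# serves as both archive_params and common_params in the code above.
params_flat = {
    "export_dir": "./receiving",
    "cache_dir": "./cache",
    "bucket_name": "example-bucket",
    "aws_credentials": {"aws_access_key_id": "..."},
}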

_delphi_utils_python/delphi_utils/export.py

Lines changed: 8 additions & 8 deletions

@@ -64,16 +64,16 @@ def create_export_csv(
         else:
             export_filename = f"{date.strftime('%Y%m%d')}_{geo_res}_{metric}_{sensor}.csv"
         export_file = join(export_dir, export_filename)
-        EXPECTED_COLUMNS = [
-            "geo_id",
-            "val",
-            "se",
-            "sample_size",
-            "missing_val",
-            "missing_se",
+        expected_columns = [
+            "geo_id",
+            "val",
+            "se",
+            "sample_size",
+            "missing_val",
+            "missing_se",
             "missing_sample_size"
         ]
-        export_df = df[df["timestamp"] == date].filter(items=EXPECTED_COLUMNS)
+        export_df = df[df["timestamp"] == date].filter(items=expected_columns)
         if remove_null_samples:
             export_df = export_df[export_df["sample_size"].notnull()]
         export_df = export_df.round({"val": 7, "se": 7})
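
Note: as a usage sketch with illustrative values only, a dataframe passed to create_export_csv would now carry the three missingness columns alongside the standard ones; create_export_csv keeps only expected_columns when writing each date's CSV.

import pandas as pd
from delphi_utils import Nans

# Illustrative input frame (values are made up, not from the commit).
df = pd.DataFrame({
    "timestamp": pd.to_datetime(["2020-06-01", "2020-06-01"]),
    "geo_id": ["ca", "ny"],
    "val": [1.5, None],
    "se": [0.1, None],
    "sample_size": [100.0, None],
    "missing_val": [Nans.NOT_MISSING, Nans.DATA_INSUFFICIENT],
    "missing_se": [Nans.NOT_MISSING, Nans.DATA_INSUFFICIENT],
    "missing_sample_size": [Nans.NOT_MISSING, Nans.DATA_INSUFFICIENT],
})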

_delphi_utils_python/delphi_utils/geomap.py

Lines changed: 87 additions & 89 deletions

@@ -141,85 +141,77 @@ def _load_crosswalk(self, from_code, to_code):
         assert from_code in self.crosswalk_filepaths, \
             f"No crosswalk files for {from_code}; try {'; '.join(self.crosswalk_filepaths.keys())}"
         assert to_code in self.crosswalk_filepaths[from_code], \
-            f"No crosswalk file from {from_code} to {to_code}; try" \
+            f"No crosswalk file from {from_code} to {to_code}; try " \
             f"{'; '.join(self.crosswalk_filepaths[from_code].keys())}"
+
+        if self.crosswalks[from_code][to_code] is None:
+            self.crosswalks[from_code][to_code] = self._load_crosswalk_from_file(from_code, to_code)
+        return self.crosswalks[from_code][to_code]
+
+    def _load_crosswalk_from_file(self, from_code, to_code):
         stream = pkg_resources.resource_stream(
             __name__, self.crosswalk_filepaths[from_code][to_code]
         )
-        if self.crosswalks[from_code][to_code] is None:
-            # Weighted crosswalks
-            if (from_code, to_code) in [
-                ("zip", "fips"),
-                ("fips", "zip"),
-                ("jhu_uid", "fips"),
-                ("zip", "msa"),
-                ("fips", "hrr"),
-                ("zip", "hhs")
-            ]:
-                self.crosswalks[from_code][to_code] = pd.read_csv(
-                    stream,
-                    dtype={
-                        from_code: str,
-                        to_code: str,
-                        "weight": float,
-                    },
-                )
-            # Unweighted crosswalks
-            elif (from_code, to_code) in [
-                ("zip", "hrr"),
-                ("fips", "msa"),
-                ("fips", "hhs"),
-                ("state_code", "hhs")
-            ]:
-                self.crosswalks[from_code][to_code] = pd.read_csv(
-                    stream,
-                    dtype={from_code: str, to_code: str},
-                )
-            # Special table of state codes, state IDs, and state names
-            elif (from_code, to_code) == ("state", "state"):
-                self.crosswalks[from_code][to_code] = pd.read_csv(
-                    stream,
-                    dtype={
-                        "state_code": str,
-                        "state_id": str,
-                        "state_name": str,
-                    },
-                )
-            elif (from_code, to_code) == ("zip", "state"):
-                self.crosswalks[from_code][to_code] = pd.read_csv(
-                    stream,
-                    dtype={
-                        "zip": str,
-                        "weight": float,
-                        "state_code": str,
-                        "state_id": str,
-                        "state_name": str,
-                    },
-                )
-            elif (from_code, to_code) == ("fips", "state"):
-                self.crosswalks[from_code][to_code] = pd.read_csv(
-                    stream,
-                    dtype={
-                        "fips": str,
-                        "state_code": str,
-                        "state_id": str,
-                        "state_name": str,
-                    },
-                )
-            # Population tables
-            elif to_code == "pop":
-                self.crosswalks[from_code][to_code] = pd.read_csv(
-                    stream,
-                    dtype={
-                        from_code: str,
-                        "pop": int,
-                    },
-                    usecols=[
-                        from_code,
-                        "pop"
-                    ]
-                )
-        return self.crosswalks[from_code][to_code]
+        usecols = None
+        dtype = None
+        # Weighted crosswalks
+        if (from_code, to_code) in [
+            ("zip", "fips"),
+            ("fips", "zip"),
+            ("jhu_uid", "fips"),
+            ("zip", "msa"),
+            ("fips", "hrr"),
+            ("zip", "hhs")
+        ]:
+            dtype = {
+                from_code: str,
+                to_code: str,
+                "weight": float,
+            }
+
+        # Unweighted crosswalks
+        elif (from_code, to_code) in [
+            ("zip", "hrr"),
+            ("fips", "msa"),
+            ("fips", "hhs"),
+            ("state_code", "hhs")
+        ]:
+            dtype = {from_code: str, to_code: str}
+
+        # Special table of state codes, state IDs, and state names
+        elif (from_code, to_code) == ("state", "state"):
+            dtype = {
+                "state_code": str,
+                "state_id": str,
+                "state_name": str,
+            }
+        elif (from_code, to_code) == ("zip", "state"):
+            dtype = {
+                "zip": str,
+                "weight": float,
+                "state_code": str,
+                "state_id": str,
+                "state_name": str,
+            }
+        elif (from_code, to_code) == ("fips", "state"):
+            dtype = {
+                "fips": str,
+                "state_code": str,
+                "state_id": str,
+                "state_name": str,
+            }
+
+        # Population tables
+        elif to_code == "pop":
+            dtype = {
+                from_code: str,
+                "pop": int,
+            }
+            usecols = [
+                from_code,
+                "pop"
+            ]
+        return pd.read_csv(stream, dtype=dtype, usecols=usecols)
 
     @staticmethod
     def convert_fips_to_mega(data, fips_col="fips", mega_col="megafips"):
@@ -333,19 +325,8 @@ def add_geocode(
         else:
             df[from_col] = df[from_col].astype(str)
 
-        # Assuming that the passed-in records are all United States data, at the moment
-        if (from_code, new_code) in [("fips", "nation"),  # pylint: disable=no-else-return
-                                     ("zip", "nation"),
-                                     ("state_code", "nation"),
-                                     ("state_name", "nation"),
-                                     ("state_id", "nation")]:
-            df[new_col] = df[from_col].apply(lambda x: "us")
-            return df
-        elif new_code == "nation":
-            raise ValueError(
-                f"Conversion to the nation level is not supported "
-                f"from {from_code}; try fips, zip, or state_*"
-            )
+        if new_code == "nation":
+            return self._add_nation_geocode(df, from_code, from_col, new_col)
 
         # state codes are all stored in one table
         if from_code in state_codes and new_code in state_codes:
@@ -375,11 +356,28 @@ def add_geocode(
             df.drop(columns=state_codes, inplace=True)
         elif new_code in state_codes and from_code in state_codes:
             state_codes.remove(new_code)
-            state_codes.remove(from_code)
+            if from_code in state_codes:
+                state_codes.remove(from_code)
             df.drop(columns=state_codes, inplace=True)
 
         return df
 
+    def _add_nation_geocode(self, df, from_code, from_col, new_col):
+        """Add a nation geocode column to a dataframe.
+
+        See `add_geocode()` documentation for argument description.
+        """
+        valid_from_codes = ["fips", "zip", "state_code", "state_name", "state_id"]
+        # Assuming that the passed-in records are all United States data, at the moment
+        if from_code in valid_from_codes:
+            df[new_col] = df[from_col].apply(lambda x: "us")
+            return df
+
+        raise ValueError(
+            f"Conversion to the nation level is not supported "
+            f"from {from_code}; try {valid_from_codes}"
+        )
+
     def replace_geocode(
         self,
         df,
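
Note: a brief usage sketch of the refactored nation conversion, with made-up data values; the method and argument names match the hunks above.

import pandas as pd
from delphi_utils import GeoMapper

gm = GeoMapper()
df = pd.DataFrame({"fips": ["06037", "36061"], "val": [1.0, 2.0]})

# add_geocode() now delegates to _add_nation_geocode(), which maps every
# supported from_code ("fips", "zip", "state_code", "state_name",
# "state_id") to the single code "us".
df = gm.add_geocode(df, "fips", "nation", new_col="nation")
assert (df["nation"] == "us").all()

# An unsupported from_code such as "hrr" raises ValueError listing the
# valid codes.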
_delphi_utils_python/delphi_utils/nancodes.py

Lines changed: 12 additions & 7 deletions

@@ -1,8 +1,13 @@
+"""Provides unified not-a-number codes for the indicators."""
 
-NAN_CODES = {
-    "Not Missing": 0,
-    "Not Applicable": 1,
-    "Region Exception": 2,
-    "Data Insufficient": 3,
-    "Unknown": 4
-}
+from enum import IntEnum
+
+class Nans(IntEnum):
+    """An enum of not-a-number codes for the indicators."""
+
+    NOT_MISSING = 0
+    NOT_APPLICABLE = 1
+    REGION_EXCEPTION = 2
+    DATA_INSUFFICIENT = 3
+    PRIVACY = 4
+    UNKNOWN = 5
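
Note: since Nans is an IntEnum, its members compare and serialize as plain integers, which is what the missingness columns in the exported CSVs rely on. A quick illustrative sketch:

from delphi_utils import Nans

assert Nans.NOT_MISSING == 0        # IntEnum members equal their int values
assert int(Nans.PRIVACY) == 4
assert Nans.UNKNOWN.name == "UNKNOWN"

# e.g. tagging a censored observation (illustrative, not from the commit):
row = {"val": None, "missing_val": int(Nans.PRIVACY)}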
_delphi_utils_python/delphi_utils/runner.py

Lines changed: 35 additions & 0 deletions

@@ -0,0 +1,35 @@
+"""Indicator running utilities."""
+from typing import Any, Callable, Dict, Optional
+from .archive import ArchiveDiffer
+from .utils import read_params
+from .validator.validate import Validator
+
+Params = Dict[str, Any]
+
+# Trivial function to use as default value for validator and archive functions.
+NULL_FN = lambda x: None
+
+def run_indicator_pipeline(indicator_fn: Callable[[Params], None],
+                           validator_fn: Callable[[Params], Optional[Validator]] = NULL_FN,
+                           archiver_fn: Callable[[Params], Optional[ArchiveDiffer]] = NULL_FN):
+    """Run an indicator with its optional validation and archiving.
+
+    Arguments
+    ---------
+    indicator_fn: Callable[[Params], None]
+        function that takes a dictionary of parameters and produces indicator output
+    validator_fn: Callable[[Params], Optional[Validator]]
+        function that takes a dictionary of parameters and produces the associated Validator or
+        None if no validation should be performed.
+    archiver_fn: Callable[[Params], Optional[ArchiveDiffer]]
+        function that takes a dictionary of parameters and produces the associated ArchiveDiffer or
+        None if no archiving should be performed.
+    """
+    params = read_params()
+    indicator_fn(params)
+    validator = validator_fn(params)
+    archiver = archiver_fn(params)
+    if validator:
+        validation_report = validator.validate()
+    if archiver and (not validator or validation_report.success()):
+        archiver.archive()
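
Note: a minimal wiring sketch for the new helper. The indicator entry point and the way the Validator is constructed here are hypothetical, not taken from the commit; the import path assumes the module lands at delphi_utils/runner.py as headed above.

from delphi_utils.runner import run_indicator_pipeline
from delphi_utils.validator.validate import Validator

def run_my_indicator(params):
    # Hypothetical indicator: read its params and write CSVs to the export dir.
    ...

run_indicator_pipeline(
    run_my_indicator,
    validator_fn=lambda params: Validator(params),  # assumed constructor
    # archiver_fn left at its NULL_FN default, so no archiving happens.
)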
