Skip to content

Commit 6abcab3

Browse files
authored
Merge pull request #1481 from cmu-delphi/release/indicators_v0.2.22_utils_v0.2.10
Release covidcast-indicators 0.2.22
2 parents b685b2e + 3af5cdc commit 6abcab3

32 files changed

+1328
-32
lines changed

.bumpversion.cfg

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
[bumpversion]
2-
current_version = 0.2.21
2+
current_version = 0.2.22
33
commit = True
44
message = chore: bump covidcast-indicators to {new_version}
55
tag = False

.github/workflows/python-ci.yml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -16,7 +16,7 @@ jobs:
1616
if: github.event.pull_request.draft == false
1717
strategy:
1818
matrix:
19-
packages: [_delphi_utils_python, changehc, claims_hosp, combo_cases_and_deaths, doctor_visits, google_symptoms, hhs_hosp, hhs_facilities, jhu, nchs_mortality, nowcast, quidel, quidel_covidtest, safegraph_patterns, sir_complainsalot, usafacts]
19+
packages: [_delphi_utils_python, changehc, claims_hosp, combo_cases_and_deaths, doctor_visits, dsew_community_profile, google_symptoms, hhs_hosp, hhs_facilities, jhu, nchs_mortality, nowcast, quidel, quidel_covidtest, safegraph_patterns, sir_complainsalot, usafacts]
2020
defaults:
2121
run:
2222
working-directory: ${{ matrix.packages }}

Jenkinsfile

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -9,7 +9,7 @@
99
- Keep in sync with '.github/workflows/python-ci.yml'.
1010
- TODO: #527 Get this list automatically from python-ci.yml at runtime.
1111
*/
12-
def indicator_list = ["changehc", "claims_hosp", "facebook", "google_symptoms", "hhs_hosp", "jhu", "nchs_mortality", "quidel", "quidel_covidtest", "safegraph_patterns", "sir_complainsalot", "usafacts"]
12+
def indicator_list = ["changehc", "claims_hosp", "facebook", "google_symptoms", "hhs_hosp", "jhu", "nchs_mortality", "quidel", "quidel_covidtest", "safegraph_patterns", "sir_complainsalot", "usafacts", "dsew_community_profile"]
1313
def build_package = [:]
1414
def deploy_staging = [:]
1515
def deploy_production = [:]

_delphi_utils_python/.bumpversion.cfg

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
[bumpversion]
2-
current_version = 0.2.9
2+
current_version = 0.2.10
33
commit = True
44
message = chore: bump delphi_utils to {new_version}
55
tag = False

_delphi_utils_python/delphi_utils/__init__.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -15,4 +15,4 @@
1515
from .nancodes import Nans
1616
from .weekday import Weekday
1717

18-
__version__ = "0.2.9"
18+
__version__ = "0.2.10"

_delphi_utils_python/delphi_utils/validator/datafetcher.py

Lines changed: 6 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -111,14 +111,9 @@ def get_geo_signal_combos(data_source):
111111
Cross references based on combinations reported available by COVIDcast metadata.
112112
"""
113113
# Maps data_source name with what's in the API, lists used in case of multiple names
114-
# pylint: disable=fixme
115-
# TODO: Extract this mapping from meta response instead of hard-coding
116-
# https://github.com/cmu-delphi/covidcast-indicators/issues/1457
117-
source_signal_mappings = {
118-
'indicator-combination': ['indicator-combination-cases-deaths'],
119-
'quidel': ['quidel-covid-ag'],
120-
'safegraph': ['safegraph-weekly']
121-
}
114+
115+
source_signal_mappings = {i['source']:i['db_source'] for i in
116+
requests.get("https://api.covidcast.cmu.edu/epidata/covidcast/meta").json()}
122117
meta = covidcast.metadata()
123118
source_meta = meta[meta['data_source'] == data_source]
124119
# Need to convert np.records to tuples so they are hashable and can be used in sets and dicts.
@@ -130,8 +125,9 @@ def get_geo_signal_combos(data_source):
130125
# True/False indicate if status is active, "unknown" means we should check
131126
sig_combo_seen = dict()
132127
for combo in geo_signal_combos:
133-
if source_signal_mappings.get(data_source):
134-
src_list = source_signal_mappings.get(data_source)
128+
if data_source in source_signal_mappings.values():
129+
src_list = [key for (key, value) in source_signal_mappings.items()
130+
if value == data_source]
135131
else:
136132
src_list = [data_source]
137133
for src in src_list:

_delphi_utils_python/setup.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -26,7 +26,7 @@
2626

2727
setup(
2828
name="delphi_utils",
29-
version="0.2.9",
29+
version="0.2.10",
3030
description="Shared Utility Functions for Indicators",
3131
long_description=long_description,
3232
long_description_content_type="text/markdown",

_delphi_utils_python/tests/validator/test_datafetcher.py

Lines changed: 26 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -21,25 +21,44 @@ def test_make_date_filter(self):
2121
assert not date_filter(FILENAME_REGEX.match("20200620_a_b.csv"))
2222
assert not date_filter(FILENAME_REGEX.match("202006_a_b.csv"))
2323

24-
# pylint: disable=fixme
25-
# TODO: mock out the advanced meta endpoint /covidcast/meta as well
26-
# https://github.com/cmu-delphi/covidcast-indicators/issues/1456
24+
# Solution from https://stackoverflow.com/questions/15753390/
25+
#how-can-i-mock-requests-and-the-response
26+
def mocked_requests_get(*args, **kwargs):
27+
class MockResponse:
28+
def __init__(self, json_data, status_code):
29+
self.json_data = json_data
30+
self.status_code = status_code
31+
32+
def json(self):
33+
return self.json_data
34+
if len(kwargs) == 0:
35+
return MockResponse([{'source': 'chng', 'db_source': 'chng'},
36+
{'source': 'covid-act-now', 'db_source': 'covid-act-now'}], 200)
37+
elif kwargs["params"] == {'signal': 'chng:inactive'}:
38+
return MockResponse([{"signals": [{"active": False}]}], 200)
39+
else:
40+
return MockResponse([{"signals": [{"active": True}]}], 200)
41+
@mock.patch('requests.get', side_effect=mocked_requests_get)
2742
@mock.patch("covidcast.metadata")
28-
def test_get_geo_signal_combos(self, mock_metadata):
43+
def test_get_geo_signal_combos(self, mock_metadata, mock_get):
2944
"""Test that the geo signal combos are correctly pulled from the covidcast metadata."""
3045
# Need to use actual data_source and signal names since we reference the API
46+
# We let the chng signal "inactive" be an inactive signal
3147
mock_metadata.return_value = pd.DataFrame({"data_source": ["chng", "chng", "chng",
3248
"covid-act-now",
3349
"covid-act-now",
34-
"covid-act-now"],
50+
"covid-act-now",
51+
"chng"],
3552
"signal": ["smoothed_outpatient_cli",
3653
"smoothed_outpatient_covid",
3754
"smoothed_outpatient_covid",
3855
"pcr_specimen_positivity_rate",
3956
"pcr_specimen_positivity_rate",
40-
"pcr_specimen_total_tests"],
57+
"pcr_specimen_total_tests",
58+
"inactive"],
4159
"geo_type": ["state", "state", "county",
42-
"hrr", "msa", "msa"]
60+
"hrr", "msa", "msa",
61+
"state"]
4362
})
4463

4564
assert set(get_geo_signal_combos("chng")) == set(
Lines changed: 32 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,32 @@
1+
{
2+
"common": {
3+
"export_dir": "/common/covidcast/receiving/dsew-cpr",
4+
"log_filename": "/var/log/indicators/dsew_cpr.log"
5+
},
6+
"indicator": {
7+
"input_cache": "./input_cache",
8+
"reports": "new"
9+
},
10+
"validation": {
11+
"common": {
12+
"data_source": "dsew-cpr",
13+
"span_length": 14,
14+
"min_expected_lag": {"all": "5"},
15+
"max_expected_lag": {"all": "9"},
16+
"dry_run": true,
17+
"suppressed_errors": []
18+
},
19+
"static": {
20+
"minimum_sample_size": 0,
21+
"missing_se_allowed": true,
22+
"missing_sample_size_allowed": true
23+
},
24+
"dynamic": {
25+
"ref_window_size": 7,
26+
"smoothed_signals": [
27+
"naats_total_7dav",
28+
"naats_positivity_7dav"
29+
]
30+
}
31+
}
32+
}

ansible/templates/facebook-params-prod.json.j2

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -38,6 +38,7 @@
3838
"Survey of COVID-Like Illness - Wave 11": "fb-survey",
3939
"Survey of COVID-Like Illness - Wave 12": "fb-survey",
4040
"Survey of COVID-Like Illness - Wave 12 - Full Launch": "fb-survey",
41+
"Survey of COVID-Like Illness - Wave 13": "fb-survey",
4142
"Survey of COVID-Like Illness - Wave 4": "fb-survey",
4243
"Survey of COVID-Like Illness - Wave 5": "fb-survey",
4344
"Survey of COVID-Like Illness - Wave 6": "fb-survey",

ansible/templates/sir_complainsalot-params-prod.json.j2

Lines changed: 34 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -90,7 +90,40 @@
9090
["smoothed_dontneed_reason_not_high_risk", "hrr"], ["smoothed_wdontneed_reason_not_high_risk", "hrr"],
9191
["smoothed_dontneed_reason_not_serious", "hrr"], ["smoothed_wdontneed_reason_not_serious", "hrr"],
9292
["smoothed_dontneed_reason_other", "hrr"], ["smoothed_wdontneed_reason_other", "hrr"],
93-
["smoothed_dontneed_reason_precautions", "hrr"], ["smoothed_wdontneed_reason_precautions", "hrr"]
93+
["smoothed_dontneed_reason_precautions", "hrr"], ["smoothed_wdontneed_reason_precautions", "hrr"],
94+
"smoothed_screening_tested_positive_14d", "smoothed_wscreening_tested_positive_14d",
95+
"smoothed_travel_outside_state_7d", "smoothed_wtravel_outside_state_7d",
96+
"smoothed_belief_vaccinated_mask_unnecessary", "smoothed_wbelief_vaccinated_mask_unnecessary",
97+
"smoothed_belief_children_immune", "smoothed_wbelief_children_immune",
98+
"smoothed_received_2_vaccine_doses", "smoothed_wreceived_2_vaccine_doses",
99+
"smoothed_vaccine_barrier_eligible", "smoothed_wvaccine_barrier_eligible",
100+
"smoothed_vaccine_barrier_no_appointments", "smoothed_wvaccine_barrier_no_appointments",
101+
"smoothed_vaccine_barrier_appointment_time", "smoothed_wvaccine_barrier_appointment_time",
102+
"smoothed_vaccine_barrier_technical_difficulties", "smoothed_wvaccine_barrier_technical_difficulties",
103+
"smoothed_vaccine_barrier_document", "smoothed_wvaccine_barrier_document",
104+
"smoothed_vaccine_barrier_technology_access", "smoothed_wvaccine_barrier_technology_access",
105+
"smoothed_vaccine_barrier_travel", "smoothed_wvaccine_barrier_travel",
106+
"smoothed_vaccine_barrier_language", "smoothed_wvaccine_barrier_language",
107+
"smoothed_vaccine_barrier_childcare", "smoothed_wvaccine_barrier_childcare",
108+
"smoothed_vaccine_barrier_time", "smoothed_wvaccine_barrier_time",
109+
"smoothed_vaccine_barrier_type", "smoothed_wvaccine_barrier_type",
110+
"smoothed_vaccine_barrier_none", "smoothed_wvaccine_barrier_none",
111+
"smoothed_vaccine_barrier_appointment_location", "smoothed_wvaccine_barrier_appointment_location",
112+
"smoothed_vaccine_barrier_other", "smoothed_wvaccine_barrier_other",
113+
"smoothed_vaccine_barrier_eligible_has", "smoothed_wvaccine_barrier_eligible_has",
114+
"smoothed_vaccine_barrier_no_appointments_has", "smoothed_wvaccine_barrier_no_appointments_has",
115+
"smoothed_vaccine_barrier_appointment_time_has", "smoothed_wvaccine_barrier_appointment_time_has",
116+
"smoothed_vaccine_barrier_technical_difficulties_has", "smoothed_wvaccine_barrier_technical_difficulties_has",
117+
"smoothed_vaccine_barrier_document_has", "smoothed_wvaccine_barrier_document_has",
118+
"smoothed_vaccine_barrier_technology_access_has", "smoothed_wvaccine_barrier_technology_access_has",
119+
"smoothed_vaccine_barrier_travel_has", "smoothed_wvaccine_barrier_travel_has",
120+
"smoothed_vaccine_barrier_language_has", "smoothed_wvaccine_barrier_language_has",
121+
"smoothed_vaccine_barrier_childcare_has", "smoothed_wvaccine_barrier_childcare_has",
122+
"smoothed_vaccine_barrier_time_has", "smoothed_wvaccine_barrier_time_has",
123+
"smoothed_vaccine_barrier_type_has", "smoothed_wvaccine_barrier_type_has",
124+
"smoothed_vaccine_barrier_none_has", "smoothed_wvaccine_barrier_none_has",
125+
"smoothed_vaccine_barrier_appointment_location_has", "smoothed_wvaccine_barrier_appointment_location_has",
126+
"smoothed_vaccine_barrier_other_has", "smoothed_wvaccine_barrier_other_has"
94127
]
95128
},
96129
"quidel": {

dsew_community_profile/.pylintrc

Lines changed: 22 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,22 @@
1+
2+
[MESSAGES CONTROL]
3+
4+
disable=logging-format-interpolation,
5+
too-many-locals,
6+
too-many-arguments,
7+
# Allow pytest functions to be part of a class.
8+
no-self-use,
9+
# Allow pytest classes to have one test.
10+
too-few-public-methods
11+
12+
[BASIC]
13+
14+
# Allow arbitrarily short-named variables.
15+
variable-rgx=[a-z_][a-z0-9_]*
16+
argument-rgx=[a-z_][a-z0-9_]*
17+
attr-rgx=[a-z_][a-z0-9_]*
18+
19+
[DESIGN]
20+
21+
# Don't complain about pytest "unused" arguments.
22+
ignored-argument-names=(_.*|run_as_module)

dsew_community_profile/DETAILS.md

Lines changed: 133 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,133 @@
1+
# Dataset layout
2+
3+
The Data Strategy and Execution Workgroup (DSEW) publishes a Community Profile
4+
Report each weekday, comprising a pair of files: an Excel workbook (.xlsx) and a
5+
PDF which shows select metrics from the workbook as time series charts and
6+
choropleth maps. These files are listed as attachments on the healthdata.gov
7+
site:
8+
9+
https://healthdata.gov/Health/COVID-19-Community-Profile-Report/gqxm-d9w9
10+
11+
Each Excel file attachment has a filename. The filename contains a date,
12+
presumably the publish date. The attachment also has an alphanumeric
13+
assetId. Both the filename and the assetId are required for downloading the
14+
file. Whether this means that updated versions of a particular file may be
15+
uploaded by DSEW at later times is not known. The attachment does not explicitly
16+
list an upload timestamp. To be safe, we cache our downloads using both the
17+
assetId and the filename.
18+
19+
# Workbook layout
20+
21+
Each Excel file is a workbook with multiple sheets. The exemplar file used in
22+
writing this indicator is "Community Profile Report 20211102.xlsx". The sheets
23+
include:
24+
25+
- User Notes: Instructions for using the workbook
26+
- Overview: US National figures for the last 5 weeks, plus monthly peaks back to
27+
April 2020
28+
- Regions*: Figures for FEMA regions (double-checked: they match HHS regions
29+
except that FEMA 2 does not include Palau while HHS 2 does)
30+
- States*: Figures for US states and territories
31+
- CBSAs*: Figures for US Core-Based Statistical Areas
32+
- Counties*: Figures for US counties
33+
- Weekly Transmission Categories: Lists of high, substantial, and moderate
34+
transmission states and territories
35+
- National Peaks: Monthly national peaks back to April 2020
36+
- National Historic: Daily national figures back to January 22 2020
37+
- Data Notes: Source and methods information for all metrics
38+
- Color Thresholds: Color-coding is used extensively in all sheets; these are
39+
the keys
40+
41+
The starred sheets above have nearly-identical column layouts, and together
42+
cover the county, MSA, state, and HHS geographical levels used in
43+
covidcast. Rather than aggregate them ourselves and risk a mismatch, this
44+
indicator lifts these geographical aggregations directly from the corresponding
45+
sheets of the workbook.
46+
47+
GeoMapper _is_ used to generate national figures from
48+
state, due to architectural differences between the starred sheets and the
49+
Overview sheet. If we discover that our nation-level figures differ too much
50+
from those listed in the Overview sheet, we can add dedicated parsing for the
51+
Overview sheet and remove GeoMapper from this indicator altogether.
52+
53+
# Sheet layout
54+
55+
## Headers
56+
57+
Each starred sheet has two rows of headers. The first row uses merged cells to
58+
group several columns together under a single "overheader". This overheader
59+
often includes the reference period for that group of columns, such as:
60+
61+
- CASES/DEATHS: LAST WEEK (October 26-November 1)
62+
- TESTING: LAST WEEK (October 24-30, Test Volume October 20-26)
63+
- TESTING: PREVIOUS WEEK (October 17-23, Test Volume October 13-19)
64+
65+
Overheaders have changed periodically since the first report. For example, the
66+
"TESTING: LAST WEEK" overheader above has also appeared as "VIRAL (RT-PCR) LAB
67+
TESTING: LAST WEEK", with and without a separate reference date for Test
68+
Volume. All known overheader forms are checked in test_pull.py.
69+
70+
The second row contains a header for each column. The headers uniquely identify
71+
each column included in the sheet. Column headers include spaces, and typically
72+
specify both the metric and the reference period over which it was calculated,
73+
such as:
74+
75+
- Total NAATs - last 7 days (may be an underestimate due to delayed reporting)
76+
- NAAT positivity rate - previous 7 days (may be an underestimate due to delayed
77+
reporting)
78+
79+
Column headers have also changed periodically since the first report. For
80+
example, the "Total NAATs - last 7 days" header above has also appeared as
81+
"Total RT-PCR diagnostic tests - last 7 days".
82+
83+
## Contents
84+
85+
Each starred sheet contains test positivity and total test volume figures for
86+
two reference periods, "last [week]" and "previous [week]". In some reports, the
87+
reference periods for test positivity and total test volume are the same; in
88+
others, they are different, such that the report contains figures for four
89+
distinct reference periods, two for each metric we extract.
90+
91+
# Time series conversions and parsing notes
92+
93+
## Reference date
94+
95+
The reference period in the overheader never includes the year. We guess the
96+
reference year by picking the same year as the publish date (i.e., the date
97+
extracted from the filename), and if the reference month is greater than the
98+
publish month, subtract 1 from the reference year. This adequately covers the
99+
December-January boundary.
100+
101+
We select as reference date the end date of the reference period for each
102+
metric. Reference periods are always 7 days, so this indicator produces
103+
seven-day averages. We divide the total testing volume by seven and leave the
104+
test positivity alone.
105+
106+
## Geo ID
107+
108+
The Counties sheet lists FIPS codes numerically, such that FIPS with a leading
109+
zero only have four digits. We fix this by zero-filling to five characters.
110+
111+
MSAs are a subset of CBSAs. We fix this by selecting only CBSAs with type
112+
"Metropolitan".
113+
114+
Most of the starred sheets have the geo id as the first non-index column. The
115+
Regions sheet has no such column. We fix this by generating the HHS ids from the
116+
index column instead.
117+
118+
## Combining multiple reports
119+
120+
Each report file generates two reference dates for each metric, up to four
121+
reference dates total. Since it's not clear whether new versions of past files
122+
are ever made available, the default mode (params.indicator.reports="new")
123+
fetches any files that are not already in the input cache, then combines the
124+
results into a single data frame before exporting. This will generate correct
125+
behavior should (for instance) a previously-downloaded file get a new assetId.
126+
127+
For the initial run on an empty input cache, and for runs configured to process
128+
a range of reports (using params.indicator.reports=YYYY-mm-dd--YYYY-mm-dd), this
129+
indicator makes no distinction between figures that came from different
130+
reports. That may not be what you want. If the covidcast issue date needs to
131+
match the date on the report filename, then the indicator must instead be run
132+
repeatedly, with equal start and end dates, keeping the output of each run
133+
separate.

0 commit comments

Comments
 (0)