Skip to content

Commit 8f12844

Browse files
authored
Merge branch 'main' into utils-ci
2 parents 842298f + f66601f commit 8f12844

File tree

15 files changed

+117
-224
lines changed

15 files changed

+117
-224
lines changed

.github/workflows/python-ci.yml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -34,7 +34,7 @@ jobs:
3434
run: |
3535
make install
3636
- name: Lint
37-
if: ${{ matrix.packages != 'changehc' && matrix.packages != 'claims_hosp' && matrix.packages != 'quidel' && matrix.packages != 'usafacts'}}
37+
if: ${{ matrix.packages != 'claims_hosp' && matrix.packages != 'quidel' && matrix.packages != 'usafacts'}}
3838
run: |
3939
make lint
4040
- name: Test

_delphi_utils_python/delphi_utils/archive.py

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -588,11 +588,11 @@ def archive_exports(self, exported_files: Files) -> Tuple[Files, Files]:
588588
args = parser.parse_args()
589589
params = read_params()
590590
run_module(args.archive_type,
591-
params.cache_dir,
592-
params.export_dir,
593-
aws_credentials=params.aws_credentials,
591+
params["cache_dir"],
592+
params["export_dir"],
593+
aws_credentials=params["aws_credentials"],
594594
branch_name=args.branch_name,
595-
bucket_name=params.bucket_name,
595+
bucket_name=params["bucket_name"],
596596
commit_message=args.commit_message,
597597
commit_partial_success=args.commit_partial_success,
598598
indicator_prefix=args.indicator_prefix,

_delphi_utils_python/delphi_utils/smooth.py

Lines changed: 14 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -150,9 +150,9 @@ def __init__(
150150
else:
151151
self.coeffs = None
152152

153-
def smooth(self,
154-
signal: Union[np.ndarray, pd.Series],
155-
impute_order=2) -> Union[np.ndarray, pd.Series]:
153+
def smooth(
154+
self, signal: Union[np.ndarray, pd.Series], impute_order=2
155+
) -> Union[np.ndarray, pd.Series]:
156156
"""Apply a smoother to a signal.
157157
158158
The major workhorse smoothing function. Imputes the nans and then applies
@@ -202,7 +202,7 @@ def smooth(self,
202202
signal_smoothed = signal
203203

204204
# Append the nans back, since we want to preserve length
205-
signal_smoothed = np.hstack([np.nan*np.ones(ix), signal_smoothed])
205+
signal_smoothed = np.hstack([np.nan * np.ones(ix), signal_smoothed])
206206
# Convert back to pandas if necessary
207207
if is_pandas_series:
208208
signal_smoothed = pd.Series(signal_smoothed)
@@ -297,21 +297,28 @@ def left_gauss_linear_smoother(self, signal):
297297
weights = np.exp(
298298
-((np.arange(idx + 1) - idx) ** 2) / self.gaussian_bandwidth
299299
)
300-
AwA = np.dot(A[: (idx + 1), :].T * weights, A[: (idx + 1), :]) # pylint: disable=invalid-name
300+
AwA = np.dot( # pylint: disable=invalid-name
301+
A[: (idx + 1), :].T * weights, A[: (idx + 1), :]
302+
)
301303
Awy = np.dot( # pylint: disable=invalid-name
302304
A[: (idx + 1), :].T * weights, signal[: (idx + 1)].reshape(-1, 1)
303305
)
304306
try:
305307
beta = np.linalg.solve(AwA, Awy)
306308
signal_smoothed[idx] = np.dot(A[: (idx + 1), :], beta)[-1]
307309
except np.linalg.LinAlgError:
308-
signal_smoothed[idx] = signal[idx] if self.impute else np.nan # pylint: disable=using-constant-test
310+
signal_smoothed[idx] = (
311+
signal[idx] # pylint: disable=using-constant-test
312+
if self.impute
313+
else np.nan
314+
)
309315
if self.minval is not None:
310316
signal_smoothed[signal_smoothed <= self.minval] = self.minval
311317
return signal_smoothed
312318

313319
def savgol_predict(self, signal, poly_fit_degree, nr):
314320
"""Predict a single value using the savgol method.
321+
315322
Fits a polynomial through the values given by the signal and returns the value
316323
of the polynomial at the right-most signal-value. More precisely, for a signal of length
317324
n, fits a poly_fit_degree polynomial through the points signal[-n+1+nr], signal[-n+2+nr],
@@ -473,7 +480,7 @@ def savgol_impute(self, signal, impute_order):
473480
# imputation order is larger than the available data)
474481
else:
475482
signal_imputed[ix] = self.savgol_predict(
476-
signal_imputed[:ix], min(ix-1, impute_order), -1
483+
signal_imputed[:ix], min(ix - 1, impute_order), -1
477484
)
478485
# Away from the boundary, use savgol fitting on a fixed window
479486
else:

ansible/templates/changehc-params-prod.json.j2

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -2,8 +2,8 @@
22
"static_file_dir": "./static",
33
"export_dir": "/common/covidcast/receiving/changehc",
44
"cache_dir": "./cache",
5-
"input_denom_file": "./tests/test_data/20200601_All_Outpatients_By_County.dat",
6-
"input_covid_file": "./tests/test_data/20200601_Covid_Outpatients_By_County.dat",
5+
"input_denom_file": null,
6+
"input_covid_file": null,
77
"start_date": "2020-02-01",
88
"end_date": null,
99
"drop_date": null,

changehc/.pylintrc

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -7,7 +7,9 @@ disable=logging-format-interpolation,
77
# Allow pytest functions to be part of a class.
88
no-self-use,
99
# Allow pytest classes to have one test.
10-
too-few-public-methods
10+
too-few-public-methods,
11+
# Ignore
12+
R0903, C0301, R0914, C0103, W1203, E0611, R0902, R0913, W0105, W0611, W1401
1113

1214
[BASIC]
1315

changehc/delphi_changehc/constants.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
"""Registry for signal names and geo types"""
2-
SMOOTHED = "smoothed_cli"
3-
SMOOTHED_ADJ = "smoothed_adj_cli"
2+
SMOOTHED = "smoothed_outpatient_covid"
3+
SMOOTHED_ADJ = "smoothed_adj_outpatient_covid"
44
SIGNALS = [SMOOTHED, SMOOTHED_ADJ]
55
NA = "NA"
66
HRR = "hrr"

changehc/delphi_changehc/run.py

Lines changed: 22 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -26,27 +26,33 @@ def run_module():
2626

2727
logging.basicConfig(level=logging.DEBUG)
2828

29-
## download recent files from FTP server
30-
logging.info("downloading recent files through SFTP")
31-
download(params["cache_dir"], params["ftp_conn"])
32-
33-
## get end date from input file
3429
# the filenames are expected to be in the format:
3530
# Denominator: "YYYYMMDD_All_Outpatients_By_County.dat.gz"
3631
# Numerator: "YYYYMMDD_Covid_Outpatients_By_County.dat.gz"
3732

33+
assert (params["input_denom_file"] is None) == (params["input_covid_file"] is None), \
34+
"exactly one of denom and covid files are provided"
35+
3836
if params["drop_date"] is None:
39-
dropdate_denom = datetime.strptime(
40-
Path(params["input_denom_file"]).name.split("_")[0], "%Y%m%d"
41-
)
42-
43-
dropdate_covid = datetime.strptime(
44-
Path(params["input_covid_file"]).name.split("_")[0], "%Y%m%d"
45-
)
46-
assert dropdate_denom == dropdate_covid, "different drop dates for data files"
47-
dropdate_dt = dropdate_denom
37+
# files are dropped about 8pm the day after the issue date
38+
dropdate_dt = (datetime.now() - timedelta(days=1,hours=20))
39+
dropdate_dt = dropdate_dt.replace(hour=0,minute=0,second=0,microsecond=0)
4840
else:
4941
dropdate_dt = datetime.strptime(params["drop_date"], "%Y-%m-%d")
42+
filedate = dropdate_dt.strftime("%Y%m%d")
43+
44+
if params["input_denom_file"] is None:
45+
46+
## download recent files from FTP server
47+
logging.info("downloading recent files through SFTP")
48+
download(params["cache_dir"], params["ftp_conn"])
49+
50+
input_denom_file = "%s/%s_All_Outpatients_By_County.dat.gz" % (params["cache_dir"],filedate)
51+
input_covid_file = "%s/%s_Covid_Outpatients_By_County.dat.gz" % (params["cache_dir"],filedate)
52+
else:
53+
input_denom_file = params["input_denom_file"]
54+
input_covid_file = params["input_covid_file"]
55+
5056
dropdate = str(dropdate_dt.date())
5157

5258
# range of estimates to produce
@@ -93,8 +99,8 @@ def run_module():
9399
params["se"]
94100
)
95101
su_inst.update_sensor(
96-
params["input_denom_file"],
97-
params["input_covid_file"],
102+
input_denom_file,
103+
input_covid_file,
98104
params["export_dir"]
99105
)
100106
logging.info("finished %s", geo)

changehc/params.json.template

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -2,8 +2,8 @@
22
"static_file_dir": "./static",
33
"export_dir": "./receiving",
44
"cache_dir": "./cache",
5-
"input_denom_file": "./tests/test_data/20200601_All_Outpatients_By_County.dat",
6-
"input_covid_file": "./tests/test_data/20200601_Covid_Outpatients_By_County.dat",
5+
"input_denom_file": null,
6+
"input_covid_file": null,
77
"start_date": "2020-02-01",
88
"end_date": null,
99
"drop_date": null,

changehc/tests/test_sensor.py

Lines changed: 2 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -1,16 +1,14 @@
11
# standard
2-
import pytest
32

4-
# third party
5-
from delphi_utils import read_params, Smoother
63
import numpy as np
74
import numpy.random as nr
85
import pandas as pd
9-
106
# first party
117
from delphi_changehc.config import Config
128
from delphi_changehc.load_data import load_combined_data
139
from delphi_changehc.sensor import CHCSensor
10+
# third party
11+
from delphi_utils import read_params
1412

1513
CONFIG = Config()
1614
PARAMS = read_params()
@@ -21,11 +19,6 @@
2119
class TestLoadData:
2220
combined_data = load_combined_data(DENOM_FILEPATH, COVID_FILEPATH, DROP_DATE,
2321
"fips")
24-
# change smoother window length for test data
25-
CHCSensor.smoother = Smoother("savgol",
26-
poly_fit_degree=1,
27-
gaussian_bandwidth=Config.SMOOTHER_BANDWIDTH,
28-
window_length=20)
2922

3023
def test_backfill(self):
3124
num0 = np.array([0, 1, 2, 3, 4, 5, 6, 7, 8], dtype=float).reshape(-1, 1)

jhu/DETAILS.md

Lines changed: 7 additions & 104 deletions
Original file line numberDiff line numberDiff line change
@@ -34,90 +34,16 @@ discrete difference of `cumulative_counts`, and assume that the
3434
problem, because there there is only one county with a nonzero
3535
`cumulative_count` on January 22nd, with a value of 1.
3636

37-
For deriving `incidence`, we use the estimated 2019 county population values
38-
from the US Census Bureau. https://www.census.gov/data/tables/time-series/demo/popest/2010s-counties-total.html
37+
For deriving `incidence`, we use the 2019 county population estimates
38+
from the [US Census Bureau](https://www.census.gov/data/tables/time-series/demo/popest/2010s-counties-total.html).
3939

4040
## Exceptions
4141

42-
At the County (FIPS) level, we report the data _exactly_ as JHU reports their
43-
data, to prevent confusing public consumers of the data.
44-
The visualization and modeling teams should take note of these exceptions.
45-
46-
### New York City
47-
48-
New York City comprises five boroughs:
49-
50-
|Borough Name |County Name |FIPS Code |
51-
|-------------------|-------------------|---------------|
52-
|Manhattan |New York County |36061 |
53-
|The Bronx |Bronx County |36005 |
54-
|Brooklyn |Kings County |36047 |
55-
|Queens |Queens County |36081 |
56-
|Staten Island |Richmond County |36085 |
57-
58-
**Data from all five boroughs are reported under New York County,
59-
FIPS Code 36061.** The other four boroughs are included in the dataset
60-
and show up in our API, but they should be uniformly zero. (In our population
61-
file under static folder, the population from all five boroughs are also
62-
assigned to FIPS Code 36061 only. The population for the rest of the counties
63-
are set to be 1.)
64-
65-
All NYC counts are mapped to the MSA with CBSA ID 35620, which encompasses
66-
all five boroughs. All NYC counts are mapped to HRR 303, which intersects
67-
all five boroughs (297 also intersects the Bronx, 301 also intersects
68-
Brooklyn and Queens, but absent additional information, I am leaving all
69-
counts in 303).
70-
71-
### Kansas City, Missouri
72-
73-
Kansas City intersects the following four counties, which themselves report
74-
confirmed case and deaths data:
75-
76-
|County Name |FIPS Code |
77-
|-------------------|---------------|
78-
|Jackson County |29095 |
79-
|Platte County |29165 |
80-
|Cass County |29037 |
81-
|Clay County |29047 |
82-
83-
**Data from Kansas City is given its own dedicated line, with FIPS
84-
code 70003.** This is how JHU encodes their data. However, the data in
85-
the four counties that Kansas City intersects is not necessarily zero.
86-
87-
For the mapping to HRR and MSA, the counts for Kansas City are dispersed to
88-
these four counties in equal proportions.
89-
90-
### Dukes and Nantucket Counties, Massachusetts
91-
92-
**The counties of Dukes and Nantucket report their figures together,
93-
and we (like JHU) list them under FIPS Code 70002.** Here are the FIPS codes
94-
for the individual counties:
95-
96-
|County Name |FIPS Code |
97-
|-------------------|---------------|
98-
|Dukes County |25007 |
99-
|Nantucket County |25019 |
100-
101-
For the mapping to HRR and MSA, the counts for Dukes and Nantucket are
102-
dispersed to the two counties in equal proportions.
103-
104-
The data in the individual counties is expected to be zero.
105-
106-
### Mismatched FIPS Codes
107-
108-
Finally, there are two FIPS codes that were changed in 2015, leading to
109-
mismatch between us and JHU. We report the data using the FIPS code used
110-
by JHU, again to promote consistency and avoid confusion by external users
111-
of the dataset. For the mapping to MSA, HRR, these two counties are
112-
included properly.
113-
114-
|County Name |State |"Our" FIPS |JHU FIPS |
115-
|-------------------|---------------|-------------------|---------------|
116-
|Oglala Lakota |South Dakota |46113 |46102 |
117-
|Kusilvak |Alaska |02270 |02158 |
118-
119-
Documentation for the changes made by the US Census Bureau in 2015:
120-
https://www.census.gov/programs-surveys/geography/technical-documentation/county-changes.html
42+
To prevent confusing public consumers of the data, we report the data as closely
43+
as possible to the way JHU reports their data, using the same County FIPS codes.
44+
Nonetheless, there are a few exceptions which should be of interest to the
45+
visualization and modeling teams. These exceptions can be found at the [JHU Delphi
46+
Epidata API documentation page](https://cmu-delphi.github.io/delphi-epidata/api/covidcast-signals/jhu-csse.html#geographical-exceptions).
12147

12248
## Negative incidence
12349

@@ -129,26 +55,3 @@ to County Y, County X may have negative incidence.
12955

13056
Because the MSA and HRR numbers are computed by taking population-weighted
13157
averages, the count data at those geographical levels may be non-integral.
132-
133-
## Counties not in our canonical dataset
134-
135-
Some FIPS codes do not appear as the primary FIPS for any ZIP code in our
136-
canonical `02_20_uszips.csv`; they appear in the `county` exported files, but
137-
for the MSA/HRR mapping, we disburse them equally to the counties with whom
138-
they appear as a secondary FIPS code. The identification of such "secondary"
139-
FIPS codes are documented in `notebooks/create-mappings.ipynb`. The full list
140-
of `secondary, [mapped]` is:
141-
142-
```
143-
SECONDARY_FIPS = [ # generated by notebooks/create-mappings.ipynb
144-
('51620', ['51093', '51175']),
145-
('51685', ['51153']),
146-
('28039', ['28059', '28041', '28131', '28045', '28059', '28109',
147-
'28047']),
148-
('51690', ['51089', '51067']),
149-
('51595', ['51081', '51025', '51175', '51183']),
150-
('51600', ['51059', '51059', '51059']),
151-
('51580', ['51005']),
152-
('51678', ['51163']),
153-
]
154-
```

0 commit comments

Comments
 (0)