Skip to content

Commit bab0b51

Browse files
authored
Merge branch 'main' into dev-smoothing
2 parents 7cfed72 + 40c75a7 commit bab0b51

File tree

221 files changed

+463796
-181113
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

221 files changed

+463796
-181113
lines changed
Lines changed: 30 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,30 @@
1+
---
2+
name: Feature release
3+
about: Begin the finishing work for features ready to be included in a release
4+
title: 'Release NEW_THING'
5+
labels: 'release'
6+
assignees: 'benjaminysmith'
7+
---
8+
9+
- [Link to issue]()
10+
- [Link to PR]()
11+
- Proposed release version: <!-- eg 1.12 -->
12+
13+
<!-- Additional information about the feature: -->
14+
15+
16+
<!-- relevant for most work -->
17+
18+
- [ ] API [documentation](https://github.com/cmu-delphi/delphi-epidata/tree/main/docs/api) and/or [changelog](https://github.com/cmu-delphi/delphi-epidata/blob/main/docs/api/covidcast_changelog.md)
19+
- [ ] API mailing list notification
20+
21+
<!-- relevant for new signals -->
22+
23+
- [ ] Statistical review (usually [correlations](https://github.com/cmu-delphi/covidcast/tree/main/docs/R-notebooks))
24+
- [ ] Signal / source name review (usually [Roni](https://docs.google.com/document/d/10hGd4Evce4lJ4VkWaQEKFQxvmw2P4xyYGtIAWF52Sf8/edit?usp=sharing))
25+
26+
<!-- relevant for new map signals -->
27+
28+
- [ ] Visual review
29+
- [ ] [Signal description pop-up text](https://docs.google.com/document/d/1kDqRg8EaI4WQXMaUUbbCGPlsUqEql8kgXCNt6AvMA9I/edit?usp=sharing) review
30+
- [ ] [Map release notes](https://docs.google.com/document/d/1BpxGgIma_Lkd2kxtwEo2DBdHQ3zk6dHRz-leUIRlOIA/edit?usp=sharing)

.gitignore

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -126,3 +126,7 @@ venv.bak/
126126
.retry
127127
.indicators-ansible-vault-pass
128128
indicators-ansible-vault-pass
129+
130+
# testing_utils
131+
testing_utils/cache
132+
testing_utils/*.csv

_delphi_utils_python/.pylintrc

Lines changed: 17 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1,8 +1,21 @@
1-
[DESIGN]
21

3-
min-public-methods=1
2+
[MESSAGES CONTROL]
43

4+
disable=logging-format-interpolation,
5+
too-many-locals,
6+
too-many-arguments,
7+
# Allow pytest functions to be part of a class.
8+
no-self-use,
9+
# Allow pytest classes to have one test.
10+
too-few-public-methods
511

6-
[MESSAGES CONTROL]
12+
[BASIC]
13+
14+
# Allow arbitrarily short-named variables.
15+
variable-rgx=[a-z_][a-z0-9_]*
16+
argument-rgx=[a-z_][a-z0-9_]*
17+
18+
[DESIGN]
719

8-
disable=R0801, C0330, E1101, E0611, C0114, C0116, C0103, R0913, R0914, W0702
20+
# Don't complain about pytest "unused" arguments.
21+
ignored-argument-names=(_.*|run_as_module)

_delphi_utils_python/data_proc/geomap/geo_data_proc.py

Lines changed: 8 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -217,6 +217,12 @@ def create_jhu_uid_fips_crosswalk():
217217
{"jhu_uid": "63072999", "fips": "72000", "weight": 1.0},
218218
]
219219
)
220+
cruise_ships = pd.DataFrame(
221+
[
222+
{"jhu_uid": "84088888", "fips": "88888", "weight": 1.0},
223+
{"jhu_uid": "84099999", "fips": "99999", "weight": 1.0},
224+
]
225+
)
220226

221227
jhu_df = (
222228
pd.read_csv(JHU_FIPS_URL, dtype={"UID": str, "FIPS": str})
@@ -234,7 +240,7 @@ def create_jhu_uid_fips_crosswalk():
234240
# Drop the JHU UIDs that were hand-modified
235241
dup_ind = jhu_df["jhu_uid"].isin(
236242
pd.concat(
237-
[hand_additions, unassigned_states, out_of_state, puerto_rico_unassigned]
243+
[hand_additions, unassigned_states, out_of_state, puerto_rico_unassigned, cruise_ships]
238244
)["jhu_uid"].values
239245
)
240246
jhu_df.drop(jhu_df.index[dup_ind], inplace=True)
@@ -391,6 +397,7 @@ def create_fips_population_table():
391397
df_pr = df_pr.groupby("fips").sum().reset_index()
392398
df_pr = df_pr[~df_pr["fips"].isin(census_pop["fips"])]
393399
census_pop_pr = pd.concat([census_pop, df_pr])
400+
394401
census_pop_pr.to_csv(join(OUTPUT_DIR, FIPS_POPULATION_OUT_FILENAME), index=False)
395402

396403

_delphi_utils_python/delphi_utils/__init__.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -10,5 +10,6 @@
1010

1111
from .geomap import GeoMapper
1212
from .smooth import Smoother
13+
from .signal import add_prefix, public_signal
1314

1415
__version__ = "0.1.0"

_delphi_utils_python/delphi_utils/archive.py

Lines changed: 10 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -351,7 +351,11 @@ def update_cache(self):
351351

352352
self._cache_updated = True
353353

354-
def archive_exports(self, exported_files: Files) -> Tuple[Files, Files]:
354+
def archive_exports(self,
355+
exported_files: Files,
356+
update_cache: bool = True,
357+
update_s3: bool = True
358+
) -> Tuple[Files, Files]:
355359
"""
356360
Handles actual archiving of files to the S3 bucket.
357361
@@ -375,10 +379,12 @@ def archive_exports(self, exported_files: Files) -> Tuple[Files, Files]:
375379
archive_key = join(self.indicator_prefix, basename(exported_file))
376380

377381
try:
378-
# Update local cache
379-
shutil.copyfile(exported_file, cached_file)
382+
if update_cache:
383+
# Update local cache
384+
shutil.copyfile(exported_file, cached_file)
380385

381-
self.bucket.Object(archive_key).upload_file(exported_file)
386+
if update_s3:
387+
self.bucket.Object(archive_key).upload_file(exported_file)
382388

383389
archive_success.append(exported_file)
384390
except FileNotFoundError:

_delphi_utils_python/delphi_utils/data/jhu_uid_fips_table.csv

Lines changed: 0 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -82,8 +82,6 @@ jhu_uid,fips,weight
8282
63072149,72149,1.0
8383
63072151,72151,1.0
8484
63072153,72153,1.0
85-
84088888,88888,1.0
86-
84099999,99999,1.0
8785
84000001,01000,1.0
8886
84000002,02000,1.0
8987
84000004,04000,1.0

_delphi_utils_python/delphi_utils/geomap.py

Lines changed: 34 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -77,6 +77,11 @@ class GeoMapper:
7777
==========
7878
The main GeoMapper object loads and stores crosswalk dataframes on-demand.
7979
80+
When replacing geocodes with a new one, an aggregation step is performed on the data columns
81+
to merge entries (i.e. in the case of a many to one mapping or a weighted mapping). This
82+
requires a specification of the data columns, which are assumed to be all the columns that
83+
are not the geocodes or the date column specified in date_col.
84+
8085
Example 1: to add a new column with a new geocode, possibly with weights:
8186
> gmpr = GeoMapper()
8287
> df = gmpr.add_geocode(df, "fips", "zip", from_col="fips", new_col="geo_id",
@@ -305,7 +310,12 @@ def add_geocode(
305310
)
306311

307312
# state codes are all stored in one table
308-
if new_code in state_codes:
313+
if from_code in state_codes and new_code in state_codes:
314+
crosswalk = self._load_crosswalk(from_code="state", to_code="state")
315+
crosswalk = crosswalk.rename(
316+
columns={from_code: from_col, new_code: new_col}
317+
)
318+
elif new_code in state_codes:
309319
crosswalk = self._load_crosswalk(from_code=from_code, to_code="state")
310320
crosswalk = crosswalk.rename(
311321
columns={from_code: from_col, new_code: new_col}
@@ -322,9 +332,13 @@ def add_geocode(
322332
df = df.merge(crosswalk, left_on=from_col, right_on=from_col, how="left")
323333

324334
# Drop extra state columns
325-
if new_code in state_codes:
335+
if new_code in state_codes and not from_code in state_codes:
326336
state_codes.remove(new_code)
327337
df.drop(columns=state_codes, inplace=True)
338+
elif new_code in state_codes and from_code in state_codes:
339+
state_codes.remove(new_code)
340+
state_codes.remove(from_code)
341+
df.drop(columns=state_codes, inplace=True)
328342

329343
return df
330344

@@ -361,6 +375,9 @@ def replace_geocode(
361375
new_code: {'fips', 'zip', 'state_code', 'state_id', 'state_name', 'hrr', 'msa',
362376
'hhs_region_number'}
363377
Specifies the geocode type of the data in new_col.
378+
date_col: str or None, default "date"
379+
Specify which column contains the date values. Used for value aggregation.
380+
If None, then the aggregation is done only on geo_id.
364381
data_cols: list, default None
365382
A list of data column names to aggregate when doing a weighted coding. If set to
366383
None, then all the columns are used except for date_col and new_col.
@@ -389,12 +406,17 @@ def replace_geocode(
389406
# Multiply and aggregate (this automatically zeros NAs)
390407
df[data_cols] = df[data_cols].multiply(df["weight"], axis=0)
391408
df.drop("weight", axis=1, inplace=True)
392-
df = df.groupby([date_col, new_col]).sum().reset_index()
409+
410+
if not date_col is None:
411+
df = df.groupby([date_col, new_col]).sum().reset_index()
412+
else:
413+
df = df.groupby([new_col]).sum().reset_index()
393414
return df
394415

395-
def add_population_column(self, data, geocode_type, geocode_col=None):
416+
def add_population_column(self, data, geocode_type, geocode_col=None, dropna=True):
396417
"""
397-
Appends a population column to a dateframe, based on the FIPS or ZIP code.
418+
Appends a population column to a dataframe, based on the FIPS or ZIP code. If no
419+
dataframe is provided, the full crosswalk from geocode to population is returned.
398420
399421
Parameters
400422
---------
@@ -412,24 +434,26 @@ def add_population_column(self, data, geocode_type, geocode_col=None):
412434
A dataframe with a population column appended.
413435
"""
414436
geocode_col = geocode_type if geocode_col is None else geocode_col
437+
data = data.copy()
415438

416439
if geocode_type not in ["fips", "zip"]:
417440
raise ValueError(
418441
"Only fips and zip geocodes supported. \
419442
For other codes, aggregate those."
420443
)
421444

445+
pop_df = self._load_crosswalk(from_code=geocode_type, to_code="pop")
446+
422447
if not is_string_dtype(data[geocode_col]):
423448
data[geocode_col] = data[geocode_col].astype(str).str.zfill(5)
424449

425-
pop_df = self._load_crosswalk(from_code=geocode_type, to_code="pop")
426-
450+
merge_type = "inner" if dropna else "left"
427451
data_with_pop = (
428-
data.copy()
429-
.merge(pop_df, left_on=geocode_col, right_on=geocode_type, how="inner")
452+
data
453+
.merge(pop_df, left_on=geocode_col, right_on=geocode_type, how=merge_type)
430454
.rename(columns={"pop": "population"})
431455
)
432-
data_with_pop["population"] = data_with_pop["population"].astype(int)
456+
433457
return data_with_pop
434458

435459
@staticmethod

quidel_covidtest/delphi_quidel_covidtest/handle_wip_sensor.py renamed to _delphi_utils_python/delphi_utils/signal.py

Lines changed: 6 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -1,13 +1,13 @@
1-
"""This file checks the wip status of signals"""
1+
"""Functions for understanding and creating signal names."""
22
import covidcast
33

4-
def add_prefix(signal_names, wip_signal, prefix):
4+
def add_prefix(signal_names, wip_signal, prefix="wip_"):
55
"""Adds prefix to signal if there is a WIP signal
66
Parameters
77
----------
88
signal_names: List[str]
99
Names of signals to be exported
10-
prefix : 'wip_'
10+
prefix : "wip_"
1111
prefix for new/non public signals
1212
wip_signal : List[str] or bool
1313
a list of wip signals: [], OR
@@ -36,11 +36,11 @@ def add_prefix(signal_names, wip_signal, prefix):
3636
raise ValueError("Supply True | False or '' or [] | list()")
3737

3838

39-
def public_signal(signal_):
39+
def public_signal(signal):
4040
"""Checks if the signal name is already public using COVIDcast
4141
Parameters
4242
----------
43-
signal_ : str
43+
signal : str
4444
Name of the signal
4545
Returns
4646
-------
@@ -50,6 +50,6 @@ def public_signal(signal_):
5050
"""
5151
epidata_df = covidcast.metadata()
5252
for index in range(len(epidata_df)):
53-
if epidata_df['signal'][index] == signal_:
53+
if epidata_df["signal"][index] == signal:
5454
return True
5555
return False

_delphi_utils_python/setup.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,7 @@
33

44
required = [
55
"boto3",
6+
"covidcast",
67
"gitpython",
78
"moto",
89
"numpy",

_delphi_utils_python/tests/test_geomap.py

Lines changed: 30 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -278,11 +278,13 @@ def test_zip_to_state_id(self):
278278
def test_add_population_column(self):
279279
gmpr = GeoMapper()
280280
new_data = gmpr.add_population_column(self.fips_data_3, "fips")
281-
assert new_data["population"].sum() == 274963
281+
assert new_data.shape == (5, 5)
282282
new_data = gmpr.add_population_column(self.zip_data, "zip")
283-
assert new_data["population"].sum() == 274902
283+
assert new_data.shape == (6, 5)
284284
with pytest.raises(ValueError):
285285
new_data = gmpr.add_population_column(self.zip_data, "hrr")
286+
new_data = gmpr.add_population_column(self.fips_data_5, "fips")
287+
assert new_data.shape == (4, 5)
286288

287289
def test_add_geocode(self):
288290
gmpr = GeoMapper()
@@ -382,13 +384,20 @@ def test_add_geocode(self):
382384
new_data2 = gmpr.add_geocode(new_data, "state_code", "hhs_region_number")
383385
assert new_data2["hhs_region_number"].unique().size == 2
384386

387+
# state_name -> state_id
388+
new_data = gmpr.replace_geocode(self.zip_data, "zip", "state_name")
389+
new_data2 = gmpr.add_geocode(new_data, "state_name", "state_id")
390+
assert new_data2.shape == (4, 5)
391+
new_data2 = gmpr.replace_geocode(new_data, "state_name", "state_id", new_col="abbr")
392+
assert "abbr" in new_data2.columns
393+
385394
# fips -> nation
386-
new_data = gmpr.replace_geocode(self.fips_data_5, "fips", "nation")
395+
new_data = gmpr.replace_geocode(self.fips_data_5, "fips", "nation", new_col="NATION")
387396
assert new_data.equals(
388397
pd.DataFrame().from_dict(
389398
{
390399
"date": {0: pd.Timestamp("2018-01-01 00:00:00")},
391-
"nation": {0: "us"},
400+
"NATION": {0: "us"},
392401
"count": {0: 10024.0},
393402
"total": {0: 100006.0},
394403
}
@@ -411,6 +420,23 @@ def test_add_geocode(self):
411420
)
412421
)
413422

423+
# hrr -> nation
424+
with pytest.raises(ValueError):
425+
new_data = gmpr.replace_geocode(self.zip_data, "zip", "hrr")
426+
new_data2 = gmpr.replace_geocode(new_data, "hrr", "nation")
427+
414428
# fips -> hrr (dropna=True/False check)
415429
assert not gmpr.add_geocode(self.fips_data_3, "fips", "hrr").isna().any().any()
416430
assert gmpr.add_geocode(self.fips_data_3, "fips", "hrr", dropna=False).isna().any().any()
431+
432+
# fips -> hrr (date_col=None check)
433+
new_data = gmpr.replace_geocode(self.fips_data_5.drop(columns=["date"]), "fips", "hrr", date_col=None)
434+
assert new_data.equals(
435+
pd.DataFrame().from_dict(
436+
{
437+
'hrr': {0: '1', 1: '183', 2: '184', 3: '382', 4: '7'},
438+
'count': {0: 1.772347174163783, 1: 7157.392403522299, 2: 2863.607596477701, 3: 1.0, 4: 0.22765282583621685},
439+
'total': {0: 3.544694348327566, 1: 71424.64801363471, 2: 28576.35198636529, 3: 1.0, 4: 0.4553056516724337}
440+
}
441+
)
442+
)
Lines changed: 34 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,34 @@
1+
"""Tests for delphi_utils.signal."""
2+
from unittest.mock import patch
3+
import pandas as pd
4+
5+
from delphi_utils.signal import add_prefix, public_signal
6+
7+
# Constants for mocking out the call to `covidcast.metadata` within `public_signal()`.
8+
PUBLIC_SIGNALS = ["sig1", "sig2", "sig3"]
9+
PUBLIC_SIGNALS_FRAME = pd.DataFrame(data={"signal": PUBLIC_SIGNALS})
10+
11+
class TestSignal:
12+
"""Tests for signal.py."""
13+
14+
def test_add_prefix_to_all(self):
15+
"""Tests that `add_prefix()` derives work-in-progress names for all input signals."""
16+
assert add_prefix(["sig1", "sig3"], True, prefix="wip_") == ["wip_sig1", "wip_sig3"]
17+
18+
def test_add_prefix_to_specified(self):
19+
"""Tests that `add_prefix()` derives work-in-progress names for specified signals."""
20+
assert add_prefix(["sig1", "sig2", "sig3"], ["sig2"], prefix="wip_") ==\
21+
["sig1", "wip_sig2", "sig3"]
22+
23+
@patch("covidcast.metadata")
24+
def test_add_prefix_to_non_public(self, metadata):
25+
"""Tests that `add_prefix()` derives work-in-progress names for non-public signals."""
26+
metadata.return_value = PUBLIC_SIGNALS_FRAME
27+
assert add_prefix(["sig0", "sig1"], False, prefix="wip_") == ["wip_sig0", "sig1"]
28+
29+
@patch("covidcast.metadata")
30+
def test_public_signal(self, metadata):
31+
"""Tests that `public_signal()` identifies public vs. private signals."""
32+
metadata.return_value = PUBLIC_SIGNALS_FRAME
33+
assert not public_signal("sig0")
34+
assert public_signal("sig2")

0 commit comments

Comments
 (0)