
Commit 4be2e2d

Merge pull request #1829 from cmu-delphi/release/indicators_v0.3.36_utils_v0.3.13
Release covidcast-indicators 0.3.36
2 parents 51fba8d + c9dd855 commit 4be2e2d

File tree

41 files changed: +155 additions, -104 deletions


.bumpversion.cfg

Lines changed: 1 addition & 1 deletion
@@ -1,5 +1,5 @@
 [bumpversion]
-current_version = 0.3.35
+current_version = 0.3.36
 commit = True
 message = chore: bump covidcast-indicators to {new_version}
 tag = False

_delphi_utils_python/.bumpversion.cfg

Lines changed: 1 addition & 1 deletion
@@ -1,5 +1,5 @@
 [bumpversion]
-current_version = 0.3.12
+current_version = 0.3.13
 commit = True
 message = chore: bump delphi_utils to {new_version}
 tag = False

_delphi_utils_python/delphi_utils/__init__.py

Lines changed: 1 addition & 1 deletion
@@ -15,4 +15,4 @@
 from .nancodes import Nans
 from .weekday import Weekday
 
-__version__ = "0.3.12"
+__version__ = "0.3.13"

_delphi_utils_python/delphi_utils/flash_eval/eval_day.py

Lines changed: 1 addition & 1 deletion
@@ -147,7 +147,7 @@ def output(evd_ranking, day, lag, signal, logger):
     """
     starter_link = f"{HTML_LINK}{(day+pd.Timedelta(f'{lag}d')).strftime('%Y-%m_%d')}"
     p_text = ""
-    for j, (index, value) in enumerate(evd_ranking.sort_values(ascending=False).iteritems()):
+    for j, (index, value) in enumerate(evd_ranking.sort_values(ascending=False).items()):
         if j < 30:
             start_link = f"{starter_link},{day.strftime('%Y-%m_%d')},{index}"
             p_text += f"\t{start_link}|*{index}*, {'{:.2f}'.format(value)}>\n"
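
Context for this change, not part of the commit: Series.iteritems() was deprecated in pandas 1.5 and removed in pandas 2.0; Series.items() is the drop-in replacement and yields the same (index, value) pairs. A minimal sketch of the pattern, with made-up data:

    import pandas as pd

    evd_ranking = pd.Series({"ca": 0.91, "ny": 0.72, "pa": 0.40})  # hypothetical ranking

    # Old spelling, removed in pandas 2.0: .iteritems()
    for j, (index, value) in enumerate(evd_ranking.sort_values(ascending=False).items()):
        print(j, index, "{:.2f}".format(value))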

_delphi_utils_python/delphi_utils/geomap.py

Lines changed: 3 additions & 3 deletions
@@ -401,9 +401,9 @@ def replace_geocode(
         df.drop("weight", axis=1, inplace=True)
 
         if not date_col is None:
-            df = df.groupby([date_col, new_col]).sum().reset_index()
+            df = df.groupby([date_col, new_col]).sum(numeric_only=True).reset_index()
         else:
-            df = df.groupby([new_col]).sum().reset_index()
+            df = df.groupby([new_col]).sum(numeric_only=True).reset_index()
         return df
 
     def add_population_column(self, data, geocode_type, geocode_col=None, dropna=True):
@@ -501,7 +501,7 @@ def fips_to_megacounty(
         )
         data.set_index([fips_col, date_col], inplace=True)
         data = data.join(mega_data)
-        data = data.reset_index().groupby([date_col, mega_col]).sum()
+        data = data.reset_index().groupby([date_col, mega_col]).sum(numeric_only=True)
         return data.reset_index()
 
     def as_mapper_name(self, geo_type, state="state_id"):
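
Background on the recurring numeric_only=True edits in this release (an inference from the pandas changelog, not stated in the diff): pandas before 2.0 silently dropped non-numeric columns from GroupBy.sum(), while pandas 2.0 defaults to numeric_only=False, so stray string columns are kept (their values may be concatenated) or raise a TypeError for unsummable dtypes. Passing numeric_only=True explicitly restores the old result. A small sketch with invented columns:

    import pandas as pd

    df = pd.DataFrame({
        "fips": ["06001", "06001", "36061"],   # grouping key
        "state_id": ["ca", "ca", "ny"],        # stray non-numeric column
        "val": [1.0, 2.0, 3.0],
    })

    # pandas <2.0: .sum() silently dropped "state_id".
    # pandas >=2.0: "state_id" would be kept and its strings summed,
    # so numeric_only=True is passed to keep only summable columns.
    out = df.groupby("fips").sum(numeric_only=True).reset_index()
    print(out)  # fips 06001 -> val 3.0, fips 36061 -> val 3.0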

_delphi_utils_python/delphi_utils/validator/dynamic.py

Lines changed: 1 addition & 1 deletion
@@ -195,7 +195,7 @@ def replace_first_six(df, start_date):
                 start_date = self.params.time_window.start_date)
 
             if not error_df.empty:
-                for index, value in error_df.iteritems():
+                for index, value in error_df.items():
                     report.add_raised_error(
                         ValidationFailure("check_val_missing",
                                           geo_type=geo_type,

_delphi_utils_python/setup.py

Lines changed: 2 additions & 2 deletions
@@ -14,7 +14,7 @@
     "mock",
     "moto",
     "numpy",
-    "pandas>=1.1.0,<2",
+    "pandas>=1.1.0",
     "pydocstyle",
     "pylint==2.8.3",
     "pytest",
@@ -26,7 +26,7 @@
 
 setup(
     name="delphi_utils",
-    version="0.3.12",
+    version="0.3.13",
     description="Shared Utility Functions for Indicators",
     long_description=long_description,
     long_description_content_type="text/markdown",
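
Reading between the lines of the dependency change: with the iteritems, append, and numeric_only fixes elsewhere in this release, delphi_utils no longer needs the <2 ceiling on pandas, so the pin is relaxed to pandas>=1.1.0 and fresh installs may now resolve to pandas 2.x.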

_delphi_utils_python/tests/test_export.py

Lines changed: 8 additions & 8 deletions
@@ -250,15 +250,15 @@ def test_export_with_null_removal(self):
         """Test that `remove_null_samples = True` removes entries with null samples."""
         _clean_directory(self.TEST_DIR)
 
-        df_with_nulls = self.DF.copy().append(
-            {
+        df_with_nulls = pd.concat(
+            [self.DF.copy(),
+             pd.DataFrame({
                 "geo_id": "66666",
                 "timestamp": datetime(2020, 6, 6),
                 "val": 10,
                 "se": 0.2,
                 "sample_size": pd.NA,
-            },
-            ignore_index=True,
+             }, index = [0])]
         )
 
         create_export_csv(
@@ -283,15 +283,15 @@ def test_export_without_null_removal(self):
         """Test that `remove_null_samples = False` does not remove entries with null samples."""
         _clean_directory(self.TEST_DIR)
 
-        df_with_nulls = self.DF.copy().append(
-            {
+        df_with_nulls = pd.concat(
+            [self.DF.copy(),
+             pd.DataFrame({
                 "geo_id": "66666",
                 "timestamp": datetime(2020, 6, 6),
                 "val": 10,
                 "se": 0.2,
                 "sample_size": pd.NA,
-            },
-            ignore_index=True,
+             }, index = [0])]
        )
 
         create_export_csv(
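
Context: DataFrame.append() was deprecated in pandas 1.4 and removed in pandas 2.0. The rewrite wraps the new row in a one-row DataFrame and uses pd.concat. A self-contained sketch with a trimmed-down frame:

    import pandas as pd
    from datetime import datetime

    base = pd.DataFrame({"geo_id": ["51175"], "timestamp": [datetime(2020, 6, 5)],
                         "val": [3.1], "se": [0.1], "sample_size": [100]})

    # Old spelling, removed in pandas 2.0:
    #   base.append({...}, ignore_index=True)
    new_row = pd.DataFrame({"geo_id": "66666", "timestamp": datetime(2020, 6, 6),
                            "val": 10, "se": 0.2, "sample_size": pd.NA}, index=[0])
    df_with_nulls = pd.concat([base, new_row])
    print(df_with_nulls)

Note that without ignore_index=True on pd.concat, the result keeps the duplicate index label 0 from both frames, which these tests evidently tolerate.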

_delphi_utils_python/tests/test_geomap.py

Lines changed: 1 addition & 1 deletion
@@ -196,7 +196,7 @@ def test_load_fips_chngfips_table(self, geomapper):
 
     def test_load_jhu_uid_fips_table(self, geomapper):
         jhu_data = geomapper.get_crosswalk(from_code="jhu_uid", to_code="fips")
-        assert np.allclose(jhu_data.groupby("jhu_uid").sum(), 1.0)
+        assert np.allclose(jhu_data.groupby("jhu_uid").sum(numeric_only=True), 1.0)
 
     def test_load_zip_hrr_table(self, geomapper):
         zip_data = geomapper.get_crosswalk(from_code="zip", to_code="hrr")

_delphi_utils_python/tests/validator/test_dynamic.py

Lines changed: 2 additions & 2 deletions
@@ -48,7 +48,7 @@ def test_half_padding(self):
             ref_df, test_df, ref_date, ref_date)
 
         # Check it only takes missing dates - so the last 5 dates
-        assert new_ref_df.time_value.max() == datetime.strptime("2021-01-11",
+        assert new_ref_df.time_value.max().date() == datetime.strptime("2021-01-11",
                                                                 "%Y-%m-%d").date()
         assert new_ref_df.shape[0] == 11
         assert new_ref_df["val"].iloc[5] == 2
@@ -71,7 +71,7 @@ def test_full_padding(self):
             ref_df, test_df, ref_date, ref_date)
 
         # Check it only takes missing dates up to the day before the reference
-        assert new_ref_df.time_value.max() == datetime.strptime("2021-01-15",
+        assert new_ref_df.time_value.max().date() == datetime.strptime("2021-01-15",
                                                                 "%Y-%m-%d").date()
         assert new_ref_df.shape[0] == 15
         assert new_ref_df["val"].iloc[5] == 2
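
Likely reason for the .date() additions (inferred, not stated in the commit): once the padded time_value column is datetime64, .max() returns a pd.Timestamp, and a Timestamp does not reliably compare equal to a plain datetime.date across pandas versions. Converting the left side makes the assertion like-for-like:

    from datetime import datetime
    import pandas as pd

    time_value = pd.Series(pd.to_datetime(["2021-01-09", "2021-01-10", "2021-01-11"]))
    target = datetime.strptime("2021-01-11", "%Y-%m-%d").date()

    latest = time_value.max()          # a pd.Timestamp, not a date
    assert latest.date() == target     # compare date to date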

ansible/templates/sir_complainsalot-params-prod.json.j2

Lines changed: 8 additions & 5 deletions
@@ -32,11 +32,14 @@
         "max_age":6,
         "maintainers": ["U01AP8GSWG3","U01069KCRS7"],
         "retired-signals": [
-            "raw_pct_negative","smoothed_pct_negative","raw_tests_per_device","smoothed_tests_per_device",
-            ["covid_ag_raw_pct_positive_age_0_4", "hrr"], ["covid_ag_raw_pct_positive_age_0_4", "msa"],
-            ["covid_ag_raw_pct_positive_age_5_17", "hrr"], ["covid_ag_raw_pct_positive_age_5_17", "msa"],
-            ["covid_ag_raw_pct_positive_age_50_64", "hrr"], ["covid_ag_raw_pct_positive_age_50_64", "msa"],
-            ["covid_ag_raw_pct_positive_age_65plus", "hrr"], ["covid_ag_raw_pct_positive_age_65plus", "msa"]
+            "raw_pct_negative", "smoothed_pct_negative",
+            "raw_tests_per_device", "smoothed_tests_per_device",
+            "covid_ag_raw_pct_positive_age_0_4", "covid_ag_smoothed_pct_positive_age_0_4",
+            "covid_ag_raw_pct_positive_age_5_17", "covid_ag_smoothed_pct_positive_age_5_17",
+            "covid_ag_raw_pct_positive_age_18_49", "covid_ag_smoothed_pct_positive_age_18_49",
+            "covid_ag_raw_pct_positive_age_50_64", "covid_ag_smoothed_pct_positive_age_50_64",
+            "covid_ag_raw_pct_positive_age_65plus", "covid_ag_smoothed_pct_positive_age_65plus",
+            "covid_ag_raw_pct_positive_age_0_17", "covid_ag_smoothed_pct_positive_age_0_17"
         ]
     },
     "nchs-mortality": {

backfill_corrections/Dockerfile

Lines changed: 1 addition & 0 deletions
@@ -26,6 +26,7 @@ RUN install2.r --error \
 
 RUN --mount=type=secret,id=GITHUB_TOKEN \
     export GITHUB_PAT="$(cat /run/secrets/GITHUB_TOKEN)" && \
+    R -e 'devtools::install_version("bettermc", version = "1.1.2")' && \
     R -e 'devtools::install_github("cmu-delphi/covidcast", ref = "evalcast", subdir = "R-packages/evalcast")' && \
     R -e 'devtools::install_github(repo="ryantibs/quantgen", subdir="quantgen")' && \
     R -e 'install.packages(list.files(path="/opt/gurobi/linux64/R/", pattern="^gurobi_.*[.]tar[.]gz$", full.names = TRUE), repos=NULL)'

backfill_corrections/Makefile

Lines changed: 4 additions & 1 deletion
@@ -67,6 +67,8 @@ run-local: setup-dirs
 	grep "backfill correction completed successfully" $(LOG_FILE)
 	grep "scheduled core" $(LOG_FILE) ; \
 	[ "$$?" -eq 1 ]
+	grep "SIGBUS" $(LOG_FILE) ; \
+	[ "$$?" -eq 1 ]
 
 gurobi.lic:
 	@echo WLSACCESSID=$(GRB_WLSACCESSID) >> $(GRB_LICENSE_FILE)
@@ -81,6 +83,7 @@ run:
 		-v "`realpath $(USR_CACHE_DIR)`:/backfill_corrections/${CACHE_DIR}" \
 		-v "${PWD}"/params.json:/backfill_corrections/params.host.json \
 		--env GRB_LICENSE_FILE=$(GRB_LICENSE_FILE) \
+		--shm-size=2gb \
 		-it "${DOCKER_IMAGE}:${DOCKER_TAG}" \
 		/bin/bash -c "cp params.host.json params.json && make gurobi.lic && make standardize-dirs && make run-local OPTIONS=\"${OPTIONS}\" LOG_FILE=${LOG_FILE}"
 
@@ -124,7 +127,7 @@ standardize-dirs:
 	$(PYTHON) -m delphi_utils set export_dir $(EXPORT_DIR)
 
 clean:
-	rm -f $(USR_EXPORT_DIR)/*.csv.gz
+	rm -rf $(USR_EXPORT_DIR)/*
 
 coverage:
 	Rscript -e 'covr::package_coverage("delphiBackfillCorrection")'
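
A possible connection between the three backfill_corrections changes (an inference, not stated in the commit): bettermc, pinned to 1.1.2 in the Dockerfile, parallelizes via POSIX shared memory, and Docker's default 64 MB /dev/shm can exhaust under parallel load and kill workers with SIGBUS. The --shm-size=2gb flag raises that limit, and the new grep "SIGBUS" check makes run-local fail if a SIGBUS message ever appears in the log (grep exits 1 only when the pattern is absent, so [ "$$?" -eq 1 ] asserts absence).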

backfill_corrections/delphiBackfillCorrection/NAMESPACE

Lines changed: 1 addition & 0 deletions
@@ -30,6 +30,7 @@ importFrom(dplyr,arrange)
 importFrom(dplyr,bind_cols)
 importFrom(dplyr,bind_rows)
 importFrom(dplyr,desc)
+importFrom(dplyr,distinct)
 importFrom(dplyr,everything)
 importFrom(dplyr,filter)
 importFrom(dplyr,full_join)

backfill_corrections/delphiBackfillCorrection/R/main.R

Lines changed: 6 additions & 9 deletions
@@ -18,8 +18,6 @@
 run_backfill <- function(df, params,
                          refd_col = "time_value", lag_col = "lag", issued_col = "issue_date",
                          signal_suffixes = c(""), indicator = "", signal = "") {
-  df <- filter(df, lag < params$ref_lag + 30) # a rough filtration to save memory
-
   geo_levels <- params$geo_levels
   if ("state" %in% geo_levels) {
     # If state included, do it last since state processing modifies the
@@ -62,6 +60,7 @@ run_backfill <- function(df, params,
   msg_ts("Splitting data into geo groups")
   group_dfs <- group_split(df, geo_value)
 
+  msg_ts("Beginning training and/or testing...")
   # Build model for each location
   apply_fn <- ifelse(params$parallel, mclapply, lapply)
   result <- apply_fn(group_dfs, function(subdf) {
@@ -317,14 +316,12 @@ main <- function(params,
 
     msg_ts("Reading in and combining associated files")
     input_data <- lapply(
-      files_list,
-      function(file) {
-        # refd_col and issued_col read in as strings
-        read_data(file) %>%
-          fips_to_geovalue()
-      }
+      files_list, read_data # refd_col and issued_col read in as strings
     ) %>%
-      bind_rows()
+      bind_rows() %>%
+      fips_to_geovalue() %>%
+      # a rough filter to save memory
+      filter(lag < params$ref_lag + 30)
 
     if (nrow(input_data) == 0) {
       warning("No data available for indicator ", input_group$indicator,

backfill_corrections/delphiBackfillCorrection/R/utils.R

Lines changed: 9 additions & 4 deletions
@@ -169,6 +169,8 @@ create_dir_not_exist <- function(path)
 #' @return list of input dataframe augmented with lag column, if it
 #'   didn't already exist, and character vector of one or two value
 #'   column names, depending on requested `value_type`
+#'
+#' @importFrom dplyr distinct across
 validity_checks <- function(df, value_types, num_col, denom_col, signal_suffixes,
                             refd_col = "time_value", lag_col = "lag", issued_col = "issue_date") {
   if (!missing(signal_suffixes) && !is.na(signal_suffixes) && !all(signal_suffixes == "") && !all(is.na(signal_suffixes))) {
@@ -205,13 +207,16 @@ validity_checks <- function(df, value_types, num_col, denom_col, signal_suffixes
   }
 
   # Drop duplicate rows.
-  duplicate_i <- duplicated(df)
-  if (any(duplicate_i)) {
+  raw_df_rows <- nrow(df)
+  df <- distinct(df)
+  new_df_rows <- nrow(df)
+  if (raw_df_rows != new_df_rows) {
     warning("Data contains duplicate rows, dropping")
-    df <- df[!duplicate_i,]
   }
 
-  if (anyDuplicated(df[, c(refd_col, issued_col, "geo_value", "state_id")])) {
+  if (new_df_rows != nrow(
+    distinct(df, across(c(refd_col, issued_col, "geo_value", "state_id")))
+  )) {
     stop("Data contains multiple entries with differing values for at",
          " least one reference date-issue date-location combination")
   }

changehc/delphi_changehc/load_data.py

Lines changed: 1 addition & 1 deletion
@@ -71,7 +71,7 @@ def load_chng_data(filepath, dropdate, base_geo,
     ), "Counts must be nonnegative"
 
     # aggregate age groups (so data is unique by date and base geography)
-    data = data.groupby([base_geo, Config.DATE_COL]).sum()
+    data = data.groupby([base_geo, Config.DATE_COL]).sum(numeric_only=True)
     data.dropna(inplace=True)  # drop rows with any missing entries
 
     return data

changehc/tests/test_update_sensor.py

Lines changed: 1 addition & 1 deletion
@@ -91,7 +91,7 @@ def test_geo_reindex(self):
             "timestamp": [pd.Timestamp(f'03-{i}-2020') for i in range(1, 14)]})
         data_frame = su_inst.geo_reindex(test_data)
         assert data_frame.shape[0] == multiple*len(su_inst.fit_dates)
-        assert (data_frame.sum() == (4200,19000)).all()
+        assert (data_frame.sum(numeric_only=True) == (4200,19000)).all()
 
     def test_update_sensor(self):
         """Tests that the sensors are properly updated."""

changehc/version.cfg

Lines changed: 1 addition & 1 deletion
@@ -1 +1 @@
-current_version = 0.3.35
+current_version = 0.3.36

claims_hosp/delphi_claims_hosp/load_data.py

Lines changed: 1 addition & 1 deletion
@@ -47,7 +47,7 @@ def load_claims_data(claims_filepath, dropdate, base_geo):
     ), "Claims counts must be nonnegative"
 
     # aggregate age groups (so data is unique by date and base geography)
-    claims_data = claims_data.groupby([base_geo, Config.DATE_COL]).sum()
+    claims_data = claims_data.groupby([base_geo, Config.DATE_COL]).sum(numeric_only=True)
     claims_data.dropna(inplace=True)  # drop rows with any missing entries
 
     return claims_data

claims_hosp/version.cfg

Lines changed: 1 addition & 1 deletion
@@ -1 +1 @@
-current_version = 0.3.35
+current_version = 0.3.36

doctor_visits/delphi_doctor_visits/geo_maps.py

Lines changed: 4 additions & 4 deletions
@@ -49,7 +49,7 @@ def county_to_msa(self, data):
                            from_col="PatCountyFIPS",
                            new_col="cbsa_id")
         data.drop(columns="PatCountyFIPS", inplace=True)
-        data = data.groupby(["ServiceDate", "cbsa_id"]).sum().reset_index()
+        data = data.groupby(["ServiceDate", "cbsa_id"]).sum(numeric_only=True).reset_index()
 
         return data.groupby("cbsa_id"), "cbsa_id"
 
@@ -66,7 +66,7 @@ def county_to_state(self, data):
                            "state_id",
                            from_col="PatCountyFIPS")
         data.drop(columns="PatCountyFIPS", inplace=True)
-        data = data.groupby(["ServiceDate", "state_id"]).sum().reset_index()
+        data = data.groupby(["ServiceDate", "state_id"]).sum(numeric_only=True).reset_index()
 
         return data.groupby("state_id"), "state_id"
 
@@ -83,7 +83,7 @@ def county_to_hhs(self, data):
                            "hhs",
                            from_col="PatCountyFIPS")
         data.drop(columns="PatCountyFIPS", inplace=True)
-        data = data.groupby(["ServiceDate", "hhs"]).sum().reset_index()
+        data = data.groupby(["ServiceDate", "hhs"]).sum(numeric_only=True).reset_index()
 
         return data.groupby("hhs"), "hhs"
 
@@ -100,7 +100,7 @@ def county_to_nation(self, data):
                            "nation",
                            from_col="PatCountyFIPS")
         data.drop(columns="PatCountyFIPS", inplace=True)
-        data = data.groupby(["ServiceDate", "nation"]).sum().reset_index()
+        data = data.groupby(["ServiceDate", "nation"]).sum(numeric_only=True).reset_index()
 
         return data.groupby("nation"), "nation"

doctor_visits/delphi_doctor_visits/sensor.py

Lines changed: 4 additions & 3 deletions
@@ -60,16 +60,17 @@ def fill_dates(y_data, dates):
     last_date = dates[-1]
     cols = y_data.columns
 
+    df_list = [y_data]
     if first_date not in y_data.index:
-        y_data = y_data.append(
+        df_list.append(
             pd.DataFrame(dict.fromkeys(cols, 0.0), columns=cols, index=[first_date])
         )
     if last_date not in y_data.index:
-        y_data = y_data.append(
+        df_list.append(
             pd.DataFrame(dict.fromkeys(cols, 0.0), columns=cols, index=[last_date])
         )
 
-    y_data.sort_index(inplace=True)
+    y_data = pd.concat(df_list).sort_index()
     y_data = y_data.asfreq("D", fill_value=0)
     return y_data
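
Same append removal as in the test changes above, with a different flavor: instead of concatenating immediately, the function now collects the pieces in a list and calls pd.concat once, which also subsumes the old in-place sort_index. A runnable sketch of the pattern with toy data:

    import pandas as pd

    y_data = pd.DataFrame({"count": [5.0]}, index=[pd.Timestamp("2020-06-02")])
    first_date, last_date = pd.Timestamp("2020-06-01"), pd.Timestamp("2020-06-03")
    cols = y_data.columns

    df_list = [y_data]  # gather frames, concatenate once at the end
    if first_date not in y_data.index:
        df_list.append(pd.DataFrame(dict.fromkeys(cols, 0.0), columns=cols, index=[first_date]))
    if last_date not in y_data.index:
        df_list.append(pd.DataFrame(dict.fromkeys(cols, 0.0), columns=cols, index=[last_date]))

    y_data = pd.concat(df_list).sort_index().asfreq("D", fill_value=0)
    print(y_data)  # 2020-06-01 -> 0.0, 2020-06-02 -> 5.0, 2020-06-03 -> 0.0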

doctor_visits/delphi_doctor_visits/update_sensor.py

Lines changed: 1 addition & 1 deletion
@@ -101,7 +101,7 @@ def update_sensor(
     data.dropna(inplace=True)  # drop rows with any missing entries
 
     # aggregate age groups (so data is unique by service date and FIPS)
-    data = data.groupby([Config.DATE_COL, Config.GEO_COL]).sum().reset_index()
+    data = data.groupby([Config.DATE_COL, Config.GEO_COL]).sum(numeric_only=True).reset_index()
     assert np.sum(data.duplicated()) == 0, "Duplicates after age group aggregation"
     assert (data[Config.COUNT_COLS] >= 0).all().all(), "Counts must be nonnegative"

doctor_visits/version.cfg

Lines changed: 1 addition & 1 deletion
@@ -1 +1 @@
-current_version = 0.3.35
+current_version = 0.3.36

dsew_community_profile/delphi_dsew_community_profile/pull.py

Lines changed: 1 addition & 0 deletions
@@ -701,6 +701,7 @@ def generate_prop_signal(df, geo, geo_mapper):
     ).groupby(
         geo
     ).sum(
+        numeric_only=True
     ).reset_index(
     )
     df = pd.merge(df, map_df, left_on="geo_id", right_on=geo, how="inner")
