
Release covidcast-indicators 0.3.36 #1829


Merged · 37 commits · Apr 12, 2023
Commits
6122348
move rough lag filter before validation
nmdefries Apr 4, 2023
ce00933
use distinct in validation
nmdefries Apr 4, 2023
4d913c2
call fips-geo col conversion only after binding input
nmdefries Apr 4, 2023
f916bfe
combine filter with other input steps
nmdefries Apr 4, 2023
f580f05
make sure validation imports dplyr across
nmdefries Apr 4, 2023
ea6e3d7
don't use loc to avoid a future warning about calling int on a single…
nmdefries Apr 4, 2023
e37c736
set numeric_only in geomap sum to avoid concatting strings
nmdefries Apr 4, 2023
fab4c62
replace iteritems
nmdefries Apr 4, 2023
53f0d0c
replace append
nmdefries Apr 4, 2023
f6d564d
make dates comparable
nmdefries Apr 4, 2023
5f97212
changehc
nmdefries Apr 4, 2023
2f92ca7
Merge pull request #1824 from cmu-delphi/bot/sync-prod-main
krivard Apr 4, 2023
3845a1e
claims_hosp
nmdefries Apr 4, 2023
4d7c722
doctor_visits
nmdefries Apr 5, 2023
c077c13
cpr
nmdefries Apr 5, 2023
9ca8530
nchs_mortality
nmdefries Apr 5, 2023
fcbbafc
quidel_covidtest
nmdefries Apr 5, 2023
6a82efc
linting
nmdefries Apr 5, 2023
79ab7c8
Merge branch 'main' into ndefries/pandasv2-fix-tests
nmdefries Apr 5, 2023
4f28fba
unpin pandas 2.0.0
nmdefries Apr 5, 2023
ef60502
Merge branch 'main' into ndefries/backfill/speed3
nmdefries Apr 5, 2023
513a39b
concat outside of loops
nmdefries Apr 5, 2023
1647ea2
Merge pull request #1819 from cmu-delphi/ndefries/backfill/speed3
krivard Apr 6, 2023
510a11e
log when geo-splitting is over
nmdefries Apr 11, 2023
02c8c40
remove everything in output dir
nmdefries Apr 11, 2023
5488518
add age signals to sircal params templates
M5Skid Apr 11, 2023
dd671bf
increase shared memory size to support parallel prediction generation
nmdefries Apr 11, 2023
8875b40
check logs for lack of shared memory error
nmdefries Apr 11, 2023
fb446a6
Merge pull request #1825 from cmu-delphi/ndefries/pandasv2-fix-tests
nmdefries Apr 11, 2023
87a18d7
Merge pull request #1826 from M5Skid/quidel_suppress_sircal
krivard Apr 11, 2023
af03a4b
pin bettermc to last cran-approved version
nmdefries Apr 11, 2023
b55abe8
lower shared memory allocation
nmdefries Apr 11, 2023
7ef2f5b
Merge pull request #1827 from cmu-delphi/ndefries/backfill/mem-log-ma…
nmdefries Apr 12, 2023
c7d6869
Merge pull request #1828 from cmu-delphi/ndefries/backfill/bettermc-n…
nmdefries Apr 12, 2023
af6d5bc
chore: bump delphi_utils to 0.3.13
Apr 12, 2023
8bf743f
chore: bump covidcast-indicators to 0.3.36
Apr 12, 2023
c9dd855
[create-pull-request] automated change
krivard Apr 12, 2023
2 changes: 1 addition & 1 deletion .bumpversion.cfg
@@ -1,5 +1,5 @@
 [bumpversion]
-current_version = 0.3.35
+current_version = 0.3.36
 commit = True
 message = chore: bump covidcast-indicators to {new_version}
 tag = False
2 changes: 1 addition & 1 deletion _delphi_utils_python/.bumpversion.cfg
@@ -1,5 +1,5 @@
 [bumpversion]
-current_version = 0.3.12
+current_version = 0.3.13
 commit = True
 message = chore: bump delphi_utils to {new_version}
 tag = False
2 changes: 1 addition & 1 deletion _delphi_utils_python/delphi_utils/__init__.py
@@ -15,4 +15,4 @@
 from .nancodes import Nans
 from .weekday import Weekday

-__version__ = "0.3.12"
+__version__ = "0.3.13"
2 changes: 1 addition & 1 deletion _delphi_utils_python/delphi_utils/flash_eval/eval_day.py
@@ -147,7 +147,7 @@ def output(evd_ranking, day, lag, signal, logger):
     """
     starter_link = f"{HTML_LINK}{(day+pd.Timedelta(f'{lag}d')).strftime('%Y-%m_%d')}"
     p_text = ""
-    for j, (index, value) in enumerate(evd_ranking.sort_values(ascending=False).iteritems()):
+    for j, (index, value) in enumerate(evd_ranking.sort_values(ascending=False).items()):
         if j < 30:
             start_link = f"{starter_link},{day.strftime('%Y-%m_%d')},{index}"
             p_text += f"\t{start_link}|*{index}*, {'{:.2f}'.format(value)}>\n"
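
For context on the .iteritems() changes in this release: pandas 2.0 removed Series.iteritems() (deprecated since 1.5), and Series.items() yields the same (index, value) pairs on both 1.x and 2.x. A minimal sketch of the pattern, with made-up ranking values standing in for evd_ranking:

    import pandas as pd

    # Hypothetical stand-in for the evd_ranking series used above.
    evd_ranking = pd.Series({"sig_a": 0.91, "sig_b": 0.42})

    # .items() is the drop-in replacement for the removed .iteritems().
    for index, value in evd_ranking.sort_values(ascending=False).items():
        print(f"{index}: {value:.2f}")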
6 changes: 3 additions & 3 deletions _delphi_utils_python/delphi_utils/geomap.py
@@ -401,9 +401,9 @@ def replace_geocode(
             df.drop("weight", axis=1, inplace=True)

         if not date_col is None:
-            df = df.groupby([date_col, new_col]).sum().reset_index()
+            df = df.groupby([date_col, new_col]).sum(numeric_only=True).reset_index()
         else:
-            df = df.groupby([new_col]).sum().reset_index()
+            df = df.groupby([new_col]).sum(numeric_only=True).reset_index()
         return df

     def add_population_column(self, data, geocode_type, geocode_col=None, dropna=True):
@@ -501,7 +501,7 @@ def fips_to_megacounty(
         )
         data.set_index([fips_col, date_col], inplace=True)
         data = data.join(mega_data)
-        data = data.reset_index().groupby([date_col, mega_col]).sum()
+        data = data.reset_index().groupby([date_col, mega_col]).sum(numeric_only=True)
         return data.reset_index()

     def as_mapper_name(self, geo_type, state="state_id"):
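
The numeric_only changes in this file (and in the indicator packages below) guard against a pandas 2.0 behavior change: groupby aggregations now default to numeric_only=False, so string columns are summed too, which concatenates them instead of silently dropping them as 1.x did. A small sketch with invented data:

    import pandas as pd

    df = pd.DataFrame({
        "geo": ["pa", "pa", "ny"],
        "state_name": ["Pennsylvania", "Pennsylvania", "New York"],
        "val": [1.0, 2.0, 3.0],
    })

    # Without numeric_only=True, pandas 2.x would also "sum" state_name,
    # yielding strings like "PennsylvaniaPennsylvania" in the result.
    print(df.groupby("geo").sum(numeric_only=True))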
2 changes: 1 addition & 1 deletion _delphi_utils_python/delphi_utils/validator/dynamic.py
@@ -195,7 +195,7 @@ def replace_first_six(df, start_date):
                 start_date = self.params.time_window.start_date)

         if not error_df.empty:
-            for index, value in error_df.iteritems():
+            for index, value in error_df.items():
                 report.add_raised_error(
                     ValidationFailure("check_val_missing",
                                       geo_type=geo_type,
4 changes: 2 additions & 2 deletions _delphi_utils_python/setup.py
@@ -14,7 +14,7 @@
     "mock",
     "moto",
     "numpy",
-    "pandas>=1.1.0,<2",
+    "pandas>=1.1.0",
     "pydocstyle",
     "pylint==2.8.3",
     "pytest",
@@ -26,7 +26,7 @@

 setup(
     name="delphi_utils",
-    version="0.3.12",
+    version="0.3.13",
     description="Shared Utility Functions for Indicators",
     long_description=long_description,
     long_description_content_type="text/markdown",
16 changes: 8 additions & 8 deletions _delphi_utils_python/tests/test_export.py
@@ -250,15 +250,15 @@ def test_export_with_null_removal(self):
         """Test that `remove_null_samples = True` removes entries with null samples."""
         _clean_directory(self.TEST_DIR)

-        df_with_nulls = self.DF.copy().append(
-            {
+        df_with_nulls = pd.concat(
+            [self.DF.copy(),
+             pd.DataFrame({
                 "geo_id": "66666",
                 "timestamp": datetime(2020, 6, 6),
                 "val": 10,
                 "se": 0.2,
                 "sample_size": pd.NA,
-            },
-            ignore_index=True,
+             }, index = [0])]
         )

         create_export_csv(
@@ -283,15 +283,15 @@ def test_export_without_null_removal(self):
         """Test that `remove_null_samples = False` does not remove entries with null samples."""
         _clean_directory(self.TEST_DIR)

-        df_with_nulls = self.DF.copy().append(
-            {
+        df_with_nulls = pd.concat(
+            [self.DF.copy(),
+             pd.DataFrame({
                 "geo_id": "66666",
                 "timestamp": datetime(2020, 6, 6),
                 "val": 10,
                 "se": 0.2,
                 "sample_size": pd.NA,
-            },
-            ignore_index=True,
+             }, index = [0])]
         )

         create_export_csv(
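
DataFrame.append(), used in the old version of these tests, was deprecated in pandas 1.4 and removed in 2.0; the replacement is a single pd.concat over the frames to combine. A minimal sketch of the same substitution, with invented values:

    import pandas as pd

    base = pd.DataFrame({"geo_id": ["42003"], "val": [1.0]})
    new_row = {"geo_id": "66666", "val": 10.0}

    # pandas 1.x: combined = base.append(new_row, ignore_index=True)
    # pandas 2.x: wrap the row in a one-row DataFrame, then concat once.
    combined = pd.concat([base, pd.DataFrame(new_row, index=[0])], ignore_index=True)
    print(combined)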
2 changes: 1 addition & 1 deletion _delphi_utils_python/tests/test_geomap.py
@@ -196,7 +196,7 @@ def test_load_fips_chngfips_table(self, geomapper):

     def test_load_jhu_uid_fips_table(self, geomapper):
         jhu_data = geomapper.get_crosswalk(from_code="jhu_uid", to_code="fips")
-        assert np.allclose(jhu_data.groupby("jhu_uid").sum(), 1.0)
+        assert np.allclose(jhu_data.groupby("jhu_uid").sum(numeric_only=True), 1.0)

     def test_load_zip_hrr_table(self, geomapper):
         zip_data = geomapper.get_crosswalk(from_code="zip", to_code="hrr")
4 changes: 2 additions & 2 deletions _delphi_utils_python/tests/validator/test_dynamic.py
@@ -48,7 +48,7 @@ def test_half_padding(self):
             ref_df, test_df, ref_date, ref_date)

         # Check it only takes missing dates - so the last 5 dates
-        assert new_ref_df.time_value.max() == datetime.strptime("2021-01-11",
+        assert new_ref_df.time_value.max().date() == datetime.strptime("2021-01-11",
                                                                 "%Y-%m-%d").date()
         assert new_ref_df.shape[0] == 11
         assert new_ref_df["val"].iloc[5] == 2
@@ -71,7 +71,7 @@ def test_full_padding(self):
             ref_df, test_df, ref_date, ref_date)

         # Check it only takes missing dates up to the day before the reference
-        assert new_ref_df.time_value.max() == datetime.strptime("2021-01-15",
+        assert new_ref_df.time_value.max().date() == datetime.strptime("2021-01-15",
                                                                 "%Y-%m-%d").date()
         assert new_ref_df.shape[0] == 15
         assert new_ref_df["val"].iloc[5] == 2
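
The .date() additions fix a type mismatch (the "make dates comparable" commit): under pandas 2.0 the padded frames end up with time_value as datetime64, so .max() returns a pandas Timestamp, and a Timestamp never compares equal to a plain datetime.date. Converting with .date() puts both sides of the assertion in the same type. A sketch:

    from datetime import datetime
    import pandas as pd

    ts = pd.Timestamp("2021-01-11")  # what time_value.max() returns
    target = datetime.strptime("2021-01-11", "%Y-%m-%d").date()

    print(ts == target)         # False: Timestamp vs. date never matches
    print(ts.date() == target)  # True: both sides are datetime.date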
13 changes: 8 additions & 5 deletions ansible/templates/sir_complainsalot-params-prod.json.j2
@@ -32,11 +32,14 @@
       "max_age":6,
       "maintainers": ["U01AP8GSWG3","U01069KCRS7"],
       "retired-signals": [
-        "raw_pct_negative","smoothed_pct_negative","raw_tests_per_device","smoothed_tests_per_device",
-        ["covid_ag_raw_pct_positive_age_0_4", "hrr"], ["covid_ag_raw_pct_positive_age_0_4", "msa"],
-        ["covid_ag_raw_pct_positive_age_5_17", "hrr"], ["covid_ag_raw_pct_positive_age_5_17", "msa"],
-        ["covid_ag_raw_pct_positive_age_50_64", "hrr"], ["covid_ag_raw_pct_positive_age_50_64", "msa"],
-        ["covid_ag_raw_pct_positive_age_65plus", "hrr"], ["covid_ag_raw_pct_positive_age_65plus", "msa"]
+        "raw_pct_negative", "smoothed_pct_negative",
+        "raw_tests_per_device", "smoothed_tests_per_device",
+        "covid_ag_raw_pct_positive_age_0_4", "covid_ag_smoothed_pct_positive_age_0_4",
+        "covid_ag_raw_pct_positive_age_5_17", "covid_ag_smoothed_pct_positive_age_5_17",
+        "covid_ag_raw_pct_positive_age_18_49", "covid_ag_smoothed_pct_positive_age_18_49",
+        "covid_ag_raw_pct_positive_age_50_64", "covid_ag_smoothed_pct_positive_age_50_64",
+        "covid_ag_raw_pct_positive_age_65plus", "covid_ag_smoothed_pct_positive_age_65plus",
+        "covid_ag_raw_pct_positive_age_0_17", "covid_ag_smoothed_pct_positive_age_0_17"
       ]
     },
     "nchs-mortality": {
1 change: 1 addition & 0 deletions backfill_corrections/Dockerfile
@@ -26,6 +26,7 @@ RUN install2.r --error \

 RUN --mount=type=secret,id=GITHUB_TOKEN \
     export GITHUB_PAT="$(cat /run/secrets/GITHUB_TOKEN)" && \
+    R -e 'devtools::install_version("bettermc", version = "1.1.2")' && \
     R -e 'devtools::install_github("cmu-delphi/covidcast", ref = "evalcast", subdir = "R-packages/evalcast")' && \
     R -e 'devtools::install_github(repo="ryantibs/quantgen", subdir="quantgen")' && \
     R -e 'install.packages(list.files(path="/opt/gurobi/linux64/R/", pattern="^gurobi_.*[.]tar[.]gz$", full.names = TRUE), repos=NULL)'
5 changes: 4 additions & 1 deletion backfill_corrections/Makefile
@@ -67,6 +67,8 @@ run-local: setup-dirs
	grep "backfill correction completed successfully" $(LOG_FILE)
	grep "scheduled core" $(LOG_FILE) ; \
	[ "$$?" -eq 1 ]
+	grep "SIGBUS" $(LOG_FILE) ; \
+	[ "$$?" -eq 1 ]

 gurobi.lic:
	@echo WLSACCESSID=$(GRB_WLSACCESSID) >> $(GRB_LICENSE_FILE)
@@ -81,6 +83,7 @@ run:
		-v "`realpath $(USR_CACHE_DIR)`:/backfill_corrections/${CACHE_DIR}" \
		-v "${PWD}"/params.json:/backfill_corrections/params.host.json \
		--env GRB_LICENSE_FILE=$(GRB_LICENSE_FILE) \
+		--shm-size=2gb \
		-it "${DOCKER_IMAGE}:${DOCKER_TAG}" \
		/bin/bash -c "cp params.host.json params.json && make gurobi.lic && make standardize-dirs && make run-local OPTIONS=\"${OPTIONS}\" LOG_FILE=${LOG_FILE}"

@@ -124,7 +127,7 @@ standardize-dirs:
	$(PYTHON) -m delphi_utils set export_dir $(EXPORT_DIR)

 clean:
-	rm -f $(USR_EXPORT_DIR)/*.csv.gz
+	rm -rf $(USR_EXPORT_DIR)/*

 coverage:
	Rscript -e 'covr::package_coverage("delphiBackfillCorrection")'
1 change: 1 addition & 0 deletions backfill_corrections/delphiBackfillCorrection/NAMESPACE
@@ -30,6 +30,7 @@ importFrom(dplyr,arrange)
 importFrom(dplyr,bind_cols)
 importFrom(dplyr,bind_rows)
 importFrom(dplyr,desc)
+importFrom(dplyr,distinct)
 importFrom(dplyr,everything)
 importFrom(dplyr,filter)
 importFrom(dplyr,full_join)
15 changes: 6 additions & 9 deletions backfill_corrections/delphiBackfillCorrection/R/main.R
@@ -18,8 +18,6 @@
 run_backfill <- function(df, params,
                          refd_col = "time_value", lag_col = "lag", issued_col = "issue_date",
                          signal_suffixes = c(""), indicator = "", signal = "") {
-  df <- filter(df, lag < params$ref_lag + 30) # a rough filtration to save memory
-
   geo_levels <- params$geo_levels
   if ("state" %in% geo_levels) {
     # If state included, do it last since state processing modifies the
@@ -62,6 +60,7 @@ run_backfill <- function(df, params,
     msg_ts("Splitting data into geo groups")
     group_dfs <- group_split(df, geo_value)

+    msg_ts("Beginning training and/or testing...")
     # Build model for each location
     apply_fn <- ifelse(params$parallel, mclapply, lapply)
     result <- apply_fn(group_dfs, function(subdf) {
@@ -317,14 +316,12 @@ main <- function(params,

   msg_ts("Reading in and combining associated files")
   input_data <- lapply(
-    files_list,
-    function(file) {
-      # refd_col and issued_col read in as strings
-      read_data(file) %>%
-        fips_to_geovalue()
-    }
+    files_list, read_data # refd_col and issued_col read in as strings
   ) %>%
-    bind_rows()
+    bind_rows() %>%
+    fips_to_geovalue() %>%
+    # a rough filter to save memory
+    filter(lag < params$ref_lag + 30)

   if (nrow(input_data) == 0) {
     warning("No data available for indicator ", input_group$indicator,
13 changes: 9 additions & 4 deletions backfill_corrections/delphiBackfillCorrection/R/utils.R
@@ -169,6 +169,8 @@ create_dir_not_exist <- function(path)
 #' @return list of input dataframe augmented with lag column, if it
 #'   didn't already exist, and character vector of one or two value
 #'   column names, depending on requested `value_type`
+#'
+#' @importFrom dplyr distinct across
 validity_checks <- function(df, value_types, num_col, denom_col, signal_suffixes,
                             refd_col = "time_value", lag_col = "lag", issued_col = "issue_date") {
   if (!missing(signal_suffixes) && !is.na(signal_suffixes) && !all(signal_suffixes == "") && !all(is.na(signal_suffixes))) {
@@ -205,13 +207,16 @@ validity_checks <- function(df, value_types, num_col, denom_col, signal_suffixes
   }

   # Drop duplicate rows.
-  duplicate_i <- duplicated(df)
-  if (any(duplicate_i)) {
+  raw_df_rows <- nrow(df)
+  df <- distinct(df)
+  new_df_rows <- nrow(df)
+  if (raw_df_rows != new_df_rows) {
     warning("Data contains duplicate rows, dropping")
-    df <- df[!duplicate_i,]
   }

-  if (anyDuplicated(df[, c(refd_col, issued_col, "geo_value", "state_id")])) {
+  if (new_df_rows != nrow(
+    distinct(df, across(c(refd_col, issued_col, "geo_value", "state_id")))
+  )) {
     stop("Data contains multiple entries with differing values for at",
          " least one reference date-issue date-location combination")
   }
2 changes: 1 addition & 1 deletion changehc/delphi_changehc/load_data.py
@@ -71,7 +71,7 @@ def load_chng_data(filepath, dropdate, base_geo,
     ), "Counts must be nonnegative"

     # aggregate age groups (so data is unique by date and base geography)
-    data = data.groupby([base_geo, Config.DATE_COL]).sum()
+    data = data.groupby([base_geo, Config.DATE_COL]).sum(numeric_only=True)
     data.dropna(inplace=True)  # drop rows with any missing entries

     return data
2 changes: 1 addition & 1 deletion changehc/tests/test_update_sensor.py
@@ -91,7 +91,7 @@ def test_geo_reindex(self):
             "timestamp": [pd.Timestamp(f'03-{i}-2020') for i in range(1, 14)]})
         data_frame = su_inst.geo_reindex(test_data)
         assert data_frame.shape[0] == multiple*len(su_inst.fit_dates)
-        assert (data_frame.sum() == (4200,19000)).all()
+        assert (data_frame.sum(numeric_only=True) == (4200,19000)).all()

     def test_update_sensor(self):
         """Tests that the sensors are properly updated."""
2 changes: 1 addition & 1 deletion changehc/version.cfg
@@ -1 +1 @@
-current_version = 0.3.35
+current_version = 0.3.36
2 changes: 1 addition & 1 deletion claims_hosp/delphi_claims_hosp/load_data.py
@@ -47,7 +47,7 @@ def load_claims_data(claims_filepath, dropdate, base_geo):
     ), "Claims counts must be nonnegative"

     # aggregate age groups (so data is unique by date and base geography)
-    claims_data = claims_data.groupby([base_geo, Config.DATE_COL]).sum()
+    claims_data = claims_data.groupby([base_geo, Config.DATE_COL]).sum(numeric_only=True)
     claims_data.dropna(inplace=True)  # drop rows with any missing entries

     return claims_data
2 changes: 1 addition & 1 deletion claims_hosp/version.cfg
@@ -1 +1 @@
-current_version = 0.3.35
+current_version = 0.3.36
8 changes: 4 additions & 4 deletions doctor_visits/delphi_doctor_visits/geo_maps.py
@@ -49,7 +49,7 @@ def county_to_msa(self, data):
                                         from_col="PatCountyFIPS",
                                         new_col="cbsa_id")
         data.drop(columns="PatCountyFIPS", inplace=True)
-        data = data.groupby(["ServiceDate", "cbsa_id"]).sum().reset_index()
+        data = data.groupby(["ServiceDate", "cbsa_id"]).sum(numeric_only=True).reset_index()

         return data.groupby("cbsa_id"), "cbsa_id"

@@ -66,7 +66,7 @@ def county_to_state(self, data):
                                         "state_id",
                                         from_col="PatCountyFIPS")
         data.drop(columns="PatCountyFIPS", inplace=True)
-        data = data.groupby(["ServiceDate", "state_id"]).sum().reset_index()
+        data = data.groupby(["ServiceDate", "state_id"]).sum(numeric_only=True).reset_index()

         return data.groupby("state_id"), "state_id"

@@ -83,7 +83,7 @@ def county_to_hhs(self, data):
                                         "hhs",
                                         from_col="PatCountyFIPS")
         data.drop(columns="PatCountyFIPS", inplace=True)
-        data = data.groupby(["ServiceDate", "hhs"]).sum().reset_index()
+        data = data.groupby(["ServiceDate", "hhs"]).sum(numeric_only=True).reset_index()

         return data.groupby("hhs"), "hhs"

@@ -100,7 +100,7 @@ def county_to_nation(self, data):
                                         "nation",
                                         from_col="PatCountyFIPS")
         data.drop(columns="PatCountyFIPS", inplace=True)
-        data = data.groupby(["ServiceDate", "nation"]).sum().reset_index()
+        data = data.groupby(["ServiceDate", "nation"]).sum(numeric_only=True).reset_index()

         return data.groupby("nation"), "nation"
7 changes: 4 additions & 3 deletions doctor_visits/delphi_doctor_visits/sensor.py
@@ -60,16 +60,17 @@ def fill_dates(y_data, dates):
     last_date = dates[-1]
     cols = y_data.columns

+    df_list = [y_data]
     if first_date not in y_data.index:
-        y_data = y_data.append(
+        df_list.append(
             pd.DataFrame(dict.fromkeys(cols, 0.0), columns=cols, index=[first_date])
         )
     if last_date not in y_data.index:
-        y_data = y_data.append(
+        df_list.append(
             pd.DataFrame(dict.fromkeys(cols, 0.0), columns=cols, index=[last_date])
         )

-    y_data.sort_index(inplace=True)
+    y_data = pd.concat(df_list).sort_index()
     y_data = y_data.asfreq("D", fill_value=0)
     return y_data
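
fill_dates now collects the pieces in df_list and concatenates once, which is both the pandas 2.0 replacement for the removed DataFrame.append() and the idea behind the "concat outside of loops" commit elsewhere in this release: each append or pairwise concat copies everything accumulated so far, so growing a frame piece-by-piece is quadratic, while one concat over a list is linear. A sketch with invented frames:

    import pandas as pd

    frames = [pd.DataFrame({"val": [i]}, index=[i]) for i in range(3)]

    # Quadratic growth: every iteration copies the accumulated frame.
    # out = pd.DataFrame()
    # for f in frames:
    #     out = pd.concat([out, f])

    # Linear: gather the pieces, then concatenate once.
    out = pd.concat(frames).sort_index()
    print(out)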
2 changes: 1 addition & 1 deletion doctor_visits/delphi_doctor_visits/update_sensor.py
@@ -101,7 +101,7 @@ def update_sensor(
     data.dropna(inplace=True)  # drop rows with any missing entries

     # aggregate age groups (so data is unique by service date and FIPS)
-    data = data.groupby([Config.DATE_COL, Config.GEO_COL]).sum().reset_index()
+    data = data.groupby([Config.DATE_COL, Config.GEO_COL]).sum(numeric_only=True).reset_index()
     assert np.sum(data.duplicated()) == 0, "Duplicates after age group aggregation"
     assert (data[Config.COUNT_COLS] >= 0).all().all(), "Counts must be nonnegative"
2 changes: 1 addition & 1 deletion doctor_visits/version.cfg
@@ -1 +1 @@
-current_version = 0.3.35
+current_version = 0.3.36
@@ -701,6 +701,7 @@ def generate_prop_signal(df, geo, geo_mapper):
     ).groupby(
         geo
     ).sum(
+        numeric_only=True
     ).reset_index(
     )
     df = pd.merge(df, map_df, left_on="geo_id", right_on=geo, how="inner")