Skip to content

Commit 61e5aae

Browse files
authored
Merge pull request #1741 from cmu-delphi/release/indicators_v0.3.27_utils_v0.3.6
Release covidcast-indicators 0.3.27
2 parents ae56598 + fd7a64b commit 61e5aae

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

41 files changed

+874
-213
lines changed

.bumpversion.cfg

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
[bumpversion]
2-
current_version = 0.3.26
2+
current_version = 0.3.27
33
commit = True
44
message = chore: bump covidcast-indicators to {new_version}
55
tag = False
Lines changed: 25 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,25 @@
1+
{
2+
"ref_lag": 60,
3+
"input_dir": "/common/backfill/",
4+
"cache_dir": "./cache",
5+
"testing_window": 1,
6+
"training_days": 270,
7+
"lag_pad":2,
8+
"export_dir": "./receiving",
9+
"geo_levels": ["state", "county"],
10+
"value_types": ["count", "fraction"],
11+
"num_col": "num",
12+
"denom_col": "den",
13+
"post": {
14+
"aws_credentials": {
15+
"aws_access_key_id": "{{ backfillcorr_aws_access_key_id }}",
16+
"aws_secret_access_key": "{{ backfillcorr_aws_secret_access_key }}"
17+
},
18+
"bucket_name": "{{ backfillcorr_aws_bucket_name }}"
19+
},
20+
"gurobi": {
21+
"GRB_LICENSEID": "{{ grb_licenseid }}",
22+
"GRB_WLSACCESSID": "{{ grb_wlsaccessid }}",
23+
"GRB_WLSSECRET": "{{ grb_wlssecret }}"
24+
}
25+
}

backfill_corrections/.gitignore

Lines changed: 35 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,35 @@
1+
.tar.gz
2+
receiving
3+
input
4+
cache
5+
common/backfill
6+
params.json
7+
8+
.DS_Store
9+
10+
# History files
11+
.Rhistory
12+
.Rapp.history
13+
14+
# Session Data files
15+
.RData
16+
17+
# Example code in package build process
18+
*-Ex.R
19+
20+
# Output files from R CMD build
21+
/*.tar.gz
22+
23+
# Output files from R CMD check
24+
/*.Rcheck/
25+
26+
# RStudio files
27+
.Rproj.user/
28+
29+
# OAuth2 token, see https://github.com/hadley/httr/releases/tag/v0.3
30+
.httr-oauth
31+
32+
# knitr and R markdown default cache directories
33+
/*_cache/
34+
/cache/
35+

backfill_corrections/Dockerfile

Lines changed: 36 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,36 @@
1+
FROM gurobi/optimizer:9.5.1 as gurobi
2+
3+
## Install R and tidyverse
4+
FROM rocker/tidyverse:latest
5+
6+
WORKDIR /opt/gurobi
7+
COPY --from=gurobi /opt/gurobi .
8+
9+
ENV GUROBI_HOME /opt/gurobi/linux64
10+
ENV PATH $PATH:$GUROBI_HOME/bin
11+
ENV LD_LIBRARY_PATH $GUROBI_HOME/lib
12+
13+
## Install backfill_corrections package and dependencies
14+
# Use delphi's timezone
15+
RUN ln -s -f /usr/share/zoneinfo/America/New_York /etc/localtime
16+
17+
RUN apt-get update && apt-get install -qq -y \
18+
libglpk-dev\
19+
python3-venv \
20+
python3-dev
21+
22+
RUN install2.r --error \
23+
roxygen2 \
24+
zoo \
25+
Rglpk \
26+
argparser
27+
28+
RUN R -e 'devtools::install_github("cmu-delphi/covidcast", ref = "evalcast", subdir = "R-packages/evalcast")' && \
29+
R -e 'devtools::install_github(repo="ryantibs/quantgen", subdir="quantgen")' && \
30+
R -e 'install.packages(list.files(path="/opt/gurobi/linux64/R/", pattern="^gurobi_.*[.]tar[.]gz$", full.names = TRUE), repos=NULL)'
31+
32+
WORKDIR /backfill_corrections/
33+
ADD ./delphiBackfillCorrection ./delphiBackfillCorrection/
34+
ADD [ "Makefile", "run.R", "./" ]
35+
36+
RUN make lib && make install

backfill_corrections/Makefile

Lines changed: 107 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -1,26 +1,126 @@
11
SHELL:=/bin/bash
22

3-
TODAY:=$(shell date -u +"%Y-%m-%d")
4-
CURR_TIME:=$(shell date -u +"%Hh%Mm%Ss")
5-
LOG_FILE:=$(TODAY)_$(CURR_TIME).log
3+
# Change training options during `make` call via `make <command> OPTIONS="<options>"`
4+
# Allowed OPTIONS flags are `--train_models` and `--make_predictions`
5+
OPTIONS=
6+
7+
PYTHON:=env/bin/python
8+
# Capture the user-provided dirs once, at parse time (`:=`). With a recursive
# `=`, every reference re-runs the lookup against `params.json`, so once
# `standardize-dirs` has rewritten the params, `run` and `teardown` would see
# the standardized names instead of the user's original paths.
USR_INPUT_DIR:=$(shell $(PYTHON) -m delphi_utils get input_dir)
9+
USR_CACHE_DIR:=$(shell $(PYTHON) -m delphi_utils get cache_dir)
10+
USR_EXPORT_DIR:=$(shell $(PYTHON) -m delphi_utils get export_dir)
11+
12+
# Gurobi license
13+
GRB_LICENSE_FILE=./gurobi.lic
14+
GRB_WLSACCESSID=$(shell $(PYTHON) -m delphi_utils get gurobi.GRB_WLSACCESSID)
15+
GRB_WLSSECRET=$(shell $(PYTHON) -m delphi_utils get gurobi.GRB_WLSSECRET)
16+
GRB_LICENSEID=$(shell $(PYTHON) -m delphi_utils get gurobi.GRB_LICENSEID)
17+
18+
# AWS access info
19+
AWS_KEY_ID=$(shell $(PYTHON) -m delphi_utils get post.aws_credentials.aws_access_key_id)
20+
AWS_SECRET_KEY=$(shell $(PYTHON) -m delphi_utils get post.aws_credentials.aws_secret_access_key)
21+
S3_BUCKET=$(shell $(PYTHON) -m delphi_utils get post.bucket_name)
22+
23+
DOCKER_IMAGE=ghcr.io/cmu-delphi/covidcast-indicators-backfill_corrections
24+
DOCKER_TAG=latest
25+
26+
# Static dir names for use inside Docker container
27+
INPUT_DIR=input
28+
CACHE_DIR=cache
29+
LOG_DIR=logs
30+
EXPORT_DIR=receiving
31+
32+
PWD=$(shell pwd)
33+
34+
# System time and date
35+
TODAY:=$(shell date +"%Y-%m-%d")
36+
CURR_TIME:=$(shell date +"%Hh%Mm%Ss")
37+
38+
LOG_FILE:=$(LOG_DIR)/$(TODAY)_$(CURR_TIME).log
639

740
default:
841
@echo No default implemented yet
942

10-
install: dev
43+
install: install-R install-python
1144

12-
dev: delphiBackfillCorrection_1.0.tar.gz
45+
install-R: delphiBackfillCorrection_1.0.tar.gz
1346
R CMD INSTALL delphiBackfillCorrection_1.0.tar.gz
1447

48+
install-python:
49+
if [[ `python3 -c 'import sys; print(sys.version_info.minor)'` -lt 8 ]]; then \
50+
echo 'python must be version 3.8 or higher'; \
51+
exit 1; \
52+
fi
53+
python3 -m venv env
54+
source env/bin/activate && \
55+
pip install wheel && \
56+
pip install delphi_utils
57+
1558
lib:
1659
R -e 'roxygen2::roxygenise("delphiBackfillCorrection")'
1760

18-
run-R:
19-
time Rscript run.R 2>&1 | tee $(LOG_FILE)
61+
run-local: setup-dirs
62+
time Rscript run.R $(OPTIONS) 2>&1 | tee $(LOG_FILE)
2063
grep "backfill correction completed successfully" $(LOG_FILE)
2164
grep "scheduled core" $(LOG_FILE) ; \
2265
[ "$$?" -eq 1 ]
2366

67+
gurobi.lic:
68+
@echo WLSACCESSID=$(GRB_WLSACCESSID) >> $(GRB_LICENSE_FILE)
69+
@echo WLSSECRET=$(GRB_WLSSECRET) >> $(GRB_LICENSE_FILE)
70+
@echo LICENSEID=$(GRB_LICENSEID) >> $(GRB_LICENSE_FILE)
71+
72+
run:
73+
docker run --rm --pull=always \
74+
-v "${PWD}/${LOG_DIR}:/backfill_corrections/${LOG_DIR}" \
75+
-v "`realpath $(USR_EXPORT_DIR)`:/backfill_corrections/${EXPORT_DIR}" \
76+
-v "`realpath $(USR_INPUT_DIR)`:/backfill_corrections/${INPUT_DIR}" \
77+
-v "`realpath $(USR_CACHE_DIR)`:/backfill_corrections/${CACHE_DIR}" \
78+
-v "${PWD}"/params.json:/backfill_corrections/params.json \
79+
--env GRB_LICENSE_FILE=$(GRB_LICENSE_FILE) \
80+
-it "${DOCKER_IMAGE}:${DOCKER_TAG}" \
81+
/bin/bash -c "make gurobi.lic && make run-local OPTIONS=\"${OPTIONS}\""
82+
83+
# Upload the exported `*.csv.gz` files to the S3 bucket. The copy source must
# be the export dir (not the input dir) so the files uploaded are the same
# ones counted in the success message written to the log.
publish:
84+
aws configure set aws_access_key_id $(AWS_KEY_ID)
85+
aws configure set aws_secret_access_key $(AWS_SECRET_KEY)
86+
aws s3 cp $(USR_EXPORT_DIR) $(S3_BUCKET)/ --recursive --exclude "*" --include "*.csv.gz" --acl public-read
87+
echo "SUCCESS: published `ls -1 $(USR_EXPORT_DIR)/*.csv.gz | wc -l` files to the S3 bucket" >> $(LOG_FILE)
88+
89+
pipeline: setup-dirs standardize-dirs run publish teardown clean
90+
91+
# Make sure all user-specified dirs exist locally; create them if not.
92+
# Use `-d` (is a directory) rather than `-f` (is a regular file): `-f` never
# matches an existing dir, and it would skip `mkdir` when a plain file sits at
# the path, leaving no directory for later steps.
setup-dirs:
93+
[ -d $(USR_INPUT_DIR) ] || mkdir -p $(USR_INPUT_DIR)
94+
[ -d $(USR_CACHE_DIR) ] || mkdir -p $(USR_CACHE_DIR)
95+
[ -d $(USR_EXPORT_DIR) ] || mkdir -p $(USR_EXPORT_DIR)
96+
[ -d $(LOG_DIR) ] || mkdir -p $(LOG_DIR)
97+
98+
# Reconfigure `params.json` to use fixed dir names, INPUT_DIR, etc, as defined
99+
# above.
100+
#
101+
# This is a convenience for working with Docker. It allows local dirs to all
102+
# be mounted in the Docker container's working directory,
103+
# `/backfill_corrections/` regardless of the actual locations of the local
104+
# dirs and whether their locations are provided in `params.json` as absolute
105+
# or relative paths.
106+
#
107+
# (An alternative approach would be to check if the user-provided dir paths
108+
# are each absolute or relative first, and only concat relative paths with
109+
# `/backfill_corrections/` in the mount step.)
110+
standardize-dirs:
111+
$(PYTHON) -m delphi_utils set input_dir $(INPUT_DIR)
112+
$(PYTHON) -m delphi_utils set cache_dir $(CACHE_DIR)
113+
$(PYTHON) -m delphi_utils set export_dir $(EXPORT_DIR)
114+
115+
clean:
116+
rm -f $(USR_EXPORT_DIR)/*.csv.gz
117+
118+
# Restore dir names in params to user-provided values.
119+
teardown:
120+
$(PYTHON) -m delphi_utils set input_dir $(USR_INPUT_DIR)
121+
$(PYTHON) -m delphi_utils set cache_dir $(USR_CACHE_DIR)
122+
$(PYTHON) -m delphi_utils set export_dir $(USR_EXPORT_DIR)
123+
24124
coverage:
25125
Rscript -e 'covr::package_coverage("delphiBackfillCorrection")'
26126

backfill_corrections/delphiBackfillCorrection/NAMESPACE

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,7 @@ export(add_dayofweek)
55
export(add_shift)
66
export(add_sqrtscale)
77
export(add_weekofmonth)
8+
export(assert)
89
export(create_dir_not_exist)
910
export(data_filteration)
1011
export(evaluate)
@@ -15,7 +16,9 @@ export(frac_adj_with_pseudo)
1516
export(get_7dav)
1617
export(main)
1718
export(model_training_and_testing)
19+
export(msg_ts)
1820
export(read_data)
21+
export(read_params)
1922
export(run_backfill)
2023
import(covidcast)
2124
importFrom(arrow,read_parquet)
@@ -29,9 +32,11 @@ importFrom(dplyr,filter)
2932
importFrom(dplyr,group_by)
3033
importFrom(dplyr,group_split)
3134
importFrom(dplyr,if_else)
35+
importFrom(dplyr,mutate)
3236
importFrom(dplyr,pull)
3337
importFrom(dplyr,select)
3438
importFrom(dplyr,summarize)
39+
importFrom(dplyr,ungroup)
3540
importFrom(evalcast,weighted_interval_score)
3641
importFrom(jsonlite,read_json)
3742
importFrom(lubridate,day)

backfill_corrections/delphiBackfillCorrection/R/beta_prior_estimation.R

Lines changed: 19 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -48,7 +48,8 @@ objective <- function(theta, x, prob, ...) {
4848
#' @param start the initialization of the points in nlm
4949
#' @param base_pseudo_denom the pseudo counts added to denominator if little data for training
5050
#' @param base_pseudo_num the pseudo counts added to numerator if little data for training
51-
#' @param training_end_date the most recent training date
51+
#' @template training_end_date-template
52+
#' @template training_start_date-template
5253
#' @param model_save_dir directory containing trained models
5354
#'
5455
#' @importFrom stats nlm predict
@@ -58,7 +59,8 @@ objective <- function(theta, x, prob, ...) {
5859
est_priors <- function(train_data, prior_test_data, geo, value_type, dw, taus,
5960
covariates, response, lp_solver, lambda,
6061
indicator, signal, geo_level, signal_suffix,
61-
training_end_date, model_save_dir, start=c(0, log(10)),
62+
training_end_date, training_start_date,
63+
model_save_dir, start=c(0, log(10)),
6264
base_pseudo_denom=1000, base_pseudo_num=10,
6365
train_models = TRUE, make_predictions = TRUE) {
6466
sub_train_data <- train_data %>% filter(train_data[[dw]] == 1)
@@ -76,6 +78,7 @@ est_priors <- function(train_data, prior_test_data, geo, value_type, dw, taus,
7678
geo=geo, dw=dw, tau=tau,
7779
value_type=value_type,
7880
training_end_date=training_end_date,
81+
training_start_date=training_start_date,
7982
beta_prior_mode=TRUE)
8083
model_path <- file.path(model_save_dir, model_file_name)
8184

@@ -123,7 +126,8 @@ frac_adj_with_pseudo <- function(data, dw, pseudo_num, pseudo_denom, num_col, de
123126
#' @template train_data-template
124127
#' @param test_data testing data
125128
#' @param prior_test_data testing data for the lag -1 model
126-
#' @param training_end_date the most recent training date
129+
#' @template training_end_date-template
130+
#' @template training_start_date-template
127131
#' @param model_save_dir directory containing trained models
128132
#' @template indicator-template
129133
#' @template signal-template
@@ -141,18 +145,24 @@ frac_adj_with_pseudo <- function(data, dw, pseudo_num, pseudo_denom, num_col, de
141145
frac_adj <- function(train_data, test_data, prior_test_data,
142146
indicator, signal, geo_level, signal_suffix,
143147
lambda, value_type, geo,
144-
training_end_date, model_save_dir,
145-
taus = TAUS, lp_solver = LP_SOLVER,
148+
training_end_date, training_start_date,
149+
model_save_dir,
150+
taus, lp_solver,
146151
train_models = TRUE,
147152
make_predictions = TRUE) {
148153
train_data$value_target <- frac_adj_with_pseudo(train_data, NULL, 1, 100, "value_target_num", "value_target_denom")
149-
train_data$value_7dav <- frac_adj_with_pseudo(train_data, NULL, 1, 100, "value_7dav_num", "value_7dav_denom")
150-
prior_test_data$value_7dav <- frac_adj_with_pseudo(prior_test_data, NULL, 1, 100, "value_7dav_num", "value_7dav_denom")
151-
152154
train_data$log_value_target <- log(train_data$value_target)
155+
156+
test_data$value_target <- frac_adj_with_pseudo(test_data, NULL, 1, 100, "value_target_num", "value_target_denom")
157+
test_data$log_value_target <- log(test_data$value_target)
158+
159+
train_data$value_7dav <- frac_adj_with_pseudo(train_data, NULL, 1, 100, "value_7dav_num", "value_7dav_denom")
153160
train_data$log_value_7dav <- log(train_data$value_7dav)
161+
162+
prior_test_data$value_7dav <- frac_adj_with_pseudo(prior_test_data, NULL, 1, 100, "value_7dav_num", "value_7dav_denom")
154163
prior_test_data$log_value_7dav <- log(prior_test_data$value_7dav)
155164

165+
156166
pre_covariates = c("Mon_ref", "Tue_ref", "Wed_ref", "Thurs_ref", "Fri_ref", "Sat_ref",
157167
"log_value_7dav")
158168
#For training
@@ -172,7 +182,7 @@ frac_adj <- function(train_data, test_data, prior_test_data,
172182
pseudo_counts <- est_priors(train_data, prior_test_data, geo, value_type, cov, taus,
173183
pre_covariates, "log_value_target", lp_solver, lambda,
174184
indicator, signal, geo_level, signal_suffix,
175-
training_end_date, model_save_dir,
185+
training_end_date, training_start_date, model_save_dir,
176186
train_models = train_models,
177187
make_predictions = make_predictions)
178188
pseudo_denum = pseudo_counts[1]

backfill_corrections/delphiBackfillCorrection/R/constants.R

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -3,7 +3,7 @@ TAUS <- c(0.01, 0.025, 0.1, 0.25, 0.5, 0.75, 0.9, 0.975, 0.99)
33
REF_LAG <- 60
44
TEST_LAGS <- c(1:14, 21, 35, 51)
55
TRAINING_DAYS <- 270
6-
TESTING_WINDOW <- 14
6+
TESTING_WINDOW <- 1
77
LAG_WINDOW <- 5
88
LAMBDA <- 0.1
99
LAG_PAD <- 2

0 commit comments

Comments
 (0)