Skip to content

Commit 275b7af

Browse files
committed
refactor: small rename, no extra negatives
1 parent 4ab4b21 commit 275b7af

File tree

10 files changed

+75
-32
lines changed

10 files changed

+75
-32
lines changed

NAMESPACE

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -5,7 +5,7 @@ export(add_id)
55
export(arx_postprocess)
66
export(arx_preprocess)
77
export(collapse_cards)
8-
export(confirm_insufficient_data)
8+
export(confirm_sufficient_data)
99
export(covidhub_probs)
1010
export(evaluate_predictions)
1111
export(extend_ahead)

R/forecaster.R

Lines changed: 50 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -43,8 +43,7 @@ perform_sanity_checks <- function(epi_data,
4343
#' confirm that there's enough data to run this model
4444
#' @description
4545
#' epipredict is a little bit fragile about having enough data to train; we want
46-
#' to be able to return a null result rather than error out; this check say to
47-
#' return a null
46+
#' to be able to return a null result rather than error out.
4847
#' @param epi_data the input data
4948
#' @param buffer how many training data points to insist on having (e.g. if `buffer=1`,
5049
#' this trains on one sample; the default is set so that `linear_reg` isn't
@@ -53,19 +52,30 @@ perform_sanity_checks <- function(epi_data,
5352
#' @param args_input the input as supplied to `forecaster_pred`; lags is the
5453
#' important argument, which may or may not be defined, with the default
5554
#' coming from `arx_args_list`
55+
#'
56+
#' # TODO: Buffer should probably be 2 * n(lags) * n(predictors).
57+
#'
5658
#' @export
57-
confirm_insufficient_data <- function(epi_data, ahead, args_input, buffer = 9) {
59+
confirm_sufficient_data <- function(epi_data, ahead, args_input, buffer = 15) {
5860
if (!is.null(args_input$lags)) {
5961
lag_max <- max(args_input$lags)
6062
} else {
6163
lag_max <- 14 # default value of 2 weeks
6264
}
65+
6366
return(
64-
is.infinite(ahead) ||
65-
as.integer(max(epi_data$time_value) - min(epi_data$time_value)) <=
66-
lag_max + ahead + buffer
67+
!is.infinite(ahead) &&
68+
epi_data %>%
69+
# TODO: This isn't generalizable to other signals.
70+
filter(!is.na(hhs) & !is.na(chng)) %>%
71+
# TODO: Quitting forecasting because of one geo_value is bad.
72+
group_by(geo_value) %>%
73+
summarise(has_enough_data = n_distinct(time_value) >= lag_max + ahead + buffer) %>%
74+
pull(has_enough_data) %>%
75+
any()
6776
)
6877
}
78+
6979
# TODO replace with `step_arx_forecaster`
7080
#' add the default steps for arx_forecaster
7181
#' @description
@@ -187,7 +197,8 @@ forecaster_pred <- function(data,
187197
slide_training = 0,
188198
n_training_pad = 5,
189199
forecaster_args = list(),
190-
forecaster_args_names = list()) {
200+
forecaster_args_names = list(),
201+
date_range_step_size = 1L) {
191202
archive <- data
192203
if (length(forecaster_args) > 0) {
193204
names(forecaster_args) <- forecaster_args_names
@@ -210,25 +221,45 @@ forecaster_pred <- function(data,
210221
# restrict the dataset to areas where training is possible
211222
start_date <- min(archive$DT$time_value) + net_slide_training
212223
end_date <- max(archive$DT$time_value) - forecaster_args$ahead
213-
valid_predict_dates <- seq.Date(from = start_date, to = end_date, by = 1)
224+
valid_predict_dates <- seq.Date(from = start_date, to = end_date, by = date_range_step_size)
214225

215226
# first generate the forecasts
216227
before <- n_training + n_training_pad - 1
217-
## TODO epix_slide doesn't support infinite `before`
228+
## TODO: epix_slide doesn't support infinite `before`
218229
## https://github.com/cmu-delphi/epiprocess/issues/219
219230
if (before == Inf) before <- 365L * 10000
220231
res <- epix_slide(archive,
221232
function(data, gk, rtv, ...) {
222-
do.call(
223-
forecaster,
224-
append(
225-
list(
226-
epi_data = data,
227-
outcome = outcome,
228-
extra_sources = extra_sources
229-
),
230-
forecaster_args
231-
)
233+
# TODO: Can we get rid of this tryCatch and instead hook it up to targets
234+
# error handling or something else?
235+
tryCatch(
236+
{
237+
do.call(
238+
forecaster,
239+
append(
240+
list(
241+
epi_data = data,
242+
outcome = outcome,
243+
extra_sources = extra_sources
244+
),
245+
forecaster_args
246+
)
247+
)
248+
},
249+
error = function(e) {
250+
if (interactive()) {
251+
browser()
252+
} else {
253+
dump_vars <- list(
254+
data = data,
255+
rtv = rtv,
256+
forecaster = forecaster,
257+
forecaster_args = forecaster_args,
258+
e = e
259+
)
260+
saveRDS(dump_vars, "forecaster_pred_error.rds")
261+
}
262+
}
232263
)
233264
},
234265
before = before,

R/forecaster_flatline.R

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -23,7 +23,7 @@ flatline_fc <- function(epi_data,
2323
effective_ahead <- epidataAhead[[2]]
2424
args_input <- list(...)
2525
# edge case where there is no data or fewer data points than the lags require; eventually epipredict will handle this
26-
if (confirm_insufficient_data(epi_data, effective_ahead, args_input)) {
26+
if (!confirm_sufficient_data(epi_data, effective_ahead, args_input)) {
2727
null_result <- tibble(
2828
geo_value = character(),
2929
forecast_date = lubridate::Date(),

R/forecaster_scaled_pop.R

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -58,7 +58,7 @@ scaled_pop <- function(epi_data,
5858
effective_ahead <- epidataAhead[[2]]
5959
args_input <- list(...)
6060
# edge case where there is no data or fewer data points than the lags require; eventually epipredict will handle this
61-
if (confirm_insufficient_data(epi_data, effective_ahead, args_input)) {
61+
if (!confirm_sufficient_data(epi_data, effective_ahead, args_input)) {
6262
null_result <- tibble(
6363
geo_value = character(),
6464
forecast_date = lubridate::Date(),
@@ -73,6 +73,7 @@ scaled_pop <- function(epi_data,
7373
args_list <- do.call(arx_args_list, args_input)
7474
# if you want to ignore extra_sources, setting predictors is the way to do it
7575
predictors <- c(outcome, extra_sources)
76+
# TODO: Partial match quantile_level coming from here
7677
argsPredictorsTrainer <- perform_sanity_checks(epi_data, outcome, predictors, trainer, args_list)
7778
args_list <- argsPredictorsTrainer[[1]]
7879
predictors <- argsPredictorsTrainer[[2]]
@@ -98,7 +99,6 @@ scaled_pop <- function(epi_data,
9899
# postprocessing supported by epipredict
99100
postproc <- frosting()
100101
postproc %<>% arx_postprocess(trainer, args_list)
101-
postproc
102102
if (pop_scaling) {
103103
postproc %<>% layer_population_scaling(
104104
.pred, .pred_distn,

flu_hosp_explore.R

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -72,7 +72,8 @@ forecasts_and_scores_by_ahead <- tar_map(
7272
forecaster = forecaster,
7373
n_training_pad = 30L,
7474
forecaster_args = params,
75-
forecaster_args_names = param_names
75+
forecaster_args_names = param_names,
76+
date_range_step_size = 7L
7677
)
7778
)
7879
),

flu_hosp_explore/data_targets.R

Lines changed: 6 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
geo_type <- "state"
22
time_type <- "day"
33
geo_values <- "*"
4-
time_values <- epidatr::epirange(from = "2020-01-01", to = "2024-01-01")
4+
time_values <- epidatr::epirange(from = "2022-01-01", to = "2024-01-01")
55
fetch_args <- epidatr::fetch_args_list(return_empty = TRUE, timeout_seconds = 200)
66
issues <- "*"
77

@@ -76,6 +76,7 @@ data_targets <- list(
7676
tar_target(
7777
name = chng_archive_data_2022,
7878
command = {
79+
# TODO: Filter out unused columns like missing, direction, etc.
7980
epidatr::pub_covidcast(
8081
source = "chng",
8182
signals = "smoothed_adj_outpatient_flu",
@@ -109,7 +110,10 @@ data_targets <- list(
109110
time_type = time_type,
110111
compactify = TRUE
111112
)
112-
epix_merge(hhs_archive_data_2022, chng_archive_data_2022, sync = "locf")
113+
epix_merge(hhs_archive_data_2022, chng_archive_data_2022, sync = "locf")$DT %>%
114+
filter(!is.na(hhs) & !is.na(chng)) %>%
115+
filter(!geo_value %in% c("as", "pr", "vi", "gu", "mp")) %>%
116+
epiprocess::as_epi_archive()
113117
}
114118
)
115119
)

man/confirm_insufficient_data.Rd renamed to man/confirm_sufficient_data.Rd

Lines changed: 3 additions & 3 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

man/forecaster_pred.Rd

Lines changed: 2 additions & 1 deletion
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

run.R

Lines changed: 7 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -42,6 +42,8 @@ input: ") {
4242
project_selection <- readline_wrapper()
4343
external_scores_path <- readline_wrapper("path to RDS file containing external forecast scores, if desired:")
4444

45+
debug_mode <- readline_wrapper("Would you like to run debug mode? (y/[N]): ")
46+
4547
suppressPackageStartupMessages({
4648
library(targets)
4749
library(shiny)
@@ -79,7 +81,11 @@ tar_helper(
7981
)
8082

8183
tar_manifest()
82-
tar_make()
84+
if (debug_mode == "y") {
85+
tar_make(callr_function = NULL)
86+
} else {
87+
tar_make()
88+
}
8389
# tar_make_clustermq(workers = 2) # nolint
8490
# tar_make_future(workers = 2) # nolint
8591

tests/testthat/test-forecasters-basics.R

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -26,7 +26,7 @@ for (forecaster in forecasters) {
2626
"case_rate",
2727
c("death_rate"),
2828
-2L,
29-
pop_scaling = FALSE
29+
pop_scaling = FALSE,
3030
)
3131
expect_false(res_unscaled %>%
3232
full_join(res,

0 commit comments

Comments
 (0)