cmu-delphi
diff --git a/‎DESCRIPTION
Lines changed: 1 addition & 1 deletion b/‎DESCRIPTION
Lines changed: 1 addition & 1 deletion
diff --git a/‎NAMESPACE
Lines changed: 3 additions & 0 deletions b/‎NAMESPACE
Lines changed: 3 additions & 0 deletions
diff --git a/‎NEWS.md
Lines changed: 7 additions & 4 deletions b/‎NEWS.md
Lines changed: 7 additions & 4 deletions
diff --git a/‎R/arx_classifier.R
Lines changed: 8 additions & 10 deletions b/‎R/arx_classifier.R
Lines changed: 8 additions & 10 deletions
diff --git a/‎R/arx_forecaster.R
Lines changed: 14 additions & 15 deletions b/‎R/arx_forecaster.R
Lines changed: 14 additions & 15 deletions
diff --git a/‎R/autoplot.R
Lines changed: 1 addition & 2 deletions b/‎R/autoplot.R
Lines changed: 1 addition & 2 deletions
diff --git a/‎R/epi_workflow.R
Lines changed: 56 additions & 1 deletion b/‎R/epi_workflow.R
Lines changed: 56 additions & 1 deletion
diff --git a/‎R/flatline_forecaster.R
Lines changed: 6 additions & 6 deletions b/‎R/flatline_forecaster.R
Lines changed: 6 additions & 6 deletions
diff --git a/‎R/frosting.R
Lines changed: 1 addition & 2 deletions b/‎R/frosting.R
Lines changed: 1 addition & 2 deletions
diff --git a/‎R/layer_add_target_date.R
Lines changed: 4 additions & 5 deletions b/‎R/layer_add_target_date.R
Lines changed: 4 additions & 5 deletions
diff --git a/‎R/layer_cdc_flatline_quantiles.R
Lines changed: 1 addition & 5 deletions b/‎R/layer_cdc_flatline_quantiles.R
Lines changed: 1 addition & 5 deletions
diff --git a/‎R/layer_naomit.R
Lines changed: 1 addition & 3 deletions b/‎R/layer_naomit.R
Lines changed: 1 addition & 3 deletions
diff --git a/‎R/layer_point_from_distn.R
Lines changed: 2 additions & 4 deletions b/‎R/layer_point_from_distn.R
Lines changed: 2 additions & 4 deletions
diff --git a/‎R/layer_population_scaling.R
Lines changed: 1 addition & 11 deletions b/‎R/layer_population_scaling.R
Lines changed: 1 addition & 11 deletions
diff --git a/‎R/layer_predictive_distn.R
Lines changed: 1 addition & 3 deletions b/‎R/layer_predictive_distn.R
Lines changed: 1 addition & 3 deletions
diff --git a/‎R/layer_quantile_distn.R
Lines changed: 1 addition & 3 deletions b/‎R/layer_quantile_distn.R
Lines changed: 1 addition & 3 deletions
@@ -1,6 +1,6 @@
 Package: epipredict
 Title: Basic epidemiology forecasting methods
-Version: 0.0.13
+Version: 0.0.14
 Authors@R: c(
     person("Daniel", "McDonald", , "[email protected]", role = c("aut", "cre")),
     person("Ryan", "Tibshirani", , "[email protected]", role = "aut"),
 
@@ -45,6 +45,7 @@ S3method(extrapolate_quantiles,distribution)
 S3method(fit,epi_workflow)
 S3method(flusight_hub_formatter,canned_epipred)
 S3method(flusight_hub_formatter,data.frame)
+S3method(forecast,epi_workflow)
 S3method(format,dist_quantiles)
 S3method(is.na,dist_quantiles)
 S3method(is.na,distribution)
@@ -152,6 +153,7 @@ export(flatline)
 export(flatline_args_list)
 export(flatline_forecaster)
 export(flusight_hub_formatter)
+export(forecast)
 export(frosting)
 export(get_test_data)
 export(grab_names)
@@ -219,6 +221,7 @@ importFrom(dplyr,ungroup)
 importFrom(epiprocess,growth_rate)
 importFrom(generics,augment)
 importFrom(generics,fit)
+importFrom(generics,forecast)
 importFrom(ggplot2,autoplot)
 importFrom(hardhat,refresh_blueprint)
 importFrom(hardhat,run_mold)
 
@@ -31,12 +31,15 @@ Pre-1.0.0 numbering scheme: 0.x will indicate releases, while 0.0.x will indicat
 - Working vignette
 - use `checkmate` for input validation
 - refactor quantile extrapolation (possibly creates different results)
-- force `target_date` + `forecast_date` handling to match the time_type of
-  the epi_df. allows for annual and weekly data
+- force `target_date` + `forecast_date` handling to match the time_type of the
+  epi_df. allows for annual and weekly data
 - add `check_enough_train_data()` that will error if training data is too small
 - added `check_enough_train_data()` to `arx_forecaster()`
-- `layer_residual_quantiles()` will now error if any of the residual quantiles are NA
+- `layer_residual_quantiles()` will now error if any of the residual quantiles
+  are NA
 - `*_args_list()` functions now warn if `forecast_date + ahead != target_date`
-- the `predictor` argument in `arx_forecaster()` now defaults to the value of the `outcome` argument
+- the `predictor` argument in `arx_forecaster()` now defaults to the value of
+  the `outcome` argument
 - `arx_fcast_epi_workflow()` and `arx_class_epi_workflow()` now default to
   `trainer = parsnip::logistic_reg()` to match their more canned versions.
+- add a `forecast()` method to simplify generating forecasts
@@ -51,17 +51,15 @@ arx_classifier <- function(
     cli::cli_abort("`trainer` must be a {.pkg parsnip} model of mode 'classification'.")
   }
 
-  wf <- arx_class_epi_workflow(
-    epi_data, outcome, predictors, trainer, args_list
-  )
-
-  latest <- get_test_data(
-    hardhat::extract_preprocessor(wf), epi_data, TRUE, args_list$nafill_buffer,
-    args_list$forecast_date %||% max(epi_data$time_value)
-  )
-
+  wf <- arx_class_epi_workflow(epi_data, outcome, predictors, trainer, args_list)
   wf <- generics::fit(wf, epi_data)
-  preds <- predict(wf, new_data = latest) %>%
+
+  preds <- forecast(
+    wf,
+    fill_locf = TRUE,
+    n_recent = args_list$nafill_buffer,
+    forecast_date = args_list$forecast_date %||% max(epi_data$time_value)
+  ) %>%
     tibble::as_tibble() %>%
     dplyr::select(-time_value)
 
 
@@ -38,26 +38,25 @@
 #'   trainer = quantile_reg(),
 #'   args_list = arx_args_list(quantile_levels = 1:9 / 10)
 #' )
-arx_forecaster <- function(epi_data,
-                           outcome,
-                           predictors = outcome,
-                           trainer = parsnip::linear_reg(),
-                           args_list = arx_args_list()) {
+arx_forecaster <- function(
+    epi_data,
+    outcome,
+    predictors = outcome,
+    trainer = parsnip::linear_reg(),
+    args_list = arx_args_list()) {
   if (!is_regression(trainer)) {
     cli::cli_abort("`trainer` must be a {.pkg parsnip} model of mode 'regression'.")
   }
 
-  wf <- arx_fcast_epi_workflow(
-    epi_data, outcome, predictors, trainer, args_list
-  )
-
-  latest <- get_test_data(
-    hardhat::extract_preprocessor(wf), epi_data, TRUE, args_list$nafill_buffer,
-    args_list$forecast_date %||% max(epi_data$time_value)
-  )
-
+  wf <- arx_fcast_epi_workflow(epi_data, outcome, predictors, trainer, args_list)
   wf <- generics::fit(wf, epi_data)
-  preds <- predict(wf, new_data = latest) %>%
+
+  preds <- forecast(
+    wf,
+    fill_locf = TRUE,
+    n_recent = args_list$nafill_buffer,
+    forecast_date = args_list$forecast_date %||% max(epi_data$time_value)
+  ) %>%
     tibble::as_tibble() %>%
     dplyr::select(-time_value)
 
 
@@ -61,8 +61,7 @@ ggplot2::autoplot
 #'     step_epi_lag(case_rate, lag = c(0, 7, 14)) %>%
 #'     step_epi_naomit()
 #'   ewf <- epi_workflow(r, parsnip::linear_reg(), f) %>% fit(jhu)
-#'   td <- get_test_data(r, jhu)
-#'   predict(ewf, new_data = td)
+#'   forecast(ewf)
 #' })
 #'
 #' p <- do.call(rbind, p)
 
@@ -197,7 +197,11 @@ update_model.epi_workflow <- function(x, spec, ..., formula = NULL) {
 #'
 #' @export
 fit.epi_workflow <- function(object, data, ..., control = workflows::control_workflow()) {
-  object$fit$meta <- list(max_time_value = max(data$time_value), as_of = attributes(data)$metadata$as_of)
+  object$fit$meta <- list(
+    max_time_value = max(data$time_value),
+    as_of = attributes(data)$metadata$as_of
+  )
+  object$original_data <- data
 
   NextMethod()
 }
@@ -326,3 +330,54 @@ print.epi_workflow <- function(x, ...) {
   print_postprocessor(x)
   invisible(x)
 }
+
+
+#' Produce a forecast from an epi workflow
+#'
+#' @param object A trained epi workflow.
+#' @param ... Not used; must be empty.
+#' @param fill_locf Logical. Should we use locf to fill in missing data?
+#' @param n_recent Integer or NULL. If `fill_locf = TRUE`, how far back are we
+#'   willing to tolerate missing data? Larger values allow more filling. For
+#'   example, suppose `n_recent = 3`, then if the 3 most recent observations in
+#'   any geo_value are all NA's, we won't be able to fill anything, and an error
+#'   message will be thrown. The default `NULL` is passed on as `Inf`.
+#' @param forecast_date Forecast date passed to `get_test_data()`. If `NULL`,
+#'   the `forecast_date` of a `layer_add_forecast_date()` layer is used when
+#'   set, otherwise the maximum `time_value` of the data given to `fit()`. With
+#'   data latency, this may be after the last available `time_value`.
+#'
+#' @return The result of `predict()` on the test data built from the fit data.
+#' @export
+forecast.epi_workflow <- function(object, ..., fill_locf = FALSE, n_recent = NULL, forecast_date = NULL) {
+  rlang::check_dots_empty()
+
+  if (!object$trained) {
+    cli_abort(c(
+      "You cannot `forecast()` a {.cls workflow} that has not been trained.",
+      i = "Please use `fit()` before forecasting."
+    ))
+  }
+
+  frosting_fd <- NULL
+  if (has_postprocessor(object) && detect_layer(object, "layer_add_forecast_date")) {
+    frosting_fd <- extract_argument(object, "layer_add_forecast_date", "forecast_date")
+    # class() can have length > 1 (e.g. POSIXct), so compare with identical(), not `!=`.
+    if (!is.null(frosting_fd) && !identical(class(frosting_fd), class(object$original_data$time_value))) {
+      cli_abort(c(
+        "Error with layer_add_forecast_date():",
+        i = "The type of `forecast_date` must match the type of the `time_value` column in the data."
+      ))
+    }
+  }
+
+  test_data <- get_test_data(
+    hardhat::extract_preprocessor(object),
+    object$original_data,
+    fill_locf = fill_locf,
+    n_recent = n_recent %||% Inf,
+    forecast_date = forecast_date %||% frosting_fd %||% max(object$original_data$time_value)
+  )
+
+  predict(object, new_data = test_data)
+}
@@ -49,11 +49,6 @@ flatline_forecaster <- function(
   forecast_date <- args_list$forecast_date %||% max(epi_data$time_value)
   target_date <- args_list$target_date %||% (forecast_date + args_list$ahead)
 
-  latest <- get_test_data(
-    epi_recipe(epi_data), epi_data, TRUE, args_list$nafill_buffer,
-    forecast_date
-  )
-
   f <- frosting() %>%
     layer_predict() %>%
     layer_residual_quantiles(
@@ -69,7 +64,12 @@ flatline_forecaster <- function(
 
   wf <- epi_workflow(r, eng, f)
   wf <- generics::fit(wf, epi_data)
-  preds <- suppressWarnings(predict(wf, new_data = latest)) %>%
+  preds <- suppressWarnings(forecast(
+    wf,
+    fill_locf = TRUE,
+    n_recent = args_list$nafill_buffer,
+    forecast_date = forecast_date
+  )) %>%
     tibble::as_tibble() %>%
     dplyr::select(-time_value)
 
 
@@ -275,15 +275,14 @@ new_frosting <- function() {
 #'   step_epi_naomit()
 #'
 #' wf <- epi_workflow(r, parsnip::linear_reg()) %>% fit(jhu)
-#' latest <- get_test_data(recipe = r, x = jhu)
 #'
 #' f <- frosting() %>%
 #'   layer_predict() %>%
 #'   layer_naomit(.pred)
 #'
 #' wf1 <- wf %>% add_frosting(f)
 #'
-#' p <- predict(wf1, latest)
+#' p <- forecast(wf1)
 #' p
 frosting <- function(layers = NULL, requirements = NULL) {
   if (!is_null(layers) || !is_null(requirements)) {
 
@@ -28,17 +28,16 @@
 #'   step_epi_naomit()
 #'
 #' wf <- epi_workflow(r, parsnip::linear_reg()) %>% fit(jhu)
-#' latest <- get_test_data(r, jhu)
 #'
 #' # Use ahead + forecast date
 #' f <- frosting() %>%
 #'   layer_predict() %>%
-#'   layer_add_forecast_date(forecast_date = "2022-05-31") %>%
+#'   layer_add_forecast_date(forecast_date = as.Date("2022-05-31")) %>%
 #'   layer_add_target_date() %>%
 #'   layer_naomit(.pred)
 #' wf1 <- wf %>% add_frosting(f)
 #'
-#' p <- predict(wf1, latest)
+#' p <- forecast(wf1)
 #' p
 #'
 #' # Use ahead + max time value from pre, fit, post
@@ -49,7 +48,7 @@
 #'   layer_naomit(.pred)
 #' wf2 <- wf %>% add_frosting(f2)
 #'
-#' p2 <- predict(wf2, latest)
+#' p2 <- forecast(wf2)
 #' p2
 #'
 #' # Specify own target date
@@ -59,7 +58,7 @@
 #'   layer_naomit(.pred)
 #' wf3 <- wf %>% add_frosting(f3)
 #'
-#' p3 <- predict(wf3, latest)
+#' p3 <- forecast(wf3)
 #' p3
 layer_add_target_date <-
   function(frosting, target_date = NULL, id = rand_id("add_target_date")) {
 
@@ -64,18 +64,14 @@
 #'
 #' forecast_date <- max(case_death_rate_subset$time_value)
 #'
-#' latest <- get_test_data(
-#'   epi_recipe(case_death_rate_subset), case_death_rate_subset
-#' )
-#'
 #' f <- frosting() %>%
 #'   layer_predict() %>%
 #'   layer_cdc_flatline_quantiles(aheads = c(7, 14, 21, 28), symmetrize = TRUE)
 #'
 #' eng <- parsnip::linear_reg() %>% parsnip::set_engine("flatline")
 #'
 #' wf <- epi_workflow(r, eng, f) %>% fit(case_death_rate_subset)
-#' preds <- suppressWarnings(predict(wf, new_data = latest)) %>%
+#' preds <- forecast(wf) %>%
 #'   dplyr::select(-time_value) %>%
 #'   dplyr::mutate(forecast_date = forecast_date)
 #' preds
 
@@ -20,15 +20,13 @@
 #'
 #' wf <- epi_workflow(r, parsnip::linear_reg()) %>% fit(jhu)
 #'
-#' latest <- get_test_data(recipe = r, x = jhu)
-#'
 #' f <- frosting() %>%
 #'   layer_predict() %>%
 #'   layer_naomit(.pred)
 #'
 #' wf1 <- wf %>% add_frosting(f)
 #'
-#' p <- predict(wf1, latest)
+#' p <- forecast(wf1)
 #' p
 layer_naomit <- function(frosting, ..., id = rand_id("naomit")) {
   arg_is_chr_scalar(id)
 
@@ -26,16 +26,14 @@
 #'
 #' wf <- epi_workflow(r, quantile_reg(quantile_levels = c(.25, .5, .75))) %>% fit(jhu)
 #'
-#' latest <- get_test_data(recipe = r, x = jhu)
-#'
 #' f1 <- frosting() %>%
 #'   layer_predict() %>%
 #'   layer_quantile_distn() %>% # puts the other quantiles in a different col
 #'   layer_point_from_distn() %>% # mutate `.pred` to contain only a point prediction
 #'   layer_naomit(.pred)
 #' wf1 <- wf %>% add_frosting(f1)
 #'
-#' p1 <- predict(wf1, latest)
+#' p1 <- forecast(wf1)
 #' p1
 #'
 #' f2 <- frosting() %>%
@@ -44,7 +42,7 @@
 #'   layer_naomit(.pred)
 #' wf2 <- wf %>% add_frosting(f2)
 #'
-#' p2 <- predict(wf2, latest)
+#' p2 <- forecast(wf2)
 #' p2
 layer_point_from_distn <- function(frosting,
                                    ...,
 
@@ -78,17 +78,7 @@
 #'   fit(jhu) %>%
 #'   add_frosting(f)
 #'
-#' latest <- get_test_data(
-#'   recipe = r,
-#'   x = epiprocess::jhu_csse_daily_subset %>%
-#'     dplyr::filter(
-#'       time_value > "2021-11-01",
-#'       geo_value %in% c("ca", "ny")
-#'     ) %>%
-#'     dplyr::select(geo_value, time_value, cases)
-#' )
-#'
-#' predict(wf, latest)
+#' forecast(wf)
 layer_population_scaling <- function(frosting,
                                      ...,
                                      df,
 
@@ -30,15 +30,13 @@
 #'
 #' wf <- epi_workflow(r, parsnip::linear_reg()) %>% fit(jhu)
 #'
-#' latest <- get_test_data(recipe = r, x = jhu)
-#'
 #' f <- frosting() %>%
 #'   layer_predict() %>%
 #'   layer_predictive_distn() %>%
 #'   layer_naomit(.pred)
 #' wf1 <- wf %>% add_frosting(f)
 #'
-#' p <- predict(wf1, latest)
+#' p <- forecast(wf1)
 #' p
 layer_predictive_distn <- function(frosting,
                                    ...,
 
@@ -28,15 +28,13 @@
 #' wf <- epi_workflow(r, quantile_reg(quantile_levels = c(.25, .5, .75))) %>%
 #'   fit(jhu)
 #'
-#' latest <- get_test_data(recipe = r, x = jhu)
-#'
 #' f <- frosting() %>%
 #'   layer_predict() %>%
 #'   layer_quantile_distn() %>%
 #'   layer_naomit(.pred)
 #' wf1 <- wf %>% add_frosting(f)
 #'
-#' p <- predict(wf1, latest)
+#' p <- forecast(wf1)
 #' p
 layer_quantile_distn <- function(frosting,
                                  ...,