cmu-delphi
diff --git a/‎NAMESPACE
Lines changed: 4 additions & 0 deletions b/‎NAMESPACE
Lines changed: 4 additions & 0 deletions
diff --git a/‎R/compat-purrr.R
Lines changed: 65 additions & 0 deletions b/‎R/compat-purrr.R
Lines changed: 65 additions & 0 deletions
diff --git a/‎R/epi_recipe.R
Lines changed: 115 additions & 15 deletions b/‎R/epi_recipe.R
Lines changed: 115 additions & 15 deletions
diff --git a/‎R/epi_workflow.R
Lines changed: 26 additions & 32 deletions b/‎R/epi_workflow.R
Lines changed: 26 additions & 32 deletions
@@ -1,5 +1,6 @@
 # Generated by roxygen2: do not edit by hand
 
+S3method(augment,epi_workflow)
 S3method(bake,step_epi_ahead)
 S3method(bake,step_epi_lag)
 S3method(epi_keys,default)
@@ -9,6 +10,7 @@ S3method(epi_recipe,default)
 S3method(epi_recipe,epi_df)
 S3method(epi_recipe,formula)
 S3method(predict,epi_workflow)
+S3method(prep,epi_recipe)
 S3method(prep,step_epi_ahead)
 S3method(prep,step_epi_lag)
 S3method(print,step_epi_ahead)
@@ -40,6 +42,8 @@ import(recipes)
 importFrom(magrittr,"%>%")
 importFrom(rlang,"!!")
 importFrom(rlang,":=")
+importFrom(rlang,abort)
+importFrom(rlang,caller_env)
 importFrom(rlang,is_null)
 importFrom(stats,as.formula)
 importFrom(stats,lm)
 
@@ -0,0 +1,65 @@
+# See https://github.com/r-lib/rlang/blob/main/R/compat-purrr.R
+
+
+# Apply `.f` to every element of `.x` and return the results as a list.
+# `.f` is first converted with `rlang::as_function()`.
+map <- function(.x, .f, ...) {
+  # `global_env()` is namespace-qualified: it is not among the rlang functions
+  # imported in NAMESPACE, so the bare name would not resolve when forced.
+  .f <- rlang::as_function(.f, env = rlang::global_env())
+  lapply(.x, .f, ...)
+}
+# Call `.f` on every element of `.x` through `map()`, drop the results, and
+# hand `.x` back invisibly.
+walk <- function(.x, .f, ...) {
+  discarded <- map(.x, .f, ...)
+  invisible(.x)
+}
+
+# Typed variants of `map()`: each one delegates to `.rlang_purrr_map_mold()`
+# with a length-one template of the required result type.
+map_lgl <- function(.x, .f, ...) {
+  .rlang_purrr_map_mold(.x, .f, FALSE, ...)
+}
+map_int <- function(.x, .f, ...) {
+  .rlang_purrr_map_mold(.x, .f, 0L, ...)
+}
+map_dbl <- function(.x, .f, ...) {
+  .rlang_purrr_map_mold(.x, .f, 0, ...)
+}
+map_chr <- function(.x, .f, ...) {
+  .rlang_purrr_map_mold(.x, .f, "", ...)
+}
+# Shared engine for the typed `map_*()` variants: applies `.f` over `.x` with
+# `vapply()`, using `.mold` as the result template, then copies the names of
+# `.x` onto the output.
+.rlang_purrr_map_mold <- function(.x, .f, .mold, ...) {
+  # `global_env()` is namespace-qualified: it is not among the rlang functions
+  # imported in NAMESPACE.
+  .f <- rlang::as_function(.f, env = rlang::global_env())
+  out <- vapply(.x, .f, .mold, ..., USE.NAMES = FALSE)
+  names(out) <- names(.x)
+  out
+}
+
+# Apply `.f` to the elements of `.x` and `.y` in parallel and return a list.
+# The result carries the names of `.x` when it has the same length as `.x`,
+# and no names otherwise.
+map2 <- function(.x, .y, .f, ...) {
+  # `as_function()`, `global_env()` and `set_names()` are namespace-qualified:
+  # none of them is among the rlang functions imported in NAMESPACE.
+  .f <- rlang::as_function(.f, env = rlang::global_env())
+  out <- mapply(.f, .x, .y, MoreArgs = list(...), SIMPLIFY = FALSE)
+  if (length(out) == length(.x)) {
+    rlang::set_names(out, names(.x))
+  } else {
+    rlang::set_names(out, NULL)
+  }
+}
+# Typed variants of `map2()`: run `map2()` and coerce the resulting list to an
+# atomic vector of the requested mode with `as.vector()`.
+map2_lgl <- function(.x, .y, .f, ...) {
+  res <- map2(.x, .y, .f, ...)
+  as.vector(res, mode = "logical")
+}
+map2_int <- function(.x, .y, .f, ...) {
+  res <- map2(.x, .y, .f, ...)
+  as.vector(res, mode = "integer")
+}
+map2_dbl <- function(.x, .y, .f, ...) {
+  res <- map2(.x, .y, .f, ...)
+  as.vector(res, mode = "double")
+}
+map2_chr <- function(.x, .y, .f, ...) {
+  res <- map2(.x, .y, .f, ...)
+  as.vector(res, mode = "character")
+}
+# Like `map2()`, with the second input being the names of `.x`, or its
+# positions when `.x` has no names.
+imap <- function(.x, .f, ...) {
+  idx <- names(.x)
+  if (is.null(idx)) {
+    idx <- seq_along(.x)
+  }
+  map2(.x, idx, .f, ...)
+}
+
+# Apply `.f` in parallel over the elements of the list `.l` (one element of
+# `.l` per argument of `.f`) and return the results as an unnamed list.
+# Length-1 elements of `.l` are recycled to the common length.
+pmap <- function(.l, .f, ...) {
+  .f <- as.function(.f)
+  args <- .rlang_purrr_args_recycle(.l)
+  # `.f` and `list(...)` are passed quoted so that `do.call()` evaluates them
+  # in this function's frame.
+  do.call("mapply", c(
+    FUN = list(quote(.f)),
+    args, MoreArgs = quote(list(...)),
+    SIMPLIFY = FALSE, USE.NAMES = FALSE
+  ))
+}
+
+# Recycle the length-1 elements of `args` to the common length `n`. Every
+# element must have length 1 or `n`, otherwise `stopifnot()` fails.
+# `pmap()` calls this helper, but it was not included with the rest of this
+# file, so it is defined here using base R only.
+.rlang_purrr_args_recycle <- function(args) {
+  lengths <- vapply(args, length, integer(1))
+  n <- max(lengths)
+
+  stopifnot(all(lengths == 1L | lengths == n))
+  to_recycle <- lengths == 1L
+  args[to_recycle] <- lapply(args[to_recycle], function(x) rep.int(x, n))
+
+  args
+}
@@ -73,11 +73,7 @@ epi_recipe.default <- function(x, ...) {
 #'
 #' r
 epi_recipe.epi_df <-
-  function(x,
-           formula = NULL,
-           ...,
-           vars = NULL,
-           roles = NULL) {
+  function(x, formula = NULL, ..., vars = NULL, roles = NULL) {
     if (!is.null(formula)) {
       if (!is.null(vars)) {
         rlang::abort(
@@ -115,12 +111,9 @@ epi_recipe.epi_df <-
     ## Check and add roles when available
     if (!is.null(roles)) {
       if (length(roles) != length(vars)) {
-        rlang::abort(
-          paste0(
+        rlang::abort(c(
             "The number of roles should be the same as the number of ",
-            "variables"
-          )
-        )
+            "variables."))
       }
       var_info$role <- roles
     } else {
@@ -161,6 +154,7 @@ epi_recipe.epi_df <-
 
 
 #' @rdname epi_recipe
+#' @importFrom rlang abort
 #' @export
 epi_recipe.formula <- function(formula, data, ...) {
   # we ensure that there's only 1 row in the template
@@ -170,9 +164,9 @@ epi_recipe.formula <- function(formula, data, ...) {
     return(recipes::recipe(formula, data, ...))
   }
 
-  f_funcs <- fun_calls(formula)
+  f_funcs <- recipes:::fun_calls(formula)
   if (any(f_funcs == "-")) {
-    Abort("`-` is not allowed in a recipe formula. Use `step_rm()` instead.")
+    abort("`-` is not allowed in a recipe formula. Use `step_rm()` instead.")
   }
 
   # Check for other in-line functions
@@ -193,11 +187,11 @@ epi_form2args <- function(formula, data, ...) {
   if (! rlang::is_formula(formula)) formula <- as.formula(formula)
 
   ## check for in-line formulas
-  inline_check(formula)
+  recipes:::inline_check(formula)
 
   ## use rlang to get both sides of the formula
-  outcomes <- get_lhs_vars(formula, data)
-  predictors <- get_rhs_vars(formula, data, no_lhs = TRUE)
+  outcomes <- recipes:::get_lhs_vars(formula, data)
+  predictors <- recipes:::get_rhs_vars(formula, data, no_lhs = TRUE)
   keys <- epi_keys(data)
 
   ## if . was used on the rhs, subtract out the outcomes
@@ -316,3 +310,109 @@ default_epi_recipe_blueprint <-
     hardhat::default_recipe_blueprint(
       intercept, allow_novel_levels, fresh, bake_dependent_roles, composition)
   }
+
+
+# Mirrors `recipes::prep.recipe()`; the only intended difference is the
+# string/factor handling, where `kill_levels()` blanks the level information
+# of the `epi_keys()` columns before `strings2factors()` is applied.
+#' @export
+prep.epi_recipe <- function(
+    x, training = NULL, fresh = FALSE, verbose = FALSE,
+    retain = TRUE, log_changes = FALSE, strings_as_factors = TRUE, ...) {
+  training <- recipes:::check_training_set(training, x, fresh)
+  tr_data <- recipes:::train_info(training)
+  keys <- epi_keys(training)
+  # Level information of the incoming data, with the key columns blanked.
+  orig_lvls <- lapply(training, recipes:::get_levels)
+  orig_lvls <- kill_levels(orig_lvls, keys)
+  if (strings_as_factors) {
+    lvls <- lapply(training, recipes:::get_levels)
+    lvls <- kill_levels(lvls, keys)
+    training <- recipes:::strings2factors(training, lvls)
+  } else {
+    lvls <- NULL
+  }
+  skippers <- map_lgl(x$steps, recipes:::is_skipable)
+  # Scalar condition, so `&&`. The message is built with `paste0()` so it
+  # stays one sentence instead of a multi-element message vector.
+  if (any(skippers) && !retain) {
+    rlang::warn(paste0(
+      "Since some operations have `skip = TRUE`, using ",
+      "`retain = TRUE` will allow those steps results to ",
+      "be accessible."))
+  }
+  if (fresh) x$term_info <- x$var_info
+
+  # `running_info` accumulates a snapshot of the term info after each step
+  # that gets trained; `number` records the step index (0 = before any step).
+  running_info <- x$term_info %>% dplyr::mutate(number = 0, skip = FALSE)
+  for (i in seq_along(x$steps)) {
+    needs_tuning <- map_lgl(x$steps[[i]], recipes:::is_tune)
+    if (any(needs_tuning)) {
+      arg <- names(needs_tuning)[needs_tuning]
+      arg <- paste0("'", arg, "'", collapse = ", ")
+      msg <- paste0(
+        "You cannot `prep()` a tuneable recipe. Argument(s) with `tune()`: ",
+        arg, ". Do you want to use a tuning function such as `tune_grid()`?")
+      rlang::abort(msg)
+    }
+    note <- paste("oper", i, gsub("_", " ", class(x$steps[[i]])[1]))
+    # Scalar condition, so `||`: train when the step is untrained or when a
+    # fresh prep was requested.
+    if (!x$steps[[i]]$trained || fresh) {
+      if (verbose) {
+        cat(note, "[training]", "\n")
+      }
+      before_nms <- names(training)
+      x$steps[[i]] <- prep(x$steps[[i]], training = training,
+                           info = x$term_info)
+      training <- bake(x$steps[[i]], new_data = training)
+      if (!tibble::is_tibble(training)) {
+        abort("bake() methods should always return tibbles")
+      }
+      x$term_info <- recipes:::merge_term_info(get_types(training), x$term_info)
+      # When the step declares a role, variables that did not exist before it
+      # get that role and are marked as "derived" (only where still NA).
+      if (!is.na(x$steps[[i]]$role)) {
+        new_vars <- setdiff(x$term_info$variable, running_info$variable)
+        pos_new_var <- x$term_info$variable %in% new_vars
+        pos_new_and_na_role <- pos_new_var & is.na(x$term_info$role)
+        pos_new_and_na_source <- pos_new_var & is.na(x$term_info$source)
+        x$term_info$role[pos_new_and_na_role] <- x$steps[[i]]$role
+        x$term_info$source[pos_new_and_na_source] <- "derived"
+      }
+      recipes:::changelog(log_changes, before_nms, names(training), x$steps[[i]])
+      running_info <- rbind(
+        running_info,
+        dplyr::mutate(x$term_info, number = i, skip = x$steps[[i]]$skip))
+    } else {
+      if (verbose) cat(note, "[pre-trained]\n")
+    }
+  }
+  # Level information recomputed on the processed training data; dropped
+  # entirely when no column has levels.
+  if (strings_as_factors) {
+    lvls <- lapply(training, recipes:::get_levels)
+    lvls <- kill_levels(lvls, keys)
+    check_lvls <- recipes:::has_lvls(lvls)
+    if (!any(check_lvls)) lvls <- NULL
+  } else {
+    lvls <- NULL
+  }
+  # Keep the full processed data when `retain = TRUE`, else a zero-row slice.
+  if (retain) {
+    if (verbose) {
+      cat("The retained training set is ~",
+          format(object.size(training), units = "Mb", digits = 2),
+          " in memory.\n\n")
+    }
+    x$template <- training
+  } else {
+    x$template <- training[0, ]
+  }
+  x$tr_info <- tr_data
+  x$levels <- lvls
+  x$orig_lvls <- orig_lvls
+  x$retained <- retain
+  # One row per variable, taking type/source/number/skip from the latest step
+  # snapshot and collecting every distinct role seen for that variable.
+  x$last_term_info <- running_info %>%
+    dplyr::group_by(variable) %>%
+    dplyr::arrange(dplyr::desc(number)) %>%
+    dplyr::summarise(
+      type = dplyr::first(type),
+      role = as.list(unique(unlist(role))),
+      source = dplyr::first(source),
+      number = dplyr::first(number),
+      skip = dplyr::first(skip),
+      .groups = "keep")
+  x
+}
+
+# Replace every element of `x` whose name appears in `keys` with an all-`NA`
+# placeholder (`values = NA`, `ordered = NA`); other elements are untouched.
+kill_levels <- function(x, keys) {
+  key_positions <- which(names(x) %in% keys)
+  for (pos in key_positions) {
+    x[[pos]] <- list(values = NA, ordered = NA)
+  }
+  x
+}
@@ -35,17 +35,21 @@
 #' wf <- epi_workflow(r, linear_reg())
 #'
 #' wf
-epi_workflow <- function(preprocessor = NULL, spec = NULL) {
+epi_workflow <- function(preprocessor = NULL, spec = NULL,
+                         postprocessor = NULL) {
   out <- workflows::workflow(spec = spec)
   class(out) <- c("epi_workflow", class(out))
 
+  # No early return here, so a postprocessor is attached for epi_recipes too.
   if (is_epi_recipe(preprocessor)) {
-    return(add_epi_recipe(out, preprocessor))
-  }
-
-  if (!is_null(preprocessor)) {
-    return(workflows:::add_preprocessor(out, preprocessor))
+    out <- add_epi_recipe(out, preprocessor)
+  } else if (!is_null(preprocessor)) {
+    out <- workflows:::add_preprocessor(out, preprocessor)
+  }
+  if (!is_null(postprocessor)) {
+    out <- add_postprocessor(out, postprocessor)
   }
+
   out
 }
 
@@ -95,17 +99,11 @@ is_epi_workflow <- function(x) {
 #' @export
 #' @examples
 #'
-#' library(epiprocess)
 #' library(dplyr)
 #' library(parsnip)
 #' library(recipes)
 #'
-#' jhu <- jhu_csse_daily_subset %>%
-#'   filter(time_value > "2021-08-01") %>%
-#'   select(geo_value:death_rate_7d_av) %>%
-#'   rename(case_rate = case_rate_7d_av, death_rate = death_rate_7d_av)
-#'
-#' r <- epi_recipe(jhu) %>%
+#' r <- epi_recipe(case_death_rate_subset) %>%
 #'   step_epi_lag(death_rate, lag = c(0, 7, 14)) %>%
 #'   step_epi_ahead(death_rate, ahead = 7) %>%
 #'   step_epi_lag(case_rate, lag = c(0, 7, 14)) %>%
@@ -114,33 +112,29 @@ is_epi_workflow <- function(x) {
 #'
-#' wf <- epi_workflow(r, linear_reg()) %>% fit(jhu)
+#' wf <- epi_workflow(r, linear_reg()) %>% fit(case_death_rate_subset)
 #'
-#' jhu_latest <- jhu %>%
-#'   filter(!is.na(case_rate), !is.na(death_rate)) %>%
-#'   group_by(geo_value) %>%
-#'   slice_tail(n = 15) %>% # have lags 0,...,14, so need 15 for a complete case
-#'   ungroup()
+#' latest <- get_test_data(r, case_death_rate_subset)
 #'
-#' preds <- predict(wf, jhu_latest, forecast_date = "2021-12-31") %>%
+#' preds <- predict(wf, latest) %>%
 #'   filter(!is.na(.pred))
 #'
 #' preds
-predict.epi_workflow <-
-  function(object, new_data, type = NULL, opts = list(),
-           forecast_date = NULL, ...) {
-    if (!workflows::is_trained_workflow(object)) {
-      rlang::abort(
-        c("Can't predict on an untrained epi_workflow.",
-          i = "Do you need to call `fit()`?"))
-    }
-    if (!is_null(forecast_date)) forecast_date <- as.Date(forecast_date)
-    the_fit <- workflows::extract_fit_parsnip(object)
-    mold <- workflows::extract_mold(object)
-    forged <- hardhat::forge(new_data, blueprint = mold$blueprint)
-    preds <- predict(the_fit, forged$predictors, type = type, opts = opts, ...)
-    keys <- grab_forged_keys(forged, mold, new_data)
-    out <- dplyr::bind_cols(keys, forecast_date = forecast_date, preds)
-    out
-  }
+predict.epi_workflow <- function(object, new_data, ...) {
+  # Refuse to predict from a workflow that has not been fitted yet.
+  if (!workflows::is_trained_workflow(object)) {
+    rlang::abort(c("Can't predict on an untrained epi_workflow.",
+                   i = "Do you need to call `fit()`?"))
+  }
+  # Assemble the mold, forged data and keys, then hand off to apply_frosting().
+  the_fit <- workflows::extract_fit_parsnip(object)
+  mold <- workflows::extract_mold(object)
+  forged <- hardhat::forge(new_data, blueprint = mold$blueprint)
+  components <- list()
+  components$mold <- mold
+  components$forged <- forged
+  components$keys <- grab_forged_keys(forged, mold, new_data)
+  components <- apply_frosting(object, components, the_fit, ...)
+  dplyr::bind_cols(components$keys, components$preds)
+}
 
 grab_forged_keys <- function(forged, mold, new_data) {
   keys <- c("time_value", "geo_value", "key")