autoplot new data

dsweber2 · dsweber2 · commit d5cd435baf41 · 2025-02-10T11:11:56.000-06:00
diff --git a/R/autoplot.R b/R/autoplot.R
@@ -16,6 +16,8 @@ ggplot2::autoplot
 #' @param object An `epi_workflow`
 #' @param predictions A data frame with predictions. If `NULL`, only the
 #'   original data is shown.
+#' @param plot_data An `epi_df` of data to plot the predictions against, e.g.
+#'   the actual results. If `NULL`, data is taken from the workflow's mold.
 #' @param .levels A numeric vector of levels to plot for any prediction bands.
 #'   More than 3 levels begins to be difficult to see.
 #' @param ... Ignored
@@ -82,7 +84,9 @@ NULL
 #' @export
 #' @rdname autoplot-epipred
 autoplot.epi_workflow <- function(
-    object, predictions = NULL,
+    object,
+    predictions = NULL,
+    plot_data = NULL,
     .levels = c(.5, .8, .9), ...,
     .color_by = c("all_keys", "geo_value", "other_keys", ".response", "all", "none"),
     .facet_by = c(".response", "other_keys", "all_keys", "geo_value", "all", "none"),
@@ -109,30 +113,32 @@ autoplot.epi_workflow <- function(
   }
   keys <- c("geo_value", "time_value", "key")
   mold_roles <- names(mold$extras$roles)
-  edf <- bind_cols(mold$extras$roles[mold_roles %in% keys], y)
-  if (starts_with_impl("ahead_", names(y))) {
-    old_name_y <- unlist(strsplit(names(y), "_"))
-    shift <- as.numeric(old_name_y[2])
-    new_name_y <- paste(old_name_y[-c(1:2)], collapse = "_")
-    edf <- rename(edf, !!new_name_y := !!names(y))
-  } else if (starts_with_impl("lag_", names(y))) {
-    old_name_y <- unlist(strsplit(names(y), "_"))
-    shift <- -as.numeric(old_name_y[2])
-    new_name_y <- paste(old_name_y[-c(1:2)], collapse = "_")
-    edf <- rename(edf, !!new_name_y := !!names(y))
-  }
-
-  if (!is.null(shift)) {
-    edf <- mutate(edf, time_value = time_value + shift)
+  # extract the relevant column names for plotting
+  old_name_y <- unlist(strsplit(names(y), "_"))
+  new_name_y <- paste(old_name_y[-c(1:2)], collapse = "_")
+  if (is.null(plot_data)) {
+    # no `plot_data` given: rebuild it from the mold. The outcome is shifted, so
+    # move `time_value` forward (ahead) or back (lag) by the same amount
+    plot_data <- bind_cols(mold$extras$roles[mold_roles %in% keys], y)
+    if (starts_with_impl("ahead_", names(y))) {
+      shift <- as.numeric(old_name_y[2])
+    } else if (starts_with_impl("lag_", names(y))) {
+      old_name_y <- unlist(strsplit(names(y), "_"))
+      shift <- -as.numeric(old_name_y[2])
+    }
+    plot_data <- rename(plot_data, !!new_name_y := !!names(y))
+    if (!is.null(shift)) {
+      plot_data <- mutate(plot_data, time_value = time_value + shift)
+    }
+    other_keys <- setdiff(key_colnames(object), c("geo_value", "time_value"))
+    plot_data <- as_epi_df(plot_data,
+      as_of = object$fit$meta$as_of,
+      other_keys = other_keys
+    )
   }
-  other_keys <- setdiff(key_colnames(object), c("geo_value", "time_value"))
-  edf <- as_epi_df(edf,
-    as_of = object$fit$meta$as_of,
-    other_keys = other_keys
-  )
   if (is.null(predictions)) {
     return(autoplot(
-      edf, new_name_y,
+      plot_data, new_name_y,
       .color_by = .color_by, .facet_by = .facet_by, .base_color = .base_color,
       .max_facets = .max_facets
     ))
@@ -144,27 +150,27 @@ autoplot.epi_workflow <- function(
     }
     predictions <- rename(predictions, time_value = target_date)
   }
-  pred_cols_ok <- hardhat::check_column_names(predictions, key_colnames(edf))
+  pred_cols_ok <- hardhat::check_column_names(predictions, key_colnames(plot_data))
   if (!pred_cols_ok$ok) {
     cli_warn(c(
       "`predictions` is missing required variables: {.var {pred_cols_ok$missing_names}}.",
       i = "Plotting the original data."
     ))
     return(autoplot(
-      edf, !!new_name_y,
+      plot_data, !!new_name_y,
       .color_by = .color_by, .facet_by = .facet_by, .base_color = .base_color,
       .max_facets = .max_facets
     ))
   }
 
   # First we plot the history, always faceted by everything
-  bp <- autoplot(edf, !!new_name_y,
+  bp <- autoplot(plot_data, !!new_name_y,
     .color_by = "none", .facet_by = "all_keys",
     .base_color = "black", .max_facets = .max_facets
   )
 
   # Now, prepare matching facets in the predictions
-  ek <- epi_keys_only(edf)
+  ek <- epi_keys_only(plot_data)
   predictions <- predictions %>%
     mutate(
       .facets = interaction(!!!rlang::syms(as.list(ek)), sep = "/"),
@@ -202,7 +208,7 @@ autoplot.epi_workflow <- function(
 #' @export
 #' @rdname autoplot-epipred
 autoplot.canned_epipred <- function(
-    object, ...,
+    object, plot_data = NULL, ...,
     .color_by = c("all_keys", "geo_value", "other_keys", ".response", "all", "none"),
     .facet_by = c(".response", "other_keys", "all_keys", "geo_value", "all", "none"),
     .base_color = "dodgerblue4",
@@ -216,7 +222,7 @@ autoplot.canned_epipred <- function(
   predictions <- object$predictions %>%
     rename(time_value = target_date)
 
-  autoplot(ewf, predictions,
+  autoplot(ewf, predictions, plot_data, ...,
     .color_by = .color_by, .facet_by = .facet_by,
     .base_color = .base_color, .max_facets = .max_facets
   )