From 8a1e2d624adbaed5c031b00261f317c0f93f92b4 Mon Sep 17 00:00:00 2001
From: "Daniel J. McDonald" <daniel@stat.ubc.ca>
Date: Thu, 14 Sep 2023 16:18:30 -0700
Subject: [PATCH 01/22] start CDC baseline layer

---
 R/layer_cdc_flatline_quantiles.R | 102 +++++++++++++++++++++++++++++++
 1 file changed, 102 insertions(+)
 create mode 100644 R/layer_cdc_flatline_quantiles.R

diff --git a/R/layer_cdc_flatline_quantiles.R b/R/layer_cdc_flatline_quantiles.R
new file mode 100644
index 000000000..0e2fa4f1b
--- /dev/null
+++ b/R/layer_cdc_flatline_quantiles.R
@@ -0,0 +1,102 @@
+layer_cdc_flatline_quantiles <- function(
+    frosting,
+    ...,
+    aheads = 1:4,
+    quantiles = c(.01, .025, 1:19 / 20, .975, .99),
+    nsims = 1e5,
+    by_key = "geo_value",
+    symmetrize = FALSE,
+    id = rand_id("cdc_baseline_bands")) {
+
+  rlang::check_dots_empty()
+
+  arg_is_int(aheads)
+  arg_is_probabilities(quantiles)
+  arg_is_pos_int(nsims)
+  arg_is_scalar(nsims)
+  arg_is_chr_scalar(id)
+  arg_is_lgl_scalar(symmetrize)
+  arg_is_chr(by_key, allow_null = TRUE, allow_na = TRUE, allow_empty = TRUE)
+
+  add_layer(
+    frosting,
+    layer_cdc_flatline_quantiles_new(
+      aheads = aheads,
+      quantiles = quantiles,
+      nsims = nsims,
+      by_key = by_key,
+      symmetrize = symmetrize,
+      id = id
+    )
+  )
+}
+
+layer_cdc_flatline_quantiles_new <- function(
+    aheads,
+    quantiles,
+    nsims,
+    by_key,
+    symmetrize,
+    id
+) {
+  layer(
+    "cdc_flatline_quantiles",
+    aheads,
+    quantiles,
+    nsims,
+    by_key,
+    symmetrize,
+    id
+  )
+}
+
+#' @export
+slather.layer_cdc_flatline_quantiles <-
+  function(object, components, workflow, new_data, ...) {
+    the_fit <- workflows::extract_fit_parsnip(workflow)
+    s <- ifelse(object$symmetrize, -1, NA)
+    r <- grab_residuals(the_fit, components)
+
+    ## Handle any grouping requests
+    if (length(object$by_key) > 0L) {
+      key_cols <- dplyr::bind_cols(
+        geo_value = components$mold$extras$roles$geo_value,
+        components$mold$extras$roles$key
+      )
+      common <- intersect(object$by_key, names(key_cols))
+      excess <- setdiff(object$by_key, names(key_cols))
+      if (length(excess) > 0L) {
+        rlang::warn(
+          "Requested residual grouping key(s) {excess} are unavailable ",
+          "in the original data. Grouping by the remainder: {common}."
+        )
+      }
+      if (length(common) > 0L) {
+        r <- r %>% dplyr::select(tidyselect::any_of(c(common, ".resid")))
+        common_in_r <- common[common %in% names(r)]
+        if (length(common_in_r) != length(common)) {
+          rlang::warn(
+            "Some grouping keys are not in data.frame returned by the",
+            "`residuals()` method. Groupings may not be correct."
+          )
+        }
+        r <- dplyr::bind_cols(key_cols, r) %>%
+          dplyr::group_by(!!!rlang::syms(common))
+      }
+    }
+
+
+
+
+
+
+    # always return components
+    components
+  }
+
+propogate_samples <- function(x, p, horizon, nsim, symmetrize) {
+  samp <- quantile(x, probs = c(0, seq_len(nsim)) / nsim)
+
+  for (iter in seq(horizon)) {}
+}
+

From cea1599dfad47e77c8a8026a66b16963ed7c86dd Mon Sep 17 00:00:00 2001
From: "Daniel J. McDonald" <dajmcdon@gmail.com>
Date: Fri, 22 Sep 2023 10:33:18 -0700
Subject: [PATCH 02/22] upgrade enframer

---
 R/utils-enframer.R | 13 +++++++------
 1 file changed, 7 insertions(+), 6 deletions(-)

diff --git a/R/utils-enframer.R b/R/utils-enframer.R
index 387d04356..0a8152906 100644
--- a/R/utils-enframer.R
+++ b/R/utils-enframer.R
@@ -13,10 +13,11 @@ enframer <- function(df, x, fill = NA) {
 }
 
 enlist <- function(...) {
-  # in epiprocess
-  x <- list(...)
-  n <- as.character(sys.call())[-1]
-  if (!is.null(n0 <- names(x))) n[n0 != ""] <- n0[n0 != ""]
-  names(x) <- n
-  x
+  # converted to thin wrapper around
+  rlang::dots_list(
+    ...,
+    .homonyms = "error",
+    .named = TRUE,
+    .check_assign = TRUE
+  )
 }

From d606741cb05d8546f82ca057e5515a53b19b2a05 Mon Sep 17 00:00:00 2001
From: "Daniel J. McDonald" <dajmcdon@gmail.com>
Date: Sat, 23 Sep 2023 16:33:50 -0700
Subject: [PATCH 03/22] functions, remains to check validity

---
 R/compat-purrr.R                        |   5 +
 R/layer_cdc_flatline_quantiles.R        | 144 +++++++++++++++++-------
 R/layer_residual_quantiles.R            |   8 +-
 tests/testthat/test-propogate_samples.R |   8 ++
 tests/testthat/test-shuffle.R           |   5 +
 5 files changed, 128 insertions(+), 42 deletions(-)
 create mode 100644 tests/testthat/test-propogate_samples.R
 create mode 100644 tests/testthat/test-shuffle.R

diff --git a/R/compat-purrr.R b/R/compat-purrr.R
index 7e28bd630..712926f73 100644
--- a/R/compat-purrr.R
+++ b/R/compat-purrr.R
@@ -32,6 +32,11 @@ map_chr <- function(.x, .f, ...) {
   .rlang_purrr_map_mold(.x, .f, character(1), ...)
 }
 
+map_vec <- function(.x, .f, ...) {
+  out <- map(.x, .f, ...)
+  vctrs::list_unchop(out)
+}
+
 map_dfr <- function(.x, .f, ..., .id = NULL) {
   .f <- rlang::as_function(.f, env = rlang::global_env())
   res <- map(.x, .f, ...)
diff --git a/R/layer_cdc_flatline_quantiles.R b/R/layer_cdc_flatline_quantiles.R
index 0e2fa4f1b..c64b96c2d 100644
--- a/R/layer_cdc_flatline_quantiles.R
+++ b/R/layer_cdc_flatline_quantiles.R
@@ -6,6 +6,7 @@ layer_cdc_flatline_quantiles <- function(
     nsims = 1e5,
     by_key = "geo_value",
     symmetrize = FALSE,
+    nonneg = TRUE,
     id = rand_id("cdc_baseline_bands")) {
 
   rlang::check_dots_empty()
@@ -15,7 +16,7 @@ layer_cdc_flatline_quantiles <- function(
   arg_is_pos_int(nsims)
   arg_is_scalar(nsims)
   arg_is_chr_scalar(id)
-  arg_is_lgl_scalar(symmetrize)
+  arg_is_lgl_scalar(symmetrize, nonneg)
   arg_is_chr(by_key, allow_null = TRUE, allow_na = TRUE, allow_empty = TRUE)
 
   add_layer(
@@ -26,6 +27,7 @@ layer_cdc_flatline_quantiles <- function(
       nsims = nsims,
       by_key = by_key,
       symmetrize = symmetrize,
+      nonneg = nonneg,
       id = id
     )
   )
@@ -37,16 +39,18 @@ layer_cdc_flatline_quantiles_new <- function(
     nsims,
     by_key,
     symmetrize,
+    nonneg,
     id
 ) {
   layer(
     "cdc_flatline_quantiles",
-    aheads,
-    quantiles,
-    nsims,
-    by_key,
-    symmetrize,
-    id
+    aheads = aheads,
+    quantiles = quantiles,
+    nsims = nsims,
+    by_key = by_key,
+    symmetrize = symmetrize,
+    nonneg = nonneg,
+    id = id
   )
 }
 
@@ -54,49 +58,113 @@ layer_cdc_flatline_quantiles_new <- function(
 slather.layer_cdc_flatline_quantiles <-
   function(object, components, workflow, new_data, ...) {
     the_fit <- workflows::extract_fit_parsnip(workflow)
-    s <- ifelse(object$symmetrize, -1, NA)
+    if (!inherits(the_fit, "_flatline")) {
+      cli::cli_warn(
+        c("Predictions for this workflow were not produced by the {.cls flatline}",
+        "{.pkg parsnip} engine. Results may be unexpected. See {.fn epipredict::flatline}.")
+      )
+    }
+    p <- components$predictions
+    ek <- kill_time_value(epi_keys_mold(components$mold))
     r <- grab_residuals(the_fit, components)
 
-    ## Handle any grouping requests
+    avail_grps <- character(0L)
     if (length(object$by_key) > 0L) {
-      key_cols <- dplyr::bind_cols(
-        geo_value = components$mold$extras$roles$geo_value,
-        components$mold$extras$roles$key
-      )
-      common <- intersect(object$by_key, names(key_cols))
-      excess <- setdiff(object$by_key, names(key_cols))
-      if (length(excess) > 0L) {
-        rlang::warn(
-          "Requested residual grouping key(s) {excess} are unavailable ",
-          "in the original data. Grouping by the remainder: {common}."
-        )
+      cols_in_preds <- hardhat::check_column_names(p, object$by_key)
+      if (!cols_in_preds$ok) {
+        cli::cli_warn(c(
+          "Predicted values are missing key columns: {.var cols_in_preds$missing_names}.",
+          "Ignoring these."
+        ))
       }
-      if (length(common) > 0L) {
-        r <- r %>% dplyr::select(tidyselect::any_of(c(common, ".resid")))
-        common_in_r <- common[common %in% names(r)]
-        if (length(common_in_r) != length(common)) {
-          rlang::warn(
-            "Some grouping keys are not in data.frame returned by the",
-            "`residuals()` method. Groupings may not be correct."
-          )
+      if (inherits(the_fit, "_flatline")) {
+        cols_in_resids <- hardhat::check_column_names(r, object$by_key)
+        if (!cols_in_resids$ok) {
+          cli::cli_warn(c(
+            "Existing residuals are missing key columns: {.var cols_in_resids$missing_names}.",
+            "Ignoring these."
+          ))
+        }
+        # use only the keys that are in the predictions and requested.
+        avail_grps <- intersect(ek, setdiff(
+          object$by_key,
+          c(cols_in_preds$missing_names, cols_in_resids$missing_names)
+        ))
+      } else { # not flatline, but we'll try
+        key_cols <- dplyr::bind_cols(
+          geo_value = components$mold$extras$roles$geo_value,
+          components$mold$extras$roles$key
+        )
+        cols_in_resids <- hardhat::check_column_names(key_cols, object$by_key)
+        if (!cols_in_resids$ok) {
+          cli::cli_warn(c(
+            "Requested residuals are missing key columns: {.var cols_in_resids$missing_names}.",
+            "Ignoring these."
+          ))
         }
-        r <- dplyr::bind_cols(key_cols, r) %>%
-          dplyr::group_by(!!!rlang::syms(common))
+        avail_grps <- intersect(names(key_cols), setdiff(
+          object$by_key,
+          c(cols_in_preds$missing_names, cols_in_resids$missing_names)
+        ))
+        r <- dplyr::bind_cols(key_cols, r)
       }
     }
+    r <- r %>%
+      dplyr::select(tidyselect::all_of(c(avail_grps, ".resid"))) %>%
+      dplyr::group_by(!!!rlang::syms(avail_grps)) %>%
+      dplyr::summarise(.resid = list(.resid), .groups = "drop")
 
+    res <- dplyr::left_join(p, r, by = avail_grps) %>%
+      dplyr::rowwise() %>%
+      dplyr::mutate(
+        .pred_distn_all = propogate_samples(
+          .resid, .pred, object$quantiles,
+          object$aheads, object$nsim, object$symmetrize, object$nonneg
+        )
+      ) %>%
+      dplyr::select(tidyselect::all_of(c(avail_grps, ".pred_distn_all")))
 
-
-
-
-
-    # always return components
+    # res <- check_pname(res, components$predictions, object)
+    components$predictions <- dplyr::left_join(
+      components$predictions,
+      res,
+      by = avail_grps
+    )
     components
   }
 
-propogate_samples <- function(x, p, horizon, nsim, symmetrize) {
-  samp <- quantile(x, probs = c(0, seq_len(nsim)) / nsim)
+propogate_samples <- function(
+    r, p, quantiles, aheads, nsim, symmetrize, nonneg) {
+  max_ahead <- max(aheads)
+  samp <- quantile(r, probs = c(0, seq_len(nsim - 1)) / (nsim - 1), na.rm = TRUE)
+  res <- list()
 
-  for (iter in seq(horizon)) {}
+  # p should be all the same
+  p <- max(p, na.rm = TRUE)
+
+  raw <- samp + p
+  if (nonneg) raw <- pmax(0, raw)
+  res[[1]] <- raw
+  if (max_ahead > 1L) {
+    for (iter in 2:max_ahead) {
+      samp <- shuffle(samp)
+      raw <- raw + samp
+      if (symmetrize) symmetric <- raw - (median(raw) + p)
+      else symmetric <- raw
+      if (nonneg) symmetric <- pmax(0, symmetric)
+      res[[iter]] <- symmetric
+    }
+  }
+  res <- res[aheads]
+  list(tibble::tibble(
+    aheads = aheads,
+    .pred_distn = map_vec(
+      res, ~ dist_quantiles(quantile(.x, quantiles), tau = quantiles)
+    )
+  ))
 }
 
+shuffle <- function(x) {
+  stopifnot(is.vector(x))
+  sample(x, length(x), replace = FALSE)
+}
diff --git a/R/layer_residual_quantiles.R b/R/layer_residual_quantiles.R
index a9a8cab24..b9a71e265 100644
--- a/R/layer_residual_quantiles.R
+++ b/R/layer_residual_quantiles.R
@@ -141,8 +141,8 @@ grab_residuals <- function(the_fit, components) {
       if (".resid" %in% names(r)) { # success
         return(r)
       } else { # failure
-        rlang::warn(c(
-          "The `residuals()` method for objects of class {cl} results in",
+        cli::cli_warn(c(
+          "The `residuals()` method for {.cls cl} objects results in",
           "a data frame without a column named `.resid`.",
           i = "Residual quantiles will be calculated directly from the",
           i = "difference between predictions and observations.",
@@ -152,8 +152,8 @@ grab_residuals <- function(the_fit, components) {
     } else if (is.vector(drop(r))) { # also success
       return(tibble(.resid = drop(r)))
     } else { # failure
-      rlang::warn(c(
-        "The `residuals()` method for objects of class {cl} results in an",
+      cli::cli_warn(c(
+        "The `residuals()` method for {.cls cl} objects results in an",
         "object that is neither a data frame with a column named `.resid`,",
         "nor something coercible to a vector.",
         i = "Residual quantiles will be calculated directly from the",
diff --git a/tests/testthat/test-propogate_samples.R b/tests/testthat/test-propogate_samples.R
new file mode 100644
index 000000000..3b02404b6
--- /dev/null
+++ b/tests/testthat/test-propogate_samples.R
@@ -0,0 +1,8 @@
+test_that("propogate_samples", {
+  r <- -30:50
+  p <- 40
+  quantiles <- 1:9 / 10
+  aheads <- c(2, 4, 7)
+  nsim <- 100
+
+})
diff --git a/tests/testthat/test-shuffle.R b/tests/testthat/test-shuffle.R
new file mode 100644
index 000000000..94bc1aa3b
--- /dev/null
+++ b/tests/testthat/test-shuffle.R
@@ -0,0 +1,5 @@
+test_that("shuffle works", {
+  expect_error(shuffle(matrix(NA, 2, 2)))
+  expect_length(shuffle(1:10), 10L)
+  expect_identical(sort(shuffle(1:10)), 1:10)
+})

From 7294c000fba0fc2a3dc2298ce654a70879a58627 Mon Sep 17 00:00:00 2001
From: "Daniel J. McDonald" <dajmcdon@gmail.com>
Date: Sun, 24 Sep 2023 11:19:09 -0700
Subject: [PATCH 04/22] correct symmetrization, enhance documentation of the
 "ahead" param in `flatline_forecaster()`.

---
 R/flatline_forecaster.R          | 6 ++++++
 R/layer_cdc_flatline_quantiles.R | 4 ++--
 2 files changed, 8 insertions(+), 2 deletions(-)

diff --git a/R/flatline_forecaster.R b/R/flatline_forecaster.R
index e437f50ea..197c8cca5 100644
--- a/R/flatline_forecaster.R
+++ b/R/flatline_forecaster.R
@@ -94,6 +94,12 @@ flatline_forecaster <- function(
 #' Constructs a list of arguments for [flatline_forecaster()].
 #'
 #' @inheritParams arx_args_list
+#' @param ahead Integer. Unlike [arx_forecaster()], this doesn't have any effect
+#'   on the predicted values. Predictions are always the most recent observation.
+#'   However, this _does_ impact the residuals stored in the object. Residuals
+#'   are calculated based on this number to mimic how badly you would have done.
+#'   So for example, `ahead = 7` will create residuals by comparing values
+#'   7 days apart.
 #'
 #' @return A list containing updated parameter choices with class `flatline_alist`.
 #' @export
diff --git a/R/layer_cdc_flatline_quantiles.R b/R/layer_cdc_flatline_quantiles.R
index c64b96c2d..3f178f6da 100644
--- a/R/layer_cdc_flatline_quantiles.R
+++ b/R/layer_cdc_flatline_quantiles.R
@@ -149,7 +149,7 @@ propogate_samples <- function(
     for (iter in 2:max_ahead) {
       samp <- shuffle(samp)
       raw <- raw + samp
-      if (symmetrize) symmetric <- raw - (median(raw) + p)
+      if (symmetrize) symmetric <- raw - (median(raw) - p)
       else symmetric <- raw
       if (nonneg) symmetric <- pmax(0, symmetric)
       res[[iter]] <- symmetric
@@ -157,7 +157,7 @@ propogate_samples <- function(
   }
   res <- res[aheads]
   list(tibble::tibble(
-    aheads = aheads,
+    ahead = aheads,
     .pred_distn = map_vec(
       res, ~ dist_quantiles(quantile(.x, quantiles), tau = quantiles)
     )

From f18e88f7b45602c4bbdf4a26d32252f2203485b2 Mon Sep 17 00:00:00 2001
From: "Daniel J. McDonald" <dajmcdon@gmail.com>
Date: Sun, 24 Sep 2023 11:29:14 -0700
Subject: [PATCH 05/22] better defaults, cli, pred is scalar in
 propagate_samples

---
 R/layer_cdc_flatline_quantiles.R | 11 ++++-------
 1 file changed, 4 insertions(+), 7 deletions(-)

diff --git a/R/layer_cdc_flatline_quantiles.R b/R/layer_cdc_flatline_quantiles.R
index 3f178f6da..f2b55e5ec 100644
--- a/R/layer_cdc_flatline_quantiles.R
+++ b/R/layer_cdc_flatline_quantiles.R
@@ -3,7 +3,7 @@ layer_cdc_flatline_quantiles <- function(
     ...,
     aheads = 1:4,
     quantiles = c(.01, .025, 1:19 / 20, .975, .99),
-    nsims = 1e5,
+    nsims = 1e3,
     by_key = "geo_value",
     symmetrize = FALSE,
     nonneg = TRUE,
@@ -73,7 +73,7 @@ slather.layer_cdc_flatline_quantiles <-
       cols_in_preds <- hardhat::check_column_names(p, object$by_key)
       if (!cols_in_preds$ok) {
         cli::cli_warn(c(
-          "Predicted values are missing key columns: {.var cols_in_preds$missing_names}.",
+          "Predicted values are missing key columns: {.val {cols_in_preds$missing_names}}.",
           "Ignoring these."
         ))
       }
@@ -81,7 +81,7 @@ slather.layer_cdc_flatline_quantiles <-
         cols_in_resids <- hardhat::check_column_names(r, object$by_key)
         if (!cols_in_resids$ok) {
           cli::cli_warn(c(
-            "Existing residuals are missing key columns: {.var cols_in_resids$missing_names}.",
+            "Existing residuals are missing key columns: {.val {cols_in_resids$missing_names}}.",
             "Ignoring these."
           ))
         }
@@ -98,7 +98,7 @@ slather.layer_cdc_flatline_quantiles <-
         cols_in_resids <- hardhat::check_column_names(key_cols, object$by_key)
         if (!cols_in_resids$ok) {
           cli::cli_warn(c(
-            "Requested residuals are missing key columns: {.var cols_in_resids$missing_names}.",
+            "Requested residuals are missing key columns: {.val {cols_in_resids$missing_names}}.",
             "Ignoring these."
           ))
         }
@@ -139,9 +139,6 @@ propogate_samples <- function(
   samp <- quantile(r, probs = c(0, seq_len(nsim - 1)) / (nsim - 1), na.rm = TRUE)
   res <- list()
 
-  # p should be all the same
-  p <- max(p, na.rm = TRUE)
-
   raw <- samp + p
   if (nonneg) raw <- pmax(0, raw)
   res[[1]] <- raw

From d6a28f371d0f8436b436da2b511a8e02816094c6 Mon Sep 17 00:00:00 2001
From: "Daniel J. McDonald" <dajmcdon@gmail.com>
Date: Sun, 24 Sep 2023 16:40:31 -0700
Subject: [PATCH 06/22] redocument

---
 NAMESPACE                           |   2 +
 R/layer_cdc_flatline_quantiles.R    |  94 +++++++++++++++++++++
 man/add_frosting.Rd                 |   4 +-
 man/arx_fcast_epi_workflow.Rd       |  12 ++-
 man/arx_forecaster.Rd               |  12 ++-
 man/create_layer.Rd                 |   6 +-
 man/dist_quantiles.Rd               |   2 +-
 man/extrapolate_quantiles.Rd        |   8 +-
 man/fit-epi_workflow.Rd             |   2 +-
 man/flatline.Rd                     |   6 +-
 man/flatline_args_list.Rd           |   8 +-
 man/frosting.Rd                     |   4 +-
 man/layer_add_forecast_date.Rd      |  12 +--
 man/layer_add_target_date.Rd        |   6 +-
 man/layer_cdc_flatline_quantiles.Rd | 125 ++++++++++++++++++++++++++++
 man/layer_population_scaling.Rd     |  29 ++++---
 man/layer_predict.Rd                |   6 +-
 man/nested_quantiles.Rd             |   4 +-
 man/smooth_quantile_reg.Rd          |  27 +++---
 man/step_epi_shift.Rd               |   2 +-
 man/step_growth_rate.Rd             |   4 +-
 man/step_lag_difference.Rd          |   4 +-
 man/step_population_scaling.Rd      |  26 +++---
 man/step_training_window.Rd         |   9 +-
 24 files changed, 340 insertions(+), 74 deletions(-)
 create mode 100644 man/layer_cdc_flatline_quantiles.Rd

diff --git a/NAMESPACE b/NAMESPACE
index b22ec53a5..e361dec00 100644
--- a/NAMESPACE
+++ b/NAMESPACE
@@ -79,6 +79,7 @@ S3method(residuals,flatline)
 S3method(run_mold,default_epi_recipe_blueprint)
 S3method(slather,layer_add_forecast_date)
 S3method(slather,layer_add_target_date)
+S3method(slather,layer_cdc_flatline_quantiles)
 S3method(slather,layer_naomit)
 S3method(slather,layer_point_from_distn)
 S3method(slather,layer_population_scaling)
@@ -131,6 +132,7 @@ export(is_layer)
 export(layer)
 export(layer_add_forecast_date)
 export(layer_add_target_date)
+export(layer_cdc_flatline_quantiles)
 export(layer_naomit)
 export(layer_point_from_distn)
 export(layer_population_scaling)
diff --git a/R/layer_cdc_flatline_quantiles.R b/R/layer_cdc_flatline_quantiles.R
index f2b55e5ec..afee37577 100644
--- a/R/layer_cdc_flatline_quantiles.R
+++ b/R/layer_cdc_flatline_quantiles.R
@@ -1,3 +1,97 @@
+#' CDC Flatline Forecast Quantiles
+#'
+#' This layer creates quantile forecasts by taking a sample from the
+#' interpolated CDF of the flatline residuals, and shuffling them.
+#' These are then added on to the point prediction.
+#'
+#' @details
+#' This layer is intended to be used in concert with [flatline()]. But it can
+#' also be used with anything else. As long as residuals are available in the
+#' the fitted model, this layer could be useful. Like
+#' [layer_residual_quantiles()] it only uses the residuals for the fitted model
+#' object. However, it propagates these forward for *all* aheads, by
+#' iteratively shuffling them (randomly), and then adding them to the previous
+#' set. This is in contrast to what happens with the [flatline_forecaster()].
+#' When using [flatline()] as the underlying engine (here), both will result in the
+#' same predictions (the most recent observed value), but that model calculates
+#' separate residuals for each `ahead` by comparing to observations further into
+#' the future. This version continues to use the same set of residuals, and
+#' adds them on to produce wider intervals as `ahead` increases.
+#'
+#'
+#' @inheritParams layer_residual_quantiles
+#' @param aheads Numeric vector of desired forecast horizons. These should be
+#'   given in the "units of the training data". So, for example, for data
+#'   typically observed daily (possibly with missing values), but
+#'   with weekly forecast targets, you would use `c(7, 14, 21, 28)`. But with
+#'   weekly data, you would use `1:4`.
+#' @param quantiles Numeric vector of probabilities with values in (0,1)
+#'   referring to the desired predictive intervals. The default is the standard
+#'   set for the COVID Forecast Hub.
+#' @param nsims Positive integer. The number of draws from the empirical CDF.
+#'   These samples are spaced evenly on the (0, 1) scale, F_X(x) resulting
+#'   in linear interpolation on the X scale. This is achieved with
+#'   [stats::quantile()] Type 7 (the default for that function).
+#' @param nonneg Logical. Force all predictive intervals be non-negative.
+#'   Because non-negativity is forced _before_ propagating forward, this
+#'   has slightly different behaviour than would occur if using
+#'   [layer_threshold_preds()].
+#'
+#' @return an updated `frosting` postprocessor. Calling [predict()] will result
+#'   in an additional `<list-col>` named `.pred_distn_all` containing 2-column
+#'   [tibble::tibble()]'s. For each
+#'   desired combination of `key`'s, the tibble will contain one row per ahead
+#'   with the associated [dist_quantiles()].
+#' @export
+#'
+#' @examples
+#' r <- epi_recipe(case_death_rate_subset) %>%
+#'   # data is "daily", so we fit this to 1 ahead, the result will contain
+#'   # 1 day ahead residuals
+#'   step_epi_ahead(death_rate, ahead = 1L, skip = TRUE) %>%
+#'   recipes::update_role(death_rate, new_role = "predictor") %>%
+#'   recipes::add_role(time_value, geo_value, new_role = "predictor")
+#'
+#' forecast_date <- max(case_death_rate_subset$time_value)
+#'
+#' latest <- get_test_data(
+#'   epi_recipe(case_death_rate_subset), case_death_rate_subset
+#' )
+#'
+#' f <- frosting() %>%
+#'   layer_predict() %>%
+#'   layer_cdc_flatline_quantiles(aheads = c(7, 14, 21, 28), symmetrize = TRUE)
+#'
+#' eng <- parsnip::linear_reg() %>% parsnip::set_engine("flatline")
+#'
+#' wf <- epi_workflow(r, eng, f) %>% fit(case_death_rate_subset)
+#' preds <- suppressWarnings(predict(wf, new_data = latest)) %>%
+#'   dplyr::select(-time_value) %>%
+#'   dplyr::mutate(forecast_date = forecast_date)
+#' preds
+#'
+#' preds <- preds %>%
+#'   unnest(.pred_distn_all) %>%
+#'   pivot_quantiles(.pred_distn) %>%
+#'   mutate(target_date = forecast_date + ahead)
+#'
+#' library(ggplot2)
+#' four_states <- c("ca", "pa", "wa", "ny")
+#' preds %>%
+#'   filter(geo_value %in% four_states) %>%
+#'   ggplot(aes(target_date)) +
+#'   geom_ribbon(aes(ymin = `0.1`, ymax = `0.9`), fill = blues9[3]) +
+#'   geom_ribbon(aes(ymin = `0.25`, ymax = `0.75`), fill = blues9[6]) +
+#'   geom_line(aes(y = .pred), color = "orange") +
+#'   geom_line(data = case_death_rate_subset %>% filter(geo_value %in% four_states),
+#'     aes(x = time_value, y = death_rate)) +
+#'   scale_x_date(limits = c(forecast_date - 90, forecast_date + 30)) +
+#'   labs(x = "Date", y = "Death rate") +
+#'   facet_wrap(~geo_value, scales = "free_y") +
+#'   theme_bw() +
+#'   geom_vline(xintercept = forecast_date)
+#'
+#'
 layer_cdc_flatline_quantiles <- function(
     frosting,
     ...,
diff --git a/man/add_frosting.Rd b/man/add_frosting.Rd
index d7d217777..4d77572a1 100644
--- a/man/add_frosting.Rd
+++ b/man/add_frosting.Rd
@@ -35,7 +35,9 @@ latest <- jhu \%>\%
   dplyr::filter(time_value >= max(time_value) - 14)
 
 # Add frosting to a workflow and predict
-f <- frosting() \%>\% layer_predict() \%>\% layer_naomit(.pred)
+f <- frosting() \%>\%
+  layer_predict() \%>\%
+  layer_naomit(.pred)
 wf1 <- wf \%>\% add_frosting(f)
 p1 <- predict(wf1, latest)
 p1
diff --git a/man/arx_fcast_epi_workflow.Rd b/man/arx_fcast_epi_workflow.Rd
index fdd309959..7a6b66305 100644
--- a/man/arx_fcast_epi_workflow.Rd
+++ b/man/arx_fcast_epi_workflow.Rd
@@ -41,12 +41,16 @@ use \code{\link[=quantile_reg]{quantile_reg()}}) but can be omitted.
 jhu <- case_death_rate_subset \%>\%
   dplyr::filter(time_value >= as.Date("2021-12-01"))
 
-arx_fcast_epi_workflow(jhu, "death_rate",
-  c("case_rate", "death_rate"))
+arx_fcast_epi_workflow(
+  jhu, "death_rate",
+  c("case_rate", "death_rate")
+)
 
 arx_fcast_epi_workflow(jhu, "death_rate",
-  c("case_rate", "death_rate"), trainer = quantile_reg(),
-  args_list = arx_args_list(levels = 1:9 / 10))
+  c("case_rate", "death_rate"),
+  trainer = quantile_reg(),
+  args_list = arx_args_list(levels = 1:9 / 10)
+)
 }
 \seealso{
 \code{\link[=arx_forecaster]{arx_forecaster()}}
diff --git a/man/arx_forecaster.Rd b/man/arx_forecaster.Rd
index d4866aa0e..e121f272c 100644
--- a/man/arx_forecaster.Rd
+++ b/man/arx_forecaster.Rd
@@ -41,12 +41,16 @@ that it estimates a model for a particular target horizon.
 jhu <- case_death_rate_subset \%>\%
   dplyr::filter(time_value >= as.Date("2021-12-01"))
 
-out <- arx_forecaster(jhu, "death_rate",
-  c("case_rate", "death_rate"))
+out <- arx_forecaster(
+  jhu, "death_rate",
+  c("case_rate", "death_rate")
+)
 
 out <- arx_forecaster(jhu, "death_rate",
-  c("case_rate", "death_rate"), trainer = quantile_reg(),
-  args_list = arx_args_list(levels = 1:9 / 10))
+  c("case_rate", "death_rate"),
+  trainer = quantile_reg(),
+  args_list = arx_args_list(levels = 1:9 / 10)
+)
 }
 \seealso{
 \code{\link[=arx_fcast_epi_workflow]{arx_fcast_epi_workflow()}}, \code{\link[=arx_args_list]{arx_args_list()}}
diff --git a/man/create_layer.Rd b/man/create_layer.Rd
index 399d62efa..d36385fb2 100644
--- a/man/create_layer.Rd
+++ b/man/create_layer.Rd
@@ -20,9 +20,9 @@ fill in the name of the layer, and open the file.
 \examples{
 \dontrun{
 
-  # Note: running this will write `layer_strawberry.R` to
-  # the `R/` directory of your current project
-  create_layer("strawberry")
+# Note: running this will write `layer_strawberry.R` to
+# the `R/` directory of your current project
+create_layer("strawberry")
 }
 
 }
diff --git a/man/dist_quantiles.Rd b/man/dist_quantiles.Rd
index 50f00dc32..739bae5a8 100644
--- a/man/dist_quantiles.Rd
+++ b/man/dist_quantiles.Rd
@@ -15,7 +15,7 @@ dist_quantiles(x, tau)
 A distribution parameterized by a set of quantiles
 }
 \examples{
-dstn <- dist_quantiles(list(1:4, 8:11), list(c(.2,.4,.6,.8)))
+dstn <- dist_quantiles(list(1:4, 8:11), list(c(.2, .4, .6, .8)))
 quantile(dstn, p = c(.1, .25, .5, .9))
 median(dstn)
 
diff --git a/man/extrapolate_quantiles.Rd b/man/extrapolate_quantiles.Rd
index 985d7cae8..cc6cb2c3c 100644
--- a/man/extrapolate_quantiles.Rd
+++ b/man/extrapolate_quantiles.Rd
@@ -24,12 +24,14 @@ library(distributional)
 dstn <- dist_normal(c(10, 2), c(5, 10))
 extrapolate_quantiles(dstn, p = c(.25, 0.5, .75))
 
-dstn <- dist_quantiles(list(1:4, 8:11), list(c(.2,.4,.6,.8)))
+dstn <- dist_quantiles(list(1:4, 8:11), list(c(.2, .4, .6, .8)))
 # because this distribution is already quantiles, any extra quantiles are
 # appended
 extrapolate_quantiles(dstn, p = c(.25, 0.5, .75))
 
-dstn <- c(dist_normal(c(10, 2), c(5, 10)),
-  dist_quantiles(list(1:4, 8:11), list(c(.2,.4,.6,.8))))
+dstn <- c(
+  dist_normal(c(10, 2), c(5, 10)),
+  dist_quantiles(list(1:4, 8:11), list(c(.2, .4, .6, .8)))
+)
 extrapolate_quantiles(dstn, p = c(.25, 0.5, .75))
 }
diff --git a/man/fit-epi_workflow.Rd b/man/fit-epi_workflow.Rd
index fb1c3af28..3dfa0029a 100644
--- a/man/fit-epi_workflow.Rd
+++ b/man/fit-epi_workflow.Rd
@@ -29,7 +29,7 @@ preprocessing the data and fitting the underlying parsnip model.
 }
 \examples{
 jhu <- case_death_rate_subset \%>\%
-filter(time_value > "2021-11-01", geo_value \%in\% c("ak", "ca", "ny"))
+  filter(time_value > "2021-11-01", geo_value \%in\% c("ak", "ca", "ny"))
 
 r <- epi_recipe(jhu) \%>\%
   step_epi_lag(death_rate, lag = c(0, 7, 14)) \%>\%
diff --git a/man/flatline.Rd b/man/flatline.Rd
index a396cfeb9..c353ff163 100644
--- a/man/flatline.Rd
+++ b/man/flatline.Rd
@@ -38,8 +38,10 @@ This is an internal function that is used to create a \code{\link[parsnip:linear
 model. It has somewhat odd behaviour (see below).
 }
 \examples{
-tib <- data.frame(y = runif(100),
-  expand.grid(k = letters[1:4], j = letters[5:9], time_value = 1:5)) \%>\%
+tib <- data.frame(
+  y = runif(100),
+  expand.grid(k = letters[1:4], j = letters[5:9], time_value = 1:5)
+) \%>\%
   dplyr::group_by(k, j) \%>\%
   dplyr::mutate(y2 = dplyr::lead(y, 2)) # predict 2 steps ahead
 flat <- flatline(y2 ~ j + k + y, tib) # predictions for 20 locations
diff --git a/man/flatline_args_list.Rd b/man/flatline_args_list.Rd
index 55d93c1db..dcae448f1 100644
--- a/man/flatline_args_list.Rd
+++ b/man/flatline_args_list.Rd
@@ -17,8 +17,12 @@ flatline_args_list(
 )
 }
 \arguments{
-\item{ahead}{Integer. Number of time steps ahead (in days) of the forecast
-date for which forecasts should be produced.}
+\item{ahead}{Integer. Unlike \code{\link[=arx_forecaster]{arx_forecaster()}}, this doesn't have any effect
+on the predicted values. Predictions are always the most recent observation.
+However, this \emph{does} impact the residuals stored in the object. Residuals
+are calculated based on this number to mimic how badly you would have done.
+So for example, \code{ahead = 7} will create residuals by comparing values
+7 days apart.}
 
 \item{n_training}{Integer. An upper limit for the number of rows per
 key that are used for training
diff --git a/man/frosting.Rd b/man/frosting.Rd
index 83a8d6a9d..362c40a4f 100644
--- a/man/frosting.Rd
+++ b/man/frosting.Rd
@@ -24,8 +24,8 @@ The arguments are currently placeholders and must be NULL
 \examples{
 
 # Toy example to show that frosting can be created and added for postprocessing
- f <- frosting()
- wf <- epi_workflow() \%>\% add_frosting(f)
+f <- frosting()
+wf <- epi_workflow() \%>\% add_frosting(f)
 
 # A more realistic example
 jhu <- case_death_rate_subset \%>\%
diff --git a/man/layer_add_forecast_date.Rd b/man/layer_add_forecast_date.Rd
index 421978eb5..4e173d662 100644
--- a/man/layer_add_forecast_date.Rd
+++ b/man/layer_add_forecast_date.Rd
@@ -46,15 +46,17 @@ wf <- epi_workflow(r, parsnip::linear_reg()) \%>\% fit(jhu)
 latest <- jhu \%>\%
   dplyr::filter(time_value >= max(time_value) - 14)
 
- # Don't specify `forecast_date` (by default, this should be last date in latest)
-f <- frosting() \%>\% layer_predict() \%>\%
-   layer_naomit(.pred)
+# Don't specify `forecast_date` (by default, this should be last date in latest)
+f <- frosting() \%>\%
+  layer_predict() \%>\%
+  layer_naomit(.pred)
 wf0 <- wf \%>\% add_frosting(f)
 p0 <- predict(wf0, latest)
 p0
 
 # Specify a `forecast_date` that is greater than or equal to `as_of` date
-f <- frosting() \%>\% layer_predict() \%>\%
+f <- frosting() \%>\%
+  layer_predict() \%>\%
   layer_add_forecast_date(forecast_date = "2022-05-31") \%>\%
   layer_naomit(.pred)
 wf1 <- wf \%>\% add_frosting(f)
@@ -73,7 +75,7 @@ p2 <- predict(wf2, latest)
 p2
 
 # Do not specify a forecast_date
- f3 <- frosting() \%>\%
+f3 <- frosting() \%>\%
   layer_predict() \%>\%
   layer_add_forecast_date() \%>\%
   layer_naomit(.pred)
diff --git a/man/layer_add_target_date.Rd b/man/layer_add_target_date.Rd
index 58ff7770f..3c2884e10 100644
--- a/man/layer_add_target_date.Rd
+++ b/man/layer_add_target_date.Rd
@@ -48,7 +48,8 @@ wf <- epi_workflow(r, parsnip::linear_reg()) \%>\% fit(jhu)
 latest <- get_test_data(r, jhu)
 
 # Use ahead + forecast date
-f <- frosting() \%>\% layer_predict() \%>\%
+f <- frosting() \%>\%
+  layer_predict() \%>\%
   layer_add_forecast_date(forecast_date = "2022-05-31") \%>\%
   layer_add_target_date() \%>\%
   layer_naomit(.pred)
@@ -59,7 +60,8 @@ p
 
 # Use ahead + max time value from pre, fit, post
 # which is the same if include `layer_add_forecast_date()`
-f2 <- frosting() \%>\% layer_predict() \%>\%
+f2 <- frosting() \%>\%
+  layer_predict() \%>\%
   layer_add_target_date() \%>\%
   layer_naomit(.pred)
 wf2 <- wf \%>\% add_frosting(f2)
diff --git a/man/layer_cdc_flatline_quantiles.Rd b/man/layer_cdc_flatline_quantiles.Rd
new file mode 100644
index 000000000..c5bb33d3b
--- /dev/null
+++ b/man/layer_cdc_flatline_quantiles.Rd
@@ -0,0 +1,125 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/layer_cdc_flatline_quantiles.R
+\name{layer_cdc_flatline_quantiles}
+\alias{layer_cdc_flatline_quantiles}
+\title{CDC Flatline Forecast Quantiles}
+\usage{
+layer_cdc_flatline_quantiles(
+  frosting,
+  ...,
+  aheads = 1:4,
+  quantiles = c(0.01, 0.025, 1:19/20, 0.975, 0.99),
+  nsims = 1000,
+  by_key = "geo_value",
+  symmetrize = FALSE,
+  nonneg = TRUE,
+  id = rand_id("cdc_baseline_bands")
+)
+}
+\arguments{
+\item{frosting}{a \code{frosting} postprocessor}
+
+\item{...}{Unused, include for consistency with other layers.}
+
+\item{aheads}{Numeric vector of desired forecast horizons. These should be
+given in the "units of the training data". So, for example, for data
+typically observed daily (possibly with missing values), but
+with weekly forecast targets, you would use \code{c(7, 14, 21, 28)}. But with
+weekly data, you would use \code{1:4}.}
+
+\item{quantiles}{Numeric vector of probabilities with values in (0,1)
+referring to the desired predictive intervals. The default is the standard
+set for the COVID Forecast Hub.}
+
+\item{nsims}{Positive integer. The number of draws from the empirical CDF.
+These samples are spaced evenly on the (0, 1) scale, F_X(x) resulting
+in linear interpolation on the X scale. This is achieved with
+\code{\link[stats:quantile]{stats::quantile()}} Type 7 (the default for that function).}
+
+\item{by_key}{A character vector of keys to group the residuals by before
+calculating quantiles. The default, \code{c()} performs no grouping.}
+
+\item{symmetrize}{logical. If \code{TRUE} then interval will be symmetric.}
+
+\item{nonneg}{Logical. Force all predictive intervals be non-negative.
+Because non-negativity is forced \emph{before} propagating forward, this
+has slightly different behaviour than would occur if using
+\code{\link[=layer_threshold_preds]{layer_threshold_preds()}}.}
+
+\item{id}{a random id string}
+}
+\value{
+an updated \code{frosting} postprocessor. Calling \code{\link[=predict]{predict()}} will result
+in an additional \verb{<list-col>} named \code{.pred_distn_all} containing 2-column
+\code{\link[tibble:tibble]{tibble::tibble()}}'s. For each
+desired combination of \code{key}'s, the tibble will contain one row per ahead
+with the associated \code{\link[=dist_quantiles]{dist_quantiles()}}.
+}
+\description{
+This layer creates quantile forecasts by taking a sample from the
+interpolated CDF of the flatline residuals, and shuffling them.
+These are then added on to the point prediction.
+}
+\details{
+This layer is intended to be used in concert with \code{\link[=flatline]{flatline()}}. But it can
+also be used with anything else. As long as residuals are available in the
+the fitted model, this layer could be useful. Like
+\code{\link[=layer_residual_quantiles]{layer_residual_quantiles()}} it only uses the residuals for the fitted model
+object. However, it propagates these forward for \emph{all} aheads, by
+iteratively shuffling them (randomly), and then adding them to the previous
+set. This is in contrast to what happens with the \code{\link[=flatline_forecaster]{flatline_forecaster()}}.
+When using \code{\link[=flatline]{flatline()}} as the underlying engine (here), both will result in the
+same predictions (the most recent observed value), but that model calculates
+separate residuals for each \code{ahead} by comparing to observations further into
+the future. This version continues to use the same set of residuals, and
+adds them on to produce wider intervals as \code{ahead} increases.
+}
+\examples{
+r <- epi_recipe(case_death_rate_subset) \%>\%
+  # data is "daily", so we fit this to 1 ahead, the result will contain
+  # 1 day ahead residuals
+  step_epi_ahead(death_rate, ahead = 1L, skip = TRUE) \%>\%
+  recipes::update_role(death_rate, new_role = "predictor") \%>\%
+  recipes::add_role(time_value, geo_value, new_role = "predictor")
+
+forecast_date <- max(case_death_rate_subset$time_value)
+
+latest <- get_test_data(
+  epi_recipe(case_death_rate_subset), case_death_rate_subset
+)
+
+f <- frosting() \%>\%
+  layer_predict() \%>\%
+  layer_cdc_flatline_quantiles(aheads = c(7, 14, 21, 28), symmetrize = TRUE)
+
+eng <- parsnip::linear_reg() \%>\% parsnip::set_engine("flatline")
+
+wf <- epi_workflow(r, eng, f) \%>\% fit(case_death_rate_subset)
+preds <- suppressWarnings(predict(wf, new_data = latest)) \%>\%
+  dplyr::select(-time_value) \%>\%
+  dplyr::mutate(forecast_date = forecast_date)
+preds
+
+preds <- preds \%>\%
+  unnest(.pred_distn_all) \%>\%
+  pivot_quantiles(.pred_distn) \%>\%
+  mutate(target_date = forecast_date + ahead)
+
+library(ggplot2)
+four_states <- c("ca", "pa", "wa", "ny")
+preds \%>\%
+  filter(geo_value \%in\% four_states) \%>\%
+  ggplot(aes(target_date)) +
+  geom_ribbon(aes(ymin = `0.1`, ymax = `0.9`), fill = blues9[3]) +
+  geom_ribbon(aes(ymin = `0.25`, ymax = `0.75`), fill = blues9[6]) +
+  geom_line(aes(y = .pred), color = "orange") +
+  geom_line(data = case_death_rate_subset \%>\% filter(geo_value \%in\% four_states),
+    aes(x = time_value, y = death_rate)) +
+  scale_x_date(limits = c(forecast_date - 90, forecast_date + 30)) +
+  labs(x = "Date", y = "Death rate") +
+  facet_wrap(~geo_value, scales = "free_y") +
+  theme_bw() +
+  geom_vline(xintercept = forecast_date)
+
+
+}
diff --git a/man/layer_population_scaling.Rd b/man/layer_population_scaling.Rd
index e841e9a50..179d6862c 100644
--- a/man/layer_population_scaling.Rd
+++ b/man/layer_population_scaling.Rd
@@ -78,13 +78,15 @@ jhu <- epiprocess::jhu_csse_daily_subset \%>\%
   dplyr::filter(time_value > "2021-11-01", geo_value \%in\% c("ca", "ny")) \%>\%
   dplyr::select(geo_value, time_value, cases)
 
-pop_data = data.frame(states = c("ca", "ny"), value = c(20000, 30000))
+pop_data <- data.frame(states = c("ca", "ny"), value = c(20000, 30000))
 
 r <- epi_recipe(jhu) \%>\%
-  step_population_scaling(df = pop_data,
-                          df_pop_col = "value",
-                          by = c("geo_value" = "states"),
-                          cases, suffix = "_scaled") \%>\%
+  step_population_scaling(
+    df = pop_data,
+    df_pop_col = "value",
+    by = c("geo_value" = "states"),
+    cases, suffix = "_scaled"
+  ) \%>\%
   step_epi_lag(cases_scaled, lag = c(0, 7, 14)) \%>\%
   step_epi_ahead(cases_scaled, ahead = 7, role = "outcome") \%>\%
   step_epi_naomit()
@@ -93,9 +95,11 @@ f <- frosting() \%>\%
   layer_predict() \%>\%
   layer_threshold(.pred) \%>\%
   layer_naomit(.pred) \%>\%
-  layer_population_scaling(.pred, df = pop_data,
-                           by =  c("geo_value" = "states"),
-                           df_pop_col = "value")
+  layer_population_scaling(.pred,
+    df = pop_data,
+    by = c("geo_value" = "states"),
+    df_pop_col = "value"
+  )
 
 wf <- epi_workflow(r, parsnip::linear_reg()) \%>\%
   fit(jhu) \%>\%
@@ -104,9 +108,12 @@ wf <- epi_workflow(r, parsnip::linear_reg()) \%>\%
 latest <- get_test_data(
   recipe = r,
   x = epiprocess::jhu_csse_daily_subset \%>\%
-    dplyr::filter(time_value > "2021-11-01",
-                  geo_value \%in\% c("ca", "ny")) \%>\%
-    dplyr::select(geo_value, time_value, cases))
+    dplyr::filter(
+      time_value > "2021-11-01",
+      geo_value \%in\% c("ca", "ny")
+    ) \%>\%
+    dplyr::select(geo_value, time_value, cases)
+)
 
 predict(wf, latest)
 }
diff --git a/man/layer_predict.Rd b/man/layer_predict.Rd
index 1326dfe75..03473053f 100644
--- a/man/layer_predict.Rd
+++ b/man/layer_predict.Rd
@@ -62,9 +62,9 @@ jhu <- case_death_rate_subset \%>\%
   filter(time_value > "2021-11-01", geo_value \%in\% c("ak", "ca", "ny"))
 
 r <- epi_recipe(jhu) \%>\%
- step_epi_lag(death_rate, lag = c(0, 7, 14)) \%>\%
- step_epi_ahead(death_rate, ahead = 7) \%>\%
- step_epi_naomit()
+  step_epi_lag(death_rate, lag = c(0, 7, 14)) \%>\%
+  step_epi_ahead(death_rate, ahead = 7) \%>\%
+  step_epi_naomit()
 
 wf <- epi_workflow(r, parsnip::linear_reg()) \%>\% fit(jhu)
 latest <- jhu \%>\% filter(time_value >= max(time_value) - 14)
diff --git a/man/nested_quantiles.Rd b/man/nested_quantiles.Rd
index 1a2824041..c4b578c1a 100644
--- a/man/nested_quantiles.Rd
+++ b/man/nested_quantiles.Rd
@@ -16,8 +16,8 @@ a list-col
 Turn a vector of quantile distributions into a list-col
 }
 \examples{
-edf <- case_death_rate_subset[1:3,]
-edf$q <- dist_quantiles(list(1:5, 2:4, 3:10), list(1:5/6, 2:4/5, 3:10/11))
+edf <- case_death_rate_subset[1:3, ]
+edf$q <- dist_quantiles(list(1:5, 2:4, 3:10), list(1:5 / 6, 2:4 / 5, 3:10 / 11))
 
 edf_nested <- edf \%>\% dplyr::mutate(q = nested_quantiles(q))
 edf_nested \%>\% tidyr::unnest(q)
diff --git a/man/smooth_quantile_reg.Rd b/man/smooth_quantile_reg.Rd
index 293999876..6cc2dfc82 100644
--- a/man/smooth_quantile_reg.Rd
+++ b/man/smooth_quantile_reg.Rd
@@ -39,9 +39,10 @@ only supported engine is \code{\link[smoothqr:smooth_qr]{smoothqr::smooth_qr()}}
 tib <- data.frame(
   y1 = rnorm(100), y2 = rnorm(100), y3 = rnorm(100),
   y4 = rnorm(100), y5 = rnorm(100), y6 = rnorm(100),
-  x1 = rnorm(100), x2 = rnorm(100))
+  x1 = rnorm(100), x2 = rnorm(100)
+)
 qr_spec <- smooth_quantile_reg(tau = c(.2, .5, .8), outcome_locations = 1:6)
-ff <- qr_spec \%>\% fit(cbind(y1, y2 , y3 , y4 , y5 , y6) ~ ., data = tib)
+ff <- qr_spec \%>\% fit(cbind(y1, y2, y3, y4, y5, y6) ~ ., data = tib)
 p <- predict(ff, new_data = tib)
 
 x <- -99:99 / 100 * 2 * pi
@@ -50,21 +51,23 @@ fd <- x[length(x) - 20]
 XY <- smoothqr::lagmat(y[1:(length(y) - 20)], c(-20:20))
 XY <- tibble::as_tibble(XY)
 qr_spec <- smooth_quantile_reg(tau = c(.2, .5, .8), outcome_locations = 20:1)
-tt <- qr_spec \%>\% fit_xy(x = XY[,21:41], y = XY[,1:20])
+tt <- qr_spec \%>\% fit_xy(x = XY[, 21:41], y = XY[, 1:20])
 
 library(tidyr)
 library(dplyr)
 pl <- predict(
-        object = tt,
-        new_data = XY[max(which(complete.cases(XY[,21:41]))), 21:41]
-      )
+  object = tt,
+  new_data = XY[max(which(complete.cases(XY[, 21:41]))), 21:41]
+)
 pl <- pl \%>\%
-        unnest(.pred) \%>\%
-        mutate(distn = nested_quantiles(distn)) \%>\%
-        unnest(distn) \%>\%
-        mutate(x = x[length(x) - 20] + ahead / 100 * 2 * pi,
-               ahead = NULL) \%>\%
-        pivot_wider(names_from = tau, values_from = q)
+  unnest(.pred) \%>\%
+  mutate(distn = nested_quantiles(distn)) \%>\%
+  unnest(distn) \%>\%
+  mutate(
+    x = x[length(x) - 20] + ahead / 100 * 2 * pi,
+    ahead = NULL
+  ) \%>\%
+  pivot_wider(names_from = tau, values_from = q)
 plot(x, y, pch = 16, xlim = c(pi, 2 * pi), col = "lightgrey")
 curve(sin(x), add = TRUE)
 abline(v = fd, lty = 2)
diff --git a/man/step_epi_shift.Rd b/man/step_epi_shift.Rd
index ca8609b1e..bf135346e 100644
--- a/man/step_epi_shift.Rd
+++ b/man/step_epi_shift.Rd
@@ -90,7 +90,7 @@ are always set to \code{"ahead_"} and \code{"epi_ahead"} respectively, while for
 \examples{
 r <- epi_recipe(case_death_rate_subset) \%>\%
   step_epi_ahead(death_rate, ahead = 7) \%>\%
-  step_epi_lag(death_rate, lag = c(0,7,14))
+  step_epi_lag(death_rate, lag = c(0, 7, 14))
 r
 }
 \seealso{
diff --git a/man/step_growth_rate.Rd b/man/step_growth_rate.Rd
index 0449b887c..b409135b1 100644
--- a/man/step_growth_rate.Rd
+++ b/man/step_growth_rate.Rd
@@ -87,7 +87,9 @@ r <- epi_recipe(case_death_rate_subset) \%>\%
   step_growth_rate(case_rate, death_rate)
 r
 
-r \%>\% recipes::prep() \%>\% recipes::bake(case_death_rate_subset)
+r \%>\%
+  recipes::prep() \%>\%
+  recipes::bake(case_death_rate_subset)
 }
 \seealso{
 Other row operation steps: 
diff --git a/man/step_lag_difference.Rd b/man/step_lag_difference.Rd
index d69c25faa..b06abe43c 100644
--- a/man/step_lag_difference.Rd
+++ b/man/step_lag_difference.Rd
@@ -59,7 +59,9 @@ r <- epi_recipe(case_death_rate_subset) \%>\%
   step_lag_difference(case_rate, death_rate, horizon = c(7, 14))
 r
 
-r \%>\% recipes::prep() \%>\% recipes::bake(case_death_rate_subset)
+r \%>\%
+  recipes::prep() \%>\%
+  recipes::bake(case_death_rate_subset)
 }
 \seealso{
 Other row operation steps: 
diff --git a/man/step_population_scaling.Rd b/man/step_population_scaling.Rd
index 2964c6912..1a9564563 100644
--- a/man/step_population_scaling.Rd
+++ b/man/step_population_scaling.Rd
@@ -104,13 +104,15 @@ jhu <- epiprocess::jhu_csse_daily_subset \%>\%
   dplyr::filter(time_value > "2021-11-01", geo_value \%in\% c("ca", "ny")) \%>\%
   dplyr::select(geo_value, time_value, cases)
 
-pop_data = data.frame(states = c("ca", "ny"), value = c(20000, 30000))
+pop_data <- data.frame(states = c("ca", "ny"), value = c(20000, 30000))
 
 r <- epi_recipe(jhu) \%>\%
-  step_population_scaling(df = pop_data,
-                          df_pop_col = "value",
-                          by = c("geo_value" = "states"),
-                          cases, suffix = "_scaled") \%>\%
+  step_population_scaling(
+    df = pop_data,
+    df_pop_col = "value",
+    by = c("geo_value" = "states"),
+    cases, suffix = "_scaled"
+  ) \%>\%
   step_epi_lag(cases_scaled, lag = c(0, 7, 14)) \%>\%
   step_epi_ahead(cases_scaled, ahead = 7, role = "outcome") \%>\%
   step_epi_naomit()
@@ -119,9 +121,11 @@ f <- frosting() \%>\%
   layer_predict() \%>\%
   layer_threshold(.pred) \%>\%
   layer_naomit(.pred) \%>\%
-  layer_population_scaling(.pred, df = pop_data,
-                           by =  c("geo_value" = "states"),
-                           df_pop_col = "value")
+  layer_population_scaling(.pred,
+    df = pop_data,
+    by = c("geo_value" = "states"),
+    df_pop_col = "value"
+  )
 
 wf <- epi_workflow(r, parsnip::linear_reg()) \%>\%
   fit(jhu) \%>\%
@@ -130,8 +134,10 @@ wf <- epi_workflow(r, parsnip::linear_reg()) \%>\%
 latest <- get_test_data(
   recipe = r,
   epiprocess::jhu_csse_daily_subset \%>\%
-    dplyr::filter(time_value > "2021-11-01",
-      geo_value \%in\% c("ca", "ny")) \%>\%
+    dplyr::filter(
+      time_value > "2021-11-01",
+      geo_value \%in\% c("ca", "ny")
+    ) \%>\%
     dplyr::select(geo_value, time_value, cases)
 )
 
diff --git a/man/step_training_window.Rd b/man/step_training_window.Rd
index 7861f27ea..ce7c0fc74 100644
--- a/man/step_training_window.Rd
+++ b/man/step_training_window.Rd
@@ -50,9 +50,12 @@ after any filtering step.
 tib <- tibble::tibble(
   x = 1:10,
   y = 1:10,
-  time_value = rep(seq(as.Date("2020-01-01"), by = 1,
-                     length.out = 5), times = 2),
-  geo_value = rep(c("ca", "hi"), each = 5)) \%>\%
+  time_value = rep(seq(as.Date("2020-01-01"),
+    by = 1,
+    length.out = 5
+  ), times = 2),
+  geo_value = rep(c("ca", "hi"), each = 5)
+) \%>\%
   as_epi_df()
 
 epi_recipe(y ~ x, data = tib) \%>\%

From 237ec50ddd74d577184f68cad39a794e91468110 Mon Sep 17 00:00:00 2001
From: "Daniel J. McDonald" <dajmcdon@gmail.com>
Date: Sun, 24 Sep 2023 16:42:59 -0700
Subject: [PATCH 07/22] run styler

---
 R/layer_cdc_flatline_quantiles.R        | 24 +++++++++++++---------
 R/step_growth_rate.R                    | 27 ++++++++++++-------------
 R/step_lag_difference.R                 | 19 +++++++++--------
 tests/testthat/test-propogate_samples.R |  1 -
 4 files changed, 36 insertions(+), 35 deletions(-)

diff --git a/R/layer_cdc_flatline_quantiles.R b/R/layer_cdc_flatline_quantiles.R
index afee37577..1881b8523 100644
--- a/R/layer_cdc_flatline_quantiles.R
+++ b/R/layer_cdc_flatline_quantiles.R
@@ -83,15 +83,16 @@
 #'   geom_ribbon(aes(ymin = `0.1`, ymax = `0.9`), fill = blues9[3]) +
 #'   geom_ribbon(aes(ymin = `0.25`, ymax = `0.75`), fill = blues9[6]) +
 #'   geom_line(aes(y = .pred), color = "orange") +
-#'   geom_line(data = case_death_rate_subset %>% filter(geo_value %in% four_states),
-#'     aes(x = time_value, y = death_rate)) +
+#'   geom_line(
+#'     data = case_death_rate_subset %>% filter(geo_value %in% four_states),
+#'     aes(x = time_value, y = death_rate)
+#'   ) +
 #'   scale_x_date(limits = c(forecast_date - 90, forecast_date + 30)) +
 #'   labs(x = "Date", y = "Death rate") +
 #'   facet_wrap(~geo_value, scales = "free_y") +
 #'   theme_bw() +
 #'   geom_vline(xintercept = forecast_date)
 #'
-#'
 layer_cdc_flatline_quantiles <- function(
     frosting,
     ...,
@@ -102,7 +103,6 @@ layer_cdc_flatline_quantiles <- function(
     symmetrize = FALSE,
     nonneg = TRUE,
     id = rand_id("cdc_baseline_bands")) {
-
   rlang::check_dots_empty()
 
   arg_is_int(aheads)
@@ -134,8 +134,7 @@ layer_cdc_flatline_quantiles_new <- function(
     by_key,
     symmetrize,
     nonneg,
-    id
-) {
+    id) {
   layer(
     "cdc_flatline_quantiles",
     aheads = aheads,
@@ -154,8 +153,10 @@ slather.layer_cdc_flatline_quantiles <-
     the_fit <- workflows::extract_fit_parsnip(workflow)
     if (!inherits(the_fit, "_flatline")) {
       cli::cli_warn(
-        c("Predictions for this workflow were not produced by the {.cls flatline}",
-        "{.pkg parsnip} engine. Results may be unexpected. See {.fn epipredict::flatline}.")
+        c(
+          "Predictions for this workflow were not produced by the {.cls flatline}",
+          "{.pkg parsnip} engine. Results may be unexpected. See {.fn epipredict::flatline}."
+        )
       )
     }
     p <- components$predictions
@@ -240,8 +241,11 @@ propogate_samples <- function(
     for (iter in 2:max_ahead) {
       samp <- shuffle(samp)
       raw <- raw + samp
-      if (symmetrize) symmetric <- raw - (median(raw) - p)
-      else symmetric <- raw
+      if (symmetrize) {
+        symmetric <- raw - (median(raw) - p)
+      } else {
+        symmetric <- raw
+      }
       if (nonneg) symmetric <- pmax(0, symmetric)
       res[[iter]] <- symmetric
     }
diff --git a/R/step_growth_rate.R b/R/step_growth_rate.R
index f6ad29a5b..74cfff284 100644
--- a/R/step_growth_rate.R
+++ b/R/step_growth_rate.R
@@ -42,20 +42,19 @@
 #'   recipes::prep() %>%
 #'   recipes::bake(case_death_rate_subset)
 step_growth_rate <-
-  function(
-      recipe,
-      ...,
-      role = "predictor",
-      trained = FALSE,
-      horizon = 7,
-      method = c("rel_change", "linear_reg", "smooth_spline", "trend_filter"),
-      log_scale = FALSE,
-      replace_Inf = NA,
-      prefix = "gr_",
-      columns = NULL,
-      skip = FALSE,
-      id = rand_id("growth_rate"),
-      additional_gr_args_list = list()) {
+  function(recipe,
+           ...,
+           role = "predictor",
+           trained = FALSE,
+           horizon = 7,
+           method = c("rel_change", "linear_reg", "smooth_spline", "trend_filter"),
+           log_scale = FALSE,
+           replace_Inf = NA,
+           prefix = "gr_",
+           columns = NULL,
+           skip = FALSE,
+           id = rand_id("growth_rate"),
+           additional_gr_args_list = list()) {
     if (!is_epi_recipe(recipe)) {
       rlang::abort("This recipe step can only operate on an `epi_recipe`.")
     }
diff --git a/R/step_lag_difference.R b/R/step_lag_difference.R
index 2482be46a..21878eaa7 100644
--- a/R/step_lag_difference.R
+++ b/R/step_lag_difference.R
@@ -23,16 +23,15 @@
 #'   recipes::prep() %>%
 #'   recipes::bake(case_death_rate_subset)
 step_lag_difference <-
-  function(
-      recipe,
-      ...,
-      role = "predictor",
-      trained = FALSE,
-      horizon = 7,
-      prefix = "lag_diff_",
-      columns = NULL,
-      skip = FALSE,
-      id = rand_id("lag_diff")) {
+  function(recipe,
+           ...,
+           role = "predictor",
+           trained = FALSE,
+           horizon = 7,
+           prefix = "lag_diff_",
+           columns = NULL,
+           skip = FALSE,
+           id = rand_id("lag_diff")) {
     if (!is_epi_recipe(recipe)) {
       rlang::abort("This recipe step can only operate on an `epi_recipe`.")
     }
diff --git a/tests/testthat/test-propogate_samples.R b/tests/testthat/test-propogate_samples.R
index 3b02404b6..b8a1ff82d 100644
--- a/tests/testthat/test-propogate_samples.R
+++ b/tests/testthat/test-propogate_samples.R
@@ -4,5 +4,4 @@ test_that("propogate_samples", {
   quantiles <- 1:9 / 10
   aheads <- c(2, 4, 7)
   nsim <- 100
-
 })

From c13b83eb1371ebd0668ded2d6eb0495b17c4dd9e Mon Sep 17 00:00:00 2001
From: "Daniel J. McDonald" <dajmcdon@gmail.com>
Date: Sun, 24 Sep 2023 16:43:26 -0700
Subject: [PATCH 08/22] redocument after styling

---
 man/layer_cdc_flatline_quantiles.Rd | 7 ++++---
 1 file changed, 4 insertions(+), 3 deletions(-)

diff --git a/man/layer_cdc_flatline_quantiles.Rd b/man/layer_cdc_flatline_quantiles.Rd
index c5bb33d3b..4f151e854 100644
--- a/man/layer_cdc_flatline_quantiles.Rd
+++ b/man/layer_cdc_flatline_quantiles.Rd
@@ -113,13 +113,14 @@ preds \%>\%
   geom_ribbon(aes(ymin = `0.1`, ymax = `0.9`), fill = blues9[3]) +
   geom_ribbon(aes(ymin = `0.25`, ymax = `0.75`), fill = blues9[6]) +
   geom_line(aes(y = .pred), color = "orange") +
-  geom_line(data = case_death_rate_subset \%>\% filter(geo_value \%in\% four_states),
-    aes(x = time_value, y = death_rate)) +
+  geom_line(
+    data = case_death_rate_subset \%>\% filter(geo_value \%in\% four_states),
+    aes(x = time_value, y = death_rate)
+  ) +
   scale_x_date(limits = c(forecast_date - 90, forecast_date + 30)) +
   labs(x = "Date", y = "Death rate") +
   facet_wrap(~geo_value, scales = "free_y") +
   theme_bw() +
   geom_vline(xintercept = forecast_date)
 
-
 }

From 16f6c2c5da3fb906b0152348c29a328ada0ace55 Mon Sep 17 00:00:00 2001
From: "Daniel J. McDonald" <dajmcdon@gmail.com>
Date: Mon, 25 Sep 2023 10:04:36 -0700
Subject: [PATCH 09/22] example plotting with ggplot2 handled correctly

---
 R/layer_cdc_flatline_quantiles.R | 4 ++--
 R/make_smooth_quantile_reg.R     | 3 ++-
 2 files changed, 4 insertions(+), 3 deletions(-)

diff --git a/R/layer_cdc_flatline_quantiles.R b/R/layer_cdc_flatline_quantiles.R
index 1881b8523..7ff224359 100644
--- a/R/layer_cdc_flatline_quantiles.R
+++ b/R/layer_cdc_flatline_quantiles.R
@@ -75,7 +75,7 @@
 #'   pivot_quantiles(.pred_distn) %>%
 #'   mutate(target_date = forecast_date + ahead)
 #'
-#' library(ggplot2)
+#' if (require("ggplot2")) {
 #' four_states <- c("ca", "pa", "wa", "ny")
 #' preds %>%
 #'   filter(geo_value %in% four_states) %>%
@@ -92,7 +92,7 @@
 #'   facet_wrap(~geo_value, scales = "free_y") +
 #'   theme_bw() +
 #'   geom_vline(xintercept = forecast_date)
-#'
+#' }
 layer_cdc_flatline_quantiles <- function(
     frosting,
     ...,
diff --git a/R/make_smooth_quantile_reg.R b/R/make_smooth_quantile_reg.R
index 6eab2a132..cfb08a9c7 100644
--- a/R/make_smooth_quantile_reg.R
+++ b/R/make_smooth_quantile_reg.R
@@ -61,7 +61,8 @@
 #' lines(pl$x, pl$`0.2`, col = "blue")
 #' lines(pl$x, pl$`0.8`, col = "blue")
 #' lines(pl$x, pl$`0.5`, col = "red")
-#' \dontrun{
+#'
+#' if (require("ggplot2")) {
 #' ggplot(data.frame(x = x, y = y), aes(x)) +
 #'   geom_ribbon(data = pl, aes(ymin = `0.2`, ymax = `0.8`), fill = "lightblue") +
 #'   geom_point(aes(y = y), colour = "grey") + # observed data

From c9b4667ccecf9d36de71383ef94fda0fa84ba996 Mon Sep 17 00:00:00 2001
From: "Daniel J. McDonald" <dajmcdon@gmail.com>
Date: Wed, 4 Oct 2023 12:30:41 -0700
Subject: [PATCH 10/22] working cdc baseline

---
 NAMESPACE                           |   3 +
 R/cdc_baseline_forecaster.R         | 228 ++++++++++++++++++++++++++++
 R/layer_cdc_flatline_quantiles.R    |  17 ++-
 man/cdc_baseline_args_list.Rd       |  85 +++++++++++
 man/cdc_baseline_forecaster.Rd      |  73 +++++++++
 man/layer_cdc_flatline_quantiles.Rd |  14 +-
 man/smooth_quantile_reg.Rd          |   3 +-
 tests/testthat/test-parse_period.R  |  12 ++
 8 files changed, 419 insertions(+), 16 deletions(-)
 create mode 100644 R/cdc_baseline_forecaster.R
 create mode 100644 man/cdc_baseline_args_list.Rd
 create mode 100644 man/cdc_baseline_forecaster.Rd
 create mode 100644 tests/testthat/test-parse_period.R

diff --git a/NAMESPACE b/NAMESPACE
index e361dec00..d7e941b0f 100644
--- a/NAMESPACE
+++ b/NAMESPACE
@@ -52,6 +52,7 @@ S3method(print,alist)
 S3method(print,arx_class)
 S3method(print,arx_fcast)
 S3method(print,canned_epipred)
+S3method(print,cdc_baseline_fcast)
 S3method(print,epi_workflow)
 S3method(print,flat_fcast)
 S3method(print,flatline)
@@ -107,6 +108,8 @@ export(arx_classifier)
 export(arx_fcast_epi_workflow)
 export(arx_forecaster)
 export(bake)
+export(cdc_baseline_args_list)
+export(cdc_baseline_forecaster)
 export(create_layer)
 export(default_epi_recipe_blueprint)
 export(detect_layer)
diff --git a/R/cdc_baseline_forecaster.R b/R/cdc_baseline_forecaster.R
new file mode 100644
index 000000000..f5961431d
--- /dev/null
+++ b/R/cdc_baseline_forecaster.R
@@ -0,0 +1,228 @@
+#' Predict the future with the most recent value
+#'
+#' This is a simple forecasting model for
+#' [epiprocess::epi_df] data. It uses the most recent observation as the
+#' forecast for any future date, and produces intervals by shuffling the quantiles
+#' of the residuals of such a "flatline" forecast and incrementing these
+#' forward over all available training data.
+#'
+#' By default, the predictive intervals are computed separately for each
+#' combination of `geo_value` in the `epi_data` argument.
+#'
+#' This forecaster is meant to produce exactly the CDC Baseline used for
+#' [COVID19ForecastHub](https://covid19forecasthub.org)
+#'
+#' @param epi_data An [epiprocess::epi_df]
+#' @param outcome A scalar character for the column name we wish to predict.
+#' @param args_list A list of additional arguments as created by the
+#'   [cdc_baseline_args_list()] constructor function.
+#'
+#' @return A data frame of point and interval forecasts at for all
+#'   aheads (unique horizons) for each unique combination of `key_vars`.
+#' @export
+#'
+#' @examples
+#' library(dplyr)
+#' weekly_deaths <- case_death_rate_subset %>%
+#'   select(geo_value, time_value, death_rate) %>%
+#'   left_join(state_census %>% select(pop, abbr), by = c("geo_value" = "abbr")) %>%
+#'   mutate(deaths = pmax(death_rate / 1e5 * pop, 0)) %>%
+#'   select(-pop, -death_rate) %>%
+#'   group_by(geo_value) %>%
+#'   epi_slide(~ sum(.$deaths), before = 6, new_col_name = "deaths") %>%
+#'   ungroup() %>%
+#'   filter(weekdays(time_value) == "Saturday")
+#'
+#' cdc <- cdc_baseline_forecaster(deaths, "deaths")
+#' preds <- pivot_quantiles(cdc$predictions, .pred_distn)
+#'
+#' if (require(ggplot2)) {
+#' forecast_date <- unique(preds$forecast_date)
+#' four_states <- c("ca", "pa", "wa", "ny")
+#' preds %>%
+#'   filter(geo_value %in% four_states) %>%
+#'   ggplot(aes(target_date)) +
+#'   geom_ribbon(aes(ymin = `0.1`, ymax = `0.9`), fill = blues9[3]) +
+#'   geom_ribbon(aes(ymin = `0.25`, ymax = `0.75`), fill = blues9[6]) +
+#'   geom_line(aes(y = .pred), color = "orange") +
+#'   geom_line(
+#'     data = deaths %>% filter(geo_value %in% four_states),
+#'     aes(x = time_value, y = deaths)
+#'   ) +
+#'   scale_x_date(limits = c(forecast_date - 90, forecast_date + 30)) +
+#'   labs(x = "Date", y = "Weekly deaths") +
+#'   facet_wrap(~geo_value, scales = "free_y") +
+#'   theme_bw() +
+#'   geom_vline(xintercept = forecast_date)
+#' }
+cdc_baseline_forecaster <- function(
+    epi_data,
+    outcome,
+    args_list = cdc_baseline_args_list()) {
+  validate_forecaster_inputs(epi_data, outcome, "time_value")
+  if (!inherits(args_list, c("cdc_flat_fcast", "alist"))) {
+    cli_stop("args_list was not created using `cdc_baseline_args_list().")
+  }
+  keys <- epi_keys(epi_data)
+  ek <- kill_time_value(keys)
+  outcome <- rlang::sym(outcome)
+
+
+  r <- epi_recipe(epi_data) %>%
+    step_epi_ahead(!!outcome, ahead = args_list$data_frequency, skip = TRUE) %>%
+    recipes::update_role(!!outcome, new_role = "predictor") %>%
+    recipes::add_role(tidyselect::all_of(keys), new_role = "predictor") %>%
+    step_training_window(n_recent = args_list$n_training)
+
+  forecast_date <- args_list$forecast_date %||% max(epi_data$time_value)
+  # target_date <- args_list$target_date %||% forecast_date + args_list$ahead
+
+
+  latest <- get_test_data(
+    epi_recipe(epi_data), epi_data, TRUE, args_list$nafill_buffer,
+    forecast_date
+  )
+
+  f <- frosting() %>%
+    layer_predict() %>%
+    layer_cdc_flatline_quantiles(
+      aheads = args_list$aheads,
+      quantile_levels = args_list$quantile_levels,
+      nsims = args_list$nsims,
+      by_key = args_list$quantile_by_key,
+      symmetrize = args_list$symmetrize,
+      nonneg = args_list$nonneg
+    ) %>%
+    layer_add_forecast_date(forecast_date = forecast_date) %>%
+    layer_unnest(.pred_distn_all)
+    # layer_add_target_date(target_date = target_date)
+  if (args_list$nonneg) f <- layer_threshold(f, ".pred")
+
+  eng <- parsnip::linear_reg() %>% parsnip::set_engine("flatline")
+
+  wf <- epi_workflow(r, eng, f)
+  wf <- generics::fit(wf, epi_data)
+  preds <- suppressWarnings(predict(wf, new_data = latest)) %>%
+    tibble::as_tibble() %>%
+    dplyr::select(-time_value) %>%
+    dplyr::mutate(target_date = forecast_date + ahead * args_list$data_frequency)
+
+  structure(
+    list(
+      predictions = preds,
+      epi_workflow = wf,
+      metadata = list(
+        training = attr(epi_data, "metadata"),
+        forecast_created = Sys.time()
+      )
+    ),
+    class = c("cdc_baseline_fcast", "canned_epipred")
+  )
+}
+
+
+
+#' CDC baseline forecaster argument constructor
+#'
+#' Constructs a list of arguments for [cdc_baseline_forecaster()].
+#'
+#' @inheritParams arx_args_list
+#' @param data_frequency Integer or string. This describes the frequency of the
+#'   input `epi_df`. For typical FluSight forecasts, this would be `"1 week"`.
+#'   Allowable arguments are integers (taken to mean numbers of days) or a
+#'   string like `"7 days"` or `"2 weeks"`. Currently, all other periods
+#'   (other than days or weeks) result in an error.
+#' @param aheads Integer vector. Unlike [arx_forecaster()], this doesn't have
+#'   any effect on the predicted values.
+#'   Predictions are always the most recent observation. This determines the
+#'   set of prediction horizons for [layer_cdc_flatline_quantiles()]`. It interacts
+#'   with the `data_frequency` argument. So, for example, if the data is daily
+#'   and you want forecasts for 1:4 days ahead, then you would use `1:4`. However,
+#'   if you want one-week predictions, you would set this as `c(7, 14, 21, 28)`.
+#'   But if `data_frequency` is `"1 week"`, then you would set it as `1:4`.
+#' @param quantile_levels Vector or `NULL`. A vector of probabilities to produce
+#'   prediction intervals. These are created by computing the quantiles of
+#'   training residuals. A `NULL` value will result in point forecasts only.
+#' @param nsims Positive integer. The number of draws from the empirical CDF.
+#'   These samples are spaced evenly on the (0, 1) scale, F_X(x) resulting
+#'   in linear interpolation on the X scale. This is achieved with
+#'   [stats::quantile()] Type 7 (the default for that function).
+#' @param nonneg Logical. Force all predictive intervals be non-negative.
+#'   Because non-negativity is forced _before_ propagating forward, this
+#'   has slightly different behaviour than would occur if using
+#'   [layer_threshold_preds()].
+#'
+#' @return A list containing updated parameter choices with class `cdc_flat_fcast`.
+#' @export
+#'
+#' @examples
+#' cdc_baseline_args_list()
+#' cdc_baseline_args_list(symmetrize = FALSE)
+#' cdc_baseline_args_list(levels = c(.1, .3, .7, .9), n_training = 120)
+cdc_baseline_args_list <- function(
+    data_frequency = "1 week",
+    aheads = 1:4,
+    n_training = Inf,
+    forecast_date = NULL,
+    quantile_levels = c(.01, .025, 1:19 / 20, .975, .99),
+    nsims = 1e3L,
+    symmetrize = TRUE,
+    nonneg = TRUE,
+    quantile_by_key = "geo_value",
+    nafill_buffer = Inf) {
+  arg_is_scalar(n_training, nsims, data_frequency)
+  data_frequency <- parse_period(data_frequency)
+  arg_is_pos_int(data_frequency)
+  arg_is_chr(quantile_by_key, allow_empty = TRUE)
+  arg_is_scalar(forecast_date, allow_null = TRUE)
+  arg_is_date(forecast_date, allow_null = TRUE)
+  arg_is_nonneg_int(aheads, nsims)
+  arg_is_lgl(symmetrize, nonneg)
+  arg_is_probabilities(quantile_levels, allow_null = TRUE)
+  arg_is_pos(n_training)
+  if (is.finite(n_training)) arg_is_pos_int(n_training)
+  if (is.finite(nafill_buffer)) arg_is_pos_int(nafill_buffer, allow_null = TRUE)
+
+  structure(
+    enlist(
+      data_frequency,
+      aheads,
+      n_training,
+      forecast_date,
+      quantile_levels,
+      nsims,
+      symmetrize,
+      nonneg,
+      quantile_by_key,
+      nafill_buffer
+    ),
+    class = c("cdc_baseline_fcast", "alist")
+  )
+}
+
+#' @export
+print.cdc_baseline_fcast <- function(x, ...) {
+  name <- "CDC Baseline"
+  NextMethod(name = name, ...)
+}
+
+parse_period <- function(x) {
+  arg_is_scalar(x)
+  if (is.character(x)) {
+    x <- unlist(strsplit(x, " "))
+    if (length(x) == 1L) x <- as.numeric(x)
+    if (length(x) == 2L) {
+      mult <- substr(x[2], 1, 3)
+      mult <- switch(
+        mult,
+        day = 1L,
+        wee = 7L,
+        cli::cli_abort("incompatible timespan in `aheads`.")
+      )
+      x <- as.numeric(x[1]) * mult
+    }
+    if (length(x) > 2L) cli::cli_abort("incompatible timespan in `aheads`.")
+  }
+  stopifnot(rlang::is_integerish(x))
+  as.integer(x)
+}
diff --git a/R/layer_cdc_flatline_quantiles.R b/R/layer_cdc_flatline_quantiles.R
index 7ff224359..aa953ad45 100644
--- a/R/layer_cdc_flatline_quantiles.R
+++ b/R/layer_cdc_flatline_quantiles.R
@@ -97,7 +97,7 @@ layer_cdc_flatline_quantiles <- function(
     frosting,
     ...,
     aheads = 1:4,
-    quantiles = c(.01, .025, 1:19 / 20, .975, .99),
+    quantile_levels = c(.01, .025, 1:19 / 20, .975, .99),
     nsims = 1e3,
     by_key = "geo_value",
     symmetrize = FALSE,
@@ -106,7 +106,7 @@ layer_cdc_flatline_quantiles <- function(
   rlang::check_dots_empty()
 
   arg_is_int(aheads)
-  arg_is_probabilities(quantiles)
+  arg_is_probabilities(quantile_levels, allow_null = TRUE)
   arg_is_pos_int(nsims)
   arg_is_scalar(nsims)
   arg_is_chr_scalar(id)
@@ -117,7 +117,7 @@ layer_cdc_flatline_quantiles <- function(
     frosting,
     layer_cdc_flatline_quantiles_new(
       aheads = aheads,
-      quantiles = quantiles,
+      quantile_levels = quantile_levels,
       nsims = nsims,
       by_key = by_key,
       symmetrize = symmetrize,
@@ -129,7 +129,7 @@ layer_cdc_flatline_quantiles <- function(
 
 layer_cdc_flatline_quantiles_new <- function(
     aheads,
-    quantiles,
+    quantile_levels,
     nsims,
     by_key,
     symmetrize,
@@ -138,7 +138,7 @@ layer_cdc_flatline_quantiles_new <- function(
   layer(
     "cdc_flatline_quantiles",
     aheads = aheads,
-    quantiles = quantiles,
+    quantile_levels = quantile_levels,
     nsims = nsims,
     by_key = by_key,
     symmetrize = symmetrize,
@@ -150,6 +150,7 @@ layer_cdc_flatline_quantiles_new <- function(
 #' @export
 slather.layer_cdc_flatline_quantiles <-
   function(object, components, workflow, new_data, ...) {
+    if (is.null(object$quantile_levels)) return(components)
     the_fit <- workflows::extract_fit_parsnip(workflow)
     if (!inherits(the_fit, "_flatline")) {
       cli::cli_warn(
@@ -213,7 +214,7 @@ slather.layer_cdc_flatline_quantiles <-
       dplyr::rowwise() %>%
       dplyr::mutate(
         .pred_distn_all = propogate_samples(
-          .resid, .pred, object$quantiles,
+          .resid, .pred, object$quantile_levels,
           object$aheads, object$nsim, object$symmetrize, object$nonneg
         )
       ) %>%
@@ -229,7 +230,7 @@ slather.layer_cdc_flatline_quantiles <-
   }
 
 propogate_samples <- function(
-    r, p, quantiles, aheads, nsim, symmetrize, nonneg) {
+    r, p, quantile_levels, aheads, nsim, symmetrize, nonneg) {
   max_ahead <- max(aheads)
   samp <- quantile(r, probs = c(0, seq_len(nsim - 1)) / (nsim - 1), na.rm = TRUE)
   res <- list()
@@ -254,7 +255,7 @@ propogate_samples <- function(
   list(tibble::tibble(
     ahead = aheads,
     .pred_distn = map_vec(
-      res, ~ dist_quantiles(quantile(.x, quantiles), tau = quantiles)
+      res, ~ dist_quantiles(quantile(.x, quantile_levels), quantile_levels)
     )
   ))
 }
diff --git a/man/cdc_baseline_args_list.Rd b/man/cdc_baseline_args_list.Rd
new file mode 100644
index 000000000..37c326e9c
--- /dev/null
+++ b/man/cdc_baseline_args_list.Rd
@@ -0,0 +1,85 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/cdc_baseline_forecaster.R
+\name{cdc_baseline_args_list}
+\alias{cdc_baseline_args_list}
+\title{CDC baseline forecaster argument constructor}
+\usage{
+cdc_baseline_args_list(
+  data_frequency = "1 week",
+  aheads = 1:4,
+  n_training = Inf,
+  forecast_date = NULL,
+  quantile_levels = c(0.01, 0.025, 1:19/20, 0.975, 0.99),
+  nsims = 1000L,
+  symmetrize = TRUE,
+  nonneg = TRUE,
+  quantile_by_key = "geo_value",
+  nafill_buffer = Inf
+)
+}
+\arguments{
+\item{data_frequency}{Integer or string. This describes the frequency of the
+input \code{epi_df}. For typical FluSight forecasts, this would be \code{"1 week"}.
+Allowable arguments are integers (taken to mean numbers of days) or a
+string like \code{"7 days"} or \code{"2 weeks"}. Currently, all other periods
+(other than days or weeks) result in an error.}
+
+\item{aheads}{Integer vector. Unlike \code{\link[=arx_forecaster]{arx_forecaster()}}, this doesn't have
+any effect on the predicted values.
+Predictions are always the most recent observation. This determines the
+set of prediction horizons for \code{\link[=layer_cdc_flatline_quantiles]{layer_cdc_flatline_quantiles()}}\verb{. It interacts with the }data_frequency\verb{argument. So, for example, if the data is daily and you want forecasts for 1:4 days ahead, then you would use}1:4\verb{. However, if you want one-week predictions, you would set this as }c(7, 14, 21, 28)\verb{. But if }data_frequency\code{is}"1 week"\verb{, then you would set it as }1:4`.}
+
+\item{n_training}{Integer. An upper limit for the number of rows per
+key that are used for training
+(in the time unit of the \code{epi_df}).}
+
+\item{forecast_date}{Date. The date on which the forecast is created.
+The default \code{NULL} will attempt to determine this automatically.}
+
+\item{quantile_levels}{Vector or \code{NULL}. A vector of probabilities to produce
+prediction intervals. These are created by computing the quantiles of
+training residuals. A \code{NULL} value will result in point forecasts only.}
+
+\item{nsims}{Positive integer. The number of draws from the empirical CDF.
+These samples are spaced evenly on the (0, 1) scale, F_X(x) resulting
+in linear interpolation on the X scale. This is achieved with
+\code{\link[stats:quantile]{stats::quantile()}} Type 7 (the default for that function).}
+
+\item{symmetrize}{Logical. The default \code{TRUE} calculates
+symmetric prediction intervals. This argument only applies when
+residual quantiles are used. It is not applicable with
+\code{trainer = quantile_reg()}, for example.}
+
+\item{nonneg}{Logical. Force all predictive intervals be non-negative.
+Because non-negativity is forced \emph{before} propagating forward, this
+has slightly different behaviour than would occur if using
+\code{\link[=layer_threshold_preds]{layer_threshold_preds()}}.}
+
+\item{quantile_by_key}{Character vector. Groups residuals by listed keys
+before calculating residual quantiles. See the \code{by_key} argument to
+\code{\link[=layer_residual_quantiles]{layer_residual_quantiles()}} for more information. The default,
+\code{character(0)} performs no grouping. This argument only applies when
+residual quantiles are used. It is not applicable with
+\code{trainer = quantile_reg()}, for example.}
+
+\item{nafill_buffer}{At predict time, recent values of the training data
+are used to create a forecast. However, these can be \code{NA} due to, e.g.,
+data latency issues. By default, any missing values will get filled with
+less recent data. Setting this value to \code{NULL} will result in 1 extra
+recent row (beyond those required for lag creation) to be used. Note that
+we require at least \code{min(lags)} rows of recent data per \code{geo_value} to
+create a prediction. For this reason, setting \code{nafill_buffer < min(lags)}
+will be treated as \emph{additional} allowed recent data rather than the
+total amount of recent data to examine.}
+}
+\value{
+A list containing updated parameter choices with class \code{cdc_flat_fcast}.
+}
+\description{
+Constructs a list of arguments for \code{\link[=cdc_baseline_forecaster]{cdc_baseline_forecaster()}}.
+}
+\examples{
+cdc_baseline_args_list()
+cdc_baseline_args_list(symmetrize = FALSE)
+cdc_baseline_args_list(levels = c(.1, .3, .7, .9), n_training = 120)
+}
diff --git a/man/cdc_baseline_forecaster.Rd b/man/cdc_baseline_forecaster.Rd
new file mode 100644
index 000000000..8d0e1b3ec
--- /dev/null
+++ b/man/cdc_baseline_forecaster.Rd
@@ -0,0 +1,73 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/cdc_baseline_forecaster.R
+\name{cdc_baseline_forecaster}
+\alias{cdc_baseline_forecaster}
+\title{Predict the future with the most recent value}
+\usage{
+cdc_baseline_forecaster(
+  epi_data,
+  outcome,
+  args_list = cdc_baseline_args_list()
+)
+}
+\arguments{
+\item{epi_data}{An \link[epiprocess:epi_df]{epiprocess::epi_df}}
+
+\item{outcome}{A scalar character for the column name we wish to predict.}
+
+\item{args_list}{A list of additional arguments as created by the
+\code{\link[=cdc_baseline_args_list]{cdc_baseline_args_list()}} constructor function.}
+}
+\value{
+A data frame of point and interval forecasts at for all
+aheads (unique horizons) for each unique combination of \code{key_vars}.
+}
+\description{
+This is a simple forecasting model for
+\link[epiprocess:epi_df]{epiprocess::epi_df} data. It uses the most recent observation as the
+forecast for any future date, and produces intervals by shuffling the quantiles
+of the residuals of such a "flatline" forecast and incrementing these
+forward over all available training data.
+}
+\details{
+By default, the predictive intervals are computed separately for each
+combination of \code{geo_value} in the \code{epi_data} argument.
+
+This forecaster is meant to produce exactly the CDC Baseline used for
+\href{https://covid19forecasthub.org}{COVID19ForecastHub}
+}
+\examples{
+library(dplyr)
+weekly_deaths <- case_death_rate_subset \%>\%
+  select(geo_value, time_value, death_rate) \%>\%
+  left_join(state_census \%>\% select(pop, abbr), by = c("geo_value" = "abbr")) \%>\%
+  mutate(deaths = pmax(death_rate / 1e5 * pop, 0)) \%>\%
+  select(-pop, -death_rate) \%>\%
+  group_by(geo_value) \%>\%
+  epi_slide(~ sum(.$deaths), before = 6, new_col_name = "deaths") \%>\%
+  ungroup() \%>\%
+  filter(weekdays(time_value) == "Saturday")
+
+cdc <- cdc_baseline_forecaster(deaths, "deaths")
+preds <- pivot_quantiles(cdc$predictions, .pred_distn)
+
+if (require(ggplot2)) {
+forecast_date <- unique(preds$forecast_date)
+four_states <- c("ca", "pa", "wa", "ny")
+preds \%>\%
+  filter(geo_value \%in\% four_states) \%>\%
+  ggplot(aes(target_date)) +
+  geom_ribbon(aes(ymin = `0.1`, ymax = `0.9`), fill = blues9[3]) +
+  geom_ribbon(aes(ymin = `0.25`, ymax = `0.75`), fill = blues9[6]) +
+  geom_line(aes(y = .pred), color = "orange") +
+  geom_line(
+    data = deaths \%>\% filter(geo_value \%in\% four_states),
+    aes(x = time_value, y = deaths)
+  ) +
+  scale_x_date(limits = c(forecast_date - 90, forecast_date + 30)) +
+  labs(x = "Date", y = "Weekly deaths") +
+  facet_wrap(~geo_value, scales = "free_y") +
+  theme_bw() +
+  geom_vline(xintercept = forecast_date)
+}
+}
diff --git a/man/layer_cdc_flatline_quantiles.Rd b/man/layer_cdc_flatline_quantiles.Rd
index 4f151e854..8eb42b423 100644
--- a/man/layer_cdc_flatline_quantiles.Rd
+++ b/man/layer_cdc_flatline_quantiles.Rd
@@ -8,7 +8,7 @@ layer_cdc_flatline_quantiles(
   frosting,
   ...,
   aheads = 1:4,
-  quantiles = c(0.01, 0.025, 1:19/20, 0.975, 0.99),
+  quantile_levels = c(0.01, 0.025, 1:19/20, 0.975, 0.99),
   nsims = 1000,
   by_key = "geo_value",
   symmetrize = FALSE,
@@ -27,10 +27,6 @@ typically observed daily (possibly with missing values), but
 with weekly forecast targets, you would use \code{c(7, 14, 21, 28)}. But with
 weekly data, you would use \code{1:4}.}
 
-\item{quantiles}{Numeric vector of probabilities with values in (0,1)
-referring to the desired predictive intervals. The default is the standard
-set for the COVID Forecast Hub.}
-
 \item{nsims}{Positive integer. The number of draws from the empirical CDF.
 These samples are spaced evenly on the (0, 1) scale, F_X(x) resulting
 in linear interpolation on the X scale. This is achieved with
@@ -47,6 +43,10 @@ has slightly different behaviour than would occur if using
 \code{\link[=layer_threshold_preds]{layer_threshold_preds()}}.}
 
 \item{id}{a random id string}
+
+\item{quantiles}{Numeric vector of probabilities with values in (0,1)
+referring to the desired predictive intervals. The default is the standard
+set for the COVID Forecast Hub.}
 }
 \value{
 an updated \code{frosting} postprocessor. Calling \code{\link[=predict]{predict()}} will result
@@ -105,7 +105,7 @@ preds <- preds \%>\%
   pivot_quantiles(.pred_distn) \%>\%
   mutate(target_date = forecast_date + ahead)
 
-library(ggplot2)
+if (require("ggplot2")) {
 four_states <- c("ca", "pa", "wa", "ny")
 preds \%>\%
   filter(geo_value \%in\% four_states) \%>\%
@@ -122,5 +122,5 @@ preds \%>\%
   facet_wrap(~geo_value, scales = "free_y") +
   theme_bw() +
   geom_vline(xintercept = forecast_date)
-
+}
 }
diff --git a/man/smooth_quantile_reg.Rd b/man/smooth_quantile_reg.Rd
index 6cc2dfc82..b938541f1 100644
--- a/man/smooth_quantile_reg.Rd
+++ b/man/smooth_quantile_reg.Rd
@@ -74,7 +74,8 @@ abline(v = fd, lty = 2)
 lines(pl$x, pl$`0.2`, col = "blue")
 lines(pl$x, pl$`0.8`, col = "blue")
 lines(pl$x, pl$`0.5`, col = "red")
-\dontrun{
+
+if (require("ggplot2")) {
 ggplot(data.frame(x = x, y = y), aes(x)) +
   geom_ribbon(data = pl, aes(ymin = `0.2`, ymax = `0.8`), fill = "lightblue") +
   geom_point(aes(y = y), colour = "grey") + # observed data
diff --git a/tests/testthat/test-parse_period.R b/tests/testthat/test-parse_period.R
new file mode 100644
index 000000000..0adbcec3d
--- /dev/null
+++ b/tests/testthat/test-parse_period.R
@@ -0,0 +1,12 @@
+test_that("parse_period works", {
+  expect_error(parse_period(c(1, 2)))
+  expect_error(parse_period(c(1.3)))
+  expect_error(parse_period("1 year"))
+  expect_error(parse_period("2 weeks later"))
+  expect_identical(parse_period(1), 1L)
+  expect_identical(parse_period("1 day"), 1L)
+  expect_identical(parse_period("1 days"), 1L)
+  expect_identical(parse_period("1 week"), 7L)
+  expect_identical(parse_period("1 weeks"), 7L)
+  expect_identical(parse_period("2 weeks"), 14L)
+})

From d59a691d2eea0a7ae7606464120e18e0b8e22bc3 Mon Sep 17 00:00:00 2001
From: "Daniel J. McDonald" <dajmcdon@gmail.com>
Date: Wed, 4 Oct 2023 12:32:16 -0700
Subject: [PATCH 11/22] add cdc baseline to pkgdown

---
 _pkgdown.yml | 1 +
 1 file changed, 1 insertion(+)

diff --git a/_pkgdown.yml b/_pkgdown.yml
index 2ad03c277..89fd733e1 100644
--- a/_pkgdown.yml
+++ b/_pkgdown.yml
@@ -34,6 +34,7 @@ reference:
     contents:
     - contains("flatline")
     - contains("arx")
+    - contains("cdc")
   - title: Parsnip engines
     desc: Prediction methods not available elsewhere
     contents:

From 21b4c85340972e0005af1bc40fd1e0bb4763a9ed Mon Sep 17 00:00:00 2001
From: "Daniel J. McDonald" <dajmcdon@gmail.com>
Date: Wed, 4 Oct 2023 13:27:02 -0700
Subject: [PATCH 12/22] local checks pass

---
 .Rbuildignore                       |  1 +
 NAMESPACE                           |  1 +
 R/cdc_baseline_forecaster.R         |  8 ++++----
 R/layer_cdc_flatline_quantiles.R    |  4 ++--
 R/make_smooth_quantile_reg.R        |  2 +-
 man/cdc_baseline_args_list.Rd       |  4 ++--
 man/cdc_baseline_forecaster.Rd      |  4 ++--
 man/layer_cdc_flatline_quantiles.Rd | 10 +++++-----
 8 files changed, 18 insertions(+), 16 deletions(-)

diff --git a/.Rbuildignore b/.Rbuildignore
index 5139bcabe..cb36bb9d2 100644
--- a/.Rbuildignore
+++ b/.Rbuildignore
@@ -12,3 +12,4 @@
 ^musings$
 ^data-raw$
 ^vignettes/articles$
+^.git-blame-ignore-revs$
diff --git a/NAMESPACE b/NAMESPACE
index d7e941b0f..21cd7b83f 100644
--- a/NAMESPACE
+++ b/NAMESPACE
@@ -186,6 +186,7 @@ importFrom(rlang,caller_env)
 importFrom(rlang,is_empty)
 importFrom(rlang,is_null)
 importFrom(rlang,quos)
+importFrom(smoothqr,smooth_qr)
 importFrom(stats,as.formula)
 importFrom(stats,family)
 importFrom(stats,lm)
diff --git a/R/cdc_baseline_forecaster.R b/R/cdc_baseline_forecaster.R
index f5961431d..f1014d666 100644
--- a/R/cdc_baseline_forecaster.R
+++ b/R/cdc_baseline_forecaster.R
@@ -33,7 +33,7 @@
 #'   ungroup() %>%
 #'   filter(weekdays(time_value) == "Saturday")
 #'
-#' cdc <- cdc_baseline_forecaster(deaths, "deaths")
+#' cdc <- cdc_baseline_forecaster(weekly_deaths, "deaths")
 #' preds <- pivot_quantiles(cdc$predictions, .pred_distn)
 #'
 #' if (require(ggplot2)) {
@@ -46,7 +46,7 @@
 #'   geom_ribbon(aes(ymin = `0.25`, ymax = `0.75`), fill = blues9[6]) +
 #'   geom_line(aes(y = .pred), color = "orange") +
 #'   geom_line(
-#'     data = deaths %>% filter(geo_value %in% four_states),
+#'     data = weekly_deaths %>% filter(geo_value %in% four_states),
 #'     aes(x = time_value, y = deaths)
 #'   ) +
 #'   scale_x_date(limits = c(forecast_date - 90, forecast_date + 30)) +
@@ -150,7 +150,7 @@ cdc_baseline_forecaster <- function(
 #' @param nonneg Logical. Force all predictive intervals be non-negative.
 #'   Because non-negativity is forced _before_ propagating forward, this
 #'   has slightly different behaviour than would occur if using
-#'   [layer_threshold_preds()].
+#'   [layer_threshold()].
 #'
 #' @return A list containing updated parameter choices with class `cdc_flat_fcast`.
 #' @export
@@ -158,7 +158,7 @@ cdc_baseline_forecaster <- function(
 #' @examples
 #' cdc_baseline_args_list()
 #' cdc_baseline_args_list(symmetrize = FALSE)
-#' cdc_baseline_args_list(levels = c(.1, .3, .7, .9), n_training = 120)
+#' cdc_baseline_args_list(quantile_levels = c(.1, .3, .7, .9), n_training = 120)
 cdc_baseline_args_list <- function(
     data_frequency = "1 week",
     aheads = 1:4,
diff --git a/R/layer_cdc_flatline_quantiles.R b/R/layer_cdc_flatline_quantiles.R
index aa953ad45..f1a159b9a 100644
--- a/R/layer_cdc_flatline_quantiles.R
+++ b/R/layer_cdc_flatline_quantiles.R
@@ -25,7 +25,7 @@
 #'   typically observed daily (possibly with missing values), but
 #'   with weekly forecast targets, you would use `c(7, 14, 21, 28)`. But with
 #'   weekly data, you would use `1:4`.
-#' @param quantiles Numeric vector of probabilities with values in (0,1)
+#' @param quantile_levels Numeric vector of probabilities with values in (0,1)
 #'   referring to the desired predictive intervals. The default is the standard
 #'   set for the COVID Forecast Hub.
 #' @param nsims Positive integer. The number of draws from the empirical CDF.
@@ -35,7 +35,7 @@
 #' @param nonneg Logical. Force all predictive intervals be non-negative.
 #'   Because non-negativity is forced _before_ propagating forward, this
 #'   has slightly different behaviour than would occur if using
-#'   [layer_threshold_preds()].
+#'   [layer_threshold()].
 #'
 #' @return an updated `frosting` postprocessor. Calling [predict()] will result
 #'   in an additional `<list-col>` named `.pred_distn_all` containing 2-column
diff --git a/R/make_smooth_quantile_reg.R b/R/make_smooth_quantile_reg.R
index cfb08a9c7..7d25a8c6b 100644
--- a/R/make_smooth_quantile_reg.R
+++ b/R/make_smooth_quantile_reg.R
@@ -21,7 +21,7 @@
 #'
 #' @seealso [fit.model_spec()], [set_engine()]
 #'
-#' @importFrom quantreg rq
+#' @importFrom smoothqr smooth_qr
 #' @examples
 #' tib <- data.frame(
 #'   y1 = rnorm(100), y2 = rnorm(100), y3 = rnorm(100),
diff --git a/man/cdc_baseline_args_list.Rd b/man/cdc_baseline_args_list.Rd
index 37c326e9c..2f6546f74 100644
--- a/man/cdc_baseline_args_list.Rd
+++ b/man/cdc_baseline_args_list.Rd
@@ -53,7 +53,7 @@ residual quantiles are used. It is not applicable with
 \item{nonneg}{Logical. Force all predictive intervals be non-negative.
 Because non-negativity is forced \emph{before} propagating forward, this
 has slightly different behaviour than would occur if using
-\code{\link[=layer_threshold_preds]{layer_threshold_preds()}}.}
+\code{\link[=layer_threshold]{layer_threshold()}}.}
 
 \item{quantile_by_key}{Character vector. Groups residuals by listed keys
 before calculating residual quantiles. See the \code{by_key} argument to
@@ -81,5 +81,5 @@ Constructs a list of arguments for \code{\link[=cdc_baseline_forecaster]{cdc_bas
 \examples{
 cdc_baseline_args_list()
 cdc_baseline_args_list(symmetrize = FALSE)
-cdc_baseline_args_list(levels = c(.1, .3, .7, .9), n_training = 120)
+cdc_baseline_args_list(quantile_levels = c(.1, .3, .7, .9), n_training = 120)
 }
diff --git a/man/cdc_baseline_forecaster.Rd b/man/cdc_baseline_forecaster.Rd
index 8d0e1b3ec..65de1c29e 100644
--- a/man/cdc_baseline_forecaster.Rd
+++ b/man/cdc_baseline_forecaster.Rd
@@ -48,7 +48,7 @@ weekly_deaths <- case_death_rate_subset \%>\%
   ungroup() \%>\%
   filter(weekdays(time_value) == "Saturday")
 
-cdc <- cdc_baseline_forecaster(deaths, "deaths")
+cdc <- cdc_baseline_forecaster(weekly_deaths, "deaths")
 preds <- pivot_quantiles(cdc$predictions, .pred_distn)
 
 if (require(ggplot2)) {
@@ -61,7 +61,7 @@ preds \%>\%
   geom_ribbon(aes(ymin = `0.25`, ymax = `0.75`), fill = blues9[6]) +
   geom_line(aes(y = .pred), color = "orange") +
   geom_line(
-    data = deaths \%>\% filter(geo_value \%in\% four_states),
+    data = weekly_deaths \%>\% filter(geo_value \%in\% four_states),
     aes(x = time_value, y = deaths)
   ) +
   scale_x_date(limits = c(forecast_date - 90, forecast_date + 30)) +
diff --git a/man/layer_cdc_flatline_quantiles.Rd b/man/layer_cdc_flatline_quantiles.Rd
index 8eb42b423..71f414e25 100644
--- a/man/layer_cdc_flatline_quantiles.Rd
+++ b/man/layer_cdc_flatline_quantiles.Rd
@@ -27,6 +27,10 @@ typically observed daily (possibly with missing values), but
 with weekly forecast targets, you would use \code{c(7, 14, 21, 28)}. But with
 weekly data, you would use \code{1:4}.}
 
+\item{quantile_levels}{Numeric vector of probabilities with values in (0,1)
+referring to the desired predictive intervals. The default is the standard
+set for the COVID Forecast Hub.}
+
 \item{nsims}{Positive integer. The number of draws from the empirical CDF.
 These samples are spaced evenly on the (0, 1) scale, F_X(x) resulting
 in linear interpolation on the X scale. This is achieved with
@@ -40,13 +44,9 @@ calculating quantiles. The default, \code{c()} performs no grouping.}
 \item{nonneg}{Logical. Force all predictive intervals be non-negative.
 Because non-negativity is forced \emph{before} propagating forward, this
 has slightly different behaviour than would occur if using
-\code{\link[=layer_threshold_preds]{layer_threshold_preds()}}.}
+\code{\link[=layer_threshold]{layer_threshold()}}.}
 
 \item{id}{a random id string}
-
-\item{quantiles}{Numeric vector of probabilities with values in (0,1)
-referring to the desired predictive intervals. The default is the standard
-set for the COVID Forecast Hub.}
 }
 \value{
 an updated \code{frosting} postprocessor. Calling \code{\link[=predict]{predict()}} will result

From 1f58e67de257b75704acc80a80ff59f5a0b7d6b3 Mon Sep 17 00:00:00 2001
From: "Logan C. Brooks" <lcbrooks+github@andrew.cmu.edu>
Date: Wed, 4 Oct 2023 14:46:04 -0700
Subject: [PATCH 13/22] Fix incomplete `symmetrize` + document it

---
 R/layer_cdc_flatline_quantiles.R | 19 ++++++++++++++++---
 1 file changed, 16 insertions(+), 3 deletions(-)

diff --git a/R/layer_cdc_flatline_quantiles.R b/R/layer_cdc_flatline_quantiles.R
index f1a159b9a..0b0db20b7 100644
--- a/R/layer_cdc_flatline_quantiles.R
+++ b/R/layer_cdc_flatline_quantiles.R
@@ -32,10 +32,20 @@
 #'   These samples are spaced evenly on the (0, 1) scale, F_X(x) resulting
 #'   in linear interpolation on the X scale. This is achieved with
 #'   [stats::quantile()] Type 7 (the default for that function).
+#' @param symmetrize Logical. If `TRUE`, does two things: (i) forces the
+#'   "empirical" CDF of residuals to be symmetric by pretending that for every
+#'   actually-observed residual X we also observed another residual -X, and (ii)
+#'   at each ahead, forces the median simulated value to be equal to the point
+#'   prediction by adding or subtracting the same amount to every simulated
+#'   value. Adjustments in (ii) take place before propagating forward and
+#'   simulating the next ahead. This forces any 1-ahead predictive intervals to
+#'   be symmetric about the point prediction, and encourages larger aheads to be
+#'   more symmetric.
 #' @param nonneg Logical. Force all predictive intervals be non-negative.
-#'   Because non-negativity is forced _before_ propagating forward, this
-#'   has slightly different behaviour than would occur if using
-#'   [layer_threshold()].
+#'   Because non-negativity is forced _before_ propagating forward, this has
+#'   slightly different behaviour than would occur if using [layer_threshold()].
+#'   Thresholding at each ahead takes place after any shifting from
+#'   `symmetrize`.
 #'
 #' @return an updated `frosting` postprocessor. Calling [predict()] will result
 #'   in an additional `<list-col>` named `.pred_distn_all` containing 2-column
@@ -232,6 +242,9 @@ slather.layer_cdc_flatline_quantiles <-
 propogate_samples <- function(
     r, p, quantile_levels, aheads, nsim, symmetrize, nonneg) {
   max_ahead <- max(aheads)
+  if (symmetrize) {
+    r <- c(r, -r)
+  }
   samp <- quantile(r, probs = c(0, seq_len(nsim - 1)) / (nsim - 1), na.rm = TRUE)
   res <- list()
 

From fe31a790eeed2401ae659a9a070eece75847269d Mon Sep 17 00:00:00 2001
From: "Logan C. Brooks" <lcbrooks+github@andrew.cmu.edu>
Date: Wed, 4 Oct 2023 14:47:54 -0700
Subject: [PATCH 14/22] `document()`

---
 man/layer_cdc_flatline_quantiles.Rd | 17 +++++++++++++----
 1 file changed, 13 insertions(+), 4 deletions(-)

diff --git a/man/layer_cdc_flatline_quantiles.Rd b/man/layer_cdc_flatline_quantiles.Rd
index 71f414e25..22219ba7f 100644
--- a/man/layer_cdc_flatline_quantiles.Rd
+++ b/man/layer_cdc_flatline_quantiles.Rd
@@ -39,12 +39,21 @@ in linear interpolation on the X scale. This is achieved with
 \item{by_key}{A character vector of keys to group the residuals by before
 calculating quantiles. The default, \code{c()} performs no grouping.}
 
-\item{symmetrize}{logical. If \code{TRUE} then interval will be symmetric.}
+\item{symmetrize}{Logical. If \code{TRUE}, does two things: (i) forces the
+"empirical" CDF of residuals to be symmetric by pretending that for every
+actually-observed residual X we also observed another residual -X, and (ii)
+at each ahead, forces the median simulated value to be equal to the point
+prediction by adding or subtracting the same amount to every simulated
+value. Adjustments in (ii) take place before propagating forward and
+simulating the next ahead. This forces any 1-ahead predictive intervals to
+be symmetric about the point prediction, and encourages larger aheads to be
+more symmetric.}
 
 \item{nonneg}{Logical. Force all predictive intervals be non-negative.
-Because non-negativity is forced \emph{before} propagating forward, this
-has slightly different behaviour than would occur if using
-\code{\link[=layer_threshold]{layer_threshold()}}.}
+Because non-negativity is forced \emph{before} propagating forward, this has
+slightly different behaviour than would occur if using \code{\link[=layer_threshold]{layer_threshold()}}.
+Thresholding at each ahead takes place after any shifting from
+\code{symmetrize}.}
 
 \item{id}{a random id string}
 }

From 93fac1aca9034ee61de4f993df315223145e4c73 Mon Sep 17 00:00:00 2001
From: "Logan C. Brooks" <lcbrooks+github@andrew.cmu.edu>
Date: Wed, 4 Oct 2023 14:49:30 -0700
Subject: [PATCH 15/22] Fix death rate 7dav -> weekly sum conversion in example

---
 R/cdc_baseline_forecaster.R    | 2 +-
 man/cdc_baseline_forecaster.Rd | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/R/cdc_baseline_forecaster.R b/R/cdc_baseline_forecaster.R
index f1014d666..4a8c4b7a4 100644
--- a/R/cdc_baseline_forecaster.R
+++ b/R/cdc_baseline_forecaster.R
@@ -26,7 +26,7 @@
 #' weekly_deaths <- case_death_rate_subset %>%
 #'   select(geo_value, time_value, death_rate) %>%
 #'   left_join(state_census %>% select(pop, abbr), by = c("geo_value" = "abbr")) %>%
-#'   mutate(deaths = pmax(death_rate / 1e5 * pop, 0)) %>%
+#'   mutate(deaths = pmax(death_rate / 1e5 * pop * 7, 0)) %>%
 #'   select(-pop, -death_rate) %>%
 #'   group_by(geo_value) %>%
 #'   epi_slide(~ sum(.$deaths), before = 6, new_col_name = "deaths") %>%
diff --git a/man/cdc_baseline_forecaster.Rd b/man/cdc_baseline_forecaster.Rd
index 65de1c29e..31fe75f25 100644
--- a/man/cdc_baseline_forecaster.Rd
+++ b/man/cdc_baseline_forecaster.Rd
@@ -41,7 +41,7 @@ library(dplyr)
 weekly_deaths <- case_death_rate_subset \%>\%
   select(geo_value, time_value, death_rate) \%>\%
   left_join(state_census \%>\% select(pop, abbr), by = c("geo_value" = "abbr")) \%>\%
-  mutate(deaths = pmax(death_rate / 1e5 * pop, 0)) \%>\%
+  mutate(deaths = pmax(death_rate / 1e5 * pop * 7, 0)) \%>\%
   select(-pop, -death_rate) \%>\%
   group_by(geo_value) \%>\%
   epi_slide(~ sum(.$deaths), before = 6, new_col_name = "deaths") \%>\%

From 0516836dfcbbcbd58ca19eed8cee3158ac50b32d Mon Sep 17 00:00:00 2001
From: "Logan C. Brooks" <lcbrooks+github@andrew.cmu.edu>
Date: Wed, 4 Oct 2023 14:51:08 -0700
Subject: [PATCH 16/22] Copyediting, roxygen link styling, formatting

---
 R/cdc_baseline_forecaster.R                        |  6 +++---
 R/layer_cdc_flatline_quantiles.R                   | 14 +++++++-------
 man/cdc_baseline_forecaster.Rd                     |  6 +++---
 man/layer_cdc_flatline_quantiles.Rd                | 10 +++++-----
 ...ropogate_samples.R => test-propagate_samples.R} |  2 +-
 5 files changed, 19 insertions(+), 19 deletions(-)
 rename tests/testthat/{test-propogate_samples.R => test-propagate_samples.R} (72%)

diff --git a/R/cdc_baseline_forecaster.R b/R/cdc_baseline_forecaster.R
index 4a8c4b7a4..e66563b65 100644
--- a/R/cdc_baseline_forecaster.R
+++ b/R/cdc_baseline_forecaster.R
@@ -12,13 +12,13 @@
 #' This forecaster is meant to produce exactly the CDC Baseline used for
 #' [COVID19ForecastHub](https://covid19forecasthub.org)
 #'
-#' @param epi_data An [epiprocess::epi_df]
+#' @param epi_data An [`epiprocess::epi_df`]
 #' @param outcome A scalar character for the column name we wish to predict.
 #' @param args_list A list of additional arguments as created by the
 #'   [cdc_baseline_args_list()] constructor function.
 #'
-#' @return A data frame of point and interval forecasts at for all
-#'   aheads (unique horizons) for each unique combination of `key_vars`.
+#' @return A data frame of point and interval forecasts for all aheads (unique
+#'   horizons) for each unique combination of `key_vars`.
 #' @export
 #'
 #' @examples
diff --git a/R/layer_cdc_flatline_quantiles.R b/R/layer_cdc_flatline_quantiles.R
index 0b0db20b7..16538f0e1 100644
--- a/R/layer_cdc_flatline_quantiles.R
+++ b/R/layer_cdc_flatline_quantiles.R
@@ -22,15 +22,15 @@
 #' @inheritParams layer_residual_quantiles
 #' @param aheads Numeric vector of desired forecast horizons. These should be
 #'   given in the "units of the training data". So, for example, for data
-#'   typically observed daily (possibly with missing values), but
-#'   with weekly forecast targets, you would use `c(7, 14, 21, 28)`. But with
-#'   weekly data, you would use `1:4`.
+#'   typically observed daily (possibly with missing values), but with weekly
+#'   forecast targets, you would use `c(7, 14, 21, 28)`. But with weekly data,
+#'   you would use `1:4`.
 #' @param quantile_levels Numeric vector of probabilities with values in (0,1)
 #'   referring to the desired predictive intervals. The default is the standard
 #'   set for the COVID Forecast Hub.
 #' @param nsims Positive integer. The number of draws from the empirical CDF.
-#'   These samples are spaced evenly on the (0, 1) scale, F_X(x) resulting
-#'   in linear interpolation on the X scale. This is achieved with
+#'   These samples are spaced evenly on the (0, 1) scale, F_X(x) resulting in
+#'   linear interpolation on the X scale. This is achieved with
 #'   [stats::quantile()] Type 7 (the default for that function).
 #' @param symmetrize Logical. If `TRUE`, does two things: (i) forces the
 #'   "empirical" CDF of residuals to be symmetric by pretending that for every
@@ -223,7 +223,7 @@ slather.layer_cdc_flatline_quantiles <-
     res <- dplyr::left_join(p, r, by = avail_grps) %>%
       dplyr::rowwise() %>%
       dplyr::mutate(
-        .pred_distn_all = propogate_samples(
+        .pred_distn_all = propagate_samples(
           .resid, .pred, object$quantile_levels,
           object$aheads, object$nsim, object$symmetrize, object$nonneg
         )
@@ -239,7 +239,7 @@ slather.layer_cdc_flatline_quantiles <-
     components
   }
 
-propogate_samples <- function(
+propagate_samples <- function(
     r, p, quantile_levels, aheads, nsim, symmetrize, nonneg) {
   max_ahead <- max(aheads)
   if (symmetrize) {
diff --git a/man/cdc_baseline_forecaster.Rd b/man/cdc_baseline_forecaster.Rd
index 31fe75f25..3f5faa329 100644
--- a/man/cdc_baseline_forecaster.Rd
+++ b/man/cdc_baseline_forecaster.Rd
@@ -11,7 +11,7 @@ cdc_baseline_forecaster(
 )
 }
 \arguments{
-\item{epi_data}{An \link[epiprocess:epi_df]{epiprocess::epi_df}}
+\item{epi_data}{An \code{\link[epiprocess:epi_df]{epiprocess::epi_df}}}
 
 \item{outcome}{A scalar character for the column name we wish to predict.}
 
@@ -19,8 +19,8 @@ cdc_baseline_forecaster(
 \code{\link[=cdc_baseline_args_list]{cdc_baseline_args_list()}} constructor function.}
 }
 \value{
-A data frame of point and interval forecasts at for all
-aheads (unique horizons) for each unique combination of \code{key_vars}.
+A data frame of point and interval forecasts for all aheads (unique
+horizons) for each unique combination of \code{key_vars}.
 }
 \description{
 This is a simple forecasting model for
diff --git a/man/layer_cdc_flatline_quantiles.Rd b/man/layer_cdc_flatline_quantiles.Rd
index 22219ba7f..5e72378b3 100644
--- a/man/layer_cdc_flatline_quantiles.Rd
+++ b/man/layer_cdc_flatline_quantiles.Rd
@@ -23,17 +23,17 @@ layer_cdc_flatline_quantiles(
 
 \item{aheads}{Numeric vector of desired forecast horizons. These should be
 given in the "units of the training data". So, for example, for data
-typically observed daily (possibly with missing values), but
-with weekly forecast targets, you would use \code{c(7, 14, 21, 28)}. But with
-weekly data, you would use \code{1:4}.}
+typically observed daily (possibly with missing values), but with weekly
+forecast targets, you would use \code{c(7, 14, 21, 28)}. But with weekly data,
+you would use \code{1:4}.}
 
 \item{quantile_levels}{Numeric vector of probabilities with values in (0,1)
 referring to the desired predictive intervals. The default is the standard
 set for the COVID Forecast Hub.}
 
 \item{nsims}{Positive integer. The number of draws from the empirical CDF.
-These samples are spaced evenly on the (0, 1) scale, F_X(x) resulting
-in linear interpolation on the X scale. This is achieved with
+These samples are spaced evenly on the (0, 1) scale, F_X(x) resulting in
+linear interpolation on the X scale. This is achieved with
 \code{\link[stats:quantile]{stats::quantile()}} Type 7 (the default for that function).}
 
 \item{by_key}{A character vector of keys to group the residuals by before
diff --git a/tests/testthat/test-propogate_samples.R b/tests/testthat/test-propagate_samples.R
similarity index 72%
rename from tests/testthat/test-propogate_samples.R
rename to tests/testthat/test-propagate_samples.R
index b8a1ff82d..5278ab385 100644
--- a/tests/testthat/test-propogate_samples.R
+++ b/tests/testthat/test-propagate_samples.R
@@ -1,4 +1,4 @@
-test_that("propogate_samples", {
+test_that("propagate_samples", {
   r <- -30:50
   p <- 40
   quantiles <- 1:9 / 10

From 0905ba47cb8050a80a548366e5eb971b3a0dadfb Mon Sep 17 00:00:00 2001
From: "Logan C. Brooks" <lcbrooks+github@andrew.cmu.edu>
Date: Wed, 4 Oct 2023 16:52:40 -0700
Subject: [PATCH 17/22] "Logical" -> "Scalar logical" as appropriate to match
 rest of docs

---
 R/layer_cdc_flatline_quantiles.R    | 4 ++--
 man/layer_cdc_flatline_quantiles.Rd | 4 ++--
 2 files changed, 4 insertions(+), 4 deletions(-)

diff --git a/R/layer_cdc_flatline_quantiles.R b/R/layer_cdc_flatline_quantiles.R
index 16538f0e1..d9d192563 100644
--- a/R/layer_cdc_flatline_quantiles.R
+++ b/R/layer_cdc_flatline_quantiles.R
@@ -32,7 +32,7 @@
 #'   These samples are spaced evenly on the (0, 1) scale, F_X(x) resulting in
 #'   linear interpolation on the X scale. This is achieved with
 #'   [stats::quantile()] Type 7 (the default for that function).
-#' @param symmetrize Logical. If `TRUE`, does two things: (i) forces the
+#' @param symmetrize Scalar logical. If `TRUE`, does two things: (i) forces the
 #'   "empirical" CDF of residuals to be symmetric by pretending that for every
 #'   actually-observed residual X we also observed another residual -X, and (ii)
 #'   at each ahead, forces the median simulated value to be equal to the point
@@ -41,7 +41,7 @@
 #'   simulating the next ahead. This forces any 1-ahead predictive intervals to
 #'   be symmetric about the point prediction, and encourages larger aheads to be
 #'   more symmetric.
-#' @param nonneg Logical. Force all predictive intervals be non-negative.
+#' @param nonneg Scalar logical. Force all predictive intervals be non-negative.
 #'   Because non-negativity is forced _before_ propagating forward, this has
 #'   slightly different behaviour than would occur if using [layer_threshold()].
 #'   Thresholding at each ahead takes place after any shifting from
diff --git a/man/layer_cdc_flatline_quantiles.Rd b/man/layer_cdc_flatline_quantiles.Rd
index 5e72378b3..55a1a378e 100644
--- a/man/layer_cdc_flatline_quantiles.Rd
+++ b/man/layer_cdc_flatline_quantiles.Rd
@@ -39,7 +39,7 @@ linear interpolation on the X scale. This is achieved with
 \item{by_key}{A character vector of keys to group the residuals by before
 calculating quantiles. The default, \code{c()} performs no grouping.}
 
-\item{symmetrize}{Logical. If \code{TRUE}, does two things: (i) forces the
+\item{symmetrize}{Scalar logical. If \code{TRUE}, does two things: (i) forces the
 "empirical" CDF of residuals to be symmetric by pretending that for every
 actually-observed residual X we also observed another residual -X, and (ii)
 at each ahead, forces the median simulated value to be equal to the point
@@ -49,7 +49,7 @@ simulating the next ahead. This forces any 1-ahead predictive intervals to
 be symmetric about the point prediction, and encourages larger aheads to be
 more symmetric.}
 
-\item{nonneg}{Logical. Force all predictive intervals be non-negative.
+\item{nonneg}{Scalar logical. Force all predictive intervals be non-negative.
 Because non-negativity is forced \emph{before} propagating forward, this has
 slightly different behaviour than would occur if using \code{\link[=layer_threshold]{layer_threshold()}}.
 Thresholding at each ahead takes place after any shifting from

From 6f14e6a1e950b23cf6fa4f51c2f6ee88527ac96b Mon Sep 17 00:00:00 2001
From: "Daniel J. McDonald" <dajmcdon@gmail.com>
Date: Thu, 5 Oct 2023 06:17:08 -0700
Subject: [PATCH 18/22] add formatter to the correct branch

---
 R/flusight_hub_formatter.R | 95 ++++++++++++++++++++++++++++++++++++++
 1 file changed, 95 insertions(+)
 create mode 100644 R/flusight_hub_formatter.R

diff --git a/R/flusight_hub_formatter.R b/R/flusight_hub_formatter.R
new file mode 100644
index 000000000..78bb5719c
--- /dev/null
+++ b/R/flusight_hub_formatter.R
@@ -0,0 +1,95 @@
+abbr_to_fips <- function(abbr) {
+  fi <- dplyr::left_join(
+    tibble::tibble(abbr = tolower(abbr)),
+    state_census, by = "abbr") %>%
+    dplyr::mutate(fips = as.character(fips), fips = case_when(
+      fips == "0" ~ "US",
+      nchar(fips) < 2L ~ paste0("0", fips),
+      TRUE ~ fips
+    )) %>%
+    pull(.data$fips)
+  names(fi) <- NULL
+  fi
+}
+
+#' Format predictions for submission to FluSight forecast Hub
+#'
+#'
+#'
+#' @param object a data.frame of predictions or an object of class
+#'   `canned_epipred` as created by, e.g., [arx_forecaster()]
+#' @param ... <[`dynamic-dots`][rlang::dyn-dots]> Name = value pairs of constant
+#'   columns (or mutations) to perform to the results.
+#' @param .fcast_period
+#'
+#' @return A [tibble::tibble].
+#' @export
+#'
+#' @examples
+flusight_hub_formatter <- function(
+    object, ...,
+    .fcast_period = c("daily", "weekly")) {
+  UseMethod("flusight_hub_formatter")
+}
+
+#' @export
+flusight_hub_formatter.canned_epipred <- function(
+    object, ...,
+    .fcast_period = c("daily", "weekly")) {
+  flusight_hub_formatter(object$predictions, ..., .fcast_period = .fcast_period)
+}
+
+#' @export
+flusight_hub_formatter.data.frame <- function(
+    object, ...,
+    .fcast_period = c("daily", "weekly")) {
+  required_names <- c(".pred", ".pred_distn", "forecast_date", "geo_value")
+  optional_names <- c("ahead", "target_date")
+  hardhat::validate_column_names(object, required_names)
+  if (!any(optional_names %in% names(object))) {
+    cli::cli_abort("At least one of {.val {optional_names}} must be present.")
+  }
+
+  dots <- enquos(..., .named = TRUE)
+  names <- names(dots)
+
+  object <- object %>%
+    # combine the predictions and the distribution
+    dplyr::mutate(.pred_distn = nested_quantiles(.pred_distn)) %>%
+    dplyr::rowwise() %>%
+    dplyr::mutate(
+      .pred_distn = list(add_row(.pred_distn, q = .pred, tau = NA)),
+      .pred = NULL
+    ) %>%
+    tidyr::unnest(.pred_distn) %>%
+    # now we create the correct column names
+    dplyr::rename(
+      value = q,
+      output_type_id = tau,
+      reference_date = forecast_date
+    ) %>%
+    # convert to fips codes, and add any constant cols passed in ...
+    dplyr::mutate(location = abbr_to_fips(tolower(geo_value)), geo_value = NULL, !!!dots)
+
+  # create target_end_date / horizon, depending on what is available
+  pp <- ifelse(match.arg(.fcast_period) == "daily", 1L, 7L)
+  has_ahead <- charmatch("ahead", names(object))
+  if ("target_date" %in% names(object) && !is.na(has_ahead)) {
+    object <- object %>%
+      dplyr::rename(
+        target_end_date = target_date,
+        horizon = !!names(object)[has_ahead]
+      )
+  } else if (!is.na(has_ahead)) { # ahead present, not target date
+    object <- object %>%
+      dplyr::rename(horizon = !!names(object)[has_ahead]) %>%
+      dplyr::mutate(target_end_date =  horizon * pp + reference_date)
+  } else { # target_date present, not ahead
+    object <- object %>%
+      dplyr::rename(target_end_date = target_date) %>%
+      dplyr::mutate(horizon = as.integer((target_end_date - reference_date)) / pp)
+  }
+  object %>% dplyr::relocate(
+    reference_date, horizon, target_end_date, location, output_type_id, value
+  )
+}

From b1c34cb862146b48344e0174db62916f62d4a2c0 Mon Sep 17 00:00:00 2001
From: "Logan C. Brooks" <lcbrooks+github@andrew.cmu.edu>
Date: Thu, 5 Oct 2023 07:48:40 -0700
Subject: [PATCH 19/22] Speed up cdc baseline: `quantile(...., names = FALSE)`

---
 R/layer_cdc_flatline_quantiles.R | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/R/layer_cdc_flatline_quantiles.R b/R/layer_cdc_flatline_quantiles.R
index d9d192563..312c3630e 100644
--- a/R/layer_cdc_flatline_quantiles.R
+++ b/R/layer_cdc_flatline_quantiles.R
@@ -245,7 +245,8 @@ propagate_samples <- function(
   if (symmetrize) {
     r <- c(r, -r)
   }
-  samp <- quantile(r, probs = c(0, seq_len(nsim - 1)) / (nsim - 1), na.rm = TRUE)
+  samp <- quantile(r, probs = c(0, seq_len(nsim - 1)) / (nsim - 1),
+                   na.rm = TRUE, names = FALSE)
   res <- list()
 
   raw <- samp + p

From cf9a44e3fb3cd2a08342272787882b3626e5ff91 Mon Sep 17 00:00:00 2001
From: dsweber2 <david.weber2@pm.me>
Date: Thu, 5 Oct 2023 11:57:19 -0700
Subject: [PATCH 20/22] CI: trying to change in a particular branch too

---
 .github/workflows/R-CMD-check.yaml | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/.github/workflows/R-CMD-check.yaml b/.github/workflows/R-CMD-check.yaml
index c4bcd6b68..eff7367ec 100644
--- a/.github/workflows/R-CMD-check.yaml
+++ b/.github/workflows/R-CMD-check.yaml
@@ -4,9 +4,9 @@
 # Created with usethis + edited to use API key.
 on:
   push:
-    branches: [main, master]
+    branches: [main, master, v0.0.6]
   pull_request:
-    branches: [main, master]
+    branches: [main, master, v0.0.6]
 
 name: R-CMD-check
 

From 343add145aed2baea3a60ec204c41f03d153f5a7 Mon Sep 17 00:00:00 2001
From: "Daniel J. McDonald" <dajmcdon@gmail.com>
Date: Thu, 5 Oct 2023 12:26:16 -0700
Subject: [PATCH 21/22] formatter works

---
 NAMESPACE                     |  3 +++
 R/flusight_hub_formatter.R    | 37 +++++++++++++++++++++----
 man/flusight_hub_formatter.Rd | 51 +++++++++++++++++++++++++++++++++++
 3 files changed, 86 insertions(+), 5 deletions(-)
 create mode 100644 man/flusight_hub_formatter.Rd

diff --git a/NAMESPACE b/NAMESPACE
index 21cd7b83f..03b159764 100644
--- a/NAMESPACE
+++ b/NAMESPACE
@@ -34,6 +34,8 @@ S3method(extrapolate_quantiles,dist_default)
 S3method(extrapolate_quantiles,dist_quantiles)
 S3method(extrapolate_quantiles,distribution)
 S3method(fit,epi_workflow)
+S3method(flusight_hub_formatter,canned_epipred)
+S3method(flusight_hub_formatter,data.frame)
 S3method(format,dist_quantiles)
 S3method(is.na,dist_quantiles)
 S3method(is.na,distribution)
@@ -126,6 +128,7 @@ export(fit)
 export(flatline)
 export(flatline_args_list)
 export(flatline_forecaster)
+export(flusight_hub_formatter)
 export(frosting)
 export(get_test_data)
 export(grab_names)
diff --git a/R/flusight_hub_formatter.R b/R/flusight_hub_formatter.R
index 78bb5719c..1ddd471b3 100644
--- a/R/flusight_hub_formatter.R
+++ b/R/flusight_hub_formatter.R
@@ -14,18 +14,44 @@ abbr_to_fips <- function(abbr) {
 
 #' Format predictions for submission to FluSight forecast Hub
 #'
-#'
+#' This function converts predictions from any of the included forecasters into
+#' a format (nearly) ready for submission to the 2023-24
+#' [FluSight-forecast-hub](https://github.com/cdcepi/FluSight-forecast-hub).
+#' See there for documentation of the required columns. Currently, only
+#' "quantile" forcasts are supported, but the intention is to support both
+#' "quantile" and "pmf". For this reason, adding the `output_type` column should
+#' be done via the `...` argument. See the examples below. The specific required
+#' format for this forecast task is [here](https://github.com/cdcepi/FluSight-forecast-hub/blob/main/model-output/README.md).
 #'
 #' @param object a data.frame of predictions or an object of class
 #'   `canned_epipred` as created by, e.g., [arx_forecaster()]
 #' @param ... <[`dynamic-dots`][rlang::dyn-dots]> Name = value pairs of constant
-#'   columns (or mutations) to perform to the results.
+#'   columns (or mutations) to perform to the results. See examples.
 #' @param .fcast_period
 #'
-#' @return A [tibble::tibble].
+#' @return A [tibble::tibble]. If `...` is empty, the result will contain the
+#'   columns `reference_date`, `horizon`, `target_end_date`, `location`,
+#'   `output_type_id`, and `value`. The `...` can perform mutations on any of
+#'   these.
 #' @export
 #'
 #' @examples
+#' library(dplyr)
+#' weekly_deaths <- case_death_rate_subset %>%
+#'   select(geo_value, time_value, death_rate) %>%
+#'   left_join(state_census %>% select(pop, abbr), by = c("geo_value" = "abbr")) %>%
+#'   mutate(deaths = pmax(death_rate / 1e5 * pop * 7, 0)) %>%
+#'   select(-pop, -death_rate) %>%
+#'   group_by(geo_value) %>%
+#'   epi_slide(~ sum(.$deaths), before = 6, new_col_name = "deaths") %>%
+#'   ungroup() %>%
+#'   filter(weekdays(time_value) == "Saturday")
+#'
+#' cdc <- cdc_baseline_forecaster(weekly_deaths, "deaths")
+#' flusight_hub_formatter(cdc)
+#' flusight_hub_formatter(cdc, target = "wk inc covid deaths")
+#' flusight_hub_formatter(cdc, target = paste(horizon, "wk inc covid deaths"))
+#' flusight_hub_formatter(cdc, target = "wk inc covid deaths", output_type = "quantile")
 flusight_hub_formatter <- function(
     object, ...,
     .fcast_period = c("daily", "weekly")) {
@@ -69,7 +95,7 @@ flusight_hub_formatter.data.frame <- function(
       reference_date = forecast_date
     ) %>%
     # convert to fips codes, and add any constant cols passed in ...
-    dplyr::mutate(location = abbr_to_fips(tolower(geo_value)), geo_value = NULL, !!!dots)
+    dplyr::mutate(location = abbr_to_fips(tolower(geo_value)), geo_value = NULL)
 
   # create target_end_date / horizon, depending on what is available
   pp <- ifelse(match.arg(.fcast_period) == "daily", 1L, 7L)
@@ -91,5 +117,6 @@ flusight_hub_formatter.data.frame <- function(
   }
   object %>% dplyr::relocate(
     reference_date, horizon, target_end_date, location, output_type_id, value
-  )
+  ) %>%
+    dplyr::mutate(!!!dots)
 }
diff --git a/man/flusight_hub_formatter.Rd b/man/flusight_hub_formatter.Rd
new file mode 100644
index 000000000..4a2661080
--- /dev/null
+++ b/man/flusight_hub_formatter.Rd
@@ -0,0 +1,51 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/flusight_hub_formatter.R
+\name{flusight_hub_formatter}
+\alias{flusight_hub_formatter}
+\title{Format predictions for submission to FluSight forecast Hub}
+\usage{
+flusight_hub_formatter(object, ..., .fcast_period = c("daily", "weekly"))
+}
+\arguments{
+\item{object}{a data.frame of predictions or an object of class
+\code{canned_epipred} as created by, e.g., \code{\link[=arx_forecaster]{arx_forecaster()}}}
+
+\item{...}{<\code{\link[rlang:dyn-dots]{dynamic-dots}}> Name = value pairs of constant
+columns (or mutations) to perform to the results. See examples.}
+
+\item{.fcast_period}{}
+}
+\value{
+A \link[tibble:tibble]{tibble::tibble}. If \code{...} is empty, the result will contain the
+columns \code{reference_date}, \code{horizon}, \code{target_end_date}, \code{location},
+\code{output_type_id}, and \code{value}. The \code{...} can perform mutations on any of
+these.
+}
+\description{
+This function converts predictions from any of the included forecasters into
+a format (nearly) ready for submission to the 2023-24
+\href{https://github.com/cdcepi/FluSight-forecast-hub}{FluSight-forecast-hub}.
+See there for documentation of the required columns. Currently, only
+"quantile" forcasts are supported, but the intention is to support both
+"quantile" and "pmf". For this reason, adding the \code{output_type} column should
+be done via the \code{...} argument. See the examples below. The specific required
+format for this forecast task is \href{https://github.com/cdcepi/FluSight-forecast-hub/blob/main/model-output/README.md}{here}.
+}
+\examples{
+library(dplyr)
+weekly_deaths <- case_death_rate_subset \%>\%
+  select(geo_value, time_value, death_rate) \%>\%
+  left_join(state_census \%>\% select(pop, abbr), by = c("geo_value" = "abbr")) \%>\%
+  mutate(deaths = pmax(death_rate / 1e5 * pop * 7, 0)) \%>\%
+  select(-pop, -death_rate) \%>\%
+  group_by(geo_value) \%>\%
+  epi_slide(~ sum(.$deaths), before = 6, new_col_name = "deaths") \%>\%
+  ungroup() \%>\%
+  filter(weekdays(time_value) == "Saturday")
+
+cdc <- cdc_baseline_forecaster(weekly_deaths, "deaths")
+flusight_hub_formatter(cdc)
+flusight_hub_formatter(cdc, target = "wk inc covid deaths")
+flusight_hub_formatter(cdc, target = paste(horizon, "wk inc covid deaths"))
+flusight_hub_formatter(cdc, target = "wk inc covid deaths", output_type = "quantile")
+}

From b5fe62451d3f9ffc4677e12e7b0ccbc0f82f9d38 Mon Sep 17 00:00:00 2001
From: "Daniel J. McDonald" <dajmcdon@gmail.com>
Date: Thu, 5 Oct 2023 12:37:29 -0700
Subject: [PATCH 22/22] local checks pass

---
 R/flusight_hub_formatter.R    | 11 ++++++++++-
 man/flusight_hub_formatter.Rd | 11 ++++++++++-
 2 files changed, 20 insertions(+), 2 deletions(-)

diff --git a/R/flusight_hub_formatter.R b/R/flusight_hub_formatter.R
index 1ddd471b3..d433ab2a7 100644
--- a/R/flusight_hub_formatter.R
+++ b/R/flusight_hub_formatter.R
@@ -27,7 +27,16 @@ abbr_to_fips <- function(abbr) {
 #'   `canned_epipred` as created by, e.g., [arx_forecaster()]
 #' @param ... <[`dynamic-dots`][rlang::dyn-dots]> Name = value pairs of constant
 #'   columns (or mutations) to perform to the results. See examples.
-#' @param .fcast_period
+#' @param .fcast_period Control whether the `horizon` should represent days or
+#'   weeks. Depending on whether the forecaster output has target dates
+#'   from [layer_add_target_date()] or not, we may need to compute the horizon
+#'   and/or the `target_end_date` from the other available columns in the predictions.
+#'   When both `ahead` and `target_date` are available, this is ignored. If only
+#'   `ahead` or `aheads` exists, then the target date may need to be multiplied
+#'   if the `ahead` represents weekly forecasts. Alternatively, if only, the
+#'   `target_date` is available, then the `horizon` will be in days, unless
+#'   this argument is `"weekly"`. Note that these can be adjusted later by the
+#'   `...` argument.
 #'
 #' @return A [tibble::tibble]. If `...` is empty, the result will contain the
 #'   columns `reference_date`, `horizon`, `target_end_date`, `location`,
diff --git a/man/flusight_hub_formatter.Rd b/man/flusight_hub_formatter.Rd
index 4a2661080..d8a4571f4 100644
--- a/man/flusight_hub_formatter.Rd
+++ b/man/flusight_hub_formatter.Rd
@@ -13,7 +13,16 @@ flusight_hub_formatter(object, ..., .fcast_period = c("daily", "weekly"))
 \item{...}{<\code{\link[rlang:dyn-dots]{dynamic-dots}}> Name = value pairs of constant
 columns (or mutations) to perform to the results. See examples.}
 
-\item{.fcast_period}{}
+\item{.fcast_period}{Control whether the \code{horizon} should represent days or
+weeks. Depending on whether the forecaster output has target dates
+from \code{\link[=layer_add_target_date]{layer_add_target_date()}} or not, we may need to compute the horizon
+and/or the \code{target_end_date} from the other available columns in the predictions.
+When both \code{ahead} and \code{target_date} are available, this is ignored. If only
+\code{ahead} or \code{aheads} exists, then the target date may need to be multiplied
+if the \code{ahead} represents weekly forecasts. Alternatively, if only, the
+\code{target_date} is available, then the \code{horizon} will be in days, unless
+this argument is \code{"weekly"}. Note that these can be adjusted later by the
+\code{...} argument.}
 }
 \value{
 A \link[tibble:tibble]{tibble::tibble}. If \code{...} is empty, the result will contain the