diff --git a/DESCRIPTION b/DESCRIPTION
index bdbc337b4..e1a8b25d8 100644
--- a/DESCRIPTION
+++ b/DESCRIPTION
@@ -67,4 +67,4 @@ Config/testthat/edition: 3
 Encoding: UTF-8
 LazyData: true
 Roxygen: list(markdown = TRUE)
-RoxygenNote: 7.2.2
+RoxygenNote: 7.2.3
diff --git a/R/arx_forecaster.R b/R/arx_forecaster.R
index 9d537ab24..ad1d02288 100644
--- a/R/arx_forecaster.R
+++ b/R/arx_forecaster.R
@@ -38,6 +38,9 @@ arx_forecaster <- function(epi_data,
 
   # --- validation
   validate_forecaster_inputs(epi_data, outcome, predictors)
+  if (!inherits(args_list, "arx_alist")) {
+    cli_stop("args_list was not created using `arx_args_list().")
+  }
   if (!is.list(trainer) || trainer$mode != "regression")
     cli_stop("{trainer} must be a `parsnip` method of mode 'regression'.")
   lags <- arx_lags_validator(predictors, args_list$lags)
@@ -121,7 +124,7 @@ arx_lags_validator <- function(predictors, lags) {
 #'   [layer_residual_quantiles()] for more information. The default,
 #'   `character(0)` performs no grouping.
 #'
-#' @return A list containing updated parameter choices.
+#' @return A list containing updated parameter choices with class `arx_alist`.
 #' @export
 #'
 #' @examples
@@ -151,14 +154,15 @@ arx_args_list <- function(lags = c(0L, 7L, 14L),
   arg_is_probabilities(levels, allow_null = TRUE)
 
   max_lags <- max(lags)
-  enlist(lags = .lags,
-         ahead,
-         min_train_window,
-         levels,
-         forecast_date,
-         target_date,
-         symmetrize,
-         nonneg,
-         max_lags,
-         quantile_by_key)
+  structure(enlist(lags = .lags,
+                   ahead,
+                   min_train_window,
+                   levels,
+                   forecast_date,
+                   target_date,
+                   symmetrize,
+                   nonneg,
+                   max_lags,
+                   quantile_by_key),
+            class = "arx_alist")
 }
diff --git a/R/flatline_forecaster.R b/R/flatline_forecaster.R
index 13f0c1f53..af2638c8a 100644
--- a/R/flatline_forecaster.R
+++ b/R/flatline_forecaster.R
@@ -32,6 +32,9 @@ flatline_forecaster <- function(epi_data,
                                 args_list = flatline_args_list()) {
 
   validate_forecaster_inputs(epi_data, outcome, "time_value")
+  if (!inherits(args_list, "flatline_alist")) {
+    cli_stop("args_list was not created using `flatline_args_list().")
+  }
   keys <- epi_keys(epi_data)
   ek <- keys[-1]
   outcome <- rlang::sym(outcome)
@@ -75,7 +78,7 @@ flatline_forecaster <- function(epi_data,
 #'
 #' @inheritParams arx_args_list
 #'
-#' @return A list containing updated parameter choices.
+#' @return A list containing updated parameter choices with class `flatline_alist`.
 #' @export
 #'
 #' @examples
@@ -99,14 +102,15 @@ flatline_args_list <- function(ahead = 7L,
   arg_is_lgl(symmetrize, nonneg)
   arg_is_probabilities(levels, allow_null = TRUE)
 
-  enlist(ahead,
-         min_train_window,
-         forecast_date,
-         target_date,
-         levels,
-         symmetrize,
-         nonneg,
-         quantile_by_key)
+  structure(enlist(ahead,
+                   min_train_window,
+                   forecast_date,
+                   target_date,
+                   levels,
+                   symmetrize,
+                   nonneg,
+                   quantile_by_key),
+            class = "flatline_alist")
 }
 
 validate_forecaster_inputs <- function(epi_data, outcome, predictors) {
diff --git a/README.Rmd b/README.Rmd
index adc4c7dad..d79e7bc69 100644
--- a/README.Rmd
+++ b/README.Rmd
@@ -19,15 +19,15 @@ knitr::opts_chunk$set(
 [![R-CMD-check](https://github.com/cmu-delphi/epipredict/workflows/R-CMD-check/badge.svg)](https://github.com/cmu-delphi/epipredict/actions)
 <!-- badges: end -->
 
-**Note:** This package is currently in development and likely will not work as expected.
+**Note:** This package is currently in development and may not work as expected. Please file bug reports as issues in this repo, and we will do our best to address them quickly.
 
 ## Installation
 
 You can install the development version of epipredict from [GitHub](https://github.com/) with:
 
 ``` r
-# install.packages("devtools")
-devtools::install_github("cmu-delphi/epipredict")
+# install.packages("remotes")
+remotes::install_github("cmu-delphi/epipredict")
 ```
 
 ## Documentation
@@ -39,7 +39,7 @@ You can view documentation for the `main` branch at <https://cmu-delphi.github.i
 
 **We hope to provide:**
 
-1. A set of basic, easy-to-use forecasters that work out of the box. You should be able to do a reasonably limited amount of customization on them. (Any serious customization happens with point number 2.) For the basic forecasters, we should provide, at least: 
+1. A set of basic, easy-to-use forecasters that work out of the box. You should be able to do a reasonably limited amount of customization on them. For the basic forecasters, we currently provide: 
     * Baseline flat-line forecaster 
     * Autoregressive forecaster
     * Autoregressive classifier
@@ -49,15 +49,58 @@ You can view documentation for the `main` branch at <https://cmu-delphi.github.i
     * Predictor: make predictions, using a fitted model object
     * Postprocessor: do things to the predictions before returning
 
-**Target audience:**
+**Target audiences:**
 
 * Basic. Has data, calls forecaster with default arguments.
 * Intermediate. Wants to examine changes to the arguments, take advantage of built in flexibility.
 * Advanced. Wants to write their own forecasters. Maybe willing to build up from some components that we write. 
 
-The Advanced user should find their task to be relatively easy (and we'll show them how). 
+The Advanced user should find their task to be relatively easy. Examples of these tasks are illustrated in the [vignettes and articles](https://cmu-delphi.github.io/epipredict).
+
+## Intermediate example
+
+The package comes with some built-in historical data for illustration, but
+up-to-date versions of this could be downloaded with the [`{covidcast}` package](https://cmu-delphi.github.io/covidcast/covidcastR/index.html) and processed using [`{epiprocess}`](https://cmu-delphi.github.io/epiprocess/).[^1]
+
+[^1]: Other epidemiological signals for non-Covid related illnesses are available with [`{epidatr}`](https://github.com/cmu-delphi/epidatr) which interfaces directly to Delphi's [Epidata API](https://cmu-delphi.github.io/delphi-epidata/)
+
+```{r epidf, message=FALSE}
+library(tidyverse)
+library(epipredict)
+jhu <- case_death_rate_subset
+jhu
+```
+
+To create and train a simple auto-regressive forecaster to predict the death rate two weeks into the future using past (lagged) deaths and cases, we could use the following function.
+
+```{r make-forecasts, warning=FALSE}
+two_week_ahead <- arx_forecaster(
+  jhu, 
+  outcome = "death_rate", 
+  predictors = c("case_rate", "death_rate"),
+  args_list = arx_args_list(
+    lags = list(c(0,1,2,3,7,14), c(0,7,14)),
+    ahead = 14
+  )
+) 
+```
+
+In this case, we have used a number of different lags for the case rate, while only using 3 weekly lags for the death rate (as predictors). The result is both a fitted model object which could be used any time in the future to create different forecasts, as well as a set of predicted values (and prediction intervals) for each location 14 days after the last available time value in the data.
+
+```{r print-model}
+two_week_ahead$epi_workflow
+```
+
+The fitted model here involved preprocessing the data to appropriately generate lagged predictors, estimating a linear model with `stats::lm()` and then postprocessing the results to be meaningful for epidemiological tasks. We can also examine the predictions.
+
+```{r show-preds}
+two_week_ahead$predictions
+```
+
+The results above show a distributional forecast produced using data through the end of 2021 for the 14th of January 2022. A prediction for the death rate per 100K inhabitants is available for every state (`geo_value`) along with a 90% predictive interval.
+
+<!--
 
-**Example:**  
 During a quiet period, a user decides they want to first predict whether a surge is about to occur, say using variant information from GISAID. Then for surging locations, they want to train an AR model using past surges in the same location. Everywhere else, they predict a flat line. We should be able to do this in a few lines of code.
 
 Delphi's own forecasts have been produced/evaluated in this way for a while now, but the code base is scattered and evolving. We want to consolidate, generalize, and simplify to allow others to benefit as well.
@@ -84,25 +127,11 @@ my_fcaster(lead(cases, 7) ~ ., epi_df, key_vars, time_vars)
 
 The hypothetical example of first classifying, then fitting different models would also fit into this framework. And this isn't far from our current production models.
 
-### Why doesn't this exist
 
-Closest neighbor is [`{fable}`](https://fable.tidyverts.org/). It does some of what we want but has a few major downsides:
 
-1. Small set of standard Time Series models.
-    * Small modifications are hard (e.g. can't "just use" `glmnet` instead of `lm`) in an AR model.
-1. Multi-period forecasting is model-based only.
-    * This is "iterative" forecasting, and is very bad in epidemiology.
-    * Much better with simple models to use "direct" forecasting.
-1. Confidence bands are model-based only.
-    * In epi tasks, these dramatically under-cover.
-1. Layering is not possible/natural
-1. Can't use methods that aren't already implemented.
-
-The forecasts we did above can't be produced with `{fable}`.
-
-
-**However:** The developers behind `{fable}` wrote a package called `{fabletools}` that powers model creation (based on `R6`). We can almost certainly borrow some of that technology to lever up.
 
 ### What this isn't
 
 This is not a framework for SIR models. We intend to create some simple versions, but advanced models---those that use variants, hospitalizations, different types of immunity, age stratification, etc.---cannot be compartmentalized in the same way (though see [pypm](https://pypm.github.io/home/)). These types of models also are better at scenario modeling than short term forecasts unless they are quite complicated.
+
+-->
diff --git a/README.md b/README.md
index 538a571ae..689c69076 100644
--- a/README.md
+++ b/README.md
@@ -8,8 +8,9 @@
 [![R-CMD-check](https://github.com/cmu-delphi/epipredict/workflows/R-CMD-check/badge.svg)](https://github.com/cmu-delphi/epipredict/actions)
 <!-- badges: end -->
 
-**Note:** This package is currently in development and likely will not
-work as expected.
+**Note:** This package is currently in development and may not work as
+expected. Please file bug reports as issues in this repo, and we will do
+our best to address them quickly.
 
 ## Installation
 
@@ -17,8 +18,8 @@ You can install the development version of epipredict from
 [GitHub](https://github.com/) with:
 
 ``` r
-# install.packages("devtools")
-devtools::install_github("cmu-delphi/epipredict")
+# install.packages("remotes")
+remotes::install_github("cmu-delphi/epipredict")
 ```
 
 ## Documentation
@@ -32,48 +33,173 @@ You can view documentation for the `main` branch at
 
 1.  A set of basic, easy-to-use forecasters that work out of the box.
     You should be able to do a reasonably limited amount of
-    customization on them. (Any serious customization happens with point
-    number 2.) For the basic forecasters, we should provide, at least:
-    -   Baseline flat-line forecaster
-    -   Autoregressive forecaster
-    -   Autoregressive classifier
+    customization on them. For the basic forecasters, we currently
+    provide:
+    - Baseline flat-line forecaster
+    - Autoregressive forecaster
+    - Autoregressive classifier
 2.  A framework for creating custom forecasters out of modular
     components. There are four types of components:
-    -   Preprocessor: do things to the data before model training
-    -   Trainer: train a model on data, resulting in a fitted model
-        object
-    -   Predictor: make predictions, using a fitted model object
-    -   Postprocessor: do things to the predictions before returning
-
-**Target audience:**
-
--   Basic. Has data, calls forecaster with default arguments.
--   Intermediate. Wants to examine changes to the arguments, take
-    advantage of built in flexibility.
--   Advanced. Wants to write their own forecasters. Maybe willing to
-    build up from some components that we write.
-
-The Advanced user should find their task to be relatively easy (and
-we’ll show them how).
-
-**Example:**  
-During a quiet period, a user decides they want to first predict whether
-a surge is about to occur, say using variant information from GISAID.
-Then for surging locations, they want to train an AR model using past
-surges in the same location. Everywhere else, they predict a flat line.
-We should be able to do this in a few lines of code.
-
-Delphi’s own forecasts have been produced/evaluated in this way for a
-while now, but the code base is scattered and evolving. We want to
-consolidate, generalize, and simplify to allow others to benefit as
-well.
-
-The basic framework should allow for something like the following. This
-would feel very familiar to anyone working in `R`+`{tidyverse}`.
+    - Preprocessor: do things to the data before model training
+    - Trainer: train a model on data, resulting in a fitted model object
+    - Predictor: make predictions, using a fitted model object
+    - Postprocessor: do things to the predictions before returning
 
-**Simple linear autoregressive model with scaling (modular)**
+**Target audiences:**
+
+- Basic. Has data, calls forecaster with default arguments.
+- Intermediate. Wants to examine changes to the arguments, take
+  advantage of built in flexibility.
+- Advanced. Wants to write their own forecasters. Maybe willing to build
+  up from some components that we write.
+
+The Advanced user should find their task to be relatively easy. Examples
+of these tasks are illustrated in the [vignettes and
+articles](https://cmu-delphi.github.io/epipredict).
+
+## Intermediate example
+
+The package comes with some built-in historical data for illustration,
+but up-to-date versions of this could be downloaded with the
+[`{covidcast}`
+package](https://cmu-delphi.github.io/covidcast/covidcastR/index.html)
+and processed using
+[`{epiprocess}`](https://cmu-delphi.github.io/epiprocess/).[^1]
+
+``` r
+library(tidyverse)
+library(epipredict)
+jhu <- case_death_rate_subset
+jhu
+#> An `epi_df` object, 20,496 x 4 with metadata:
+#> * geo_type  = state
+#> * time_type = day
+#> * as_of     = 2022-05-31 12:08:25
+#> 
+#> # A tibble: 20,496 × 4
+#>    geo_value time_value case_rate death_rate
+#>  * <chr>     <date>         <dbl>      <dbl>
+#>  1 ak        2020-12-31      35.9      0.158
+#>  2 al        2020-12-31      65.1      0.438
+#>  3 ar        2020-12-31      66.0      1.27 
+#>  4 as        2020-12-31       0        0    
+#>  5 az        2020-12-31      76.8      1.10 
+#>  6 ca        2020-12-31      96.0      0.751
+#>  7 co        2020-12-31      35.8      0.649
+#>  8 ct        2020-12-31      52.1      0.819
+#>  9 dc        2020-12-31      31.0      0.601
+#> 10 de        2020-12-31      65.2      0.807
+#> # … with 20,486 more rows
+```
+
+To create and train a simple auto-regressive forecaster to predict the
+death rate two weeks into the future using past (lagged) deaths and
+cases, we could use the following function.
 
 ``` r
+two_week_ahead <- arx_forecaster(
+  jhu, 
+  outcome = "death_rate", 
+  predictors = c("case_rate", "death_rate"),
+  args_list = arx_args_list(
+    lags = list(c(0,1,2,3,7,14), c(0,7,14)),
+    ahead = 14
+  )
+) 
+```
+
+In this case, we have used a number of different lags for the case rate,
+while only using 3 weekly lags for the death rate (as predictors). The
+result is both a fitted model object which could be used any time in the
+future to create different forecasts, as well as a set of predicted
+values (and prediction intervals) for each location 14 days after the
+last available time value in the data.
+
+``` r
+two_week_ahead$epi_workflow
+#> ══ Epi Workflow [trained] ══════════════════════════════════════════════════════
+#> Preprocessor: Recipe
+#> Model: linear_reg()
+#> Postprocessor: Frosting
+#> 
+#> ── Preprocessor ────────────────────────────────────────────────────────────────
+#> 5 Recipe Steps
+#> 
+#> • step_epi_lag()
+#> • step_epi_lag()
+#> • step_epi_ahead()
+#> • step_naomit()
+#> • step_naomit()
+#> 
+#> ── Model ───────────────────────────────────────────────────────────────────────
+#> 
+#> Call:
+#> stats::lm(formula = ..y ~ ., data = data)
+#> 
+#> Coefficients:
+#>       (Intercept)    lag_0_case_rate    lag_1_case_rate    lag_2_case_rate  
+#>        -0.0073358          0.0030365          0.0012467          0.0009536  
+#>   lag_3_case_rate    lag_7_case_rate   lag_14_case_rate   lag_0_death_rate  
+#>         0.0011425          0.0012481          0.0003041          0.1351769  
+#>  lag_7_death_rate  lag_14_death_rate  
+#>         0.1471127          0.1062473  
+#> 
+#> ── Postprocessor ───────────────────────────────────────────────────────────────
+#> 5 Frosting Layers
+#> 
+#> • layer_predict()
+#> • layer_residual_quantiles()
+#> • layer_add_forecast_date()
+#> • layer_add_target_date()
+#> • layer_threshold()
+```
+
+The fitted model here involved preprocessing the data to appropriately
+generate lagged predictors, estimating a linear model with `stats::lm()`
+and then postprocessing the results to be meaningful for epidemiological
+tasks. We can also examine the predictions.
+
+``` r
+two_week_ahead$predictions
+#> An `epi_df` object, 56 x 6 with metadata:
+#> * geo_type  = state
+#> * time_type = day
+#> * as_of     = 2022-05-31 12:08:25
+#> 
+#> # A tibble: 56 × 6
+#>    geo_value time_value .pred         .pred_distn forecast_date target_date
+#>  * <chr>     <date>     <dbl>              <dist> <date>        <date>     
+#>  1 ak        2021-12-31 0.449 [0.05, 0.95]<q-rng> 2021-12-31    2022-01-14 
+#>  2 al        2021-12-31 0.574 [0.05, 0.95]<q-rng> 2021-12-31    2022-01-14 
+#>  3 ar        2021-12-31 0.673 [0.05, 0.95]<q-rng> 2021-12-31    2022-01-14 
+#>  4 as        2021-12-31 0     [0.05, 0.95]<q-rng> 2021-12-31    2022-01-14 
+#>  5 az        2021-12-31 0.679 [0.05, 0.95]<q-rng> 2021-12-31    2022-01-14 
+#>  6 ca        2021-12-31 0.575 [0.05, 0.95]<q-rng> 2021-12-31    2022-01-14 
+#>  7 co        2021-12-31 0.862 [0.05, 0.95]<q-rng> 2021-12-31    2022-01-14 
+#>  8 ct        2021-12-31 1.07  [0.05, 0.95]<q-rng> 2021-12-31    2022-01-14 
+#>  9 dc        2021-12-31 2.12  [0.05, 0.95]<q-rng> 2021-12-31    2022-01-14 
+#> 10 de        2021-12-31 1.09  [0.05, 0.95]<q-rng> 2021-12-31    2022-01-14 
+#> # … with 46 more rows
+```
+
+The results above show a distributional forecast produced using data
+through the end of 2021 for the 14th of January 2022. A prediction for
+the death rate per 100K inhabitants is available for every state
+(`geo_value`) along with a 90% predictive interval.
+
+<!--
+
+During a quiet period, a user decides they want to first predict whether a surge is about to occur, say using variant information from GISAID. Then for surging locations, they want to train an AR model using past surges in the same location. Everywhere else, they predict a flat line. We should be able to do this in a few lines of code.
+
+Delphi's own forecasts have been produced/evaluated in this way for a while now, but the code base is scattered and evolving. We want to consolidate, generalize, and simplify to allow others to benefit as well.
+
+The basic framework should allow for something like the following. This would
+feel very familiar to anyone working in `R`+`{tidyverse}`.
+
+**Simple linear autoregressive model with scaling (modular)**
+
+
+```r
 my_fcaster = new_epi_predictor() %>%
   add_preprocessor(scaler, var = cases, by = pop) %>%
   add_preprocessor(lagger, var = dv_cli, lags = c(0, 7, 14)) %>%
@@ -84,43 +210,23 @@ my_fcaster = new_epi_predictor() %>%
 
 Then you could run this on an `epi_df` with one line.
 
-``` r
+
+```r
 my_fcaster(lead(cases, 7) ~ ., epi_df, key_vars, time_vars)
 ```
 
-The hypothetical example of first classifying, then fitting different
-models would also fit into this framework. And this isn’t far from our
-current production models.
-
-### Why doesn’t this exist
-
-Closest neighbor is [`{fable}`](https://fable.tidyverts.org/). It does
-some of what we want but has a few major downsides:
-
-1.  Small set of standard Time Series models.
-    -   Small modifications are hard (e.g. can’t “just use” `glmnet`
-        instead of `lm`) in an AR model.
-2.  Multi-period forecasting is model-based only.
-    -   This is “iterative” forecasting, and is very bad in
-        epidemiology.
-    -   Much better with simple models to use “direct” forecasting.
-3.  Confidence bands are model-based only.
-    -   In epi tasks, these dramatically under-cover.
-4.  Layering is not possible/natural
-5.  Can’t use methods that aren’t already implemented.
-
-The forecasts we did above can’t be produced with `{fable}`.
-
-**However:** The developers behind `{fable}` wrote a package called
-`{fabletools}` that powers model creation (based on `R6`). We can almost
-certainly borrow some of that technology to lever up.
-
-### What this isn’t
-
-This is not a framework for SIR models. We intend to create some simple
-versions, but advanced models—those that use variants, hospitalizations,
-different types of immunity, age stratification, etc.—cannot be
-compartmentalized in the same way (though see
-[pypm](https://pypm.github.io/home/)). These types of models also are
-better at scenario modeling than short term forecasts unless they are
-quite complicated.
+The hypothetical example of first classifying, then fitting different models would also fit into this framework. And this isn't far from our current production models.
+
+
+
+
+### What this isn't
+
+This is not a framework for SIR models. We intend to create some simple versions, but advanced models---those that use variants, hospitalizations, different types of immunity, age stratification, etc.---cannot be compartmentalized in the same way (though see [pypm](https://pypm.github.io/home/)). These types of models also are better at scenario modeling than short term forecasts unless they are quite complicated.
+
+-->
+
+[^1]: Other epidemiological signals for non-Covid related illnesses are
+    available with [`{epidatr}`](https://github.com/cmu-delphi/epidatr)
+    which interfaces directly to Delphi’s [Epidata
+    API](https://cmu-delphi.github.io/delphi-epidata/)
diff --git a/_pkgdown.yml b/_pkgdown.yml
index 719d7cc22..dacf88cbf 100644
--- a/_pkgdown.yml
+++ b/_pkgdown.yml
@@ -1,6 +1,20 @@
 url: https://cmu-delphi.github.io/epipredict/
 template:
   bootstrap: 5
+  bootswatch: cosmo
+
+navbar:
+  bg: dark
+
+home:
+  links:
+  - text: The epiprocess R package
+    href: https://cmu-delphi.github.io/epiprocess/
+  - text: The covidcast R package
+    href: https://cmu-delphi.github.io/covidcast/covidcastR/
+  - text: The epidatr R package
+    href: https://github.com/cmu-delphi/epidatr/
+
 
 
 reference:
diff --git a/man/arx_args_list.Rd b/man/arx_args_list.Rd
index 20a6e778e..d9e87101e 100644
--- a/man/arx_args_list.Rd
+++ b/man/arx_args_list.Rd
@@ -49,7 +49,7 @@ before calculating residual quantiles. See the \code{by_key} argument to
 \code{character(0)} performs no grouping.}
 }
 \value{
-A list containing updated parameter choices.
+A list containing updated parameter choices with class \code{arx_alist}.
 }
 \description{
 Constructs a list of arguments for \code{\link[=arx_forecaster]{arx_forecaster()}}.
diff --git a/man/flatline_args_list.Rd b/man/flatline_args_list.Rd
index 2982d0107..1bdb48e0e 100644
--- a/man/flatline_args_list.Rd
+++ b/man/flatline_args_list.Rd
@@ -45,7 +45,7 @@ before calculating residual quantiles. See the \code{by_key} argument to
 \code{character(0)} performs no grouping.}
 }
 \value{
-A list containing updated parameter choices.
+A list containing updated parameter choices with class \code{flatline_alist}.
 }
 \description{
 Constructs a list of arguments for \code{\link[=flatline_forecaster]{flatline_forecaster()}}.
diff --git a/tests/testthat/test-arx_args_list.R b/tests/testthat/test-arx_args_list.R
index c540eb3fc..61dd374fe 100644
--- a/tests/testthat/test-arx_args_list.R
+++ b/tests/testthat/test-arx_args_list.R
@@ -1,4 +1,5 @@
 test_that("arx_args checks inputs", {
+  expect_s3_class(arx_args_list(), "arx_alist")
   expect_error(arx_args_list(ahead = c(0, 4)))
   expect_error(arx_args_list(min_train_window = c(28, 65)))
 
diff --git a/vignettes/articles/sliding.Rmd b/vignettes/articles/sliding.Rmd
index 5d086b516..eec8b4607 100644
--- a/vignettes/articles/sliding.Rmd
+++ b/vignettes/articles/sliding.Rmd
@@ -93,10 +93,9 @@ fc_time_values <- seq(as.Date("2020-08-01"), as.Date("2021-12-01"),
 k_week_ahead <- function(epi_df, outcome, predictors, ahead = 7, engine) {
   epi_df %>%
     epi_slide( 
-      ~arx_forecaster(
+      ~ arx_forecaster(
         .x, outcome, predictors, engine, 
-        args_list = arx_args_list(ahead = ahead)
-      ) %>%
+        args_list = arx_args_list(ahead = ahead)) %>%
         extract2("predictions") %>% 
         select(-c(geo_value, time_value)), 
       n = 120, 
@@ -107,19 +106,21 @@ k_week_ahead <- function(epi_df, outcome, predictors, ahead = 7, engine) {
     mutate(engine_type = engine$engine)
 }
 
-# Generate the forecasts, and bind them together
+# Generate the forecasts and bind them together
 fc <- bind_rows(
   purrr::map_dfr(
     c(7,14,21,28), 
-    ~ k_week_ahead(x_latest, "case_rate", c("case_rate", "percent_cli"), .x,
-                   engine = linear_reg())
-    ),
+    ~ k_week_ahead(
+      x_latest, "case_rate", c("case_rate", "percent_cli"), .x,
+      engine = linear_reg())
+  ),
   purrr::map_dfr(
     c(7,14,21,28), 
-    ~ k_week_ahead(x_latest, "case_rate", c("case_rate", "percent_cli"), .x,
-                   engine = rand_forest(mode = "regression"))
-    )) %>% 
-  mutate(.pred_distn = nested_quantiles(fc_.pred_distn)) %>% # "nested" list-col
+    ~ k_week_ahead(
+      x_latest, "case_rate", c("case_rate", "percent_cli"), .x,
+      engine = rand_forest(mode = "regression"))
+  )) %>% 
+  mutate(.pred_distn = nested_quantiles(fc_.pred_distn)) %>%
   unnest(.pred_distn) %>% 
   pivot_wider(names_from = tau, values_from = q) 
 ```
@@ -127,20 +128,19 @@ fc <- bind_rows(
 Here, `arx_forecaster()` does all the heavy lifting. It creates leads of the
 target (respecting time stamps and locations) along with lags of the features
 (here, the response and doctors visits), estimates a forecasting model using the
-specified engine, creates predictions, and non-parametric confidence bands. All
-of these are tunable parameters.
+specified engine, creates predictions, and non-parametric confidence bands. 
 
 To see how the predictions compare, we plot them on top of the latest case
-rates. Note that even though we've fitted on all states, we'll just display the
+rates. Note that even though we've fitted the model on all states, 
+we'll just display the
 results for two states, California (CA) and Florida (FL), to get a sense of the
-model performance while keeping it simple. So feel free to modify the code to
-look over the results for other states.
+model performance while keeping the graphic simple. 
 
 ```{r plot-arx, message = FALSE, warning = FALSE, fig.width = 9, fig.height = 6}
 fc_cafl <- fc %>% filter(geo_value %in% c("ca", "fl"))
 x_latest_cafl <- x_latest %>% filter(geo_value %in% c("ca", "fl"))
 
-ggplot(fc_cafl, aes(x = fc_target_date, group = time_value, fill = engine_type)) + 
+ggplot(fc_cafl, aes(fc_target_date, group = time_value, fill = engine_type)) + 
   geom_line(data = x_latest_cafl, aes(x = time_value, y = case_rate),
             inherit.aes = FALSE, color = "gray50") +
   geom_ribbon(aes(ymin = `0.05`, ymax = `0.95`), alpha = 0.4) +
@@ -155,14 +155,17 @@ ggplot(fc_cafl, aes(x = fc_target_date, group = time_value, fill = engine_type))
 ```
 
 For the two states of interest, simple linear regression clearly performs better
-than random forest in terms of accuracy of the predictions and not does not
-result in such in overconfident predictions (too narrow confidence bands).
-Though, in general, both approaches do not perform great. This could be because
+than random forest in terms of accuracy of the predictions and does not
+result in such in overconfident predictions (overly narrow confidence bands).
+Though, in general, neither approach produces amazingly accurate forecasts. 
+This could be because
 the behaviour is rather different across states and the effects of other notable
 factors such as age and public health measures may be important to account for
 in such forecasting. Including such factors as well as making enhancements such
-as correcting for outliers are some improvements one could make to our simple
-model in the future.
+as correcting for outliers are some improvements one could make to this simple
+model.[^1]
+
+[^1]: Note that, despite the above caveats, simple models like this tend to out-perform many far more complicated models in the online Covid forecasting due to those models high variance predictions.
 
 ### Example using case data from Canada
 
@@ -175,7 +178,7 @@ daily time series data on COVID-19 cases, deaths, recoveries, testing and
 vaccinations at the health region and province levels. Data are collected from
 publicly available sources such as government datasets and news releases.
 Unfortunately, there is no simple versioned source, so we have created our own
-from the Commit history.
+from the Github commit history.
 
 First, we load versioned case rates at the provincial level. After converting
 these to 7-day averages (due to highly variable provincial reporting
diff --git a/vignettes/epipredict.Rmd b/vignettes/epipredict.Rmd
index b9a949552..cff1840ed 100644
--- a/vignettes/epipredict.Rmd
+++ b/vignettes/epipredict.Rmd
@@ -21,14 +21,13 @@ library(parsnip)
 library(workflows)
 library(recipes)
 library(epiprocess)
-# remotes::install_github("cmu-delphi/epipredict")
 library(epipredict)
 ```
 
 
 # Goals for the package
 
-At a high level, our goal with `epipredict` is to make running simple Machine Learning / Statistical forecasters for epidemiology easy. However, this package is extremely extensible, and that is part of the utility. Our hope is that it is easy for users with epi training and some statistics to fit baseline models while still allowing those with more nuanced statistical understanding to create complicated specializations within the same framework.
+At a high level, our goal with `{epipredict}` is to make running simple Machine Learning / Statistical forecasters for epidemiology easy. However, this package is extremely extensible, and that is part of its utility. Our hope is that it is easy for users with epi training and some statistics to fit baseline models while still allowing those with more nuanced statistical understanding to create complicated specializations using the same framework.
 
 Serving both populations is the main motivation for our efforts, but at the same time, we have tried hard to make it useful.
 
@@ -38,7 +37,7 @@ Serving both populations is the main motivation for our efforts, but at the same
 We provide a set of basic, easy-to-use forecasters that work out of the box. 
 You should be able to do a reasonably limited amount of customization on them. Any serious customization happens with the framework discussed below).
 
-For the basic forecasters, we provide, at least: 
+For the basic forecasters, we provide: 
     
 * Baseline flat-line forecaster 
 * Autoregressive forecaster
@@ -55,11 +54,11 @@ Our framework for creating custom forecasters views the prediction task as a set
 3. Predictor: make predictions, using a fitted model object and processed test data
 4. Postprocessor: manipulate or transform the predictions before returning
     
-Users familiar with [`{tidymodels}`](https://www.tidymodels.org) and especially the [`{workflows}`](https://workflows.tidymodels.org) package will notice a lot of overlap. This is by design, and is in fact a feature. The truth is that `epipredict` is a wrapper around much that is contained in these packages. Therefore, if you want something from this -verse, it should "just work" (we hope).
+Users familiar with [`{tidymodels}`](https://www.tidymodels.org) and especially the [`{workflows}`](https://workflows.tidymodels.org) package will notice a lot of overlap. This is by design, and is in fact a feature. The truth is that `{epipredict}` is a wrapper around much that is contained in these packages. Therefore, if you want something from this -verse, it should "just work" (we hope).
 
 The reason for the overlap is that `{workflows}` _already implements_ the first three steps. And it does this very well. However, it is missing the postprocessing stage and currently has no plans for such an implementation. And this feature is important. The baseline forecaster we provide _requires_ postprocessing. Anything more complicated needs this as well.
 
-The second omission from `{tidymodels}` is support for panel data. Besides epidemiological data, economics, psychology, sociology, and many other areas frequently deal with data of this type. So the framework of behind `epipredict` implements this. In principle, this has nothing to do with epidemiology, and one could simply use this package as a solution for the missing functionality in `{tidymodels}`. Again, this should "just work".
+The second omission from `{tidymodels}` is support for panel data. Besides epidemiological data, economics, psychology, sociology, and many other areas frequently deal with data of this type. So the framework of behind `{epipredict}` implements this. In principle, this has nothing to do with epidemiology, and one could simply use this package as a solution for the missing functionality in `{tidymodels}`. Again, this should "just work".
 
 All of the _panel data_ functionality is implemented through the `epi_df` data type in the companion [`{epiprocess}`](https://cmu-delphi.github.io/epiprocess/) package. There is much more to see there, but for the moment, it's enough to look at a simple one:
 
@@ -70,7 +69,7 @@ jhu
 
 This data is built into the package and contains the measured variables `case_rate` and `death_rate` for COVID-19 at the daily level for each US state for the year 2021. The "panel" part is because we have repeated measurements across a number of locations. 
 
-The `epi_df` encodes the timestamp as `time_value` and the `key` as `geo_value`. While these 2 names are required, the values don't need to actually represent such objects. Additional `key`'s are also supported (like age group, ethnicity, taxonomy, etc.). 
+The `epi_df` encodes the time stamp as `time_value` and the `key` as `geo_value`. While these 2 names are required, the values don't need to actually represent such objects. Additional `key`'s are also supported (like age group, ethnicity, taxonomy, etc.). 
 
 The `epi_df` also contains some metadata that describes the keys as well as the vintage of the data. It's possible that data collected at different times for the _same set_ of `geo_value`'s and `time_value`'s could actually be different. For more details, see [`{epiprocess}`](https://cmu-delphi.github.io/epiprocess/articles/epiprocess.html).
 
@@ -90,9 +89,9 @@ use almost everything they provide.
 * The tidy-team doesn't have plans to do either of these things. (We checked).
 
 * There are two packages that do _time series_ built on `{tidymodels}`, but it's
-"basic" time series: 1-step AR models, exponential smoothing, STL decomposition, etc.[^2] We have never found these models to be particularly helpful for epidemic forecasting, but one could also integrate these methods into our framework.
+"basic" time series: 1-step AR models, exponential smoothing, STL decomposition, etc.[^2] Our group has not prioritized these sorts of models for epidemic forecasting, but one could also integrate these methods into our framework.
 
-[^2]: These are [`{timetk}`](https://business-science.github.io/timetk/index.html) and [`{modeltime}`](https://business-science.github.io/timetk/index.html). There are _lots_ of useful methods there than can be used to do fairly complex machine learning methodology.
+[^2]: These are [`{timetk}`](https://business-science.github.io/timetk/index.html) and [`{modeltime}`](https://business-science.github.io/timetk/index.html). There are _lots_ of useful methods there than can be used to do fairly complex machine learning methodology, though not directly for panel data and not for direct prediction of future targets.
 
 # Show me the basics
 
@@ -103,7 +102,9 @@ We'll estimate the model jointly across all locations using only the most recent
 
 ```{r demo-workflow}
 jhu <- jhu %>% filter(time_value >= max(time_value) - 30)
-out <- arx_forecaster(jhu, outcome = "death_rate",
+out <- arx_forecaster(
+  jhu, 
+  outcome = "death_rate",
   predictors = c("case_rate", "death_rate")
 )
 ```
@@ -117,8 +118,12 @@ The `out` object has two components:
   out$predictions
   ```
   2. A list object of class `epi_workflow`. This object encapsulates all the instructions necessary to create the prediction. More details on this below.
+  ```{r}
+  out$epi_workflow
+  ```
 
-Note that the `time_value` in the predictions is not necessarily meaningful.
+Note that the `time_value` in the predictions is not necessarily meaningful, 
+but it is a required column in an `epi_df`, so it remains here.
 
 By default, the forecaster predicts the outcome (`death_rate`) 1-week ahead, using 3 lags of each predictor (`case_rate` and `death_rate`) at 0 (today), 1 week back and 2 weeks back. The predictors and outcome can be changed directly. The rest of the defaults are encapsulated into a list of arguments. This list is produced by `arx_args_list()`.
 
@@ -131,7 +136,10 @@ knitr::opts_chunk$set(warning = FALSE, message = FALSE)
 ```
 
 ```{r differential-lags}
-out2week <- arx_forecaster(jhu, "death_rate", c("case_rate", "death_rate"),
+out2week <- arx_forecaster(
+  jhu, 
+  outcome = "death_rate", 
+  predictors = c("case_rate", "death_rate"),
   args_list = arx_args_list(
     lags = list(c(0,1,2,3,7,14), c(0,7,14)),
     ahead = 14)
@@ -140,7 +148,7 @@ out2week <- arx_forecaster(jhu, "death_rate", c("case_rate", "death_rate"),
 
 Here, we've used different lags on the `case_rate` and are now predicting 2 weeks ahead. This example also illustrates a major difficulty with the "iterative" versions of AR models. This model doesn't produce forecasts for `case_rate`, and so, would not have data to "plug in" for the necessary lags.[^1]
 
-[^1]: An obvious fix is to instead use a VAR and predict both, but would likely increase the variance, and may lead to less accurate forecasts for the variable of interest.
+[^1]: An obvious fix is to instead use a VAR and predict both, but this would likely increase the variance of the model, and therefore, may lead to less accurate forecasts for the variable of interest.
 
 Another property of the basic model is the predictive interval. We describe this in more detail in a different vignette, but it is easy to request multiple quantiles.
 
@@ -151,22 +159,22 @@ out_q <- arx_forecaster(jhu, "death_rate", c("case_rate", "death_rate"),
   )
 ```
 
-The column `.pred_dstn` in the `predictions` object is actually a "distribution" here parameterized by its quantiles. For this default forecaster, these are created using the quantiles of the residuals of the predictive model (possibly symmetrized). Here, we used 23 quantiles, but one can grab a particular quantile
+The column `.pred_dstn` in the `predictions` object is actually a "distribution" here parameterized by its quantiles. For this default forecaster, these are created using the quantiles of the residuals of the predictive model (possibly symmetrized). Here, we used 23 quantiles, but one can grab a particular quantile,
 
 ```{r q1}
 head(quantile(out_q$predictions$.pred_distn, p = .4))
 ```
 
-Or extract the entire distribution into a "long" `epi_df` with `tau` being the probability and `q` being the value associated to that quantile.
+or extract the entire distribution into a "long" `epi_df` with `tau` being the probability and `q` being the value associated to that quantile.
 
 ```{r q2}
 out_q$predictions %>% 
-  mutate(
-    .pred_distn = nested_quantiles(.pred_distn) # "nested" list-col
-  ) %>% unnest(.pred_distn)
+  # first create a "nested" list-column
+  mutate(.pred_distn = nested_quantiles(.pred_distn)) %>%
+  unnest(.pred_distn) # then unnest it
 ```
 
-Further simple adjustments can be made using the function.
+Additional simple adjustments to the basic forecaster can be made using the function:
 
 ```{r, eval = FALSE}
 arx_args_list(
@@ -194,6 +202,15 @@ out_gb <- arx_forecaster(jhu, "death_rate", c("case_rate", "death_rate"),
   boost_tree(mode = "regression", trees = 20))
 ```
 
+Or quantile regression, using our custom forecasting engine `quantile_reg()`:
+
+```{r quantreg, warning = FALSE}
+out_gb <- arx_forecaster(jhu, "death_rate", c("case_rate", "death_rate"),
+  quantile_reg())
+```
+
+FWIW, this last case (using quantile regression), is not far from what the Delphi production forecast team used for its Covid forecasts over the past few years.
+
 ## Inner workings
 
 Underneath the hood, this forecaster creates (and returns) an `epi_workflow`. 
@@ -215,13 +232,14 @@ there are 2 extensions to the `formula` that `{recipes}` handles:
   lagging, filtering out rows, handling dummy variables, etc.
   2. Using statistics from the training data to eventually process test data. 
     This is a major benefit of `{recipes}`. It prevents what the tidy team calls
-    "data leakage". A simple example is centering a predictor by it's mean. We
+    "data leakage". A simple example is centering a predictor by its mean. We
     need to store the mean of the predictor from the training data and use that
     value on the test data rather than accidentally calculating the mean of
-    the test predictor.
+    the test predictor for centering.
     
 A recipe is processed in 2 steps, first it is "prepped". This calculates and
-stores the result as necessary for use on the test data. Then it is "baked"
+stores any intermediate statistics necessary for use on the test data. 
+Then it is "baked"
 resulting in training data ready for passing into a statistical model (like `lm`).
 
 We have introduced an `epi_recipe`. It's just a `recipe` that knows how to handle
@@ -236,14 +254,14 @@ extract_recipe(out_gb$epi_workflow)
 The "Inputs" are the original `epi_df` and the "roles" that these are assigned.
 None of these are predictors or outcomes. Those will be created 
 by the recipe when it is prepped. The "Operations" are the sequence of 
-instructions to create the cake.
-Here we create lagged predictors, lead the outcome, and then remove NA's.
-Some models like `lm` internally handle NA's, but not everything does, so we
+instructions to create the cake (baked training data).
+Here we create lagged predictors, lead the outcome, and then remove `NA`s.
+Some models like `lm` internally handle `NA`s, but not everything does, so we
 deal with them explicitly. The code to do this (inside the forecaster) is
 
 ```{r}
 er <- epi_recipe(jhu) %>%
-  step_epi_lag(case_rate, death_rate, lag = c(0,7,14)) %>%
+  step_epi_lag(case_rate, death_rate, lag = c(0, 7, 14)) %>%
   step_epi_ahead(death_rate, ahead = 7) %>%
   step_epi_naomit()
 ```
@@ -251,7 +269,7 @@ er <- epi_recipe(jhu) %>%
 While `{recipes}` provides a function `step_lag()`, it assumes that the data
 have no breaks in the sequence of `time_values`. This is a bit dangerous, so
 we avoid that behaviour. Our `lag/ahead` functions also appropriately adjust the
-amount of data to avoid accidently dropping recent predictors from the test
+amount of data to avoid accidentally dropping recent predictors from the test
 data.
 
 ### The model specification
@@ -267,6 +285,7 @@ any issue despite the fact that these functions couldn't be more different.
 lm(formula, data, subset, weights, na.action, method = "qr", 
     model = TRUE, x = FALSE, y = FALSE, qr = TRUE, singular.ok = TRUE, 
     contrasts = NULL, offset, ...)
+
 xgboost(data = NULL, label = NULL, missing = NA, weight = NULL, 
     params = list(), nrounds, verbose = 1, print_every_n = 1L, 
     early_stopping_rounds = NULL, maximize = NULL, save_period = NULL, 
@@ -297,7 +316,7 @@ extract_frosting(out_q$epi_workflow)
 ```
 
 Here we have 5 layers of frosting. The first generates the forecasts from the test data.
-The second uses quantiles of the residuals (by `geo_value`) to create distributional
+The second uses quantiles of the residuals to create distributional
 forecasts. The next two add columns for the date the forecast was made and the
 date for which it is intended to occur. Because we are predicting rates, they 
 should be non-negative, so the last layer thresholds both predicted values and
@@ -307,7 +326,7 @@ intervals at 0. The code to do this (inside the forecaster) is
 f <- frosting() %>%
   layer_predict() %>%
   layer_residual_quantiles(
-    probs = c(.01,.025, seq(.05,.95, by=.05), .975,.99), 
+    probs = c(.01, .025, seq(.05, .95, by = .05), .975, .99), 
     symmetrize = TRUE) %>%
   layer_add_forecast_date() %>%
   layer_add_target_date() %>%
@@ -321,12 +340,20 @@ test_data <- get_test_data(er, jhu)
 ewf %>% add_frosting(f) %>% predict(test_data)
 ```
 
+The above `get_test_data()` function examines the recipe and ensures that enough
+test data is available to create the necessary lags and produce a prediction
+for the desired future time point (after the end of the training data). This mimics
+what would happen if `jhu` contained the most recent available historical data and
+we wanted to actually predict the future. We could have instead used any test data
+that contained the necessary predictors.
+
+
 ## Conclusion
 
 Internally, we provide some simple functions to create reasonable forecasts.
 But ideally, a user could create their own forecasters by building up the 
 components we provide. In other vignettes, we try to walk through some of these
-customizations.
+customizations. 
 
 To illustrate everything above, here is (roughly) the code for the `flatline_forecaster()` applied to the `case_rate`.
 
@@ -359,4 +386,3 @@ and predicting procedure.
 preds
 ```
 
-## Using the workflow object
diff --git a/vignettes/preprocessing-and-models.Rmd b/vignettes/preprocessing-and-models.Rmd
index a2d338f06..544b86b20 100644
--- a/vignettes/preprocessing-and-models.Rmd
+++ b/vignettes/preprocessing-and-models.Rmd
@@ -14,19 +14,19 @@ knitr::opts_chunk$set(echo = TRUE)
 ## Introduction 
 
 The `epipredict` package utilizes the `tidymodels` framework, namely 
-[`recipes`](https://recipes.tidymodels.org/) for 
+[`{recipes}`](https://recipes.tidymodels.org/) for 
 [dplyr](https://dplyr.tidyverse.org/)-like pipeable sequences 
-of feature engineering and [`parsnip`](https://parsnip.tidymodels.org/) for a 
+of feature engineering and [`{parsnip}`](https://parsnip.tidymodels.org/) for a 
 unified interface to a range of models. 
 
 `epipredict` has additional customized feature engineering and preprocessing 
-steps, such as `step_epi_shift()`, `step_population_scaling()`, 
+steps, such as `step_epi_lag()`, `step_population_scaling()`, 
 `step_epi_naomit()`. They can be used along with 
-steps from the `recipes` package for more feature engineering. 
+steps from the `{recipes}` package for more feature engineering. 
 
 In this vignette, we will illustrate some examples of how to use `epipredict`
-with `recipes` and `parsnip` for different purposes of epidemiology forecasting.
-We will focus on simple basic autoregressive models, in which COVID cases and 
+with `recipes` and `parsnip` for different purposes of epidemiological forecasting.
+We will focus on basic autoregressive models, in which COVID cases and 
 deaths in the near future are predicted using a linear combination of cases and 
 deaths in the near past.
 
@@ -49,7 +49,8 @@ library(poissonreg)
 
 ## Poisson Regression 
 
-During COVID-19, Center for Disease Control and Prevention (CDC) gathered models
+During COVID-19, the US Center for Disease Control and Prevention (CDC) collected
+models
 and forecasts to characterize the state of an outbreak and its course. They use
 it to inform public health decision makers on potential consequences of 
 deploying control measures.
@@ -57,7 +58,7 @@ deploying control measures.
 One of the outcomes that the CDC forecasts is [death counts from COVID-19](https://www.cdc.gov/coronavirus/2019-ncov/science/forecasting/forecasting-us.html).
 Although there are many state-of-the-art models, we choose to use Poisson 
 regression, the textbook example for modeling count data, as an illustration
-for using the `epipredict` package with other existing tidymodel packages. 
+for using the `epipredict` package with other existing tidymodels packages. 
 
 ```{r poisson-reg-data}
 x <- covidcast(
@@ -80,12 +81,11 @@ y <- covidcast(
   fetch_tbl() %>%
   select(geo_value, time_value, deaths = value)
 
-case_death_counts_subset <- x %>%
-  full_join(y, by = c("geo_value", "time_value")) %>%
+counts_subset <- full_join(x, y, by = c("geo_value", "time_value")) %>%
   as_epi_df()
 ```
 
-The `case_death_counts_subset` dataset comes from the `epidatr` package,and 
+The `counts_subset` dataset comes from the `epidatr` package, and 
 contains the number of confirmed cases and deaths from June 4, 2021 to 
 Dec 31, 2021 in some U.S. states. 
 
@@ -93,24 +93,19 @@ We wish to predict the 7-day ahead death counts with lagged cases and deaths.
 Furthermore, we will let each state be a dummy variable. Using differential 
 intercept coefficients, we can allow for an intercept shift between states.
 
-The model takes the form of
+The model takes the form
 \begin{aligned}
-\text{log}\left( \mu_{t+7} \right) = \beta_0 + \delta_1 s_{\text{state_1}} +
-\delta_2 s_{\text{state_2}} + ... +  \beta_1 \text{deaths}_{t} + \nonumber \\
+\log\left( \mu_{t+7} \right) &= \beta_0 + \delta_1 s_{\text{state}_1} +
+\delta_2 s_{\text{state}_2} + \cdots +  \nonumber \\ &\quad\beta_1 \text{deaths}_{t} + 
 \beta_2 \text{deaths}_{t-7}  + \beta_3 \text{cases}_{t} + 
-\beta_4 \text{cases}_{t-7}
+\beta_4 \text{cases}_{t-7},
 \end{aligned}
-
-
 where $\mu_{t+7} = \mathbb{E}(y_{t+7})$, and $y_{t+7}$ is assumed to follow a 
 Poisson distribution with mean $\mu_{t+7}$; $s_{\text{state}}$ are dummy 
 variables for each state and take values of either 0 or 1. 
 
-Preprocessing steps will be performed using the `recipes` functions to get the 
-data ready for modeling. 
-
-But before diving into them, it will be helpful to understand what `roles` are
-in the `recipes` framework. 
+Preprocessing steps will be performed to prepare the
+data for model fitting. But before diving into them, it will be helpful to understand what `roles` are in the `recipes` framework. 
 
 ---
 
@@ -122,15 +117,16 @@ For most conventional situations, they are typically “predictor” and/or
 "outcome". Additional roles enable targeted `step_*()` operations on specific 
 variables or groups of variables.
 
-In our case, the role `predictor` is given to explanatory variable that go to the 
-left-hand side of the model. The role `outcome` is the response variable 
+In our case, the role `predictor` is given to explanatory variables on the
+right-hand side of the model (in the equation above). 
+The role `outcome` is the response variable 
 that we wish to predict. `geo_value` and `time_value` are predefined roles 
 that are unique to the `epipredict` package. Since we work with `epi_df` 
 objects, all datasets should have `geo_value` and `time_value` passed through
-automatically with these two roles assigned to the appropriate columns.
+automatically with these two roles assigned to the appropriate columns in the data.
  
 The `recipes` package also allows [manual alterations of roles](https://recipes.tidymodels.org/reference/roles.html) 
-in bulk. There are a couple handy functions that can be used together to help us 
+in bulk. There are a few handy functions that can be used together to help us 
 manipulate variable roles easily. 
 
 > `update_role()` alters an existing role in the recipe or assigns an initial role 
@@ -146,17 +142,17 @@ manipulate variable roles easily.
 ---
 
 Notice in the following preprocessing steps, we used `add_role()` on 
-`geo_value_factor` since currently the default role for it is `raw` and 
-we would like the dummified variables to automatically be `predictor`'s. 
+`geo_value_factor` since, currently, the default role for it is `raw`, but
+we would like to reuse this variable as `predictor`s. 
 
 ```{r}
-case_death_counts_subset <- case_death_counts_subset %>%
+counts_subset <- counts_subset %>%
   mutate(geo_value_factor = as.factor(geo_value)) %>%
   as_epi_df()
 
-epi_recipe(case_death_counts_subset)
+epi_recipe(counts_subset)
 
-r <- epi_recipe(case_death_counts_subset) %>%
+r <- epi_recipe(counts_subset) %>%
   add_role(geo_value_factor, new_role = "predictor") %>%
   step_dummy(geo_value_factor) %>%
   ## Occasionally, data reporting errors / corrections result in negative
@@ -168,14 +164,14 @@ r <- epi_recipe(case_death_counts_subset) %>%
 ```
 
 After specifying the preprocessing steps, we will use the `parsnip` package for
-modeling and getting the prediction for death count, 7 days after the
+modeling and producing the prediction for death count, 7 days after the
 latest available date in the dataset. 
 
 ```{r}
-latest <- get_test_data(r, case_death_counts_subset)
+latest <- get_test_data(r, counts_subset)
 
-wf <- epi_workflow(r, parsnip::poisson_reg()) %>% 
-      fit(case_death_counts_subset)
+wf <- epi_workflow(r, parsnip::poisson_reg()) %>%
+  fit(counts_subset)
 
 predict(wf, latest) %>% filter(!is.na(.pred))
 ```
@@ -196,47 +192,49 @@ rates, by incorporating offset terms in the model.
 
 To model death rates, the Poisson regression would be expressed as:
 \begin{aligned}
-\text{log}\left( \mu_{t+7} \right) = \text{log(population)} + 
-\beta_0 + \delta_1 s_{\text{state_1}} +
-\delta_2 s_{\text{state_2}} + ... +  \beta_1 \text{deaths}_{t} + \nonumber \\
+\log\left( \mu_{t+7} \right) &= \log(\text{population}) + 
+\beta_0 + \delta_1 s_{\text{state}_1} +
+\delta_2 s_{\text{state}_2} + \cdots +  \nonumber \\ &\quad\beta_1 \text{deaths}_{t} + 
 \beta_2 \text{deaths}_{t-7}  + \beta_3 \text{cases}_{t} + 
-\beta_4 \text{cases}_{t-7}
-\end{aligned}
-where $\text{log(population)}$ is the log of the state population that was 
-used to scale the count data on the left-hand side of the equation.
+\beta_4 \text{cases}_{t-7}\end{aligned}
+where $\log(\text{population})$ is the log of the state population that was 
+used to scale the count data on the left-hand side of the equation. This offset
+is simply a predictor with coefficient fixed at 1 rather than estimated.
 
 There are several ways to model rate data given count and population data. 
-First, in the `parsnip` framework, we can specify the formula in `fit()`. 
+First, in the `parsnip` framework, we could specify the formula in `fit()`. 
 However, by doing so we lose the ability to use the `recipes` framework to 
-create new variables since new variables that do not exist in the 
-original dataset such as lag and leads cannot be called directly in `fit()`. 
+create new variables since variables that do not exist in the 
+original dataset (such as, here, the lags and leads) cannot be called directly in `fit()`. 
 
 Alternatively, `step_population_scaling()` and `layer_population_scaling()` 
 in the `epipredict` package can perform the population scaling if we provide the 
-population data, and we can model this using other models such as linear 
-regression, which we will illustrate in the next section.
+population data, which we will illustrate in the next section.
 
 
 ## Linear Regression 
 
 For COVID-19, the CDC required submission of case and death count predictions. 
 However, the Delphi Group preferred to train on rate data instead, because it 
-puts different locations on a similar scale. 
+puts different locations on a similar scale (eliminating the need for location-specific intercepts). 
 We can use a liner regression to predict the death
-rates and use state population data to scale the rates to counts. We will do so 
+rates and use state population data to scale the rates to counts.[^pois] We will do so 
 using `layer_population_scaling()` from the `epipredict` package. 
 
+[^pois]: We could continue with the Poisson model, but we'll switch to the Gaussian likelihood just for simplicity.
+
 Additionally, when forecasts are submitted, prediction intervals should be 
 provided along with the point estimates. This can be obtained via postprocessing
-`layer_residual_quantiles()`. Although worth pointing out that 
+using
+`layer_residual_quantiles()`. It is worth pointing out, however, that 
 `layer_residual_quantiles()` should be used before population scaling or else 
 the transformation will make the results uninterpretable. 
 
-We wish to predict the 7-day ahead death counts with lagged case rates and death
-rates, along with extra behaviour indicator inputs. Namely, we will use survey data 
+We wish, now, to predict the 7-day ahead death counts with lagged case rates and death
+rates, along with some extra behaviourial predictors. Namely, we will use survey data
 from [COVID-19 Trends and Impact Survey](https://cmu-delphi.github.io/delphi-epidata/api/covidcast-signals/fb-survey.html#behavior-indicators).
 
-The survey data shows the estimated percentage of people who wore a mask for 
+The survey data provides the estimated percentage of people who wore a mask for 
 most or all of the time while in public in the past 7 days and the estimated 
 percentage of respondents who reported that all or most people they encountered 
 in public in the past 7 days maintained a distance of at least 6 feet. 
@@ -291,20 +289,20 @@ We will take a subset of death rate and case rate data from the built-in dataset
 `case_death_rate_subset`.
 
 ```{r}
-jhu <- case_death_rate_subset %>%
-  dplyr::filter(
-    time_value >= "2021-06-04", 
-    time_value <= "2021-12-31",
-    geo_value %in% c("ca","fl","tx","ny","nj"))
+jhu <- filter(
+  case_death_rate_subset,
+  time_value >= "2021-06-04", 
+  time_value <= "2021-12-31",
+  geo_value %in% c("ca","fl","tx","ny","nj")
+)
 ```
 
-Preprocessing steps will rely on functions from the `epipredict` package as well
-as the `recipes` package:
-
-Additionally, there are also many functions in the `recipes` package that allow for 
+Preprocessing steps will again rely on functions from the `epipredict` package as well
+as the `recipes` package.
+There are also many functions in the `recipes` package that allow for 
 [scalar transformations](https://recipes.tidymodels.org/reference/#step-functions-individual-transformations),
 such as log transformations and data centering. In our case, we will 
-center the numerical predictors to allow for meaningful interpretation of the 
+center the numerical predictors to allow for a more meaningful interpretation of the 
 intercept. 
 
 ```{r}
@@ -324,29 +322,34 @@ r <- epi_recipe(jhu) %>%
   step_epi_naomit()
 ```
 
-As a sanity check we can see what the training data will look like
+As a sanity check we can examine the structure of the training data:
 ```{r, warning = FALSE}
 glimpse(slice_sample(bake(prep(r, jhu), jhu), n = 6))
 ```
 
 Before directly predicting the results, we need to add postprocessing layers to
 obtain the death counts instead of death rates. Note that the rates used so
-far are "per 100K people" rather than "per person".
+far are "per 100K people" rather than "per person". We'll also use quantile
+regression with the `quantile_reg` engine rather than ordinary least squares
+to create median predictions and a 90% prediction interval.
 
-```{r}
+```{r, warning=FALSE}
 f <- frosting() %>%
   layer_predict() %>%
   layer_add_target_date("2022-01-07") %>% 
   layer_threshold(.pred, lower = 0) %>%
-  layer_residual_quantiles(probs = c(0.05, 0.95), symmetrize = FALSE) %>%
+  layer_quantile_distn() %>%
   layer_naomit(.pred) %>%
   layer_population_scaling(
-    .pred, .pred_distn, df = pop_dat, rate_rescaling = 1e5,
-    by = c("geo_value" = "abbr"), df_pop_col = "pop")
+    .pred, .pred_distn, 
+    df = pop_dat, 
+    rate_rescaling = 1e5,
+    by = c("geo_value" = "abbr"), 
+    df_pop_col = "pop")
 
-wf <- epi_workflow(r, parsnip::linear_reg()) %>%
-    fit(jhu) %>%
-    add_frosting(f)
+wf <- epi_workflow(r, quantile_reg(tau = c(.05, .5, .95))) %>%
+  fit(jhu) %>%
+  add_frosting(f)
 
 latest <- get_test_data(recipe = r, x = jhu)
 p <- predict(wf, latest)
@@ -369,21 +372,21 @@ p %>%
 Last but not least, let's take a look at the regression fit and check the 
 coefficients:
 ```{r, echo =FALSE}
-summary(extract_fit_engine(wf))
+extract_fit_engine(wf)
 ```
 
 ## Classification
 
 Sometimes it is preferable to create a predictive model for surges or upswings
 rather than for raw values. In this case,
-the target is to predict if the future will have increased case rates(up),
-decreased case rates (down), or flat case rates (flat) relative to the current
-level. Such model are often 
-referred to as hotspot prediction models. We will refer to the model notations 
-from [McDonald, Bien, Green, Hu, et al.](#references) but extend the application
+the target is to predict if the future will have increased case rates (denoted `up`),
+decreased case rates (`down`), or flat case rates (`flat`) relative to the current
+level. Such models may be 
+referred to as "hotspot prediction models". We will follow the analysis 
+in [McDonald, Bien, Green, Hu, et al.](#references) but extend the application
 to predict three categories instead of two. 
 
-Hotspot prediction predicts a categorical variable defined in terms of the 
+Hotspot prediction uses a categorical outcome variable defined in terms of the 
 relative change of $Y_{\ell, t+a}$ compared to $Y_{\ell, t}$. 
 Where $Y_{\ell, t}$ denotes the case rates in location $\ell$ at time $t$. 
 We define the response variables as follows:
@@ -392,39 +395,35 @@ $$
  Z_{\ell, t}=
     \begin{cases}
       \text{up}, & \text{if}\ Y^{\Delta}_{\ell, t} > 0.25 \\ 
-      \text{down}, & \text{if}\  Y^{\Delta}_{\ell, t} < -0.25\\
+      \text{down}, & \text{if}\  Y^{\Delta}_{\ell, t} < -0.20\\
       \text{flat}, & \text{otherwise}
     \end{cases}
 $$
 
-where $Y^{\Delta}_{\ell, t} = (Y_{\ell, t}- Y_{\ell, t-7})/(Y_{\ell, t-7})$. 
-We say location $\ell$ is a hotspot at time $t$ when $Z_{\ell,t}$ is categorized
-as 'up', meaning the number of newly reported cases over the past 7 days has 
+where $Y^{\Delta}_{\ell, t} = (Y_{\ell, t}- Y_{\ell, t-7})\ /\ (Y_{\ell, t-7})$. 
+We say location $\ell$ is a hotspot at time $t$ when $Z_{\ell,t}$ is 
+`up`, meaning the number of newly reported cases over the past 7 days has 
 increased by at least 25% compared to the preceding week. When $Z_{\ell,t}$ 
-is categorized as 'down', it suggests that there has been at least a 25% 
-decrease in newly reported cases over the past 7 days. Otherwise, we will 
-consider the trends to be flat. 
-
-The expression of the multinomial regression we will model is as follows:
+is categorized as `down`, it suggests that there has been at least a 20% 
+decrease in newly reported cases over the past 7 days (a 20% decrease is the inverse of a 25% increase). Otherwise, we will 
+consider the trend to be `flat`. 
 
+The expression of the multinomial regression we will use is as follows:
 $$
-\pi_{j}(x) = \text{Pr}(Y = j|x) = \frac{e^{g_j(x)}}{1 + \sum_{k=0}^2 g_j(x) }
+\pi_{j}(x) = \text{Pr}(Z_{\ell,t} = j|x) = \frac{e^{g_j(x)}}{1 + \sum_{k=0}^2 g_j(x) }
 $$
-where $j$ is either down (`Y=0`), flat (`Y=1`), or up (`Y=2`),
-$$g_0(x) = 0$$
+where $j$ is either down, flat, or up
 
 \begin{aligned}
-g_1(x)= \text{ln}\left(\frac{Pr(Y=1|x)}{Pr(Y=0|x)}\right) &= 
-\beta_{10} + \beta_{11}* x_{\text{time_value}} + \delta_{10} s_{\text{state_1}} +
-\delta_{11} s_{\text{state_2}} + ... \nonumber \\
+g_{\text{down}}(x) &= 0.\\
+g_{\text{flat}}(x)&= \text{ln}\left(\frac{Pr(Z_{\ell,t}=\text{flat}|x)}{Pr(Z_{\ell,t}=\text{down}|x)}\right) = 
+\beta_{10} + \beta_{11}t + \delta_{10} s_{\text{state_1}} +
+\delta_{11} s_{\text{state_2}} + \cdots \nonumber \\
 &\quad + \beta_{12} Y^{\Delta}_{\ell, t} +
-\beta_{13} Y^{\Delta}_{\ell, t-7}
-\end{aligned}
-
-\begin{aligned}
-g_2(x)= \text{ln}\left(\frac{Pr(Y=2|x)}{Pr(Y=0|x)}\right) &= 
-\beta_{20} + \beta_{21}* x_{\text{time_value}} + \delta_{20} s_{\text{state_1}} +
-\delta_{21} s_{\text{state_2}} + ... \nonumber \\
+\beta_{13} Y^{\Delta}_{\ell, t-7} \\
+g_{\text{flat}}(x) &= \text{ln}\left(\frac{Pr(Z_{\ell,t}=\text{up}|x)}{Pr(Z_{\ell,t}=\text{down}|x)}\right) = 
+\beta_{20} + \beta_{21}t + \delta_{20} s_{\text{state_1}} +
+\delta_{21} s_{\text{state}_2} + \cdots \nonumber \\
 &\quad + \beta_{22} Y^{\Delta}_{\ell, t} +
 \beta_{23} Y^{\Delta}_{\ell, t-7}
 \end{aligned}
@@ -432,9 +431,7 @@ g_2(x)= \text{ln}\left(\frac{Pr(Y=2|x)}{Pr(Y=0|x)}\right) &=
 
 
 Preprocessing steps are similar to the previous models with an additional step 
-of categorizing the response variables. 
-
-We will take a subset of death rate and case rate data from our built-in dataset 
+of categorizing the response variables. Again, we will use a subset of death rate and case rate data from our built-in dataset 
 `case_death_rate_subset`.
 ```{r}
 jhu <- case_death_rate_subset %>%
@@ -470,31 +467,31 @@ r <- epi_recipe(jhu) %>%
   step_epi_naomit()
 ```
 
-We will fit the multinomial regression and take a look at the predictions:
+We will fit the multinomial regression and examine the predictions:
 
 ```{r, warning=FALSE}
 wf <- epi_workflow(r, parsnip::multinom_reg()) %>%
-    fit(jhu)
+  fit(jhu)
 
 latest <- get_test_data(recipe = r, x = jhu)
 predict(wf, latest) %>% filter(!is.na(.pred_class))
 ```
 
-We can also take a look at the fit:
+We can also look at the estimated coefficients and model summary information:
 ```{r}
 extract_fit_engine(wf)
 ```
 
 One could also use a formula in `epi_recipe()` to achieve the same results as 
 above. However, only one of `add_formula()`, `add_recipe()`, or 
-`workflow_variables()` must be specified. For the purpose of demonstrating 
+`workflow_variables()` can be specified. For the purpose of demonstrating 
 `add_formula` rather than `add_recipe`, we will `prep` and `bake` our recipe to
 return a `data.frame` that could be used for model fitting.
 ```{r}
 b <- bake(prep(r, jhu), jhu)
 
 epi_workflow() %>%
-  add_formula(response ~ geo_value + pct_diff_wk1 + pct_diff_wk2) %>%
+  add_formula(response ~ geo_value + time_value + pct_diff_wk1 + pct_diff_wk2) %>%
   add_model(parsnip::multinom_reg()) %>%
   fit(data = b)
 ```
@@ -506,15 +503,17 @@ is handy for creating correct lags and leads for future predictions.
 
 Let's start with a simple dataset and preprocessing:
 ```{r}
-ex <- case_death_rate_subset %>%
-  dplyr::filter(time_value >= "2021-12-01", 
-                time_value <= "2021-12-31",
-                geo_value == "ca")
+ex <- filter(
+  case_death_rate_subset, 
+  time_value >= "2021-12-01", 
+  time_value <= "2021-12-31",
+  geo_value == "ca"
+)
 
 dim(ex)
 ```
 
-We want to predict death rates on `2022-01-07`, which is 7 days ahead from the 
+We want to predict death rates on `r max(ex$time_value) + 7`, which is 7 days ahead of the 
 latest available date in our dataset. 
 
 We will compare two methods of trying to create lags and leads:
@@ -562,7 +561,7 @@ dates_used_in_training2 <- b2 %>%
 dates_used_in_training2
 ```
 
-The model that is trained without the functions is predicting 7 days ahead from 
+The model that is trained based on the `{recipes}` functions will predict 7 days ahead from 
 `r max(dates_used_in_training2$time_value)`
 instead of 7 days ahead from `r max(dates_used_in_training1$time_value)`.