diff --git a/DESCRIPTION b/DESCRIPTION
index 0dab118fc..2ae9d3337 100644
--- a/DESCRIPTION
+++ b/DESCRIPTION
@@ -25,6 +25,7 @@ URL: https://github.com/cmu-delphi/epipredict/,
 BugReports: https://github.com/cmu-delphi/epipredict/issues/
 Depends:
     epiprocess (>= 0.9.0),
+    epidatasets,
     parsnip (>= 1.0.0),
     R (>= 3.5.0)
 Imports:
@@ -49,7 +50,6 @@ Imports:
     workflows (>= 1.0.0)
 Suggests:
     data.table,
-    epidatasets,
     epidatr (>= 1.0.0),
     fs,
     grf,
@@ -69,6 +69,7 @@ Suggests:
 VignetteBuilder:
     knitr
 Remotes:
+    cmu-delphi/epidatasets,
     cmu-delphi/epidatr,
     cmu-delphi/epiprocess,
     dajmcdon/smoothqr
diff --git a/NAMESPACE b/NAMESPACE
index 86b77716b..5ee13f730 100644
--- a/NAMESPACE
+++ b/NAMESPACE
@@ -215,6 +215,7 @@ export(update_model)
 export(validate_layer)
 export(weighted_interval_score)
 import(distributional)
+import(epidatasets)
 import(epiprocess)
 import(parsnip)
 import(recipes)
diff --git a/NEWS.md b/NEWS.md
index e080d1aa0..3fcf4dc0c 100644
--- a/NEWS.md
+++ b/NEWS.md
@@ -4,12 +4,21 @@ Pre-1.0.0 numbering scheme: 0.x will indicate releases, while 0.0.x will indicat
 
 # epipredict 0.2
 
-## features
+## Breaking changes
+
+- Moved example datasets from being hosted in the package to being loaded
+  from the `epidatasets` package. The datasets can no longer be loaded with
+  `data(<dataset name>)`, but can be accessed with
+  `data(<dataset name>, package = "epidatasets")`, `epidatasets::<dataset name>`
+  or, after loading the package, the name of the dataset alone (#382).
+
+## Improvements
+
 - Add `step_adjust_latency`, which give several methods to adjust the forecast if the `forecast_date` is after the last day of data.
 - (temporary) ahead negative is allowed for `step_epi_ahead` until we have `step_epi_shift`
 
-## bugfixes
-- shifting no columns results in no error for either `step_epi_ahead` and `step_epi_lag`
+## Bug fixes
+- Shifting no columns results in no error for either `step_epi_ahead` or `step_epi_lag`
 - Quantiles produced by `grf` were sometimes out of order.
 
 # epipredict 0.1
diff --git a/R/arx_classifier.R b/R/arx_classifier.R
index 0aec0e362..240bc69ee 100644
--- a/R/arx_classifier.R
+++ b/R/arx_classifier.R
@@ -27,7 +27,7 @@
 #'
 #' @examples
 #' library(dplyr)
-#' jhu <- case_death_rate_subset %>%
+#' jhu <- covid_case_death_rates %>%
 #'   filter(time_value >= as.Date("2021-11-01"))
 #'
 #' out <- arx_classifier(jhu, "death_rate", c("case_rate", "death_rate"))
@@ -104,7 +104,7 @@ arx_classifier <- function(
 #' @seealso [arx_classifier()]
 #' @examples
 #' library(dplyr)
-#' jhu <- case_death_rate_subset %>%
+#' jhu <- covid_case_death_rates %>%
 #'   filter(time_value >= as.Date("2021-11-01"))
 #'
 #' arx_class_epi_workflow(jhu, "death_rate", c("case_rate", "death_rate"))
diff --git a/R/arx_forecaster.R b/R/arx_forecaster.R
index bfd5eaec1..c7aebef46 100644
--- a/R/arx_forecaster.R
+++ b/R/arx_forecaster.R
@@ -25,7 +25,7 @@
 #' @seealso [arx_fcast_epi_workflow()], [arx_args_list()]
 #'
 #' @examples
-#' jhu <- case_death_rate_subset %>%
+#' jhu <- covid_case_death_rates %>%
 #'   dplyr::filter(time_value >= as.Date("2021-12-01"))
 #'
 #' out <- arx_forecaster(
@@ -96,7 +96,7 @@ arx_forecaster <- function(
 #'
 #' @examples
 #' library(dplyr)
-#' jhu <- case_death_rate_subset %>%
+#' jhu <- covid_case_death_rates %>%
 #'   filter(time_value >= as.Date("2021-12-01"))
 #'
 #' arx_fcast_epi_workflow(
diff --git a/R/autoplot.R b/R/autoplot.R
index 8bded03a3..4f4222979 100644
--- a/R/autoplot.R
+++ b/R/autoplot.R
@@ -29,7 +29,7 @@ ggplot2::autoplot
 #' @name autoplot-epipred
 #' @examples
 #' library(dplyr)
-#' jhu <- case_death_rate_subset %>%
+#' jhu <- covid_case_death_rates %>%
 #'   filter(time_value >= as.Date("2021-11-01"))
 #'
 #' r <- epi_recipe(jhu) %>%
@@ -70,7 +70,7 @@ ggplot2::autoplot
 #'
 #' # ------- Plotting canned forecaster output
 #'
-#' jhu <- case_death_rate_subset %>%
+#' jhu <- covid_case_death_rates %>%
 #'   filter(time_value >= as.Date("2021-11-01"))
 #' flat <- flatline_forecaster(jhu, "death_rate")
 #' autoplot(flat, .max_facets = 4)
diff --git a/R/cdc_baseline_forecaster.R b/R/cdc_baseline_forecaster.R
index 3352c5159..44090bf79 100644
--- a/R/cdc_baseline_forecaster.R
+++ b/R/cdc_baseline_forecaster.R
@@ -23,7 +23,7 @@
 #'
 #' @examples
 #' library(dplyr)
-#' weekly_deaths <- case_death_rate_subset %>%
+#' weekly_deaths <- covid_case_death_rates %>%
 #'   select(geo_value, time_value, death_rate) %>%
 #'   left_join(state_census %>% select(pop, abbr), by = c("geo_value" = "abbr")) %>%
 #'   mutate(deaths = pmax(death_rate / 1e5 * pop * 7, 0)) %>%
diff --git a/R/data.R b/R/data.R
deleted file mode 100644
index 71e5bdcd3..000000000
--- a/R/data.R
+++ /dev/null
@@ -1,87 +0,0 @@
-#' Subset of JHU daily state cases and deaths
-#'
-#' This data source of confirmed COVID-19 cases and deaths
-#' is based on reports made available by the Center for
-#' Systems Science and Engineering at Johns Hopkins University.
-#' This example data ranges from Dec 31, 2020 to Dec 31, 2021,
-#' and includes all states.
-#'
-#' @format A tibble with 20,496 rows and 4 variables:
-#' \describe{
-#'   \item{geo_value}{the geographic value associated with each row
-#'       of measurements.}
-#'   \item{time_value}{the time value associated with each row of measurements.}
-#'   \item{case_rate}{7-day average signal of number of new
-#'       confirmed COVID-19 cases per 100,000 population, daily}
-#'   \item{death_rate}{7-day average signal of number of new confirmed
-#'       deaths due to COVID-19 per 100,000 population, daily}
-#' }
-#' @source This object contains a modified part of the
-#'   \href{https://github.com/CSSEGISandData/COVID-19}{COVID-19 Data Repository by the Center for Systems Science and Engineering (CSSE) at Johns Hopkins University}
-#'   as \href{https://cmu-delphi.github.io/delphi-epidata/api/covidcast-signals/jhu-csse.html}{republished in the COVIDcast Epidata API}.
-#'   This data set is licensed under the terms of the
-#'   \href{https://creativecommons.org/licenses/by/4.0/}{Creative Commons Attribution 4.0 International license}
-#'   by the Johns Hopkins University on behalf of its Center for Systems Science
-#'   in Engineering. Copyright Johns Hopkins University 2020.
-#'
-#' Modifications:
-#' * \href{https://cmu-delphi.github.io/delphi-epidata/api/covidcast-signals/jhu-csse.html}{From the COVIDcast Epidata API}:
-#'   These signals are taken directly from the JHU CSSE
-#'   \href{https://github.com/CSSEGISandData/COVID-19}{COVID-19 GitHub repository}
-#'   without changes. The 7-day average signals are computed by Delphi by
-#'   calculating moving averages of the preceding 7 days, so the signal for
-#'   June 7 is the average of the underlying data for June 1 through 7,
-#'   inclusive.
-"case_death_rate_subset"
-
-#' State population data
-#'
-#' Data set on state populations, from the 2019 US Census.
-#'
-#' @format Data frame with 57 rows (including one for the United States as a
-#'   whole, plus the District of Columbia, Puerto Rico Commonwealth,
-#'   American Samoa, Guam, the U.S. Virgin Islands, and the Northern Mariana,
-#'   Islands).
-#'
-#' \describe{
-#'   \item{fips}{FIPS code}
-#'   \item{name}{Full name of the state or territory}
-#'   \item{pop}{Estimate of the location's resident population in
-#'      2019.}
-#'   \item{abbr}{Postal abbreviation for the location}
-#' }
-#'
-#' @source United States Census Bureau, at
-#'   \url{https://www2.census.gov/programs-surveys/popest/datasets/2010-2019/counties/totals/co-est2019-alldata.pdf},
-#'   \url{https://www.census.gov/data/tables/time-series/demo/popest/2010s-total-puerto-rico-municipios.html},
-#'   and \url{https://www.census.gov/data/tables/2010/dec/2010-island-areas.html}
-"state_census"
-
-#' Subset of Statistics Canada median employment income for postsecondary graduates
-#'
-#' @format An [epiprocess::epi_df][epiprocess::as_epi_df] with 10193 rows and 8 variables:
-#' \describe{
-#'   \item{geo_value}{The province in Canada associated with each
-#'      row of measurements.}
-#'   \item{time_value}{The time value, a year integer in YYYY format}
-#'   \item{edu_qual}{The education qualification}
-#'   \item{fos}{The field of study}
-#'   \item{age_group}{The age group; either 15 to 34 or 35 to 64}
-#'   \item{num_graduates}{The number of graduates for the given row of characteristics}
-#'   \item{med_income_2y}{The median employment income two years after graduation}
-#'   \item{med_income_5y}{The median employment income five years after graduation}
-#' }
-#' @source This object contains modified data from the following Statistics Canada
-#' data table: \href{https://www150.statcan.gc.ca/t1/tbl1/en/tv.action?pid=3710011501}{
-#'  Characteristics and median employment income of longitudinal cohorts of postsecondary
-#'  graduates two and five years after graduation, by educational qualification and
-#'  field of study (primary groupings)
-#' }
-#'
-#' Modifications:
-#' * Only provincial-level geo_values are kept
-#' * Only age group, field of study, and educational qualification are kept as
-#'   covariates. For the remaining covariates, we keep aggregated values and
-#'   drop the level-specific rows.
-#' * No modifications were made to the time range of the data
-"grad_employ_subset"
diff --git a/R/epi_recipe.R b/R/epi_recipe.R
index 311b9d073..646cb1b6d 100644
--- a/R/epi_recipe.R
+++ b/R/epi_recipe.R
@@ -43,7 +43,7 @@ epi_recipe.default <- function(x, ...) {
 #' @examples
 #' library(dplyr)
 #' library(recipes)
-#' jhu <- case_death_rate_subset %>%
+#' jhu <- covid_case_death_rates %>%
 #'   filter(time_value > "2021-08-01") %>%
 #'   arrange(geo_value, time_value)
 #'
@@ -263,7 +263,7 @@ is_epi_recipe <- function(x) {
 #' library(dplyr)
 #' library(recipes)
 #'
-#' jhu <- case_death_rate_subset %>%
+#' jhu <- covid_case_death_rates %>%
 #'   filter(time_value > "2021-08-01") %>%
 #'   arrange(geo_value, time_value)
 #'
@@ -347,7 +347,7 @@ update_epi_recipe <- function(x, recipe, ..., blueprint = default_epi_recipe_blu
 #' library(dplyr)
 #' library(workflows)
 #'
-#' jhu <- case_death_rate_subset %>%
+#' jhu <- covid_case_death_rates %>%
 #'   filter(time_value > "2021-11-01", geo_value %in% c("ak", "ca", "ny"))
 #' r <- epi_recipe(jhu) %>%
 #'   step_epi_lag(death_rate, lag = c(0, 7, 14)) %>%
diff --git a/R/epi_workflow.R b/R/epi_workflow.R
index e4cc9cd2a..81b443e7b 100644
--- a/R/epi_workflow.R
+++ b/R/epi_workflow.R
@@ -20,7 +20,7 @@
 #' @importFrom generics augment
 #' @export
 #' @examples
-#' jhu <- case_death_rate_subset
+#' jhu <- covid_case_death_rates
 #'
 #' r <- epi_recipe(jhu) %>%
 #'   step_epi_lag(death_rate, lag = c(0, 7, 14)) %>%
@@ -84,7 +84,7 @@ is_epi_workflow <- function(x) {
 #' @name fit-epi_workflow
 #' @export
 #' @examples
-#' jhu <- case_death_rate_subset %>%
+#' jhu <- covid_case_death_rates %>%
 #'   filter(time_value > "2021-11-01", geo_value %in% c("ak", "ca", "ny"))
 #'
 #' r <- epi_recipe(jhu) %>%
@@ -142,7 +142,7 @@ fit.epi_workflow <- function(object, data, ..., control = workflows::control_wor
 #' @name predict-epi_workflow
 #' @export
 #' @examples
-#' jhu <- case_death_rate_subset
+#' jhu <- covid_case_death_rates
 #'
 #' r <- epi_recipe(jhu) %>%
 #'   step_epi_lag(death_rate, lag = c(0, 7, 14)) %>%
diff --git a/R/epipredict-package.R b/R/epipredict-package.R
index ad0f95295..3dee263e2 100644
--- a/R/epipredict-package.R
+++ b/R/epipredict-package.R
@@ -1,5 +1,5 @@
 ## usethis namespace: start
-#' @import epiprocess parsnip
+#' @import epiprocess parsnip epidatasets
 #' @importFrom checkmate assert_class assert_numeric
 #' @importFrom checkmate test_character test_date test_function
 #' @importFrom checkmate test_integerish test_logical
diff --git a/R/flatline_forecaster.R b/R/flatline_forecaster.R
index 59f54bd86..7efda3efd 100644
--- a/R/flatline_forecaster.R
+++ b/R/flatline_forecaster.R
@@ -24,7 +24,7 @@
 #' @export
 #'
 #' @examples
-#' jhu <- case_death_rate_subset %>%
+#' jhu <- covid_case_death_rates %>%
 #'   dplyr::filter(time_value >= as.Date("2021-12-01"))
 #'
 #' out <- flatline_forecaster(jhu, "death_rate")
diff --git a/R/flusight_hub_formatter.R b/R/flusight_hub_formatter.R
index c1aa00b82..b3e31822c 100644
--- a/R/flusight_hub_formatter.R
+++ b/R/flusight_hub_formatter.R
@@ -1,7 +1,6 @@
 location_to_abbr <- function(location) {
   dictionary <-
     state_census %>%
-    mutate(fips = sprintf("%02d", fips)) %>%
     dplyr::transmute(
       location = dplyr::case_match(fips, "00" ~ "US", .default = fips),
       abbr
@@ -12,7 +11,6 @@ location_to_abbr <- function(location) {
 abbr_to_location <- function(abbr) {
   dictionary <-
     state_census %>%
-    mutate(fips = sprintf("%02d", fips)) %>%
     dplyr::transmute(
       location = dplyr::case_match(fips, "00" ~ "US", .default = fips),
       abbr
@@ -57,7 +55,7 @@ abbr_to_location <- function(abbr) {
 #'
 #' @examples
 #' library(dplyr)
-#' weekly_deaths <- case_death_rate_subset %>%
+#' weekly_deaths <- covid_case_death_rates %>%
 #'   filter(
 #'     time_value >= as.Date("2021-09-01"),
 #'     geo_value %in% c("ca", "ny", "dc", "ga", "vt")
diff --git a/R/frosting.R b/R/frosting.R
index 2672bcdd1..ef32b4a3b 100644
--- a/R/frosting.R
+++ b/R/frosting.R
@@ -9,7 +9,7 @@
 #'
 #' @examples
 #' library(dplyr)
-#' jhu <- case_death_rate_subset %>%
+#' jhu <- covid_case_death_rates %>%
 #'   filter(time_value > "2021-11-01", geo_value %in% c("ak", "ca", "ny"))
 #' r <- epi_recipe(jhu) %>%
 #'   step_epi_lag(death_rate, lag = c(0, 7, 14)) %>%
@@ -128,7 +128,7 @@ update_frosting <- function(x, frosting, ...) {
 #' @export
 #' @examples
 #' library(dplyr)
-#' jhu <- case_death_rate_subset %>%
+#' jhu <- covid_case_death_rates %>%
 #'   filter(time_value > "2021-11-01", geo_value %in% c("ak", "ca", "ny"))
 #' r <- epi_recipe(jhu) %>%
 #'   step_epi_lag(death_rate, lag = c(0, 7, 14)) %>%
@@ -268,7 +268,7 @@ new_frosting <- function() {
 #' wf <- epi_workflow() %>% add_frosting(f)
 #'
 #' # A more realistic example
-#' jhu <- case_death_rate_subset %>%
+#' jhu <- covid_case_death_rates %>%
 #'   filter(time_value > "2021-11-01", geo_value %in% c("ak", "ca", "ny"))
 #'
 #' r <- epi_recipe(jhu) %>%
diff --git a/R/get_test_data.R b/R/get_test_data.R
index 8de698301..442272a2f 100644
--- a/R/get_test_data.R
+++ b/R/get_test_data.R
@@ -19,11 +19,11 @@
 #'   keys, as well other variables in the original dataset.
 #' @examples
 #' # create recipe
-#' rec <- epi_recipe(case_death_rate_subset) %>%
+#' rec <- epi_recipe(covid_case_death_rates) %>%
 #'   step_epi_ahead(death_rate, ahead = 7) %>%
 #'   step_epi_lag(death_rate, lag = c(0, 7, 14)) %>%
 #'   step_epi_lag(case_rate, lag = c(0, 7, 14))
-#' get_test_data(recipe = rec, x = case_death_rate_subset)
+#' get_test_data(recipe = rec, x = covid_case_death_rates)
 #' @importFrom rlang %@%
 #' @importFrom stats na.omit
 #' @export
diff --git a/R/layer_add_forecast_date.R b/R/layer_add_forecast_date.R
index c8f857c89..78cbb79a3 100644
--- a/R/layer_add_forecast_date.R
+++ b/R/layer_add_forecast_date.R
@@ -22,7 +22,7 @@
 #' @export
 #' @examples
 #' library(dplyr)
-#' jhu <- case_death_rate_subset %>%
+#' jhu <- covid_case_death_rates %>%
 #'   filter(time_value > "2021-11-01", geo_value %in% c("ak", "ca", "ny"))
 #' r <- epi_recipe(jhu) %>%
 #'   step_epi_lag(death_rate, lag = c(0, 7, 14)) %>%
diff --git a/R/layer_add_target_date.R b/R/layer_add_target_date.R
index 991ec2140..f8b6a06e5 100644
--- a/R/layer_add_target_date.R
+++ b/R/layer_add_target_date.R
@@ -25,7 +25,7 @@
 #' @export
 #' @examples
 #' library(dplyr)
-#' jhu <- case_death_rate_subset %>%
+#' jhu <- covid_case_death_rates %>%
 #'   filter(time_value > "2021-11-01", geo_value %in% c("ak", "ca", "ny"))
 #' r <- epi_recipe(jhu) %>%
 #'   step_epi_lag(death_rate, lag = c(0, 7, 14)) %>%
diff --git a/R/layer_cdc_flatline_quantiles.R b/R/layer_cdc_flatline_quantiles.R
index fd61c4045..13938d837 100644
--- a/R/layer_cdc_flatline_quantiles.R
+++ b/R/layer_cdc_flatline_quantiles.R
@@ -56,14 +56,14 @@
 #'
 #' @examples
 #' library(dplyr)
-#' r <- epi_recipe(case_death_rate_subset) %>%
+#' r <- epi_recipe(covid_case_death_rates) %>%
 #'   # data is "daily", so we fit this to 1 ahead, the result will contain
 #'   # 1 day ahead residuals
 #'   step_epi_ahead(death_rate, ahead = 1L, skip = TRUE) %>%
 #'   recipes::update_role(death_rate, new_role = "predictor") %>%
 #'   recipes::add_role(time_value, geo_value, new_role = "predictor")
 #'
-#' forecast_date <- max(case_death_rate_subset$time_value)
+#' forecast_date <- max(covid_case_death_rates$time_value)
 #'
 #' f <- frosting() %>%
 #'   layer_predict() %>%
@@ -71,7 +71,7 @@
 #'
 #' eng <- linear_reg(engine = "flatline")
 #'
-#' wf <- epi_workflow(r, eng, f) %>% fit(case_death_rate_subset)
+#' wf <- epi_workflow(r, eng, f) %>% fit(covid_case_death_rates)
 #' preds <- forecast(wf) %>%
 #'   select(-time_value) %>%
 #'   mutate(forecast_date = forecast_date)
@@ -91,7 +91,7 @@
 #'     geom_ribbon(aes(ymin = `0.25`, ymax = `0.75`), fill = blues9[6]) +
 #'     geom_line(aes(y = .pred), color = "orange") +
 #'     geom_line(
-#'       data = case_death_rate_subset %>% filter(geo_value %in% four_states),
+#'       data = covid_case_death_rates %>% filter(geo_value %in% four_states),
 #'       aes(x = time_value, y = death_rate)
 #'     ) +
 #'     scale_x_date(limits = c(forecast_date - 90, forecast_date + 30)) +
diff --git a/R/layer_naomit.R b/R/layer_naomit.R
index 209a663b4..2b111a6f9 100644
--- a/R/layer_naomit.R
+++ b/R/layer_naomit.R
@@ -12,7 +12,7 @@
 #' @export
 #' @examples
 #' library(dplyr)
-#' jhu <- case_death_rate_subset %>%
+#' jhu <- covid_case_death_rates %>%
 #'   filter(time_value > "2021-11-01", geo_value %in% c("ak", "ca", "ny"))
 #'
 #' r <- epi_recipe(jhu) %>%
diff --git a/R/layer_point_from_distn.R b/R/layer_point_from_distn.R
index f14008748..c433717bb 100644
--- a/R/layer_point_from_distn.R
+++ b/R/layer_point_from_distn.R
@@ -17,7 +17,7 @@
 #'
 #' @examples
 #' library(dplyr)
-#' jhu <- case_death_rate_subset %>%
+#' jhu <- covid_case_death_rates %>%
 #'   filter(time_value > "2021-11-01", geo_value %in% c("ak", "ca", "ny"))
 #'
 #' r <- epi_recipe(jhu) %>%
diff --git a/R/layer_predict.R b/R/layer_predict.R
index 6ca17ac24..b59be5f03 100644
--- a/R/layer_predict.R
+++ b/R/layer_predict.R
@@ -17,7 +17,7 @@
 #'
 #' @examples
 #' library(dplyr)
-#' jhu <- case_death_rate_subset %>%
+#' jhu <- covid_case_death_rates %>%
 #'   filter(time_value > "2021-11-01", geo_value %in% c("ak", "ca", "ny"))
 #'
 #' r <- epi_recipe(jhu) %>%
diff --git a/R/layer_predictive_distn.R b/R/layer_predictive_distn.R
index b28e0c765..2b18fbf8e 100644
--- a/R/layer_predictive_distn.R
+++ b/R/layer_predictive_distn.R
@@ -21,7 +21,7 @@
 #'
 #' @examples
 #' library(dplyr)
-#' jhu <- case_death_rate_subset %>%
+#' jhu <- covid_case_death_rates %>%
 #'   filter(time_value > "2021-11-01", geo_value %in% c("ak", "ca", "ny"))
 #'
 #' r <- epi_recipe(jhu) %>%
diff --git a/R/layer_quantile_distn.R b/R/layer_quantile_distn.R
index 5f87ded29..f7bc9259d 100644
--- a/R/layer_quantile_distn.R
+++ b/R/layer_quantile_distn.R
@@ -23,7 +23,7 @@
 #'
 #' @examples
 #' library(dplyr)
-#' jhu <- case_death_rate_subset %>%
+#' jhu <- covid_case_death_rates %>%
 #'   filter(time_value > "2021-11-01", geo_value %in% c("ak", "ca", "ny"))
 #'
 #' r <- epi_recipe(jhu) %>%
diff --git a/R/layer_residual_quantiles.R b/R/layer_residual_quantiles.R
index 1b623adfa..e9b5b7c19 100644
--- a/R/layer_residual_quantiles.R
+++ b/R/layer_residual_quantiles.R
@@ -15,7 +15,7 @@
 #' @export
 #' @examples
 #' library(dplyr)
-#' jhu <- case_death_rate_subset %>%
+#' jhu <- covid_case_death_rates %>%
 #'   filter(time_value > "2021-11-01", geo_value %in% c("ak", "ca", "ny"))
 #'
 #' r <- epi_recipe(jhu) %>%
diff --git a/R/layer_threshold_preds.R b/R/layer_threshold_preds.R
index 56f8059ab..7b8ca0252 100644
--- a/R/layer_threshold_preds.R
+++ b/R/layer_threshold_preds.R
@@ -23,7 +23,7 @@
 #' @export
 #' @examples
 #' library(dplyr)
-#' jhu <- case_death_rate_subset %>%
+#' jhu <- covid_case_death_rates %>%
 #'   filter(time_value < "2021-03-08", geo_value %in% c("ak", "ca", "ar"))
 #' r <- epi_recipe(jhu) %>%
 #'   step_epi_lag(death_rate, lag = c(0, 7, 14)) %>%
diff --git a/R/layers.R b/R/layers.R
index 538fcad1b..752f014c5 100644
--- a/R/layers.R
+++ b/R/layers.R
@@ -42,7 +42,7 @@ layer <- function(subclass, ..., .prefix = "layer_") {
 #'
 #' @examples
 #' library(dplyr)
-#' jhu <- case_death_rate_subset %>%
+#' jhu <- covid_case_death_rates %>%
 #'   filter(time_value > "2021-11-01", geo_value %in% c("ak", "ca", "ny"))
 #' r <- epi_recipe(jhu) %>%
 #'   step_epi_lag(death_rate, lag = c(0, 7, 14)) %>%
diff --git a/R/make_grf_quantiles.R b/R/make_grf_quantiles.R
index 2903c93a8..00e7d0e71 100644
--- a/R/make_grf_quantiles.R
+++ b/R/make_grf_quantiles.R
@@ -61,7 +61,7 @@
 #' # -- a more complicated task
 #'
 #' library(dplyr)
-#' dat <- case_death_rate_subset %>%
+#' dat <- covid_case_death_rates %>%
 #'   filter(time_value > as.Date("2021-10-01"))
 #' rec <- epi_recipe(dat) %>%
 #'   step_epi_lag(case_rate, death_rate, lag = c(0, 7, 14)) %>%
diff --git a/R/model-methods.R b/R/model-methods.R
index f3b374879..a575bd591 100644
--- a/R/model-methods.R
+++ b/R/model-methods.R
@@ -33,7 +33,7 @@
 #' @export
 #' @examples
 #' library(dplyr)
-#' jhu <- case_death_rate_subset %>%
+#' jhu <- covid_case_death_rates %>%
 #'   filter(time_value > "2021-11-01", geo_value %in% c("ak", "ca", "ny"))
 #'
 #' r <- epi_recipe(jhu) %>%
diff --git a/R/pivot_quantiles.R b/R/pivot_quantiles.R
index b01dc392c..2a9e0d4e0 100644
--- a/R/pivot_quantiles.R
+++ b/R/pivot_quantiles.R
@@ -8,7 +8,7 @@
 #' @examples
 #' library(dplyr)
 #' library(tidyr)
-#' edf <- case_death_rate_subset[1:3, ]
+#' edf <- covid_case_death_rates[1:3, ]
 #' edf$q <- dist_quantiles(list(1:5, 2:4, 3:10), list(1:5 / 6, 2:4 / 5, 3:10 / 11))
 #'
 #' edf_nested <- edf %>% mutate(q = nested_quantiles(q))
diff --git a/R/step_adjust_latency.R b/R/step_adjust_latency.R
index 604e06710..3d9f19891 100644
--- a/R/step_adjust_latency.R
+++ b/R/step_adjust_latency.R
@@ -180,12 +180,12 @@
 #' @rdname step_adjust_latency
 #' @export
 #' @examples
-#' jhu <- case_death_rate_subset %>%
+#' jhu <- covid_case_death_rates %>%
 #'   dplyr::filter(time_value > "2021-11-01", geo_value %in% c("ak", "ca", "ny"))
 #' # setting the `as_of` to something realistic
 #' attributes(jhu)$metadata$as_of <- max(jhu$time_value) + 3
 #'
-#' r <- epi_recipe(case_death_rate_subset) %>%
+#' r <- epi_recipe(jhu) %>%
 #'   step_adjust_latency(method = "extend_ahead") %>%
 #'   step_epi_ahead(death_rate, ahead = 7) %>%
 #'   step_epi_lag(death_rate, lag = c(0, 7, 14))
diff --git a/R/step_epi_naomit.R b/R/step_epi_naomit.R
index d81ba398d..bfe8a4faa 100644
--- a/R/step_epi_naomit.R
+++ b/R/step_epi_naomit.R
@@ -8,7 +8,7 @@
 #'   of data loss.
 #' @export
 #' @examples
-#' case_death_rate_subset %>%
+#' covid_case_death_rates %>%
 #'   epi_recipe() %>%
 #'   step_epi_naomit()
 step_epi_naomit <- function(recipe) {
diff --git a/R/step_epi_shift.R b/R/step_epi_shift.R
index d79ad1e2b..beda182e6 100644
--- a/R/step_epi_shift.R
+++ b/R/step_epi_shift.R
@@ -42,7 +42,7 @@
 #' @rdname step_epi_shift
 #' @export
 #' @examples
-#' r <- epi_recipe(case_death_rate_subset) %>%
+#' r <- epi_recipe(covid_case_death_rates) %>%
 #'   step_epi_ahead(death_rate, ahead = 7) %>%
 #'   step_epi_lag(death_rate, lag = c(0, 7, 14))
 #' r
diff --git a/R/step_epi_slide.R b/R/step_epi_slide.R
index c7d3f9fbd..274ce2451 100644
--- a/R/step_epi_slide.R
+++ b/R/step_epi_slide.R
@@ -37,7 +37,7 @@
 #' @export
 #' @examples
 #' library(dplyr)
-#' jhu <- case_death_rate_subset %>%
+#' jhu <- covid_case_death_rates %>%
 #'   filter(time_value >= as.Date("2021-01-01"), geo_value %in% c("ca", "ny"))
 #' rec <- epi_recipe(jhu) %>%
 #'   step_epi_slide(case_rate, death_rate,
diff --git a/R/step_growth_rate.R b/R/step_growth_rate.R
index 00bf9bd87..b3a712313 100644
--- a/R/step_growth_rate.R
+++ b/R/step_growth_rate.R
@@ -32,13 +32,13 @@
 #' @importFrom epiprocess growth_rate
 #' @export
 #' @examples
-#' r <- epi_recipe(case_death_rate_subset) %>%
+#' r <- epi_recipe(covid_case_death_rates) %>%
 #'   step_growth_rate(case_rate, death_rate)
 #' r
 #'
 #' r %>%
-#'   prep(case_death_rate_subset) %>%
-#'   bake(case_death_rate_subset)
+#'   prep(covid_case_death_rates) %>%
+#'   bake(new_data = NULL)
 step_growth_rate <-
   function(recipe,
            ...,
diff --git a/R/step_lag_difference.R b/R/step_lag_difference.R
index 39ae1ba59..2b0af00f2 100644
--- a/R/step_lag_difference.R
+++ b/R/step_lag_difference.R
@@ -15,14 +15,14 @@
 #' @family row operation steps
 #' @export
 #' @examples
-#' r <- epi_recipe(case_death_rate_subset) %>%
+#' r <- epi_recipe(covid_case_death_rates) %>%
 #'   step_lag_difference(case_rate, death_rate, horizon = c(7, 14)) %>%
 #'   step_epi_naomit()
 #' r
 #'
 #' r %>%
-#'   prep(case_death_rate_subset) %>%
-#'   bake(case_death_rate_subset)
+#'   prep(covid_case_death_rates) %>%
+#'   bake(new_data = NULL)
 step_lag_difference <-
   function(recipe,
            ...,
diff --git a/R/tidy.R b/R/tidy.R
index 8fc06398a..47c6efa68 100644
--- a/R/tidy.R
+++ b/R/tidy.R
@@ -27,7 +27,7 @@
 #'
 #' @examples
 #' library(dplyr)
-#' jhu <- case_death_rate_subset %>%
+#' jhu <- covid_case_death_rates %>%
 #'   filter(time_value > "2021-11-01", geo_value %in% c("ak", "ca", "ny"))
 #'
 #' r <- epi_recipe(jhu) %>%
diff --git a/R/weighted_interval_score.R b/R/weighted_interval_score.R
index cd67bbee9..48741de7d 100644
--- a/R/weighted_interval_score.R
+++ b/R/weighted_interval_score.R
@@ -44,13 +44,13 @@
 #'
 #' # Using some actual forecasts --------
 #' library(dplyr)
-#' jhu <- case_death_rate_subset %>%
+#' jhu <- covid_case_death_rates %>%
 #'   filter(time_value >= "2021-10-01", time_value <= "2021-12-01")
 #' preds <- flatline_forecaster(
 #'   jhu, "death_rate",
 #'   flatline_args_list(quantile_levels = c(.01, .025, 1:19 / 20, .975, .99))
 #' )$predictions
-#' actuals <- case_death_rate_subset %>%
+#' actuals <- covid_case_death_rates %>%
 #'   filter(time_value == as.Date("2021-12-01") + 7) %>%
 #'   select(geo_value, time_value, actual = death_rate)
 #' preds <- left_join(preds, actuals,
diff --git a/README.Rmd b/README.Rmd
index 36af14cd9..73cedbeaa 100644
--- a/README.Rmd
+++ b/README.Rmd
@@ -81,14 +81,14 @@ interfaces directly to Delphi's
 
 ```{r epidf, message=FALSE}
 library(epipredict)
-case_death_rate_subset
+covid_case_death_rates
 ```
 
 To create and train a simple auto-regressive forecaster to predict the death rate two weeks into the future using past (lagged) deaths and cases, we could use the following function.
 
 ```{r make-forecasts, warning=FALSE}
 two_week_ahead <- arx_forecaster(
-  case_death_rate_subset,
+  covid_case_death_rates,
   outcome = "death_rate",
   predictors = c("case_rate", "death_rate"),
   args_list = arx_args_list(
diff --git a/README.md b/README.md
index 9d912f7e6..561d00a1e 100644
--- a/README.md
+++ b/README.md
@@ -75,7 +75,7 @@ processed using
 
 ``` r
 library(epipredict)
-case_death_rate_subset
+covid_case_death_rates
 #> An `epi_df` object, 20,496 x 4 with metadata:
 #> * geo_type  = state
 #> * time_type = day
@@ -103,7 +103,7 @@ cases, we could use the following function.
 
 ``` r
 two_week_ahead <- arx_forecaster(
-  case_death_rate_subset,
+  covid_case_death_rates,
   outcome = "death_rate",
   predictors = c("case_rate", "death_rate"),
   args_list = arx_args_list(
diff --git a/_pkgdown.yml b/_pkgdown.yml
index 5222999b6..dbe188f4c 100644
--- a/_pkgdown.yml
+++ b/_pkgdown.yml
@@ -121,8 +121,3 @@ reference:
   - title: Other utilities
     contents:
       - clean_f_name
-  - title: Included datasets
-    contents:
-      - case_death_rate_subset
-      - state_census
-      - grad_employ_subset
diff --git a/data-raw/case_death_rate_subset.R b/data-raw/case_death_rate_subset.R
deleted file mode 100644
index 4fa3e64a7..000000000
--- a/data-raw/case_death_rate_subset.R
+++ /dev/null
@@ -1,29 +0,0 @@
-library(tidyverse)
-library(epidatr)
-library(epiprocess)
-
-x <- pub_covidcast(
-  data_source = "jhu-csse",
-  signals = "confirmed_7dav_incidence_prop",
-  time_type = "day",
-  geo_type = "state",
-  time_values = epirange(20201231, 20211231),
-  geo_values = "*"
-) %>%
-  select(geo_value, time_value, case_rate = value)
-
-y <- pub_covidcast(
-  data_source = "jhu-csse",
-  signals = "deaths_7dav_incidence_prop",
-  time_type = "day",
-  geo_type = "state",
-  time_values = epirange(20201231, 20211231),
-  geo_values = "*"
-) %>%
-  select(geo_value, time_value, death_rate = value)
-
-case_death_rate_subset <- x %>%
-  full_join(y, by = c("geo_value", "time_value")) %>%
-  as_epi_df()
-
-usethis::use_data(case_death_rate_subset, overwrite = TRUE)
diff --git a/data-raw/grad_employ_subset.R b/data-raw/grad_employ_subset.R
deleted file mode 100644
index 38719a02e..000000000
--- a/data-raw/grad_employ_subset.R
+++ /dev/null
@@ -1,106 +0,0 @@
-library(epipredict)
-library(epiprocess)
-library(cansim)
-library(dplyr)
-library(stringr)
-library(tidyr)
-
-# https://www150.statcan.gc.ca/t1/tbl1/en/tv.action?pid=3710011501
-statcan_grad_employ <- get_cansim("37-10-0115-01")
-
-gemploy <- statcan_grad_employ %>%
-  select(c(
-    "REF_DATE",
-    "GEO",
-    # "DGUID",
-    # "UOM",
-    # "UOM_ID",
-    # "SCALAR_FACTOR",
-    # "SCALAR_ID",
-    # "VECTOR",
-    # "COORDINATE",
-    "VALUE",
-    "STATUS",
-    # "SYMBOL",
-    # "TERMINATED",
-    # "DECIMALS",
-    # "GeoUID",
-    # "Hierarchy for GEO",
-    # "Classification Code for Educational qualification",
-    # "Hierarchy for Educational qualification",
-    # "Classification Code for Field of study",
-    # "Hierarchy for Field of study",
-    # "Classification Code for Gender",
-    # "Hierarchy for Gender",
-    # "Classification Code for Age group",
-    # "Hierarchy for Age group",
-    # "Classification Code for Status of student in Canada",
-    # "Hierarchy for Status of student in Canada",
-    # "Classification Code for Characteristics after graduation",
-    # "Hierarchy for Characteristics after graduation",
-    # "Classification Code for Graduate statistics",
-    # "Hierarchy for Graduate statistics",
-    # "val_norm",
-    # "Date",
-    "Educational qualification",
-    "Field of study",
-    "Gender",
-    "Age group",
-    "Status of student in Canada",
-    "Characteristics after graduation",
-    "Graduate statistics"
-  )) %>%
-  rename(
-    "geo_value" = "GEO",
-    "time_value" = "REF_DATE",
-    "value" = "VALUE",
-    "status" = "STATUS",
-    "edu_qual" = "Educational qualification",
-    "fos" = "Field of study",
-    "gender" = "Gender",
-    "age_group" = "Age group",
-    "student_status" = "Status of student in Canada",
-    "grad_charac" = "Characteristics after graduation",
-    "grad_stat" = "Graduate statistics"
-  ) %>%
-  mutate(
-    grad_stat = recode_factor(
-      grad_stat,
-      `Number of graduates` = "num_graduates",
-      `Median employment income two years after graduation` = "med_income_2y",
-      `Median employment income five years after graduation` = "med_income_5y"
-    ),
-    time_value = as.integer(time_value)
-  ) %>%
-  pivot_wider(names_from = grad_stat, values_from = value) %>%
-  filter(
-    # Drop aggregates for some columns
-    geo_value != "Canada" &
-      age_group != "15 to 64 years" &
-      edu_qual != "Total, educational qualification" &
-      # Keep aggregates for keys we don't want to keep
-      fos == "Total, field of study" &
-      gender == "Total, gender" &
-      student_status == "Canadian and international students" &
-      # Since we're looking at 2y and 5y employment income, the only
-      # characteristics remaining are:
-      # - Graduates reporting employment income
-      # - Graduates reporting wages, salaries, and commissions only
-      # For simplicity, keep the first one only
-      grad_charac == "Graduates reporting employment income" &
-      # Only keep "good" data
-      is.na(status) &
-      # Drop NA value rows
-      !is.na(num_graduates) & !is.na(med_income_2y) & !is.na(med_income_5y)
-  ) %>%
-  select(-c(status, gender, student_status, grad_charac, fos))
-
-nrow(gemploy)
-ncol(gemploy)
-
-grad_employ_subset <- gemploy %>%
-  as_epi_df(
-    as_of = "2022-07-19",
-    other_keys = c("age_group", "edu_qual")
-  )
-usethis::use_data(grad_employ_subset, overwrite = TRUE)
diff --git a/data-raw/state_census.R b/data-raw/state_census.R
deleted file mode 100644
index cfa74d38b..000000000
--- a/data-raw/state_census.R
+++ /dev/null
@@ -1,10 +0,0 @@
-library(dplyr)
-library(tidyr)
-
-state_census <- readr::read_csv("https://github.com/cmu-delphi/covidcast/raw/c89e4d295550ba1540d64d2cc991badf63ad04e5/Python-packages/covidcast-py/covidcast/geo_mappings/state_census.csv") %>% # nolint: line_length_linter
-  select(STATE, NAME, POPESTIMATE2019, ABBR) %>%
-  rename(abbr = ABBR, name = NAME, pop = POPESTIMATE2019, fips = STATE) %>%
-  mutate(abbr = tolower(abbr)) %>%
-  as_tibble()
-
-usethis::use_data(state_census, overwrite = TRUE)
diff --git a/data/case_death_rate_subset.rda b/data/case_death_rate_subset.rda
deleted file mode 100644
index 2e5ced29e..000000000
Binary files a/data/case_death_rate_subset.rda and /dev/null differ
diff --git a/data/grad_employ_subset.rda b/data/grad_employ_subset.rda
deleted file mode 100644
index 9380b43b5..000000000
Binary files a/data/grad_employ_subset.rda and /dev/null differ
diff --git a/data/state_census.rda b/data/state_census.rda
deleted file mode 100644
index 1118db0d0..000000000
Binary files a/data/state_census.rda and /dev/null differ
diff --git a/inst/extdata/can_prov_cases.rds b/inst/extdata/can_prov_cases.rds
deleted file mode 100644
index b6a10a422..000000000
Binary files a/inst/extdata/can_prov_cases.rds and /dev/null differ
diff --git a/inst/extdata/canada-case-rates.R b/inst/extdata/canada-case-rates.R
deleted file mode 100644
index 7cf88d602..000000000
--- a/inst/extdata/canada-case-rates.R
+++ /dev/null
@@ -1,23 +0,0 @@
-path_to_csvs <- here::here("../../COVID-BC/Covid19Canada/updates.nosync/")
-files <- list.files(path_to_csvs)
-ca_as_ofs <- as.Date(substr(files, 1, 10)) %>%
-  intersect(fc_time_values) %>%
-  as.Date(origin = "1970-01-01")
-
-can <- purrr::map(ca_as_ofs, ~ {
-  readr::read_csv(here::here(path_to_csvs, paste0(.x, ".csv"))) %>%
-    left_join(ca_pop) %>%
-    mutate(time_value = lubridate::dmy(date_report)) %>%
-    filter(province %in% ca_pop$province, time_value > "2020-04-01") %>%
-    mutate(
-      geo_value = province,
-      case_rate = cases / population * 1e5
-    ) %>%
-    select(geo_value, time_value, case_rate) %>%
-    as_epi_df(geo_type = "province", as_of = .x)
-})
-names(can) <- ca_as_ofs
-can <- can %>%
-  bind_rows(.id = "version") %>%
-  mutate(version = lubridate::ymd(version))
-saveRDS(can, "inst/extdata/can_prov_cases.rds")
diff --git a/inst/extdata/epi_archive.rds b/inst/extdata/epi_archive.rds
deleted file mode 100644
index 8ca52be76..000000000
Binary files a/inst/extdata/epi_archive.rds and /dev/null differ
diff --git a/man/Add_model.Rd b/man/Add_model.Rd
index 17b65793c..641bd7676 100644
--- a/man/Add_model.Rd
+++ b/man/Add_model.Rd
@@ -72,7 +72,7 @@ properly.
 }
 \examples{
 library(dplyr)
-jhu <- case_death_rate_subset \%>\%
+jhu <- covid_case_death_rates \%>\%
   filter(time_value > "2021-11-01", geo_value \%in\% c("ak", "ca", "ny"))
 
 r <- epi_recipe(jhu) \%>\%
diff --git a/man/add_epi_recipe.Rd b/man/add_epi_recipe.Rd
index 0da2d55b3..b74267524 100644
--- a/man/add_epi_recipe.Rd
+++ b/man/add_epi_recipe.Rd
@@ -41,7 +41,7 @@ default blueprint to automatically handle \link[epiprocess:epi_df]{epiprocess::e
 library(dplyr)
 library(recipes)
 
-jhu <- case_death_rate_subset \%>\%
+jhu <- covid_case_death_rates \%>\%
   filter(time_value > "2021-08-01") \%>\%
   arrange(geo_value, time_value)
 
diff --git a/man/add_frosting.Rd b/man/add_frosting.Rd
index 94812cbe2..00b899b7c 100644
--- a/man/add_frosting.Rd
+++ b/man/add_frosting.Rd
@@ -27,7 +27,7 @@ Add frosting to a workflow
 }
 \examples{
 library(dplyr)
-jhu <- case_death_rate_subset \%>\%
+jhu <- covid_case_death_rates \%>\%
   filter(time_value > "2021-11-01", geo_value \%in\% c("ak", "ca", "ny"))
 r <- epi_recipe(jhu) \%>\%
   step_epi_lag(death_rate, lag = c(0, 7, 14)) \%>\%
diff --git a/man/adjust_epi_recipe.Rd b/man/adjust_epi_recipe.Rd
index 7468c4ce2..0ed7148a3 100644
--- a/man/adjust_epi_recipe.Rd
+++ b/man/adjust_epi_recipe.Rd
@@ -55,7 +55,7 @@ illustrations of the different types of updates.
 library(dplyr)
 library(workflows)
 
-jhu <- case_death_rate_subset \%>\%
+jhu <- covid_case_death_rates \%>\%
   filter(time_value > "2021-11-01", geo_value \%in\% c("ak", "ca", "ny"))
 r <- epi_recipe(jhu) \%>\%
   step_epi_lag(death_rate, lag = c(0, 7, 14)) \%>\%
diff --git a/man/adjust_frosting.Rd b/man/adjust_frosting.Rd
index c089b3443..3b855a9af 100644
--- a/man/adjust_frosting.Rd
+++ b/man/adjust_frosting.Rd
@@ -36,7 +36,7 @@ illustrations of the different types of updates.
 }
 \examples{
 library(dplyr)
-jhu <- case_death_rate_subset \%>\%
+jhu <- covid_case_death_rates \%>\%
   filter(time_value > "2021-11-01", geo_value \%in\% c("ak", "ca", "ny"))
 r <- epi_recipe(jhu) \%>\%
   step_epi_lag(death_rate, lag = c(0, 7, 14)) \%>\%
diff --git a/man/arx_class_epi_workflow.Rd b/man/arx_class_epi_workflow.Rd
index 713365f17..9f0aae6a1 100644
--- a/man/arx_class_epi_workflow.Rd
+++ b/man/arx_class_epi_workflow.Rd
@@ -48,7 +48,7 @@ may alter the returned \code{epi_workflow} object but can be omitted.
 }
 \examples{
 library(dplyr)
-jhu <- case_death_rate_subset \%>\%
+jhu <- covid_case_death_rates \%>\%
   filter(time_value >= as.Date("2021-11-01"))
 
 arx_class_epi_workflow(jhu, "death_rate", c("case_rate", "death_rate"))
diff --git a/man/arx_classifier.Rd b/man/arx_classifier.Rd
index c7c2cf059..94503f3d3 100644
--- a/man/arx_classifier.Rd
+++ b/man/arx_classifier.Rd
@@ -49,7 +49,7 @@ that it estimates a class at a particular target horizon.
 }
 \examples{
 library(dplyr)
-jhu <- case_death_rate_subset \%>\%
+jhu <- covid_case_death_rates \%>\%
   filter(time_value >= as.Date("2021-11-01"))
 
 out <- arx_classifier(jhu, "death_rate", c("case_rate", "death_rate"))
diff --git a/man/arx_fcast_epi_workflow.Rd b/man/arx_fcast_epi_workflow.Rd
index 4070a3337..c2e38218f 100644
--- a/man/arx_fcast_epi_workflow.Rd
+++ b/man/arx_fcast_epi_workflow.Rd
@@ -43,7 +43,7 @@ use \code{\link[=quantile_reg]{quantile_reg()}}) but can be omitted.
 }
 \examples{
 library(dplyr)
-jhu <- case_death_rate_subset \%>\%
+jhu <- covid_case_death_rates \%>\%
   filter(time_value >= as.Date("2021-12-01"))
 
 arx_fcast_epi_workflow(
diff --git a/man/arx_forecaster.Rd b/man/arx_forecaster.Rd
index d8c7671dc..ff820b8c8 100644
--- a/man/arx_forecaster.Rd
+++ b/man/arx_forecaster.Rd
@@ -41,7 +41,7 @@ This is an autoregressive forecasting model for
 that it estimates a model for a particular target horizon.
 }
 \examples{
-jhu <- case_death_rate_subset \%>\%
+jhu <- covid_case_death_rates \%>\%
   dplyr::filter(time_value >= as.Date("2021-12-01"))
 
 out <- arx_forecaster(
diff --git a/man/autoplot-epipred.Rd b/man/autoplot-epipred.Rd
index 27bfdf5f7..1025759b3 100644
--- a/man/autoplot-epipred.Rd
+++ b/man/autoplot-epipred.Rd
@@ -71,7 +71,7 @@ can simply call \code{autoplot()} on the original \code{epi_df}).
 }
 \examples{
 library(dplyr)
-jhu <- case_death_rate_subset \%>\%
+jhu <- covid_case_death_rates \%>\%
   filter(time_value >= as.Date("2021-11-01"))
 
 r <- epi_recipe(jhu) \%>\%
@@ -112,7 +112,7 @@ autoplot(wf, p, .max_facets = 4)
 
 # ------- Plotting canned forecaster output
 
-jhu <- case_death_rate_subset \%>\%
+jhu <- covid_case_death_rates \%>\%
   filter(time_value >= as.Date("2021-11-01"))
 flat <- flatline_forecaster(jhu, "death_rate")
 autoplot(flat, .max_facets = 4)
diff --git a/man/case_death_rate_subset.Rd b/man/case_death_rate_subset.Rd
deleted file mode 100644
index 119c8ee26..000000000
--- a/man/case_death_rate_subset.Rd
+++ /dev/null
@@ -1,49 +0,0 @@
-% Generated by roxygen2: do not edit by hand
-% Please edit documentation in R/data.R
-\docType{data}
-\name{case_death_rate_subset}
-\alias{case_death_rate_subset}
-\title{Subset of JHU daily state cases and deaths}
-\format{
-A tibble with 20,496 rows and 4 variables:
-\describe{
-\item{geo_value}{the geographic value associated with each row
-of measurements.}
-\item{time_value}{the time value associated with each row of measurements.}
-\item{case_rate}{7-day average signal of number of new
-confirmed COVID-19 cases per 100,000 population, daily}
-\item{death_rate}{7-day average signal of number of new confirmed
-deaths due to COVID-19 per 100,000 population, daily}
-}
-}
-\source{
-This object contains a modified part of the
-\href{https://github.com/CSSEGISandData/COVID-19}{COVID-19 Data Repository by the Center for Systems Science and Engineering (CSSE) at Johns Hopkins University}
-as \href{https://cmu-delphi.github.io/delphi-epidata/api/covidcast-signals/jhu-csse.html}{republished in the COVIDcast Epidata API}.
-This data set is licensed under the terms of the
-\href{https://creativecommons.org/licenses/by/4.0/}{Creative Commons Attribution 4.0 International license}
-by the Johns Hopkins University on behalf of its Center for Systems Science
-in Engineering. Copyright Johns Hopkins University 2020.
-
-Modifications:
-\itemize{
-\item \href{https://cmu-delphi.github.io/delphi-epidata/api/covidcast-signals/jhu-csse.html}{From the COVIDcast Epidata API}:
-These signals are taken directly from the JHU CSSE
-\href{https://github.com/CSSEGISandData/COVID-19}{COVID-19 GitHub repository}
-without changes. The 7-day average signals are computed by Delphi by
-calculating moving averages of the preceding 7 days, so the signal for
-June 7 is the average of the underlying data for June 1 through 7,
-inclusive.
-}
-}
-\usage{
-case_death_rate_subset
-}
-\description{
-This data source of confirmed COVID-19 cases and deaths
-is based on reports made available by the Center for
-Systems Science and Engineering at Johns Hopkins University.
-This example data ranges from Dec 31, 2020 to Dec 31, 2021,
-and includes all states.
-}
-\keyword{datasets}
diff --git a/man/cdc_baseline_forecaster.Rd b/man/cdc_baseline_forecaster.Rd
index 0c7f1e436..e7cefda2d 100644
--- a/man/cdc_baseline_forecaster.Rd
+++ b/man/cdc_baseline_forecaster.Rd
@@ -38,7 +38,7 @@ This forecaster is meant to produce exactly the CDC Baseline used for
 }
 \examples{
 library(dplyr)
-weekly_deaths <- case_death_rate_subset \%>\%
+weekly_deaths <- covid_case_death_rates \%>\%
   select(geo_value, time_value, death_rate) \%>\%
   left_join(state_census \%>\% select(pop, abbr), by = c("geo_value" = "abbr")) \%>\%
   mutate(deaths = pmax(death_rate / 1e5 * pop * 7, 0)) \%>\%
diff --git a/man/epi_recipe.Rd b/man/epi_recipe.Rd
index d0105d1ec..9ef5eb288 100644
--- a/man/epi_recipe.Rd
+++ b/man/epi_recipe.Rd
@@ -59,7 +59,7 @@ columns present in an \code{epi_df}
 \examples{
 library(dplyr)
 library(recipes)
-jhu <- case_death_rate_subset \%>\%
+jhu <- covid_case_death_rates \%>\%
   filter(time_value > "2021-08-01") \%>\%
   arrange(geo_value, time_value)
 
diff --git a/man/epi_workflow.Rd b/man/epi_workflow.Rd
index b29078d52..59e3d5c8f 100644
--- a/man/epi_workflow.Rd
+++ b/man/epi_workflow.Rd
@@ -33,7 +33,7 @@ this operates exactly like a \code{\link[workflows:workflow]{workflows::workflow
 and numerous examples, see there.
 }
 \examples{
-jhu <- case_death_rate_subset
+jhu <- covid_case_death_rates
 
 r <- epi_recipe(jhu) \%>\%
   step_epi_lag(death_rate, lag = c(0, 7, 14)) \%>\%
diff --git a/man/fit-epi_workflow.Rd b/man/fit-epi_workflow.Rd
index 3dfa0029a..83b3b9f51 100644
--- a/man/fit-epi_workflow.Rd
+++ b/man/fit-epi_workflow.Rd
@@ -28,7 +28,7 @@ Fitting an \code{epi_workflow} involves two main steps, which are
 preprocessing the data and fitting the underlying parsnip model.
 }
 \examples{
-jhu <- case_death_rate_subset \%>\%
+jhu <- covid_case_death_rates \%>\%
   filter(time_value > "2021-11-01", geo_value \%in\% c("ak", "ca", "ny"))
 
 r <- epi_recipe(jhu) \%>\%
diff --git a/man/flatline_forecaster.Rd b/man/flatline_forecaster.Rd
index 1803f1078..f70c05e0f 100644
--- a/man/flatline_forecaster.Rd
+++ b/man/flatline_forecaster.Rd
@@ -35,7 +35,7 @@ This forecaster is very similar to that used by the
 \href{https://covid19forecasthub.org}{COVID19ForecastHub}
 }
 \examples{
-jhu <- case_death_rate_subset \%>\%
+jhu <- covid_case_death_rates \%>\%
   dplyr::filter(time_value >= as.Date("2021-12-01"))
 
 out <- flatline_forecaster(jhu, "death_rate")
diff --git a/man/flusight_hub_formatter.Rd b/man/flusight_hub_formatter.Rd
index b2be9b4fe..f48d33d65 100644
--- a/man/flusight_hub_formatter.Rd
+++ b/man/flusight_hub_formatter.Rd
@@ -42,7 +42,7 @@ format for this forecast task is \href{https://github.com/cdcepi/FluSight-foreca
 }
 \examples{
 library(dplyr)
-weekly_deaths <- case_death_rate_subset \%>\%
+weekly_deaths <- covid_case_death_rates \%>\%
   filter(
     time_value >= as.Date("2021-09-01"),
     geo_value \%in\% c("ca", "ny", "dc", "ga", "vt")
diff --git a/man/frosting.Rd b/man/frosting.Rd
index a75f21b61..8534bc6d6 100644
--- a/man/frosting.Rd
+++ b/man/frosting.Rd
@@ -28,7 +28,7 @@ f <- frosting()
 wf <- epi_workflow() \%>\% add_frosting(f)
 
 # A more realistic example
-jhu <- case_death_rate_subset \%>\%
+jhu <- covid_case_death_rates \%>\%
   filter(time_value > "2021-11-01", geo_value \%in\% c("ak", "ca", "ny"))
 
 r <- epi_recipe(jhu) \%>\%
diff --git a/man/get_test_data.Rd b/man/get_test_data.Rd
index 81649452a..16359b9c3 100644
--- a/man/get_test_data.Rd
+++ b/man/get_test_data.Rd
@@ -31,9 +31,9 @@ calculated internally.
 }
 \examples{
 # create recipe
-rec <- epi_recipe(case_death_rate_subset) \%>\%
+rec <- epi_recipe(covid_case_death_rates) \%>\%
   step_epi_ahead(death_rate, ahead = 7) \%>\%
   step_epi_lag(death_rate, lag = c(0, 7, 14)) \%>\%
   step_epi_lag(case_rate, lag = c(0, 7, 14))
-get_test_data(recipe = rec, x = case_death_rate_subset)
+get_test_data(recipe = rec, x = covid_case_death_rates)
 }
diff --git a/man/grad_employ_subset.Rd b/man/grad_employ_subset.Rd
deleted file mode 100644
index 46ba36913..000000000
--- a/man/grad_employ_subset.Rd
+++ /dev/null
@@ -1,44 +0,0 @@
-% Generated by roxygen2: do not edit by hand
-% Please edit documentation in R/data.R
-\docType{data}
-\name{grad_employ_subset}
-\alias{grad_employ_subset}
-\title{Subset of Statistics Canada median employment income for postsecondary graduates}
-\format{
-An \link[epiprocess:epi_df]{epiprocess::epi_df} with 10193 rows and 8 variables:
-\describe{
-\item{geo_value}{The province in Canada associated with each
-row of measurements.}
-\item{time_value}{The time value, a year integer in YYYY format}
-\item{edu_qual}{The education qualification}
-\item{fos}{The field of study}
-\item{age_group}{The age group; either 15 to 34 or 35 to 64}
-\item{num_graduates}{The number of graduates for the given row of characteristics}
-\item{med_income_2y}{The median employment income two years after graduation}
-\item{med_income_5y}{The median employment income five years after graduation}
-}
-}
-\source{
-This object contains modified data from the following Statistics Canada
-data table: \href{https://www150.statcan.gc.ca/t1/tbl1/en/tv.action?pid=3710011501}{
-Characteristics and median employment income of longitudinal cohorts of postsecondary
-graduates two and five years after graduation, by educational qualification and
-field of study (primary groupings)
-}
-
-Modifications:
-\itemize{
-\item Only provincial-level geo_values are kept
-\item Only age group, field of study, and educational qualification are kept as
-covariates. For the remaining covariates, we keep aggregated values and
-drop the level-specific rows.
-\item No modifications were made to the time range of the data
-}
-}
-\usage{
-grad_employ_subset
-}
-\description{
-Subset of Statistics Canada median employment income for postsecondary graduates
-}
-\keyword{datasets}
diff --git a/man/grf_quantiles.Rd b/man/grf_quantiles.Rd
index e6852a55b..f6400edcf 100644
--- a/man/grf_quantiles.Rd
+++ b/man/grf_quantiles.Rd
@@ -85,7 +85,7 @@ predict(out, new_data = tib[1:5, ]) \%>\%
 # -- a more complicated task
 
 library(dplyr)
-dat <- case_death_rate_subset \%>\%
+dat <- covid_case_death_rates \%>\%
   filter(time_value > as.Date("2021-10-01"))
 rec <- epi_recipe(dat) \%>\%
   step_epi_lag(case_rate, death_rate, lag = c(0, 7, 14)) \%>\%
diff --git a/man/layer_add_forecast_date.Rd b/man/layer_add_forecast_date.Rd
index aa224013f..cc92e9a71 100644
--- a/man/layer_add_forecast_date.Rd
+++ b/man/layer_add_forecast_date.Rd
@@ -39,7 +39,7 @@ model fitting, and postprocessing), an appropriate warning will be thrown.
 }
 \examples{
 library(dplyr)
-jhu <- case_death_rate_subset \%>\%
+jhu <- covid_case_death_rates \%>\%
   filter(time_value > "2021-11-01", geo_value \%in\% c("ak", "ca", "ny"))
 r <- epi_recipe(jhu) \%>\%
   step_epi_lag(death_rate, lag = c(0, 7, 14)) \%>\%
diff --git a/man/layer_add_target_date.Rd b/man/layer_add_target_date.Rd
index e522cd6da..37416d24e 100644
--- a/man/layer_add_target_date.Rd
+++ b/man/layer_add_target_date.Rd
@@ -44,7 +44,7 @@ every dataset used (prep, training, and prediction).
 }
 \examples{
 library(dplyr)
-jhu <- case_death_rate_subset \%>\%
+jhu <- covid_case_death_rates \%>\%
   filter(time_value > "2021-11-01", geo_value \%in\% c("ak", "ca", "ny"))
 r <- epi_recipe(jhu) \%>\%
   step_epi_lag(death_rate, lag = c(0, 7, 14)) \%>\%
diff --git a/man/layer_cdc_flatline_quantiles.Rd b/man/layer_cdc_flatline_quantiles.Rd
index c3bc4f257..632fdb65e 100644
--- a/man/layer_cdc_flatline_quantiles.Rd
+++ b/man/layer_cdc_flatline_quantiles.Rd
@@ -85,14 +85,14 @@ adds them on to produce wider intervals as \code{ahead} increases.
 }
 \examples{
 library(dplyr)
-r <- epi_recipe(case_death_rate_subset) \%>\%
+r <- epi_recipe(covid_case_death_rates) \%>\%
   # data is "daily", so we fit this to 1 ahead, the result will contain
   # 1 day ahead residuals
   step_epi_ahead(death_rate, ahead = 1L, skip = TRUE) \%>\%
   recipes::update_role(death_rate, new_role = "predictor") \%>\%
   recipes::add_role(time_value, geo_value, new_role = "predictor")
 
-forecast_date <- max(case_death_rate_subset$time_value)
+forecast_date <- max(covid_case_death_rates$time_value)
 
 f <- frosting() \%>\%
   layer_predict() \%>\%
@@ -100,7 +100,7 @@ f <- frosting() \%>\%
 
 eng <- linear_reg(engine = "flatline")
 
-wf <- epi_workflow(r, eng, f) \%>\% fit(case_death_rate_subset)
+wf <- epi_workflow(r, eng, f) \%>\% fit(covid_case_death_rates)
 preds <- forecast(wf) \%>\%
   select(-time_value) \%>\%
   mutate(forecast_date = forecast_date)
@@ -120,7 +120,7 @@ if (require("ggplot2")) {
     geom_ribbon(aes(ymin = `0.25`, ymax = `0.75`), fill = blues9[6]) +
     geom_line(aes(y = .pred), color = "orange") +
     geom_line(
-      data = case_death_rate_subset \%>\% filter(geo_value \%in\% four_states),
+      data = covid_case_death_rates \%>\% filter(geo_value \%in\% four_states),
       aes(x = time_value, y = death_rate)
     ) +
     scale_x_date(limits = c(forecast_date - 90, forecast_date + 30)) +
diff --git a/man/layer_naomit.Rd b/man/layer_naomit.Rd
index d77112f95..06e09d4b0 100644
--- a/man/layer_naomit.Rd
+++ b/man/layer_naomit.Rd
@@ -25,7 +25,7 @@ Omit \code{NA}s from predictions or other columns
 }
 \examples{
 library(dplyr)
-jhu <- case_death_rate_subset \%>\%
+jhu <- covid_case_death_rates \%>\%
   filter(time_value > "2021-11-01", geo_value \%in\% c("ak", "ca", "ny"))
 
 r <- epi_recipe(jhu) \%>\%
diff --git a/man/layer_point_from_distn.Rd b/man/layer_point_from_distn.Rd
index 276f7cb17..bde2323b1 100644
--- a/man/layer_point_from_distn.Rd
+++ b/man/layer_point_from_distn.Rd
@@ -35,7 +35,7 @@ or set the \code{name} argument to something specific.
 }
 \examples{
 library(dplyr)
-jhu <- case_death_rate_subset \%>\%
+jhu <- covid_case_death_rates \%>\%
   filter(time_value > "2021-11-01", geo_value \%in\% c("ak", "ca", "ny"))
 
 r <- epi_recipe(jhu) \%>\%
diff --git a/man/layer_predict.Rd b/man/layer_predict.Rd
index 8ae92f4c8..db771b882 100644
--- a/man/layer_predict.Rd
+++ b/man/layer_predict.Rd
@@ -59,7 +59,7 @@ postprocessor.
 }
 \examples{
 library(dplyr)
-jhu <- case_death_rate_subset \%>\%
+jhu <- covid_case_death_rates \%>\%
   filter(time_value > "2021-11-01", geo_value \%in\% c("ak", "ca", "ny"))
 
 r <- epi_recipe(jhu) \%>\%
diff --git a/man/layer_predictive_distn.Rd b/man/layer_predictive_distn.Rd
index 240db5f5b..3bd95425b 100644
--- a/man/layer_predictive_distn.Rd
+++ b/man/layer_predictive_distn.Rd
@@ -40,7 +40,7 @@ should be reasonably accurate for models fit using \code{lm} when the new point
 }
 \examples{
 library(dplyr)
-jhu <- case_death_rate_subset \%>\%
+jhu <- covid_case_death_rates \%>\%
   filter(time_value > "2021-11-01", geo_value \%in\% c("ak", "ca", "ny"))
 
 r <- epi_recipe(jhu) \%>\%
diff --git a/man/layer_quantile_distn.Rd b/man/layer_quantile_distn.Rd
index 68192deee..3a5cb60e2 100644
--- a/man/layer_quantile_distn.Rd
+++ b/man/layer_quantile_distn.Rd
@@ -46,7 +46,7 @@ If these engines were used, then this layer will grab out estimated
 }
 \examples{
 library(dplyr)
-jhu <- case_death_rate_subset \%>\%
+jhu <- covid_case_death_rates \%>\%
   filter(time_value > "2021-11-01", geo_value \%in\% c("ak", "ca", "ny"))
 
 r <- epi_recipe(jhu) \%>\%
diff --git a/man/layer_residual_quantiles.Rd b/man/layer_residual_quantiles.Rd
index 39e1ecfbe..a7deded71 100644
--- a/man/layer_residual_quantiles.Rd
+++ b/man/layer_residual_quantiles.Rd
@@ -40,7 +40,7 @@ Creates predictions based on residual quantiles
 }
 \examples{
 library(dplyr)
-jhu <- case_death_rate_subset \%>\%
+jhu <- covid_case_death_rates \%>\%
   filter(time_value > "2021-11-01", geo_value \%in\% c("ak", "ca", "ny"))
 
 r <- epi_recipe(jhu) \%>\%
diff --git a/man/layer_threshold.Rd b/man/layer_threshold.Rd
index 0f4b1dfb7..702c5d713 100644
--- a/man/layer_threshold.Rd
+++ b/man/layer_threshold.Rd
@@ -41,7 +41,7 @@ to the threshold values.
 }
 \examples{
 library(dplyr)
-jhu <- case_death_rate_subset \%>\%
+jhu <- covid_case_death_rates \%>\%
   filter(time_value < "2021-03-08", geo_value \%in\% c("ak", "ca", "ar"))
 r <- epi_recipe(jhu) \%>\%
   step_epi_lag(death_rate, lag = c(0, 7, 14)) \%>\%
diff --git a/man/nested_quantiles.Rd b/man/nested_quantiles.Rd
index b34b718ca..0fa0fe8cc 100644
--- a/man/nested_quantiles.Rd
+++ b/man/nested_quantiles.Rd
@@ -18,7 +18,7 @@ Turn a vector of quantile distributions into a list-col
 \examples{
 library(dplyr)
 library(tidyr)
-edf <- case_death_rate_subset[1:3, ]
+edf <- covid_case_death_rates[1:3, ]
 edf$q <- dist_quantiles(list(1:5, 2:4, 3:10), list(1:5 / 6, 2:4 / 5, 3:10 / 11))
 
 edf_nested <- edf \%>\% mutate(q = nested_quantiles(q))
diff --git a/man/predict-epi_workflow.Rd b/man/predict-epi_workflow.Rd
index 130279249..0b605d556 100644
--- a/man/predict-epi_workflow.Rd
+++ b/man/predict-epi_workflow.Rd
@@ -66,7 +66,7 @@ possible. Specifically, the output will have \code{time_value} and
 }
 }
 \examples{
-jhu <- case_death_rate_subset
+jhu <- covid_case_death_rates
 
 r <- epi_recipe(jhu) \%>\%
   step_epi_lag(death_rate, lag = c(0, 7, 14)) \%>\%
diff --git a/man/state_census.Rd b/man/state_census.Rd
deleted file mode 100644
index eec13eb53..000000000
--- a/man/state_census.Rd
+++ /dev/null
@@ -1,33 +0,0 @@
-% Generated by roxygen2: do not edit by hand
-% Please edit documentation in R/data.R
-\docType{data}
-\name{state_census}
-\alias{state_census}
-\title{State population data}
-\format{
-Data frame with 57 rows (including one for the United States as a
-whole, plus the District of Columbia, Puerto Rico Commonwealth,
-American Samoa, Guam, the U.S. Virgin Islands, and the Northern Mariana,
-Islands).
-
-\describe{
-\item{fips}{FIPS code}
-\item{name}{Full name of the state or territory}
-\item{pop}{Estimate of the location's resident population in
-2019.}
-\item{abbr}{Postal abbreviation for the location}
-}
-}
-\source{
-United States Census Bureau, at
-\url{https://www2.census.gov/programs-surveys/popest/datasets/2010-2019/counties/totals/co-est2019-alldata.pdf},
-\url{https://www.census.gov/data/tables/time-series/demo/popest/2010s-total-puerto-rico-municipios.html},
-and \url{https://www.census.gov/data/tables/2010/dec/2010-island-areas.html}
-}
-\usage{
-state_census
-}
-\description{
-Data set on state populations, from the 2019 US Census.
-}
-\keyword{datasets}
diff --git a/man/step_adjust_latency.Rd b/man/step_adjust_latency.Rd
index af733fcce..1a6770428 100644
--- a/man/step_adjust_latency.Rd
+++ b/man/step_adjust_latency.Rd
@@ -267,8 +267,8 @@ while this will not:
 \if{html}{\out{<div class="sourceCode r">}}\preformatted{toy_recipe <- epi_recipe(toy_df) \%>\%
    step_epi_lag(a, lag=0) \%>\%
    step_adjust_latency(a, method = "extend_lags")
-#> Warning: If `method` is "extend_lags" or "locf", then the previous `step_epi_lag`s won't
-#> work with modified data.
+#> Warning: If `method` is "extend_lags" or "locf", then the previous
+#> `step_epi_lag`s won't work with modified data.
 }\if{html}{\out{</div>}}
 
 If you create columns that you then apply lags to (such as
@@ -277,12 +277,12 @@ If you create columns that you then apply lags to (such as
 }
 
 \examples{
-jhu <- case_death_rate_subset \%>\%
+jhu <- covid_case_death_rates \%>\%
   dplyr::filter(time_value > "2021-11-01", geo_value \%in\% c("ak", "ca", "ny"))
 # setting the `as_of` to something realistic
 attributes(jhu)$metadata$as_of <- max(jhu$time_value) + 3
 
-r <- epi_recipe(case_death_rate_subset) \%>\%
+r <- epi_recipe(covid_case_death_rates) \%>\%
   step_adjust_latency(method = "extend_ahead") \%>\%
   step_epi_ahead(death_rate, ahead = 7) \%>\%
   step_epi_lag(death_rate, lag = c(0, 7, 14))
diff --git a/man/step_epi_naomit.Rd b/man/step_epi_naomit.Rd
index b579dd6d6..faf7484da 100644
--- a/man/step_epi_naomit.Rd
+++ b/man/step_epi_naomit.Rd
@@ -19,7 +19,7 @@ of data loss.
 Unified NA omission wrapper function for recipes
 }
 \examples{
-case_death_rate_subset \%>\%
+covid_case_death_rates \%>\%
   epi_recipe() \%>\%
   step_epi_naomit()
 }
diff --git a/man/step_epi_shift.Rd b/man/step_epi_shift.Rd
index 30ac05d16..867410360 100644
--- a/man/step_epi_shift.Rd
+++ b/man/step_epi_shift.Rd
@@ -78,7 +78,7 @@ are always set to \code{"ahead_"} and \code{"epi_ahead"} respectively, while for
 \code{step_epi_lag}, they are set to \code{"lag_"} and \verb{"epi_lag}, respectively.
 }
 \examples{
-r <- epi_recipe(case_death_rate_subset) \%>\%
+r <- epi_recipe(covid_case_death_rates) \%>\%
   step_epi_ahead(death_rate, ahead = 7) \%>\%
   step_epi_lag(death_rate, lag = c(0, 7, 14))
 r
diff --git a/man/step_epi_slide.Rd b/man/step_epi_slide.Rd
index 242f8e312..b8e4cedb1 100644
--- a/man/step_epi_slide.Rd
+++ b/man/step_epi_slide.Rd
@@ -81,7 +81,7 @@ a computation along existing data.
 }
 \examples{
 library(dplyr)
-jhu <- case_death_rate_subset \%>\%
+jhu <- covid_case_death_rates \%>\%
   filter(time_value >= as.Date("2021-01-01"), geo_value \%in\% c("ca", "ny"))
 rec <- epi_recipe(jhu) \%>\%
   step_epi_slide(case_rate, death_rate,
diff --git a/man/step_growth_rate.Rd b/man/step_growth_rate.Rd
index 752b38dbe..12963f8da 100644
--- a/man/step_growth_rate.Rd
+++ b/man/step_growth_rate.Rd
@@ -73,13 +73,13 @@ sequence of any existing operations.
 that will generate one or more new columns of derived data.
 }
 \examples{
-r <- epi_recipe(case_death_rate_subset) \%>\%
+r <- epi_recipe(covid_case_death_rates) \%>\%
   step_growth_rate(case_rate, death_rate)
 r
 
 r \%>\%
-  prep(case_death_rate_subset) \%>\%
-  bake(case_death_rate_subset)
+  prep(covid_case_death_rates) \%>\%
+  bake(new_data = NULL)
 }
 \seealso{
 Other row operation steps: 
diff --git a/man/step_lag_difference.Rd b/man/step_lag_difference.Rd
index e8ec2101a..6151bee84 100644
--- a/man/step_lag_difference.Rd
+++ b/man/step_lag_difference.Rd
@@ -47,14 +47,14 @@ sequence of any existing operations.
 that will generate one or more new columns of derived data.
 }
 \examples{
-r <- epi_recipe(case_death_rate_subset) \%>\%
+r <- epi_recipe(covid_case_death_rates) \%>\%
   step_lag_difference(case_rate, death_rate, horizon = c(7, 14)) \%>\%
   step_epi_naomit()
 r
 
 r \%>\%
-  prep(case_death_rate_subset) \%>\%
-  bake(case_death_rate_subset)
+  prep(covid_case_death_rates) \%>\%
+  bake(new_data = NULL)
 }
 \seealso{
 Other row operation steps: 
diff --git a/man/tidy.frosting.Rd b/man/tidy.frosting.Rd
index ba3c0f3d5..8152b1440 100644
--- a/man/tidy.frosting.Rd
+++ b/man/tidy.frosting.Rd
@@ -38,7 +38,7 @@ version of the \code{tidy} method for a recipe.
 }
 \examples{
 library(dplyr)
-jhu <- case_death_rate_subset \%>\%
+jhu <- covid_case_death_rates \%>\%
   filter(time_value > "2021-11-01", geo_value \%in\% c("ak", "ca", "ny"))
 
 r <- epi_recipe(jhu) \%>\%
diff --git a/man/update.layer.Rd b/man/update.layer.Rd
index 9604992e1..f151beea9 100644
--- a/man/update.layer.Rd
+++ b/man/update.layer.Rd
@@ -19,7 +19,7 @@ Analogous to \code{update.step()} from the \code{recipes} package.
 }
 \examples{
 library(dplyr)
-jhu <- case_death_rate_subset \%>\%
+jhu <- covid_case_death_rates \%>\%
   filter(time_value > "2021-11-01", geo_value \%in\% c("ak", "ca", "ny"))
 r <- epi_recipe(jhu) \%>\%
   step_epi_lag(death_rate, lag = c(0, 7, 14)) \%>\%
diff --git a/man/weighted_interval_score.Rd b/man/weighted_interval_score.Rd
index 4907e2724..4aac20e7d 100644
--- a/man/weighted_interval_score.Rd
+++ b/man/weighted_interval_score.Rd
@@ -80,13 +80,13 @@ weighted_interval_score(dist_quantiles(1:4, 1:4 / 5), 2.5, 1:9 / 10,
 
 # Using some actual forecasts --------
 library(dplyr)
-jhu <- case_death_rate_subset \%>\%
+jhu <- covid_case_death_rates \%>\%
   filter(time_value >= "2021-10-01", time_value <= "2021-12-01")
 preds <- flatline_forecaster(
   jhu, "death_rate",
   flatline_args_list(quantile_levels = c(.01, .025, 1:19 / 20, .975, .99))
 )$predictions
-actuals <- case_death_rate_subset \%>\%
+actuals <- covid_case_death_rates \%>\%
   filter(time_value == as.Date("2021-12-01") + 7) \%>\%
   select(geo_value, time_value, actual = death_rate)
 preds <- left_join(preds, actuals,
diff --git a/tests/testthat/_snaps/arg_is_.md b/tests/testthat/_snaps/arg_is_.md
index 9250f1707..f05ca780d 100644
--- a/tests/testthat/_snaps/arg_is_.md
+++ b/tests/testthat/_snaps/arg_is_.md
@@ -377,7 +377,7 @@
 # simple surface step test
 
     Code
-      epi_recipe(case_death_rate_subset) %>% step_epi_lag(death_rate, lag = "hello")
+      epi_recipe(covid_case_death_rates) %>% step_epi_lag(death_rate, lag = "hello")
     Condition
       Error in `step_epi_lag()`:
       ! `lag` must be a non-negative integer.
diff --git a/tests/testthat/_snaps/get_test_data.md b/tests/testthat/_snaps/get_test_data.md
index e65b0715c..22d0c942a 100644
--- a/tests/testthat/_snaps/get_test_data.md
+++ b/tests/testthat/_snaps/get_test_data.md
@@ -1,7 +1,7 @@
 # expect insufficient training data error
 
     Code
-      get_test_data(recipe = r, x = case_death_rate_subset)
+      get_test_data(recipe = r, x = covid_case_death_rates)
     Condition
       Error in `get_test_data()`:
       ! You supplied insufficient recent data for this recipe.
diff --git a/tests/testthat/_snaps/snapshots.md b/tests/testthat/_snaps/snapshots.md
index f3e7e5737..a03a8dd43 100644
--- a/tests/testthat/_snaps/snapshots.md
+++ b/tests/testthat/_snaps/snapshots.md
@@ -1093,6 +1093,7 @@
       
       Training data was an <epi_df> with:
       * Geography: state,
+      * Other keys: ,
       * Time type: day,
       * Using data up-to-date as of: 2022-05-31.
       * With the last data available on 2021-12-31
@@ -1116,6 +1117,7 @@
       
       Training data was an <epi_df> with:
       * Geography: state,
+      * Other keys: ,
       * Time type: day,
       * Using data up-to-date as of: 2022-05-31.
       * With the last data available on 2021-12-31
@@ -1140,6 +1142,7 @@
       
       Training data was an <epi_df> with:
       * Geography: state,
+      * Other keys: ,
       * Time type: day,
       * Using data up-to-date as of: 2022-05-31.
       * With the last data available on 2021-12-31
diff --git a/tests/testthat/_snaps/step_adjust_latency.md b/tests/testthat/_snaps/step_adjust_latency.md
index e37ae07ea..8d09248f3 100644
--- a/tests/testthat/_snaps/step_adjust_latency.md
+++ b/tests/testthat/_snaps/step_adjust_latency.md
@@ -63,7 +63,7 @@
 ---
 
     Code
-      prep(r6, case_death_rate_subset)
+      prep(r6, covid_case_death_rates)
     Message
       
       -- Epi Recipe ------------------------------------------------------------------
diff --git a/tests/testthat/test-arg_is_.R b/tests/testthat/test-arg_is_.R
index a1606f021..f043328c7 100644
--- a/tests/testthat/test-arg_is_.R
+++ b/tests/testthat/test-arg_is_.R
@@ -149,7 +149,7 @@ test_that("coerce scalar to date", {
 test_that("simple surface step test", {
   expect_snapshot(
     error = TRUE,
-    epi_recipe(case_death_rate_subset) %>%
+    epi_recipe(covid_case_death_rates) %>%
       step_epi_lag(death_rate, lag = "hello")
   )
 })
diff --git a/tests/testthat/test-bake-method.R b/tests/testthat/test-bake-method.R
index 06f861012..8e118a18d 100644
--- a/tests/testthat/test-bake-method.R
+++ b/tests/testthat/test-bake-method.R
@@ -1,5 +1,5 @@
 test_that("bake method works in all cases", {
-  edf <- case_death_rate_subset %>%
+  edf <- covid_case_death_rates %>%
     filter(time_value > "2021-11-01", geo_value %in% c("ak", "ca", "ny"))
   r <- epi_recipe(edf) %>%
     step_epi_lag(death_rate, lag = c(0, 7, 14)) %>%
diff --git a/tests/testthat/test-blueprint.R b/tests/testthat/test-blueprint.R
index 2d22aff6e..b37bd5e4a 100644
--- a/tests/testthat/test-blueprint.R
+++ b/tests/testthat/test-blueprint.R
@@ -4,7 +4,7 @@ test_that("epi_recipe blueprint keeps the class, mold works", {
   expect_s3_class(bp, "default_epi_recipe_blueprint")
   expect_s3_class(refresh_blueprint(bp), "default_epi_recipe_blueprint")
 
-  jhu <- case_death_rate_subset
+  jhu <- covid_case_death_rates
   # expect_s3_class(er_check_is_data_like(jhu), "epi_df")
 
   r <- epi_recipe(jhu) %>%
diff --git a/tests/testthat/test-epi_recipe.R b/tests/testthat/test-epi_recipe.R
index 1b06cf24c..b4c59c0e5 100644
--- a/tests/testthat/test-epi_recipe.R
+++ b/tests/testthat/test-epi_recipe.R
@@ -103,7 +103,7 @@ test_that("epi_recipe epi_df works", {
 
 
 test_that("add/update/adjust/remove epi_recipe works as intended", {
-  jhu <- case_death_rate_subset
+  jhu <- covid_case_death_rates
 
   r <- epi_recipe(jhu) %>%
     step_epi_lag(death_rate, lag = c(0, 7, 14)) %>%
diff --git a/tests/testthat/test-epi_workflow.R b/tests/testthat/test-epi_workflow.R
index af6ef39ca..cce68a80f 100644
--- a/tests/testthat/test-epi_workflow.R
+++ b/tests/testthat/test-epi_workflow.R
@@ -1,5 +1,5 @@
 test_that("postprocesser was evaluated", {
-  r <- epi_recipe(case_death_rate_subset)
+  r <- epi_recipe(covid_case_death_rates)
   s <- parsnip::linear_reg()
   f <- frosting()
 
@@ -12,7 +12,7 @@ test_that("postprocesser was evaluated", {
 
 
 test_that("outcome of the two methods are the same", {
-  jhu <- case_death_rate_subset
+  jhu <- covid_case_death_rates
 
   r <- epi_recipe(jhu) %>%
     step_epi_lag(death_rate, lag = c(0, 7)) %>%
@@ -33,7 +33,7 @@ test_that("outcome of the two methods are the same", {
 })
 
 test_that("model can be added/updated/removed from epi_workflow", {
-  jhu <- case_death_rate_subset %>%
+  jhu <- covid_case_death_rates %>%
     dplyr::filter(time_value > "2021-11-01", geo_value %in% c("ak", "ca", "ny"))
 
   r <- epi_recipe(jhu) %>%
@@ -64,7 +64,7 @@ test_that("model can be added/updated/removed from epi_workflow", {
 })
 
 test_that("forecast method works", {
-  jhu <- case_death_rate_subset %>%
+  jhu <- covid_case_death_rates %>%
     filter(time_value > "2021-11-01", geo_value %in% c("ak", "ca", "ny"))
   r <- epi_recipe(jhu) %>%
     step_epi_lag(death_rate, lag = c(0, 7, 14)) %>%
@@ -89,7 +89,7 @@ test_that("forecast method works", {
 })
 
 test_that("forecast method errors when workflow not fit", {
-  jhu <- case_death_rate_subset %>%
+  jhu <- covid_case_death_rates %>%
     filter(time_value > "2021-11-01", geo_value %in% c("ak", "ca", "ny"))
   r <- epi_recipe(jhu) %>%
     step_epi_lag(death_rate, lag = c(0, 7, 14)) %>%
diff --git a/tests/testthat/test-extract_argument.R b/tests/testthat/test-extract_argument.R
index 7434763e7..7ac160e67 100644
--- a/tests/testthat/test-extract_argument.R
+++ b/tests/testthat/test-extract_argument.R
@@ -28,7 +28,7 @@ test_that("layer argument extractor works", {
 })
 
 test_that("recipe argument extractor works", {
-  jhu <- case_death_rate_subset %>%
+  jhu <- covid_case_death_rates %>%
     dplyr::filter(time_value > "2021-08-01") %>%
     dplyr::arrange(geo_value, time_value)
 
diff --git a/tests/testthat/test-frosting.R b/tests/testthat/test-frosting.R
index 1bdce3b5a..cd153b200 100644
--- a/tests/testthat/test-frosting.R
+++ b/tests/testthat/test-frosting.R
@@ -40,7 +40,7 @@ test_that("frosting can be created/added/updated/adjusted/removed", {
 
 
 test_that("prediction works without any postprocessor", {
-  jhu <- case_death_rate_subset %>%
+  jhu <- covid_case_death_rates %>%
     dplyr::filter(time_value > "2021-11-01", geo_value %in% c("ak", "ca", "ny"))
   r <- epi_recipe(jhu) %>%
     step_epi_lag(death_rate, lag = c(0, 7, 14)) %>%
@@ -62,7 +62,7 @@ test_that("prediction works without any postprocessor", {
 
 
 test_that("layer_predict is added by default if missing", {
-  jhu <- case_death_rate_subset %>%
+  jhu <- covid_case_death_rates %>%
     dplyr::filter(time_value > "2021-11-01", geo_value %in% c("ak", "ca", "ny"))
 
   r <- epi_recipe(jhu) %>%
@@ -89,7 +89,7 @@ test_that("layer_predict is added by default if missing", {
 
 
 test_that("parsnip settings can be passed through predict.epi_workflow", {
-  jhu <- case_death_rate_subset %>%
+  jhu <- covid_case_death_rates %>%
     dplyr::filter(time_value > "2021-11-01", geo_value %in% c("ak", "ca", "ny"))
 
   r <- epi_recipe(jhu) %>%
diff --git a/tests/testthat/test-get_test_data.R b/tests/testthat/test-get_test_data.R
index 5f315c499..7822f5433 100644
--- a/tests/testthat/test-get_test_data.R
+++ b/tests/testthat/test-get_test_data.R
@@ -1,17 +1,17 @@
 suppressPackageStartupMessages(library(dplyr))
 test_that("return expected number of rows and returned dataset is ungrouped", {
-  r <- epi_recipe(case_death_rate_subset) %>%
+  r <- epi_recipe(covid_case_death_rates) %>%
     step_epi_ahead(death_rate, ahead = 7) %>%
     step_epi_lag(death_rate, lag = c(0, 7, 14, 21, 28)) %>%
     step_epi_lag(case_rate, lag = c(0, 7, 14)) %>%
     step_naomit(all_predictors()) %>%
     step_naomit(all_outcomes(), skip = TRUE)
 
-  test <- get_test_data(recipe = r, x = case_death_rate_subset)
+  test <- get_test_data(recipe = r, x = covid_case_death_rates)
 
   expect_equal(
     nrow(test),
-    dplyr::n_distinct(case_death_rate_subset$geo_value) * 29
+    dplyr::n_distinct(covid_case_death_rates$geo_value) * 29
   )
 
   expect_false(dplyr::is.grouped_df(test))
@@ -19,25 +19,25 @@ test_that("return expected number of rows and returned dataset is ungrouped", {
 
 
 test_that("expect insufficient training data error", {
-  r <- epi_recipe(case_death_rate_subset) %>%
+  r <- epi_recipe(covid_case_death_rates) %>%
     step_epi_ahead(death_rate, ahead = 7) %>%
     step_epi_lag(death_rate, lag = c(0, 367)) %>%
     step_naomit(all_predictors()) %>%
     step_naomit(all_outcomes(), skip = TRUE)
 
-  expect_snapshot(error = TRUE, get_test_data(recipe = r, x = case_death_rate_subset))
+  expect_snapshot(error = TRUE, get_test_data(recipe = r, x = covid_case_death_rates))
 })
 
 
 test_that("expect error that geo_value or time_value does not exist", {
-  r <- epi_recipe(case_death_rate_subset) %>%
+  r <- epi_recipe(covid_case_death_rates) %>%
     step_epi_ahead(death_rate, ahead = 7) %>%
     step_epi_lag(death_rate, lag = c(0, 7, 14)) %>%
     step_epi_lag(case_rate, lag = c(0, 7, 14)) %>%
     step_naomit(all_predictors()) %>%
     step_naomit(all_outcomes(), skip = TRUE)
 
-  wrong_epi_df <- case_death_rate_subset %>% dplyr::select(-geo_value)
+  wrong_epi_df <- covid_case_death_rates %>% dplyr::select(-geo_value)
 
   expect_snapshot(error = TRUE, get_test_data(recipe = r, x = wrong_epi_df))
 })
@@ -139,7 +139,7 @@ test_that("Omit end rows according to minimum lag when that’s not lag 0", {
 
   # Ex. using real built-in data
 
-  ca <- case_death_rate_subset %>%
+  ca <- covid_case_death_rates %>%
     filter(geo_value == "ca")
 
   rec <- epi_recipe(ca) %>%
diff --git a/tests/testthat/test-key_colnames.R b/tests/testthat/test-key_colnames.R
index 3b3118740..d94daaec4 100644
--- a/tests/testthat/test-key_colnames.R
+++ b/tests/testthat/test-key_colnames.R
@@ -1,9 +1,9 @@
 test_that("Extracts keys from a recipe; roles are NA, giving an empty vector", {
-  expect_equal(key_colnames(recipe(case_death_rate_subset)), character(0L))
+  expect_equal(key_colnames(recipe(covid_case_death_rates)), character(0L))
 })
 
 test_that("key_colnames extracts time_value and geo_value, but not raw", {
-  my_recipe <- epi_recipe(case_death_rate_subset) %>%
+  my_recipe <- epi_recipe(covid_case_death_rates) %>%
     step_epi_ahead(death_rate, ahead = 7) %>%
     step_epi_lag(death_rate, lag = c(0, 7, 14)) %>%
     step_epi_lag(case_rate, lag = c(0, 7, 14)) %>%
@@ -14,7 +14,7 @@ test_that("key_colnames extracts time_value and geo_value, but not raw", {
   my_workflow <- epi_workflow() %>%
     add_epi_recipe(my_recipe) %>%
     add_model(linear_reg()) %>%
-    fit(data = case_death_rate_subset)
+    fit(data = covid_case_death_rates)
 
   expect_identical(key_colnames(my_workflow), c("geo_value", "time_value"))
 })
diff --git a/tests/testthat/test-layer_add_forecast_date.R b/tests/testthat/test-layer_add_forecast_date.R
index 491bf5e20..8bf452a81 100644
--- a/tests/testthat/test-layer_add_forecast_date.R
+++ b/tests/testthat/test-layer_add_forecast_date.R
@@ -1,4 +1,4 @@
-jhu <- case_death_rate_subset %>%
+jhu <- covid_case_death_rates %>%
   dplyr::filter(time_value > "2021-11-01", geo_value %in% c("ak", "ca", "ny"))
 attributes(jhu)$metadata$as_of <- max(jhu$time_value) + 3
 
diff --git a/tests/testthat/test-layer_add_target_date.R b/tests/testthat/test-layer_add_target_date.R
index 8bdb3a76b..7cd164960 100644
--- a/tests/testthat/test-layer_add_target_date.R
+++ b/tests/testthat/test-layer_add_target_date.R
@@ -1,4 +1,4 @@
-jhu <- case_death_rate_subset %>%
+jhu <- covid_case_death_rates %>%
   dplyr::filter(time_value > "2021-11-01", geo_value %in% c("ak", "ca", "ny"))
 r <- epi_recipe(jhu) %>%
   step_epi_lag(death_rate, lag = c(0, 7, 14)) %>%
diff --git a/tests/testthat/test-layer_naomit.R b/tests/testthat/test-layer_naomit.R
index 1d5b4ee25..8eb597f41 100644
--- a/tests/testthat/test-layer_naomit.R
+++ b/tests/testthat/test-layer_naomit.R
@@ -1,4 +1,4 @@
-jhu <- case_death_rate_subset %>%
+jhu <- covid_case_death_rates %>%
   dplyr::filter(time_value > "2021-11-01", geo_value %in% c("ak", "ca", "ny"))
 
 r <- epi_recipe(jhu) %>%
diff --git a/tests/testthat/test-layer_predict.R b/tests/testthat/test-layer_predict.R
index 041516b29..ae51a5ec6 100644
--- a/tests/testthat/test-layer_predict.R
+++ b/tests/testthat/test-layer_predict.R
@@ -1,4 +1,4 @@
-jhu <- case_death_rate_subset %>%
+jhu <- covid_case_death_rates %>%
   dplyr::filter(time_value > "2021-11-01", geo_value %in% c("ak", "ca", "ny"))
 r <- epi_recipe(jhu) %>%
   step_epi_lag(death_rate, lag = c(0, 7, 14)) %>%
diff --git a/tests/testthat/test-layer_residual_quantiles.R b/tests/testthat/test-layer_residual_quantiles.R
index 09ef7c9d3..12e44809e 100644
--- a/tests/testthat/test-layer_residual_quantiles.R
+++ b/tests/testthat/test-layer_residual_quantiles.R
@@ -1,4 +1,4 @@
-jhu <- case_death_rate_subset %>%
+jhu <- covid_case_death_rates %>%
   dplyr::filter(time_value > "2021-11-01", geo_value %in% c("ak", "ca", "ny"))
 
 r <- epi_recipe(jhu) %>%
diff --git a/tests/testthat/test-layer_threshold_preds.R b/tests/testthat/test-layer_threshold_preds.R
index 9df7e64ab..324f60a1b 100644
--- a/tests/testthat/test-layer_threshold_preds.R
+++ b/tests/testthat/test-layer_threshold_preds.R
@@ -1,4 +1,4 @@
-jhu <- case_death_rate_subset %>%
+jhu <- covid_case_death_rates %>%
   dplyr::filter(time_value < "2021-03-08", geo_value %in% c("ak", "ca", "ar"))
 r <- epi_recipe(jhu) %>%
   step_epi_lag(death_rate, lag = c(0, 7, 14)) %>%
diff --git a/tests/testthat/test-population_scaling.R b/tests/testthat/test-population_scaling.R
index a1ccba4a1..1f356c0cc 100644
--- a/tests/testthat/test-population_scaling.R
+++ b/tests/testthat/test-population_scaling.R
@@ -7,7 +7,7 @@ test_that("Column names can be passed with and without the tidy way", {
 
   pop_data2 <- pop_data %>% dplyr::rename(geo_value = states)
 
-  newdata <- case_death_rate_subset %>%
+  newdata <- covid_case_death_rates %>%
     filter(geo_value %in% c("ak", "al", "ar", "as", "az", "ca"))
 
   r1 <- epi_recipe(newdata) %>%
@@ -150,7 +150,7 @@ test_that("Postprocessing workflow works and values correct", {
 })
 
 test_that("Postprocessing to get cases from case rate", {
-  jhu <- case_death_rate_subset %>%
+  jhu <- covid_case_death_rates %>%
     dplyr::filter(time_value > "2021-11-01", geo_value %in% c("ca", "ny")) %>%
     dplyr::select(geo_value, time_value, case_rate)
 
@@ -193,7 +193,7 @@ test_that("Postprocessing to get cases from case rate", {
 
 
 test_that("test joining by default columns", {
-  jhu <- case_death_rate_subset %>%
+  jhu <- covid_case_death_rates %>%
     dplyr::filter(time_value > "2021-11-01", geo_value %in% c("ca", "ny")) %>%
     dplyr::select(geo_value, time_value, case_rate)
 
@@ -237,7 +237,7 @@ test_that("test joining by default columns", {
 
   latest <- get_test_data(
     recipe = r,
-    x = case_death_rate_subset %>%
+    x = covid_case_death_rates %>%
       dplyr::filter(
         time_value > "2021-11-01",
         geo_value %in% c("ca", "ny")
@@ -250,7 +250,7 @@ test_that("test joining by default columns", {
 
 
 
-  jhu <- case_death_rate_subset %>%
+  jhu <- covid_case_death_rates %>%
     dplyr::filter(time_value > "2021-11-01", geo_value %in% c("ca", "ny")) %>%
     dplyr::select(geo_value, time_value, case_rate)
 
@@ -305,7 +305,7 @@ test_that("test joining by default columns", {
 
 
 test_that("expect error if `by` selector does not match", {
-  jhu <- case_death_rate_subset %>%
+  jhu <- covid_case_death_rates %>%
     dplyr::filter(time_value > "2021-11-01", geo_value %in% c("ca", "ny")) %>%
     dplyr::select(geo_value, time_value, case_rate)
 
diff --git a/tests/testthat/test-snapshots.R b/tests/testthat/test-snapshots.R
index 956580e9f..3aecfd6b8 100644
--- a/tests/testthat/test-snapshots.R
+++ b/tests/testthat/test-snapshots.R
@@ -110,7 +110,7 @@ test_that("arx_forecaster snapshots", {
 })
 
 test_that("arx_forecaster output format snapshots", {
-  jhu <- case_death_rate_subset %>%
+  jhu <- covid_case_death_rates %>%
     dplyr::filter(time_value >= as.Date("2021-12-01"))
   attributes(jhu)$metadata$as_of <- as.Date(attributes(jhu)$metadata$as_of)
   out1 <- arx_forecaster(
@@ -147,15 +147,15 @@ test_that("arx_forecaster output format snapshots", {
 
 test_that("arx_classifier snapshots", {
   arc1 <- arx_classifier(
-    case_death_rate_subset %>%
+    covid_case_death_rates %>%
       dplyr::filter(time_value >= as.Date("2021-11-01")),
     "death_rate",
     c("case_rate", "death_rate")
   )
   expect_snapshot_tibble(arc1$predictions)
-  max_date <- case_death_rate_subset$time_value %>% max()
+  max_date <- covid_case_death_rates$time_value %>% max()
   arc2 <- arx_classifier(
-    case_death_rate_subset %>%
+    covid_case_death_rates %>%
       dplyr::filter(time_value >= as.Date("2021-11-01")),
     "death_rate",
     c("case_rate", "death_rate"),
@@ -164,7 +164,7 @@ test_that("arx_classifier snapshots", {
   expect_snapshot_tibble(arc2$predictions)
   expect_error(
     arc3 <- arx_classifier(
-      case_death_rate_subset %>%
+      covid_case_death_rates %>%
         dplyr::filter(time_value >= as.Date("2021-11-01")),
       "death_rate",
       c("case_rate", "death_rate"),
@@ -174,7 +174,7 @@ test_that("arx_classifier snapshots", {
   )
   expect_error(
     arc4 <- arx_classifier(
-      case_death_rate_subset %>%
+      covid_case_death_rates %>%
         dplyr::filter(time_value >= as.Date("2021-11-01")),
       "death_rate",
       c("case_rate", "death_rate"),
diff --git a/tests/testthat/test-step_adjust_latency.R b/tests/testthat/test-step_adjust_latency.R
index 0c292ed6f..7b1f320e4 100644
--- a/tests/testthat/test-step_adjust_latency.R
+++ b/tests/testthat/test-step_adjust_latency.R
@@ -508,12 +508,12 @@ test_that("printing step_adjust_latency results in expected output", {
     step_epi_ahead(death_rate, ahead = ahead)
   expect_snapshot(r5)
   expect_snapshot(prep(r5, real_x))
-  r6 <- epi_recipe(case_death_rate_subset) %>%
+  r6 <- epi_recipe(covid_case_death_rates) %>%
     step_epi_lag(death_rate, lag = c(0, 7, 14)) %>%
     step_adjust_latency(method = "extend_ahead") %>%
     step_epi_ahead(death_rate, ahead = 7)
   expect_snapshot(r6)
-  expect_snapshot(prep(r6, case_death_rate_subset))
+  expect_snapshot(prep(r6, covid_case_death_rates))
 })
 
 test_that("locf works as intended", {
diff --git a/tests/testthat/test-utils_latency.R b/tests/testthat/test-utils_latency.R
index 7bf808835..2ac32fc9f 100644
--- a/tests/testthat/test-utils_latency.R
+++ b/tests/testthat/test-utils_latency.R
@@ -8,7 +8,10 @@ old_data <- tibble(
   tmp_death_rate = atan(0.1 * 1:200) + cos(5 * 1:200) + 1
 ) %>%
   # place2 is slightly more recent than place1
-  mutate(time_value = as.Date(ifelse(geo_value == "place2", time_value + 1, time_value))) %>%
+  mutate(time_value = case_when(
+    geo_value == "place2" ~ time_value + 1,
+    TRUE ~ time_value
+  )) %>%
   as_epi_df(as_of = as_of)
 old_data
 keys <- c("time_value", "geo_value")
diff --git a/vignettes/articles/all_states_covidcast_signals.rds b/vignettes/articles/all_states_covidcast_signals.rds
deleted file mode 100644
index e4ad60153..000000000
Binary files a/vignettes/articles/all_states_covidcast_signals.rds and /dev/null differ
diff --git a/vignettes/articles/case_death_rate_archive.rds b/vignettes/articles/case_death_rate_archive.rds
deleted file mode 100644
index b5209fb1d..000000000
Binary files a/vignettes/articles/case_death_rate_archive.rds and /dev/null differ
diff --git a/vignettes/articles/smooth-qr.Rmd b/vignettes/articles/smooth-qr.Rmd
index b93c726f6..801934e8f 100644
--- a/vignettes/articles/smooth-qr.Rmd
+++ b/vignettes/articles/smooth-qr.Rmd
@@ -97,7 +97,7 @@ state cases and deaths. This sample data ranges from Dec. 31, 2020 to
 Dec. 31, 2021.
 
 ```{r}
-edf <- case_death_rate_subset
+edf <- covid_case_death_rates
 ```
 
 We will set the forecast date to be November 30, 2021 so that we can produce
diff --git a/vignettes/articles/symptom-surveys.Rmd b/vignettes/articles/symptom-surveys.Rmd
index 1e51a9963..af692726e 100644
--- a/vignettes/articles/symptom-surveys.Rmd
+++ b/vignettes/articles/symptom-surveys.Rmd
@@ -145,22 +145,39 @@ own forecaster under the `epipredict` framework, we could easily add steps to
 re-scale and transform the signals to our `epi_recipe`. This would make the code
 more succinct and self-contained.
 
+We will compare two CLI-in-community indicators from
+different sources. The data are available in the [`epidatasets` package](https://cmu-delphi.github.io/epidatasets/),
+and can be loaded with:
+
 ```{r, message = FALSE, warning = FALSE}
-library(epidatr)
 library(dplyr)
 library(purrr)
 library(epipredict)
 library(recipes)
 
+z <- epidatasets::county_smoothed_cli_comparison
+```
+
+The data can also be constructed from the Delphi API with the following code:
+
+```{r, message = FALSE, warning = FALSE, eval = FALSE}
+library(epidatr)
+
+d <- "2020-09-21"
+
 case_num <- 200
-as_of_date <- "2020-05-14"
-geo_values <- pub_covidcast(
+geos_date <- "2020-05-14"
+
+# Find counties that on 2020-05-14 had >= 200 cases reported.
+# For later datasets, we will only keep data for these geos.
+geo_values_initial <- pub_covidcast(
   source = "jhu-csse",
   signals = "confirmed_cumulative_num",
   geo_type = "county",
   time_type = "day",
   geo_values = "*",
-  time_values = epirange(20200514, 20200514)
+  time_values = epirange(geos_date, geos_date),
+  as_of = d
 ) %>%
   filter(value >= case_num) %>%
   pull(geo_value) %>%
@@ -177,9 +194,10 @@ goog_sm_cli <- pub_covidcast(
   geo_type = "county",
   time_type = "day",
   geo_values = "*",
-  time_values = epirange(start_day, end_day)
+  time_values = epirange(start_day, end_day),
+  as_of = d
 ) %>%
-  filter(geo_value %in% geo_values) %>%
+  filter(geo_value %in% geo_values_initial) %>%
   select(geo_value, time_value, value) %>%
   rename(goog = value)
 
@@ -189,9 +207,10 @@ fb_survey <- pub_covidcast(
   geo_type = "county",
   time_type = "day",
   geo_values = "*",
-  time_values = epirange(start_day, end_day)
+  time_values = epirange(start_day, end_day),
+  as_of = d
 ) %>%
-  filter(geo_value %in% geo_values) %>%
+  filter(geo_value %in% geo_values_initial) %>%
   select(geo_value, time_value, value) %>%
   rename(fb = value)
 
@@ -201,26 +220,31 @@ jhu_7dav_incid <- pub_covidcast(
   geo_type = "county",
   time_type = "day",
   geo_values = "*",
-  time_values = epirange(start_day, end_day)
+  time_values = epirange(start_day, end_day),
+  as_of = d
 ) %>%
-  filter(geo_value %in% geo_values) %>%
+  filter(geo_value %in% geo_values_initial) %>%
   select(geo_value, time_value, value) %>%
   rename(case = value)
 
-# Find "complete" counties, present in all three data signals at all times
+# Find "complete" counties, present in all three data signals, and also
+# present in the `geo_values_initial` object.
 geo_values_complete <- intersect(
   intersect(goog_sm_cli$geo_value, fb_survey$geo_value),
   jhu_7dav_incid$geo_value
 )
 
-# Make one big matrix by joining these three data frames
-z <- full_join(full_join(goog_sm_cli, fb_survey, by = c("geo_value", "time_value")),
+# Join the three data frames together
+z <- full_join(
+  full_join(goog_sm_cli, fb_survey, by = c("geo_value", "time_value")),
   jhu_7dav_incid,
   by = c("geo_value", "time_value")
 ) %>%
   filter(geo_value %in% geo_values_complete) %>%
-  as_epi_df()
+  as_epi_df(as_of = d)
+```
 
+```{r, message = FALSE, warning = FALSE}
 Logit <- function(x, a = 0.01) log((x + a) / (1 - x + a))
 Sigmd <- function(y, a = 0.01) (exp(y) * (1 + a) - a) / (1 + exp(y))
 
diff --git a/vignettes/arx-classifier.Rmd b/vignettes/arx-classifier.Rmd
index 3813e7d13..1e2a6949a 100644
--- a/vignettes/arx-classifier.Rmd
+++ b/vignettes/arx-classifier.Rmd
@@ -29,14 +29,14 @@ or ahead value.
 
 To get a sense of how the `arx_classifier()` works, let's consider a simple
 example with minimal inputs. For this, we will use the built-in
-`case_death_rate_subset` that contains confirmed COVID-19 cases and deaths from
+`covid_case_death_rates` that contains confirmed COVID-19 cases and deaths from
 JHU CSSE for all states over Dec 31, 2020 to Dec 31, 2021. From this, we'll take
 a subset of data for five states over June 4, 2021 to December 31, 2021. Our
 objective is to predict whether the case rates are increasing when considering
 the 0, 7 and 14 day case rates:
 
 ```{r}
-jhu <- case_death_rate_subset %>%
+jhu <- covid_case_death_rates %>%
   filter(
     time_value >= "2021-06-04",
     time_value <= "2021-12-31",
@@ -163,7 +163,7 @@ $$
 g_{\text{up}}(x) = \log\left ( \frac{\Pr(Z_{l, t} = \text{up} \vert x)}{\Pr(Z_{l, t} = \text{not up} \vert x)} \right ) = \beta_{10} + \beta_{11}Y_{l,t}^\Delta + \beta_{12}Y_{l,t-7}^\Delta + \beta_{13}Y_{l,t-14}^\Delta.
 $$
 
-Now then, we will operate on the same subset of the `case_death_rate_subset`
+Now then, we will operate on the same subset of the `covid_case_death_rates`
 that we used in our above example. This time, we will use it to investigate
 whether the number of newly reported cases over the past 7 days has increased by
 at least 25% compared to the preceding week for our sample of states.
diff --git a/vignettes/epipredict.Rmd b/vignettes/epipredict.Rmd
index 32e0d7d16..ae4f7671a 100644
--- a/vignettes/epipredict.Rmd
+++ b/vignettes/epipredict.Rmd
@@ -85,7 +85,7 @@ package. There is much more to see there, but for the moment, it's enough to
 look at a simple one:
 
 ```{r epidf}
-jhu <- case_death_rate_subset
+jhu <- covid_case_death_rates
 jhu
 ```
 
diff --git a/vignettes/panel-data.Rmd b/vignettes/panel-data.Rmd
index 79186a6c2..1faf5b56f 100644
--- a/vignettes/panel-data.Rmd
+++ b/vignettes/panel-data.Rmd
@@ -18,6 +18,7 @@ library(parsnip)
 library(recipes)
 library(epiprocess)
 library(epipredict)
+library(epidatasets)
 library(ggplot2)
 theme_set(theme_bw())
 ```
@@ -25,13 +26,13 @@ theme_set(theme_bw())
 [Panel data](https://en.wikipedia.org/wiki/Panel_data), or longitudinal data,
 contain cross-sectional measurements of subjects over time. The `epipredict`
 package is most suitable for running forecasters on epidemiological panel data.
-A built-in example of this is the [`case_death_rate_subset`](
-  https://cmu-delphi.github.io/epipredict/reference/case_death_rate_subset.html)
-dataset, which contains daily state-wise measures of `case_rate` and
+An example of this is the [`covid_case_death_rates`](
+  https://cmu-delphi.github.io/epidatasets/reference/covid_case_death_rates.html)
+dataset, which contains daily state-wise measures of `case_rate` and 
 `death_rate` for COVID-19 in 2021:
 
 ```{r epi-panel-ex, include=T}
-head(case_death_rate_subset, 3)
+head(covid_case_death_rates, 3)
 ```
 
 `epipredict` functions work with data in
@@ -41,7 +42,6 @@ are also valid candidates for `epipredict` functionality, as long as they are
 in `epi_df` format.
 
 ```{r employ-stats, include=F}
-data("grad_employ_subset")
 year_start <- min(grad_employ_subset$time_value)
 year_end <- max(grad_employ_subset$time_value)
 ```
diff --git a/vignettes/preprocessing-and-models.Rmd b/vignettes/preprocessing-and-models.Rmd
index 987ecdef4..8d1d2f19f 100644
--- a/vignettes/preprocessing-and-models.Rmd
+++ b/vignettes/preprocessing-and-models.Rmd
@@ -38,7 +38,6 @@ will create a classification model for hotspot predictions.
 ```{r, warning=FALSE, message=FALSE}
 library(tidyr)
 library(dplyr)
-library(epidatr)
 library(epipredict)
 library(recipes)
 library(workflows)
@@ -59,14 +58,28 @@ Although there are many state-of-the-art models, we choose to use Poisson
 regression, the textbook example for modeling count data, as an illustration
 for using the `epipredict` package with other existing tidymodels packages.
 
+The `counts_subset` dataset is available in the [`epidatasets` package](https://cmu-delphi.github.io/epidatasets/),
+and contains the number of confirmed cases and deaths from June 4, 2021 to Dec
+31, 2021 in some U.S. states. It can be loaded with:
+
 ```{r poisson-reg-data}
+x <- epidatasets::counts_subset
+```
+
+The data can also be fetched from the Delphi API with the following query:
+```{r, eval = FALSE}
+library(epidatr)
+
+d <- as.Date("2024-03-20")
+
 x <- pub_covidcast(
   source = "jhu-csse",
   signals = "confirmed_incidence_num",
   time_type = "day",
   geo_type = "state",
   time_values = epirange(20210604, 20211231),
-  geo_values = "ca,fl,tx,ny,nj"
+  geo_values = "ca,fl,tx,ny,nj",
+  as_of = d
 ) %>%
   select(geo_value, time_value, cases = value)
 
@@ -76,18 +89,15 @@ y <- pub_covidcast(
   time_type = "day",
   geo_type = "state",
   time_values = epirange(20210604, 20211231),
-  geo_values = "ca,fl,tx,ny,nj"
+  geo_values = "ca,fl,tx,ny,nj",
+  as_of = d
 ) %>%
   select(geo_value, time_value, deaths = value)
 
-counts_subset <- full_join(x, y, by = c("geo_value", "time_value")) %>%
-  as_epi_df()
+x <- full_join(x, y, by = c("geo_value", "time_value")) %>%
+  as_epi_df(as_of = d)
 ```
 
-The `counts_subset` dataset comes from the `epidatr` package, and
-contains the number of confirmed cases and deaths from June 4, 2021 to
-Dec 31, 2021 in some U.S. states.
-
 We wish to predict the 7-day ahead death counts with lagged cases and deaths.
 Furthermore, we will let each state be a dummy variable. Using differential
 intercept coefficients, we can allow for an intercept shift between states.
@@ -242,17 +252,31 @@ most or all of the time while in public in the past 7 days and the estimated
 percentage of respondents who reported that all or most people they encountered
 in public in the past 7 days maintained a distance of at least 6 feet.
 
-State-wise population data from the 2019 U.S. Census is included in this package
-and will be used in `layer_population_scaling()`.
+State-wise population data from the 2019 U.S. Census will be used in 
+`layer_population_scaling()`.
+
+Both datasets are available in the [`epidatasets` package](https://cmu-delphi.github.io/epidatasets/),
+and can be loaded with:
 
 ```{r}
+behav_ind <- epidatasets::ctis_covid_behaviours
+pop_dat <- epidatasets::state_census %>% select(abbr, pop)
+```
+
+The data can also be fetched from the Delphi API with the following query:
+```{r, eval = FALSE}
+library(epidatr)
+
+d <- as.Date("2024-03-20")
+
 behav_ind_mask <- pub_covidcast(
   source = "fb-survey",
   signals = "smoothed_wwearing_mask_7d",
   time_type = "day",
   geo_type = "state",
   time_values = epirange(20210604, 20211231),
-  geo_values = "ca,fl,tx,ny,nj"
+  geo_values = "ca,fl,tx,ny,nj",
+  as_of = d
 ) %>%
   select(geo_value, time_value, masking = value)
 
@@ -262,14 +286,16 @@ behav_ind_distancing <- pub_covidcast(
   time_type = "day",
   geo_type = "state",
   time_values = epirange(20210604, 20211231),
-  geo_values = "ca,fl,tx,ny,nj"
+  geo_values = "ca,fl,tx,ny,nj",
+  as_of = d
 ) %>%
   select(geo_value, time_value, distancing = value)
 
-pop_dat <- state_census %>% select(abbr, pop)
-
 behav_ind <- behav_ind_mask %>%
-  full_join(behav_ind_distancing, by = c("geo_value", "time_value"))
+  full_join(behav_ind_distancing, by = c("geo_value", "time_value")) %>%
+  as_epi_df(as_of = d)
+
+pop_dat <- state_census %>% select(abbr, pop)
 ```
 
 Rather than using raw mask-wearing / social-distancing metrics, for the sake
@@ -290,11 +316,11 @@ behav_ind %>%
 ```
 
 We will take a subset of death rate and case rate data from the built-in dataset
-`case_death_rate_subset`.
+`covid_case_death_rates`.
 
 ```{r}
 jhu <- filter(
-  case_death_rate_subset,
+  covid_case_death_rates,
   time_value >= "2021-06-04",
   time_value <= "2021-12-31",
   geo_value %in% c("ca", "fl", "tx", "ny", "nj")
@@ -440,10 +466,10 @@ g_{\text{up}}(x) &= \log\left(\frac{Pr(Z_{\ell,t}=\text{up}\mid x)}{Pr(Z_{\ell,t
 Preprocessing steps are similar to the previous models with an additional step
 of categorizing the response variables. Again, we will use a subset of death
 rate and case rate data from our built-in dataset
-`case_death_rate_subset`.
+`covid_case_death_rates`.
 
 ```{r}
-jhu <- case_death_rate_subset %>%
+jhu <- covid_case_death_rates %>%
   dplyr::filter(
     time_value >= "2021-06-04",
     time_value <= "2021-12-31",
@@ -512,7 +538,7 @@ Let's start with a simple dataset and preprocessing:
 
 ```{r}
 ex <- filter(
-  case_death_rate_subset,
+  covid_case_death_rates,
   time_value >= "2021-12-01",
   time_value <= "2021-12-31",
   geo_value == "ca"
diff --git a/vignettes/update.Rmd b/vignettes/update.Rmd
index 6e9e8745c..3d97da049 100644
--- a/vignettes/update.Rmd
+++ b/vignettes/update.Rmd
@@ -61,13 +61,13 @@ vignette and only briefly go through some examples for a `frosting` object.
 
 ## Add/update/remove an `epi_recipe` in an `epi_workflow`
 
-We start with the built-in `case_death_rate_subset` dataset that contains JHU
+We start with the built-in `covid_case_death_rates` dataset that contains JHU
 daily COVID-19 cases and deaths by state and take a subset of it from Nov. 1,
 2021 to Dec. 31, 2021 for the four states of Alaska, California, New York, and
 South Carolina.
 
 ```{r}
-jhu <- case_death_rate_subset %>%
+jhu <- covid_case_death_rates %>%
   dplyr::filter(time_value >= as.Date("2021-11-01"), geo_value %in% c("ak", "ca", "ny", "sc"))
 
 jhu