cmu-delphi · dsweber2 · Feb 14, 2025 · Feb 12, 2025 · Feb 13, 2025 · Feb 14, 2025
@@ -67,6 +67,8 @@ climatological_forecaster <- function(epi_data,
   horizon <- args_list$forecast_horizon
   window_size <- args_list$window_size
   time_type <- args_list$time_type
+  # check that the prediction time type is more granular than epi_data's
+  # time_type
   ttype_ord <- match(time_type, c("day", "epiweek", "week", "month"))
   ttype_ord <- ttype_ord - as.integer(ttype_ord > 2)
   edf_ttype_ord <- match(edf_time_type, c("day", "week", "yearmonth"))

@@ -61,6 +61,7 @@ layer_threshold_new <-
 
 
 
+#' restrict various objects to the interval [lower, upper]
 snap <- function(x, lower, upper, ...) {
   UseMethod("snap")
 }

@@ -1,13 +1,14 @@
 #' Calculate a climatological variable based on the history
 #'
-#' `step_climate()` creates a *specification* of a recipe step
-#' that will generate one or more new columns of derived data. This step
-#' examines all available seasons in the training data and calculates the
-#' a measure of center for the "typical" season. Think of this like with
-#' the weather: to predict the temperature in January in Pittsburgh, PA,
-#' I might look at all previous January's on record, average their temperatures,
-#' and include that in my model. So it is important to _align_ the forecast
-#' horizon with the climate. See the details for more information.
+#' `step_climate()` creates a *specification* of a recipe step that will
+#' generate one or more new columns of derived data. This step examines all
+#' available seasons in the training data and calculates the a measure of center
+#' for the "typical" season. Think of this like with the weather: to predict the
+#' temperature in January in Pittsburgh, PA, I might look at all previous
+#' January's on record, average their temperatures, and include that in my
+#' model. So it is important to _align_ the forecast horizon with the climate.
+#' This step will work best if added after `step_epi_ahead()`, but that is not
+#' strictly required. See the details for more information.
 #'
 #' @details
 #' Construction of a climate predictor can be helpful with strongly seasonal
@@ -101,12 +102,22 @@
 #' r %>%
 #'   prep(covid_case_death_rates) %>%
 #'   bake(new_data = NULL)
+#'
+#' # switching the order is possible if you specify `forecast_ahead`
+#' r <- epi_recipe(covid_case_death_rates) %>%
+#'   step_climate(death_rate, forecast_ahead = 7, time_type = "day") %>%
+#'   step_epi_ahead(death_rate, ahead = 7) %>%
+#'   r()
+#'
+#' r %>%
+#'   prep(covid_case_death_rates) %>%
+#'   bake(new_data = NULL)
 step_climate <-
   function(recipe,
            ...,
            forecast_ahead = "detect",
            role = "predictor",
-           time_type = c("epiweek", "week", "month", "day"),
+           time_type = c("detect", "epiweek", "week", "month", "day"),
            center_method = c("median", "mean"),
            window_size = 3L,
            epi_keys = NULL,
@@ -121,6 +132,7 @@ step_climate <-
     n_outcomes <- sum(recipe$var_info$role == "outcome")
     time_type <- rlang::arg_match(time_type)
     edf_time_type <- attr(recipe$template, "metadata")$time_type
+    if (time_type == "detect") time_type <- edf_time_type
     if (edf_time_type == "custom") {
       cli_abort("This step only works with daily, weekly, or yearmonth data.")
     }
@@ -315,6 +327,14 @@ print.step_climate <- function(x, width = max(20, options()$width - 30), ...) {
   invisible(x)
 }
 
+#' group col by .idx values and sum windows around each .idx value
+#' @param .idx the relevant periodic part of time value, e.g. the week number
+#' @param col the list of values indexed by `.idx`
+#' @param weights how much to weigh each particular datapoint
+#' @param aggr the aggregation function, probably Quantile, mean or median
+#' @param window_size the number of .idx entries before and after to include in
+#'   the aggregation
+#' @param modulus the maximum value of `.idx`
 roll_modular_multivec <- function(col, .idx, weights, aggr, window_size, modulus) {
   tib <- tibble(col = col, weights = weights, .idx = .idx) |>
     arrange(.idx) |>

@@ -10,20 +10,23 @@ test_that("roll_modular_multivec works", {
   Median <- function(x, w) median(x, na.rm = TRUE)
 
   # unweighted mean
+  # window of size 0
   expected_res <- tib |>
     mutate(.idx = .idx %% modulus, .idx = .idx + (.idx == 0) * modulus) |>
     summarise(climate_pred = weighted.mean(col, w = w), .by = .idx)
   expect_equal(
     roll_modular_multivec(tib$col, tib$.idx, tib$w, Mean, 0, modulus),
     expected_res
   )
-  w_size <- 1L
+  # window of size 1, which includes everything
   expected_res <- tibble(.idx = as.double(1:3), climate_pred = mean(tib$col))
   expect_equal(
     roll_modular_multivec(tib$col, tib$.idx, tib$w, Mean, 1L, modulus),
     expected_res
   )
+
   # weighted mean
+  # window of size 0
   tib$w <- c(1, 2, 3, 1, 2, 1, 1, 2, 2, 1)
   expected_res <- tib |>
     mutate(.idx = .idx %% modulus, .idx = .idx + (.idx == 0) * modulus) |>
@@ -32,7 +35,7 @@ test_that("roll_modular_multivec works", {
     roll_modular_multivec(tib$col, tib$.idx, tib$w, Mean, 0, modulus),
     expected_res
   )
-  tib$w <- c(1, 2, 3, 1, 2, 1, 1, 2, 2, 1)
+  # window of size 1
   expected_res <- tibble(
     .idx = as.double(1:3),
     climate_pred = weighted.mean(tib$col, tib$w)
@@ -66,7 +69,7 @@ test_that("prep/bake steps create the correct training data", {
   ) %>%
     as_epi_df()
   # epiweeks 1, 52, and 53 are all 1, note that there are days in wk 52, 2 in wk 53
-  r <- epi_recipe(x) %>% step_climate(y)
+  r <- epi_recipe(x) %>% step_climate(y, time_type = "epiweek")
   p <- prep(r, x)
 
   expected_res <- tibble(.idx = 1:53, climate_y = c(2, 2:25, 25, 25, 25:2, 2, 2))
@@ -94,7 +97,7 @@ test_that("leading the climate predictor works as expected", {
   r <- epi_recipe(x) %>%
     step_epi_ahead(y, ahead = 14L) %>%
     step_epi_lag(y, lag = c(0, 7L, 14L)) %>%
-    step_climate(y, forecast_ahead = 2L) %>%
+    step_climate(y, forecast_ahead = 2L, time_type = "epiweek") %>%
     # matches the response
     step_epi_naomit()
   p <- prep(r, x)
-Original file line number
+Diff line change
@@ Expand Up / @@ -61,6 +61,7 @@ layer_threshold_new <- @@
+    #' restrict various objects to the interval [lower, upper]
     snap <- function(x, lower, upper, ...) {
       UseMethod("snap")
     }
@@ Expand Down @@