From 7089c2f4c6d66e2cbaf4fd44db44a32a60e16cc6 Mon Sep 17 00:00:00 2001
From: dsweber2 <david.weber2@pm.me>
Date: Wed, 12 Feb 2025 10:44:00 -0600
Subject: [PATCH 1/3] David's suggestions

---
 R/climatological_forecaster.R      |  2 ++
 R/layer_threshold_preds.R          |  1 +
 R/step_climate.R                   | 35 +++++++++++++++++++++++-------
 man/step_adjust_latency.Rd         |  4 ++--
 tests/testthat/test-step_climate.R |  7 ++++--
 5 files changed, 37 insertions(+), 12 deletions(-)

diff --git a/R/climatological_forecaster.R b/R/climatological_forecaster.R
index fb65d3e8a..f1f7d66e1 100644
--- a/R/climatological_forecaster.R
+++ b/R/climatological_forecaster.R
@@ -67,6 +67,8 @@ climatological_forecaster <- function(epi_data,
   horizon <- args_list$forecast_horizon
   window_size <- args_list$window_size
   time_type <- args_list$time_type
+  # check that the prediction time type is more granular than epi_data's
+  # time_type
   ttype_ord <- match(time_type, c("day", "epiweek", "week", "month"))
   ttype_ord <- ttype_ord - as.integer(ttype_ord > 2)
   edf_ttype_ord <- match(edf_time_type, c("day", "week", "yearmonth"))
diff --git a/R/layer_threshold_preds.R b/R/layer_threshold_preds.R
index 7b8ca0252..99f016128 100644
--- a/R/layer_threshold_preds.R
+++ b/R/layer_threshold_preds.R
@@ -61,6 +61,7 @@ layer_threshold_new <-
 
 
 
+#' restrict various objects to the interval [lower, upper]
 snap <- function(x, lower, upper, ...) {
   UseMethod("snap")
 }
diff --git a/R/step_climate.R b/R/step_climate.R
index 2f88ca94c..38a5a0ec5 100644
--- a/R/step_climate.R
+++ b/R/step_climate.R
@@ -1,13 +1,14 @@
 #' Calculate a climatological variable based on the history
 #'
-#' `step_climate()` creates a *specification* of a recipe step
-#' that will generate one or more new columns of derived data. This step
-#' examines all available seasons in the training data and calculates the
-#' a measure of center for the "typical" season. Think of this like with
-#' the weather: to predict the temperature in January in Pittsburgh, PA,
-#' I might look at all previous January's on record, average their temperatures,
-#' and include that in my model. So it is important to _align_ the forecast
-#' horizon with the climate. See the details for more information.
+#' `step_climate()` creates a *specification* of a recipe step that will
+#' generate one or more new columns of derived data. This step examines all
+#' available seasons in the training data and calculates the a measure of center
+#' for the "typical" season. Think of this like with the weather: to predict the
+#' temperature in January in Pittsburgh, PA, I might look at all previous
+#' January's on record, average their temperatures, and include that in my
+#' model. So it is important to _align_ the forecast horizon with the climate.
+#' This step will work best if added after `step_epi_ahead()`, but that is not
+#' strictly required. See the details for more information.
 #'
 #' @details
 #' Construction of a climate predictor can be helpful with strongly seasonal
@@ -101,6 +102,16 @@
 #' r %>%
 #'   prep(covid_case_death_rates) %>%
 #'   bake(new_data = NULL)
+#'
+#' # switching the order is possible if you specify `forecast_ahead`
+#' r <- epi_recipe(covid_case_death_rates) %>%
+#'   step_climate(death_rate, forecast_ahead = 7, time_type = "day") %>%
+#'   step_epi_ahead(death_rate, ahead = 7) %>%
+#'   r()
+#'
+#' r %>%
+#'   prep(covid_case_death_rates) %>%
+#'   bake(new_data = NULL)
 step_climate <-
   function(recipe,
            ...,
@@ -315,6 +326,14 @@ print.step_climate <- function(x, width = max(20, options()$width - 30), ...) {
   invisible(x)
 }
 
+#' group col by .idx values and sum windows around each .idx value
+#' @param .idx the relevant periodic part of time value, e.g. the week number
+#' @param col the list of values indexed by `.idx`
+#' @param weights how much to weigh each particular datapoint
+#' @param aggr the aggregation function, probably Quantile, mean or median
+#' @param window_size the number of .idx entries before and after to include in
+#'   the aggregation
+#' @param modulus the maximum value of `.idx`
 roll_modular_multivec <- function(col, .idx, weights, aggr, window_size, modulus) {
   tib <- tibble(col = col, weights = weights, .idx = .idx) |>
     arrange(.idx) |>
diff --git a/man/step_adjust_latency.Rd b/man/step_adjust_latency.Rd
index 75d674472..1fadd3ab3 100644
--- a/man/step_adjust_latency.Rd
+++ b/man/step_adjust_latency.Rd
@@ -267,8 +267,8 @@ while this will not:
 \if{html}{\out{<div class="sourceCode r">}}\preformatted{toy_recipe <- epi_recipe(toy_df) \%>\%
    step_epi_lag(a, lag=0) \%>\%
    step_adjust_latency(a, method = "extend_lags")
-#> Warning: If `method` is "extend_lags" or "locf", then the previous `step_epi_lag`s won't
-#> work with modified data.
+#> Warning: If `method` is "extend_lags" or "locf", then the previous `step_epi_lag`s won't work with
+#> modified data.
 }\if{html}{\out{</div>}}
 
 If you create columns that you then apply lags to (such as
diff --git a/tests/testthat/test-step_climate.R b/tests/testthat/test-step_climate.R
index 045789483..480eb62b5 100644
--- a/tests/testthat/test-step_climate.R
+++ b/tests/testthat/test-step_climate.R
@@ -10,6 +10,7 @@ test_that("roll_modular_multivec works", {
   Median <- function(x, w) median(x, na.rm = TRUE)
 
   # unweighted mean
+  # window of size 0
   expected_res <- tib |>
     mutate(.idx = .idx %% modulus, .idx = .idx + (.idx == 0) * modulus) |>
     summarise(climate_pred = weighted.mean(col, w = w), .by = .idx)
@@ -17,13 +18,15 @@ test_that("roll_modular_multivec works", {
     roll_modular_multivec(tib$col, tib$.idx, tib$w, Mean, 0, modulus),
     expected_res
   )
-  w_size <- 1L
+  # window of size 1, which includes everything
   expected_res <- tibble(.idx = as.double(1:3), climate_pred = mean(tib$col))
   expect_equal(
     roll_modular_multivec(tib$col, tib$.idx, tib$w, Mean, 1L, modulus),
     expected_res
   )
+
   # weighted mean
+  # window of size 0
   tib$w <- c(1, 2, 3, 1, 2, 1, 1, 2, 2, 1)
   expected_res <- tib |>
     mutate(.idx = .idx %% modulus, .idx = .idx + (.idx == 0) * modulus) |>
@@ -32,7 +35,7 @@ test_that("roll_modular_multivec works", {
     roll_modular_multivec(tib$col, tib$.idx, tib$w, Mean, 0, modulus),
     expected_res
   )
-  tib$w <- c(1, 2, 3, 1, 2, 1, 1, 2, 2, 1)
+  # window of size 1
   expected_res <- tibble(
     .idx = as.double(1:3),
     climate_pred = weighted.mean(tib$col, tib$w)

From 60d6aeb5dff90e7ae3f7d2a647ee4472d76250fa Mon Sep 17 00:00:00 2001
From: dsweber2 <david.weber2@pm.me>
Date: Thu, 13 Feb 2025 14:14:39 -0600
Subject: [PATCH 2/3] time_type defaulting to the epi_df's time_type

---
 R/step_climate.R                   | 7 ++++++-
 tests/testthat/test-step_climate.R | 4 ++--
 2 files changed, 8 insertions(+), 3 deletions(-)

diff --git a/R/step_climate.R b/R/step_climate.R
index 38a5a0ec5..6169bd2a7 100644
--- a/R/step_climate.R
+++ b/R/step_climate.R
@@ -117,7 +117,7 @@ step_climate <-
            ...,
            forecast_ahead = "detect",
            role = "predictor",
-           time_type = c("epiweek", "week", "month", "day"),
+           time_type = c("detect", "epiweek", "week", "month", "day"),
            center_method = c("median", "mean"),
            window_size = 3L,
            epi_keys = NULL,
@@ -132,6 +132,11 @@ step_climate <-
     n_outcomes <- sum(recipe$var_info$role == "outcome")
     time_type <- rlang::arg_match(time_type)
     edf_time_type <- attr(recipe$template, "metadata")$time_type
+    if (time_type == "detect") {
+      time_type <- edf_time_type
+    } else {
+      edf_time_type <- attr(recipe$template, "metadata")$time_type
+    }
     if (edf_time_type == "custom") {
       cli_abort("This step only works with daily, weekly, or yearmonth data.")
     }
diff --git a/tests/testthat/test-step_climate.R b/tests/testthat/test-step_climate.R
index 480eb62b5..2544a77ae 100644
--- a/tests/testthat/test-step_climate.R
+++ b/tests/testthat/test-step_climate.R
@@ -69,7 +69,7 @@ test_that("prep/bake steps create the correct training data", {
   ) %>%
     as_epi_df()
   # epiweeks 1, 52, and 53 are all 1, note that there are days in wk 52, 2 in wk 53
-  r <- epi_recipe(x) %>% step_climate(y)
+  r <- epi_recipe(x) %>% step_climate(y, time_type = "epiweek")
   p <- prep(r, x)
 
   expected_res <- tibble(.idx = 1:53, climate_y = c(2, 2:25, 25, 25, 25:2, 2, 2))
@@ -97,7 +97,7 @@ test_that("leading the climate predictor works as expected", {
   r <- epi_recipe(x) %>%
     step_epi_ahead(y, ahead = 14L) %>%
     step_epi_lag(y, lag = c(0, 7L, 14L)) %>%
-    step_climate(y, forecast_ahead = 2L) %>%
+    step_climate(y, forecast_ahead = 2L, time_type = "epiweek") %>%
     # matches the response
     step_epi_naomit()
   p <- prep(r, x)

From 759a3b80ea02c389b58cae729c7283c71b58e453 Mon Sep 17 00:00:00 2001
From: David Weber <david.weber2@pm.me>
Date: Fri, 14 Feb 2025 12:41:33 -0800
Subject: [PATCH 3/3] unneeded else branch

Co-authored-by: Daniel McDonald <dajmcdon@gmail.com>
---
 R/step_climate.R | 6 +-----
 1 file changed, 1 insertion(+), 5 deletions(-)

diff --git a/R/step_climate.R b/R/step_climate.R
index 6169bd2a7..db66aa32d 100644
--- a/R/step_climate.R
+++ b/R/step_climate.R
@@ -132,11 +132,7 @@ step_climate <-
     n_outcomes <- sum(recipe$var_info$role == "outcome")
     time_type <- rlang::arg_match(time_type)
     edf_time_type <- attr(recipe$template, "metadata")$time_type
-    if (time_type == "detect") {
-      time_type <- edf_time_type
-    } else {
-      edf_time_type <- attr(recipe$template, "metadata")$time_type
-    }
+    if (time_type == "detect") time_type <- edf_time_type
     if (edf_time_type == "custom") {
       cli_abort("This step only works with daily, weekly, or yearmonth data.")
     }