Skip to content

Climate suggestions #441

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 3 commits into from
Feb 14, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions R/climatological_forecaster.R
Original file line number Diff line number Diff line change
Expand Up @@ -67,6 +67,8 @@ climatological_forecaster <- function(epi_data,
horizon <- args_list$forecast_horizon
window_size <- args_list$window_size
time_type <- args_list$time_type
# check that the prediction time type is more granular than epi_data's
# time_type
ttype_ord <- match(time_type, c("day", "epiweek", "week", "month"))
ttype_ord <- ttype_ord - as.integer(ttype_ord > 2)
edf_ttype_ord <- match(edf_time_type, c("day", "week", "yearmonth"))
Expand Down
1 change: 1 addition & 0 deletions R/layer_threshold_preds.R
Original file line number Diff line number Diff line change
Expand Up @@ -61,6 +61,7 @@ layer_threshold_new <-



#' restrict various objects to the interval [lower, upper]
snap <- function(x, lower, upper, ...) {
UseMethod("snap")
}
Expand Down
38 changes: 29 additions & 9 deletions R/step_climate.R
Original file line number Diff line number Diff line change
@@ -1,13 +1,14 @@
#' Calculate a climatological variable based on the history
#'
#' `step_climate()` creates a *specification* of a recipe step
#' that will generate one or more new columns of derived data. This step
#' examines all available seasons in the training data and calculates the
#' a measure of center for the "typical" season. Think of this like with
#' the weather: to predict the temperature in January in Pittsburgh, PA,
#' I might look at all previous January's on record, average their temperatures,
#' and include that in my model. So it is important to _align_ the forecast
#' horizon with the climate. See the details for more information.
#' `step_climate()` creates a *specification* of a recipe step that will
#' generate one or more new columns of derived data. This step examines all
#' available seasons in the training data and calculates the a measure of center
#' for the "typical" season. Think of this like with the weather: to predict the
#' temperature in January in Pittsburgh, PA, I might look at all previous
#' January's on record, average their temperatures, and include that in my
#' model. So it is important to _align_ the forecast horizon with the climate.
#' This step will work best if added after `step_epi_ahead()`, but that is not
#' strictly required. See the details for more information.
#'
#' @details
#' Construction of a climate predictor can be helpful with strongly seasonal
Expand Down Expand Up @@ -101,12 +102,22 @@
#' r %>%
#' prep(covid_case_death_rates) %>%
#' bake(new_data = NULL)
#'
#' # switching the order is possible if you specify `forecast_ahead`
#' r <- epi_recipe(covid_case_death_rates) %>%
#' step_climate(death_rate, forecast_ahead = 7, time_type = "day") %>%
#' step_epi_ahead(death_rate, ahead = 7) %>%
#' r()
#'
#' r %>%
#' prep(covid_case_death_rates) %>%
#' bake(new_data = NULL)
step_climate <-
function(recipe,
...,
forecast_ahead = "detect",
role = "predictor",
time_type = c("epiweek", "week", "month", "day"),
time_type = c("detect", "epiweek", "week", "month", "day"),
center_method = c("median", "mean"),
window_size = 3L,
epi_keys = NULL,
Expand All @@ -121,6 +132,7 @@ step_climate <-
n_outcomes <- sum(recipe$var_info$role == "outcome")
time_type <- rlang::arg_match(time_type)
edf_time_type <- attr(recipe$template, "metadata")$time_type
if (time_type == "detect") time_type <- edf_time_type
if (edf_time_type == "custom") {
cli_abort("This step only works with daily, weekly, or yearmonth data.")
}
Expand Down Expand Up @@ -315,6 +327,14 @@ print.step_climate <- function(x, width = max(20, options()$width - 30), ...) {
invisible(x)
}

#' group col by .idx values and sum windows around each .idx value
#' @param .idx the relevant periodic part of time value, e.g. the week number
#' @param col the list of values indexed by `.idx`
#' @param weights how much to weigh each particular datapoint
#' @param aggr the aggregation function, probably Quantile, mean or median
#' @param window_size the number of .idx entries before and after to include in
#' the aggregation
#' @param modulus the maximum value of `.idx`
roll_modular_multivec <- function(col, .idx, weights, aggr, window_size, modulus) {
tib <- tibble(col = col, weights = weights, .idx = .idx) |>
arrange(.idx) |>
Expand Down
4 changes: 2 additions & 2 deletions man/step_adjust_latency.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

11 changes: 7 additions & 4 deletions tests/testthat/test-step_climate.R
Original file line number Diff line number Diff line change
Expand Up @@ -10,20 +10,23 @@ test_that("roll_modular_multivec works", {
Median <- function(x, w) median(x, na.rm = TRUE)

# unweighted mean
# window of size 0
expected_res <- tib |>
mutate(.idx = .idx %% modulus, .idx = .idx + (.idx == 0) * modulus) |>
summarise(climate_pred = weighted.mean(col, w = w), .by = .idx)
expect_equal(
roll_modular_multivec(tib$col, tib$.idx, tib$w, Mean, 0, modulus),
expected_res
)
w_size <- 1L
# window of size 1, which includes everything
expected_res <- tibble(.idx = as.double(1:3), climate_pred = mean(tib$col))
expect_equal(
roll_modular_multivec(tib$col, tib$.idx, tib$w, Mean, 1L, modulus),
expected_res
)

# weighted mean
# window of size 0
tib$w <- c(1, 2, 3, 1, 2, 1, 1, 2, 2, 1)
expected_res <- tib |>
mutate(.idx = .idx %% modulus, .idx = .idx + (.idx == 0) * modulus) |>
Expand All @@ -32,7 +35,7 @@ test_that("roll_modular_multivec works", {
roll_modular_multivec(tib$col, tib$.idx, tib$w, Mean, 0, modulus),
expected_res
)
tib$w <- c(1, 2, 3, 1, 2, 1, 1, 2, 2, 1)
# window of size 1
expected_res <- tibble(
.idx = as.double(1:3),
climate_pred = weighted.mean(tib$col, tib$w)
Expand Down Expand Up @@ -66,7 +69,7 @@ test_that("prep/bake steps create the correct training data", {
) %>%
as_epi_df()
# epiweeks 1, 52, and 53 are all 1, note that there are days in wk 52, 2 in wk 53
r <- epi_recipe(x) %>% step_climate(y)
r <- epi_recipe(x) %>% step_climate(y, time_type = "epiweek")
p <- prep(r, x)

expected_res <- tibble(.idx = 1:53, climate_y = c(2, 2:25, 25, 25, 25:2, 2, 2))
Expand Down Expand Up @@ -94,7 +97,7 @@ test_that("leading the climate predictor works as expected", {
r <- epi_recipe(x) %>%
step_epi_ahead(y, ahead = 14L) %>%
step_epi_lag(y, lag = c(0, 7L, 14L)) %>%
step_climate(y, forecast_ahead = 2L) %>%
step_climate(y, forecast_ahead = 2L, time_type = "epiweek") %>%
# matches the response
step_epi_naomit()
p <- prep(r, x)
Expand Down
Loading