Skip to content

Move old forecasters #123

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 7 commits into from
Aug 15, 2022
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 2 additions & 1 deletion DESCRIPTION
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
Package: epipredict
Title: Basic epidemiology forecasting methods
Version: 0.0.1
Version: 0.0.2
Authors@R: c(
person("Daniel", "McDonald", , "[email protected]", role = c("aut","cre")),
person("Jacob", "Bien", role = "aut"),
Expand All @@ -19,6 +19,7 @@ Description: A forecasting "framework" for creating epidemiological forecasts
License: MIT + file LICENSE
URL: https://github.com/cmu-delphi/epipredict/,
https://cmu-delphi.github.io/epipredict
BugReports: https://github.com/cmu-delphi/epipredict/issues/
Depends:
R (>= 3.5.0)
Imports:
Expand Down
12 changes: 1 addition & 11 deletions NAMESPACE
Original file line number Diff line number Diff line change
Expand Up @@ -64,9 +64,7 @@ export(add_frosting)
export(add_layer)
export(apply_frosting)
export(arx_args_list)
export(arx_epi_forecaster)
export(arx_forecaster)
export(create_lags_and_leads)
export(create_layer)
export(default_epi_recipe_blueprint)
export(detect_layer)
Expand All @@ -82,18 +80,13 @@ export(extract_layers)
export(extrapolate_quantiles)
export(flatline)
export(flatline_args_list)
export(flatline_epi_forecaster)
export(flatline_forecaster)
export(frosting)
export(get_precision)
export(get_test_data)
export(grab_names)
export(is_epi_recipe)
export(is_epi_workflow)
export(is_layer)
export(knn_iteraive_ar_args_list)
export(knn_iteraive_ar_forecaster)
export(knnarx_args_list)
export(knnarx_forecaster)
export(layer)
export(layer_add_forecast_date)
export(layer_add_target_date)
Expand All @@ -108,8 +101,6 @@ export(new_default_epi_recipe_blueprint)
export(new_epi_recipe_blueprint)
export(remove_frosting)
export(slather)
export(smooth_arx_args_list)
export(smooth_arx_forecaster)
export(step_epi_ahead)
export(step_epi_lag)
export(step_epi_naomit)
Expand Down Expand Up @@ -139,5 +130,4 @@ importFrom(stats,predict)
importFrom(stats,qnorm)
importFrom(stats,quantile)
importFrom(stats,residuals)
importFrom(stats,setNames)
importFrom(tibble,tibble)
119 changes: 69 additions & 50 deletions R/arx_forecaster.R
Original file line number Diff line number Diff line change
@@ -1,68 +1,87 @@
#' AR forecaster with optional covariates
#' Direct autoregressive forecaster with covariates
#'
#' @param x Covariates. Allowed to be missing (resulting in AR on `y`).
#' @param y Response.
#' @param key_vars Factor(s). A prediction will be made for each unique
#' combination.
#' @param time_value the time value associated with each row of measurements.
#' @param args Additional arguments specifying the forecasting task. Created
#' by calling `arx_args_list()`.
#' This is an autoregressive forecasting model for
#' [epiprocess::epi_df] data. It does "direct" forecasting, meaning
#' that it estimates a model for a particular target horizon.
#'
#' @return A data frame of point (and optionally interval) forecasts at a single
#' ahead (unique horizon) for each unique combination of `key_vars`.
#'
#' @param epi_data An `epi_df` object
#' @param outcome A character (scalar) specifying the outcome (in the
#' `epi_df`).
#' @param predictors A character vector giving column(s) of predictor
#' variables.
#' @param trainer A `{parsnip}` model describing the type of estimation.
#' For now, we enforce `mode = "regression"`.
#' @param args_list A list of customization arguments to determine
#' the type of forecasting model. See [arx_args_list()].
#'
#' @return A list with (1) `predictions` an `epi_df` of predicted values
#' and (2) `epi_workflow`, a list that encapsulates the entire estimation
#' workflow
#' @export
arx_forecaster <- function(x, y, key_vars, time_value,
args = arx_args_list()) {
#'
#' @examples
#' jhu <- case_death_rate_subset %>%
#' dplyr::filter(time_value >= as.Date("2021-12-01"))
#'
#' out <- arx_forecaster(jhu, "death_rate",
#' c("case_rate", "death_rate"))
arx_forecaster <- function(epi_data,
outcome,
predictors,
trainer = parsnip::linear_reg(),
args_list = arx_args_list()) {

# TODO: function to verify standard forecaster signature inputs
validate_forecaster_inputs(epi_data, outcome, predictors)
if (!is.list(trainer) || trainer$mode != "regression")
cli_stop("{trainer} must be a `parsnip` method of mode 'regression'.")
lags <- arx_lags_validator(predictors, args_list$lags)

assign_arg_list(args)
if (is.null(key_vars)) { # this is annoying/repetitive, seemingly necessary?
keys <- NULL
distinct_keys <- tibble(.dump = NA)
} else {
keys <- tibble::tibble(key_vars)
distinct_keys <- dplyr::distinct(keys)
r <- epi_recipe(epi_data)
for (l in seq_along(lags)) {
p <- predictors[l]
r <- step_epi_lag(r, !!p, lag = lags[[l]])
}
r <- r %>%
step_epi_ahead(dplyr::all_of(!!outcome), ahead = args_list$ahead) %>%
step_epi_naomit()
# should limit the training window here (in an open PR)
# What to do if insufficient training data? Add issue.

# Return NA if insufficient training data
if (length(y) < min_train_window + max_lags + ahead) {
qnames <- probs_to_string(levels)
out <- dplyr::bind_cols(distinct_keys, point = NA) %>%
dplyr::select(!dplyr::any_of(".dump"))
return(enframer(out, qnames))
}
forecast_date <- args_list$forecast_date %||% max(epi_data$time_value)
target_date <- args_list$target_date %||% forecast_date + args_list$ahead
f <- frosting() %>%
layer_predict() %>%
# layer_naomit(.pred) %>%
layer_residual_quantiles(
probs = args_list$levels,
symmetrize = args_list$symmetrize) %>%
layer_add_forecast_date(forecast_date = forecast_date) %>%
layer_add_target_date(target_date = target_date)
if (args_list$nonneg) f <- layer_threshold(f, dplyr::starts_with(".pred"))

dat <- create_lags_and_leads(x, y, lags, ahead, time_value, keys)
dat$x0 <- 1
latest <- get_test_data(r, epi_data)

obj <- stats::lm(
y1 ~ . + 0,
data = dat %>% dplyr::select(starts_with(c("x", "y")))
wf <- epi_workflow(r, trainer, f) %>% generics::fit(epi_data)
list(
predictions = predict(wf, new_data = latest),
epi_workflow = wf
)
}

point <- make_predictions(obj, dat, time_value, keys)

# Residuals, simplest case, requires
# 1. same quantiles for all keys
# 2. `residuals(obj)` works
r <- residuals(obj)
q <- residual_quantiles(r, point, levels, symmetrize)

# Harder case requires handling failures of 1 and or 2, neither implemented
# 1. different quantiles by key, need to bind the keys, then group_modify
# 2 fails. need to bind the keys, grab, y and yhat, subtract
if (nonneg) {
q <- dplyr::mutate(q, dplyr::across(dplyr::everything(), ~ pmax(.x, 0)))
arx_lags_validator <- function(predictors, lags) {
p <- length(predictors)
if (!is.list(lags)) lags <- list(lags)
if (length(lags) == 1) lags <- rep(lags, p)
else if (length(lags) < p) {
cli_stop(
"You have requested {p} predictors but lags cannot be recycled to match."
)
}

return(
dplyr::bind_cols(distinct_keys, q) %>%
dplyr::select(!dplyr::any_of(".dump"))
)
lags
}


#' ARX forecaster argument constructor
#'
#' Constructs a list of arguments for [arx_forecaster()].
Expand Down
83 changes: 0 additions & 83 deletions R/arx_forecaster_mod.R

This file was deleted.

1 change: 1 addition & 0 deletions R/blueprint-epi_recipe-default.R
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@
#' @details The `bake_dependent_roles` are automatically set to `epi_df` defaults.
#' @return A recipe blueprint.
#'
#' @keywords internal
#' @export
new_epi_recipe_blueprint <-
function(intercept = FALSE, allow_novel_levels = FALSE, fresh = TRUE,
Expand Down
1 change: 1 addition & 0 deletions R/df_mat_mul.R
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@
#' @return A data.frame with the new columns at the right. Original
#' columns are removed.
#' @export
#' @keywords internal
#'
#' @examples
#' df <- data.frame(matrix(1:200, ncol = 10))
Expand Down
2 changes: 1 addition & 1 deletion R/epi_keys.R
Original file line number Diff line number Diff line change
Expand Up @@ -3,8 +3,8 @@
#' @param x a data.frame, tibble, or epi_df
#'
#' @return If an `epi_df`, this returns all "keys". Otherwise `NULL`
#' @keywords internal
#' @export
#'
epi_keys <- function(x) {
UseMethod("epi_keys")
}
Expand Down
1 change: 1 addition & 0 deletions R/epi_recipe.R
Original file line number Diff line number Diff line change
Expand Up @@ -215,6 +215,7 @@ epi_form2args <- function(formula, data, ...) {
#' @param x An object.
#' @return `TRUE` if the object inherits from `epi_recipe`.
#'
#' @keywords internal
#' @export
is_epi_recipe <- function(x) {
inherits(x, "epi_recipe")
Expand Down
2 changes: 2 additions & 0 deletions R/epi_shift.R
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,8 @@
#' @param keys Data frame, vector, or `NULL`. Additional grouping vars.
#' @param out_name Chr. The output list will use this as a prefix.
#'
#' @keywords internal
#'
#' @return a list of tibbles
epi_shift <- function(x, shifts, time_value, keys = NULL, out_name = "x") {
if (!is.data.frame(x)) x <- data.frame(x)
Expand Down
1 change: 1 addition & 0 deletions R/epi_workflow.R
Original file line number Diff line number Diff line change
Expand Up @@ -57,6 +57,7 @@ epi_workflow <- function(preprocessor = NULL, spec = NULL, postprocessor = NULL)
#' @param x An object.
#' @return `TRUE` if the object inherits from `epi_workflow`.
#'
#' @keywords internal
#' @export
is_epi_workflow <- function(x) {
inherits(x, "epi_workflow")
Expand Down
2 changes: 2 additions & 0 deletions R/extract.R
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,8 @@
#' @return An object originally passed as an argument to a layer or step
#' @export
#'
#' @keywords internal
#'
#' @examples
#' f <- frosting() %>%
#' layer_predict() %>%
Expand Down
3 changes: 2 additions & 1 deletion R/flatline.R
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@
#' horizon. The right hand side must contain any keys (locations) for the
#' panel data separated by plus. The observed time series must come last.
#' For example
#' ```
#' ```r
#' form <- as.formula(lead7_y ~ state + age + y)
#' ```
#' Note that this function doesn't DO the shifting, that has to be done
Expand All @@ -26,6 +26,7 @@
#' predictions for future data (the last observed of the outcome for each
#' combination of keys.
#' @export
#' @keywords internal
#'
#' @examples
#' tib <- data.frame(y = runif(100),
Expand Down
10 changes: 5 additions & 5 deletions R/flatline_epi_forecaster.R → R/flatline_forecaster.R
Original file line number Diff line number Diff line change
Expand Up @@ -26,10 +26,10 @@
#' jhu <- case_death_rate_subset %>%
#' dplyr::filter(time_value >= as.Date("2021-12-01"))
#'
#' out <- flatline_epi_forecaster(jhu, "death_rate")
flatline_epi_forecaster <- function(epi_data,
outcome,
args_list = flatline_args_list()) {
#' out <- flatline_forecaster(jhu, "death_rate")
flatline_forecaster <- function(epi_data,
outcome,
args_list = flatline_args_list()) {

validate_forecaster_inputs(epi_data, outcome, "time_value")
keys <- epi_keys(epi_data)
Expand Down Expand Up @@ -71,7 +71,7 @@ flatline_epi_forecaster <- function(epi_data,

#' Flatline forecaster argument constructor
#'
#' Constructs a list of arguments for [flatline_epi_forecaster()].
#' Constructs a list of arguments for [flatline_forecaster()].
#'
#' @inheritParams arx_args_list
#'
Expand Down
1 change: 1 addition & 0 deletions R/grab_names.R
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@
#' be used to select a range of variables.
#'
#' @export
#' @keywords internal
#' @return a character vector
#' @examples
#' df <- data.frame(a = 1, b = 2, cc = rep(NA, 3))
Expand Down
Loading