From 4d0e44ac8c46751da381854777d888e8eab7c5df Mon Sep 17 00:00:00 2001 From: kenmawer Date: Tue, 24 May 2022 16:19:24 -0700 Subject: [PATCH 01/65] New file --- tests/testthat/test-df_mat_mul.R | 19 +++++++++++++++++++ 1 file changed, 19 insertions(+) create mode 100644 tests/testthat/test-df_mat_mul.R diff --git a/tests/testthat/test-df_mat_mul.R b/tests/testthat/test-df_mat_mul.R new file mode 100644 index 000000000..166fdc92d --- /dev/null +++ b/tests/testthat/test-df_mat_mul.R @@ -0,0 +1,19 @@ +df <- data.frame(matrix(1:100, ncol = 5)) +mat <- matrix(1:4, ncol = 2) +df_mat_mul(df, mat, "z", dplyr::num_range("X", 2:3)) +test_that("df_mat_mul checks inputs", { + expect_error(df_mat_mul(30,mat)) + expect_error(df_mat_mul(df,20)) +}) + +test_that("Incompatible matrix multipication cannot happen", { + expect_error(df_mat_mul(df, mat, "z", dplyr::num_range("X", 1:3))) +}) + +test_that("Matrix multiplication is being handled as expected", { + X <- df[c(1,4,5)] + Z <- as.data.frame(as.matrix(df[2:3]) %*% mat) + colnames(Z) <- c("z1","z2") + output <- cbind(X,Z) + expect_identical(df_mat_mul(df,mat, "z", dplyr::num_range("X", 2:3)),output) +}) From 881be8882eb15186b63f3b1a798dde5279d72cf9 Mon Sep 17 00:00:00 2001 From: kenmawer Date: Tue, 24 May 2022 16:37:22 -0700 Subject: [PATCH 02/65] New test --- tests/testthat/test-assign_arg_list.R | 3 +++ 1 file changed, 3 insertions(+) create mode 100644 tests/testthat/test-assign_arg_list.R diff --git a/tests/testthat/test-assign_arg_list.R b/tests/testthat/test-assign_arg_list.R new file mode 100644 index 000000000..d5229e6ac --- /dev/null +++ b/tests/testthat/test-assign_arg_list.R @@ -0,0 +1,3 @@ +test_that("Cat",{ + expect_error(assign_arg_list(c(1,2,3))) +}) From 0f2fb31b526d3339accf91f0d6b715608ee5ad89 Mon Sep 17 00:00:00 2001 From: kenmawer Date: Tue, 24 May 2022 16:45:44 -0700 Subject: [PATCH 03/65] Updated descriptor --- tests/testthat/test-assign_arg_list.R | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/testthat/test-assign_arg_list.R b/tests/testthat/test-assign_arg_list.R index d5229e6ac..bf5868032 100644 --- a/tests/testthat/test-assign_arg_list.R +++ b/tests/testthat/test-assign_arg_list.R @@ -1,3 +1,3 @@ -test_that("Cat",{ +test_that("Stop when errors occur",{ expect_error(assign_arg_list(c(1,2,3))) }) From f5ab89087775cd7deabe6ff614bdf3bf94007097 Mon Sep 17 00:00:00 2001 From: kenmawer Date: Wed, 25 May 2022 09:30:24 -0700 Subject: [PATCH 04/65] Updated test --- tests/testthat/test-assign_arg_list.R | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/tests/testthat/test-assign_arg_list.R b/tests/testthat/test-assign_arg_list.R index bf5868032..017c40935 100644 --- a/tests/testthat/test-assign_arg_list.R +++ b/tests/testthat/test-assign_arg_list.R @@ -1,3 +1,6 @@ -test_that("Stop when errors occur",{ +test_that("First argument must be a list",{ expect_error(assign_arg_list(c(1,2,3))) }) +test_that("All arguments should be named",{ + expect_error(assign_arg_list(list(1,2))) +}) From 2597b906bfffab4e2f4b36461aeb98dc5bb30413 Mon Sep 17 00:00:00 2001 From: kenmawer Date: Wed, 25 May 2022 10:41:34 -0700 Subject: [PATCH 05/65] Template for new lag function. --- R/epi_lag2.R | 123 +++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 123 insertions(+) create mode 100644 R/epi_lag2.R diff --git a/R/epi_lag2.R b/R/epi_lag2.R new file mode 100644 index 000000000..c52da8bec --- /dev/null +++ b/R/epi_lag2.R @@ -0,0 +1,123 @@ +#' Create a lagged predictor +#' +#' `step_epi_lag` creates a *specification* of a recipe step that +#' will add new columns of lagged data. Lagged data will +#' by default include NA values where the lag was induced. +#' These can be removed with [step_naomit()], or you may +#' specify an alternative filler value with the `default` +#' argument. +#' +#' @param lag A vector of positive integers. Each specified column will be +#' lagged for each value in the vector. +#' @template step-return +#' +#' @details The step assumes that the data are already _in the proper sequential +#' order_ for lagging. +#' +#' @family row operation steps +#' @export +#' @rdname step_epi_ahead +step_epi_lag2 <- + function(recipe, + ..., + role = "predictor", + trained = FALSE, + lag = 1, + prefix = "lag_", + default = NA, + keys = epi_keys(recipe), + columns = NULL, + skip = FALSE, + id = rand_id("epi_lag")) { + add_step( + recipe, + step_epi_lag_new( + terms = dplyr::enquos(...), + role = role, + trained = trained, + lag = lag, + prefix = prefix, + default = default, + keys = keys, + columns = columns, + skip = skip, + id = id + ) + ) + } + +step_epi_lag2_new <- + function(terms, role, trained, lag, prefix, default, keys, + columns, skip, id) { + step( + subclass = "epi_lag", + terms = terms, + role = role, + trained = trained, + lag = lag, + prefix = prefix, + default = default, + keys = keys, + columns = columns, + skip = skip, + id = id + ) + } + +#' @export +prep.step_epi_lag2 <- function(x, training, info = NULL, ...) { + step_epi_lag_new( + terms = x$terms, + role = x$role, + trained = TRUE, + lag = x$lag, + prefix = x$prefix, + default = x$default, + keys = x$keys, + columns = recipes_eval_select(x$terms, training, info), + skip = x$skip, + id = x$id + ) +} + +#' @export +bake.step_epi_lag2 <- function(object, new_data, ...) { + if (!all(object$lag == as.integer(object$lag))) { + rlang::abort("step_epi_lag requires 'lag' argument to be integer valued.") + } + + grid <- tidyr::expand_grid(col = object$columns, lag_val = object$lag) %>% + dplyr::mutate(newname = glue::glue("{object$prefix}{lag_val}_{col}")) + + ## ensure no name clashes + new_data_names <- colnames(new_data) + intersection <- new_data_names %in% grid$newname + if (any(intersection)) { + rlang::abort( + paste0("Name collision occured in `", class(object)[1], + "`. The following variable names already exists: ", + paste0(new_data_names[intersection], collapse = ", "), + ".")) + } + ok <- object$keys + lagged <- purrr::reduce( + purrr::pmap(grid, epi_shift_single, x = new_data, key_cols = ok), + dplyr::full_join, + by = ok + ) + + dplyr::full_join(new_data, lagged, by = ok) %>% + dplyr::group_by(dplyr::across(dplyr::all_of(ok[-1]))) %>% + dplyr::arrange(time_value) %>% + dplyr::ungroup() + +} + +#' @export +print.step_epi_lag2 <- + function(x, width = max(20, options()$width - 30), ...) { + ## TODO add printing of the lags + title <- "Lagging " + recipes::print_step(x$columns, x$terms, x$trained, title, width) + invisible(x) + } From 3457ee50fd6459a9e9180a5d23c3df795422ffba Mon Sep 17 00:00:00 2001 From: kenmawer Date: Wed, 25 May 2022 13:34:34 -0700 Subject: [PATCH 06/65] Replaced ahead with lags of negative numbers. --- R/epi_ahead.R | 163 --------------------------------------- R/epi_lag.R | 6 +- R/epi_lag2.R | 123 ----------------------------- musings/example-recipe.R | 2 +- 4 files changed, 4 insertions(+), 290 deletions(-) delete mode 100644 R/epi_ahead.R delete mode 100644 R/epi_lag2.R diff --git a/R/epi_ahead.R b/R/epi_ahead.R deleted file mode 100644 index 3434b7d93..000000000 --- a/R/epi_ahead.R +++ /dev/null @@ -1,163 +0,0 @@ -#' Create a leading outcome -#' -#' `step_epi_ahead` creates a *specification* of a recipe step that -#' will add new columns of leading data. Leading data will -#' by default include NA values where the lag was induced. -#' These can be removed with [step_naomit()], or you may -#' specify an alternative filler value with the `default` -#' argument. -#' -#' @param recipe A recipe object. The step will be added to the -#' sequence of operations for this recipe. -#' @param ... One or more selector functions to choose variables -#' for this step. See [selections()] for more details. -#' @param role For model terms created by this step, what analysis role should -#' they be assigned? -#' @param trained A logical to indicate if the quantities for -#' preprocessing have been estimated. -#' @param ahead A vector of positive integers. Each specified column will be -#' lead for each value in the vector. -#' @param prefix A prefix for generated column names, default to "ahead_". -#' @param default Determines what fills empty rows -#' left by leading/lagging (defaults to NA). -#' @param keys A character vector of the keys in an epi_df -#' @param columns A character string of variable names that will -#' be populated (eventually) by the `terms` argument. -#' @param skip A logical. Should the step be skipped when the -#' recipe is baked by [bake()]? While all operations are baked -#' when [prep()] is run, some operations may not be able to be -#' conducted on new data (e.g. processing the outcome variable(s)). -#' Care should be taken when using `skip = TRUE` as it may affect -#' the computations for subsequent operations. -#' @param id A character string that is unique to this step to identify it. -#' @template step-return -#' -#' @details The step assumes that the data are already _in the proper sequential -#' order_ for leading. -#' -#' @family row operation steps -#' @export -#' -#' @examples -#' tib <- tibble::tibble( -#' x = 1:5, y = 1:5, -#' time_value = seq(as.Date("2020-01-01"), by = 1, length.out = 5), -#' geo_value = "ca" -#' ) %>% epiprocess::as_epi_df() -#' -#' library(recipes) -#' epi_recipe(y ~ x, data = tib) %>% -#' step_epi_lag(x, lag = 2:3) %>% -#' step_epi_ahead(y, ahead = 1) %>% -#' prep(tib) %>% -#' bake(tib) -step_epi_ahead <- - function(recipe, - ..., - role = "outcome", - trained = FALSE, - ahead = 1, - prefix = "ahead_", - default = NA, - keys = epi_keys(recipe), - columns = NULL, - skip = FALSE, - id = rand_id("epi_ahead")) { - add_step( - recipe, - step_epi_ahead_new( - terms = dplyr::enquos(...), - role = role, - trained = trained, - ahead = ahead, - prefix = prefix, - default = default, - keys = keys, - columns = columns, - skip = skip, - id = id - ) - ) - } - -step_epi_ahead_new <- - function(terms, role, trained, ahead, prefix, default, keys, - columns, skip, id) { - step( - subclass = "epi_ahead", - terms = terms, - role = role, - trained = trained, - ahead = ahead, - prefix = prefix, - default = default, - keys = keys, - columns = columns, - skip = skip, - id = id - ) - } - -#' @export -prep.step_epi_ahead <- function(x, training, info = NULL, ...) { - step_epi_ahead_new( - terms = x$terms, - role = x$role, - trained = TRUE, - ahead = x$ahead, - prefix = x$prefix, - default = x$default, - keys = x$keys, - columns = recipes_eval_select(x$terms, training, info), - skip = x$skip, - id = x$id - ) -} - -#' @export -bake.step_epi_ahead <- function(object, new_data, ...) { - if (!all(object$ahead == as.integer(object$ahead))) { - rlang::abort("step_epi_ahead requires 'ahead' argument to be integer valued.") - } - - grid <- tidyr::expand_grid( - col = object$columns, lag_val = -object$ahead) %>% - dplyr::mutate( - ahead_val = -lag_val, - newname = glue::glue("{object$prefix}{ahead_val}_{col}") - ) %>% - dplyr::select(-ahead_val) - - ## ensure no name clashes - new_data_names <- colnames(new_data) - intersection <- new_data_names %in% grid$newname - if (any(intersection)) { - rlang::abort( - paste0("Name collision occured in `", class(object)[1], - "`. The following variable names already exists: ", - paste0(new_data_names[intersection], collapse = ", "), - ".")) - } - - ok <- object$keys - lagged <- purrr::reduce( - purrr::pmap(grid, epi_shift_single, x = new_data, key_cols = ok), - dplyr::full_join, - by = ok - ) - - dplyr::full_join(new_data, lagged, by = ok) %>% - dplyr::group_by(dplyr::across(dplyr::all_of(ok[-1]))) %>% - dplyr::arrange(time_value) %>% - dplyr::ungroup() - -} - -#' @export -print.step_epi_ahead <- - function(x, width = max(20, options()$width - 30), ...) { - ## TODO add printing of the lags - title <- "Leading " - recipes::print_step(x$columns, x$terms, x$trained, title, width) - invisible(x) - } diff --git a/R/epi_lag.R b/R/epi_lag.R index b7dc28b2a..46c93f105 100644 --- a/R/epi_lag.R +++ b/R/epi_lag.R @@ -16,13 +16,13 @@ #' #' @family row operation steps #' @export -#' @rdname step_epi_ahead +#' @rdname step_epi_lag step_epi_lag <- function(recipe, ..., role = "predictor", trained = FALSE, - lag = 1, + lag = 1, # negative for ahead prefix = "lag_", default = NA, keys = epi_keys(recipe), @@ -117,7 +117,7 @@ bake.step_epi_lag <- function(object, new_data, ...) { print.step_epi_lag <- function(x, width = max(20, options()$width - 30), ...) { ## TODO add printing of the lags - title <- "Lagging " + title <- ifelse(x$lag >= 0, "Lagging", "Leading") recipes::print_step(x$columns, x$terms, x$trained, title, width) invisible(x) } diff --git a/R/epi_lag2.R b/R/epi_lag2.R deleted file mode 100644 index c52da8bec..000000000 --- a/R/epi_lag2.R +++ /dev/null @@ -1,123 +0,0 @@ -#' Create a lagged predictor -#' -#' `step_epi_lag` creates a *specification* of a recipe step that -#' will add new columns of lagged data. Lagged data will -#' by default include NA values where the lag was induced. -#' These can be removed with [step_naomit()], or you may -#' specify an alternative filler value with the `default` -#' argument. -#' -#' @param lag A vector of positive integers. Each specified column will be -#' lagged for each value in the vector. -#' @template step-return -#' -#' @details The step assumes that the data are already _in the proper sequential -#' order_ for lagging. -#' -#' @family row operation steps -#' @export -#' @rdname step_epi_ahead -step_epi_lag2 <- - function(recipe, - ..., - role = "predictor", - trained = FALSE, - lag = 1, - prefix = "lag_", - default = NA, - keys = epi_keys(recipe), - columns = NULL, - skip = FALSE, - id = rand_id("epi_lag")) { - add_step( - recipe, - step_epi_lag_new( - terms = dplyr::enquos(...), - role = role, - trained = trained, - lag = lag, - prefix = prefix, - default = default, - keys = keys, - columns = columns, - skip = skip, - id = id - ) - ) - } - -step_epi_lag2_new <- - function(terms, role, trained, lag, prefix, default, keys, - columns, skip, id) { - step( - subclass = "epi_lag", - terms = terms, - role = role, - trained = trained, - lag = lag, - prefix = prefix, - default = default, - keys = keys, - columns = columns, - skip = skip, - id = id - ) - } - -#' @export -prep.step_epi_lag2 <- function(x, training, info = NULL, ...) { - step_epi_lag_new( - terms = x$terms, - role = x$role, - trained = TRUE, - lag = x$lag, - prefix = x$prefix, - default = x$default, - keys = x$keys, - columns = recipes_eval_select(x$terms, training, info), - skip = x$skip, - id = x$id - ) -} - -#' @export -bake.step_epi_lag2 <- function(object, new_data, ...) { - if (!all(object$lag == as.integer(object$lag))) { - rlang::abort("step_epi_lag requires 'lag' argument to be integer valued.") - } - - grid <- tidyr::expand_grid(col = object$columns, lag_val = object$lag) %>% - dplyr::mutate(newname = glue::glue("{object$prefix}{lag_val}_{col}")) - - ## ensure no name clashes - new_data_names <- colnames(new_data) - intersection <- new_data_names %in% grid$newname - if (any(intersection)) { - rlang::abort( - paste0("Name collision occured in `", class(object)[1], - "`. The following variable names already exists: ", - paste0(new_data_names[intersection], collapse = ", "), - ".")) - } - ok <- object$keys - lagged <- purrr::reduce( - purrr::pmap(grid, epi_shift_single, x = new_data, key_cols = ok), - dplyr::full_join, - by = ok - ) - - dplyr::full_join(new_data, lagged, by = ok) %>% - dplyr::group_by(dplyr::across(dplyr::all_of(ok[-1]))) %>% - dplyr::arrange(time_value) %>% - dplyr::ungroup() - -} - -#' @export -print.step_epi_lag2 <- - function(x, width = max(20, options()$width - 30), ...) { - ## TODO add printing of the lags - title <- "Lagging " - recipes::print_step(x$columns, x$terms, x$trained, title, width) - invisible(x) - } diff --git a/musings/example-recipe.R b/musings/example-recipe.R index 61e4b6868..d10bd7e8d 100644 --- a/musings/example-recipe.R +++ b/musings/example-recipe.R @@ -36,7 +36,7 @@ xx <- x %>% filter(time_value > "2021-12-01") # Baseline AR3 r <- epi_recipe(x) %>% # if we add this as a class, maybe we get better # behaviour downstream? - step_epi_ahead(death_rate, ahead = 7) %>% + step_epi_lag(death_rate, lag = -7) %>% step_epi_lag(death_rate, lag = c(0, 7, 14)) %>% step_epi_lag(case_rate, lag = c(0, 7, 14)) %>% step_naomit(all_predictors()) %>% From 93503d4bfbf500dced0ffc26f9ce7707529aff47 Mon Sep 17 00:00:00 2001 From: kenmawer Date: Wed, 25 May 2022 14:35:16 -0700 Subject: [PATCH 07/65] Documented changes. --- NAMESPACE | 4 -- man/step_epi_ahead.Rd | 117 ------------------------------------------ man/step_epi_lag.Rd | 41 +++++++++++++++ 3 files changed, 41 insertions(+), 121 deletions(-) delete mode 100644 man/step_epi_ahead.Rd create mode 100644 man/step_epi_lag.Rd diff --git a/NAMESPACE b/NAMESPACE index b632f8922..4a678f56f 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -1,6 +1,5 @@ # Generated by roxygen2: do not edit by hand -S3method(bake,step_epi_ahead) S3method(bake,step_epi_lag) S3method(epi_keys,default) S3method(epi_keys,epi_df) @@ -8,9 +7,7 @@ S3method(epi_keys,recipe) S3method(epi_recipe,default) S3method(epi_recipe,epi_df) S3method(epi_recipe,formula) -S3method(prep,step_epi_ahead) S3method(prep,step_epi_lag) -S3method(print,step_epi_ahead) S3method(print,step_epi_lag) export("%>%") export(arx_args_list) @@ -27,7 +24,6 @@ export(knnarx_args_list) export(knnarx_forecaster) export(smooth_arx_args_list) export(smooth_arx_forecaster) -export(step_epi_ahead) export(step_epi_lag) import(recipes) importFrom(magrittr,"%>%") diff --git a/man/step_epi_ahead.Rd b/man/step_epi_ahead.Rd deleted file mode 100644 index 006c224c3..000000000 --- a/man/step_epi_ahead.Rd +++ /dev/null @@ -1,117 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/epi_ahead.R, R/epi_lag.R -\name{step_epi_ahead} -\alias{step_epi_ahead} -\alias{step_epi_lag} -\title{Create a leading outcome} -\usage{ -step_epi_ahead( - recipe, - ..., - role = "outcome", - trained = FALSE, - ahead = 1, - prefix = "ahead_", - default = NA, - keys = epi_keys(recipe), - columns = NULL, - skip = FALSE, - id = rand_id("epi_ahead") -) - -step_epi_lag( - recipe, - ..., - role = "predictor", - trained = FALSE, - lag = 1, - prefix = "lag_", - default = NA, - keys = epi_keys(recipe), - columns = NULL, - skip = FALSE, - id = rand_id("epi_lag") -) -} -\arguments{ -\item{recipe}{A recipe object. The step will be added to the -sequence of operations for this recipe.} - -\item{...}{One or more selector functions to choose variables -for this step. See \code{\link[=selections]{selections()}} for more details.} - -\item{role}{For model terms created by this step, what analysis role should -they be assigned?} - -\item{trained}{A logical to indicate if the quantities for -preprocessing have been estimated.} - -\item{ahead}{A vector of positive integers. Each specified column will be -lead for each value in the vector.} - -\item{prefix}{A prefix for generated column names, default to "ahead_".} - -\item{default}{Determines what fills empty rows -left by leading/lagging (defaults to NA).} - -\item{keys}{A character vector of the keys in an epi_df} - -\item{columns}{A character string of variable names that will -be populated (eventually) by the \code{terms} argument.} - -\item{skip}{A logical. Should the step be skipped when the -recipe is baked by \code{\link[=bake]{bake()}}? While all operations are baked -when \code{\link[=prep]{prep()}} is run, some operations may not be able to be -conducted on new data (e.g. processing the outcome variable(s)). -Care should be taken when using \code{skip = TRUE} as it may affect -the computations for subsequent operations.} - -\item{id}{A character string that is unique to this step to identify it.} - -\item{lag}{A vector of positive integers. Each specified column will be -lagged for each value in the vector.} -} -\value{ -An updated version of \code{recipe} with the new step added to the -sequence of any existing operations. - -An updated version of \code{recipe} with the new step added to the -sequence of any existing operations. -} -\description{ -\code{step_epi_ahead} creates a \emph{specification} of a recipe step that -will add new columns of leading data. Leading data will -by default include NA values where the lag was induced. -These can be removed with \code{\link[=step_naomit]{step_naomit()}}, or you may -specify an alternative filler value with the \code{default} -argument. - -\code{step_epi_lag} creates a \emph{specification} of a recipe step that -will add new columns of lagged data. Lagged data will -by default include NA values where the lag was induced. -These can be removed with \code{\link[=step_naomit]{step_naomit()}}, or you may -specify an alternative filler value with the \code{default} -argument. -} -\details{ -The step assumes that the data are already \emph{in the proper sequential -order} for leading. - -The step assumes that the data are already \emph{in the proper sequential -order} for lagging. -} -\examples{ -tib <- tibble::tibble( - x = 1:5, y = 1:5, - time_value = seq(as.Date("2020-01-01"), by = 1, length.out = 5), - geo_value = "ca" - ) \%>\% epiprocess::as_epi_df() - -library(recipes) -epi_recipe(y ~ x, data = tib) \%>\% - step_epi_lag(x, lag = 2:3) \%>\% - step_epi_ahead(y, ahead = 1) \%>\% - prep(tib) \%>\% - bake(tib) -} -\concept{row operation steps} diff --git a/man/step_epi_lag.Rd b/man/step_epi_lag.Rd new file mode 100644 index 000000000..52361f696 --- /dev/null +++ b/man/step_epi_lag.Rd @@ -0,0 +1,41 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/epi_lag.R +\name{step_epi_lag} +\alias{step_epi_lag} +\title{Create a lagged predictor} +\usage{ +step_epi_lag( + recipe, + ..., + role = "predictor", + trained = FALSE, + lag = 1, + prefix = "lag_", + default = NA, + keys = epi_keys(recipe), + columns = NULL, + skip = FALSE, + id = rand_id("epi_lag") +) +} +\arguments{ +\item{lag}{A vector of positive integers. Each specified column will be +lagged for each value in the vector.} +} +\value{ +An updated version of \code{recipe} with the new step added to the +sequence of any existing operations. +} +\description{ +\code{step_epi_lag} creates a \emph{specification} of a recipe step that +will add new columns of lagged data. Lagged data will +by default include NA values where the lag was induced. +These can be removed with \code{\link[=step_naomit]{step_naomit()}}, or you may +specify an alternative filler value with the \code{default} +argument. +} +\details{ +The step assumes that the data are already \emph{in the proper sequential +order} for lagging. +} +\concept{row operation steps} From 139616b9785e2d78131c0143071c032437f04719 Mon Sep 17 00:00:00 2001 From: kenmawer Date: Thu, 26 May 2022 09:07:27 -0700 Subject: [PATCH 08/65] Updates on code --- R/epi_lag.R | 2 +- musings/example-recipe.R | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/R/epi_lag.R b/R/epi_lag.R index 46c93f105..650aa11c6 100644 --- a/R/epi_lag.R +++ b/R/epi_lag.R @@ -22,7 +22,7 @@ step_epi_lag <- ..., role = "predictor", trained = FALSE, - lag = 1, # negative for ahead + lag = 1, prefix = "lag_", default = NA, keys = epi_keys(recipe), diff --git a/musings/example-recipe.R b/musings/example-recipe.R index d10bd7e8d..6bc92537d 100644 --- a/musings/example-recipe.R +++ b/musings/example-recipe.R @@ -37,8 +37,8 @@ xx <- x %>% filter(time_value > "2021-12-01") r <- epi_recipe(x) %>% # if we add this as a class, maybe we get better # behaviour downstream? step_epi_lag(death_rate, lag = -7) %>% - step_epi_lag(death_rate, lag = c(0, 7, 14)) %>% - step_epi_lag(case_rate, lag = c(0, 7, 14)) %>% + # step_epi_lag(death_rate, lag = c(0, 7, 14)) %>% + # step_epi_lag(case_rate, lag = c(0, 7, 14)) %>% step_naomit(all_predictors()) %>% # below, `skip` means we don't do this at predict time # we should probably do something useful here to avoid user error From 016887569b6fa15e2a2dc211bc9f2790d310002c Mon Sep 17 00:00:00 2001 From: kenmawer Date: Thu, 26 May 2022 10:10:04 -0700 Subject: [PATCH 09/65] I still get an error --- R/{epi_lag.R => step_epi_lag.R} | 9 ++++++--- man/step_epi_lag.Rd | 2 +- musings/example-recipe.R | 4 ++-- 3 files changed, 9 insertions(+), 6 deletions(-) rename R/{epi_lag.R => step_epi_lag.R} (92%) diff --git a/R/epi_lag.R b/R/step_epi_lag.R similarity index 92% rename from R/epi_lag.R rename to R/step_epi_lag.R index 650aa11c6..292f09729 100644 --- a/R/epi_lag.R +++ b/R/step_epi_lag.R @@ -23,12 +23,12 @@ step_epi_lag <- role = "predictor", trained = FALSE, lag = 1, - prefix = "lag_", + prefix = ifelse(lag >= 0, "lag_","ahead_"), default = NA, keys = epi_keys(recipe), columns = NULL, skip = FALSE, - id = rand_id("epi_lag")) { + id = rand_id(ifelse(lag >= 0, "epi_lag","epi_ahead"))) { add_step( recipe, step_epi_lag_new( @@ -86,7 +86,10 @@ bake.step_epi_lag <- function(object, new_data, ...) { rlang::abort("step_epi_lag requires 'lag' argument to be integer valued.") } - grid <- tidyr::expand_grid(col = object$columns, lag_val = object$lag) %>% + is_neg <- object$lag < 0 + + grid <- tidyr::expand_grid(col = object$columns, + lag_val = object$lag) %>% dplyr::mutate(newname = glue::glue("{object$prefix}{lag_val}_{col}")) ## ensure no name clashes diff --git a/man/step_epi_lag.Rd b/man/step_epi_lag.Rd index 52361f696..fa86dac11 100644 --- a/man/step_epi_lag.Rd +++ b/man/step_epi_lag.Rd @@ -10,7 +10,7 @@ step_epi_lag( role = "predictor", trained = FALSE, lag = 1, - prefix = "lag_", + prefix = ifelse(lag >= 0, "lag_", "ahead_"), default = NA, keys = epi_keys(recipe), columns = NULL, diff --git a/musings/example-recipe.R b/musings/example-recipe.R index 6bc92537d..d10bd7e8d 100644 --- a/musings/example-recipe.R +++ b/musings/example-recipe.R @@ -37,8 +37,8 @@ xx <- x %>% filter(time_value > "2021-12-01") r <- epi_recipe(x) %>% # if we add this as a class, maybe we get better # behaviour downstream? step_epi_lag(death_rate, lag = -7) %>% - # step_epi_lag(death_rate, lag = c(0, 7, 14)) %>% - # step_epi_lag(case_rate, lag = c(0, 7, 14)) %>% + step_epi_lag(death_rate, lag = c(0, 7, 14)) %>% + step_epi_lag(case_rate, lag = c(0, 7, 14)) %>% step_naomit(all_predictors()) %>% # below, `skip` means we don't do this at predict time # we should probably do something useful here to avoid user error From a62be178711184a3745f72fad84e7f7d6e0ff37d Mon Sep 17 00:00:00 2001 From: kenmawer Date: Thu, 26 May 2022 11:14:49 -0700 Subject: [PATCH 10/65] Renamed a subclass. --- R/step_epi_lag.R | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/R/step_epi_lag.R b/R/step_epi_lag.R index 292f09729..d0f88d919 100644 --- a/R/step_epi_lag.R +++ b/R/step_epi_lag.R @@ -50,7 +50,7 @@ step_epi_lag_new <- function(terms, role, trained, lag, prefix, default, keys, columns, skip, id) { step( - subclass = "epi_lag", + subclass = "step_epi_lag", terms = terms, role = role, trained = trained, From beb7a37c79783b8f93f1ac3ccf35eab76eadea48 Mon Sep 17 00:00:00 2001 From: kenmawer Date: Thu, 26 May 2022 12:01:08 -0700 Subject: [PATCH 11/65] Reall needs fixing as I somehow managed to break this. --- R/step_epi_lag.R | 2 +- man/step_epi_lag.Rd | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/R/step_epi_lag.R b/R/step_epi_lag.R index d0f88d919..292f09729 100644 --- a/R/step_epi_lag.R +++ b/R/step_epi_lag.R @@ -50,7 +50,7 @@ step_epi_lag_new <- function(terms, role, trained, lag, prefix, default, keys, columns, skip, id) { step( - subclass = "step_epi_lag", + subclass = "epi_lag", terms = terms, role = role, trained = trained, diff --git a/man/step_epi_lag.Rd b/man/step_epi_lag.Rd index fa86dac11..ceda7daee 100644 --- a/man/step_epi_lag.Rd +++ b/man/step_epi_lag.Rd @@ -1,5 +1,5 @@ % Generated by roxygen2: do not edit by hand -% Please edit documentation in R/epi_lag.R +% Please edit documentation in R/step_epi_lag.R \name{step_epi_lag} \alias{step_epi_lag} \title{Create a lagged predictor} @@ -15,7 +15,7 @@ step_epi_lag( keys = epi_keys(recipe), columns = NULL, skip = FALSE, - id = rand_id("epi_lag") + id = rand_id(ifelse(lag >= 0, "epi_lag", "epi_ahead")) ) } \arguments{ From f3d182e7f967657bdf5a1ecc7ff729a9c878b902 Mon Sep 17 00:00:00 2001 From: kenmawer Date: Thu, 26 May 2022 12:21:12 -0700 Subject: [PATCH 12/65] Fixed issue of "step" duplicating. --- R/{step_epi_lag.R => epi_lag.R} | 0 man/step_epi_lag.Rd | 2 +- 2 files changed, 1 insertion(+), 1 deletion(-) rename R/{step_epi_lag.R => epi_lag.R} (100%) diff --git a/R/step_epi_lag.R b/R/epi_lag.R similarity index 100% rename from R/step_epi_lag.R rename to R/epi_lag.R diff --git a/man/step_epi_lag.Rd b/man/step_epi_lag.Rd index ceda7daee..05f6b8078 100644 --- a/man/step_epi_lag.Rd +++ b/man/step_epi_lag.Rd @@ -1,5 +1,5 @@ % Generated by roxygen2: do not edit by hand -% Please edit documentation in R/step_epi_lag.R +% Please edit documentation in R/epi_lag.R \name{step_epi_lag} \alias{step_epi_lag} \title{Create a lagged predictor} From e5aeaf188dbcf37e420d1c29c1dd8acd294b395b Mon Sep 17 00:00:00 2001 From: kenmawer Date: Thu, 26 May 2022 14:27:11 -0700 Subject: [PATCH 13/65] Update --- R/epi_lag.R | 6 +++--- man/step_epi_lag.Rd | 4 ++-- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/R/epi_lag.R b/R/epi_lag.R index 292f09729..fdf41293f 100644 --- a/R/epi_lag.R +++ b/R/epi_lag.R @@ -23,12 +23,12 @@ step_epi_lag <- role = "predictor", trained = FALSE, lag = 1, - prefix = ifelse(lag >= 0, "lag_","ahead_"), + prefix = "lag_", default = NA, keys = epi_keys(recipe), columns = NULL, skip = FALSE, - id = rand_id(ifelse(lag >= 0, "epi_lag","epi_ahead"))) { + id = rand_id("epi_lag")) { add_step( recipe, step_epi_lag_new( @@ -89,7 +89,7 @@ bake.step_epi_lag <- function(object, new_data, ...) { is_neg <- object$lag < 0 grid <- tidyr::expand_grid(col = object$columns, - lag_val = object$lag) %>% + lag_val = abs(object$lag)) %>% dplyr::mutate(newname = glue::glue("{object$prefix}{lag_val}_{col}")) ## ensure no name clashes diff --git a/man/step_epi_lag.Rd b/man/step_epi_lag.Rd index 05f6b8078..52361f696 100644 --- a/man/step_epi_lag.Rd +++ b/man/step_epi_lag.Rd @@ -10,12 +10,12 @@ step_epi_lag( role = "predictor", trained = FALSE, lag = 1, - prefix = ifelse(lag >= 0, "lag_", "ahead_"), + prefix = "lag_", default = NA, keys = epi_keys(recipe), columns = NULL, skip = FALSE, - id = rand_id(ifelse(lag >= 0, "epi_lag", "epi_ahead")) + id = rand_id("epi_lag") ) } \arguments{ From 7fa5163c83f09160d443c2911e459e8cae2bac8a Mon Sep 17 00:00:00 2001 From: kenmawer Date: Fri, 27 May 2022 11:01:46 -0700 Subject: [PATCH 14/65] Added epi_ahead --- R/epi_ahead.R | 163 ++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 163 insertions(+) create mode 100644 R/epi_ahead.R diff --git a/R/epi_ahead.R b/R/epi_ahead.R new file mode 100644 index 000000000..3434b7d93 --- /dev/null +++ b/R/epi_ahead.R @@ -0,0 +1,163 @@ +#' Create a leading outcome +#' +#' `step_epi_ahead` creates a *specification* of a recipe step that +#' will add new columns of leading data. Leading data will +#' by default include NA values where the lag was induced. +#' These can be removed with [step_naomit()], or you may +#' specify an alternative filler value with the `default` +#' argument. +#' +#' @param recipe A recipe object. The step will be added to the +#' sequence of operations for this recipe. +#' @param ... One or more selector functions to choose variables +#' for this step. See [selections()] for more details. +#' @param role For model terms created by this step, what analysis role should +#' they be assigned? +#' @param trained A logical to indicate if the quantities for +#' preprocessing have been estimated. +#' @param ahead A vector of positive integers. Each specified column will be +#' lead for each value in the vector. +#' @param prefix A prefix for generated column names, default to "ahead_". +#' @param default Determines what fills empty rows +#' left by leading/lagging (defaults to NA). +#' @param keys A character vector of the keys in an epi_df +#' @param columns A character string of variable names that will +#' be populated (eventually) by the `terms` argument. +#' @param skip A logical. Should the step be skipped when the +#' recipe is baked by [bake()]? While all operations are baked +#' when [prep()] is run, some operations may not be able to be +#' conducted on new data (e.g. processing the outcome variable(s)). +#' Care should be taken when using `skip = TRUE` as it may affect +#' the computations for subsequent operations. +#' @param id A character string that is unique to this step to identify it. +#' @template step-return +#' +#' @details The step assumes that the data are already _in the proper sequential +#' order_ for leading. +#' +#' @family row operation steps +#' @export +#' +#' @examples +#' tib <- tibble::tibble( +#' x = 1:5, y = 1:5, +#' time_value = seq(as.Date("2020-01-01"), by = 1, length.out = 5), +#' geo_value = "ca" +#' ) %>% epiprocess::as_epi_df() +#' +#' library(recipes) +#' epi_recipe(y ~ x, data = tib) %>% +#' step_epi_lag(x, lag = 2:3) %>% +#' step_epi_ahead(y, ahead = 1) %>% +#' prep(tib) %>% +#' bake(tib) +step_epi_ahead <- + function(recipe, + ..., + role = "outcome", + trained = FALSE, + ahead = 1, + prefix = "ahead_", + default = NA, + keys = epi_keys(recipe), + columns = NULL, + skip = FALSE, + id = rand_id("epi_ahead")) { + add_step( + recipe, + step_epi_ahead_new( + terms = dplyr::enquos(...), + role = role, + trained = trained, + ahead = ahead, + prefix = prefix, + default = default, + keys = keys, + columns = columns, + skip = skip, + id = id + ) + ) + } + +step_epi_ahead_new <- + function(terms, role, trained, ahead, prefix, default, keys, + columns, skip, id) { + step( + subclass = "epi_ahead", + terms = terms, + role = role, + trained = trained, + ahead = ahead, + prefix = prefix, + default = default, + keys = keys, + columns = columns, + skip = skip, + id = id + ) + } + +#' @export +prep.step_epi_ahead <- function(x, training, info = NULL, ...) { + step_epi_ahead_new( + terms = x$terms, + role = x$role, + trained = TRUE, + ahead = x$ahead, + prefix = x$prefix, + default = x$default, + keys = x$keys, + columns = recipes_eval_select(x$terms, training, info), + skip = x$skip, + id = x$id + ) +} + +#' @export +bake.step_epi_ahead <- function(object, new_data, ...) { + if (!all(object$ahead == as.integer(object$ahead))) { + rlang::abort("step_epi_ahead requires 'ahead' argument to be integer valued.") + } + + grid <- tidyr::expand_grid( + col = object$columns, lag_val = -object$ahead) %>% + dplyr::mutate( + ahead_val = -lag_val, + newname = glue::glue("{object$prefix}{ahead_val}_{col}") + ) %>% + dplyr::select(-ahead_val) + + ## ensure no name clashes + new_data_names <- colnames(new_data) + intersection <- new_data_names %in% grid$newname + if (any(intersection)) { + rlang::abort( + paste0("Name collision occured in `", class(object)[1], + "`. The following variable names already exists: ", + paste0(new_data_names[intersection], collapse = ", "), + ".")) + } + + ok <- object$keys + lagged <- purrr::reduce( + purrr::pmap(grid, epi_shift_single, x = new_data, key_cols = ok), + dplyr::full_join, + by = ok + ) + + dplyr::full_join(new_data, lagged, by = ok) %>% + dplyr::group_by(dplyr::across(dplyr::all_of(ok[-1]))) %>% + dplyr::arrange(time_value) %>% + dplyr::ungroup() + +} + +#' @export +print.step_epi_ahead <- + function(x, width = max(20, options()$width - 30), ...) { + ## TODO add printing of the lags + title <- "Leading " + recipes::print_step(x$columns, x$terms, x$trained, title, width) + invisible(x) + } From e5e7737cf6daf8ee3c489f2278ddce223e5ef9ca Mon Sep 17 00:00:00 2001 From: kenmawer Date: Fri, 27 May 2022 11:02:52 -0700 Subject: [PATCH 15/65] Fixed epi_lag --- R/{step_epi_lag.R => epi_lag.R} | 15 ++++++--------- 1 file changed, 6 insertions(+), 9 deletions(-) rename R/{step_epi_lag.R => epi_lag.R} (89%) diff --git a/R/step_epi_lag.R b/R/epi_lag.R similarity index 89% rename from R/step_epi_lag.R rename to R/epi_lag.R index d0f88d919..b7dc28b2a 100644 --- a/R/step_epi_lag.R +++ b/R/epi_lag.R @@ -16,19 +16,19 @@ #' #' @family row operation steps #' @export -#' @rdname step_epi_lag +#' @rdname step_epi_ahead step_epi_lag <- function(recipe, ..., role = "predictor", trained = FALSE, lag = 1, - prefix = ifelse(lag >= 0, "lag_","ahead_"), + prefix = "lag_", default = NA, keys = epi_keys(recipe), columns = NULL, skip = FALSE, - id = rand_id(ifelse(lag >= 0, "epi_lag","epi_ahead"))) { + id = rand_id("epi_lag")) { add_step( recipe, step_epi_lag_new( @@ -50,7 +50,7 @@ step_epi_lag_new <- function(terms, role, trained, lag, prefix, default, keys, columns, skip, id) { step( - subclass = "step_epi_lag", + subclass = "epi_lag", terms = terms, role = role, trained = trained, @@ -86,10 +86,7 @@ bake.step_epi_lag <- function(object, new_data, ...) { rlang::abort("step_epi_lag requires 'lag' argument to be integer valued.") } - is_neg <- object$lag < 0 - - grid <- tidyr::expand_grid(col = object$columns, - lag_val = object$lag) %>% + grid <- tidyr::expand_grid(col = object$columns, lag_val = object$lag) %>% dplyr::mutate(newname = glue::glue("{object$prefix}{lag_val}_{col}")) ## ensure no name clashes @@ -120,7 +117,7 @@ bake.step_epi_lag <- function(object, new_data, ...) { print.step_epi_lag <- function(x, width = max(20, options()$width - 30), ...) { ## TODO add printing of the lags - title <- ifelse(x$lag >= 0, "Lagging", "Leading") + title <- "Lagging " recipes::print_step(x$columns, x$terms, x$trained, title, width) invisible(x) } From 7912dd3d2ff8289fbf1996ab88d7e2576b718274 Mon Sep 17 00:00:00 2001 From: kenmawer Date: Fri, 27 May 2022 11:07:44 -0700 Subject: [PATCH 16/65] Fixed recipe --- musings/example-recipe.R | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/musings/example-recipe.R b/musings/example-recipe.R index d10bd7e8d..61e4b6868 100644 --- a/musings/example-recipe.R +++ b/musings/example-recipe.R @@ -36,7 +36,7 @@ xx <- x %>% filter(time_value > "2021-12-01") # Baseline AR3 r <- epi_recipe(x) %>% # if we add this as a class, maybe we get better # behaviour downstream? - step_epi_lag(death_rate, lag = -7) %>% + step_epi_ahead(death_rate, ahead = 7) %>% step_epi_lag(death_rate, lag = c(0, 7, 14)) %>% step_epi_lag(case_rate, lag = c(0, 7, 14)) %>% step_naomit(all_predictors()) %>% From 6741d65a00104dd8052a67acf6e3440d8a35c2de Mon Sep 17 00:00:00 2001 From: kenmawer Date: Fri, 27 May 2022 11:39:13 -0700 Subject: [PATCH 17/65] Updated changes and broken syntax. --- NAMESPACE | 4 ++ R/epi_lag.R | 8 --- man/step_epi_ahead.Rd | 117 ++++++++++++++++++++++++++++++++++++++++++ man/step_epi_lag.Rd | 41 --------------- 4 files changed, 121 insertions(+), 49 deletions(-) create mode 100644 man/step_epi_ahead.Rd delete mode 100644 man/step_epi_lag.Rd diff --git a/NAMESPACE b/NAMESPACE index 4a678f56f..b632f8922 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -1,5 +1,6 @@ # Generated by roxygen2: do not edit by hand +S3method(bake,step_epi_ahead) S3method(bake,step_epi_lag) S3method(epi_keys,default) S3method(epi_keys,epi_df) @@ -7,7 +8,9 @@ S3method(epi_keys,recipe) S3method(epi_recipe,default) S3method(epi_recipe,epi_df) S3method(epi_recipe,formula) +S3method(prep,step_epi_ahead) S3method(prep,step_epi_lag) +S3method(print,step_epi_ahead) S3method(print,step_epi_lag) export("%>%") export(arx_args_list) @@ -24,6 +27,7 @@ export(knnarx_args_list) export(knnarx_forecaster) export(smooth_arx_args_list) export(smooth_arx_forecaster) +export(step_epi_ahead) export(step_epi_lag) import(recipes) importFrom(magrittr,"%>%") diff --git a/R/epi_lag.R b/R/epi_lag.R index 3efaea73f..eeacf342c 100644 --- a/R/epi_lag.R +++ b/R/epi_lag.R @@ -85,15 +85,7 @@ bake.step_epi_lag <- function(object, new_data, ...) { if (!all(object$lag == as.integer(object$lag))) { rlang::abort("step_epi_lag requires 'lag' argument to be integer valued.") } - -<<<<<<< HEAD:R/epi_lag.R - is_neg <- object$lag < 0 - - grid <- tidyr::expand_grid(col = object$columns, - lag_val = abs(object$lag)) %>% -======= grid <- tidyr::expand_grid(col = object$columns, lag_val = object$lag) %>% ->>>>>>> 492234ba8c73adb74c7798d84ab908539b210ed5:R/step_epi_lag.R dplyr::mutate(newname = glue::glue("{object$prefix}{lag_val}_{col}")) ## ensure no name clashes diff --git a/man/step_epi_ahead.Rd b/man/step_epi_ahead.Rd new file mode 100644 index 000000000..006c224c3 --- /dev/null +++ b/man/step_epi_ahead.Rd @@ -0,0 +1,117 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/epi_ahead.R, R/epi_lag.R +\name{step_epi_ahead} +\alias{step_epi_ahead} +\alias{step_epi_lag} +\title{Create a leading outcome} +\usage{ +step_epi_ahead( + recipe, + ..., + role = "outcome", + trained = FALSE, + ahead = 1, + prefix = "ahead_", + default = NA, + keys = epi_keys(recipe), + columns = NULL, + skip = FALSE, + id = rand_id("epi_ahead") +) + +step_epi_lag( + recipe, + ..., + role = "predictor", + trained = FALSE, + lag = 1, + prefix = "lag_", + default = NA, + keys = epi_keys(recipe), + columns = NULL, + skip = FALSE, + id = rand_id("epi_lag") +) +} +\arguments{ +\item{recipe}{A recipe object. The step will be added to the +sequence of operations for this recipe.} + +\item{...}{One or more selector functions to choose variables +for this step. See \code{\link[=selections]{selections()}} for more details.} + +\item{role}{For model terms created by this step, what analysis role should +they be assigned?} + +\item{trained}{A logical to indicate if the quantities for +preprocessing have been estimated.} + +\item{ahead}{A vector of positive integers. Each specified column will be +lead for each value in the vector.} + +\item{prefix}{A prefix for generated column names, default to "ahead_".} + +\item{default}{Determines what fills empty rows +left by leading/lagging (defaults to NA).} + +\item{keys}{A character vector of the keys in an epi_df} + +\item{columns}{A character string of variable names that will +be populated (eventually) by the \code{terms} argument.} + +\item{skip}{A logical. Should the step be skipped when the +recipe is baked by \code{\link[=bake]{bake()}}? While all operations are baked +when \code{\link[=prep]{prep()}} is run, some operations may not be able to be +conducted on new data (e.g. processing the outcome variable(s)). +Care should be taken when using \code{skip = TRUE} as it may affect +the computations for subsequent operations.} + +\item{id}{A character string that is unique to this step to identify it.} + +\item{lag}{A vector of positive integers. Each specified column will be +lagged for each value in the vector.} +} +\value{ +An updated version of \code{recipe} with the new step added to the +sequence of any existing operations. + +An updated version of \code{recipe} with the new step added to the +sequence of any existing operations. +} +\description{ +\code{step_epi_ahead} creates a \emph{specification} of a recipe step that +will add new columns of leading data. Leading data will +by default include NA values where the lag was induced. +These can be removed with \code{\link[=step_naomit]{step_naomit()}}, or you may +specify an alternative filler value with the \code{default} +argument. + +\code{step_epi_lag} creates a \emph{specification} of a recipe step that +will add new columns of lagged data. Lagged data will +by default include NA values where the lag was induced. +These can be removed with \code{\link[=step_naomit]{step_naomit()}}, or you may +specify an alternative filler value with the \code{default} +argument. +} +\details{ +The step assumes that the data are already \emph{in the proper sequential +order} for leading. + +The step assumes that the data are already \emph{in the proper sequential +order} for lagging. +} +\examples{ +tib <- tibble::tibble( + x = 1:5, y = 1:5, + time_value = seq(as.Date("2020-01-01"), by = 1, length.out = 5), + geo_value = "ca" + ) \%>\% epiprocess::as_epi_df() + +library(recipes) +epi_recipe(y ~ x, data = tib) \%>\% + step_epi_lag(x, lag = 2:3) \%>\% + step_epi_ahead(y, ahead = 1) \%>\% + prep(tib) \%>\% + bake(tib) +} +\concept{row operation steps} diff --git a/man/step_epi_lag.Rd b/man/step_epi_lag.Rd deleted file mode 100644 index 52361f696..000000000 --- a/man/step_epi_lag.Rd +++ /dev/null @@ -1,41 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/epi_lag.R -\name{step_epi_lag} -\alias{step_epi_lag} -\title{Create a lagged predictor} -\usage{ -step_epi_lag( - recipe, - ..., - role = "predictor", - trained = FALSE, - lag = 1, - prefix = "lag_", - default = NA, - keys = epi_keys(recipe), - columns = NULL, - skip = FALSE, - id = rand_id("epi_lag") -) -} -\arguments{ -\item{lag}{A vector of positive integers. Each specified column will be -lagged for each value in the vector.} -} -\value{ -An updated version of \code{recipe} with the new step added to the -sequence of any existing operations. -} -\description{ -\code{step_epi_lag} creates a \emph{specification} of a recipe step that -will add new columns of lagged data. Lagged data will -by default include NA values where the lag was induced. -These can be removed with \code{\link[=step_naomit]{step_naomit()}}, or you may -specify an alternative filler value with the \code{default} -argument. -} -\details{ -The step assumes that the data are already \emph{in the proper sequential -order} for lagging. -} -\concept{row operation steps} From 11f56e7ccb1bc71d21ec17ee20fc79442c0da71d Mon Sep 17 00:00:00 2001 From: kenmawer Date: Fri, 27 May 2022 12:58:44 -0700 Subject: [PATCH 18/65] Copy-pasted internal --- R/epi_shift_internal.R | 122 +++++++++++++++++++++++++++++++++++++++++ 1 file changed, 122 insertions(+) create mode 100644 R/epi_shift_internal.R diff --git a/R/epi_shift_internal.R b/R/epi_shift_internal.R new file mode 100644 index 000000000..404bf7690 --- /dev/null +++ b/R/epi_shift_internal.R @@ -0,0 +1,122 @@ +#' Create a lagged predictor +#' +#' `step_epi_shift` creates a *specification* of a recipe step that +#' will add new columns of lagged data. Lagged data will +#' by default include NA values where the lag was induced. +#' These can be removed with [step_naomit()], or you may +#' specify an alternative filler value with the `default` +#' argument. +#' +#' @param shift A vector of integers. Each specified column will be +#' lagged for each value in the vector. +#' @template step-return +#' +#' @details The step assumes that the data are already _in the proper sequential +#' order_ for lagging. +#' +#' @family row operation steps +#' @export +#' @rdname step_epi_ahead +step_epi_lag2 <- + function(recipe, + ..., + role = "predictor", + trained = FALSE, + lag = 1, + prefix = "lag_", + default = NA, + keys = epi_keys(recipe), + columns = NULL, + skip = FALSE, + id = rand_id("epi_lag")) { + add_step( + recipe, + step_epi_lag2_new( + terms = dplyr::enquos(...), + role = role, + trained = trained, + lag = lag, + prefix = prefix, + default = default, + keys = keys, + columns = columns, + skip = skip, + id = id + ) + ) + } + +step_epi_lag2_new <- + function(terms, role, trained, lag, prefix, default, keys, + columns, skip, id) { + step( + subclass = "epi_lag", + terms = terms, + role = role, + trained = trained, + lag = lag, + prefix = prefix, + default = default, + keys = keys, + columns = columns, + skip = skip, + id = id + ) + } + +#' @export +prep.step_epi_lag2 <- function(x, training, info = NULL, ...) { + step_epi_lag2_new( + terms = x$terms, + role = x$role, + trained = TRUE, + lag = x$lag, + prefix = x$prefix, + default = x$default, + keys = x$keys, + columns = recipes_eval_select(x$terms, training, info), + skip = x$skip, + id = x$id + ) +} + +#' @export +bake.step_epi_lag2 <- function(object, new_data, ...) { + if (!all(object$lag == as.integer(object$lag))) { + rlang::abort("step_epi_lag requires 'lag' argument to be integer valued.") + } + grid <- tidyr::expand_grid(col = object$columns, lag_val = object$lag) %>% + dplyr::mutate(newname = glue::glue("{object$prefix}{lag_val}_{col}")) + + ## ensure no name clashes + new_data_names <- colnames(new_data) + intersection <- new_data_names %in% grid$newname + if (any(intersection)) { + rlang::abort( + paste0("Name collision occured in `", class(object)[1], + "`. The following variable names already exists: ", + paste0(new_data_names[intersection], collapse = ", "), + ".")) + } + ok <- object$keys + lagged <- purrr::reduce( + purrr::pmap(grid, epi_shift_single, x = new_data, key_cols = ok), + dplyr::full_join, + by = ok + ) + + dplyr::full_join(new_data, lagged, by = ok) %>% + dplyr::group_by(dplyr::across(dplyr::all_of(ok[-1]))) %>% + dplyr::arrange(time_value) %>% + dplyr::ungroup() + +} + +#' @export +print.step_epi_lag2 <- + function(x, width = max(20, options()$width - 30), ...) { + ## TODO add printing of the lags + title <- "Lagging " + recipes::print_step(x$columns, x$terms, x$trained, title, width) + invisible(x) + } From 82a885f66a97ab8d20cff887c1c1ee31fdbe7cdf Mon Sep 17 00:00:00 2001 From: kenmawer Date: Fri, 27 May 2022 13:31:43 -0700 Subject: [PATCH 19/65] Updated recipe --- NAMESPACE | 4 ++++ R/epi_shift_internal.R | 42 +++++++++++++++++++++--------------------- man/step_epi_ahead.Rd | 34 +++++++++++++++++++++++++++++++++- 3 files changed, 58 insertions(+), 22 deletions(-) diff --git a/NAMESPACE b/NAMESPACE index b632f8922..59a762843 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -2,6 +2,7 @@ S3method(bake,step_epi_ahead) S3method(bake,step_epi_lag) +S3method(bake,step_epi_shift2) S3method(epi_keys,default) S3method(epi_keys,epi_df) S3method(epi_keys,recipe) @@ -10,8 +11,10 @@ S3method(epi_recipe,epi_df) S3method(epi_recipe,formula) S3method(prep,step_epi_ahead) S3method(prep,step_epi_lag) +S3method(prep,step_epi_shift2) S3method(print,step_epi_ahead) S3method(print,step_epi_lag) +S3method(print,step_epi_shift2) export("%>%") export(arx_args_list) export(arx_forecaster) @@ -29,6 +32,7 @@ export(smooth_arx_args_list) export(smooth_arx_forecaster) export(step_epi_ahead) export(step_epi_lag) +export(step_epi_shift2) import(recipes) importFrom(magrittr,"%>%") importFrom(rlang,"!!") diff --git a/R/epi_shift_internal.R b/R/epi_shift_internal.R index 404bf7690..a904796b8 100644 --- a/R/epi_shift_internal.R +++ b/R/epi_shift_internal.R @@ -17,25 +17,25 @@ #' @family row operation steps #' @export #' @rdname step_epi_ahead -step_epi_lag2 <- +step_epi_shift2 <- function(recipe, ..., role = "predictor", trained = FALSE, - lag = 1, - prefix = "lag_", + shift2 = 1, + prefix = "shift2_", default = NA, keys = epi_keys(recipe), columns = NULL, skip = FALSE, - id = rand_id("epi_lag")) { + id = rand_id("epi_shift2")) { add_step( recipe, - step_epi_lag2_new( + step_epi_shift2_new( terms = dplyr::enquos(...), role = role, trained = trained, - lag = lag, + shift2 = shift2, prefix = prefix, default = default, keys = keys, @@ -46,15 +46,15 @@ step_epi_lag2 <- ) } -step_epi_lag2_new <- - function(terms, role, trained, lag, prefix, default, keys, +step_epi_shift2_new <- + function(terms, role, trained, shift2, prefix, default, keys, columns, skip, id) { step( - subclass = "epi_lag", + subclass = "epi_shift2", terms = terms, role = role, trained = trained, - lag = lag, + shift2 = shift2, prefix = prefix, default = default, keys = keys, @@ -65,12 +65,12 @@ step_epi_lag2_new <- } #' @export -prep.step_epi_lag2 <- function(x, training, info = NULL, ...) { - step_epi_lag2_new( +prep.step_epi_shift2 <- function(x, training, info = NULL, ...) { + step_epi_shift2_new( terms = x$terms, role = x$role, trained = TRUE, - lag = x$lag, + shift2 = x$shift2, prefix = x$prefix, default = x$default, keys = x$keys, @@ -81,12 +81,12 @@ prep.step_epi_lag2 <- function(x, training, info = NULL, ...) { } #' @export -bake.step_epi_lag2 <- function(object, new_data, ...) { - if (!all(object$lag == as.integer(object$lag))) { - rlang::abort("step_epi_lag requires 'lag' argument to be integer valued.") +bake.step_epi_shift2 <- function(object, new_data, ...) { + if (!all(object$shift2 == as.integer(object$shift2))) { + rlang::abort("step_epi_shift2 requires 'shift2' argument to be integer valued.") } - grid <- tidyr::expand_grid(col = object$columns, lag_val = object$lag) %>% - dplyr::mutate(newname = glue::glue("{object$prefix}{lag_val}_{col}")) + grid <- tidyr::expand_grid(col = object$columns, shift2_val = object$shift2) %>% + dplyr::mutate(newname = glue::glue("{object$prefix}{shift2_val}_{col}")) ## ensure no name clashes new_data_names <- colnames(new_data) @@ -99,13 +99,13 @@ bake.step_epi_lag2 <- function(object, new_data, ...) { ".")) } ok <- object$keys - lagged <- purrr::reduce( + shiftged <- purrr::reduce( purrr::pmap(grid, epi_shift_single, x = new_data, key_cols = ok), dplyr::full_join, by = ok ) - dplyr::full_join(new_data, lagged, by = ok) %>% + dplyr::full_join(new_data, shifted, by = ok) %>% dplyr::group_by(dplyr::across(dplyr::all_of(ok[-1]))) %>% dplyr::arrange(time_value) %>% dplyr::ungroup() @@ -113,7 +113,7 @@ bake.step_epi_lag2 <- function(object, new_data, ...) { } #' @export -print.step_epi_lag2 <- +print.step_epi_shift2 <- function(x, width = max(20, options()$width - 30), ...) { ## TODO add printing of the lags title <- "Lagging " diff --git a/man/step_epi_ahead.Rd b/man/step_epi_ahead.Rd index 006c224c3..984d0aae9 100644 --- a/man/step_epi_ahead.Rd +++ b/man/step_epi_ahead.Rd @@ -1,8 +1,10 @@ % Generated by roxygen2: do not edit by hand -% Please edit documentation in R/epi_ahead.R, R/epi_lag.R +% Please edit documentation in R/epi_ahead.R, R/epi_lag.R, +% R/epi_shift_internal.R \name{step_epi_ahead} \alias{step_epi_ahead} \alias{step_epi_lag} +\alias{step_epi_shift2} \title{Create a leading outcome} \usage{ step_epi_ahead( @@ -32,6 +34,20 @@ step_epi_lag( skip = FALSE, id = rand_id("epi_lag") ) + +step_epi_shift2( + recipe, + ..., + role = "predictor", + trained = FALSE, + shift2 = 1, + prefix = "shift2_", + default = NA, + keys = epi_keys(recipe), + columns = NULL, + skip = FALSE, + id = rand_id("epi_shift2") +) } \arguments{ \item{recipe}{A recipe object. The step will be added to the @@ -70,11 +86,17 @@ the computations for subsequent operations.} \item{lag}{A vector of positive integers. Each specified column will be lagged for each value in the vector.} + +\item{shift}{A vector of integers. Each specified column will be +lagged for each value in the vector.} } \value{ An updated version of \code{recipe} with the new step added to the sequence of any existing operations. +An updated version of \code{recipe} with the new step added to the +sequence of any existing operations. + An updated version of \code{recipe} with the new step added to the sequence of any existing operations. } @@ -92,11 +114,21 @@ by default include NA values where the lag was induced. These can be removed with \code{\link[=step_naomit]{step_naomit()}}, or you may specify an alternative filler value with the \code{default} argument. + +\code{step_epi_shift} creates a \emph{specification} of a recipe step that +will add new columns of lagged data. Lagged data will +by default include NA values where the lag was induced. +These can be removed with \code{\link[=step_naomit]{step_naomit()}}, or you may +specify an alternative filler value with the \code{default} +argument. } \details{ The step assumes that the data are already \emph{in the proper sequential order} for leading. +The step assumes that the data are already \emph{in the proper sequential +order} for lagging. + The step assumes that the data are already \emph{in the proper sequential order} for lagging. } From 4996530d214da3299a4bea27fc02ea684f9d8023 Mon Sep 17 00:00:00 2001 From: kenmawer Date: Fri, 27 May 2022 13:51:59 -0700 Subject: [PATCH 20/65] It's still working! --- NAMESPACE | 8 ++++---- R/epi_shift_internal.R | 42 ++++++++++++++++++++-------------------- man/step_epi_ahead.Rd | 10 +++++----- musings/example-recipe.R | 4 ++-- 4 files changed, 32 insertions(+), 32 deletions(-) diff --git a/NAMESPACE b/NAMESPACE index 59a762843..bb07e8d65 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -2,7 +2,7 @@ S3method(bake,step_epi_ahead) S3method(bake,step_epi_lag) -S3method(bake,step_epi_shift2) +S3method(bake,step_epi_shift) S3method(epi_keys,default) S3method(epi_keys,epi_df) S3method(epi_keys,recipe) @@ -11,10 +11,10 @@ S3method(epi_recipe,epi_df) S3method(epi_recipe,formula) S3method(prep,step_epi_ahead) S3method(prep,step_epi_lag) -S3method(prep,step_epi_shift2) +S3method(prep,step_epi_shift) S3method(print,step_epi_ahead) S3method(print,step_epi_lag) -S3method(print,step_epi_shift2) +S3method(print,step_epi_shift) export("%>%") export(arx_args_list) export(arx_forecaster) @@ -32,7 +32,7 @@ export(smooth_arx_args_list) export(smooth_arx_forecaster) export(step_epi_ahead) export(step_epi_lag) -export(step_epi_shift2) +export(step_epi_shift) import(recipes) importFrom(magrittr,"%>%") importFrom(rlang,"!!") diff --git a/R/epi_shift_internal.R b/R/epi_shift_internal.R index a904796b8..b45a3c2f5 100644 --- a/R/epi_shift_internal.R +++ b/R/epi_shift_internal.R @@ -17,25 +17,25 @@ #' @family row operation steps #' @export #' @rdname step_epi_ahead -step_epi_shift2 <- +step_epi_shift <- function(recipe, ..., role = "predictor", trained = FALSE, - shift2 = 1, - prefix = "shift2_", + lag = 1, + prefix = "shift_", default = NA, keys = epi_keys(recipe), columns = NULL, skip = FALSE, - id = rand_id("epi_shift2")) { + id = rand_id("epi_shift")) { add_step( recipe, - step_epi_shift2_new( + step_epi_shift_new( terms = dplyr::enquos(...), role = role, trained = trained, - shift2 = shift2, + lag = lag, prefix = prefix, default = default, keys = keys, @@ -46,15 +46,15 @@ step_epi_shift2 <- ) } -step_epi_shift2_new <- - function(terms, role, trained, shift2, prefix, default, keys, +step_epi_shift_new <- + function(terms, role, trained, lag, prefix, default, keys, columns, skip, id) { step( - subclass = "epi_shift2", + subclass = "epi_shift", terms = terms, role = role, trained = trained, - shift2 = shift2, + lag = lag, prefix = prefix, default = default, keys = keys, @@ -65,12 +65,12 @@ step_epi_shift2_new <- } #' @export -prep.step_epi_shift2 <- function(x, training, info = NULL, ...) { - step_epi_shift2_new( +prep.step_epi_shift <- function(x, training, info = NULL, ...) { + step_epi_shift_new( terms = x$terms, role = x$role, trained = TRUE, - shift2 = x$shift2, + lag = x$lag, prefix = x$prefix, default = x$default, keys = x$keys, @@ -81,12 +81,12 @@ prep.step_epi_shift2 <- function(x, training, info = NULL, ...) { } #' @export -bake.step_epi_shift2 <- function(object, new_data, ...) { - if (!all(object$shift2 == as.integer(object$shift2))) { - rlang::abort("step_epi_shift2 requires 'shift2' argument to be integer valued.") +bake.step_epi_shift <- function(object, new_data, ...) { + if (!all(object$lag == as.integer(object$lag))) { + rlang::abort("step_epi_lag requires 'lag' argument to be integer valued.") } - grid <- tidyr::expand_grid(col = object$columns, shift2_val = object$shift2) %>% - dplyr::mutate(newname = glue::glue("{object$prefix}{shift2_val}_{col}")) + grid <- tidyr::expand_grid(col = object$columns, lag_val = object$lag) %>% + dplyr::mutate(newname = glue::glue("{object$prefix}{lag_val}_{col}")) ## ensure no name clashes new_data_names <- colnames(new_data) @@ -99,13 +99,13 @@ bake.step_epi_shift2 <- function(object, new_data, ...) { ".")) } ok <- object$keys - shiftged <- purrr::reduce( + lagged <- purrr::reduce( purrr::pmap(grid, epi_shift_single, x = new_data, key_cols = ok), dplyr::full_join, by = ok ) - dplyr::full_join(new_data, shifted, by = ok) %>% + dplyr::full_join(new_data, lagged, by = ok) %>% dplyr::group_by(dplyr::across(dplyr::all_of(ok[-1]))) %>% dplyr::arrange(time_value) %>% dplyr::ungroup() @@ -113,7 +113,7 @@ bake.step_epi_shift2 <- function(object, new_data, ...) { } #' @export -print.step_epi_shift2 <- +print.step_epi_shift <- function(x, width = max(20, options()$width - 30), ...) { ## TODO add printing of the lags title <- "Lagging " diff --git a/man/step_epi_ahead.Rd b/man/step_epi_ahead.Rd index 984d0aae9..105c7e64a 100644 --- a/man/step_epi_ahead.Rd +++ b/man/step_epi_ahead.Rd @@ -4,7 +4,7 @@ \name{step_epi_ahead} \alias{step_epi_ahead} \alias{step_epi_lag} -\alias{step_epi_shift2} +\alias{step_epi_shift} \title{Create a leading outcome} \usage{ step_epi_ahead( @@ -35,18 +35,18 @@ step_epi_lag( id = rand_id("epi_lag") ) -step_epi_shift2( +step_epi_shift( recipe, ..., role = "predictor", trained = FALSE, - shift2 = 1, - prefix = "shift2_", + lag = 1, + prefix = "shift_", default = NA, keys = epi_keys(recipe), columns = NULL, skip = FALSE, - id = rand_id("epi_shift2") + id = rand_id("epi_shift") ) } \arguments{ diff --git a/musings/example-recipe.R b/musings/example-recipe.R index 61e4b6868..aa05bf3a1 100644 --- a/musings/example-recipe.R +++ b/musings/example-recipe.R @@ -37,8 +37,8 @@ xx <- x %>% filter(time_value > "2021-12-01") r <- epi_recipe(x) %>% # if we add this as a class, maybe we get better # behaviour downstream? step_epi_ahead(death_rate, ahead = 7) %>% - step_epi_lag(death_rate, lag = c(0, 7, 14)) %>% - step_epi_lag(case_rate, lag = c(0, 7, 14)) %>% + step_epi_shift(death_rate, lag = c(0, 7, 14)) %>% + step_epi_shift(case_rate, lag = c(0, 7, 14)) %>% step_naomit(all_predictors()) %>% # below, `skip` means we don't do this at predict time # we should probably do something useful here to avoid user error From 1ace3d74d66eab32a4b24bed0d8bdf10723cc13d Mon Sep 17 00:00:00 2001 From: kenmawer Date: Fri, 27 May 2022 14:57:16 -0700 Subject: [PATCH 21/65] Updated main step funcction. --- R/epi_lag.R | 43 ++++++++++------------------------------ musings/example-recipe.R | 4 ++-- 2 files changed, 13 insertions(+), 34 deletions(-) diff --git a/R/epi_lag.R b/R/epi_lag.R index eeacf342c..3f0df40cf 100644 --- a/R/epi_lag.R +++ b/R/epi_lag.R @@ -29,38 +29,17 @@ step_epi_lag <- columns = NULL, skip = FALSE, id = rand_id("epi_lag")) { - add_step( - recipe, - step_epi_lag_new( - terms = dplyr::enquos(...), - role = role, - trained = trained, - lag = lag, - prefix = prefix, - default = default, - keys = keys, - columns = columns, - skip = skip, - id = id - ) - ) - } - -step_epi_lag_new <- - function(terms, role, trained, lag, prefix, default, keys, - columns, skip, id) { - step( - subclass = "epi_lag", - terms = terms, - role = role, - trained = trained, - lag = lag, - prefix = prefix, - default = default, - keys = keys, - columns = columns, - skip = skip, - id = id + step_epi_shift(recipe, + ..., + role = role, + trained = trained, + lag = lag, + prefix = prefix, + default = default, + keys = keys, + columns = columns, + skip = skip, + id = id ) } diff --git a/musings/example-recipe.R b/musings/example-recipe.R index aa05bf3a1..61e4b6868 100644 --- a/musings/example-recipe.R +++ b/musings/example-recipe.R @@ -37,8 +37,8 @@ xx <- x %>% filter(time_value > "2021-12-01") r <- epi_recipe(x) %>% # if we add this as a class, maybe we get better # behaviour downstream? step_epi_ahead(death_rate, ahead = 7) %>% - step_epi_shift(death_rate, lag = c(0, 7, 14)) %>% - step_epi_shift(case_rate, lag = c(0, 7, 14)) %>% + step_epi_lag(death_rate, lag = c(0, 7, 14)) %>% + step_epi_lag(case_rate, lag = c(0, 7, 14)) %>% step_naomit(all_predictors()) %>% # below, `skip` means we don't do this at predict time # we should probably do something useful here to avoid user error From f2220b2aba8ac3e4a8c941151a7e2b4ff6e46efa Mon Sep 17 00:00:00 2001 From: kenmawer Date: Fri, 27 May 2022 15:27:51 -0700 Subject: [PATCH 22/65] Partially finished with one function. --- NAMESPACE | 3 --- R/epi_lag.R | 57 ------------------------------------------ R/epi_shift_internal.R | 18 ++++++------- man/step_epi_ahead.Rd | 18 ++++++------- 4 files changed, 18 insertions(+), 78 deletions(-) diff --git a/NAMESPACE b/NAMESPACE index bb07e8d65..8cab0ccc3 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -1,7 +1,6 @@ # Generated by roxygen2: do not edit by hand S3method(bake,step_epi_ahead) -S3method(bake,step_epi_lag) S3method(bake,step_epi_shift) S3method(epi_keys,default) S3method(epi_keys,epi_df) @@ -10,10 +9,8 @@ S3method(epi_recipe,default) S3method(epi_recipe,epi_df) S3method(epi_recipe,formula) S3method(prep,step_epi_ahead) -S3method(prep,step_epi_lag) S3method(prep,step_epi_shift) S3method(print,step_epi_ahead) -S3method(print,step_epi_lag) S3method(print,step_epi_shift) export("%>%") export(arx_args_list) diff --git a/R/epi_lag.R b/R/epi_lag.R index 3f0df40cf..0e772afdc 100644 --- a/R/epi_lag.R +++ b/R/epi_lag.R @@ -42,60 +42,3 @@ step_epi_lag <- id = id ) } - -#' @export -prep.step_epi_lag <- function(x, training, info = NULL, ...) { - step_epi_lag_new( - terms = x$terms, - role = x$role, - trained = TRUE, - lag = x$lag, - prefix = x$prefix, - default = x$default, - keys = x$keys, - columns = recipes_eval_select(x$terms, training, info), - skip = x$skip, - id = x$id - ) -} - -#' @export -bake.step_epi_lag <- function(object, new_data, ...) { - if (!all(object$lag == as.integer(object$lag))) { - rlang::abort("step_epi_lag requires 'lag' argument to be integer valued.") - } - grid <- tidyr::expand_grid(col = object$columns, lag_val = object$lag) %>% - dplyr::mutate(newname = glue::glue("{object$prefix}{lag_val}_{col}")) - - ## ensure no name clashes - new_data_names <- colnames(new_data) - intersection <- new_data_names %in% grid$newname - if (any(intersection)) { - rlang::abort( - paste0("Name collision occured in `", class(object)[1], - "`. The following variable names already exists: ", - paste0(new_data_names[intersection], collapse = ", "), - ".")) - } - ok <- object$keys - lagged <- purrr::reduce( - purrr::pmap(grid, epi_shift_single, x = new_data, key_cols = ok), - dplyr::full_join, - by = ok - ) - - dplyr::full_join(new_data, lagged, by = ok) %>% - dplyr::group_by(dplyr::across(dplyr::all_of(ok[-1]))) %>% - dplyr::arrange(time_value) %>% - dplyr::ungroup() - -} - -#' @export -print.step_epi_lag <- - function(x, width = max(20, options()$width - 30), ...) { - ## TODO add printing of the lags - title <- "Lagging " - recipes::print_step(x$columns, x$terms, x$trained, title, width) - invisible(x) - } diff --git a/R/epi_shift_internal.R b/R/epi_shift_internal.R index b45a3c2f5..b47fe8641 100644 --- a/R/epi_shift_internal.R +++ b/R/epi_shift_internal.R @@ -20,15 +20,15 @@ step_epi_shift <- function(recipe, ..., - role = "predictor", - trained = FALSE, - lag = 1, - prefix = "shift_", - default = NA, - keys = epi_keys(recipe), - columns = NULL, - skip = FALSE, - id = rand_id("epi_shift")) { + role, + trained, + lag, + prefix, + default, + keys, + columns, + skip, + id) { add_step( recipe, step_epi_shift_new( diff --git a/man/step_epi_ahead.Rd b/man/step_epi_ahead.Rd index 105c7e64a..425293a8e 100644 --- a/man/step_epi_ahead.Rd +++ b/man/step_epi_ahead.Rd @@ -38,15 +38,15 @@ step_epi_lag( step_epi_shift( recipe, ..., - role = "predictor", - trained = FALSE, - lag = 1, - prefix = "shift_", - default = NA, - keys = epi_keys(recipe), - columns = NULL, - skip = FALSE, - id = rand_id("epi_shift") + role, + trained, + lag, + prefix, + default, + keys, + columns, + skip, + id ) } \arguments{ From 861f8cfbc94599b66c18190a47e341c5c619f099 Mon Sep 17 00:00:00 2001 From: kenmawer Date: Mon, 30 May 2022 10:14:18 -0700 Subject: [PATCH 23/65] Refactored epi_lag --- R/epi_lag.R | 3 ++- R/epi_shift_internal.R | 35 ++++++++++++++++++----------------- man/step_epi_ahead.Rd | 10 +++++----- musings/example-recipe.R | 7 +++++++ 4 files changed, 32 insertions(+), 23 deletions(-) diff --git a/R/epi_lag.R b/R/epi_lag.R index 0e772afdc..ccbbb5aba 100644 --- a/R/epi_lag.R +++ b/R/epi_lag.R @@ -33,7 +33,7 @@ step_epi_lag <- ..., role = role, trained = trained, - lag = lag, + shift = -lag, prefix = prefix, default = default, keys = keys, @@ -42,3 +42,4 @@ step_epi_lag <- id = id ) } + diff --git a/R/epi_shift_internal.R b/R/epi_shift_internal.R index b47fe8641..511fb55da 100644 --- a/R/epi_shift_internal.R +++ b/R/epi_shift_internal.R @@ -1,18 +1,18 @@ -#' Create a lagged predictor +#' Create a shifted predictor #' #' `step_epi_shift` creates a *specification* of a recipe step that -#' will add new columns of lagged data. Lagged data will -#' by default include NA values where the lag was induced. +#' will add new columns of shifted data. shifted data will +#' by default include NA values where the shift was induced. #' These can be removed with [step_naomit()], or you may #' specify an alternative filler value with the `default` #' argument. #' #' @param shift A vector of integers. Each specified column will be -#' lagged for each value in the vector. +#' shifted for each value in the vector. #' @template step-return #' #' @details The step assumes that the data are already _in the proper sequential -#' order_ for lagging. +#' order_ for shifting. #' #' @family row operation steps #' @export @@ -22,7 +22,7 @@ step_epi_shift <- ..., role, trained, - lag, + shift, prefix, default, keys, @@ -35,7 +35,7 @@ step_epi_shift <- terms = dplyr::enquos(...), role = role, trained = trained, - lag = lag, + shift = shift, prefix = prefix, default = default, keys = keys, @@ -47,14 +47,14 @@ step_epi_shift <- } step_epi_shift_new <- - function(terms, role, trained, lag, prefix, default, keys, + function(terms, role, trained, shift, prefix, default, keys, columns, skip, id) { step( subclass = "epi_shift", terms = terms, role = role, trained = trained, - lag = lag, + shift = shift, prefix = prefix, default = default, keys = keys, @@ -70,7 +70,7 @@ prep.step_epi_shift <- function(x, training, info = NULL, ...) { terms = x$terms, role = x$role, trained = TRUE, - lag = x$lag, + shift = x$shift, prefix = x$prefix, default = x$default, keys = x$keys, @@ -82,10 +82,11 @@ prep.step_epi_shift <- function(x, training, info = NULL, ...) { #' @export bake.step_epi_shift <- function(object, new_data, ...) { - if (!all(object$lag == as.integer(object$lag))) { - rlang::abort("step_epi_lag requires 'lag' argument to be integer valued.") + if (!all(object$shift == as.integer(object$shift))) { + rlang::abort("step_epi_shift requires 'shift' argument to be integer valued.") } - grid <- tidyr::expand_grid(col = object$columns, lag_val = object$lag) %>% + grid <- tidyr::expand_grid(col = object$columns, lag_val = -object$shift) %>% + # Account for lag/lead dplyr::mutate(newname = glue::glue("{object$prefix}{lag_val}_{col}")) ## ensure no name clashes @@ -99,13 +100,13 @@ bake.step_epi_shift <- function(object, new_data, ...) { ".")) } ok <- object$keys - lagged <- purrr::reduce( + shifted <- purrr::reduce( purrr::pmap(grid, epi_shift_single, x = new_data, key_cols = ok), dplyr::full_join, by = ok ) - dplyr::full_join(new_data, lagged, by = ok) %>% + dplyr::full_join(new_data, shifted, by = ok) %>% dplyr::group_by(dplyr::across(dplyr::all_of(ok[-1]))) %>% dplyr::arrange(time_value) %>% dplyr::ungroup() @@ -115,8 +116,8 @@ bake.step_epi_shift <- function(object, new_data, ...) { #' @export print.step_epi_shift <- function(x, width = max(20, options()$width - 30), ...) { - ## TODO add printing of the lags - title <- "Lagging " + ## TODO add printing of the shifts + title <- "shifting " # Account for lag/lead recipes::print_step(x$columns, x$terms, x$trained, title, width) invisible(x) } diff --git a/man/step_epi_ahead.Rd b/man/step_epi_ahead.Rd index 425293a8e..18a28e7b6 100644 --- a/man/step_epi_ahead.Rd +++ b/man/step_epi_ahead.Rd @@ -40,7 +40,7 @@ step_epi_shift( ..., role, trained, - lag, + shift, prefix, default, keys, @@ -88,7 +88,7 @@ the computations for subsequent operations.} lagged for each value in the vector.} \item{shift}{A vector of integers. Each specified column will be -lagged for each value in the vector.} +shifted for each value in the vector.} } \value{ An updated version of \code{recipe} with the new step added to the @@ -116,8 +116,8 @@ specify an alternative filler value with the \code{default} argument. \code{step_epi_shift} creates a \emph{specification} of a recipe step that -will add new columns of lagged data. Lagged data will -by default include NA values where the lag was induced. +will add new columns of shifted data. shifted data will +by default include NA values where the shift was induced. These can be removed with \code{\link[=step_naomit]{step_naomit()}}, or you may specify an alternative filler value with the \code{default} argument. @@ -130,7 +130,7 @@ The step assumes that the data are already \emph{in the proper sequential order} for lagging. The step assumes that the data are already \emph{in the proper sequential -order} for lagging. +order} for shifting. } \examples{ tib <- tibble::tibble( diff --git a/musings/example-recipe.R b/musings/example-recipe.R index 61e4b6868..59d859cdf 100644 --- a/musings/example-recipe.R +++ b/musings/example-recipe.R @@ -53,6 +53,13 @@ slm_fit <- workflow() %>% add_model(slm) %>% fit(data = x) +# Provides a summary of a fit +sm <- function(myfit) { + summary(myfit$fit$fit$fit) +} + +sm(slm_fit) + x_latest <- x %>% filter(!is.na(case_rate), !is.na(death_rate)) %>% group_by(geo_value) %>% From 894247d8916486349307a5f9226362d2c926d61c Mon Sep 17 00:00:00 2001 From: kenmawer Date: Mon, 30 May 2022 11:19:47 -0700 Subject: [PATCH 24/65] Code that finally works. --- NAMESPACE | 3 -- R/epi_ahead.R | 107 ++++----------------------------------- R/epi_shift_internal.R | 23 +++++++-- musings/example-recipe.R | 2 +- 4 files changed, 30 insertions(+), 105 deletions(-) diff --git a/NAMESPACE b/NAMESPACE index 8cab0ccc3..3cdb7b31e 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -1,6 +1,5 @@ # Generated by roxygen2: do not edit by hand -S3method(bake,step_epi_ahead) S3method(bake,step_epi_shift) S3method(epi_keys,default) S3method(epi_keys,epi_df) @@ -8,9 +7,7 @@ S3method(epi_keys,recipe) S3method(epi_recipe,default) S3method(epi_recipe,epi_df) S3method(epi_recipe,formula) -S3method(prep,step_epi_ahead) S3method(prep,step_epi_shift) -S3method(print,step_epi_ahead) S3method(print,step_epi_shift) export("%>%") export(arx_args_list) diff --git a/R/epi_ahead.R b/R/epi_ahead.R index 3434b7d93..b90fbae43 100644 --- a/R/epi_ahead.R +++ b/R/epi_ahead.R @@ -63,101 +63,16 @@ step_epi_ahead <- columns = NULL, skip = FALSE, id = rand_id("epi_ahead")) { - add_step( - recipe, - step_epi_ahead_new( - terms = dplyr::enquos(...), - role = role, - trained = trained, - ahead = ahead, - prefix = prefix, - default = default, - keys = keys, - columns = columns, - skip = skip, - id = id - ) + step_epi_shift(recipe, + ..., + role = role, + trained = trained, + shift = ahead, + prefix = prefix, + default = default, + keys = keys, + columns = columns, + skip = skip, + id = id ) } - -step_epi_ahead_new <- - function(terms, role, trained, ahead, prefix, default, keys, - columns, skip, id) { - step( - subclass = "epi_ahead", - terms = terms, - role = role, - trained = trained, - ahead = ahead, - prefix = prefix, - default = default, - keys = keys, - columns = columns, - skip = skip, - id = id - ) - } - -#' @export -prep.step_epi_ahead <- function(x, training, info = NULL, ...) { - step_epi_ahead_new( - terms = x$terms, - role = x$role, - trained = TRUE, - ahead = x$ahead, - prefix = x$prefix, - default = x$default, - keys = x$keys, - columns = recipes_eval_select(x$terms, training, info), - skip = x$skip, - id = x$id - ) -} - -#' @export -bake.step_epi_ahead <- function(object, new_data, ...) { - if (!all(object$ahead == as.integer(object$ahead))) { - rlang::abort("step_epi_ahead requires 'ahead' argument to be integer valued.") - } - - grid <- tidyr::expand_grid( - col = object$columns, lag_val = -object$ahead) %>% - dplyr::mutate( - ahead_val = -lag_val, - newname = glue::glue("{object$prefix}{ahead_val}_{col}") - ) %>% - dplyr::select(-ahead_val) - - ## ensure no name clashes - new_data_names <- colnames(new_data) - intersection <- new_data_names %in% grid$newname - if (any(intersection)) { - rlang::abort( - paste0("Name collision occured in `", class(object)[1], - "`. The following variable names already exists: ", - paste0(new_data_names[intersection], collapse = ", "), - ".")) - } - - ok <- object$keys - lagged <- purrr::reduce( - purrr::pmap(grid, epi_shift_single, x = new_data, key_cols = ok), - dplyr::full_join, - by = ok - ) - - dplyr::full_join(new_data, lagged, by = ok) %>% - dplyr::group_by(dplyr::across(dplyr::all_of(ok[-1]))) %>% - dplyr::arrange(time_value) %>% - dplyr::ungroup() - -} - -#' @export -print.step_epi_ahead <- - function(x, width = max(20, options()$width - 30), ...) { - ## TODO add printing of the lags - title <- "Leading " - recipes::print_step(x$columns, x$terms, x$trained, title, width) - invisible(x) - } diff --git a/R/epi_shift_internal.R b/R/epi_shift_internal.R index 511fb55da..e39f361c9 100644 --- a/R/epi_shift_internal.R +++ b/R/epi_shift_internal.R @@ -85,10 +85,23 @@ bake.step_epi_shift <- function(object, new_data, ...) { if (!all(object$shift == as.integer(object$shift))) { rlang::abort("step_epi_shift requires 'shift' argument to be integer valued.") } - grid <- tidyr::expand_grid(col = object$columns, lag_val = -object$shift) %>% - # Account for lag/lead - dplyr::mutate(newname = glue::glue("{object$prefix}{lag_val}_{col}")) - + grid <- tidyr::expand_grid(col = object$columns, lag_val = -object$shift) + is_lag <- object$role == "predictor" + if (!is_lag) { + grid <- dplyr::mutate(grid,ahead_val = -lag_val) + } + grid <- dplyr::mutate(grid, + newname = glue::glue( + paste0( + "{object$prefix}", + ifelse(is_lag,"{lag_val}","{ahead_val}"), + "_{col}" + ) + ) + ) + if (!is_lag) { + grid <- dplyr::select(grid, -ahead_val) + } ## ensure no name clashes new_data_names <- colnames(new_data) intersection <- new_data_names %in% grid$newname @@ -117,7 +130,7 @@ bake.step_epi_shift <- function(object, new_data, ...) { print.step_epi_shift <- function(x, width = max(20, options()$width - 30), ...) { ## TODO add printing of the shifts - title <- "shifting " # Account for lag/lead + title <- ifelse(x$role == "predictor","Lagging ","Leading ") # Account for lag/lead recipes::print_step(x$columns, x$terms, x$trained, title, width) invisible(x) } diff --git a/musings/example-recipe.R b/musings/example-recipe.R index 59d859cdf..ebed9e512 100644 --- a/musings/example-recipe.R +++ b/musings/example-recipe.R @@ -53,7 +53,7 @@ slm_fit <- workflow() %>% add_model(slm) %>% fit(data = x) -# Provides a summary of a fit +# Provides a summary of a fit's model sm <- function(myfit) { summary(myfit$fit$fit$fit) } From be6c421149e7e76ac534ff226f500787d8118b3d Mon Sep 17 00:00:00 2001 From: kenmawer Date: Mon, 30 May 2022 12:42:34 -0700 Subject: [PATCH 25/65] Removed extra function. --- musings/example-recipe.R | 7 ------- 1 file changed, 7 deletions(-) diff --git a/musings/example-recipe.R b/musings/example-recipe.R index ebed9e512..61e4b6868 100644 --- a/musings/example-recipe.R +++ b/musings/example-recipe.R @@ -53,13 +53,6 @@ slm_fit <- workflow() %>% add_model(slm) %>% fit(data = x) -# Provides a summary of a fit's model -sm <- function(myfit) { - summary(myfit$fit$fit$fit) -} - -sm(slm_fit) - x_latest <- x %>% filter(!is.na(case_rate), !is.na(death_rate)) %>% group_by(geo_value) %>% From 5d3546fb42632426066f29a713e1d5c14281fdb3 Mon Sep 17 00:00:00 2001 From: kenmawer Date: Mon, 30 May 2022 16:01:03 -0700 Subject: [PATCH 26/65] Added code stub for testing epi_shift_internal. --- tests/testthat/test-epi_shift_internal.R | 3 +++ 1 file changed, 3 insertions(+) create mode 100644 tests/testthat/test-epi_shift_internal.R diff --git a/tests/testthat/test-epi_shift_internal.R b/tests/testthat/test-epi_shift_internal.R new file mode 100644 index 000000000..870f6560d --- /dev/null +++ b/tests/testthat/test-epi_shift_internal.R @@ -0,0 +1,3 @@ +test_that("epi_shift_interal", { + +}) From f05ec0c932359a6348131254a7212ee6d2c13581 Mon Sep 17 00:00:00 2001 From: kenmawer Date: Tue, 31 May 2022 09:53:37 -0700 Subject: [PATCH 27/65] Note that having lag and ahead as negative values is odd behaviour. --- tests/testthat/test-epi_shift_internal.R | 68 +++++++++++++++++++++++- 1 file changed, 67 insertions(+), 1 deletion(-) diff --git a/tests/testthat/test-epi_shift_internal.R b/tests/testthat/test-epi_shift_internal.R index 870f6560d..70e560124 100644 --- a/tests/testthat/test-epi_shift_internal.R +++ b/tests/testthat/test-epi_shift_internal.R @@ -1,3 +1,69 @@ -test_that("epi_shift_interal", { +# Loading +library(tidyverse) +library(covidcast) +library(delphi.epidata) +library(epiprocess) +library(tidymodels) +# Taken from example-recipe +x <- covidcast( + data_source = "jhu-csse", + signals = "confirmed_7dav_incidence_prop", + time_type = "day", + geo_type = "state", + time_values = epirange(20200301, 20211231), + geo_values = "*" +) %>% + fetch_tbl() %>% + select(geo_value, time_value, case_rate = value) + +y <- covidcast( + data_source = "jhu-csse", + signals = "deaths_7dav_incidence_prop", + time_type = "day", + geo_type = "state", + time_values = epirange(20200301, 20211231), + geo_values = "*" +) %>% + fetch_tbl() %>% + select(geo_value, time_value, death_rate = value) + +x <- x %>% + full_join(y, by = c("geo_value", "time_value")) %>% + as_epi_df() +rm(y) + +xx <- x %>% filter(time_value > "2021-12-01") + +# Tests +test_that("Check that epi_ahead shifts properly", { + r <- epi_recipe(x) %>% + step_epi_ahead(death_rate, ahead = 7) %>% + step_epi_lag(death_rate, lag = -7) %>% + step_naomit(all_predictors()) %>% + step_naomit(all_outcomes(), skip = TRUE) + + slm_fit <- workflow() %>% + add_recipe(r) %>% + add_model(linear_reg()) %>% + fit(data = x) + + slope_ahead <- slm_fit$fit$fit$fit$coefficients[[2]] + expect_equal(slope_ahead,1) +}) + +test_that("Check that epi_lag shifts properly", { + r2 <- epi_recipe(x) %>% + step_epi_ahead(death_rate, ahead = -7) %>% + step_epi_lag(death_rate, lag = 7) %>% + step_naomit(all_predictors()) %>% + step_naomit(all_outcomes(), skip = TRUE) + + slm_fit2 <- workflow() %>% + add_recipe(r2) %>% + add_model(linear_reg()) %>% + fit(data = x) + + slope_lag <- slm_fit2$fit$fit$fit$coefficients[[2]] + expect_equal(slope_lag,1) }) From 03a2b14657937bafbeb31711596b2ed5c7e25aa7 Mon Sep 17 00:00:00 2001 From: kenmawer Date: Tue, 31 May 2022 11:00:03 -0700 Subject: [PATCH 28/65] Added error checking --- tests/testthat/test-epi_shift_internal.R | 40 ++++++++++++++++++------ 1 file changed, 30 insertions(+), 10 deletions(-) diff --git a/tests/testthat/test-epi_shift_internal.R b/tests/testthat/test-epi_shift_internal.R index 70e560124..286fca7b1 100644 --- a/tests/testthat/test-epi_shift_internal.R +++ b/tests/testthat/test-epi_shift_internal.R @@ -35,20 +35,24 @@ rm(y) xx <- x %>% filter(time_value > "2021-12-01") +slm_fit <- function(recipe, data = x) { + workflow() %>% + add_recipe(recipe) %>% + add_model(linear_reg()) %>% + fit(data = data) +} + # Tests test_that("Check that epi_ahead shifts properly", { - r <- epi_recipe(x) %>% + r1 <- epi_recipe(x) %>% step_epi_ahead(death_rate, ahead = 7) %>% step_epi_lag(death_rate, lag = -7) %>% step_naomit(all_predictors()) %>% step_naomit(all_outcomes(), skip = TRUE) - slm_fit <- workflow() %>% - add_recipe(r) %>% - add_model(linear_reg()) %>% - fit(data = x) + slm_fit1 <- slm_fit(r1) - slope_ahead <- slm_fit$fit$fit$fit$coefficients[[2]] + slope_ahead <- slm_fit1$fit$fit$fit$coefficients[[2]] expect_equal(slope_ahead,1) }) @@ -59,11 +63,27 @@ test_that("Check that epi_lag shifts properly", { step_naomit(all_predictors()) %>% step_naomit(all_outcomes(), skip = TRUE) - slm_fit2 <- workflow() %>% - add_recipe(r2) %>% - add_model(linear_reg()) %>% - fit(data = x) + slm_fit2 <- slm_fit(r2) slope_lag <- slm_fit2$fit$fit$fit$coefficients[[2]] expect_equal(slope_lag,1) }) + +test_that("Check for non-integer values", { + r3 <- epi_recipe(x) %>% + step_epi_ahead(death_rate, ahead = 3.6) %>% + step_epi_lag(death_rate, lag = 1.9) + expect_error( + slm_fit(r3) + ) +}) + +test_that("Check for duplicate values", { + r4 <- epi_recipe(x) %>% + step_epi_ahead(death_rate, ahead = 7) %>% + step_epi_lag(death_rate, lag = 7) %>% + step_epi_lag(death_rate, lag = 7) + expect_error( + slm_fit(r4) + ) +}) From 67980e0d057bb17a6d367e4124be62f5c4bbd613 Mon Sep 17 00:00:00 2001 From: kenmawer Date: Wed, 1 Jun 2022 14:24:22 -0700 Subject: [PATCH 29/65] Updated library due to failing test. --- tests/testthat/test-epi_shift_internal.R | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/testthat/test-epi_shift_internal.R b/tests/testthat/test-epi_shift_internal.R index 286fca7b1..722e60780 100644 --- a/tests/testthat/test-epi_shift_internal.R +++ b/tests/testthat/test-epi_shift_internal.R @@ -1,5 +1,5 @@ # Loading -library(tidyverse) +library(dplyr) library(covidcast) library(delphi.epidata) library(epiprocess) From a9beed56e01aaca48bb5a85633befa62ff7ce3be Mon Sep 17 00:00:00 2001 From: kenmawer Date: Wed, 1 Jun 2022 14:47:59 -0700 Subject: [PATCH 30/65] Removed library loading. --- tests/testthat/test-epi_shift_internal.R | 7 ------- 1 file changed, 7 deletions(-) diff --git a/tests/testthat/test-epi_shift_internal.R b/tests/testthat/test-epi_shift_internal.R index 722e60780..85bf2b0d8 100644 --- a/tests/testthat/test-epi_shift_internal.R +++ b/tests/testthat/test-epi_shift_internal.R @@ -1,10 +1,3 @@ -# Loading -library(dplyr) -library(covidcast) -library(delphi.epidata) -library(epiprocess) -library(tidymodels) - # Taken from example-recipe x <- covidcast( data_source = "jhu-csse", From eb9771cade10a77b60762d00c168acfd54145b6b Mon Sep 17 00:00:00 2001 From: kenmawer Date: Wed, 1 Jun 2022 14:51:34 -0700 Subject: [PATCH 31/65] Re-added packages. --- tests/testthat/test-epi_shift_internal.R | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/tests/testthat/test-epi_shift_internal.R b/tests/testthat/test-epi_shift_internal.R index 85bf2b0d8..2b875abff 100644 --- a/tests/testthat/test-epi_shift_internal.R +++ b/tests/testthat/test-epi_shift_internal.R @@ -1,3 +1,9 @@ +library(dplyr) +library(covidcast) +library(delphi.epidata) +library(epiprocess) +library(tidymodels) + # Taken from example-recipe x <- covidcast( data_source = "jhu-csse", From 3001b69bcfdb43936e30989c4bb59bb362f6c404 Mon Sep 17 00:00:00 2001 From: kenmawer Date: Wed, 1 Jun 2022 15:54:20 -0700 Subject: [PATCH 32/65] Updated test as to not use covidcast and delphi.epidata. --- musings/example-recipe.R | 1 + tests/testthat/test-epi_shift_internal.R | 36 ++++-------------------- 2 files changed, 7 insertions(+), 30 deletions(-) diff --git a/musings/example-recipe.R b/musings/example-recipe.R index 61e4b6868..84f536cd1 100644 --- a/musings/example-recipe.R +++ b/musings/example-recipe.R @@ -3,6 +3,7 @@ library(covidcast) library(delphi.epidata) library(epiprocess) library(tidymodels) + x <- covidcast( data_source = "jhu-csse", signals = "confirmed_7dav_incidence_prop", diff --git a/tests/testthat/test-epi_shift_internal.R b/tests/testthat/test-epi_shift_internal.R index 2b875abff..43c9863be 100644 --- a/tests/testthat/test-epi_shift_internal.R +++ b/tests/testthat/test-epi_shift_internal.R @@ -1,38 +1,14 @@ library(dplyr) -library(covidcast) -library(delphi.epidata) library(epiprocess) library(tidymodels) -# Taken from example-recipe -x <- covidcast( - data_source = "jhu-csse", - signals = "confirmed_7dav_incidence_prop", - time_type = "day", - geo_type = "state", - time_values = epirange(20200301, 20211231), - geo_values = "*" -) %>% - fetch_tbl() %>% - select(geo_value, time_value, case_rate = value) - -y <- covidcast( - data_source = "jhu-csse", - signals = "deaths_7dav_incidence_prop", - time_type = "day", - geo_type = "state", - time_values = epirange(20200301, 20211231), - geo_values = "*" -) %>% - fetch_tbl() %>% - select(geo_value, time_value, death_rate = value) - -x <- x %>% - full_join(y, by = c("geo_value", "time_value")) %>% +# Random generated dataset +set.seed(100) +x <- tibble(geo_value = rep("nowhere",200), + time_value = as.Date("2021-01-01") + 0:199, + case_rate = rpois(100,20) + 1:200, + death_rate = rpois(100,10) + 1:200) %>% as_epi_df() -rm(y) - -xx <- x %>% filter(time_value > "2021-12-01") slm_fit <- function(recipe, data = x) { workflow() %>% From 500c1583e3c7eeb78219b77144eaad38985ef763 Mon Sep 17 00:00:00 2001 From: kenmawer Date: Wed, 1 Jun 2022 16:18:56 -0700 Subject: [PATCH 33/65] Used specific libraries rather than tidymodels. --- tests/testthat/test-epi_shift_internal.R | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/tests/testthat/test-epi_shift_internal.R b/tests/testthat/test-epi_shift_internal.R index 43c9863be..5f0d7e0d5 100644 --- a/tests/testthat/test-epi_shift_internal.R +++ b/tests/testthat/test-epi_shift_internal.R @@ -1,6 +1,7 @@ library(dplyr) library(epiprocess) -library(tidymodels) +library(parsnip) +library(workflows) # Random generated dataset set.seed(100) From ea9aff0cbd1fb55c933f9f2f6bb29939c8741ffd Mon Sep 17 00:00:00 2001 From: kenmawer Date: Wed, 1 Jun 2022 16:50:13 -0700 Subject: [PATCH 34/65] Fix error of packages not being found on GitHub. --- tests/testthat/test-epi_shift_internal.R | 3 +++ 1 file changed, 3 insertions(+) diff --git a/tests/testthat/test-epi_shift_internal.R b/tests/testthat/test-epi_shift_internal.R index 5f0d7e0d5..f3e984090 100644 --- a/tests/testthat/test-epi_shift_internal.R +++ b/tests/testthat/test-epi_shift_internal.R @@ -1,3 +1,6 @@ +install.packages("parsnip") +install.packages("workflows") + library(dplyr) library(epiprocess) library(parsnip) From bf141f4d0a5e239f655d6225da9fdd4f0f4539d4 Mon Sep 17 00:00:00 2001 From: kenmawer Date: Thu, 2 Jun 2022 08:11:51 -0700 Subject: [PATCH 35/65] Updated as install.packages causes errors on GitHub. --- tests/testthat/test-epi_shift_internal.R | 3 --- 1 file changed, 3 deletions(-) diff --git a/tests/testthat/test-epi_shift_internal.R b/tests/testthat/test-epi_shift_internal.R index f3e984090..5f0d7e0d5 100644 --- a/tests/testthat/test-epi_shift_internal.R +++ b/tests/testthat/test-epi_shift_internal.R @@ -1,6 +1,3 @@ -install.packages("parsnip") -install.packages("workflows") - library(dplyr) library(epiprocess) library(parsnip) From 0f4f6fe7000aabe144969e5cec28cfd78d0f45f7 Mon Sep 17 00:00:00 2001 From: kenmawer Date: Thu, 2 Jun 2022 08:30:03 -0700 Subject: [PATCH 36/65] Changed format as to use :: rather than load packages. --- tests/testthat/test-epi_shift_internal.R | 10 ++++------ 1 file changed, 4 insertions(+), 6 deletions(-) diff --git a/tests/testthat/test-epi_shift_internal.R b/tests/testthat/test-epi_shift_internal.R index 5f0d7e0d5..348c61dda 100644 --- a/tests/testthat/test-epi_shift_internal.R +++ b/tests/testthat/test-epi_shift_internal.R @@ -1,7 +1,5 @@ library(dplyr) library(epiprocess) -library(parsnip) -library(workflows) # Random generated dataset set.seed(100) @@ -12,10 +10,10 @@ x <- tibble(geo_value = rep("nowhere",200), as_epi_df() slm_fit <- function(recipe, data = x) { - workflow() %>% - add_recipe(recipe) %>% - add_model(linear_reg()) %>% - fit(data = data) + workflows::workflow() %>% + workflows::add_recipe(recipe) %>% + workflows::add_model(parsnip::linear_reg()) %>% + parsnip::fit(data = data) } # Tests From 20f8bac4e2fd35c06621b0ce65f8fb3c6479c8a4 Mon Sep 17 00:00:00 2001 From: kenmawer Date: Thu, 2 Jun 2022 09:01:15 -0700 Subject: [PATCH 37/65] Updated description. --- DESCRIPTION | 4 +++- tests/testthat/test-epi_shift_internal.R | 10 ++++++---- 2 files changed, 9 insertions(+), 5 deletions(-) diff --git a/DESCRIPTION b/DESCRIPTION index f51ff8a26..8cd69598d 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -16,6 +16,7 @@ Imports: dplyr, glue, magrittr, + parsnip, purrr, recipes, rlang, @@ -23,7 +24,8 @@ Imports: tibble, tidyr, tidyselect, - tensr + tensr, + workflows Suggests: covidcast, data.table, diff --git a/tests/testthat/test-epi_shift_internal.R b/tests/testthat/test-epi_shift_internal.R index 348c61dda..5f0d7e0d5 100644 --- a/tests/testthat/test-epi_shift_internal.R +++ b/tests/testthat/test-epi_shift_internal.R @@ -1,5 +1,7 @@ library(dplyr) library(epiprocess) +library(parsnip) +library(workflows) # Random generated dataset set.seed(100) @@ -10,10 +12,10 @@ x <- tibble(geo_value = rep("nowhere",200), as_epi_df() slm_fit <- function(recipe, data = x) { - workflows::workflow() %>% - workflows::add_recipe(recipe) %>% - workflows::add_model(parsnip::linear_reg()) %>% - parsnip::fit(data = data) + workflow() %>% + add_recipe(recipe) %>% + add_model(linear_reg()) %>% + fit(data = data) } # Tests From 1c5c1c018a4e328adf45c474d28ad34acb4fdd4a Mon Sep 17 00:00:00 2001 From: kenmawer Date: Thu, 2 Jun 2022 11:50:09 -0700 Subject: [PATCH 38/65] pp function won't run, so that needs to be fixed. --- R/epi_shift_internal.R | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/R/epi_shift_internal.R b/R/epi_shift_internal.R index e39f361c9..769777e2b 100644 --- a/R/epi_shift_internal.R +++ b/R/epi_shift_internal.R @@ -130,7 +130,8 @@ bake.step_epi_shift <- function(object, new_data, ...) { print.step_epi_shift <- function(x, width = max(20, options()$width - 30), ...) { ## TODO add printing of the shifts - title <- ifelse(x$role == "predictor","Lagging ","Leading ") # Account for lag/lead + title <- ifelse(x$role == "predictor","Lagging","Leading") %>% + paste0(": ", abs(x$shift),",") recipes::print_step(x$columns, x$terms, x$trained, title, width) invisible(x) } From 722815ece585f9b2630e431cbed418a90d042d5d Mon Sep 17 00:00:00 2001 From: kenmawer Date: Thu, 2 Jun 2022 14:49:07 -0700 Subject: [PATCH 39/65] Put some work into it. --- R/step_naomit2.R | 12 ++++++++++++ musings/example-recipe.R | 5 +---- 2 files changed, 13 insertions(+), 4 deletions(-) create mode 100644 R/step_naomit2.R diff --git a/R/step_naomit2.R b/R/step_naomit2.R new file mode 100644 index 000000000..7c2a510cd --- /dev/null +++ b/R/step_naomit2.R @@ -0,0 +1,12 @@ +#' Special NA omit step that does two steps in one +#' +#' @param x Recipe to be used. +#' +#' @return A recipe with NA's omitted .... +#' @export recipes + +step_naomit2 <- function(x) { + x %>% + recipes::step_naomit(all_predictors()) %>% + recipes::step_naomit(all_outcomes(), skip = TRUE) +} diff --git a/musings/example-recipe.R b/musings/example-recipe.R index 84f536cd1..a07314011 100644 --- a/musings/example-recipe.R +++ b/musings/example-recipe.R @@ -40,10 +40,7 @@ r <- epi_recipe(x) %>% # if we add this as a class, maybe we get better step_epi_ahead(death_rate, ahead = 7) %>% step_epi_lag(death_rate, lag = c(0, 7, 14)) %>% step_epi_lag(case_rate, lag = c(0, 7, 14)) %>% - step_naomit(all_predictors()) %>% - # below, `skip` means we don't do this at predict time - # we should probably do something useful here to avoid user error - step_naomit(all_outcomes(), skip = TRUE) + step_narm() slm <- linear_reg() From 4459f02314cfb56a53b13308f488db7616b7ffda Mon Sep 17 00:00:00 2001 From: kenmawer Date: Fri, 3 Jun 2022 09:04:20 -0700 Subject: [PATCH 40/65] Updated description to be better. --- R/step_naomit2.R | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/R/step_naomit2.R b/R/step_naomit2.R index 7c2a510cd..ff99b15a2 100644 --- a/R/step_naomit2.R +++ b/R/step_naomit2.R @@ -1,9 +1,10 @@ #' Special NA omit step that does two steps in one #' -#' @param x Recipe to be used. +#' @param x Recipe to be used for omission steps #' -#' @return A recipe with NA's omitted .... -#' @export recipes +#' @return Omits NA's from both predictors and outcomes and training time; +#' however, only omits predictors at prediction time +#' @export step_naomit2 <- function(x) { x %>% From d00631f75c8d900e63f2f1f6e6934aa11f24ac84 Mon Sep 17 00:00:00 2001 From: kenmawer Date: Fri, 3 Jun 2022 09:04:51 -0700 Subject: [PATCH 41/65] Documented changes. --- NAMESPACE | 1 + man/step_naomit2.Rd | 18 ++++++++++++++++++ 2 files changed, 19 insertions(+) create mode 100644 man/step_naomit2.Rd diff --git a/NAMESPACE b/NAMESPACE index 3cdb7b31e..cca0c3de0 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -27,6 +27,7 @@ export(smooth_arx_forecaster) export(step_epi_ahead) export(step_epi_lag) export(step_epi_shift) +export(step_naomit2) import(recipes) importFrom(magrittr,"%>%") importFrom(rlang,"!!") diff --git a/man/step_naomit2.Rd b/man/step_naomit2.Rd new file mode 100644 index 000000000..37e04e4b7 --- /dev/null +++ b/man/step_naomit2.Rd @@ -0,0 +1,18 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/step_naomit2.R +\name{step_naomit2} +\alias{step_naomit2} +\title{Special NA omit step that does two steps in one} +\usage{ +step_naomit2(x) +} +\arguments{ +\item{x}{Recipe to be used for omission steps} +} +\value{ +Omits NA's from both predictors and outcomes and training time; +however, only omits predictors at prediction time +} +\description{ +Special NA omit step that does two steps in one +} From 45a241c7b093cd073861eb59fbdcb7d4b554ef71 Mon Sep 17 00:00:00 2001 From: kenmawer Date: Fri, 3 Jun 2022 09:43:02 -0700 Subject: [PATCH 42/65] Updated incorrect code. --- musings/example-recipe.R | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/musings/example-recipe.R b/musings/example-recipe.R index a07314011..1af4bba5d 100644 --- a/musings/example-recipe.R +++ b/musings/example-recipe.R @@ -40,7 +40,7 @@ r <- epi_recipe(x) %>% # if we add this as a class, maybe we get better step_epi_ahead(death_rate, ahead = 7) %>% step_epi_lag(death_rate, lag = c(0, 7, 14)) %>% step_epi_lag(case_rate, lag = c(0, 7, 14)) %>% - step_narm() + step_naomit2() slm <- linear_reg() From f229b46ce44a01bd3d06026e00e93a79b87dcc06 Mon Sep 17 00:00:00 2001 From: kenmawer Date: Fri, 3 Jun 2022 09:45:39 -0700 Subject: [PATCH 43/65] Added testing template. --- R/test-step_naomit2.R | 65 +++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 65 insertions(+) create mode 100644 R/test-step_naomit2.R diff --git a/R/test-step_naomit2.R b/R/test-step_naomit2.R new file mode 100644 index 000000000..a04c59898 --- /dev/null +++ b/R/test-step_naomit2.R @@ -0,0 +1,65 @@ +library(dplyr) +library(epiprocess) +library(parsnip) +library(workflows) + +# Random generated dataset +set.seed(100) +x <- tibble(geo_value = rep("nowhere",200), + time_value = as.Date("2021-01-01") + 0:199, + case_rate = rpois(100,20) + 1:200, + death_rate = rpois(100,10) + 1:200) %>% + as_epi_df() + +slm_fit <- function(recipe, data = x) { + workflow() %>% + add_recipe(recipe) %>% + add_model(linear_reg()) %>% + fit(data = data) +} + +# Tests +test_that("Check that epi_ahead shifts properly", { + r1 <- epi_recipe(x) %>% + step_epi_ahead(death_rate, ahead = 7) %>% + step_epi_lag(death_rate, lag = -7) %>% + step_naomit(all_predictors()) %>% + step_naomit(all_outcomes(), skip = TRUE) + + slm_fit1 <- slm_fit(r1) + + slope_ahead <- slm_fit1$fit$fit$fit$coefficients[[2]] + expect_equal(slope_ahead,1) +}) + +test_that("Check that epi_lag shifts properly", { + r2 <- epi_recipe(x) %>% + step_epi_ahead(death_rate, ahead = -7) %>% + step_epi_lag(death_rate, lag = 7) %>% + step_naomit(all_predictors()) %>% + step_naomit(all_outcomes(), skip = TRUE) + + slm_fit2 <- slm_fit(r2) + + slope_lag <- slm_fit2$fit$fit$fit$coefficients[[2]] + expect_equal(slope_lag,1) +}) + +test_that("Check for non-integer values", { + r3 <- epi_recipe(x) %>% + step_epi_ahead(death_rate, ahead = 3.6) %>% + step_epi_lag(death_rate, lag = 1.9) + expect_error( + slm_fit(r3) + ) +}) + +test_that("Check for duplicate values", { + r4 <- epi_recipe(x) %>% + step_epi_ahead(death_rate, ahead = 7) %>% + step_epi_lag(death_rate, lag = 7) %>% + step_epi_lag(death_rate, lag = 7) + expect_error( + slm_fit(r4) + ) +}) From e42dd5d89adb740de63e936f0880b609cce8b53e Mon Sep 17 00:00:00 2001 From: kenmawer Date: Fri, 3 Jun 2022 10:01:02 -0700 Subject: [PATCH 44/65] Fixed misplaced file --- R/test-step_naomit2.R | 65 ----------------------------------- tests/testthat/step_naomit2.R | 37 ++++++++++++++++++++ 2 files changed, 37 insertions(+), 65 deletions(-) delete mode 100644 R/test-step_naomit2.R create mode 100644 tests/testthat/step_naomit2.R diff --git a/R/test-step_naomit2.R b/R/test-step_naomit2.R deleted file mode 100644 index a04c59898..000000000 --- a/R/test-step_naomit2.R +++ /dev/null @@ -1,65 +0,0 @@ -library(dplyr) -library(epiprocess) -library(parsnip) -library(workflows) - -# Random generated dataset -set.seed(100) -x <- tibble(geo_value = rep("nowhere",200), - time_value = as.Date("2021-01-01") + 0:199, - case_rate = rpois(100,20) + 1:200, - death_rate = rpois(100,10) + 1:200) %>% - as_epi_df() - -slm_fit <- function(recipe, data = x) { - workflow() %>% - add_recipe(recipe) %>% - add_model(linear_reg()) %>% - fit(data = data) -} - -# Tests -test_that("Check that epi_ahead shifts properly", { - r1 <- epi_recipe(x) %>% - step_epi_ahead(death_rate, ahead = 7) %>% - step_epi_lag(death_rate, lag = -7) %>% - step_naomit(all_predictors()) %>% - step_naomit(all_outcomes(), skip = TRUE) - - slm_fit1 <- slm_fit(r1) - - slope_ahead <- slm_fit1$fit$fit$fit$coefficients[[2]] - expect_equal(slope_ahead,1) -}) - -test_that("Check that epi_lag shifts properly", { - r2 <- epi_recipe(x) %>% - step_epi_ahead(death_rate, ahead = -7) %>% - step_epi_lag(death_rate, lag = 7) %>% - step_naomit(all_predictors()) %>% - step_naomit(all_outcomes(), skip = TRUE) - - slm_fit2 <- slm_fit(r2) - - slope_lag <- slm_fit2$fit$fit$fit$coefficients[[2]] - expect_equal(slope_lag,1) -}) - -test_that("Check for non-integer values", { - r3 <- epi_recipe(x) %>% - step_epi_ahead(death_rate, ahead = 3.6) %>% - step_epi_lag(death_rate, lag = 1.9) - expect_error( - slm_fit(r3) - ) -}) - -test_that("Check for duplicate values", { - r4 <- epi_recipe(x) %>% - step_epi_ahead(death_rate, ahead = 7) %>% - step_epi_lag(death_rate, lag = 7) %>% - step_epi_lag(death_rate, lag = 7) - expect_error( - slm_fit(r4) - ) -}) diff --git a/tests/testthat/step_naomit2.R b/tests/testthat/step_naomit2.R new file mode 100644 index 000000000..bbfedd396 --- /dev/null +++ b/tests/testthat/step_naomit2.R @@ -0,0 +1,37 @@ +library(dplyr) +library(epiprocess) +library(parsnip) +library(workflows) + +# Random generated dataset +set.seed(100) +x <- tibble(geo_value = rep("nowhere",200), + time_value = as.Date("2021-01-01") + 0:199, + case_rate = rpois(100,20) + 1:200, + death_rate = rpois(100,10) + 1:200) %>% + as_epi_df() + +y <- tibble(geo_value = rep("nowhere",200), + time_value = as.Date("2021-01-01") + 0:199, + case_rate = rpois(100,20) + 1:200, + death_rate = rpois(100,10) + 1:200) %>% + as_epi_df() + +# Tests +test_that("Check that epi_ahead shifts properly", { + r1 <- epi_recipe(x) %>% + step_epi_ahead(death_rate, ahead = 7) %>% + step_epi_lag(death_rate, lag = c(0,7,14)) + + r2 <- epi_recipe(y) %>% + step_epi_ahead(death_rate, ahead = 7) %>% + step_epi_lag(death_rate, lag = c(0,7,14)) + + r3 <- r1 %>% + step_naomit(all_predictors()) %>% + step_naomit(all_outcomes(), skip = TRUE) + + r4 <- step_naomit2(r2) + + expect_identical(r3,r4) +}) From e60abfd8045a3e000fbc9f478c73d7d582b8b943 Mon Sep 17 00:00:00 2001 From: kenmawer Date: Fri, 3 Jun 2022 10:49:06 -0700 Subject: [PATCH 45/65] Renamed misnamed test --- tests/testthat/step_naomit2.R | 37 ------------------------------ tests/testthat/test-step_naomit2.R | 33 ++++++++++++++++++++++++++ 2 files changed, 33 insertions(+), 37 deletions(-) delete mode 100644 tests/testthat/step_naomit2.R create mode 100644 tests/testthat/test-step_naomit2.R diff --git a/tests/testthat/step_naomit2.R b/tests/testthat/step_naomit2.R deleted file mode 100644 index bbfedd396..000000000 --- a/tests/testthat/step_naomit2.R +++ /dev/null @@ -1,37 +0,0 @@ -library(dplyr) -library(epiprocess) -library(parsnip) -library(workflows) - -# Random generated dataset -set.seed(100) -x <- tibble(geo_value = rep("nowhere",200), - time_value = as.Date("2021-01-01") + 0:199, - case_rate = rpois(100,20) + 1:200, - death_rate = rpois(100,10) + 1:200) %>% - as_epi_df() - -y <- tibble(geo_value = rep("nowhere",200), - time_value = as.Date("2021-01-01") + 0:199, - case_rate = rpois(100,20) + 1:200, - death_rate = rpois(100,10) + 1:200) %>% - as_epi_df() - -# Tests -test_that("Check that epi_ahead shifts properly", { - r1 <- epi_recipe(x) %>% - step_epi_ahead(death_rate, ahead = 7) %>% - step_epi_lag(death_rate, lag = c(0,7,14)) - - r2 <- epi_recipe(y) %>% - step_epi_ahead(death_rate, ahead = 7) %>% - step_epi_lag(death_rate, lag = c(0,7,14)) - - r3 <- r1 %>% - step_naomit(all_predictors()) %>% - step_naomit(all_outcomes(), skip = TRUE) - - r4 <- step_naomit2(r2) - - expect_identical(r3,r4) -}) diff --git a/tests/testthat/test-step_naomit2.R b/tests/testthat/test-step_naomit2.R new file mode 100644 index 000000000..d8aa4f91f --- /dev/null +++ b/tests/testthat/test-step_naomit2.R @@ -0,0 +1,33 @@ +library(dplyr) +library(epiprocess) +library(parsnip) +library(workflows) + +# Random generated dataset +set.seed(100) +x <- tibble(geo_value = rep("nowhere",200), + time_value = as.Date("2021-01-01") + 0:199, + case_rate = rpois(100,20) + 1:200, + death_rate = rpois(100,10) + 1:200) %>% + as_epi_df() + +r <- epi_recipe(x) %>% + step_epi_ahead(death_rate, ahead = 7) %>% + step_epi_lag(death_rate, lag = c(0,7,14)) + +extract <- function(recipe) { + recipe +} + +# Tests +test_that("Check that epi_ahead shifts properly", { + expect_identical(step_naomit2(r), + r %>% + step_naomit(all_predictors()) %>% + step_naomit(all_outcomes(), skip = TRUE)) +}) + +z <- step_naomit2(r) +z2 <- r %>% + step_naomit(all_predictors()) %>% + step_naomit(all_outcomes(), skip = TRUE) From 45c5ab6b9218178f927620bfefbaae68692c959c Mon Sep 17 00:00:00 2001 From: kenmawer Date: Fri, 3 Jun 2022 11:19:53 -0700 Subject: [PATCH 46/65] Finished test --- tests/testthat/test-step_naomit2.R | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/tests/testthat/test-step_naomit2.R b/tests/testthat/test-step_naomit2.R index d8aa4f91f..fabb311ef 100644 --- a/tests/testthat/test-step_naomit2.R +++ b/tests/testthat/test-step_naomit2.R @@ -19,15 +19,15 @@ extract <- function(recipe) { recipe } -# Tests -test_that("Check that epi_ahead shifts properly", { - expect_identical(step_naomit2(r), - r %>% - step_naomit(all_predictors()) %>% - step_naomit(all_outcomes(), skip = TRUE)) -}) - -z <- step_naomit2(r) +z1 <- step_naomit2(r) z2 <- r %>% step_naomit(all_predictors()) %>% step_naomit(all_outcomes(), skip = TRUE) + +# Tests +test_that("Check that both functions behave the same way", { + expect_identical(z1$steps[[3]][-1][-5],z2$steps[[3]][-1][-5]) + expect_identical(z1$steps[[4]][-1][-5],z2$steps[[4]][-1][-5]) + expect_identical(class(z1$steps[[3]]),class(z2$steps[[3]])) + expect_identical(class(z1$steps[[4]]),class(z2$steps[[4]])) +}) From 6b7862f453a7f78878c07eb72a88823d6a073155 Mon Sep 17 00:00:00 2001 From: kenmawer Date: Fri, 3 Jun 2022 12:15:55 -0700 Subject: [PATCH 47/65] Polished test. --- tests/testthat/test-step_naomit2.R | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-) diff --git a/tests/testthat/test-step_naomit2.R b/tests/testthat/test-step_naomit2.R index fabb311ef..2672b68d0 100644 --- a/tests/testthat/test-step_naomit2.R +++ b/tests/testthat/test-step_naomit2.R @@ -11,20 +11,17 @@ x <- tibble(geo_value = rep("nowhere",200), death_rate = rpois(100,10) + 1:200) %>% as_epi_df() +# Preparing the datasets to be used for comparison r <- epi_recipe(x) %>% step_epi_ahead(death_rate, ahead = 7) %>% step_epi_lag(death_rate, lag = c(0,7,14)) -extract <- function(recipe) { - recipe -} - z1 <- step_naomit2(r) z2 <- r %>% step_naomit(all_predictors()) %>% step_naomit(all_outcomes(), skip = TRUE) -# Tests +# Test test_that("Check that both functions behave the same way", { expect_identical(z1$steps[[3]][-1][-5],z2$steps[[3]][-1][-5]) expect_identical(z1$steps[[4]][-1][-5],z2$steps[[4]][-1][-5]) From e5ac2fbcbf4b1d237821fee9dcff47a489bc21f9 Mon Sep 17 00:00:00 2001 From: kenmawer Date: Fri, 3 Jun 2022 15:35:34 -0700 Subject: [PATCH 48/65] New test added --- tests/testthat/test-grab_names.R | 9 +++++++++ 1 file changed, 9 insertions(+) create mode 100644 tests/testthat/test-grab_names.R diff --git a/tests/testthat/test-grab_names.R b/tests/testthat/test-grab_names.R new file mode 100644 index 000000000..8a8cb6978 --- /dev/null +++ b/tests/testthat/test-grab_names.R @@ -0,0 +1,9 @@ +print("a") + +df <- data.frame(b=1,c=2,ca=3,cat=4) + +test_that("Names are grabbed properly", { + expect_identical(grab_names(df,dplyr::starts_with("ca")), + subset(names(df),startsWith(names(df), "ca")) + ) +}) From 11addecfa0d3ecb8761d7c0027a86e28a63e8ad1 Mon Sep 17 00:00:00 2001 From: kenmawer Date: Mon, 6 Jun 2022 09:24:39 -0700 Subject: [PATCH 49/65] Name replacement. --- R/{step_naomit2.R => step_narm.R} | 2 +- tests/testthat/{test-step_naomit2.R => test-step_narm.R} | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) rename R/{step_naomit2.R => step_narm.R} (92%) rename tests/testthat/{test-step_naomit2.R => test-step_narm.R} (97%) diff --git a/R/step_naomit2.R b/R/step_narm.R similarity index 92% rename from R/step_naomit2.R rename to R/step_narm.R index ff99b15a2..2631c1b9f 100644 --- a/R/step_naomit2.R +++ b/R/step_narm.R @@ -6,7 +6,7 @@ #' however, only omits predictors at prediction time #' @export -step_naomit2 <- function(x) { +step_narm <- function(x) { x %>% recipes::step_naomit(all_predictors()) %>% recipes::step_naomit(all_outcomes(), skip = TRUE) diff --git a/tests/testthat/test-step_naomit2.R b/tests/testthat/test-step_narm.R similarity index 97% rename from tests/testthat/test-step_naomit2.R rename to tests/testthat/test-step_narm.R index 2672b68d0..dce91e220 100644 --- a/tests/testthat/test-step_naomit2.R +++ b/tests/testthat/test-step_narm.R @@ -16,7 +16,7 @@ r <- epi_recipe(x) %>% step_epi_ahead(death_rate, ahead = 7) %>% step_epi_lag(death_rate, lag = c(0,7,14)) -z1 <- step_naomit2(r) +z1 <- step_narm(r) z2 <- r %>% step_naomit(all_predictors()) %>% step_naomit(all_outcomes(), skip = TRUE) From 338f277dece2fc37fc144e9020f1d5baffddf1bc Mon Sep 17 00:00:00 2001 From: kenmawer Date: Mon, 6 Jun 2022 09:25:36 -0700 Subject: [PATCH 50/65] Issue documented. --- NAMESPACE | 2 +- man/{step_naomit2.Rd => step_narm.Rd} | 8 ++++---- 2 files changed, 5 insertions(+), 5 deletions(-) rename man/{step_naomit2.Rd => step_narm.Rd} (77%) diff --git a/NAMESPACE b/NAMESPACE index cca0c3de0..402e301a0 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -27,7 +27,7 @@ export(smooth_arx_forecaster) export(step_epi_ahead) export(step_epi_lag) export(step_epi_shift) -export(step_naomit2) +export(step_narm) import(recipes) importFrom(magrittr,"%>%") importFrom(rlang,"!!") diff --git a/man/step_naomit2.Rd b/man/step_narm.Rd similarity index 77% rename from man/step_naomit2.Rd rename to man/step_narm.Rd index 37e04e4b7..4a5db71c9 100644 --- a/man/step_naomit2.Rd +++ b/man/step_narm.Rd @@ -1,10 +1,10 @@ % Generated by roxygen2: do not edit by hand -% Please edit documentation in R/step_naomit2.R -\name{step_naomit2} -\alias{step_naomit2} +% Please edit documentation in R/step_narm.R +\name{step_narm} +\alias{step_narm} \title{Special NA omit step that does two steps in one} \usage{ -step_naomit2(x) +step_narm(x) } \arguments{ \item{x}{Recipe to be used for omission steps} From e373b497dfaeba632e3dd87c35d389426c9d733b Mon Sep 17 00:00:00 2001 From: kenmawer Date: Mon, 6 Jun 2022 10:58:09 -0700 Subject: [PATCH 51/65] Removed unnecessary export of step_epi_shift --- NAMESPACE | 1 - R/epi_shift_internal.R | 1 - 2 files changed, 2 deletions(-) diff --git a/NAMESPACE b/NAMESPACE index 402e301a0..72a1fbe97 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -26,7 +26,6 @@ export(smooth_arx_args_list) export(smooth_arx_forecaster) export(step_epi_ahead) export(step_epi_lag) -export(step_epi_shift) export(step_narm) import(recipes) importFrom(magrittr,"%>%") diff --git a/R/epi_shift_internal.R b/R/epi_shift_internal.R index 769777e2b..872e6f454 100644 --- a/R/epi_shift_internal.R +++ b/R/epi_shift_internal.R @@ -15,7 +15,6 @@ #' order_ for shifting. #' #' @family row operation steps -#' @export #' @rdname step_epi_ahead step_epi_shift <- function(recipe, From f29ae48adab0466a4d41be8b45c930bfce898256 Mon Sep 17 00:00:00 2001 From: kenmawer Date: Mon, 6 Jun 2022 11:07:04 -0700 Subject: [PATCH 52/65] Made test names more specific. --- tests/testthat/test-epi_shift_internal.R | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tests/testthat/test-epi_shift_internal.R b/tests/testthat/test-epi_shift_internal.R index 5f0d7e0d5..d3f5f7604 100644 --- a/tests/testthat/test-epi_shift_internal.R +++ b/tests/testthat/test-epi_shift_internal.R @@ -45,7 +45,7 @@ test_that("Check that epi_lag shifts properly", { expect_equal(slope_lag,1) }) -test_that("Check for non-integer values", { +test_that("Values for ahead and lag must be integer values", { r3 <- epi_recipe(x) %>% step_epi_ahead(death_rate, ahead = 3.6) %>% step_epi_lag(death_rate, lag = 1.9) @@ -54,7 +54,7 @@ test_that("Check for non-integer values", { ) }) -test_that("Check for duplicate values", { +test_that("Values for ahead and lag cannot be duplicates", { r4 <- epi_recipe(x) %>% step_epi_ahead(death_rate, ahead = 7) %>% step_epi_lag(death_rate, lag = 7) %>% From 54ca1f5313ea63794f3531bdc79241bfbad847ec Mon Sep 17 00:00:00 2001 From: kenmawer Date: Mon, 6 Jun 2022 11:41:56 -0700 Subject: [PATCH 53/65] Changed name to step_epi_naomit --- NAMESPACE | 2 +- R/{step_narm.R => step_epi_naomit.R} | 2 +- man/{step_narm.Rd => step_epi_naomit.Rd} | 8 ++++---- .../testthat/{test-step_narm.R => test-step_epi_naomit.R} | 2 +- 4 files changed, 7 insertions(+), 7 deletions(-) rename R/{step_narm.R => step_epi_naomit.R} (91%) rename man/{step_narm.Rd => step_epi_naomit.Rd} (75%) rename tests/testthat/{test-step_narm.R => test-step_epi_naomit.R} (97%) diff --git a/NAMESPACE b/NAMESPACE index 402e301a0..3290abe5d 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -26,8 +26,8 @@ export(smooth_arx_args_list) export(smooth_arx_forecaster) export(step_epi_ahead) export(step_epi_lag) +export(step_epi_naomit) export(step_epi_shift) -export(step_narm) import(recipes) importFrom(magrittr,"%>%") importFrom(rlang,"!!") diff --git a/R/step_narm.R b/R/step_epi_naomit.R similarity index 91% rename from R/step_narm.R rename to R/step_epi_naomit.R index 2631c1b9f..4f57acd72 100644 --- a/R/step_narm.R +++ b/R/step_epi_naomit.R @@ -6,7 +6,7 @@ #' however, only omits predictors at prediction time #' @export -step_narm <- function(x) { +step_epi_naomit <- function(x) { x %>% recipes::step_naomit(all_predictors()) %>% recipes::step_naomit(all_outcomes(), skip = TRUE) diff --git a/man/step_narm.Rd b/man/step_epi_naomit.Rd similarity index 75% rename from man/step_narm.Rd rename to man/step_epi_naomit.Rd index 4a5db71c9..0e8bd3619 100644 --- a/man/step_narm.Rd +++ b/man/step_epi_naomit.Rd @@ -1,10 +1,10 @@ % Generated by roxygen2: do not edit by hand -% Please edit documentation in R/step_narm.R -\name{step_narm} -\alias{step_narm} +% Please edit documentation in R/step_epi_naomit.R +\name{step_epi_naomit} +\alias{step_epi_naomit} \title{Special NA omit step that does two steps in one} \usage{ -step_narm(x) +step_epi_naomit(x) } \arguments{ \item{x}{Recipe to be used for omission steps} diff --git a/tests/testthat/test-step_narm.R b/tests/testthat/test-step_epi_naomit.R similarity index 97% rename from tests/testthat/test-step_narm.R rename to tests/testthat/test-step_epi_naomit.R index dce91e220..5205a086b 100644 --- a/tests/testthat/test-step_narm.R +++ b/tests/testthat/test-step_epi_naomit.R @@ -16,7 +16,7 @@ r <- epi_recipe(x) %>% step_epi_ahead(death_rate, ahead = 7) %>% step_epi_lag(death_rate, lag = c(0,7,14)) -z1 <- step_narm(r) +z1 <- step_epi_naomit(r) z2 <- r %>% step_naomit(all_predictors()) %>% step_naomit(all_outcomes(), skip = TRUE) From 3326893e19b121f6f2212cf20ebf4359e765e5c4 Mon Sep 17 00:00:00 2001 From: kenmawer Date: Mon, 6 Jun 2022 12:16:24 -0700 Subject: [PATCH 54/65] Fixed butchered NAMESPACE. --- NAMESPACE | 5 ----- 1 file changed, 5 deletions(-) diff --git a/NAMESPACE b/NAMESPACE index eebaec7d2..ceec4ee9f 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -26,12 +26,7 @@ export(smooth_arx_args_list) export(smooth_arx_forecaster) export(step_epi_ahead) export(step_epi_lag) -<<<<<<< HEAD -export(step_narm) -======= export(step_epi_naomit) -export(step_epi_shift) ->>>>>>> 878eb71132d58400f79ffdd6eb14452f289cbf6a import(recipes) importFrom(magrittr,"%>%") importFrom(rlang,"!!") From 11b8a95f7f626b87692dbd112462fe9e8ff229d6 Mon Sep 17 00:00:00 2001 From: kenmawer Date: Mon, 6 Jun 2022 12:24:08 -0700 Subject: [PATCH 55/65] Deleted placeholder I made when creating file. --- tests/testthat/test-grab_names.R | 2 -- 1 file changed, 2 deletions(-) diff --git a/tests/testthat/test-grab_names.R b/tests/testthat/test-grab_names.R index 8a8cb6978..2e7954ab3 100644 --- a/tests/testthat/test-grab_names.R +++ b/tests/testthat/test-grab_names.R @@ -1,5 +1,3 @@ -print("a") - df <- data.frame(b=1,c=2,ca=3,cat=4) test_that("Names are grabbed properly", { From fa1a476d64690241c2bbe2bd9dcc41bcec079ae4 Mon Sep 17 00:00:00 2001 From: kenmawer Date: Mon, 6 Jun 2022 14:05:09 -0700 Subject: [PATCH 56/65] Added assign_arg_list test for testing. --- tests/testthat/test-assign_arg_list.R | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/tests/testthat/test-assign_arg_list.R b/tests/testthat/test-assign_arg_list.R index 017c40935..7cd69dfa9 100644 --- a/tests/testthat/test-assign_arg_list.R +++ b/tests/testthat/test-assign_arg_list.R @@ -4,3 +4,8 @@ test_that("First argument must be a list",{ test_that("All arguments should be named",{ expect_error(assign_arg_list(list(1,2))) }) +test_that("assign_arg_list works as intended",{ + assign_arg_list(list(a="dog",b=2)) + expect_identical(a,"dog") + expect_identical(b,2) +}) From efc4e06b1ffef176379c7954de3ac1872b2eb723 Mon Sep 17 00:00:00 2001 From: kenmawer Date: Mon, 6 Jun 2022 16:10:50 -0700 Subject: [PATCH 57/65] Updated df_mat_mul --- tests/testthat/test-df_mat_mul.R | 42 +++++++++++++++++++++++++------- 1 file changed, 33 insertions(+), 9 deletions(-) diff --git a/tests/testthat/test-df_mat_mul.R b/tests/testthat/test-df_mat_mul.R index 166fdc92d..ebe3d493a 100644 --- a/tests/testthat/test-df_mat_mul.R +++ b/tests/testthat/test-df_mat_mul.R @@ -1,19 +1,43 @@ df <- data.frame(matrix(1:100, ncol = 5)) mat <- matrix(1:4, ncol = 2) -df_mat_mul(df, mat, "z", dplyr::num_range("X", 2:3)) -test_that("df_mat_mul checks inputs", { + +test_that("First input must be a data frame and second input must be a matrix", + { expect_error(df_mat_mul(30,mat)) expect_error(df_mat_mul(df,20)) }) -test_that("Incompatible matrix multipication cannot happen", { - expect_error(df_mat_mul(df, mat, "z", dplyr::num_range("X", 1:3))) +test_that("Argument name is a character" ,{ + expect_error(df_mat_mul(df, mat, 100)) +}) + +test_that("The length of names does not differ from the length of the number + of outputs" ,{ + expect_error(df_mat_mul(df, mat, c("a","b","c"), + dplyr::num_range("X", 2:3))) }) +test_that("The number of columns of the first data frame cannot differ from the + number of rows of the second matrix, hence preventing incompatible + matrix multiplication", { + expect_error(df_mat_mul(df, mat, "z", 1:3)) + }) + +X <- df[c(1,4,5)] +Z <- as.data.frame(as.matrix(df[2:3]) %*% mat) +colnames(Z) <- c("z1","z2") +output <- cbind(X,Z) + test_that("Matrix multiplication is being handled as expected", { - X <- df[c(1,4,5)] - Z <- as.data.frame(as.matrix(df[2:3]) %*% mat) - colnames(Z) <- c("z1","z2") - output <- cbind(X,Z) - expect_identical(df_mat_mul(df,mat, "z", dplyr::num_range("X", 2:3)),output) + expect_identical(df_mat_mul(df, mat, "z", 2:3),output) +}) + +test_that("Names are used from the out_names field", { + expect_identical(df_mat_mul(df, mat, c("z1","z2"), 2:3),output) +}) + +test_that("Other tidyselect functionalities are working", { + mult <- df_mat_mul(df, mat, "z", dplyr::num_range("X", 2:3)) + expect_identical(mult,output) + expect_identical(df_mat_mul(df, mat, "z", 2, 3),output) }) From 28ae5d3b5f1f8f1ab2cfd7e30067129058d01d7a Mon Sep 17 00:00:00 2001 From: kenmawer Date: Tue, 7 Jun 2022 09:05:08 -0700 Subject: [PATCH 58/65] Added test for checking for a mismatched test. --- tests/testthat/test-df_mat_mul.R | 10 ++++------ 1 file changed, 4 insertions(+), 6 deletions(-) diff --git a/tests/testthat/test-df_mat_mul.R b/tests/testthat/test-df_mat_mul.R index ebe3d493a..291a7b0ce 100644 --- a/tests/testthat/test-df_mat_mul.R +++ b/tests/testthat/test-df_mat_mul.R @@ -13,8 +13,7 @@ test_that("Argument name is a character" ,{ test_that("The length of names does not differ from the length of the number of outputs" ,{ - expect_error(df_mat_mul(df, mat, c("a","b","c"), - dplyr::num_range("X", 2:3))) + expect_error(df_mat_mul(df, mat, c("a","b","c"), 2:3)) }) test_that("The number of columns of the first data frame cannot differ from the @@ -28,11 +27,8 @@ Z <- as.data.frame(as.matrix(df[2:3]) %*% mat) colnames(Z) <- c("z1","z2") output <- cbind(X,Z) -test_that("Matrix multiplication is being handled as expected", { +test_that("Names are being handled properly", { expect_identical(df_mat_mul(df, mat, "z", 2:3),output) -}) - -test_that("Names are used from the out_names field", { expect_identical(df_mat_mul(df, mat, c("z1","z2"), 2:3),output) }) @@ -40,4 +36,6 @@ test_that("Other tidyselect functionalities are working", { mult <- df_mat_mul(df, mat, "z", dplyr::num_range("X", 2:3)) expect_identical(mult,output) expect_identical(df_mat_mul(df, mat, "z", 2, 3),output) + # Mismatched names should not work: + expect_error(df_mat_mul(df, mat, "z", dplyr::num_range("Y", 2:3))) }) From d73d9e27a6fc142a3e3973fc0fdbfe13bb8f8bbb Mon Sep 17 00:00:00 2001 From: kenmawer Date: Tue, 7 Jun 2022 09:46:23 -0700 Subject: [PATCH 59/65] Added comments on why we need to do certain tests that way. --- tests/testthat/test-step_epi_naomit.R | 15 ++++++++++----- 1 file changed, 10 insertions(+), 5 deletions(-) diff --git a/tests/testthat/test-step_epi_naomit.R b/tests/testthat/test-step_epi_naomit.R index 5205a086b..ee7329a58 100644 --- a/tests/testthat/test-step_epi_naomit.R +++ b/tests/testthat/test-step_epi_naomit.R @@ -21,10 +21,15 @@ z2 <- r %>% step_naomit(all_predictors()) %>% step_naomit(all_outcomes(), skip = TRUE) -# Test +# Checks the behaviour of a step function, omitting the quosure and id that +# differ from one another, even with identical behaviour +behav <- function(recipe,step_num) recipe$steps[[step_num]][-1][-5] +# Checks the class type of an object +step_class <- function(recipe,step_num) class(recipe$steps[step_num]) + test_that("Check that both functions behave the same way", { - expect_identical(z1$steps[[3]][-1][-5],z2$steps[[3]][-1][-5]) - expect_identical(z1$steps[[4]][-1][-5],z2$steps[[4]][-1][-5]) - expect_identical(class(z1$steps[[3]]),class(z2$steps[[3]])) - expect_identical(class(z1$steps[[4]]),class(z2$steps[[4]])) + expect_identical(behav(z1,3),behav(z2,3)) + expect_identical(behav(z1,4),behav(z2,4)) + expect_identical(step_class(z1,3),step_class(z2,3)) + expect_identical(step_class(z1,4),step_class(z2,4)) }) From f2728392436ea72977252169271f7977b8138426 Mon Sep 17 00:00:00 2001 From: kenmawer Date: Tue, 7 Jun 2022 10:50:45 -0700 Subject: [PATCH 60/65] Committed changes that should be updated. --- R/step_epi_naomit.R | 23 +++++++++++++++++------ man/step_epi_naomit.Rd | 22 ++++++++++++++++------ tests/testthat/test-step_epi_naomit.R | 4 ++++ 3 files changed, 37 insertions(+), 12 deletions(-) diff --git a/R/step_epi_naomit.R b/R/step_epi_naomit.R index 4f57acd72..9886582ab 100644 --- a/R/step_epi_naomit.R +++ b/R/step_epi_naomit.R @@ -1,13 +1,24 @@ -#' Special NA omit step that does two steps in one +#' Unified NA omission wrapper function for recipes #' -#' @param x Recipe to be used for omission steps +#' @param recipe Recipe to be used for omission steps #' -#' @return Omits NA's from both predictors and outcomes and training time; -#' however, only omits predictors at prediction time +#' @return Omits NA's from both predictors and outcomes at training time; +#' however, only omits associated predictors at prediction time to avoid +#' losing data. #' @export +#' @examples +#' tibble(geo_value = rep("place",200), +#' time_value = as.Date("2021-01-01") + 0:199, +#' case_rate = 1:200, +#' death_rate = 1:200) %>% +#' as_epi_df() %>% +#' recipe() %>% +#' step_epi_naomit() -step_epi_naomit <- function(x) { - x %>% + +step_epi_naomit <- function(recipe) { + stopifnot("recipe" %in% class(recipe)) + recipe %>% recipes::step_naomit(all_predictors()) %>% recipes::step_naomit(all_outcomes(), skip = TRUE) } diff --git a/man/step_epi_naomit.Rd b/man/step_epi_naomit.Rd index 0e8bd3619..1dfcaab19 100644 --- a/man/step_epi_naomit.Rd +++ b/man/step_epi_naomit.Rd @@ -2,17 +2,27 @@ % Please edit documentation in R/step_epi_naomit.R \name{step_epi_naomit} \alias{step_epi_naomit} -\title{Special NA omit step that does two steps in one} +\title{Unified NA omission wrapper function for recipes} \usage{ -step_epi_naomit(x) +step_epi_naomit(recipe) } \arguments{ -\item{x}{Recipe to be used for omission steps} +\item{recipe}{Recipe to be used for omission steps} } \value{ -Omits NA's from both predictors and outcomes and training time; -however, only omits predictors at prediction time +Omits NA's from both predictors and outcomes at training time; +however, only omits associated predictors at prediction time to avoid +losing data. } \description{ -Special NA omit step that does two steps in one +Unified NA omission wrapper function for recipes +} +\examples{ +tibble(geo_value = rep("place",200), + time_value = as.Date("2021-01-01") + 0:199, + case_rate = 1:200, + death_rate = 1:200) \%>\% + as_epi_df() \%>\% + recipe() \%>\% + step_epi_naomit() } diff --git a/tests/testthat/test-step_epi_naomit.R b/tests/testthat/test-step_epi_naomit.R index ee7329a58..a0688adf1 100644 --- a/tests/testthat/test-step_epi_naomit.R +++ b/tests/testthat/test-step_epi_naomit.R @@ -16,6 +16,10 @@ r <- epi_recipe(x) %>% step_epi_ahead(death_rate, ahead = 7) %>% step_epi_lag(death_rate, lag = c(0,7,14)) +test_that("Argument must be a recipe", { + expect_error(step_epi_naomit(x)) +}) + z1 <- step_epi_naomit(r) z2 <- r %>% step_naomit(all_predictors()) %>% From 6fa4a5ff6a9b144d54422efe08add0fce0bd9ab7 Mon Sep 17 00:00:00 2001 From: kenmawer Date: Tue, 7 Jun 2022 10:52:07 -0700 Subject: [PATCH 61/65] Removed randomness. --- tests/testthat/test-step_epi_naomit.R | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/tests/testthat/test-step_epi_naomit.R b/tests/testthat/test-step_epi_naomit.R index a0688adf1..3fad4480e 100644 --- a/tests/testthat/test-step_epi_naomit.R +++ b/tests/testthat/test-step_epi_naomit.R @@ -4,11 +4,10 @@ library(parsnip) library(workflows) # Random generated dataset -set.seed(100) x <- tibble(geo_value = rep("nowhere",200), time_value = as.Date("2021-01-01") + 0:199, - case_rate = rpois(100,20) + 1:200, - death_rate = rpois(100,10) + 1:200) %>% + case_rate = 1:200, + death_rate = 1:200) %>% as_epi_df() # Preparing the datasets to be used for comparison From 609207547c7435de06140000064ceded4e2ad370 Mon Sep 17 00:00:00 2001 From: "Daniel J. McDonald" Date: Tue, 7 Jun 2022 16:45:15 -0700 Subject: [PATCH 62/65] add codeowners --- .github/CODEOWNERS | 1 + 1 file changed, 1 insertion(+) create mode 100644 .github/CODEOWNERS diff --git a/.github/CODEOWNERS b/.github/CODEOWNERS new file mode 100644 index 000000000..2a94f5695 --- /dev/null +++ b/.github/CODEOWNERS @@ -0,0 +1 @@ +* @dajmcdon From 168e91a5dc54d9f055b7a2ac14c42d459569cc53 Mon Sep 17 00:00:00 2001 From: "Daniel J. McDonald" Date: Tue, 7 Jun 2022 17:16:12 -0700 Subject: [PATCH 63/65] Rewrite main history to earlier --- DESCRIPTION | 2 - NAMESPACE | 4 +- R/epi_ahead.R | 107 ++++++++++++++++-- R/epi_lag.R | 100 +++++++++++++++-- R/epi_shift_internal.R | 136 ----------------------- R/step_epi_naomit.R | 24 ---- man/step_epi_ahead.Rd | 34 +----- man/step_epi_naomit.Rd | 28 ----- musings/example-recipe.R | 6 +- tests/testthat/test-assign_arg_list.R | 11 -- tests/testthat/test-df_mat_mul.R | 41 ------- tests/testthat/test-epi_shift_internal.R | 65 ----------- tests/testthat/test-grab_names.R | 7 -- tests/testthat/test-step_epi_naomit.R | 38 ------- 14 files changed, 192 insertions(+), 411 deletions(-) delete mode 100644 R/epi_shift_internal.R delete mode 100644 R/step_epi_naomit.R delete mode 100644 man/step_epi_naomit.Rd delete mode 100644 tests/testthat/test-assign_arg_list.R delete mode 100644 tests/testthat/test-df_mat_mul.R delete mode 100644 tests/testthat/test-epi_shift_internal.R delete mode 100644 tests/testthat/test-grab_names.R delete mode 100644 tests/testthat/test-step_epi_naomit.R diff --git a/DESCRIPTION b/DESCRIPTION index a7dcf64f2..268404141 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -20,7 +20,6 @@ Imports: glue, hardhat (>= 1.0.0.9000), magrittr, - parsnip, purrr, recipes (>= 0.2.0.9001), rlang, @@ -29,7 +28,6 @@ Imports: tibble, tidyr, tidyselect, - tensr, workflows Suggests: covidcast, diff --git a/NAMESPACE b/NAMESPACE index 6aa7814fd..4361255d2 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -1,6 +1,7 @@ # Generated by roxygen2: do not edit by hand -S3method(bake,step_epi_shift) +S3method(bake,step_epi_ahead) +S3method(bake,step_epi_lag) S3method(epi_keys,default) S3method(epi_keys,epi_df) S3method(epi_keys,recipe) @@ -35,7 +36,6 @@ export(smooth_arx_args_list) export(smooth_arx_forecaster) export(step_epi_ahead) export(step_epi_lag) -export(step_epi_naomit) import(recipes) importFrom(magrittr,"%>%") importFrom(rlang,"!!") diff --git a/R/epi_ahead.R b/R/epi_ahead.R index b90fbae43..3434b7d93 100644 --- a/R/epi_ahead.R +++ b/R/epi_ahead.R @@ -63,16 +63,101 @@ step_epi_ahead <- columns = NULL, skip = FALSE, id = rand_id("epi_ahead")) { - step_epi_shift(recipe, - ..., - role = role, - trained = trained, - shift = ahead, - prefix = prefix, - default = default, - keys = keys, - columns = columns, - skip = skip, - id = id + add_step( + recipe, + step_epi_ahead_new( + terms = dplyr::enquos(...), + role = role, + trained = trained, + ahead = ahead, + prefix = prefix, + default = default, + keys = keys, + columns = columns, + skip = skip, + id = id + ) ) } + +step_epi_ahead_new <- + function(terms, role, trained, ahead, prefix, default, keys, + columns, skip, id) { + step( + subclass = "epi_ahead", + terms = terms, + role = role, + trained = trained, + ahead = ahead, + prefix = prefix, + default = default, + keys = keys, + columns = columns, + skip = skip, + id = id + ) + } + +#' @export +prep.step_epi_ahead <- function(x, training, info = NULL, ...) { + step_epi_ahead_new( + terms = x$terms, + role = x$role, + trained = TRUE, + ahead = x$ahead, + prefix = x$prefix, + default = x$default, + keys = x$keys, + columns = recipes_eval_select(x$terms, training, info), + skip = x$skip, + id = x$id + ) +} + +#' @export +bake.step_epi_ahead <- function(object, new_data, ...) { + if (!all(object$ahead == as.integer(object$ahead))) { + rlang::abort("step_epi_ahead requires 'ahead' argument to be integer valued.") + } + + grid <- tidyr::expand_grid( + col = object$columns, lag_val = -object$ahead) %>% + dplyr::mutate( + ahead_val = -lag_val, + newname = glue::glue("{object$prefix}{ahead_val}_{col}") + ) %>% + dplyr::select(-ahead_val) + + ## ensure no name clashes + new_data_names <- colnames(new_data) + intersection <- new_data_names %in% grid$newname + if (any(intersection)) { + rlang::abort( + paste0("Name collision occured in `", class(object)[1], + "`. The following variable names already exists: ", + paste0(new_data_names[intersection], collapse = ", "), + ".")) + } + + ok <- object$keys + lagged <- purrr::reduce( + purrr::pmap(grid, epi_shift_single, x = new_data, key_cols = ok), + dplyr::full_join, + by = ok + ) + + dplyr::full_join(new_data, lagged, by = ok) %>% + dplyr::group_by(dplyr::across(dplyr::all_of(ok[-1]))) %>% + dplyr::arrange(time_value) %>% + dplyr::ungroup() + +} + +#' @export +print.step_epi_ahead <- + function(x, width = max(20, options()$width - 30), ...) { + ## TODO add printing of the lags + title <- "Leading " + recipes::print_step(x$columns, x$terms, x$trained, title, width) + invisible(x) + } diff --git a/R/epi_lag.R b/R/epi_lag.R index ccbbb5aba..b7dc28b2a 100644 --- a/R/epi_lag.R +++ b/R/epi_lag.R @@ -29,17 +29,95 @@ step_epi_lag <- columns = NULL, skip = FALSE, id = rand_id("epi_lag")) { - step_epi_shift(recipe, - ..., - role = role, - trained = trained, - shift = -lag, - prefix = prefix, - default = default, - keys = keys, - columns = columns, - skip = skip, - id = id + add_step( + recipe, + step_epi_lag_new( + terms = dplyr::enquos(...), + role = role, + trained = trained, + lag = lag, + prefix = prefix, + default = default, + keys = keys, + columns = columns, + skip = skip, + id = id + ) ) } +step_epi_lag_new <- + function(terms, role, trained, lag, prefix, default, keys, + columns, skip, id) { + step( + subclass = "epi_lag", + terms = terms, + role = role, + trained = trained, + lag = lag, + prefix = prefix, + default = default, + keys = keys, + columns = columns, + skip = skip, + id = id + ) + } + +#' @export +prep.step_epi_lag <- function(x, training, info = NULL, ...) { + step_epi_lag_new( + terms = x$terms, + role = x$role, + trained = TRUE, + lag = x$lag, + prefix = x$prefix, + default = x$default, + keys = x$keys, + columns = recipes_eval_select(x$terms, training, info), + skip = x$skip, + id = x$id + ) +} + +#' @export +bake.step_epi_lag <- function(object, new_data, ...) { + if (!all(object$lag == as.integer(object$lag))) { + rlang::abort("step_epi_lag requires 'lag' argument to be integer valued.") + } + + grid <- tidyr::expand_grid(col = object$columns, lag_val = object$lag) %>% + dplyr::mutate(newname = glue::glue("{object$prefix}{lag_val}_{col}")) + + ## ensure no name clashes + new_data_names <- colnames(new_data) + intersection <- new_data_names %in% grid$newname + if (any(intersection)) { + rlang::abort( + paste0("Name collision occured in `", class(object)[1], + "`. The following variable names already exists: ", + paste0(new_data_names[intersection], collapse = ", "), + ".")) + } + ok <- object$keys + lagged <- purrr::reduce( + purrr::pmap(grid, epi_shift_single, x = new_data, key_cols = ok), + dplyr::full_join, + by = ok + ) + + dplyr::full_join(new_data, lagged, by = ok) %>% + dplyr::group_by(dplyr::across(dplyr::all_of(ok[-1]))) %>% + dplyr::arrange(time_value) %>% + dplyr::ungroup() + +} + +#' @export +print.step_epi_lag <- + function(x, width = max(20, options()$width - 30), ...) { + ## TODO add printing of the lags + title <- "Lagging " + recipes::print_step(x$columns, x$terms, x$trained, title, width) + invisible(x) + } diff --git a/R/epi_shift_internal.R b/R/epi_shift_internal.R deleted file mode 100644 index 872e6f454..000000000 --- a/R/epi_shift_internal.R +++ /dev/null @@ -1,136 +0,0 @@ -#' Create a shifted predictor -#' -#' `step_epi_shift` creates a *specification* of a recipe step that -#' will add new columns of shifted data. shifted data will -#' by default include NA values where the shift was induced. -#' These can be removed with [step_naomit()], or you may -#' specify an alternative filler value with the `default` -#' argument. -#' -#' @param shift A vector of integers. Each specified column will be -#' shifted for each value in the vector. -#' @template step-return -#' -#' @details The step assumes that the data are already _in the proper sequential -#' order_ for shifting. -#' -#' @family row operation steps -#' @rdname step_epi_ahead -step_epi_shift <- - function(recipe, - ..., - role, - trained, - shift, - prefix, - default, - keys, - columns, - skip, - id) { - add_step( - recipe, - step_epi_shift_new( - terms = dplyr::enquos(...), - role = role, - trained = trained, - shift = shift, - prefix = prefix, - default = default, - keys = keys, - columns = columns, - skip = skip, - id = id - ) - ) - } - -step_epi_shift_new <- - function(terms, role, trained, shift, prefix, default, keys, - columns, skip, id) { - step( - subclass = "epi_shift", - terms = terms, - role = role, - trained = trained, - shift = shift, - prefix = prefix, - default = default, - keys = keys, - columns = columns, - skip = skip, - id = id - ) - } - -#' @export -prep.step_epi_shift <- function(x, training, info = NULL, ...) { - step_epi_shift_new( - terms = x$terms, - role = x$role, - trained = TRUE, - shift = x$shift, - prefix = x$prefix, - default = x$default, - keys = x$keys, - columns = recipes_eval_select(x$terms, training, info), - skip = x$skip, - id = x$id - ) -} - -#' @export -bake.step_epi_shift <- function(object, new_data, ...) { - if (!all(object$shift == as.integer(object$shift))) { - rlang::abort("step_epi_shift requires 'shift' argument to be integer valued.") - } - grid <- tidyr::expand_grid(col = object$columns, lag_val = -object$shift) - is_lag <- object$role == "predictor" - if (!is_lag) { - grid <- dplyr::mutate(grid,ahead_val = -lag_val) - } - grid <- dplyr::mutate(grid, - newname = glue::glue( - paste0( - "{object$prefix}", - ifelse(is_lag,"{lag_val}","{ahead_val}"), - "_{col}" - ) - ) - ) - if (!is_lag) { - grid <- dplyr::select(grid, -ahead_val) - } - ## ensure no name clashes - new_data_names <- colnames(new_data) - intersection <- new_data_names %in% grid$newname - if (any(intersection)) { - rlang::abort( - paste0("Name collision occured in `", class(object)[1], - "`. The following variable names already exists: ", - paste0(new_data_names[intersection], collapse = ", "), - ".")) - } - ok <- object$keys - shifted <- purrr::reduce( - purrr::pmap(grid, epi_shift_single, x = new_data, key_cols = ok), - dplyr::full_join, - by = ok - ) - - dplyr::full_join(new_data, shifted, by = ok) %>% - dplyr::group_by(dplyr::across(dplyr::all_of(ok[-1]))) %>% - dplyr::arrange(time_value) %>% - dplyr::ungroup() - -} - -#' @export -print.step_epi_shift <- - function(x, width = max(20, options()$width - 30), ...) { - ## TODO add printing of the shifts - title <- ifelse(x$role == "predictor","Lagging","Leading") %>% - paste0(": ", abs(x$shift),",") - recipes::print_step(x$columns, x$terms, x$trained, title, width) - invisible(x) - } diff --git a/R/step_epi_naomit.R b/R/step_epi_naomit.R deleted file mode 100644 index 9886582ab..000000000 --- a/R/step_epi_naomit.R +++ /dev/null @@ -1,24 +0,0 @@ -#' Unified NA omission wrapper function for recipes -#' -#' @param recipe Recipe to be used for omission steps -#' -#' @return Omits NA's from both predictors and outcomes at training time; -#' however, only omits associated predictors at prediction time to avoid -#' losing data. -#' @export -#' @examples -#' tibble(geo_value = rep("place",200), -#' time_value = as.Date("2021-01-01") + 0:199, -#' case_rate = 1:200, -#' death_rate = 1:200) %>% -#' as_epi_df() %>% -#' recipe() %>% -#' step_epi_naomit() - - -step_epi_naomit <- function(recipe) { - stopifnot("recipe" %in% class(recipe)) - recipe %>% - recipes::step_naomit(all_predictors()) %>% - recipes::step_naomit(all_outcomes(), skip = TRUE) -} diff --git a/man/step_epi_ahead.Rd b/man/step_epi_ahead.Rd index 18a28e7b6..006c224c3 100644 --- a/man/step_epi_ahead.Rd +++ b/man/step_epi_ahead.Rd @@ -1,10 +1,8 @@ % Generated by roxygen2: do not edit by hand -% Please edit documentation in R/epi_ahead.R, R/epi_lag.R, -% R/epi_shift_internal.R +% Please edit documentation in R/epi_ahead.R, R/epi_lag.R \name{step_epi_ahead} \alias{step_epi_ahead} \alias{step_epi_lag} -\alias{step_epi_shift} \title{Create a leading outcome} \usage{ step_epi_ahead( @@ -34,20 +32,6 @@ step_epi_lag( skip = FALSE, id = rand_id("epi_lag") ) - -step_epi_shift( - recipe, - ..., - role, - trained, - shift, - prefix, - default, - keys, - columns, - skip, - id -) } \arguments{ \item{recipe}{A recipe object. The step will be added to the @@ -86,17 +70,11 @@ the computations for subsequent operations.} \item{lag}{A vector of positive integers. Each specified column will be lagged for each value in the vector.} - -\item{shift}{A vector of integers. Each specified column will be -shifted for each value in the vector.} } \value{ An updated version of \code{recipe} with the new step added to the sequence of any existing operations. -An updated version of \code{recipe} with the new step added to the -sequence of any existing operations. - An updated version of \code{recipe} with the new step added to the sequence of any existing operations. } @@ -114,13 +92,6 @@ by default include NA values where the lag was induced. These can be removed with \code{\link[=step_naomit]{step_naomit()}}, or you may specify an alternative filler value with the \code{default} argument. - -\code{step_epi_shift} creates a \emph{specification} of a recipe step that -will add new columns of shifted data. shifted data will -by default include NA values where the shift was induced. -These can be removed with \code{\link[=step_naomit]{step_naomit()}}, or you may -specify an alternative filler value with the \code{default} -argument. } \details{ The step assumes that the data are already \emph{in the proper sequential @@ -128,9 +99,6 @@ order} for leading. The step assumes that the data are already \emph{in the proper sequential order} for lagging. - -The step assumes that the data are already \emph{in the proper sequential -order} for shifting. } \examples{ tib <- tibble::tibble( diff --git a/man/step_epi_naomit.Rd b/man/step_epi_naomit.Rd deleted file mode 100644 index 1dfcaab19..000000000 --- a/man/step_epi_naomit.Rd +++ /dev/null @@ -1,28 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/step_epi_naomit.R -\name{step_epi_naomit} -\alias{step_epi_naomit} -\title{Unified NA omission wrapper function for recipes} -\usage{ -step_epi_naomit(recipe) -} -\arguments{ -\item{recipe}{Recipe to be used for omission steps} -} -\value{ -Omits NA's from both predictors and outcomes at training time; -however, only omits associated predictors at prediction time to avoid -losing data. -} -\description{ -Unified NA omission wrapper function for recipes -} -\examples{ -tibble(geo_value = rep("place",200), - time_value = as.Date("2021-01-01") + 0:199, - case_rate = 1:200, - death_rate = 1:200) \%>\% - as_epi_df() \%>\% - recipe() \%>\% - step_epi_naomit() -} diff --git a/musings/example-recipe.R b/musings/example-recipe.R index 754c5f1a1..d2bf9f46c 100644 --- a/musings/example-recipe.R +++ b/musings/example-recipe.R @@ -4,7 +4,6 @@ library(delphi.epidata) library(epiprocess) # library(epipredict) library(tidymodels) - x <- covidcast( data_source = "jhu-csse", signals = "confirmed_7dav_incidence_prop", @@ -41,7 +40,10 @@ r <- epi_recipe(x) %>% # if we add this as a class, maybe we get better step_epi_lag(death_rate, lag = c(0, 7, 14)) %>% step_epi_ahead(death_rate, ahead = 7) %>% step_epi_lag(case_rate, lag = c(0, 7, 14)) %>% - step_naomit2() + step_naomit(all_predictors()) %>% + # below, `skip` means we don't do this at predict time + # we should probably do something useful here to avoid user error + step_naomit(all_outcomes(), skip = TRUE) # specify trainer, this uses stats::lm() by default, but doing # slm <- linear_reg() %>% use_engine("glmnet", penalty = 0.1) diff --git a/tests/testthat/test-assign_arg_list.R b/tests/testthat/test-assign_arg_list.R deleted file mode 100644 index 7cd69dfa9..000000000 --- a/tests/testthat/test-assign_arg_list.R +++ /dev/null @@ -1,11 +0,0 @@ -test_that("First argument must be a list",{ - expect_error(assign_arg_list(c(1,2,3))) -}) -test_that("All arguments should be named",{ - expect_error(assign_arg_list(list(1,2))) -}) -test_that("assign_arg_list works as intended",{ - assign_arg_list(list(a="dog",b=2)) - expect_identical(a,"dog") - expect_identical(b,2) -}) diff --git a/tests/testthat/test-df_mat_mul.R b/tests/testthat/test-df_mat_mul.R deleted file mode 100644 index 291a7b0ce..000000000 --- a/tests/testthat/test-df_mat_mul.R +++ /dev/null @@ -1,41 +0,0 @@ -df <- data.frame(matrix(1:100, ncol = 5)) -mat <- matrix(1:4, ncol = 2) - -test_that("First input must be a data frame and second input must be a matrix", - { - expect_error(df_mat_mul(30,mat)) - expect_error(df_mat_mul(df,20)) -}) - -test_that("Argument name is a character" ,{ - expect_error(df_mat_mul(df, mat, 100)) -}) - -test_that("The length of names does not differ from the length of the number - of outputs" ,{ - expect_error(df_mat_mul(df, mat, c("a","b","c"), 2:3)) -}) - -test_that("The number of columns of the first data frame cannot differ from the - number of rows of the second matrix, hence preventing incompatible - matrix multiplication", { - expect_error(df_mat_mul(df, mat, "z", 1:3)) - }) - -X <- df[c(1,4,5)] -Z <- as.data.frame(as.matrix(df[2:3]) %*% mat) -colnames(Z) <- c("z1","z2") -output <- cbind(X,Z) - -test_that("Names are being handled properly", { - expect_identical(df_mat_mul(df, mat, "z", 2:3),output) - expect_identical(df_mat_mul(df, mat, c("z1","z2"), 2:3),output) -}) - -test_that("Other tidyselect functionalities are working", { - mult <- df_mat_mul(df, mat, "z", dplyr::num_range("X", 2:3)) - expect_identical(mult,output) - expect_identical(df_mat_mul(df, mat, "z", 2, 3),output) - # Mismatched names should not work: - expect_error(df_mat_mul(df, mat, "z", dplyr::num_range("Y", 2:3))) -}) diff --git a/tests/testthat/test-epi_shift_internal.R b/tests/testthat/test-epi_shift_internal.R deleted file mode 100644 index d3f5f7604..000000000 --- a/tests/testthat/test-epi_shift_internal.R +++ /dev/null @@ -1,65 +0,0 @@ -library(dplyr) -library(epiprocess) -library(parsnip) -library(workflows) - -# Random generated dataset -set.seed(100) -x <- tibble(geo_value = rep("nowhere",200), - time_value = as.Date("2021-01-01") + 0:199, - case_rate = rpois(100,20) + 1:200, - death_rate = rpois(100,10) + 1:200) %>% - as_epi_df() - -slm_fit <- function(recipe, data = x) { - workflow() %>% - add_recipe(recipe) %>% - add_model(linear_reg()) %>% - fit(data = data) -} - -# Tests -test_that("Check that epi_ahead shifts properly", { - r1 <- epi_recipe(x) %>% - step_epi_ahead(death_rate, ahead = 7) %>% - step_epi_lag(death_rate, lag = -7) %>% - step_naomit(all_predictors()) %>% - step_naomit(all_outcomes(), skip = TRUE) - - slm_fit1 <- slm_fit(r1) - - slope_ahead <- slm_fit1$fit$fit$fit$coefficients[[2]] - expect_equal(slope_ahead,1) -}) - -test_that("Check that epi_lag shifts properly", { - r2 <- epi_recipe(x) %>% - step_epi_ahead(death_rate, ahead = -7) %>% - step_epi_lag(death_rate, lag = 7) %>% - step_naomit(all_predictors()) %>% - step_naomit(all_outcomes(), skip = TRUE) - - slm_fit2 <- slm_fit(r2) - - slope_lag <- slm_fit2$fit$fit$fit$coefficients[[2]] - expect_equal(slope_lag,1) -}) - -test_that("Values for ahead and lag must be integer values", { - r3 <- epi_recipe(x) %>% - step_epi_ahead(death_rate, ahead = 3.6) %>% - step_epi_lag(death_rate, lag = 1.9) - expect_error( - slm_fit(r3) - ) -}) - -test_that("Values for ahead and lag cannot be duplicates", { - r4 <- epi_recipe(x) %>% - step_epi_ahead(death_rate, ahead = 7) %>% - step_epi_lag(death_rate, lag = 7) %>% - step_epi_lag(death_rate, lag = 7) - expect_error( - slm_fit(r4) - ) -}) diff --git a/tests/testthat/test-grab_names.R b/tests/testthat/test-grab_names.R deleted file mode 100644 index 2e7954ab3..000000000 --- a/tests/testthat/test-grab_names.R +++ /dev/null @@ -1,7 +0,0 @@ -df <- data.frame(b=1,c=2,ca=3,cat=4) - -test_that("Names are grabbed properly", { - expect_identical(grab_names(df,dplyr::starts_with("ca")), - subset(names(df),startsWith(names(df), "ca")) - ) -}) diff --git a/tests/testthat/test-step_epi_naomit.R b/tests/testthat/test-step_epi_naomit.R deleted file mode 100644 index 3fad4480e..000000000 --- a/tests/testthat/test-step_epi_naomit.R +++ /dev/null @@ -1,38 +0,0 @@ -library(dplyr) -library(epiprocess) -library(parsnip) -library(workflows) - -# Random generated dataset -x <- tibble(geo_value = rep("nowhere",200), - time_value = as.Date("2021-01-01") + 0:199, - case_rate = 1:200, - death_rate = 1:200) %>% - as_epi_df() - -# Preparing the datasets to be used for comparison -r <- epi_recipe(x) %>% - step_epi_ahead(death_rate, ahead = 7) %>% - step_epi_lag(death_rate, lag = c(0,7,14)) - -test_that("Argument must be a recipe", { - expect_error(step_epi_naomit(x)) -}) - -z1 <- step_epi_naomit(r) -z2 <- r %>% - step_naomit(all_predictors()) %>% - step_naomit(all_outcomes(), skip = TRUE) - -# Checks the behaviour of a step function, omitting the quosure and id that -# differ from one another, even with identical behaviour -behav <- function(recipe,step_num) recipe$steps[[step_num]][-1][-5] -# Checks the class type of an object -step_class <- function(recipe,step_num) class(recipe$steps[step_num]) - -test_that("Check that both functions behave the same way", { - expect_identical(behav(z1,3),behav(z2,3)) - expect_identical(behav(z1,4),behav(z2,4)) - expect_identical(step_class(z1,3),step_class(z2,3)) - expect_identical(step_class(z1,4),step_class(z2,4)) -}) From 18cc390ca44f229c49997fc1d2aea09c6a0a9ff4 Mon Sep 17 00:00:00 2001 From: Dmitry Shemetov Date: Mon, 13 Jun 2022 15:03:02 -0700 Subject: [PATCH 64/65] Add frosting to CI --- .github/workflows/R-CMD-check.yaml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/workflows/R-CMD-check.yaml b/.github/workflows/R-CMD-check.yaml index d9650507e..5cb805dde 100644 --- a/.github/workflows/R-CMD-check.yaml +++ b/.github/workflows/R-CMD-check.yaml @@ -1,8 +1,8 @@ on: push: - branches: [main, master] + branches: [main, frosting] pull_request: - branches: [main, master] + branches: [main, frosting] name: R-CMD-check From 7da1d5bdba99cf7161838c833fcf8ced1fab85f9 Mon Sep 17 00:00:00 2001 From: Dmitry Shemetov Date: Tue, 14 Jun 2022 15:24:34 -0700 Subject: [PATCH 65/65] Fix subtle but evil as_tibble.epi_df bug, fix tests --- R/epi_recipe.R | 3 +++ tests/testthat/test-frosting.R | 2 -- tests/testthat/test-layer_predict.R | 8 ++------ 3 files changed, 5 insertions(+), 8 deletions(-) diff --git a/R/epi_recipe.R b/R/epi_recipe.R index 47f24ecbf..496d819bd 100644 --- a/R/epi_recipe.R +++ b/R/epi_recipe.R @@ -408,5 +408,8 @@ kill_levels <- function(x, keys) { #' @export as_tibble.epi_df <- function(x, ...) { # so that downstream calls to as_tibble don't clobber our metadata + # this avoids infinite recursion inside dplyr::dplyr_col_modify + # TODO: this needs a different approach, long-term + class(x) <- class(x)[class(x) != "grouped_df"] return(x) } diff --git a/tests/testthat/test-frosting.R b/tests/testthat/test-frosting.R index 1d9b43db3..cc6e4e633 100644 --- a/tests/testthat/test-frosting.R +++ b/tests/testthat/test-frosting.R @@ -36,5 +36,3 @@ test_that("prediction works without any postprocessor", { expect_equal(tail(p$time_value, 1), as.Date("2021-12-31")) expect_equal(unique(p$geo_value), c("ak", "ca", "ny")) }) - - diff --git a/tests/testthat/test-layer_predict.R b/tests/testthat/test-layer_predict.R index 736c426ce..5e6148ff5 100644 --- a/tests/testthat/test-layer_predict.R +++ b/tests/testthat/test-layer_predict.R @@ -19,7 +19,7 @@ test_that("predict layer works alone", { expect_equal(ncol(p), 3L) expect_s3_class(p, "epi_df") expect_equal(nrow(p), 108L) - expect_named(p, c("geo_value", "time_value", ".pred")) + expect_named(p, c("time_value", "geo_value", ".pred")) }) @@ -32,9 +32,5 @@ test_that("prediction with interval works", { expect_equal(ncol(p), 4L) expect_s3_class(p, "epi_df") expect_equal(nrow(p), 108L) - expect_named(p, c("geo_value", "time_value", ".pred_lower", ".pred_upper")) - - - + expect_named(p, c("time_value", "geo_value", ".pred_lower", ".pred_upper")) }) -