diff --git a/NAMESPACE b/NAMESPACE index 1d8affef..4f9b8151 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -96,19 +96,16 @@ importFrom(rlang,.env) importFrom(rlang,arg_match) importFrom(rlang,caller_arg) importFrom(rlang,caller_env) -importFrom(rlang,check_dots_empty0) importFrom(rlang,enquo) importFrom(rlang,enquos) importFrom(rlang,env) importFrom(rlang,f_env) importFrom(rlang,f_rhs) -importFrom(rlang,global_env) importFrom(rlang,is_environment) importFrom(rlang,is_formula) importFrom(rlang,is_function) importFrom(rlang,is_missing) importFrom(rlang,is_quosure) -importFrom(rlang,is_string) importFrom(rlang,missing_arg) importFrom(rlang,new_function) importFrom(rlang,quo_is_missing) diff --git a/R/grouped_epi_archive.R b/R/grouped_epi_archive.R index 28236a91..d1ddf5bf 100644 --- a/R/grouped_epi_archive.R +++ b/R/grouped_epi_archive.R @@ -186,7 +186,8 @@ grouped_epi_archive = #' object. See the documentation for the wrapper function [`epix_slide()`] for #' details. #' @importFrom data.table key address -#' @importFrom rlang !! !!! enquo quo_is_missing enquos is_quosure sym syms env +#' @importFrom rlang !! !!! enquo quo_is_missing enquos is_quosure sym syms +#' env missing_arg slide = function(f, ..., before, ref_time_values, time_step, new_col_name = "slide_value", as_list_col = FALSE, names_sep = "_", @@ -229,11 +230,6 @@ grouped_epi_archive = # implementation doesn't take advantage of it. ref_time_values = sort(ref_time_values) } - - # Check that `f` takes enough args - if (!missing(f) && is.function(f)) { - assert_sufficient_f_args(f, ...) - } # Validate and pre-process `before`: if (missing(before)) { @@ -296,71 +292,8 @@ grouped_epi_archive = !!new_col := .env$comp_value)) } - # If f is not missing, then just go ahead, slide by group - if (!missing(f)) { - if (rlang::is_formula(f)) f = as_slide_computation(f) - x = purrr::map_dfr(ref_time_values, function(ref_time_value) { - # Ungrouped as-of data; `epi_df` if `all_versions` is `FALSE`, - # `epi_archive` if `all_versions` is `TRUE`: - as_of_raw = private$ungrouped$as_of(ref_time_value, min_time_value = ref_time_value - before, all_versions = all_versions) - - # Set: - # * `as_of_df`, the data.frame/tibble/epi_df/etc. that we will - # `group_modify` as the `.data` argument. Might or might not - # include version column. - # * `group_modify_fn`, the corresponding `.f` argument - if (!all_versions) { - as_of_df = as_of_raw - group_modify_fn = comp_one_grp - } else { - as_of_archive = as_of_raw - # We essentially want to `group_modify` the archive, but - # haven't implemented this method yet. Next best would be - # `group_modify` on its `$DT`, but that has different - # behavior based on whether or not `dtplyr` is loaded. - # Instead, go through an ordinary data frame, trying to avoid - # copies. - if (address(as_of_archive$DT) == address(private$ungrouped$DT)) { - # `as_of` aliased its the full `$DT`; copy before mutating: - as_of_archive$DT <- copy(as_of_archive$DT) - } - dt_key = data.table::key(as_of_archive$DT) - as_of_df = as_of_archive$DT - data.table::setDF(as_of_df) - - # Convert each subgroup chunk to an archive before running the calculation. - group_modify_fn = function(.data_group, .group_key, - f, ..., - ref_time_value, - new_col) { - # .data_group is coming from as_of_df as a tibble, but we - # want to feed `comp_one_grp` an `epi_archive` backed by a - # DT; convert and wrap: - data.table::setattr(.data_group, "sorted", dt_key) - data.table::setDT(.data_group, key=dt_key) - .data_group_archive = as_of_archive$clone() - .data_group_archive$DT = .data_group - comp_one_grp(.data_group_archive, .group_key, f = f, ..., - ref_time_value = ref_time_value, - new_col = new_col - ) - } - } - - return( - dplyr::group_by(as_of_df, dplyr::across(tidyselect::all_of(private$vars)), - .drop=private$drop) %>% - dplyr::group_modify(group_modify_fn, - f = f, ..., - ref_time_value = ref_time_value, - new_col = new_col, - .keep = TRUE) - ) - }) - } - - # Else interpret ... as an expression for tidy evaluation - else { + # If `f` is missing, interpret ... as an expression for tidy evaluation + if (missing(f)) { quos = enquos(...) if (length(quos) == 0) { Abort("If `f` is missing then a computation must be specified via `...`.") @@ -369,83 +302,70 @@ grouped_epi_archive = Abort("If `f` is missing then only a single computation can be specified via `...`.") } - quo = quos[[1]] - f = function(.x, .group_key, .ref_time_value, quo, ...) { - # Convert to environment to standardize between tibble and R6 - # based inputs. In both cases, we should get a simple - # environment with the empty environment as its parent. - data_env = rlang::as_environment(.x) - data_mask = rlang::new_data_mask(bottom = data_env, top = data_env) - data_mask$.data <- rlang::as_data_pronoun(data_mask) - # We'll also install `.x` directly, not as an - # `rlang_data_pronoun`, so that we can, e.g., use more dplyr and - # epiprocess operations. - data_mask$.x = .x - data_mask$.group_key = .group_key - data_mask$.ref_time_value = .ref_time_value - rlang::eval_tidy(quo, data_mask) - } + f = quos[[1]] new_col = sym(names(rlang::quos_auto_name(quos))) + ... = missing_arg() # magic value that passes zero args as dots in calls below + } - x = purrr::map_dfr(ref_time_values, function(ref_time_value) { - # Ungrouped as-of data; `epi_df` if `all_versions` is `FALSE`, - # `epi_archive` if `all_versions` is `TRUE`: - as_of_raw = private$ungrouped$as_of(ref_time_value, min_time_value = ref_time_value - before, all_versions = all_versions) + f = as_slide_computation(f, ...) + x = purrr::map_dfr(ref_time_values, function(ref_time_value) { + # Ungrouped as-of data; `epi_df` if `all_versions` is `FALSE`, + # `epi_archive` if `all_versions` is `TRUE`: + as_of_raw = private$ungrouped$as_of(ref_time_value, min_time_value = ref_time_value - before, all_versions = all_versions) - # Set: - # * `as_of_df`, the data.frame/tibble/epi_df/etc. that we will - # `group_modify` as the `.data` argument. Might or might not - # include version column. - # * `group_modify_fn`, the corresponding `.f` argument - if (!all_versions) { - as_of_df = as_of_raw - group_modify_fn = comp_one_grp - } else { - as_of_archive = as_of_raw - # We essentially want to `group_modify` the archive, but don't - # provide an implementation yet. Next best would be - # `group_modify` on its `$DT`, but that has different behavior - # based on whether or not `dtplyr` is loaded. Instead, go - # through an ordinary data frame, trying to avoid copies. - if (address(as_of_archive$DT) == address(private$ungrouped$DT)) { - # `as_of` aliased its the full `$DT`; copy before mutating: - as_of_archive$DT <- copy(as_of_archive$DT) - } - dt_key = data.table::key(as_of_archive$DT) - as_of_df = as_of_archive$DT - data.table::setDF(as_of_df) + # Set: + # * `as_of_df`, the data.frame/tibble/epi_df/etc. that we will + # `group_modify` as the `.data` argument. Might or might not + # include version column. + # * `group_modify_fn`, the corresponding `.f` argument + if (!all_versions) { + as_of_df = as_of_raw + group_modify_fn = comp_one_grp + } else { + as_of_archive = as_of_raw + # We essentially want to `group_modify` the archive, but + # haven't implemented this method yet. Next best would be + # `group_modify` on its `$DT`, but that has different + # behavior based on whether or not `dtplyr` is loaded. + # Instead, go through an ordinary data frame, trying to avoid + # copies. + if (address(as_of_archive$DT) == address(private$ungrouped$DT)) { + # `as_of` aliased its the full `$DT`; copy before mutating: + as_of_archive$DT <- copy(as_of_archive$DT) + } + dt_key = data.table::key(as_of_archive$DT) + as_of_df = as_of_archive$DT + data.table::setDF(as_of_df) - # Convert each subgroup chunk to an archive before running the calculation. - group_modify_fn = function(.data_group, .group_key, - f, ..., - ref_time_value, - new_col) { - # .data_group is coming from as_of_df as a tibble, but we - # want to feed `comp_one_grp` an `epi_archive` backed by a - # DT; convert and wrap: - data.table::setattr(.data_group, "sorted", dt_key) - data.table::setDT(.data_group, key=dt_key) - .data_group_archive = as_of_archive$clone() - .data_group_archive$DT = .data_group - comp_one_grp(.data_group_archive, .group_key, f = f, quo = quo, - ref_time_value = ref_time_value, - new_col = new_col - ) - } + # Convert each subgroup chunk to an archive before running the calculation. + group_modify_fn = function(.data_group, .group_key, + f, ..., + ref_time_value, + new_col) { + # .data_group is coming from as_of_df as a tibble, but we + # want to feed `comp_one_grp` an `epi_archive` backed by a + # DT; convert and wrap: + data.table::setattr(.data_group, "sorted", dt_key) + data.table::setDT(.data_group, key=dt_key) + .data_group_archive = as_of_archive$clone() + .data_group_archive$DT = .data_group + comp_one_grp(.data_group_archive, .group_key, f = f, ..., + ref_time_value = ref_time_value, + new_col = new_col + ) } + } - return( - dplyr::group_by(as_of_df, dplyr::across(tidyselect::all_of(private$vars)), - .drop=private$drop) %>% - dplyr::group_modify(group_modify_fn, - f = f, quo = quo, - ref_time_value = ref_time_value, - comp_effective_key_vars = comp_effective_key_vars, - new_col = new_col, - .keep = TRUE) - ) - }) - } + return( + dplyr::group_by(as_of_df, dplyr::across(tidyselect::all_of(private$vars)), + .drop=private$drop) %>% + dplyr::group_modify(group_modify_fn, + f = f, ..., + ref_time_value = ref_time_value, + new_col = new_col, + .keep = TRUE) + ) + }) # Unchop/unnest if we need to if (!as_list_col) { diff --git a/R/slide.R b/R/slide.R index 3ebf9b26..7467f219 100644 --- a/R/slide.R +++ b/R/slide.R @@ -123,7 +123,7 @@ #' #' @importFrom lubridate days weeks #' @importFrom dplyr bind_rows group_vars filter select -#' @importFrom rlang .data .env !! enquo enquos sym env +#' @importFrom rlang .data .env !! enquo enquos sym env missing_arg #' @export #' @examples #' # slide a 7-day trailing average formula on cases @@ -167,11 +167,6 @@ epi_slide = function(x, f, ..., before, after, ref_time_values, # Check we have an `epi_df` object if (!inherits(x, "epi_df")) Abort("`x` must be of class `epi_df`.") - - # Check that `f` takes enough args - if (!missing(f) && is.function(f)) { - assert_sufficient_f_args(f, ...) - } if (missing(ref_time_values)) { ref_time_values = unique(x$time_value) @@ -356,28 +351,8 @@ epi_slide = function(x, f, ..., before, after, ref_time_values, return(mutate(.data_group, !!new_col := slide_values)) } - # If f is not missing, then just go ahead, slide by group - if (!missing(f)) { - if (rlang::is_formula(f)) f = as_slide_computation(f) - f_rtv_wrapper = function(x, g, ...) { - ref_time_value = min(x$time_value) + before - x <- x[x$.real,] - x$.real <- NULL - f(x, g, ref_time_value, ...) - } - x = x %>% - group_modify(slide_one_grp, - f = f_rtv_wrapper, ..., - starts = starts, - stops = stops, - time_values = ref_time_values, - all_rows = all_rows, - new_col = new_col, - .keep = FALSE) - } - - # Else interpret ... as an expression for tidy evaluation - else { + # If `f` is missing, interpret ... as an expression for tidy evaluation + if (missing(f)) { quos = enquos(...) if (length(quos) == 0) { Abort("If `f` is missing then a computation must be specified via `...`.") @@ -386,31 +361,29 @@ epi_slide = function(x, f, ..., before, after, ref_time_values, Abort("If `f` is missing then only a single computation can be specified via `...`.") } - quo = quos[[1]] - f = function(.x, .group_key, quo, ...) { - .ref_time_value = min(.x$time_value) + before - .x <- .x[.x$.real,] - .x$.real <- NULL - data_mask = rlang::as_data_mask(.x) - # We'll also install `.x` directly, not as an `rlang_data_pronoun`, so - # that we can, e.g., use more dplyr and epiprocess operations. - data_mask$.x = .x - data_mask$.group_key = .group_key - data_mask$.ref_time_value = .ref_time_value - rlang::eval_tidy(quo, data_mask) - } + f = quos[[1]] new_col = sym(names(rlang::quos_auto_name(quos))) - - x = x %>% - group_modify(slide_one_grp, - f = f, quo = quo, - starts = starts, - stops = stops, - time_values = ref_time_values, - all_rows = all_rows, - new_col = new_col, - .keep = FALSE) + ... = missing_arg() # magic value that passes zero args as dots in calls below + } + + f = as_slide_computation(f, ...) + # Create a wrapper that calculates and passes `.ref_time_value` to the + # computation. + f_wrapper = function(.x, .group_key, ...) { + .ref_time_value = min(.x$time_value) + before + .x <- .x[.x$.real,] + .x$.real <- NULL + f(.x, .group_key, .ref_time_value, ...) } + x = x %>% + group_modify(slide_one_grp, + f = f_wrapper, ..., + starts = starts, + stops = stops, + time_values = ref_time_values, + all_rows = all_rows, + new_col = new_col, + .keep = FALSE) # Unnest if we need to, and return if (!as_list_col) { diff --git a/R/utils.R b/R/utils.R index 6c4a50e1..6aa0d674 100644 --- a/R/utils.R +++ b/R/utils.R @@ -181,44 +181,45 @@ assert_sufficient_f_args <- function(f, ...) { } } -#' Convert to function +#' Generate a `epi[x]_slide` computation function from a function, formula, or quosure #' -#' @description -#' `as_slide_computation()` transforms a one-sided formula into a function. -#' This powers the lambda syntax in packages like purrr. +#' @description `as_slide_computation()` transforms a one-sided formula or a +#' quosure into a function; functions are returned as-is or with light +#' modifications to calculate `ref_time_value`. #' #' This code and documentation borrows heavily from [`rlang::as_function`] #' (https://github.com/r-lib/rlang/blob/c55f6027928d3104ed449e591e8a225fcaf55e13/R/fn.R#L343-L427). #' #' This code extends `rlang::as_function` to create functions that take three -#' arguments. The arguments can be accessed via the idiomatic `.x`, `.y`, -#' etc, positional references (`..1`, `..2`, etc), and also by `epi -#' [x]_slide`-specific names. +#' arguments. The arguments can be accessed via the idiomatic `.`, `.x`, and +#' `.y`, extended to include `.z`; positional references `..1` and `..2`, +#' extended to include `..3`; and also by `epi[x]_slide`-specific names +#' `.group_key` and `.ref_time_value`. #' #' @source https://github.com/r-lib/rlang/blob/c55f6027928d3104ed449e591e8a225fcaf55e13/R/fn.R#L343-L427 #' -#' @param x A function or formula. +#' @param f A function, one-sided formula, or quosure. #' -#' If a **function**, it is used as is. +#' If a **function**, the function is returned as-is, with no +#' modifications. #' -#' If a **formula**, e.g. `~ mean(.x$cases)`, it is converted to a function with up -#' to three arguments: `.x` (single argument), or `.x` and `.y` +#' If a **formula**, e.g. `~ mean(.x$cases)`, it is converted to a function +#' with up to three arguments: `.x` (single argument), or `.x` and `.y` #' (two arguments), or `.x`, `.y`, and `.z` (three arguments). The `.` #' placeholder can be used instead of `.x`, `.group_key` can be used in #' place of `.y`, and `.ref_time_value` can be used in place of `.z`. This #' allows you to create very compact anonymous functions (lambdas) with up -#' to three inputs. Functions created from formulas have a special class. Use -#' `rlang::is_lambda()` to test for it. -#' -#' If a **string**, the function is looked up in `env`. Note that -#' this interface is strictly for user convenience because of the -#' scoping issues involved. Package developers should avoid -#' supplying functions by name and instead supply them by value. -#' -#' @param env Environment in which to fetch the function in case `x` -#' is a string. -#' @inheritParams rlang::args_dots_empty -#' @inheritParams rlang::args_error_context +#' to three inputs. Functions created from formulas have a special class. +#' Use `inherits(fn, "epiprocess_slide_computation")` to test for it. +#' +#' If a **quosure**, in the case that `f` was not provided to the parent +#' `epi[x]_slide` call and the `...` is interpreted as an expression for +#' tidy evaluation, it is evaluated within a wrapper function. The wrapper +#' sets up object access via a data mask. +#' +#' @param ... Additional arguments to pass to the function or formula +#' specified via `x`. If `x` is a quosure, any arguments passed via `...` +#' will be ignored. #' @examples #' f <- as_slide_computation(~ .x + 1) #' f(10) @@ -229,36 +230,61 @@ assert_sufficient_f_args <- function(f, ...) { #' h <- as_slide_computation(~ .x - .group_key) #' h(6, 3) #' -#' @importFrom rlang check_dots_empty0 is_function new_function f_env -#' is_environment missing_arg f_rhs is_string is_formula caller_arg -#' caller_env global_env +#' @importFrom rlang is_function new_function f_env is_environment missing_arg +#' f_rhs is_formula caller_arg caller_env #' #' @noRd -as_slide_computation <- function(x, - env = global_env(), - ..., - arg = caller_arg(x), - call = caller_env()) { - check_dots_empty0(...) - - if (is_function(x)) { - return(x) +as_slide_computation <- function(f, ...) { + arg = caller_arg(f) + call = caller_env() + + # A quosure is a type of formula, so be careful with the order and contents + # of the conditional logic here. + if (is_quosure(f)) { + fn = function(.x, .group_key, .ref_time_value) { + # Convert to environment to standardize between tibble and R6 + # based inputs. In both cases, we should get a simple + # environment with the empty environment as its parent. + data_env = rlang::as_environment(.x) + data_mask = rlang::new_data_mask(bottom = data_env, top = data_env) + data_mask$.data <- rlang::as_data_pronoun(data_mask) + # We'll also install `.x` directly, not as an `rlang_data_pronoun`, so + # that we can, e.g., use more dplyr and epiprocess operations. + data_mask$.x = .x + data_mask$.group_key = .group_key + data_mask$.ref_time_value = .ref_time_value + rlang::eval_tidy(f, data_mask) + } + + return(fn) } - if (is_formula(x)) { - if (length(x) > 2) { + if (is_function(f)) { + # Check that `f` takes enough args + assert_sufficient_f_args(f, ...) + return(f) + } + + if (is_formula(f)) { + if (length(f) > 2) { Abort(sprintf("%s must be a one-sided formula", arg), class = "epiprocess__as_slide_computation__formula_is_twosided", - epiprocess__x = x, + epiprocess__f = f, call = call) } + if (rlang::dots_n(...) > 0L) { + Abort("No arguments can be passed via `...` when `f` is a formula, or there are unrecognized/misspelled parameter names.", + class = "epiprocess__as_slide_computation__formula_with_dots", + epiprocess__f = f, + epiprocess__enquos_dots = enquos(...)) + } - env <- f_env(x) + env <- f_env(f) if (!is_environment(env)) { Abort("Formula must carry an environment.", class = "epiprocess__as_slide_computation__formula_has_no_env", - epiprocess__x = x, - epiprocess__x_env = env, + epiprocess__f = f, + epiprocess__f_env = env, arg = arg, call = call) } @@ -267,19 +293,16 @@ as_slide_computation <- function(x, .x = quote(..1), .y = quote(..2), .z = quote(..3), . = quote(..1), .group_key = quote(..2), .ref_time_value = quote(..3) ) - fn <- new_function(args, f_rhs(x), env) + fn <- new_function(args, f_rhs(f), env) fn <- structure(fn, class = c("epiprocess_slide_computation", "function")) - return(fn) - } - if (is_string(x)) { - return(get(x, envir = env, mode = "function")) + return(fn) } - Abort(sprintf("Can't convert a %s to a slide computation", class(x)), + Abort(sprintf("Can't convert an object of class %s to a slide computation", paste(collapse=" ", deparse(class(f)))), class = "epiprocess__as_slide_computation__cant_convert_catchall", - epiprocess__x = x, - epiprocess__x_class = class(x), + epiprocess__f = f, + epiprocess__f_class = class(f), arg = arg, call = call) } diff --git a/tests/testthat/test-epi_slide.R b/tests/testthat/test-epi_slide.R index 23bab72f..2e61e088 100644 --- a/tests/testthat/test-epi_slide.R +++ b/tests/testthat/test-epi_slide.R @@ -509,6 +509,12 @@ test_that("epi_slide computation via dots outputs the same result using col name slide_value = max(.x$time_value)) expect_identical(result1, expected_output) + + result2 <- small_x %>% + epi_slide(before = 2, + slide_value = max(.data$time_value)) + + expect_identical(result2, expected_output) }) test_that("`epi_slide` can access objects inside of helper functions", { diff --git a/tests/testthat/test-epix_slide.R b/tests/testthat/test-epix_slide.R index b3353bac..9e091642 100644 --- a/tests/testthat/test-epix_slide.R +++ b/tests/testthat/test-epix_slide.R @@ -669,6 +669,13 @@ test_that("epix_slide computation via dots outputs the same result using col nam sum_binary = sum(.x$time_value)) expect_identical(xx1, xx_ref) + + xx2 <- xx %>% + group_by(.data$geo_value) %>% + epix_slide(before = 2, + sum_binary = sum(.data$time_value)) + + expect_identical(xx2, xx_ref) }) test_that("`epix_slide` doesn't decay date output", { diff --git a/tests/testthat/test-utils.R b/tests/testthat/test-utils.R index c1e89aec..8460a5e8 100644 --- a/tests/testthat/test-utils.R +++ b/tests/testthat/test-utils.R @@ -176,9 +176,23 @@ test_that("assert_sufficient_f_args alerts if the provided f has defaults for th class = "epiprocess__assert_sufficient_f_args__required_args_contain_defaults") expect_error(assert_sufficient_f_args(f_xsgt_dots, "b"), class = "epiprocess__assert_sufficient_f_args__required_args_contain_defaults") - expect_error(suppressWarnings(assert_sufficient_f_args(f_xs_dots, "b")), - regexp = "pass the window data and group key to `f`'s x and setting argument", - class = "epiprocess__assert_sufficient_f_args__required_args_contain_defaults") + expect_error( + expect_warning( + assert_sufficient_f_args(f_xs_dots, "b"), + class = "epiprocess__assert_sufficient_f_args__mandatory_f_args_passed_to_f_dots" + ), + class = "epiprocess__assert_sufficient_f_args__required_args_contain_defaults" + ) + + # forwarding no dots should produce a different error message in some cases: + expect_error( + expect_warning( + assert_sufficient_f_args(f_xs_dots), + class = "epiprocess__assert_sufficient_f_args__mandatory_f_args_passed_to_f_dots" + ), + regexp = "window data and group key to `f`'s x and setting argument", + class = "epiprocess__assert_sufficient_f_args__required_args_contain_defaults" + ) }) test_that("computation formula-derived functions take all argument types", { @@ -203,6 +217,10 @@ test_that("as_slide_computation raises errors as expected", { expect_error(as_slide_computation(y ~ ..1), class="epiprocess__as_slide_computation__formula_is_twosided") + # Formulas can't be paired with ... + expect_error(as_slide_computation(~ ..1, method = "fn"), + class="epiprocess__as_slide_computation__formula_with_dots") + # `f_env` must be an environment formula_without_env <- stats::as.formula(~ ..1) rlang::f_env(formula_without_env) <- 5