diff --git a/.Rbuildignore b/.Rbuildignore index ccbd7ea7..8ca62412 100644 --- a/.Rbuildignore +++ b/.Rbuildignore @@ -1,3 +1,5 @@ +^renv$ +^renv\.lock$ ^.*\.Rproj$ ^\.Rproj\.user$ ^LICENSE\.md$ diff --git a/.github/workflows/R-CMD-check.yaml b/.github/workflows/R-CMD-check.yaml index 2fca5dbd..67f4bdb7 100644 --- a/.github/workflows/R-CMD-check.yaml +++ b/.github/workflows/R-CMD-check.yaml @@ -1,5 +1,7 @@ # Workflow derived from https://github.com/r-lib/actions/tree/v2/examples # Need help debugging build failures? Start at https://github.com/r-lib/actions#where-to-find-help +# +# Created with usethis + edited to run on PRs to dev, use API key. on: push: branches: [main, master] @@ -27,3 +29,5 @@ jobs: needs: check - uses: r-lib/actions/check-r-package@v2 + env: + DELPHI_EPIDATA_KEY: ${{ secrets.SECRET_EPIPROCESS_GHACTIONS_DELPHI_EPIDATA_KEY }} diff --git a/.github/workflows/pkgdown.yaml b/.github/workflows/pkgdown.yaml index 847176d3..12e352b3 100644 --- a/.github/workflows/pkgdown.yaml +++ b/.github/workflows/pkgdown.yaml @@ -1,5 +1,7 @@ # Workflow derived from https://github.com/r-lib/actions/tree/v2/examples # Need help debugging build failures? Start at https://github.com/r-lib/actions#where-to-find-help +# +# Created with usethis + edited to run on PRs to dev, use API key. on: push: branches: [main, master] @@ -34,6 +36,8 @@ jobs: needs: website - name: Build site + env: + DELPHI_EPIDATA_KEY: ${{ secrets.SECRET_EPIPROCESS_GHACTIONS_DELPHI_EPIDATA_KEY }} run: pkgdown::build_site_github_pages(new_process = FALSE, install = FALSE) shell: Rscript {0} diff --git a/NAMESPACE b/NAMESPACE index f14e27db..1d8affef 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -69,6 +69,7 @@ importFrom(data.table,key) importFrom(data.table,set) importFrom(data.table,setkeyv) importFrom(dplyr,arrange) +importFrom(dplyr,bind_rows) importFrom(dplyr,dplyr_col_modify) importFrom(dplyr,dplyr_reconstruct) importFrom(dplyr,dplyr_row_slice) @@ -76,6 +77,7 @@ importFrom(dplyr,filter) importFrom(dplyr,group_by) importFrom(dplyr,group_by_drop_default) importFrom(dplyr,group_modify) +importFrom(dplyr,group_vars) importFrom(dplyr,groups) importFrom(dplyr,mutate) importFrom(dplyr,relocate) @@ -97,6 +99,7 @@ importFrom(rlang,caller_env) importFrom(rlang,check_dots_empty0) importFrom(rlang,enquo) importFrom(rlang,enquos) +importFrom(rlang,env) importFrom(rlang,f_env) importFrom(rlang,f_rhs) importFrom(rlang,global_env) diff --git a/NEWS.md b/NEWS.md index 0af73682..562b03e3 100644 --- a/NEWS.md +++ b/NEWS.md @@ -6,16 +6,31 @@ inter-release development versions will include an additional ".9999" suffix. ## Breaking changes: -* Changes to `epix_slide`: - * The `f` computation is now required to take at least three arguments. `f` - must take an `epi_df` with the same column names as the archive's `DT`, +* Changes to `epi_slide` and `epix_slide`: + * If `f` is a function, it is now required to take at least three arguments. + `f` must take an `epi_df` with the same column names as the archive's `DT`, minus the `version` column; followed by a one-row tibble containing the values of the grouping variables for the associated group; followed by a - reference time value, usually as a `Date` object; followed by any number - of named arguments. + reference time value, usually as a `Date` object. Optionally, it can take + any number of additional arguments after that, and forward values for those + arguments through `epi[x]_slide`'s `...` args. + * To make your existing slide computations work, add a third argument to + your `f` function to accept this new input: e.g., change `f = function(x, + g, ) { }` to `f = function(x, g, rt, ) { }`. ## New features: +* `epi_slide` and `epix_slide` also make the window data, group key and reference + time value available to slide computations specified as formulas or tidy + evaluation expressions, in additional or completely new ways. + * If `f` is a formula, it can now access the reference time value via `.z` or + `.ref_time_value`. + * If `f` is missing, the tidy evaluation expression in `...` can now refer to + the window data as an `epi_df` or `tibble` with `.x`, the group key with + `.group_key`, and the reference time value with `.ref_time_value`. The usual + `.data` and `.env` pronouns also work, but`pick()` and `cur_data()` are not; + work off of `.x` instead. * `epix_slide` has been made more like `dplyr::group_modify`. It will no longer perform element/row recycling for size stability, accepts slide computation outputs containing any number of rows, and no longer supports `all_rows`. @@ -29,11 +44,6 @@ inter-release development versions will include an additional ".9999" suffix. more closely whether/when/how to output an `epi_df`. * To keep the old behavior, convert the output of `epix_slide()` to `epi_df` when desired and set the metadata appropriately. -* `epix_slide` `f` computations passed as functions or formulas now have - access to the reference time value. If `f` is a function, it is passed a - Date containing the reference time value as the third argument. If a - formula, `f` can access the reference time value via `.z` or - `.ref_time_value`. ## Improvements: diff --git a/R/epiprocess.R b/R/epiprocess.R index 0749647f..e047de8c 100644 --- a/R/epiprocess.R +++ b/R/epiprocess.R @@ -7,3 +7,4 @@ #' @docType package #' @name epiprocess NULL +utils::globalVariables(c(".x", ".group_key", ".ref_time_value")) diff --git a/R/grouped_epi_archive.R b/R/grouped_epi_archive.R index b03dc8c1..fd91ed4d 100644 --- a/R/grouped_epi_archive.R +++ b/R/grouped_epi_archive.R @@ -186,7 +186,7 @@ grouped_epi_archive = #' object. See the documentation for the wrapper function [`epix_slide()`] for #' details. #' @importFrom data.table key address -#' @importFrom rlang !! !!! enquo quo_is_missing enquos is_quosure sym syms +#' @importFrom rlang !! !!! enquo quo_is_missing enquos is_quosure sym syms env slide = function(f, ..., before, ref_time_values, time_step, new_col_name = "slide_value", as_list_col = FALSE, names_sep = "_", @@ -370,7 +370,21 @@ grouped_epi_archive = } quo = quos[[1]] - f = function(x, quo, ...) rlang::eval_tidy(quo, x) + f = function(.x, .group_key, .ref_time_value, quo, ...) { + # Convert to environment to standardize between tibble and R6 + # based inputs. In both cases, we should get a simple + # environment with the empty environment as its parent. + data_env = rlang::as_environment(.x) + data_mask = rlang::new_data_mask(bottom = data_env, top = data_env) + data_mask$.data <- rlang::as_data_pronoun(data_mask) + # We'll also install `.x` directly, not as an + # `rlang_data_pronoun`, so that we can, e.g., use more dplyr and + # epiprocess operations. + data_mask$.x = .x + data_mask$.group_key = .group_key + data_mask$.ref_time_value = .ref_time_value + rlang::eval_tidy(quo, data_mask) + } new_col = sym(names(rlang::quos_auto_name(quos))) x = purrr::map_dfr(ref_time_values, function(ref_time_value) { diff --git a/R/methods-epi_archive.R b/R/methods-epi_archive.R index d04e30d3..c110555c 100644 --- a/R/methods-epi_archive.R +++ b/R/methods-epi_archive.R @@ -678,7 +678,10 @@ group_by.epi_archive = function(.data, ..., .add=FALSE, .drop=dplyr::group_by_dr #' computation. #' @param ... Additional arguments to pass to the function or formula specified #' via `f`. Alternatively, if `f` is missing, then `...` is interpreted as an -#' expression for tidy evaluation. See details of [`epi_slide`]. +#' expression for tidy evaluation; in addition to referring to columns +#' directly by name, the expression has access to `.data` and `.env` pronouns +#' as in `dplyr` verbs, and can also refer to the `.group_key` and +#' `.ref_time_value`. See details of [`epi_slide`]. #' @param before How far `before` each `ref_time_value` should the sliding #' window extend? If provided, should be a single, non-NA, #' [integer-compatible][vctrs::vec_cast] number of time steps. This window diff --git a/R/slide.R b/R/slide.R index c459f21e..635d4d3d 100644 --- a/R/slide.R +++ b/R/slide.R @@ -23,7 +23,10 @@ #' If `f` is missing, then `...` will specify the computation. #' @param ... Additional arguments to pass to the function or formula specified #' via `f`. Alternatively, if `f` is missing, then the `...` is interpreted as -#' an expression for tidy evaluation. See details. +#' an expression for tidy evaluation; in addition to referring to columns +#' directly by name, the expression has access to `.data` and `.env` pronouns +#' as in `dplyr` verbs, and can also refer to `.x`, `.group_key`, and +#' `.ref_time_value`. See details. #' @param before,after How far `before` and `after` each `ref_time_value` should #' the sliding window extend? At least one of these two arguments must be #' provided; the other's default will be 0. Any value provided for either @@ -119,7 +122,8 @@ #' through the `new_col_name` argument. #' #' @importFrom lubridate days weeks -#' @importFrom rlang .data .env !! enquo enquos sym +#' @importFrom dplyr bind_rows group_vars filter select +#' @importFrom rlang .data .env !! enquo enquos sym env #' @export #' @examples #' # slide a 7-day trailing average formula on cases @@ -166,11 +170,8 @@ epi_slide = function(x, f, ..., before, after, ref_time_values, # Check that `f` takes enough args if (!missing(f) && is.function(f)) { - assert_sufficient_f_args(f, ...) + assert_sufficient_f_args(f, ..., n_mandatory_f_args = 3L) } - - # Arrange by increasing time_value - x = arrange(x, time_value) if (missing(ref_time_values)) { ref_time_values = unique(x$time_value) @@ -231,6 +232,35 @@ epi_slide = function(x, f, ..., before, after, ref_time_values, after <- time_step(after) } + min_ref_time_values = ref_time_values - before + min_ref_time_values_not_in_x <- min_ref_time_values[!(min_ref_time_values %in% unique(x$time_value))] + + # Do set up to let us recover `ref_time_value`s later. + # A helper column marking real observations. + x$.real = TRUE + + # Create df containing phony data. Df has the same columns and attributes as + # `x`, but filled with `NA`s aside from grouping columns. Number of rows is + # equal to the number of `min_ref_time_values_not_in_x` we have * the + # number of unique levels seen in the grouping columns. + before_time_values_df = data.frame(time_value=min_ref_time_values_not_in_x) + if (length(group_vars(x)) != 0) { + before_time_values_df = dplyr::cross_join( + # Get unique combinations of grouping columns seen in real data. + unique(x[, group_vars(x)]), + before_time_values_df + ) + } + # Automatically fill in all other columns from `x` with `NA`s, and carry + # attributes over to new df. + before_time_values_df <- bind_rows(x[0,], before_time_values_df) + before_time_values_df$.real <- FALSE + + x <- bind_rows(before_time_values_df, x) + + # Arrange by increasing time_value + x = arrange(x, time_value) + # Now set up starts and stops for sliding/hopping time_range = range(unique(x$time_value)) starts = in_range(ref_time_values - before, time_range) @@ -272,7 +302,9 @@ epi_slide = function(x, f, ..., before, after, ref_time_values, o = .data_group$time_value %in% time_values num_ref_rows = sum(o) - # Count the number of appearances of each reference time value + # Count the number of appearances of each reference time value (these + # appearances should all be real for now, but if we allow ref time values + # outside of .data_group's time values): counts = .data_group %>% dplyr::filter(.data$time_value %in% time_values) %>% dplyr::count(.data$time_value) %>% @@ -282,7 +314,7 @@ epi_slide = function(x, f, ..., before, after, ref_time_values, !all(purrr::map_lgl(slide_values_list, is.data.frame))) { Abort("The slide computations must return always atomic vectors or data frames (and not a mix of these two structures).") } - + # Unlist if appropriate: slide_values = if (as_list_col) { @@ -318,6 +350,7 @@ epi_slide = function(x, f, ..., before, after, ref_time_values, # fills with NA equivalent. vctrs::vec_slice(slide_values, o) = orig_values } else { + # This implicitly removes phony (`.real` == FALSE) observations. .data_group = filter(.data_group, o) } return(mutate(.data_group, !!new_col := slide_values)) @@ -325,9 +358,16 @@ epi_slide = function(x, f, ..., before, after, ref_time_values, # If f is not missing, then just go ahead, slide by group if (!missing(f)) { + if (rlang::is_formula(f)) f = as_slide_computation(f) + f_rtv_wrapper = function(x, g, ...) { + ref_time_value = min(x$time_value) + before + x <- x[x$.real,] + x$.real <- NULL + f(x, g, ref_time_value, ...) + } x = x %>% group_modify(slide_one_grp, - f = f, ..., + f = f_rtv_wrapper, ..., starts = starts, stops = stops, time_values = ref_time_values, @@ -347,7 +387,18 @@ epi_slide = function(x, f, ..., before, after, ref_time_values, } quo = quos[[1]] - f = function(x, quo, ...) rlang::eval_tidy(quo, x) + f = function(.x, .group_key, quo, ...) { + .ref_time_value = min(.x$time_value) + before + .x <- .x[.x$.real,] + .x$.real <- NULL + data_mask = rlang::as_data_mask(.x) + # We'll also install `.x` directly, not as an `rlang_data_pronoun`, so + # that we can, e.g., use more dplyr and epiprocess operations. + data_mask$.x = .x + data_mask$.group_key = .group_key + data_mask$.ref_time_value = .ref_time_value + rlang::eval_tidy(quo, data_mask) + } new_col = sym(names(rlang::quos_auto_name(quos))) x = x %>% @@ -365,5 +416,15 @@ epi_slide = function(x, f, ..., before, after, ref_time_values, if (!as_list_col) { x = unnest(x, !!new_col, names_sep = names_sep) } + + # Remove any remaining phony observations. When `all_rows` is TRUE, phony + # observations aren't necessarily removed in `slide_one_grp`. + if (all_rows) { + x <- x[x$.real,] + } + + # Drop helper column `.real`. + x$.real <- NULL + return(x) } diff --git a/man/epi_slide.Rd b/man/epi_slide.Rd index cc95fa5f..33c3a7fb 100644 --- a/man/epi_slide.Rd +++ b/man/epi_slide.Rd @@ -40,7 +40,10 @@ If \code{f} is missing, then \code{...} will specify the computation.} \item{...}{Additional arguments to pass to the function or formula specified via \code{f}. Alternatively, if \code{f} is missing, then the \code{...} is interpreted as -an expression for tidy evaluation. See details.} +an expression for tidy evaluation; in addition to referring to columns +directly by name, the expression has access to \code{.data} and \code{.env} pronouns +as in \code{dplyr} verbs, and can also refer to \code{.x}, \code{.group_key}, and +\code{.ref_time_value}. See details.} \item{before, after}{How far \code{before} and \code{after} each \code{ref_time_value} should the sliding window extend? At least one of these two arguments must be @@ -73,9 +76,9 @@ contain the derivative values. Default is "slide_value"; note that setting \code{new_col_name} equal to an existing column name will overwrite this column.} \item{as_list_col}{Should the slide results be held in a list column, or be -\link[tidyr:chop]{unchopped}/\link[tidyr:nest]{unnested}? Default is \code{FALSE}, +\link[tidyr:chop]{unchopped}/\link[tidyr:unnest]{unnested}? Default is \code{FALSE}, in which case a list object returned by \code{f} would be unnested (using -\code{\link[tidyr:nest]{tidyr::unnest()}}), and, if the slide computations output data frames, +\code{\link[tidyr:unnest]{tidyr::unnest()}}), and, if the slide computations output data frames, the names of the resulting columns are given by prepending \code{new_col_name} to the names of the list elements.} diff --git a/man/epix_slide.Rd b/man/epix_slide.Rd index fd2a2646..c0f07d88 100644 --- a/man/epix_slide.Rd +++ b/man/epix_slide.Rd @@ -40,7 +40,10 @@ computation.} \item{...}{Additional arguments to pass to the function or formula specified via \code{f}. Alternatively, if \code{f} is missing, then \code{...} is interpreted as an -expression for tidy evaluation. See details of \code{\link{epi_slide}}.} +expression for tidy evaluation; in addition to referring to columns +directly by name, the expression has access to \code{.data} and \code{.env} pronouns +as in \code{dplyr} verbs, and can also refer to the \code{.group_key} and +\code{.ref_time_value}. See details of \code{\link{epi_slide}}.} \item{before}{How far \code{before} each \code{ref_time_value} should the sliding window extend? If provided, should be a single, non-NA, @@ -77,9 +80,9 @@ contain the derivative values. Default is "slide_value"; note that setting \code{new_col_name} equal to an existing column name will overwrite this column.} \item{as_list_col}{Should the slide results be held in a list column, or be -\link[tidyr:chop]{unchopped}/\link[tidyr:nest]{unnested}? Default is \code{FALSE}, +\link[tidyr:chop]{unchopped}/\link[tidyr:unnest]{unnested}? Default is \code{FALSE}, in which case a list object returned by \code{f} would be unnested (using -\code{\link[tidyr:nest]{tidyr::unnest()}}), and, if the slide computations output data frames, +\code{\link[tidyr:unnest]{tidyr::unnest()}}), and, if the slide computations output data frames, the names of the resulting columns are given by prepending \code{new_col_name} to the names of the list elements.} diff --git a/man/reexports.Rd b/man/reexports.Rd index b633e86c..46e961d9 100644 --- a/man/reexports.Rd +++ b/man/reexports.Rd @@ -23,7 +23,7 @@ below to see their documentation. \describe{ \item{dplyr}{\code{\link[dplyr]{arrange}}, \code{\link[dplyr]{filter}}, \code{\link[dplyr]{group_by}}, \code{\link[dplyr:group_map]{group_modify}}, \code{\link[dplyr]{mutate}}, \code{\link[dplyr]{relocate}}, \code{\link[dplyr]{rename}}, \code{\link[dplyr]{slice}}, \code{\link[dplyr:group_by]{ungroup}}} - \item{tidyr}{\code{\link[tidyr:nest]{unnest}}} + \item{tidyr}{\code{\link[tidyr]{unnest}}} \item{tsibble}{\code{\link[tsibble:as-tsibble]{as_tsibble}}} }} diff --git a/tests/testthat/test-epi_slide.R b/tests/testthat/test-epi_slide.R index 21191a0b..23bab72f 100644 --- a/tests/testthat/test-epi_slide.R +++ b/tests/testthat/test-epi_slide.R @@ -9,7 +9,16 @@ ungrouped = dplyr::bind_rows( as_epi_df() grouped = ungrouped %>% group_by(geo_value) -f = function(x, g) dplyr::tibble(value=mean(x$value), count=length(x$value)) + +small_x = dplyr::bind_rows( + dplyr::tibble(geo_value = "ak", time_value = d + 1:5, value=11:15), + dplyr::tibble(geo_value = "al", time_value = d + 1:5, value=-(1:5)) +) %>% + as_epi_df(as_of = d + 6) %>% + group_by(geo_value) + + +f = function(x, g, t) dplyr::tibble(value=mean(x$value), count=length(x$value)) toy_edf = tibble::tribble( ~geo_value, ~time_value, ~value , @@ -163,10 +172,10 @@ test_that("computation output formats x as_list_col", { }) test_that("epi_slide alerts if the provided f doesn't take enough args", { - f_xg = function(x, g) dplyr::tibble(value=mean(x$value), count=length(x$value)) + f_xgt = function(x, g, t) dplyr::tibble(value=mean(x$value), count=length(x$value)) # If `regexp` is NA, asserts that there should be no errors/messages. - expect_error(epi_slide(grouped, f_xg, before = 1L, ref_time_values = d+1), regexp = NA) - expect_warning(epi_slide(grouped, f_xg, before = 1L, ref_time_values = d+1), regexp = NA) + expect_error(epi_slide(grouped, f_xgt, before = 1L, ref_time_values = d+1), regexp = NA) + expect_warning(epi_slide(grouped, f_xgt, before = 1L, ref_time_values = d+1), regexp = NA) f_x_dots = function(x, ...) dplyr::tibble(value=mean(x$value), count=length(x$value)) expect_warning(epi_slide(grouped, f_x_dots, before = 1L, ref_time_values = d+1), @@ -301,3 +310,213 @@ test_that("`epi_slide` doesn't decay date output", { inherits("Date") ) }) + +test_that("basic grouped epi_slide computation produces expected output", { + # Also checks that we correctly remove extra rows and columns (`.real`) used + # to recover `ref_time_value`s. + expected_output = dplyr::bind_rows( + dplyr::tibble(geo_value = "ak", time_value = d + 1:5, value = 11:15, slide_value=cumsum(11:15)), + dplyr::tibble(geo_value = "al", time_value = d + 1:5, value = -(1:5), slide_value=cumsum(-(1:5))) + ) %>% + group_by(geo_value) %>% + as_epi_df(as_of = d + 6) + + # formula + result1 <- epi_slide(small_x, f = ~sum(.x$value), before=50) + expect_identical(result1, expected_output) + + # function + result2 <- epi_slide(small_x, f = function(x, g, t) sum(x$value), before=50) + expect_identical(result2, expected_output) + + # dots + result3 <- epi_slide(small_x, slide_value = sum(value), before=50) + expect_identical(result3, expected_output) +}) + +test_that("ungrouped epi_slide computation completes successfully", { + expect_error( + small_x %>% + ungroup() %>% + epi_slide(before = 2, + slide_value = sum(.x$value)), + regexp=NA + ) +}) + +test_that("basic ungrouped epi_slide computation produces expected output", { + expected_output = dplyr::bind_rows( + dplyr::tibble(geo_value = "ak", time_value = d + 1:5, value = 11:15, slide_value=cumsum(11:15)) + ) %>% + as_epi_df(as_of = d + 6) + + result1 <- small_x %>% + ungroup() %>% + filter(geo_value == "ak") %>% + epi_slide(before = 50, + slide_value = sum(.x$value)) + expect_identical(result1, expected_output) + + # Ungrouped with multiple geos + expected_output = dplyr::bind_rows( + dplyr::tibble( + geo_value = "ak", time_value = d + 1:5, value=11:15, slide_value=cumsum(11:15) + cumsum(-(1:5) + )), + dplyr::tibble( + geo_value = "al", time_value = d + 1:5, value=-(1:5), slide_value=cumsum(11:15) + cumsum(-(1:5)) + ) + ) %>% + as_epi_df(as_of = d + 6) %>% + arrange(time_value) + + result2 <- small_x %>% + ungroup() %>% + epi_slide(before = 50, + slide_value = sum(.x$value)) + expect_identical(result2, expected_output) +}) + +test_that("epi_slide computation via formula can use ref_time_value", { + expected_output = dplyr::bind_rows( + dplyr::tibble(geo_value = "ak", time_value = d + 1:5, value = 11:15, slide_value=d + 1:5), + dplyr::tibble(geo_value = "al", time_value = d + 1:5, value = -(1:5), slide_value=d + 1:5) + ) %>% + group_by(geo_value) %>% + as_epi_df(as_of = d + 6) + + result1 <- small_x %>% + epi_slide(f = ~ .ref_time_value, + before = 50) + + expect_identical(result1, expected_output) + + result2 <- small_x %>% + epi_slide(f = ~ .z, + before = 50) + + expect_identical(result2, expected_output) + + result3 <- small_x %>% + epi_slide(f = ~ ..3, + before = 50) + + expect_identical(result3, expected_output) + + # Ungrouped with multiple geos + expected_output = dplyr::bind_rows( + dplyr::tibble(geo_value = "ak", time_value = d + 1:5, value = 11:15, slide_value=d + 1:5), + dplyr::tibble(geo_value = "al", time_value = d + 1:5, value = -(1:5), slide_value=d + 1:5) + ) %>% + as_epi_df(as_of = d + 6) %>% + arrange(time_value) + + result4 <- small_x %>% + ungroup() %>% + epi_slide(f = ~ .ref_time_value, + before = 50) + expect_identical(result4, expected_output) +}) + +test_that("epi_slide computation via function can use ref_time_value", { + expected_output = dplyr::bind_rows( + dplyr::tibble(geo_value = "ak", time_value = d + 1:5, value = 11:15, slide_value=d + 1:5), + dplyr::tibble(geo_value = "al", time_value = d + 1:5, value = -(1:5), slide_value=d + 1:5) + ) %>% + group_by(geo_value) %>% + as_epi_df(as_of = d + 6) + + result1 <- small_x %>% + epi_slide(f = function(x, g, t) t, + before = 2) + + expect_identical(result1, expected_output) +}) + +test_that("epi_slide computation via dots can use ref_time_value and group", { + # ref_time_value + expected_output = dplyr::bind_rows( + dplyr::tibble(geo_value = "ak", time_value = d + 1:5, value = 11:15, slide_value=d + 1:5), + dplyr::tibble(geo_value = "al", time_value = d + 1:5, value = -(1:5), slide_value=d + 1:5) + ) %>% + group_by(geo_value) %>% + as_epi_df(as_of = d + 6) + + result1 <- small_x %>% + epi_slide(before = 50, + slide_value = .ref_time_value) + + expect_identical(result1, expected_output) + + # `.{x,group_key,ref_time_value}` should be inaccessible from `.data` and + # `.env`. + expect_error(small_x %>% + epi_slide(before = 50, + slide_value = .env$.ref_time_value) + ) + + # group_key + # Use group_key column + expected_output = dplyr::bind_rows( + dplyr::tibble(geo_value = "ak", time_value = d + 1:5, value = 11:15, slide_value="ak"), + dplyr::tibble(geo_value = "al", time_value = d + 1:5, value = -(1:5), slide_value="al") + ) %>% + group_by(geo_value) %>% + as_epi_df(as_of = d + 6) + + result3 <- small_x %>% + epi_slide(before = 2, + slide_value = .group_key$geo_value) + + expect_identical(result3, expected_output) + + # Use entire group_key object + expected_output = dplyr::bind_rows( + dplyr::tibble(geo_value = "ak", time_value = d + 1:5, value = 11:15, slide_value=1L), + dplyr::tibble(geo_value = "al", time_value = d + 1:5, value = -(1:5), slide_value=1L) + ) %>% + group_by(geo_value) %>% + as_epi_df(as_of = d + 6) + + result4 <- small_x %>% + epi_slide(before = 2, + slide_value = nrow(.group_key)) + + expect_identical(result4, expected_output) + + # Ungrouped with multiple geos + expected_output = dplyr::bind_rows( + dplyr::tibble(geo_value = "ak", time_value = d + 1:5, value = 11:15, slide_value=d + 1:5), + dplyr::tibble(geo_value = "al", time_value = d + 1:5, value = -(1:5), slide_value=d + 1:5) + ) %>% + as_epi_df(as_of = d + 6) %>% + arrange(time_value) + + result5 <- small_x %>% + ungroup() %>% + epi_slide(before = 50, + slide_value = .ref_time_value) + expect_identical(result5, expected_output) +}) + +test_that("epi_slide computation via dots outputs the same result using col names and the data var", { + expected_output <- small_x %>% + epi_slide(before = 2, + slide_value = max(time_value)) %>% + as_epi_df(as_of = d + 6) + + result1 <- small_x %>% + epi_slide(before = 2, + slide_value = max(.x$time_value)) + + expect_identical(result1, expected_output) +}) + +test_that("`epi_slide` can access objects inside of helper functions", { + helper = function(archive_haystack, time_value_needle) { + archive_haystack %>% epi_slide(has_needle = time_value_needle %in% time_value, before = 365000L) + } + expect_error( + helper(small_x, as.Date("2021-01-01")), + NA + ) +}) diff --git a/tests/testthat/test-epix_slide.R b/tests/testthat/test-epix_slide.R index 1ae018d0..b3353bac 100644 --- a/tests/testthat/test-epix_slide.R +++ b/tests/testthat/test-epix_slide.R @@ -573,54 +573,102 @@ test_that("epix_slide alerts if the provided f doesn't take enough args", { class = "epiprocess__assert_sufficient_f_args__mandatory_f_args_passed_to_f_dots") }) -test_that("epix_slide computation can use ref_time_value", { - # Formula - xx1 <- xx %>% - group_by(.data$geo_value) %>% - epix_slide(f = ~ .ref_time_value, - before = 2) - +test_that("epix_slide computation via formula can use ref_time_value", { xx_ref <- tibble(geo_value = rep("x",4), time_value = c(4,5,6,7), slide_value = c(4,5,6,7) ) %>% group_by(geo_value) - expect_identical(xx1,xx_ref) + xx1 <- xx %>% + group_by(.data$geo_value) %>% + epix_slide(f = ~ .ref_time_value, + before = 2) + + expect_identical(xx1, xx_ref) xx2 <- xx %>% group_by(.data$geo_value) %>% epix_slide(f = ~ .z, before = 2) - expect_identical(xx2,xx_ref) + expect_identical(xx2, xx_ref) xx3 <- xx %>% group_by(.data$geo_value) %>% epix_slide(f = ~ ..3, before = 2) - expect_identical(xx3,xx_ref) + expect_identical(xx3, xx_ref) +}) - # Function - xx4 <- xx %>% +test_that("epix_slide computation via function can use ref_time_value", { + xx_ref <- tibble(geo_value = rep("x",4), + time_value = c(4,5,6,7), + slide_value = c(4,5,6,7) + ) %>% + group_by(geo_value) + + xx1 <- xx %>% group_by(.data$geo_value) %>% epix_slide(f = function(x, g, t) t, before = 2) - expect_identical(xx4,xx_ref) + expect_identical(xx1, xx_ref) +}) - # Dots - expect_error(xx %>% +test_that("epix_slide computation via dots can use ref_time_value and group", { + # ref_time_value + xx_ref <- tibble(geo_value = rep("x",4), + time_value = c(4,5,6,7), + slide_value = c(4,5,6,7) + ) %>% + group_by(geo_value) + + xx1 <- xx %>% group_by(.data$geo_value) %>% epix_slide(before = 2, - slide_value = ref_time_value), - "object 'ref_time_value' not found") -expect_error(xx %>% + slide_value = .ref_time_value) + + expect_identical(xx1, xx_ref) + + # group_key + xx_ref <- tibble(geo_value = rep("x",4), + time_value = c(4,5,6,7), + slide_value = "x" + ) %>% + group_by(geo_value) + + # Use group_key column + xx3 <- xx %>% + group_by(.data$geo_value) %>% + epix_slide(before = 2, + slide_value = .group_key$geo_value) + + expect_identical(xx3, xx_ref) + + # Use entire group_key object + expect_error( + xx %>% + group_by(.data$geo_value) %>% + epix_slide(before = 2, + slide_value = nrow(.group_key)), + NA + ) +}) + +test_that("epix_slide computation via dots outputs the same result using col names and the data var", { + xx_ref <- xx %>% group_by(.data$geo_value) %>% epix_slide(before = 2, - slide_value = .env$ref_time_value), - "object 'ref_time_value' not found") + sum_binary = sum(time_value)) + + xx1 <- xx %>% + group_by(.data$geo_value) %>% + epix_slide(before = 2, + sum_binary = sum(.x$time_value)) + + expect_identical(xx1, xx_ref) }) test_that("`epix_slide` doesn't decay date output", { @@ -634,3 +682,18 @@ test_that("`epix_slide` doesn't decay date output", { inherits("Date") ) }) + +test_that("`epix_slide` can access objects inside of helper functions", { + helper = function(archive_haystack, time_value_needle) { + archive_haystack %>% epix_slide(has_needle = time_value_needle %in% time_value, before = 365000L) + } + expect_error( + helper(archive_cases_dv_subset, as.Date("2021-01-01")), + NA + ) + + expect_error( + helper(xx, 3L), + NA + ) +})