From 0e1fe51956f54195521e4318de33ec0427b42d59 Mon Sep 17 00:00:00 2001 From: "Logan C. Brooks" Date: Thu, 18 Jan 2024 16:10:49 -0800 Subject: [PATCH 1/5] Remove unnecessary `in_range` in `epi_slide` implementation `slider::hop_index` doesn't require starts & stops to be in `.i`, and we aren't actually doing that anyway. Plus comment to help clarify that we're passing the group key to comps via `...`. --- R/slide.R | 21 ++++++++------------- R/utils.R | 4 ---- tests/testthat/test-utils.R | 6 ------ 3 files changed, 8 insertions(+), 23 deletions(-) diff --git a/R/slide.R b/R/slide.R index c7493fd3..cbc01f91 100644 --- a/R/slide.R +++ b/R/slide.R @@ -230,17 +230,12 @@ epi_slide <- function(x, f, ..., before, after, ref_time_values, after <- time_step(after) } - # Do set up to let us recover `ref_time_value`s later. - min_ref_time_values <- ref_time_values - before - min_ref_time_values_not_in_x <- min_ref_time_values[!(min_ref_time_values %in% unique(x$time_value))] - # Arrange by increasing time_value x <- arrange(x, time_value) # Now set up starts and stops for sliding/hopping - time_range <- range(unique(c(x$time_value, min_ref_time_values_not_in_x))) - starts <- in_range(ref_time_values - before, time_range) - stops <- in_range(ref_time_values + after, time_range) + starts <- ref_time_values - before + stops <- ref_time_values + after if (length(starts) == 0 || length(stops) == 0) { Abort("The starting and/or stopping times for sliding are out of bounds with respect to the range of times in your data. Check your settings for ref_time_values and align (and before, if specified).") @@ -251,7 +246,8 @@ epi_slide <- function(x, f, ..., before, after, ref_time_values, # Computation for one group, all time values slide_one_grp <- function(.data_group, - f_factory, ..., + f_factory, + ..., # group key + any "real" ... args starts, stops, time_values, @@ -260,13 +256,13 @@ epi_slide <- function(x, f, ..., before, after, ref_time_values, # Figure out which reference time values appear in the data group in the # first place (we need to do this because it could differ based on the # group, hence the setup/checks for the reference time values based on all - # the data could still be off) + # the data could still be off): o <- time_values %in% .data_group$time_value starts <- starts[o] stops <- stops[o] time_values <- time_values[o] - f <- f_factory(starts) + f <- f_factory(time_values) # Compute the slide values slide_values_list <- slider::hop_index( @@ -353,12 +349,11 @@ epi_slide <- function(x, f, ..., before, after, ref_time_values, # Create a wrapper that calculates and passes `.ref_time_value` to the # computation. `i` is contained in the `f_wrapper_factory` environment such # that when called within `slide_one_grp` `i` is reset for every group. - f_wrapper_factory <- function(starts) { + f_wrapper_factory <- function(kept_ref_time_values) { # Use `i` to advance through list of start dates. i <- 1L - starts <- starts + before f_wrapper <- function(.x, .group_key, ...) { - .ref_time_value <- starts[[i]] + .ref_time_value <- kept_ref_time_values[[i]] i <<- i + 1L f(.x, .group_key, .ref_time_value, ...) } diff --git a/R/utils.R b/R/utils.R index 9cc707a6..52a33ad2 100644 --- a/R/utils.R +++ b/R/utils.R @@ -361,10 +361,6 @@ as_slide_computation <- function(f, ...) { ########## -in_range <- function(x, rng) pmin(pmax(x, rng[1]), rng[2]) - -########## - Min <- function(x) min(x, na.rm = TRUE) Max <- function(x) max(x, na.rm = TRUE) Sum <- function(x) sum(x, na.rm = TRUE) diff --git a/tests/testthat/test-utils.R b/tests/testthat/test-utils.R index 2319d045..4ad692a0 100644 --- a/tests/testthat/test-utils.R +++ b/tests/testthat/test-utils.R @@ -7,12 +7,6 @@ test_that("Abort and Warn work", { expect_warning(Warn("warn")) }) -test_that("in_range works", { - expect_equal(in_range(1, c(2, 4)), 2) - expect_equal(in_range(3, c(2, 4)), 3) - expect_equal(in_range(5, c(2, 4)), 4) -}) - test_that("new summarizing functions work", { x <- c(3, 4, 5, 9, NA) expect_equal(Min(x), 3) From 51edc5ac5ad7d9fe5ea3253f5b19c472800ef823 Mon Sep 17 00:00:00 2001 From: "Logan C. Brooks" Date: Thu, 18 Jan 2024 16:21:46 -0800 Subject: [PATCH 2/5] Rename `time_values` in `epi_slide` internals to clarify usage Rename `time_values` to `ref_time_values` or `kept_ref_time_values` depending on the context. Does not change the interface of `epi_slide`. --- R/slide.R | 18 ++++++++---------- 1 file changed, 8 insertions(+), 10 deletions(-) diff --git a/R/slide.R b/R/slide.R index cbc01f91..9b9ad0fb 100644 --- a/R/slide.R +++ b/R/slide.R @@ -250,19 +250,19 @@ epi_slide <- function(x, f, ..., before, after, ref_time_values, ..., # group key + any "real" ... args starts, stops, - time_values, + ref_time_values, all_rows, new_col) { # Figure out which reference time values appear in the data group in the # first place (we need to do this because it could differ based on the # group, hence the setup/checks for the reference time values based on all # the data could still be off): - o <- time_values %in% .data_group$time_value + o <- ref_time_values %in% .data_group$time_value starts <- starts[o] stops <- stops[o] - time_values <- time_values[o] + kept_ref_time_values <- ref_time_values[o] - f <- f_factory(time_values) + f <- f_factory(kept_ref_time_values) # Compute the slide values slide_values_list <- slider::hop_index( @@ -275,13 +275,11 @@ epi_slide <- function(x, f, ..., before, after, ref_time_values, # Now figure out which rows in the data group are in the reference time # values; this will be useful for all sorts of checks that follow - o <- .data_group$time_value %in% time_values + o <- .data_group$time_value %in% kept_ref_time_values num_ref_rows <- sum(o) - # Count the number of appearances of each reference time value (these - # appearances should all be real for now, but if we allow ref time values - # outside of .data_group's time values): - counts <- dplyr::filter(.data_group, .data$time_value %in% time_values) %>% + # Count the number of appearances of each kept reference time value. + counts <- dplyr::filter(.data_group, .data$time_value %in% kept_ref_time_values) %>% dplyr::count(.data$time_value) %>% dplyr::pull(n) @@ -363,7 +361,7 @@ epi_slide <- function(x, f, ..., before, after, ref_time_values, f_factory = f_wrapper_factory, ..., starts = starts, stops = stops, - time_values = ref_time_values, + ref_time_values = ref_time_values, all_rows = all_rows, new_col = new_col, .keep = FALSE From 2156ff7da444a00d40ffb5b39a6ab01f06cd39cc Mon Sep 17 00:00:00 2001 From: "Logan C. Brooks" Date: Thu, 18 Jan 2024 16:32:03 -0800 Subject: [PATCH 3/5] Try to separate out group key passing from slide ... forwarding --- R/slide.R | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) diff --git a/R/slide.R b/R/slide.R index 9b9ad0fb..aeb22eb2 100644 --- a/R/slide.R +++ b/R/slide.R @@ -246,8 +246,9 @@ epi_slide <- function(x, f, ..., before, after, ref_time_values, # Computation for one group, all time values slide_one_grp <- function(.data_group, + .group_key, # see `?group_modify` + ..., # `...` to `epi_slide` forwarded here f_factory, - ..., # group key + any "real" ... args starts, stops, ref_time_values, @@ -268,9 +269,10 @@ epi_slide <- function(x, f, ..., before, after, ref_time_values, slide_values_list <- slider::hop_index( .x = .data_group, .i = .data_group$time_value, - .f = f, ..., .starts = starts, - .stops = stops + .stops = stops, + .f = f, + .group_key, ... ) # Now figure out which rows in the data group are in the reference time @@ -358,7 +360,8 @@ epi_slide <- function(x, f, ..., before, after, ref_time_values, return(f_wrapper) } x <- group_modify(x, slide_one_grp, - f_factory = f_wrapper_factory, ..., + ..., + f_factory = f_wrapper_factory, starts = starts, stops = stops, ref_time_values = ref_time_values, From 8eabb4a06b0e284d1431560d89be6041492a8ad6 Mon Sep 17 00:00:00 2001 From: "Logan C. Brooks" Date: Thu, 18 Jan 2024 16:33:17 -0800 Subject: [PATCH 4/5] Avoid linter warning for `dplyr::pull(n)` --- R/slide.R | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/R/slide.R b/R/slide.R index aeb22eb2..cedcd354 100644 --- a/R/slide.R +++ b/R/slide.R @@ -283,7 +283,7 @@ epi_slide <- function(x, f, ..., before, after, ref_time_values, # Count the number of appearances of each kept reference time value. counts <- dplyr::filter(.data_group, .data$time_value %in% kept_ref_time_values) %>% dplyr::count(.data$time_value) %>% - dplyr::pull(n) + `[[`("n") if (!all(purrr::map_lgl(slide_values_list, is.atomic)) && !all(purrr::map_lgl(slide_values_list, is.data.frame))) { From a824f6224032b2dbfbdd8990ce008ef76a79836b Mon Sep 17 00:00:00 2001 From: "Logan C. Brooks" Date: Thu, 18 Jan 2024 16:39:16 -0800 Subject: [PATCH 5/5] Remove redundant validation of `starts` and `stops` We checked them for nonzero length when we filtered `ref_time_values` down to those present in the `x$time_value`, but now we require `all(ref_time_values %in% unique(x$time_value))`. --- R/slide.R | 4 ---- 1 file changed, 4 deletions(-) diff --git a/R/slide.R b/R/slide.R index cedcd354..e2c0bf55 100644 --- a/R/slide.R +++ b/R/slide.R @@ -237,10 +237,6 @@ epi_slide <- function(x, f, ..., before, after, ref_time_values, starts <- ref_time_values - before stops <- ref_time_values + after - if (length(starts) == 0 || length(stops) == 0) { - Abort("The starting and/or stopping times for sliding are out of bounds with respect to the range of times in your data. Check your settings for ref_time_values and align (and before, if specified).") - } - # Symbolize new column name new_col <- sym(new_col_name)