From 0e1fe51956f54195521e4318de33ec0427b42d59 Mon Sep 17 00:00:00 2001
From: "Logan C. Brooks" <lcbrooks+github@andrew.cmu.edu>
Date: Thu, 18 Jan 2024 16:10:49 -0800
Subject: [PATCH 1/5] Remove unnecessary `in_range` in `epi_slide`
 implementation

`slider::hop_index` doesn't require starts & stops to be in `.i`, and we aren't
actually doing that anyway.

Plus comment to help clarify that we're passing the group key to comps via `...`.
---
 R/slide.R                   | 21 ++++++++-------------
 R/utils.R                   |  4 ----
 tests/testthat/test-utils.R |  6 ------
 3 files changed, 8 insertions(+), 23 deletions(-)

diff --git a/R/slide.R b/R/slide.R
index c7493fd3..cbc01f91 100644
--- a/R/slide.R
+++ b/R/slide.R
@@ -230,17 +230,12 @@ epi_slide <- function(x, f, ..., before, after, ref_time_values,
     after <- time_step(after)
   }
 
-  # Do set up to let us recover `ref_time_value`s later.
-  min_ref_time_values <- ref_time_values - before
-  min_ref_time_values_not_in_x <- min_ref_time_values[!(min_ref_time_values %in% unique(x$time_value))]
-
   # Arrange by increasing time_value
   x <- arrange(x, time_value)
 
   # Now set up starts and stops for sliding/hopping
-  time_range <- range(unique(c(x$time_value, min_ref_time_values_not_in_x)))
-  starts <- in_range(ref_time_values - before, time_range)
-  stops <- in_range(ref_time_values + after, time_range)
+  starts <- ref_time_values - before
+  stops <- ref_time_values + after
 
   if (length(starts) == 0 || length(stops) == 0) {
     Abort("The starting and/or stopping times for sliding are out of bounds with respect to the range of times in your data. Check your settings for ref_time_values and align (and before, if specified).")
@@ -251,7 +246,8 @@ epi_slide <- function(x, f, ..., before, after, ref_time_values,
 
   # Computation for one group, all time values
   slide_one_grp <- function(.data_group,
-                            f_factory, ...,
+                            f_factory,
+                            ..., # group key + any "real" ... args
                             starts,
                             stops,
                             time_values,
@@ -260,13 +256,13 @@ epi_slide <- function(x, f, ..., before, after, ref_time_values,
     # Figure out which reference time values appear in the data group in the
     # first place (we need to do this because it could differ based on the
     # group, hence the setup/checks for the reference time values based on all
-    # the data could still be off)
+    # the data could still be off):
     o <- time_values %in% .data_group$time_value
     starts <- starts[o]
     stops <- stops[o]
     time_values <- time_values[o]
 
-    f <- f_factory(starts)
+    f <- f_factory(time_values)
 
     # Compute the slide values
     slide_values_list <- slider::hop_index(
@@ -353,12 +349,11 @@ epi_slide <- function(x, f, ..., before, after, ref_time_values,
   # Create a wrapper that calculates and passes `.ref_time_value` to the
   # computation. `i` is contained in the `f_wrapper_factory` environment such
   # that when called within `slide_one_grp` `i` is reset for every group.
-  f_wrapper_factory <- function(starts) {
+  f_wrapper_factory <- function(kept_ref_time_values) {
     # Use `i` to advance through list of start dates.
     i <- 1L
-    starts <- starts + before
     f_wrapper <- function(.x, .group_key, ...) {
-      .ref_time_value <- starts[[i]]
+      .ref_time_value <- kept_ref_time_values[[i]]
       i <<- i + 1L
       f(.x, .group_key, .ref_time_value, ...)
     }
diff --git a/R/utils.R b/R/utils.R
index 9cc707a6..52a33ad2 100644
--- a/R/utils.R
+++ b/R/utils.R
@@ -361,10 +361,6 @@ as_slide_computation <- function(f, ...) {
 
 ##########
 
-in_range <- function(x, rng) pmin(pmax(x, rng[1]), rng[2])
-
-##########
-
 Min <- function(x) min(x, na.rm = TRUE)
 Max <- function(x) max(x, na.rm = TRUE)
 Sum <- function(x) sum(x, na.rm = TRUE)
diff --git a/tests/testthat/test-utils.R b/tests/testthat/test-utils.R
index 2319d045..4ad692a0 100644
--- a/tests/testthat/test-utils.R
+++ b/tests/testthat/test-utils.R
@@ -7,12 +7,6 @@ test_that("Abort and Warn work", {
   expect_warning(Warn("warn"))
 })
 
-test_that("in_range works", {
-  expect_equal(in_range(1, c(2, 4)), 2)
-  expect_equal(in_range(3, c(2, 4)), 3)
-  expect_equal(in_range(5, c(2, 4)), 4)
-})
-
 test_that("new summarizing functions work", {
   x <- c(3, 4, 5, 9, NA)
   expect_equal(Min(x), 3)

From 51edc5ac5ad7d9fe5ea3253f5b19c472800ef823 Mon Sep 17 00:00:00 2001
From: "Logan C. Brooks" <lcbrooks+github@andrew.cmu.edu>
Date: Thu, 18 Jan 2024 16:21:46 -0800
Subject: [PATCH 2/5] Rename `time_values` in `epi_slide` internals to clarify
 usage

Rename `time_values` to `ref_time_values` or `kept_ref_time_values` depending on
the context.  Does not change the interface of `epi_slide`.
---
 R/slide.R | 18 ++++++++----------
 1 file changed, 8 insertions(+), 10 deletions(-)

diff --git a/R/slide.R b/R/slide.R
index cbc01f91..9b9ad0fb 100644
--- a/R/slide.R
+++ b/R/slide.R
@@ -250,19 +250,19 @@ epi_slide <- function(x, f, ..., before, after, ref_time_values,
                             ..., # group key + any "real" ... args
                             starts,
                             stops,
-                            time_values,
+                            ref_time_values,
                             all_rows,
                             new_col) {
     # Figure out which reference time values appear in the data group in the
     # first place (we need to do this because it could differ based on the
     # group, hence the setup/checks for the reference time values based on all
     # the data could still be off):
-    o <- time_values %in% .data_group$time_value
+    o <- ref_time_values %in% .data_group$time_value
     starts <- starts[o]
     stops <- stops[o]
-    time_values <- time_values[o]
+    kept_ref_time_values <- ref_time_values[o]
 
-    f <- f_factory(time_values)
+    f <- f_factory(kept_ref_time_values)
 
     # Compute the slide values
     slide_values_list <- slider::hop_index(
@@ -275,13 +275,11 @@ epi_slide <- function(x, f, ..., before, after, ref_time_values,
 
     # Now figure out which rows in the data group are in the reference time
     # values; this will be useful for all sorts of checks that follow
-    o <- .data_group$time_value %in% time_values
+    o <- .data_group$time_value %in% kept_ref_time_values
     num_ref_rows <- sum(o)
 
-    # Count the number of appearances of each reference time value (these
-    # appearances should all be real for now, but if we allow ref time values
-    # outside of .data_group's time values):
-    counts <- dplyr::filter(.data_group, .data$time_value %in% time_values) %>%
+    # Count the number of appearances of each kept reference time value.
+    counts <- dplyr::filter(.data_group, .data$time_value %in% kept_ref_time_values) %>%
       dplyr::count(.data$time_value) %>%
       dplyr::pull(n)
 
@@ -363,7 +361,7 @@ epi_slide <- function(x, f, ..., before, after, ref_time_values,
     f_factory = f_wrapper_factory, ...,
     starts = starts,
     stops = stops,
-    time_values = ref_time_values,
+    ref_time_values = ref_time_values,
     all_rows = all_rows,
     new_col = new_col,
     .keep = FALSE

From 2156ff7da444a00d40ffb5b39a6ab01f06cd39cc Mon Sep 17 00:00:00 2001
From: "Logan C. Brooks" <lcbrooks+github@andrew.cmu.edu>
Date: Thu, 18 Jan 2024 16:32:03 -0800
Subject: [PATCH 3/5] Try to separate out group key passing from slide ...
 forwarding

---
 R/slide.R | 11 +++++++----
 1 file changed, 7 insertions(+), 4 deletions(-)

diff --git a/R/slide.R b/R/slide.R
index 9b9ad0fb..aeb22eb2 100644
--- a/R/slide.R
+++ b/R/slide.R
@@ -246,8 +246,9 @@ epi_slide <- function(x, f, ..., before, after, ref_time_values,
 
   # Computation for one group, all time values
   slide_one_grp <- function(.data_group,
+                            .group_key, # see `?group_modify`
+                            ..., # `...` to `epi_slide` forwarded here
                             f_factory,
-                            ..., # group key + any "real" ... args
                             starts,
                             stops,
                             ref_time_values,
@@ -268,9 +269,10 @@ epi_slide <- function(x, f, ..., before, after, ref_time_values,
     slide_values_list <- slider::hop_index(
       .x = .data_group,
       .i = .data_group$time_value,
-      .f = f, ...,
       .starts = starts,
-      .stops = stops
+      .stops = stops,
+      .f = f,
+      .group_key, ...
     )
 
     # Now figure out which rows in the data group are in the reference time
@@ -358,7 +360,8 @@ epi_slide <- function(x, f, ..., before, after, ref_time_values,
     return(f_wrapper)
   }
   x <- group_modify(x, slide_one_grp,
-    f_factory = f_wrapper_factory, ...,
+    ...,
+    f_factory = f_wrapper_factory,
     starts = starts,
     stops = stops,
     ref_time_values = ref_time_values,

From 8eabb4a06b0e284d1431560d89be6041492a8ad6 Mon Sep 17 00:00:00 2001
From: "Logan C. Brooks" <lcbrooks+github@andrew.cmu.edu>
Date: Thu, 18 Jan 2024 16:33:17 -0800
Subject: [PATCH 4/5] Avoid linter warning for `dplyr::pull(n)`

---
 R/slide.R | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/R/slide.R b/R/slide.R
index aeb22eb2..cedcd354 100644
--- a/R/slide.R
+++ b/R/slide.R
@@ -283,7 +283,7 @@ epi_slide <- function(x, f, ..., before, after, ref_time_values,
     # Count the number of appearances of each kept reference time value.
     counts <- dplyr::filter(.data_group, .data$time_value %in% kept_ref_time_values) %>%
       dplyr::count(.data$time_value) %>%
-      dplyr::pull(n)
+      `[[`("n")
 
     if (!all(purrr::map_lgl(slide_values_list, is.atomic)) &&
       !all(purrr::map_lgl(slide_values_list, is.data.frame))) {

From a824f6224032b2dbfbdd8990ce008ef76a79836b Mon Sep 17 00:00:00 2001
From: "Logan C. Brooks" <lcbrooks+github@andrew.cmu.edu>
Date: Thu, 18 Jan 2024 16:39:16 -0800
Subject: [PATCH 5/5] Remove redundant validation of `starts` and `stops`

We checked them for nonzero length when we filtered `ref_time_values` down to
those present in the `x$time_value`, but now we require `all(ref_time_values
%in% unique(x$time_value))`.
---
 R/slide.R | 4 ----
 1 file changed, 4 deletions(-)

diff --git a/R/slide.R b/R/slide.R
index cedcd354..e2c0bf55 100644
--- a/R/slide.R
+++ b/R/slide.R
@@ -237,10 +237,6 @@ epi_slide <- function(x, f, ..., before, after, ref_time_values,
   starts <- ref_time_values - before
   stops <- ref_time_values + after
 
-  if (length(starts) == 0 || length(stops) == 0) {
-    Abort("The starting and/or stopping times for sliding are out of bounds with respect to the range of times in your data. Check your settings for ref_time_values and align (and before, if specified).")
-  }
-
   # Symbolize new column name
   new_col <- sym(new_col_name)