Skip to content

Simplify and clarify parts of epi_slide implementation #399

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 5 commits into from
Jan 19, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
52 changes: 22 additions & 30 deletions R/slide.R
Original file line number Diff line number Diff line change
Expand Up @@ -230,64 +230,56 @@ epi_slide <- function(x, f, ..., before, after, ref_time_values,
after <- time_step(after)
}

# Do set up to let us recover `ref_time_value`s later.
min_ref_time_values <- ref_time_values - before
min_ref_time_values_not_in_x <- min_ref_time_values[!(min_ref_time_values %in% unique(x$time_value))]

# Arrange by increasing time_value
x <- arrange(x, time_value)

# Now set up starts and stops for sliding/hopping
time_range <- range(unique(c(x$time_value, min_ref_time_values_not_in_x)))
starts <- in_range(ref_time_values - before, time_range)
stops <- in_range(ref_time_values + after, time_range)

if (length(starts) == 0 || length(stops) == 0) {
Abort("The starting and/or stopping times for sliding are out of bounds with respect to the range of times in your data. Check your settings for ref_time_values and align (and before, if specified).")
}
starts <- ref_time_values - before
stops <- ref_time_values + after
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

praise: Great simplification!


# Symbolize new column name
new_col <- sym(new_col_name)

# Computation for one group, all time values
slide_one_grp <- function(.data_group,
f_factory, ...,
.group_key, # see `?group_modify`
..., # `...` to `epi_slide` forwarded here
Comment on lines +245 to +246
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

praise: Good to be clear about this. I was confused how .group_key, though normally expected by the function provided to group_modify, was being handled (and obfuscated) by ....

f_factory,
starts,
stops,
time_values,
ref_time_values,
all_rows,
new_col) {
# Figure out which reference time values appear in the data group in the
# first place (we need to do this because it could differ based on the
# group, hence the setup/checks for the reference time values based on all
# the data could still be off)
o <- time_values %in% .data_group$time_value
# the data could still be off):
o <- ref_time_values %in% .data_group$time_value
starts <- starts[o]
stops <- stops[o]
time_values <- time_values[o]
kept_ref_time_values <- ref_time_values[o]

f <- f_factory(starts)
f <- f_factory(kept_ref_time_values)

# Compute the slide values
slide_values_list <- slider::hop_index(
.x = .data_group,
.i = .data_group$time_value,
.f = f, ...,
.starts = starts,
.stops = stops
.stops = stops,
.f = f,
.group_key, ...
)

# Now figure out which rows in the data group are in the reference time
# values; this will be useful for all sorts of checks that follow
o <- .data_group$time_value %in% time_values
o <- .data_group$time_value %in% kept_ref_time_values
num_ref_rows <- sum(o)

# Count the number of appearances of each reference time value (these
# appearances should all be real for now, but if we allow ref time values
# outside of .data_group's time values):
counts <- dplyr::filter(.data_group, .data$time_value %in% time_values) %>%
# Count the number of appearances of each kept reference time value.
counts <- dplyr::filter(.data_group, .data$time_value %in% kept_ref_time_values) %>%
dplyr::count(.data$time_value) %>%
dplyr::pull(n)
`[[`("n")

if (!all(purrr::map_lgl(slide_values_list, is.atomic)) &&
!all(purrr::map_lgl(slide_values_list, is.data.frame))) {
Expand Down Expand Up @@ -353,22 +345,22 @@ epi_slide <- function(x, f, ..., before, after, ref_time_values,
# Create a wrapper that calculates and passes `.ref_time_value` to the
# computation. `i` is contained in the `f_wrapper_factory` environment such
# that when called within `slide_one_grp` `i` is reset for every group.
f_wrapper_factory <- function(starts) {
f_wrapper_factory <- function(kept_ref_time_values) {
# Use `i` to advance through list of start dates.
i <- 1L
starts <- starts + before
f_wrapper <- function(.x, .group_key, ...) {
.ref_time_value <- starts[[i]]
.ref_time_value <- kept_ref_time_values[[i]]
i <<- i + 1L
f(.x, .group_key, .ref_time_value, ...)
}
return(f_wrapper)
}
x <- group_modify(x, slide_one_grp,
f_factory = f_wrapper_factory, ...,
...,
f_factory = f_wrapper_factory,
starts = starts,
stops = stops,
time_values = ref_time_values,
ref_time_values = ref_time_values,
all_rows = all_rows,
new_col = new_col,
.keep = FALSE
Expand Down
4 changes: 0 additions & 4 deletions R/utils.R
Original file line number Diff line number Diff line change
Expand Up @@ -361,10 +361,6 @@ as_slide_computation <- function(f, ...) {

##########

in_range <- function(x, rng) pmin(pmax(x, rng[1]), rng[2])

##########

Min <- function(x) min(x, na.rm = TRUE)
Max <- function(x) max(x, na.rm = TRUE)
Sum <- function(x) sum(x, na.rm = TRUE)
Expand Down
6 changes: 0 additions & 6 deletions tests/testthat/test-utils.R
Original file line number Diff line number Diff line change
Expand Up @@ -7,12 +7,6 @@ test_that("Abort and Warn work", {
expect_warning(Warn("warn"))
})

test_that("in_range works", {
expect_equal(in_range(1, c(2, 4)), 2)
expect_equal(in_range(3, c(2, 4)), 3)
expect_equal(in_range(5, c(2, 4)), 4)
})

test_that("new summarizing functions work", {
x <- c(3, 4, 5, 9, NA)
expect_equal(Min(x), 3)
Expand Down