Skip to content

Allow epi_slide to access ref_time_value #318

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 20 commits into from
Jun 16, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
20 commits
Select commit Hold shift + click to select a range
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions NAMESPACE
Original file line number Diff line number Diff line change
Expand Up @@ -69,13 +69,15 @@ importFrom(data.table,key)
importFrom(data.table,set)
importFrom(data.table,setkeyv)
importFrom(dplyr,arrange)
importFrom(dplyr,bind_rows)
importFrom(dplyr,dplyr_col_modify)
importFrom(dplyr,dplyr_reconstruct)
importFrom(dplyr,dplyr_row_slice)
importFrom(dplyr,filter)
importFrom(dplyr,group_by)
importFrom(dplyr,group_by_drop_default)
importFrom(dplyr,group_modify)
importFrom(dplyr,group_vars)
importFrom(dplyr,groups)
importFrom(dplyr,mutate)
importFrom(dplyr,relocate)
Expand Down
81 changes: 71 additions & 10 deletions R/slide.R
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,10 @@
#' If `f` is missing, then `...` will specify the computation.
#' @param ... Additional arguments to pass to the function or formula specified
#' via `f`. Alternatively, if `f` is missing, then the `...` is interpreted as
#' an expression for tidy evaluation. See details.
#' an expression for tidy evaluation; in addition to referring to columns
#' directly by name, the expression has access to `.data` and `.env` pronouns
#' as in `dplyr` verbs, and can also refer to `.x`, `.group_key`, and
#' `.ref_time_value`. See details.
#' @param before,after How far `before` and `after` each `ref_time_value` should
#' the sliding window extend? At least one of these two arguments must be
#' provided; the other's default will be 0. Any value provided for either
Expand Down Expand Up @@ -119,7 +122,8 @@
#' through the `new_col_name` argument.
#'
#' @importFrom lubridate days weeks
#' @importFrom rlang .data .env !! enquo enquos sym
#' @importFrom dplyr bind_rows group_vars filter select
#' @importFrom rlang .data .env !! enquo enquos sym env
#' @export
#' @examples
#' # slide a 7-day trailing average formula on cases
Expand Down Expand Up @@ -166,11 +170,8 @@ epi_slide = function(x, f, ..., before, after, ref_time_values,

# Check that `f` takes enough args
if (!missing(f) && is.function(f)) {
assert_sufficient_f_args(f, ...)
assert_sufficient_f_args(f, ..., n_mandatory_f_args = 3L)
}

# Arrange by increasing time_value
x = arrange(x, time_value)

if (missing(ref_time_values)) {
ref_time_values = unique(x$time_value)
Expand Down Expand Up @@ -231,6 +232,35 @@ epi_slide = function(x, f, ..., before, after, ref_time_values,
after <- time_step(after)
}

min_ref_time_values = ref_time_values - before
min_ref_time_values_not_in_x <- min_ref_time_values[!(min_ref_time_values %in% unique(x$time_value))]

# Do set up to let us recover `ref_time_value`s later.
# A helper column marking real observations.
x$.real = TRUE

# Create df containing phony data. Df has the same columns and attributes as
# `x`, but filled with `NA`s aside from grouping columns. Number of rows is
# equal to the number of `min_ref_time_values_not_in_x` we have * the
# number of unique levels seen in the grouping columns.
before_time_values_df = data.frame(time_value=min_ref_time_values_not_in_x)
if (length(group_vars(x)) != 0) {
before_time_values_df = dplyr::cross_join(
# Get unique combinations of grouping columns seen in real data.
unique(x[, group_vars(x)]),
before_time_values_df
)
}
# Automatically fill in all other columns from `x` with `NA`s, and carry
# attributes over to new df.
before_time_values_df <- bind_rows(x[0,], before_time_values_df)
before_time_values_df$.real <- FALSE

x <- bind_rows(before_time_values_df, x)

# Arrange by increasing time_value
x = arrange(x, time_value)

# Now set up starts and stops for sliding/hopping
time_range = range(unique(x$time_value))
starts = in_range(ref_time_values - before, time_range)
Expand Down Expand Up @@ -272,7 +302,9 @@ epi_slide = function(x, f, ..., before, after, ref_time_values,
o = .data_group$time_value %in% time_values
num_ref_rows = sum(o)

# Count the number of appearances of each reference time value
# Count the number of appearances of each reference time value (these
# appearances should all be real for now, but if we allow ref time values
# outside of .data_group's time values):
counts = .data_group %>%
dplyr::filter(.data$time_value %in% time_values) %>%
dplyr::count(.data$time_value) %>%
Expand All @@ -282,7 +314,7 @@ epi_slide = function(x, f, ..., before, after, ref_time_values,
!all(purrr::map_lgl(slide_values_list, is.data.frame))) {
Abort("The slide computations must return always atomic vectors or data frames (and not a mix of these two structures).")
}

# Unlist if appropriate:
slide_values =
if (as_list_col) {
Expand Down Expand Up @@ -318,16 +350,24 @@ epi_slide = function(x, f, ..., before, after, ref_time_values,
# fills with NA equivalent.
vctrs::vec_slice(slide_values, o) = orig_values
} else {
# This implicitly removes phony (`.real` == FALSE) observations.
.data_group = filter(.data_group, o)
}
return(mutate(.data_group, !!new_col := slide_values))
}

# If f is not missing, then just go ahead, slide by group
if (!missing(f)) {
if (rlang::is_formula(f)) f = as_slide_computation(f)
f_rtv_wrapper = function(x, g, ...) {
ref_time_value = min(x$time_value) + before
x <- x[x$.real,]
x$.real <- NULL
f(x, g, ref_time_value, ...)
}
x = x %>%
group_modify(slide_one_grp,
f = f, ...,
f = f_rtv_wrapper, ...,
starts = starts,
stops = stops,
time_values = ref_time_values,
Expand All @@ -347,7 +387,18 @@ epi_slide = function(x, f, ..., before, after, ref_time_values,
}

quo = quos[[1]]
f = function(x, quo, ...) rlang::eval_tidy(quo, x)
f = function(.x, .group_key, quo, ...) {
.ref_time_value = min(.x$time_value) + before
.x <- .x[.x$.real,]
.x$.real <- NULL
data_mask = rlang::as_data_mask(.x)
# We'll also install `.x` directly, not as an `rlang_data_pronoun`, so
# that we can, e.g., use more dplyr and epiprocess operations.
data_mask$.x = .x
data_mask$.group_key = .group_key
data_mask$.ref_time_value = .ref_time_value
rlang::eval_tidy(quo, data_mask)
}
new_col = sym(names(rlang::quos_auto_name(quos)))

x = x %>%
Expand All @@ -365,5 +416,15 @@ epi_slide = function(x, f, ..., before, after, ref_time_values,
if (!as_list_col) {
x = unnest(x, !!new_col, names_sep = names_sep)
}

# Remove any remaining phony observations. When `all_rows` is TRUE, phony
# observations aren't necessarily removed in `slide_one_grp`.
if (all_rows) {
x <- x[x$.real,]
}

# Drop helper column `.real`.
x$.real <- NULL

return(x)
}
9 changes: 6 additions & 3 deletions man/epi_slide.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

4 changes: 2 additions & 2 deletions man/epix_slide.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

2 changes: 1 addition & 1 deletion man/reexports.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

Loading