Skip to content

Lcb/slide unnest dedupe cols #509

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 3 additions & 0 deletions NAMESPACE
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,7 @@ S3method(group_by,epi_df)
S3method(group_by,grouped_epi_archive)
S3method(group_by_drop_default,grouped_epi_archive)
S3method(group_modify,epi_df)
S3method(group_vars,grouped_epi_archive)
S3method(groups,grouped_epi_archive)
S3method(guess_period,Date)
S3method(guess_period,POSIXt)
Expand All @@ -48,8 +49,10 @@ export("%>%")
export(archive_cases_dv_subset)
export(arrange)
export(arrange_canonical)
export(as_diagonal_slide_computation)
export(as_epi_archive)
export(as_epi_df)
export(as_time_slide_computation)
export(as_tsibble)
export(autoplot)
export(clone)
Expand Down
11 changes: 8 additions & 3 deletions R/archive.R
Original file line number Diff line number Diff line change
Expand Up @@ -49,7 +49,7 @@ validate_version_bound <- function(version_bound, x, na_ok = FALSE,
if (!identical(class(version_bound), class(x[["version"]]))) {
cli_abort(
"{version_bound_arg} must have the same `class` vector as x$version,
which has a `class` of {paste(collapse = ' ', deparse(class(x$version)))}",
which has a `class` of {format_class_vec(class(x$version))}",
class = "epiprocess__version_bound_mismatched_class"
)
}
Expand Down Expand Up @@ -585,8 +585,8 @@ print.epi_archive <- function(x, ..., class = TRUE, methods = TRUE) {
#' `...`.
#' @param .drop As described in [`dplyr::group_by`]; determines treatment of
#' factor columns.
#' @param x For `groups` or `ungroup`: a `grouped_epi_archive`; for
#' `is_grouped_epi_archive`: any object
#' @param x For `groups`, `group_vars`, or `ungroup`: a `grouped_epi_archive`;
#' for `is_grouped_epi_archive`: any object
#' @param .tbl (For `group_by_drop_default`:) an `epi_archive` or
#' `grouped_epi_archive` (`epi_archive` dispatches to the S3 default method;
#' `grouped_epi_archive` dispatches its own S3 method)
Expand Down Expand Up @@ -665,6 +665,11 @@ print.epi_archive <- function(x, ..., class = TRUE, methods = TRUE) {
#' group_by(geo_value, age_group) %>%
#' ungroup(age_group)
#'
#' # To get the grouping variable names as a character vector:
#' toy_archive %>%
#' group_by(geo_value) %>%
#' group_vars()
#'
#' # To get the grouping variable names as a `list` of `name`s (a.k.a. symbols):
#' toy_archive %>%
#' group_by(geo_value) %>%
Expand Down
77 changes: 48 additions & 29 deletions R/grouped_epi_archive.R
Original file line number Diff line number Diff line change
Expand Up @@ -55,7 +55,7 @@ new_grouped_epi_archive <- function(x, vars, drop) {
or `ungroup` first.",
class = "epiprocess__grouped_epi_archive__ungrouped_arg_is_already_grouped",
epiprocess__ungrouped_class = class(x),
epiprocess__ungrouped_groups = groups(x)
epiprocess__ungrouped_group_vars = group_vars(x)
)
}
assert_class(x, "epi_archive")
Expand Down Expand Up @@ -160,6 +160,14 @@ group_by_drop_default.grouped_epi_archive <- function(.tbl) {
.tbl$private$drop
}

#' @include methods-epi_archive.R
#' @rdname group_by.epi_archive
#'
#' @importFrom dplyr group_vars
#' @export
group_vars.grouped_epi_archive <- function(x) {
x$private$vars
}

#' @include methods-epi_archive.R
#' @rdname group_by.epi_archive
Expand All @@ -170,7 +178,6 @@ groups.grouped_epi_archive <- function(x) {
rlang::syms(x$private$vars)
}


#' @include methods-epi_archive.R
#' @rdname group_by.epi_archive
#'
Expand Down Expand Up @@ -209,14 +216,15 @@ epix_slide.grouped_epi_archive <- function(
.f,
...,
.before = Inf,
.ref_time_values = NULL,
.versions = NULL,
.new_col_name = NULL,
.all_versions = FALSE) {
# Deprecated argument handling
# Perform some deprecated argument checks without using `<param> =
# deprecated()` in the function signature, because they are from
# early development versions and much more likely to be clutter than
# informative in the signature.
provided_args <- rlang::call_args_names(rlang::call_match())
if (any(purrr::map_lgl(
provided_args, ~ .x %in% c("x", "f", "before", "ref_time_values", "new_col_name", "all_versions")
))) {
if (any(provided_args %in% c("x", "f", "before", "ref_time_values", "new_col_name", "all_versions"))) {
cli::cli_abort(
"epix_slide: you are using one of the following old argument names: `x`, `f`, `before`, `ref_time_values`,
`new_col_name`, `all_versions`. Please use the new names: `.x`, `.f`, `.before`, `.ref_time_values`,
Expand Down Expand Up @@ -255,26 +263,37 @@ epix_slide.grouped_epi_archive <- function(
}

# Argument validation
if (is.null(.ref_time_values)) {
ref_time_values <- epix_slide_ref_time_values_default(.x$private$ungrouped)
if (is.null(.versions)) {
.versions <- epix_slide_versions_default(.x$private$ungrouped)
} else {
assert_numeric(.ref_time_values, min.len = 1L, null.ok = FALSE, any.missing = FALSE)
if (any(.ref_time_values > .x$private$ungrouped$versions_end)) {
cli_abort("Some `ref_time_values` are greater than the latest version in the archive.")
assert_numeric(.versions, min.len = 1L, null.ok = FALSE, any.missing = FALSE)
if (any(.versions > .x$private$ungrouped$versions_end)) {
cli_abort("All `.versions` must be less than or equal to the latest version in the archive.")
}
if (anyDuplicated(.ref_time_values) != 0L) {
cli_abort("Some `ref_time_values` are duplicated.")
if (anyDuplicated(.versions) != 0L) {
cli_abort("All `.versions` must be unique.")
}
# Sort, for consistency with `epi_slide`, although the current
# implementation doesn't take advantage of it.
ref_time_values <- sort(.ref_time_values)
.versions <- sort(.versions)
}

validate_slide_window_arg(.before, .x$private$ungrouped$time_type)

checkmate::assert_string(.new_col_name, null.ok = TRUE)
if (identical(.new_col_name, "time_value")) {
cli_abort('`new_col_name` must not be `"time_value"`; `epix_slide()` uses that column name to attach the `ref_time_value` associated with each slide computation') # nolint: line_length_linter
if (!is.null(.new_col_name)) {
if (.new_col_name %in% .x$private$vars) {
cli_abort(c("`.new_col_name` must not be one of the grouping column name(s);
`epix_slide()` uses these column name(s) to label what group
each slide computation came from.",
"i" = "{cli::qty(length(.x$private$vars))} grouping column name{?s}
{?was/were} {format_chr_with_quotes(.x$private$vars)}",
"x" = "`new_col_name` was {format_chr_with_quotes(new_col_name)}"
))
}
if (identical(.new_col_name, "version")) {
cli_abort('`.new_col_name` must not be `"version"`; `epix_slide()` uses that column name to attach the element of `.versions` associated with each slide computation') # nolint: line_length_linter
}
}

assert_logical(.all_versions, len = 1L)
Expand All @@ -287,23 +306,23 @@ epix_slide.grouped_epi_archive <- function(
cli_abort("If `f` is missing then a computation must be specified via `...`.")
}

f <- as_slide_computation(quosures)
f <- as_diagonal_slide_computation(quosures)
# Magic value that passes zero args as dots in calls below. Equivalent to
# `... <- missing_arg()`, but use `assign` to avoid warning about
# improper use of dots.
assign("...", missing_arg())
} else {
used_data_masking <- FALSE
f <- as_slide_computation(.f, ...)
f <- as_diagonal_slide_computation(.f, ...)
}

# Computation for one group, one time value
comp_one_grp <- function(.data_group, .group_key,
f, ...,
ref_time_value,
version,
new_col_name) {
# Carry out the specified computation
comp_value <- f(.data_group, .group_key, ref_time_value, ...)
comp_value <- f(.data_group, .group_key, version, ...)

# If this wasn't a tidyeval computation, we still need to check the output
# types. We'll let `group_modify` and `vec_rbind` deal with checking for
Expand All @@ -326,7 +345,7 @@ epix_slide.grouped_epi_archive <- function(
# redundant work. `group_modify()` provides the group key, we provide the
# ref time value (appropriately recycled) and comp_value (appropriately
# named / unpacked, for quick feedback)
res <- list(time_value = vctrs::vec_rep(ref_time_value, vctrs::vec_size(comp_value)))
res <- list(version = vctrs::vec_rep(version, vctrs::vec_size(comp_value)))

if (is.null(new_col_name)) {
if (inherits(comp_value, "data.frame")) {
Expand All @@ -338,7 +357,7 @@ epix_slide.grouped_epi_archive <- function(
}
} else {
# vector or packed data.frame-type column (note: new_col_name of
# "time_value" is disallowed):
# "version" is disallowed):
res[[new_col_name]] <- comp_value
}

Expand All @@ -350,12 +369,12 @@ epix_slide.grouped_epi_archive <- function(
return(validate_tibble(new_tibble(res)))
}

out <- lapply(ref_time_values, function(ref_time_value) {
out <- lapply(.versions, function(version) {
# Ungrouped as-of data; `epi_df` if `all_versions` is `FALSE`,
# `epi_archive` if `all_versions` is `TRUE`:
as_of_raw <- .x$private$ungrouped %>% epix_as_of(
ref_time_value,
min_time_value = ref_time_value - .before,
version,
min_time_value = version - .before,
all_versions = .all_versions
)

Expand Down Expand Up @@ -391,7 +410,7 @@ epix_slide.grouped_epi_archive <- function(
# Convert each subgroup chunk to an archive before running the calculation.
group_modify_fn <- function(.data_group, .group_key,
f, ...,
ref_time_value,
version,
new_col_name) {
# .data_group is coming from as_of_df as a tibble, but we
# want to feed `comp_one_grp` an `epi_archive` backed by a
Expand All @@ -402,7 +421,7 @@ epix_slide.grouped_epi_archive <- function(
.data_group_archive$DT <- .data_group
comp_one_grp(.data_group_archive, .group_key,
f = f, ...,
ref_time_value = ref_time_value,
version = version,
new_col_name = new_col_name
)
}
Expand All @@ -413,7 +432,7 @@ epix_slide.grouped_epi_archive <- function(
dplyr::group_by(as_of_df, !!!syms(.x$private$vars), .drop = .x$private$drop),
group_modify_fn,
f = f, ...,
ref_time_value = ref_time_value,
version = version,
new_col_name = .new_col_name,
.keep = TRUE
)
Expand Down
Loading
Loading