Skip to content

feat: introduce epix_as_of_current() for convenience #645

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 5 commits into from
Apr 4, 2025
Merged
Show file tree
Hide file tree
Changes from 4 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions NAMESPACE
Original file line number Diff line number Diff line change
Expand Up @@ -76,6 +76,7 @@ export(epi_slide_mean)
export(epi_slide_opt)
export(epi_slide_sum)
export(epix_as_of)
export(epix_as_of_current)
export(epix_fill_through_version)
export(epix_merge)
export(epix_slide)
Expand All @@ -99,6 +100,7 @@ export(new_epi_df)
export(relocate)
export(rename)
export(revision_summary)
export(set_versions_end)
export(slice)
export(sum_groups_epi_df)
export(time_column_names)
Expand Down
6 changes: 5 additions & 1 deletion R/grouped_epi_archive.R
Original file line number Diff line number Diff line change
Expand Up @@ -437,9 +437,13 @@ epix_slide.grouped_epi_archive <- function(
out <- lapply(.versions, function(.version) {
# Ungrouped as-of data; `epi_df` if `all_versions` is `FALSE`,
# `epi_archive` if `all_versions` is `TRUE`:
min_time_value <- .version - .before
if (is.na(min_time_value)) {
min_time_value <- -Inf
}
as_of_raw <- .x$private$ungrouped %>% epix_as_of(
.version,
min_time_value = .version - .before,
min_time_value = min_time_value,
all_versions = .all_versions
)

Expand Down
78 changes: 57 additions & 21 deletions R/methods-epi_archive.R
Original file line number Diff line number Diff line change
Expand Up @@ -58,6 +58,7 @@
#' epix_as_of(archive_cases_dv_subset2, max(archive_cases_dv_subset$DT$version))
#'
#' @importFrom data.table between key
#' @importFrom checkmate assert_scalar assert_logical assert_class
#' @export
epix_as_of <- function(x, version, min_time_value = -Inf, all_versions = FALSE,
max_version = deprecated()) {
Expand Down Expand Up @@ -88,6 +89,14 @@ epix_as_of <- function(x, version, min_time_value = -Inf, all_versions = FALSE,
if (version > x$versions_end) {
cli_abort("`version` must be at most `epi_archive$versions_end`.")
}
assert_scalar(min_time_value, na.ok = FALSE)
min_time_value_inf <- is.infinite(min_time_value) && min_time_value < 0
min_time_value_same_type <- typeof(min_time_value) == typeof(x$DT$time_value) &
class(min_time_value) == class(x$DT$time_value)
if (!min_time_value_inf && !min_time_value_same_type) {
cli_abort("`min_time_value` must be either -Inf or a time_value of the same type and
class as `epi_archive$time_value`.")
}
assert_logical(all_versions, len = 1)
if (!is.na(x$clobberable_versions_start) && version >= x$clobberable_versions_start) {
cli_warn(
Expand All @@ -100,39 +109,63 @@ epix_as_of <- function(x, version, min_time_value = -Inf, all_versions = FALSE,
)
}

# We can't disable nonstandard evaluation nor use the `..` feature in the `i`
# argument of `[.data.table` below; try to avoid problematic names and abort
# if we fail to do so:
.min_time_value <- min_time_value
.version <- version
if (any(c(".min_time_value", ".version") %in% names(x$DT))) {
cli_abort("epi_archives can't contain a `.min_time_value` or `.version` column")
}

# Filter by version and return
if (all_versions) {
# epi_archive is copied into result, so we can modify result directly
result <- epix_truncate_versions_after(x, version)
result$DT <- result$DT[time_value >= .min_time_value, ] # nolint: object_usage_linter
if (!min_time_value_inf) {
# See below for why we need this branch.
filter_mask <- result$DT$time_value >= min_time_value
result$DT <- result$DT[filter_mask, ] # nolint: object_usage_linter
}
return(result)
}

# Make sure to use data.table ways of filtering and selecting
as_of_epi_df <- x$DT[time_value >= .min_time_value & version <= .version, ] %>% # nolint: object_usage_linter
unique(
by = c("geo_value", "time_value", other_keys),
fromLast = TRUE
) %>%
if (min_time_value_inf) {
# This branch is needed for `epix_as_of` to work with `yearmonth` time type
# to avoid time_value > .min_time_value, which is NA for `yearmonth`.
filter_mask <- x$DT$version <= version
} else {
filter_mask <- x$DT$time_value >= min_time_value & x$DT$version <= version
}
as_of_epi_df <- x$DT[filter_mask, ] %>%
unique(by = c("geo_value", "time_value", other_keys), fromLast = TRUE) %>%
as.data.frame() %>%
tibble::as_tibble() %>%
dplyr::select(-"version") %>%
as_epi_df(
as_of = version,
other_keys = other_keys
)
as_epi_df(as_of = version, other_keys = other_keys)

return(as_of_epi_df)
}

#' Get the latest snapshot from an `epi_archive` object.
#'
#' The latest snapshot is the snapshot of the last known version.
#'
#' @param x An `epi_archive` object
#' @return The latest snapshot from an `epi_archive` object
#' @export
epix_as_of_current <- function(x) {
assert_class(x, "epi_archive")
x %>% epix_as_of(.$versions_end)
}

#' Set the `versions_end` attribute of an `epi_archive` object
#'
#' An escape hatch for epix_as_of, which does not allow version >
#' `$versions_end`.
#'
#' @param x An `epi_archive` object
#' @param versions_end The new `versions_end` value
#' @return An `epi_archive` object with the updated `versions_end` attribute
#' @export
set_versions_end <- function(x, versions_end) {
assert_class(x, "epi_archive")
validate_version_bound(versions_end, x$DT, na_ok = FALSE)
x$versions_end <- versions_end
x
}

#' Fill `epi_archive` unobserved history
#'
Expand Down Expand Up @@ -880,10 +913,13 @@ epix_slide.epi_archive <- function(
#' @noRd
epix_slide_versions_default <- function(ea) {
versions_with_updates <- c(ea$DT$version, ea$versions_end)
tidyr::full_seq(versions_with_updates, guess_period(versions_with_updates))
if (ea$time_type == "yearmonth") {
min(versions_with_updates) + seq(0, max(versions_with_updates) - min(versions_with_updates), by = 1)
} else {
tidyr::full_seq(versions_with_updates, guess_period(versions_with_updates))
}
}


#' Filter an `epi_archive` object to keep only older versions
#'
#' Generates a filtered `epi_archive` from an `epi_archive` object, keeping
Expand Down
6 changes: 3 additions & 3 deletions R/slide.R
Original file line number Diff line number Diff line change
Expand Up @@ -1108,7 +1108,7 @@ epi_slide_sum <- function(
#' `before` and `after` args are assumed to have been validated by the calling
#' function (using `validate_slide_window_arg`).
#'
#' @importFrom checkmate assert_function
#' @importFrom checkmate assert_function anyInfinite
#' @keywords internal
full_date_seq <- function(x, before, after, time_type) {
if (!time_type %in% c("day", "week", "yearmonth", "integer")) {
Expand All @@ -1126,7 +1126,7 @@ full_date_seq <- function(x, before, after, time_type) {
if (time_type %in% c("yearmonth", "integer")) {
all_dates <- seq(min(x$time_value), max(x$time_value), by = 1L)

if (before != 0 && before != Inf) {
if (before != 0 && !anyInfinite(before)) {
pad_early_dates <- all_dates[1L] - before:1
}
if (after != 0) {
Expand All @@ -1139,7 +1139,7 @@ full_date_seq <- function(x, before, after, time_type) {
)

all_dates <- seq(min(x$time_value), max(x$time_value), by = by)
if (before != 0 && before != Inf) {
if (before != 0 && !anyInfinite(before)) {
# The behavior is analogous to the branch with tsibble types above. For
# more detail, note that the function `seq.Date(from, ..., length.out =
# n)` returns `from + 0:n`. Since we want `from + 1:n`, we drop the first
Expand Down
2 changes: 2 additions & 0 deletions _pkgdown.yml
Original file line number Diff line number Diff line change
Expand Up @@ -76,11 +76,13 @@ reference:
desc: Functions operating on `epi_archive` objects.
- contents:
- epix_as_of
- epix_as_of_current
- epix_slide
- epix_merge
- revision_summary
- epix_fill_through_version
- epix_truncate_versions_after
- set_versions_end

- title: Basic analysis and visualization
- contents:
Expand Down
17 changes: 17 additions & 0 deletions man/epix_as_of_current.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

20 changes: 20 additions & 0 deletions man/set_versions_end.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

37 changes: 37 additions & 0 deletions tests/testthat/test-methods-epi_archive.R
Original file line number Diff line number Diff line change
Expand Up @@ -128,3 +128,40 @@ test_that("group_vars works as expected", {
"geo_value"
)
})

test_that("epix_as_of_now works as expected", {
expect_equal(
attr(ea2_data %>% as_epi_archive() %>% epix_as_of_current(), "metadata")$as_of,
as.Date("2020-06-04")
)
time_value <- as.Date("2020-06-01")
df <- dplyr::tribble(
~geo_value, ~time_value, ~version, ~cases,
"ca", time_value, time_value, 1,
"ca", time_value + 7, time_value + 7, 2,
)
expect_equal(
attr(df %>% as_epi_archive() %>% epix_as_of_current(), "metadata")$as_of,
as.Date("2020-06-08")
)
time_value <- tsibble::yearmonth(as.Date("2020-06-01") - lubridate::month(1))
df <- dplyr::tribble(
~geo_value, ~time_value, ~version, ~cases,
"ca", time_value, time_value, 1,
"ca", time_value + lubridate::month(1), time_value + lubridate::month(1), 2,
)
expect_equal(
attr(df %>% as_epi_archive() %>% epix_as_of_current(), "metadata")$as_of,
tsibble::yearmonth("2020-06")
)
time_value <- 2020
df <- dplyr::tribble(
~geo_value, ~time_value, ~version, ~cases,
"ca", time_value, time_value, 1,
"ca", time_value + 7, time_value + 7, 2,
)
expect_equal(
attr(df %>% as_epi_archive() %>% epix_as_of_current(), "metadata")$as_of,
2027
)
})