diff --git a/NAMESPACE b/NAMESPACE index 8558be1c..fc6aaf74 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -2,6 +2,8 @@ S3method("[",epi_df) S3method("names<-",epi_df) +S3method(arrange_canonical,default) +S3method(arrange_canonical,epi_df) S3method(as_epi_df,data.frame) S3method(as_epi_df,epi_df) S3method(as_epi_df,tbl_df) @@ -45,6 +47,7 @@ S3method(unnest,epi_df) export("%>%") export(archive_cases_dv_subset) export(arrange) +export(arrange_canonical) export(as_epi_archive) export(as_epi_df) export(as_tsibble) diff --git a/R/key_colnames.R b/R/key_colnames.R index 99d8a9ed..b0119764 100644 --- a/R/key_colnames.R +++ b/R/key_colnames.R @@ -18,20 +18,20 @@ key_colnames.default <- function(x, ...) { #' @export key_colnames.data.frame <- function(x, other_keys = character(0L), ...) { assert_character(other_keys) - nm <- c("time_value", "geo_value", other_keys) + nm <- c("geo_value", "time_value", other_keys) intersect(nm, colnames(x)) } #' @export key_colnames.epi_df <- function(x, ...) { other_keys <- attr(x, "metadata")$other_keys - c("time_value", "geo_value", other_keys) + c("geo_value", "time_value", other_keys) } #' @export key_colnames.epi_archive <- function(x, ...) { other_keys <- attr(x, "metadata")$other_keys - c("time_value", "geo_value", other_keys) + c("geo_value", "time_value", other_keys) } kill_time_value <- function(v) { diff --git a/R/methods-epi_df.R b/R/methods-epi_df.R index faa2bdb0..b405cfff 100644 --- a/R/methods-epi_df.R +++ b/R/methods-epi_df.R @@ -274,8 +274,8 @@ group_modify.epi_df <- function(.data, .f, ..., .keep = FALSE) { #' daily_edf %>% #' group_by(geo_value) %>% #' complete(time_value = full_seq(time_value, period = 1)) -#' # Complete has explicit=TRUE by default, but if it's FALSE, then complete only fills the implicit gaps -#' # not those that are explicitly NA +#' # Complete has explicit=TRUE by default, but if it's FALSE, then complete +#' # only fills the implicit gaps, not those that are explicitly NA #' daily_edf <- tibble::tribble( #' ~geo_value, ~time_value, ~value, #' 1, start_date + 1, 1, @@ -303,11 +303,18 @@ group_modify.epi_df <- function(.data, .f, ..., .keep = FALSE) { #' ) %>% #' as_epi_df(as_of = start_date + 3) #' weekly_edf %>% -#' complete(geo_value, time_value = full_seq(time_value, period = 7), fill = list(value = 0)) +#' complete( +#' geo_value, +#' time_value = full_seq(time_value, period = 7), +#' fill = list(value = 0) +#' ) #' # With grouping #' weekly_edf %>% #' group_by(geo_value) %>% -#' complete(time_value = full_seq(time_value, period = 7), fill = list(value = 0)) +#' complete( +#' time_value = full_seq(time_value, period = 7), +#' fill = list(value = 0) +#' ) #' @export complete.epi_df <- function(data, ..., fill = list(), explicit = TRUE) { result <- dplyr::dplyr_reconstruct(NextMethod(), data) @@ -331,3 +338,36 @@ reclass <- function(x, metadata) { attributes(x)$metadata <- metadata return(x) } + +#' Arrange an epi_df into a standard order +#' +#' Moves [key_colnames()] to the left, then arranges rows based on that +#' ordering. This function is mainly for use in tests and so that +#' other function output will be in predictable order, where necessary. +#' +#' @param x an `epi_df`. Other objects will produce a warning and return as is. +#' @param ... not used +#' +#' @keywords internal +#' @export +arrange_canonical <- function(x, ...) { + UseMethod("arrange_canonical") +} + +#' @export +arrange_canonical.default <- function(x, ...) { + rlang::check_dots_empty() + cli::cli_abort(c( + "`arrange_canonical()` is only meaningful for an {.cls epi_df}." + )) + return(x) +} + +#' @export +arrange_canonical.epi_df <- function(x, ...) { + rlang::check_dots_empty() + keys <- key_colnames(x) + x %>% + dplyr::relocate(dplyr::all_of(keys), .before = 1) %>% + dplyr::arrange(dplyr::across(dplyr::all_of(keys))) +} diff --git a/man/arrange_canonical.Rd b/man/arrange_canonical.Rd new file mode 100644 index 00000000..3d29c2af --- /dev/null +++ b/man/arrange_canonical.Rd @@ -0,0 +1,19 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/methods-epi_df.R +\name{arrange_canonical} +\alias{arrange_canonical} +\title{Arrange an epi_df into a standard order} +\usage{ +arrange_canonical(x, ...) +} +\arguments{ +\item{x}{an \code{epi_df}. Other objects will produce a warning and return as is.} + +\item{...}{not used} +} +\description{ +Moves \code{\link[=key_colnames]{key_colnames()}} to the left, then arranges rows based on that +ordering. This function is mainly for use in tests and so that +other function output will be in predictable order, where necessary. +} +\keyword{internal} diff --git a/man/complete.epi_df.Rd b/man/complete.epi_df.Rd index d9ae9f4d..9f450cb0 100644 --- a/man/complete.epi_df.Rd +++ b/man/complete.epi_df.Rd @@ -39,8 +39,8 @@ daily_edf \%>\% daily_edf \%>\% group_by(geo_value) \%>\% complete(time_value = full_seq(time_value, period = 1)) -# Complete has explicit=TRUE by default, but if it's FALSE, then complete only fills the implicit gaps -# not those that are explicitly NA +# Complete has explicit=TRUE by default, but if it's FALSE, then complete +# only fills the implicit gaps, not those that are explicitly NA daily_edf <- tibble::tribble( ~geo_value, ~time_value, ~value, 1, start_date + 1, 1, @@ -68,9 +68,16 @@ weekly_edf <- tibble::tribble( ) \%>\% as_epi_df(as_of = start_date + 3) weekly_edf \%>\% - complete(geo_value, time_value = full_seq(time_value, period = 7), fill = list(value = 0)) + complete( + geo_value, + time_value = full_seq(time_value, period = 7), + fill = list(value = 0) + ) # With grouping weekly_edf \%>\% group_by(geo_value) \%>\% - complete(time_value = full_seq(time_value, period = 7), fill = list(value = 0)) + complete( + time_value = full_seq(time_value, period = 7), + fill = list(value = 0) + ) } diff --git a/tests/testthat/_snaps/revision-latency-functions.md b/tests/testthat/_snaps/revision-latency-functions.md index 00ee8253..1ac21469 100644 --- a/tests/testthat/_snaps/revision-latency-functions.md +++ b/tests/testthat/_snaps/revision-latency-functions.md @@ -28,19 +28,19 @@ time_value geo_value n_revisions min_lag max_lag time_near_latest spread 1 2020-01-01 ak 4 2 days 19 days 19 days 101 - 2 2020-01-01 al 1 0 days 19 days 19 days 99 - 3 2020-01-02 ak 1 4 days 5 days 4 days 9 - 4 2020-01-02 al 0 0 days 0 days 0 days 0 - 5 2020-01-03 ak 0 3 days 3 days 3 days 0 + 2 2020-01-02 ak 1 4 days 5 days 4 days 9 + 3 2020-01-03 ak 0 3 days 3 days 3 days 0 + 4 2020-01-01 al 1 0 days 19 days 19 days 99 + 5 2020-01-02 al 0 0 days 0 days 0 days 0 6 2020-01-03 al 1 1 days 2 days 2 days 3 7 2020-01-04 al 0 1 days 1 days 1 days 0 rel_spread min_value max_value median_value 1 0.990 1 102 6 - 2 0.99 1 100 50.5 - 3 0.09 91 100 95.5 - 4 0 1 1 1 - 5 NaN 0 0 0 + 2 0.09 91 100 95.5 + 3 NaN 0 0 0 + 4 0.99 1 100 50.5 + 5 0 1 1 1 6 0.75 1 4 2.5 7 0 9 9 9 @@ -76,19 +76,19 @@ time_value geo_value n_revisions min_lag max_lag time_near_latest spread 1 2020-01-01 ak 6 2 days 19 days 19 days 101 - 2 2020-01-01 al 1 0 days 19 days 19 days 99 - 3 2020-01-02 ak 1 4 days 5 days 4 days 9 - 4 2020-01-02 al 0 0 days 0 days 0 days 0 - 5 2020-01-03 ak 0 3 days 3 days 3 days 0 + 2 2020-01-02 ak 1 4 days 5 days 4 days 9 + 3 2020-01-03 ak 0 3 days 3 days 3 days 0 + 4 2020-01-01 al 1 0 days 19 days 19 days 99 + 5 2020-01-02 al 0 0 days 0 days 0 days 0 6 2020-01-03 al 1 1 days 2 days 2 days 3 7 2020-01-04 al 1 0 days 1 days 1 days 0 rel_spread min_value max_value median_value 1 0.990 1 102 5.5 - 2 0.99 1 100 50.5 - 3 0.09 91 100 95.5 - 4 0 1 1 1 - 5 NaN 0 0 0 + 2 0.09 91 100 95.5 + 3 NaN 0 0 0 + 4 0.99 1 100 50.5 + 5 0 1 1 1 6 0.75 1 4 2.5 7 0 9 9 9 diff --git a/tests/testthat/test-arrange-canonical.R b/tests/testthat/test-arrange-canonical.R new file mode 100644 index 00000000..ec42feac --- /dev/null +++ b/tests/testthat/test-arrange-canonical.R @@ -0,0 +1,19 @@ +test_that("canonical arrangement works", { + tib <- tibble( + x = 1:8, + demo_grp = rep(c("b", "b", "a", "a"), times = 2), + geo_value = rep(c("ga", "ca"), each = 4), + time_value = rep(2:1, times = 4) + ) + expect_error(arrange_canonical(tib)) + + tib <- tib %>% as_epi_df(additional_metadata = list(other_keys = "demo_grp")) + expect_equal(names(tib), c("geo_value", "time_value", "x", "demo_grp")) + + tib_sorted <- arrange_canonical(tib) + expect_equal(names(tib_sorted), c("geo_value", "time_value", "demo_grp", "x")) + expect_equal(tib_sorted$geo_value, rep(c("ca", "ga"), each = 4)) + expect_equal(tib_sorted$time_value, c(1, 1, 2, 2, 1, 1, 2, 2)) + expect_equal(tib_sorted$demo_grp, rep(letters[1:2], times = 4)) + expect_equal(tib_sorted$x, c(8, 6, 7, 5, 4, 2, 3, 1)) +}) diff --git a/tests/testthat/test-utils.R b/tests/testthat/test-utils.R index e220af16..12e7a3f7 100644 --- a/tests/testthat/test-utils.R +++ b/tests/testthat/test-utils.R @@ -240,8 +240,8 @@ test_that("guess_period works", { weekly_dates ) # On POSIXcts: - daily_posixcts <- as.POSIXct(daily_dates, tz = "ET") + 3600 - weekly_posixcts <- as.POSIXct(weekly_dates, tz = "ET") + 3600 + daily_posixcts <- as.POSIXct(daily_dates, tz = "US/Aleutian") + 3600 + weekly_posixcts <- as.POSIXct(weekly_dates, tz = "US/Aleutian") + 3600 expect_identical( daily_posixcts[[1L]] + guess_period(daily_posixcts) * (seq_along(daily_posixcts) - 1L), daily_posixcts @@ -251,8 +251,8 @@ test_that("guess_period works", { weekly_posixcts ) # On POSIXlts: - daily_posixlts <- as.POSIXlt(daily_dates, tz = "ET") + 3600 - weekly_posixlts <- as.POSIXlt(weekly_dates, tz = "ET") + 3600 + daily_posixlts <- as.POSIXlt(daily_dates, tz = "US/Aleutian") + 3600 + weekly_posixlts <- as.POSIXlt(weekly_dates, tz = "US/Aleutian") + 3600 expect_identical( daily_posixlts[[1L]] + guess_period(daily_posixlts) * (seq_along(daily_posixlts) - 1L), daily_posixlts