cmu-delphi · brookslogan · Aug 21, 2024 · Aug 13, 2024 · Aug 13, 2024 · Aug 13, 2024
diff --git a/DESCRIPTION b/DESCRIPTION
@@ -1,7 +1,7 @@
 Type: Package
 Package: epiprocess
 Title: Tools for basic signal processing in epidemiology
-Version: 0.8.2
+Version: 0.8.3
 Authors@R: c(
     person("Jacob", "Bien", role = "ctb"),
     person("Logan", "Brooks", email = "[email protected]", role = c("aut", "cre")),

diff --git a/NAMESPACE b/NAMESPACE
@@ -2,6 +2,8 @@
 
 S3method("[",epi_df)
 S3method("names<-",epi_df)
+S3method(arrange_canonical,default)
+S3method(arrange_canonical,epi_df)
 S3method(as_epi_df,data.frame)
 S3method(as_epi_df,epi_df)
 S3method(as_epi_df,tbl_df)
@@ -45,6 +47,7 @@ S3method(unnest,epi_df)
 export("%>%")
 export(archive_cases_dv_subset)
 export(arrange)
+export(arrange_canonical)
 export(as_epi_archive)
 export(as_epi_df)
 export(as_tsibble)

diff --git a/R/key_colnames.R b/R/key_colnames.R
@@ -18,20 +18,20 @@ key_colnames.default <- function(x, ...) {
 #' @export
 key_colnames.data.frame <- function(x, other_keys = character(0L), ...) {
   assert_character(other_keys)
-  nm <- c("time_value", "geo_value", other_keys)
+  nm <- c("geo_value", "time_value", other_keys)
   intersect(nm, colnames(x))
 }
 
 #' @export
 key_colnames.epi_df <- function(x, ...) {
   other_keys <- attr(x, "metadata")$other_keys
-  c("time_value", "geo_value", other_keys)
+  c("geo_value", "time_value", other_keys)
 }
 
 #' @export
 key_colnames.epi_archive <- function(x, ...) {
   other_keys <- attr(x, "metadata")$other_keys
-  c("time_value", "geo_value", other_keys)
+  c("geo_value", "time_value", other_keys)
 }
 
 kill_time_value <- function(v) {

diff --git a/R/methods-epi_df.R b/R/methods-epi_df.R
@@ -274,8 +274,8 @@ group_modify.epi_df <- function(.data, .f, ..., .keep = FALSE) {
 #' daily_edf %>%
 #'   group_by(geo_value) %>%
 #'   complete(time_value = full_seq(time_value, period = 1))
-#' # Complete has explicit=TRUE by default, but if it's FALSE, then complete only fills the implicit gaps
-#' # not those that are explicitly NA
+#' # Complete has explicit=TRUE by default, but if it's FALSE, then complete
+#' # only fills the implicit gaps, not those that are explicitly NA
 #' daily_edf <- tibble::tribble(
 #'   ~geo_value, ~time_value, ~value,
 #'   1, start_date + 1, 1,
@@ -303,11 +303,18 @@ group_modify.epi_df <- function(.data, .f, ..., .keep = FALSE) {
 #' ) %>%
 #'   as_epi_df(as_of = start_date + 3)
 #' weekly_edf %>%
-#'   complete(geo_value, time_value = full_seq(time_value, period = 7), fill = list(value = 0))
+#'   complete(
+#'     geo_value,
+#'     time_value = full_seq(time_value, period = 7),
+#'     fill = list(value = 0)
+#'   )
 #' # With grouping
 #' weekly_edf %>%
 #'   group_by(geo_value) %>%
-#'   complete(time_value = full_seq(time_value, period = 7), fill = list(value = 0))
+#'   complete(
+#'     time_value = full_seq(time_value, period = 7),
+#'     fill = list(value = 0)
+#'   )
 #' @export
 complete.epi_df <- function(data, ..., fill = list(), explicit = TRUE) {
   result <- dplyr::dplyr_reconstruct(NextMethod(), data)
@@ -331,3 +338,37 @@ reclass <- function(x, metadata) {
   attributes(x)$metadata <- metadata
   return(x)
 }
+
+#' Arrange an epi_df into a standard order
+#'
+#' Moves `key_colnames()` to the left, then arranges rows based on that
+#' ordering. This function is mainly for use in tests and so that
+#' other function output will be in predictable order, where necessary.
+#'
+#' @param x an `epi_df`. Other objects will produce a warning and return as is.
+#' @param ... not used
+#'
+#' @keywords internal
+#' @export
+arrange_canonical <- function(x, ...) {
+  UseMethod("arrange_canonical")
+}
+
+#' @export
+arrange_canonical.default <- function(x, ...) {
+  rlang::check_dots_empty()
+  cli::cli_warn(c(
+    "`arrange_canonical()` is only meaningful for an {.cls epi_df}.",
+    i = "Returning the original {.cls {class(x)[1]}} object."
+  ))
+  return(x)
+}
+
+#' @export
+arrange_canonical.epi_df <- function(x, ...) {
+  rlang::check_dots_empty()
+  keys <- key_colnames(x)
+  x %>%
+    dplyr::relocate(dplyr::all_of(keys), .before = 1) %>%
+    dplyr::arrange(dplyr::across(dplyr::all_of(keys)))
+}
diff --git a/man/arrange_canonical.Rd b/man/arrange_canonical.Rd
diff --git a/man/complete.epi_df.Rd b/man/complete.epi_df.Rd
diff --git a/tests/testthat/test-arrange-canonical.R b/tests/testthat/test-arrange-canonical.R
@@ -0,0 +1,19 @@
+test_that("canonical arrangement works", {
+  tib <- tibble(
+    x = 1:8,
+    y = rep(c("b", "b", "a", "a"), times = 2),
+    geo_value = rep(c("ga", "ca"), each = 4),
+    time_value = rep(2:1, times = 4)
+  )
+  expect_warning(arrange_canonical(tib))
+
+  tib <- tib %>% as_epi_df(additional_metadata = list(other_keys = "y"))
+  expect_equal(names(tib), c("geo_value", "time_value", "x", "y"))
+
+  tib_sorted <- arrange_canonical(tib)
+  expect_equal(names(tib_sorted), c("geo_value", "time_value", "y", "x"))
+  expect_equal(tib_sorted$geo_value, rep(c("ca", "ga"), each = 4))
+  expect_equal(tib_sorted$time_value, c(1, 1, 2, 2, 1, 1, 2, 2))
+  expect_equal(tib_sorted$y, rep(letters[1:2], times = 4))
+  expect_equal(tib_sorted$x, c(8, 6, 7, 5, 4, 2, 3, 1))
+})
diff --git a/tests/testthat/test-utils.R b/tests/testthat/test-utils.R
@@ -251,8 +251,8 @@ test_that("guess_period works", {
     weekly_posixcts
   )
   # On POSIXlts:
-  daily_posixlts <- as.POSIXlt(daily_dates, tz = "ET") + 3600
-  weekly_posixlts <- as.POSIXlt(weekly_dates, tz = "ET") + 3600
+  daily_posixlts <- as.POSIXlt(daily_dates, tz = "UTC") + 3600
+  weekly_posixlts <- as.POSIXlt(weekly_dates, tz = "UTC") + 3600
   expect_identical(
     daily_posixlts[[1L]] + guess_period(daily_posixlts) * (seq_along(daily_posixlts) - 1L),
     daily_posixlts