From 06a61904dd7ba86de004158d037dd9557417c67c Mon Sep 17 00:00:00 2001 From: kenmawer Date: Fri, 29 Jul 2022 11:35:57 -0700 Subject: [PATCH 01/16] Updates relating to checking on main. --- DESCRIPTION | 2 +- man/as_epi_df.Rd | 10 +++++----- 2 files changed, 6 insertions(+), 6 deletions(-) diff --git a/DESCRIPTION b/DESCRIPTION index 00b78130..18a3bdfb 100755 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -56,7 +56,7 @@ Config/testthat/edition: 3 Encoding: UTF-8 LazyData: true Roxygen: list(markdown = TRUE) -RoxygenNote: 7.2.0 +RoxygenNote: 7.2.1 Depends: R (>= 2.10) URL: https://cmu-delphi.github.io/epiprocess/ diff --git a/man/as_epi_df.Rd b/man/as_epi_df.Rd index 5d1b1335..b5df1302 100644 --- a/man/as_epi_df.Rd +++ b/man/as_epi_df.Rd @@ -51,9 +51,9 @@ examples. } \section{Methods (by class)}{ \itemize{ -\item \code{epi_df}: Simply returns the \code{epi_df} object unchanged. +\item \code{as_epi_df(epi_df)}: Simply returns the \code{epi_df} object unchanged. -\item \code{tbl_df}: The input tibble \code{x} must contain the columns +\item \code{as_epi_df(tbl_df)}: The input tibble \code{x} must contain the columns \code{geo_value} and \code{time_value}. All other columns will be preserved as is, and treated as measured variables. If \code{as_of} is missing, then the function will try to guess it from an \code{as_of}, \code{issue}, or \code{version} column of \code{x} @@ -61,14 +61,14 @@ will try to guess it from an \code{as_of}, \code{issue}, or \code{version} colum (stored in its attributes); if this fails, then the current day-time will be used. -\item \code{data.frame}: Works analogously to \code{as_epi_df.tbl_df()}. +\item \code{as_epi_df(data.frame)}: Works analogously to \code{as_epi_df.tbl_df()}. -\item \code{tbl_ts}: Works analogously to \code{as_epi_df.tbl_df()}, except that +\item \code{as_epi_df(tbl_ts)}: Works analogously to \code{as_epi_df.tbl_df()}, except that the \code{tbl_ts} class is dropped, and any key variables (other than "geo_value") are added to the metadata of the returned object, under the \code{other_keys} field. -}} +}} \examples{ # Convert a `tsibble` that has county code as an extra key # Notice that county code should be a character string to preserve any leading zeroes From afbb29bc0422d9a7db1bfb93bc319a2d51700fa0 Mon Sep 17 00:00:00 2001 From: kenmawer Date: Thu, 18 Aug 2022 11:52:05 -0700 Subject: [PATCH 02/16] A few refactors. --- R/methods-epi_archive.R | 18 +++++++++--------- man/epix_slide.Rd | 2 +- 2 files changed, 10 insertions(+), 10 deletions(-) diff --git a/R/methods-epi_archive.R b/R/methods-epi_archive.R index 3387c935..a081bea4 100644 --- a/R/methods-epi_archive.R +++ b/R/methods-epi_archive.R @@ -358,10 +358,10 @@ epix_merge = function(x, y, #' @param ... Additional arguments to pass to the function or formula specified #' via `f`. Alternatively, if `f` is missing, then the current argument is #' interpreted as an expression for tidy evaluation. -#' @param n Number of time steps to use in the running window. For example, if -#' `n = 7`, and one time step is one day, then to produce a value on January 7 -#' we apply the given function or formula to data in between January 1 and -#' 7. +#' @param before Number of time steps to use in the running window. For example, +#' if `before = 7`, and one time step is one day, then to produce a value on +#' January 7 we apply the given function or formula to data in between January +#' 1 and 7. #' @param group_by The variable(s) to group by before slide computation. If #' missing, then the keys in the underlying data table, excluding `time_value` #' and `version`, will be used for grouping. To omit a grouping entirely, use @@ -422,11 +422,11 @@ epix_merge = function(x, y, #' Finally, this is simply a wrapper around the `slide()` method of the #' `epi_archive` class, so if `x` is an `epi_archive` object, then: #' ``` -#' epix_slide(x, new_var = comp(old_var), n = 120) +#' epix_slide(x, new_var = comp(old_var), before = 120) #' ``` #' is equivalent to: #' ``` -#' x$slide(x, new_var = comp(old_var), n = 120) +#' x$slide(x, new_var = comp(old_var), before = 120) #' ``` #' #' @importFrom rlang enquo @@ -444,15 +444,15 @@ epix_merge = function(x, y, #' by = "1 day") #' epix_slide(x = archive_cases_dv_subset, #' f = ~ mean(.x$case_rate_7d_av), -#' n = 3, +#' before = 3, #' group_by = geo_value, #' ref_time_values = time_values, #' new_col_name = 'case_rate_3d_av') -epix_slide = function(x, f, ..., n, group_by, ref_time_values, +epix_slide = function(x, f, ..., before, group_by, ref_time_values, time_step, new_col_name = "slide_value", as_list_col = FALSE, names_sep = "_", all_rows = FALSE) { if (!inherits(x, "epi_archive")) Abort("`x` must be of class `epi_archive`.") - return(x$slide(f, ..., n = n, + return(x$slide(f, ..., before = before, group_by = {{group_by}}, ref_time_values = ref_time_values, time_step = time_step, diff --git a/man/epix_slide.Rd b/man/epix_slide.Rd index 79e9c1c3..89461e16 100644 --- a/man/epix_slide.Rd +++ b/man/epix_slide.Rd @@ -35,7 +35,7 @@ via \code{f}. Alternatively, if \code{f} is missing, then the current argument i interpreted as an expression for tidy evaluation.} \item{n}{Number of time steps to use in the running window. For example, if -\code{n = 7}, and one time step is one day, then to produce a value on January 7 +\code{before = 7}, and one time step is one day, then to produce a value on January 7 we apply the given function or formula to data in between January 1 and 7.} From fc7e635c3c36429e3450bd374e7106eb39968b7e Mon Sep 17 00:00:00 2001 From: kenmawer Date: Thu, 18 Aug 2022 15:14:38 -0700 Subject: [PATCH 03/16] Refactored `n` with `before`. --- R/archive.R | 6 +++--- man/epi_archive.Rd | 2 +- man/epix_slide.Rd | 16 ++++++++-------- vignettes/advanced.Rmd | 8 ++++---- vignettes/archive.Rmd | 7 ++++--- vignettes/compactify.Rmd | 2 +- 6 files changed, 21 insertions(+), 20 deletions(-) diff --git a/R/archive.R b/R/archive.R index 2f6af5e0..ff48dd19 100644 --- a/R/archive.R +++ b/R/archive.R @@ -584,7 +584,7 @@ epi_archive = #' details. #' @importFrom data.table key #' @importFrom rlang !! !!! enquo quo_is_missing enquos is_quosure sym syms - slide = function(f, ..., n, group_by, ref_time_values, + slide = function(f, ..., before, group_by, ref_time_values, time_step, new_col_name = "slide_value", as_list_col = FALSE, names_sep = "_", all_rows = FALSE) { @@ -599,8 +599,8 @@ epi_archive = } # If a custom time step is specified, then redefine units - before_num = n-1 - if (!missing(time_step)) before_num = time_step(n-1) + before_num = before-1 + if (!missing(time_step)) before_num = time_step(before-1) # What to group by? If missing, set according to internal keys; # otherwise, tidyselect. diff --git a/man/epi_archive.Rd b/man/epi_archive.Rd index 026f27e1..998ade9e 100644 --- a/man/epi_archive.Rd +++ b/man/epi_archive.Rd @@ -277,7 +277,7 @@ details. \if{html}{\out{
}}\preformatted{epi_archive$slide( f, ..., - n, + before, group_by, ref_time_values, time_step, diff --git a/man/epix_slide.Rd b/man/epix_slide.Rd index 89461e16..4fdd03be 100644 --- a/man/epix_slide.Rd +++ b/man/epix_slide.Rd @@ -8,7 +8,7 @@ epix_slide( x, f, ..., - n, + before, group_by, ref_time_values, time_step, @@ -34,10 +34,10 @@ sliding window of \code{n} time steps.} via \code{f}. Alternatively, if \code{f} is missing, then the current argument is interpreted as an expression for tidy evaluation.} -\item{n}{Number of time steps to use in the running window. For example, if -\code{before = 7}, and one time step is one day, then to produce a value on January 7 -we apply the given function or formula to data in between January 1 and -7.} +\item{before}{Number of time steps to use in the running window. For example, +if \code{before = 7}, and one time step is one day, then to produce a value on +January 7 we apply the given function or formula to data in between January +1 and 7.} \item{group_by}{The variable(s) to group by before slide computation. If missing, then the keys in the underlying data table, excluding \code{time_value} @@ -117,12 +117,12 @@ version-aware sliding is necessary (as it its purpose). Finally, this is simply a wrapper around the \code{slide()} method of the \code{epi_archive} class, so if \code{x} is an \code{epi_archive} object, then: -\if{html}{\out{
}}\preformatted{epix_slide(x, new_var = comp(old_var), n = 120) +\if{html}{\out{
}}\preformatted{epix_slide(x, new_var = comp(old_var), before = 120) }\if{html}{\out{
}} is equivalent to: -\if{html}{\out{
}}\preformatted{x$slide(x, new_var = comp(old_var), n = 120) +\if{html}{\out{
}}\preformatted{x$slide(x, new_var = comp(old_var), before = 120) }\if{html}{\out{
}} } \examples{ @@ -138,7 +138,7 @@ time_values <- seq(as.Date("2020-06-01"), by = "1 day") epix_slide(x = archive_cases_dv_subset, f = ~ mean(.x$case_rate_7d_av), - n = 3, + before = 3, group_by = geo_value, ref_time_values = time_values, new_col_name = 'case_rate_3d_av') diff --git a/vignettes/advanced.Rmd b/vignettes/advanced.Rmd index 5514eaaa..a1799041 100644 --- a/vignettes/advanced.Rmd +++ b/vignettes/advanced.Rmd @@ -61,12 +61,12 @@ df %>% df %>% mutate(version = time_value) %>% as_epi_archive() %>% - epix_slide(x_2dav = mean(x), n = 2, ref_time_values = as.Date("2020-06-02")) + epix_slide(x_2dav = mean(x), before = 2, ref_time_values = as.Date("2020-06-02")) df %>% mutate(version = time_value) %>% as_epi_archive() %>% - epix_slide(~ mean(.x$x), n = 2, ref_time_values = as.Date("2020-06-02")) + epix_slide(~ mean(.x$x), before = 2, ref_time_values = as.Date("2020-06-02")) ``` When the slide computation returns an atomic vector (rather than a single value) @@ -153,7 +153,7 @@ df %>% as_epi_archive() %>% epix_slide(a = data.frame(x_2dav = mean(x), x_2dma = mad(x)), ref_time_values = as.Date("2020-06-02"), - n = 2, as_list_col = FALSE, names_sep = NULL) + before = 2, as_list_col = FALSE, names_sep = NULL) ``` ## Multi-row outputs @@ -354,7 +354,7 @@ k_week_ahead <- function(x, ahead = 7, as_of = TRUE) { x %>% epix_slide(fc = prob_arx(percent_cli, case_rate_7d_av, geo_value, time_value, args = prob_arx_args(ahead = ahead)), - n = 120, ref_time_values = fc_time_values) %>% + before = 120, ref_time_values = fc_time_values) %>% mutate(target_date = time_value + ahead, as_of = TRUE, geo_value = fc_geo_value) } diff --git a/vignettes/archive.Rmd b/vignettes/archive.Rmd index b78644ad..cda0e9ae 100644 --- a/vignettes/archive.Rmd +++ b/vignettes/archive.Rmd @@ -357,8 +357,9 @@ fc_time_values <- seq(as.Date("2020-08-01"), as.Date("2021-12-01"), by = "1 month") -z <- epix_slide(x, fc = prob_arx(x = percent_cli, y = case_rate_7d_av), n = 120, - ref_time_values = fc_time_values, group_by = geo_value) +z <- epix_slide(x, fc = prob_arx(x = percent_cli, y = case_rate_7d_av), + before = 120, ref_time_values = fc_time_values, + group_by = geo_value) head(z, 10) ``` @@ -387,7 +388,7 @@ x_latest <- epix_as_of(x, max_version = max(x$DT$version)) k_week_ahead <- function(x, ahead = 7, as_of = TRUE) { if (as_of) { x %>% - epix_slide(fc = prob_arx(percent_cli, case_rate_7d_av, ahead = ahead), n = 120, + epix_slide(fc = prob_arx(percent_cli, case_rate_7d_av, ahead = ahead), before = 120, ref_time_values = fc_time_values, group_by = geo_value) %>% mutate(target_date = time_value + ahead, as_of = TRUE) } diff --git a/vignettes/compactify.Rmd b/vignettes/compactify.Rmd index 034235b3..ecb227ca 100644 --- a/vignettes/compactify.Rmd +++ b/vignettes/compactify.Rmd @@ -102,7 +102,7 @@ speeds <- rbind(speeds, speed_test(iterate_as_of,"as_of_1000x")) # Performance of slide slide_median <- function(my_ea) { - my_ea$slide(median = median(case_rate_7d_av), n = 7) + my_ea$slide(median = median(case_rate_7d_av), before = 7) } speeds <- rbind(speeds, speed_test(slide_median,"slide_median")) From d413219e7bd87a736627f2b0e572b7d190539012 Mon Sep 17 00:00:00 2001 From: kenmawer Date: Thu, 18 Aug 2022 16:30:33 -0700 Subject: [PATCH 04/16] No errors! --- tests/testthat/test-methods-epi_archive.R | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/tests/testthat/test-methods-epi_archive.R b/tests/testthat/test-methods-epi_archive.R index d0434f59..2a3292ad 100644 --- a/tests/testthat/test-methods-epi_archive.R +++ b/tests/testthat/test-methods-epi_archive.R @@ -71,13 +71,13 @@ test_that("quosure passing issue in epix_slide is resolved + other potential iss compactify = TRUE) reference_by_modulus = epix_slide(x = ea, f = ~ mean(.x$case_rate_7d_av), - n = 3, + before = 3, group_by = modulus, ref_time_values = time_values, new_col_name = 'case_rate_3d_av') reference_by_both = epix_slide(x = ea, f = ~ mean(.x$case_rate_7d_av), - n = 3, + before = 3, group_by = c(geo_value, modulus), ref_time_values = time_values, new_col_name = 'case_rate_3d_av') @@ -85,7 +85,7 @@ test_that("quosure passing issue in epix_slide is resolved + other potential iss expect_identical( ea$slide( f = ~ mean(.x$case_rate_7d_av), - n = 3, + before = 3, group_by = modulus, ref_time_values = time_values, new_col_name = 'case_rate_3d_av' @@ -96,7 +96,7 @@ test_that("quosure passing issue in epix_slide is resolved + other potential iss expect_identical( epix_slide(x = ea, f = ~ mean(.x$case_rate_7d_av), - n = 3, + before = 3, group_by = "modulus", ref_time_values = time_values, new_col_name = 'case_rate_3d_av'), @@ -105,7 +105,7 @@ test_that("quosure passing issue in epix_slide is resolved + other potential iss expect_identical( ea$slide( f = ~ mean(.x$case_rate_7d_av), - n = 3, + before = 3, group_by = "modulus", ref_time_values = time_values, new_col_name = 'case_rate_3d_av' @@ -121,7 +121,7 @@ test_that("quosure passing issue in epix_slide is resolved + other potential iss expect_identical( epix_slide(x = ea, f = ~ mean(.x$case_rate_7d_av), - n = 3, + before = 3, group_by = tidyselect::all_of(my_group_by), ref_time_values = time_values, new_col_name = 'case_rate_3d_av'), @@ -130,7 +130,7 @@ test_that("quosure passing issue in epix_slide is resolved + other potential iss expect_identical( ea$slide( f = ~ mean(.x$case_rate_7d_av), - n = 3, + before = 3, group_by = tidyselect::all_of(my_group_by), ref_time_values = time_values, new_col_name = 'case_rate_3d_av' @@ -141,7 +141,7 @@ test_that("quosure passing issue in epix_slide is resolved + other potential iss expect_identical( epix_slide(x = ea, f = ~ mean(.x$case_rate_7d_av), - n = 3, + before = 3, ref_time_values = time_values, new_col_name = 'case_rate_3d_av'), reference_by_both @@ -149,7 +149,7 @@ test_that("quosure passing issue in epix_slide is resolved + other potential iss expect_identical( ea$slide( f = ~ mean(.x$case_rate_7d_av), - n = 3, + before = 3, ref_time_values = time_values, new_col_name = 'case_rate_3d_av' ), From 98311c6bc2ed1a2c5d460bc504ee47486f11a3a7 Mon Sep 17 00:00:00 2001 From: kenmawer Date: Thu, 18 Aug 2022 17:19:49 -0700 Subject: [PATCH 05/16] Still need to update testing. --- R/methods-epi_archive.R | 2 +- tests/testthat/test-epix_slide.R | 42 ++++++++++++++++++++++++++++++++ vignettes/archive.Rmd | 2 +- 3 files changed, 44 insertions(+), 2 deletions(-) create mode 100644 tests/testthat/test-epix_slide.R diff --git a/R/methods-epi_archive.R b/R/methods-epi_archive.R index a081bea4..01e08517 100644 --- a/R/methods-epi_archive.R +++ b/R/methods-epi_archive.R @@ -426,7 +426,7 @@ epix_merge = function(x, y, #' ``` #' is equivalent to: #' ``` -#' x$slide(x, new_var = comp(old_var), before = 120) +#' x$slide(new_var = comp(old_var), before = 120) #' ``` #' #' @importFrom rlang enquo diff --git a/tests/testthat/test-epix_slide.R b/tests/testthat/test-epix_slide.R new file mode 100644 index 00000000..c54ad73c --- /dev/null +++ b/tests/testthat/test-epix_slide.R @@ -0,0 +1,42 @@ +library(dplyr) +library(rlang) + +test_that("epix_slide only works on an epi_archive",{ + expect_error(epix_slide(data.frame(x=1))) +}) + +test_that("epix_slide works as intended",{ + x <- tibble::tribble(~version, ~time_value, + 5, c(1:2,4), + 6, c(1:2,4:5), + 7, 2:6) %>% + tidyr::unnest(time_value) + + xx <- bind_cols(geo_value = rep("x",12), + arrange(x,time_value,version), + binary = 2^(1:12)) %>% + as_epi_archive() + + time_values <- 2:5 + + xx1 <- epix_slide(x = xx, + f = ~ sum(.xx$binary), + before = 3, + group_by = geo_value, + ref_time_values = versions, + new_col_name = "sum_binary") + + xx2 <- tibble(geo_value = rep("x",5), + time_value = as.Date("2020-06-01") + 1:5, + sum_binary = c(3)) + + expect_identical(xx1,xx2) # * + + xx3 <- xx$slide(f = ~ sum(.xx$binary), + before = 3, + group_by = "geo_value", + ref_time_values = time_values, + new_col_name = 'sum_binary') + + expect_identical(xx1,xx3) # This and * Imply xx2 and xx3 are identical +}) diff --git a/vignettes/archive.Rmd b/vignettes/archive.Rmd index cda0e9ae..c89f28a3 100644 --- a/vignettes/archive.Rmd +++ b/vignettes/archive.Rmd @@ -358,7 +358,7 @@ fc_time_values <- seq(as.Date("2020-08-01"), by = "1 month") z <- epix_slide(x, fc = prob_arx(x = percent_cli, y = case_rate_7d_av), - before = 120, ref_time_values = fc_time_values, + before = 120, ref_time_values = fc_time_values, group_by = geo_value) head(z, 10) From 5062127aa4b7e01eb45781dc8a23aeed102c6667 Mon Sep 17 00:00:00 2001 From: kenmawer Date: Thu, 18 Aug 2022 17:32:43 -0700 Subject: [PATCH 06/16] IDK why it says xx is not found!!!! --- man/epix_slide.Rd | 2 +- tests/testthat/test-epix_slide.R | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/man/epix_slide.Rd b/man/epix_slide.Rd index 4fdd03be..13dab737 100644 --- a/man/epix_slide.Rd +++ b/man/epix_slide.Rd @@ -122,7 +122,7 @@ Finally, this is simply a wrapper around the \code{slide()} method of the is equivalent to: -\if{html}{\out{
}}\preformatted{x$slide(x, new_var = comp(old_var), before = 120) +\if{html}{\out{
}}\preformatted{x$slide(new_var = comp(old_var), before = 120) }\if{html}{\out{
}} } \examples{ diff --git a/tests/testthat/test-epix_slide.R b/tests/testthat/test-epix_slide.R index c54ad73c..f6268c38 100644 --- a/tests/testthat/test-epix_slide.R +++ b/tests/testthat/test-epix_slide.R @@ -17,13 +17,13 @@ test_that("epix_slide works as intended",{ binary = 2^(1:12)) %>% as_epi_archive() - time_values <- 2:5 + time_values <- c(2,3:5) xx1 <- epix_slide(x = xx, f = ~ sum(.xx$binary), before = 3, group_by = geo_value, - ref_time_values = versions, + ref_time_values = time_values, new_col_name = "sum_binary") xx2 <- tibble(geo_value = rep("x",5), From b5cec01c862435e9c1a6c2ba09397d0bc1b7c3a0 Mon Sep 17 00:00:00 2001 From: kenmawer Date: Thu, 18 Aug 2022 17:55:45 -0700 Subject: [PATCH 07/16] Still having problems with non-similar xx1 and xx2. --- tests/testthat/test-epix_slide.R | 16 +++++++--------- 1 file changed, 7 insertions(+), 9 deletions(-) diff --git a/tests/testthat/test-epix_slide.R b/tests/testthat/test-epix_slide.R index f6268c38..3f503353 100644 --- a/tests/testthat/test-epix_slide.R +++ b/tests/testthat/test-epix_slide.R @@ -17,25 +17,23 @@ test_that("epix_slide works as intended",{ binary = 2^(1:12)) %>% as_epi_archive() - time_values <- c(2,3:5) - xx1 <- epix_slide(x = xx, - f = ~ sum(.xx$binary), + f = ~ sum(.x$binary), before = 3, group_by = geo_value, - ref_time_values = time_values, new_col_name = "sum_binary") - xx2 <- tibble(geo_value = rep("x",5), - time_value = as.Date("2020-06-01") + 1:5, - sum_binary = c(3)) + xx2 <- tibble(geo_value = rep("x",2), + time_value = c(5,6), + sum_binary = c(2^7, + 2^10+2^8)) %>% + as_epi_df() expect_identical(xx1,xx2) # * - xx3 <- xx$slide(f = ~ sum(.xx$binary), + xx3 <- xx$slide(f = ~ sum(.x$binary), before = 3, group_by = "geo_value", - ref_time_values = time_values, new_col_name = 'sum_binary') expect_identical(xx1,xx3) # This and * Imply xx2 and xx3 are identical From a126abd263f10c702c567a5dac204ece69333482 Mon Sep 17 00:00:00 2001 From: kenmawer Date: Fri, 19 Aug 2022 11:29:55 -0700 Subject: [PATCH 08/16] This fixes `epix_slide` without adding random stuff when fixing `epi_slide`. --- tests/testthat/test-epix_slide.R | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/testthat/test-epix_slide.R b/tests/testthat/test-epix_slide.R index 3f503353..17251d01 100644 --- a/tests/testthat/test-epix_slide.R +++ b/tests/testthat/test-epix_slide.R @@ -27,7 +27,7 @@ test_that("epix_slide works as intended",{ time_value = c(5,6), sum_binary = c(2^7, 2^10+2^8)) %>% - as_epi_df() + as_epi_df(as_of = 1) expect_identical(xx1,xx2) # * From 31f7f4cbb6918e10c9eeda85f1529a763dc9e894 Mon Sep 17 00:00:00 2001 From: kenmawer Date: Fri, 19 Aug 2022 11:49:58 -0700 Subject: [PATCH 09/16] Replaced `t` with `ref_time_value`. --- R/archive.R | 14 ++++++++------ 1 file changed, 8 insertions(+), 6 deletions(-) diff --git a/R/archive.R b/R/archive.R index ff48dd19..d5e57daf 100644 --- a/R/archive.R +++ b/R/archive.R @@ -673,12 +673,13 @@ epi_archive = if (!missing(f)) { if (rlang::is_formula(f)) f = rlang::as_function(f) - x = purrr::map_dfr(ref_time_values, function(t) { - self$as_of(t, min_time_value = t - before_num) %>% + x = purrr::map_dfr(ref_time_values, function(ref_time_value) { + self$as_of(ref_time_value, + min_time_value = ref_time_value - before_num) %>% dplyr::group_by(!!!group_by) %>% dplyr::group_modify(comp_one_grp, f = f, ..., - time_value = t, + time_value = ref_time_value, key_vars = key_vars, new_col = new_col, .keep = TRUE) %>% @@ -700,12 +701,13 @@ epi_archive = f = function(x, quo, ...) rlang::eval_tidy(quo, x) new_col = sym(names(rlang::quos_auto_name(quos))) - x = purrr::map_dfr(ref_time_values, function(t) { - self$as_of(t, min_time_value = t - before_num) %>% + x = purrr::map_dfr(ref_time_values, function(ref_time_value) { + self$as_of(ref_time_value, + min_time_value = ref_time_value - before_num) %>% dplyr::group_by(!!!group_by) %>% dplyr::group_modify(comp_one_grp, f = f, quo = quo, - time_value = t, + time_value = ref_time_value, key_vars = key_vars, new_col = new_col, .keep = TRUE) %>% From fbfa546edcbc6afe2d86b8abd4b7a9c1bf92e211 Mon Sep 17 00:00:00 2001 From: kenmawer Date: Fri, 19 Aug 2022 16:38:15 -0700 Subject: [PATCH 10/16] Put in more changes. --- tests/testthat/test-epix_slide.R | 22 ++++++++++------------ 1 file changed, 10 insertions(+), 12 deletions(-) diff --git a/tests/testthat/test-epix_slide.R b/tests/testthat/test-epix_slide.R index 17251d01..3435c241 100644 --- a/tests/testthat/test-epix_slide.R +++ b/tests/testthat/test-epix_slide.R @@ -1,20 +1,17 @@ library(dplyr) -library(rlang) test_that("epix_slide only works on an epi_archive",{ expect_error(epix_slide(data.frame(x=1))) }) test_that("epix_slide works as intended",{ - x <- tibble::tribble(~version, ~time_value, - 5, c(1:2,4), - 6, c(1:2,4:5), - 7, 2:6) %>% - tidyr::unnest(time_value) + x <- tibble::tribble(~version, ~time_value, ~binary, + 5, c(1:2,4), 2^(1:3), + 6, c(1:2,4:5), 2^(4:7), + 7, 2:6, 2^(8:12)) %>% + tidyr::unnest(c(time_value,binary)) - xx <- bind_cols(geo_value = rep("x",12), - arrange(x,time_value,version), - binary = 2^(1:12)) %>% + xx <- bind_cols(geo_value = rep("x",12), x) %>% as_epi_archive() xx1 <- epix_slide(x = xx, @@ -24,10 +21,11 @@ test_that("epix_slide works as intended",{ new_col_name = "sum_binary") xx2 <- tibble(geo_value = rep("x",2), + # 7 should also be there below; this is a bug on issue #153 time_value = c(5,6), - sum_binary = c(2^7, - 2^10+2^8)) %>% - as_epi_df(as_of = 1) + sum_binary = c(2^3, + 2^7+2^6)) %>% + as_epi_df(as_of = 1) # Also a bug (issue #213) expect_identical(xx1,xx2) # * From ecb36a37a52c6c399235a41833cde49c5a501722 Mon Sep 17 00:00:00 2001 From: kenmawer Date: Fri, 19 Aug 2022 16:44:56 -0700 Subject: [PATCH 11/16] Getting unknown error. --- tests/testthat/test-epix_slide.R | 18 ++++++++++-------- 1 file changed, 10 insertions(+), 8 deletions(-) diff --git a/tests/testthat/test-epix_slide.R b/tests/testthat/test-epix_slide.R index 3435c241..36e5f136 100644 --- a/tests/testthat/test-epix_slide.R +++ b/tests/testthat/test-epix_slide.R @@ -6,12 +6,13 @@ test_that("epix_slide only works on an epi_archive",{ test_that("epix_slide works as intended",{ x <- tibble::tribble(~version, ~time_value, ~binary, - 5, c(1:2,4), 2^(1:3), - 6, c(1:2,4:5), 2^(4:7), - 7, 2:6, 2^(8:12)) %>% + 4, c(1:3), 2^(1:3), + 5, c(1:2,4), 2^(4:6), + 6, c(1:2,4:5), 2^(7:10), + 7, 2:6, 2^(11:15)) %>% tidyr::unnest(c(time_value,binary)) - xx <- bind_cols(geo_value = rep("x",12), x) %>% + xx <- bind_cols(geo_value = rep("x",15), x) %>% as_epi_archive() xx1 <- epix_slide(x = xx, @@ -20,11 +21,12 @@ test_that("epix_slide works as intended",{ group_by = geo_value, new_col_name = "sum_binary") - xx2 <- tibble(geo_value = rep("x",2), + xx2 <- tibble(geo_value = rep("x",3), # 7 should also be there below; this is a bug on issue #153 - time_value = c(5,6), - sum_binary = c(2^3, - 2^7+2^6)) %>% + time_value = c(4,5,6), + sum_binary = c(2^3+2^2, + 2^6+2^5, + 2^10+2^9)) %>% as_epi_df(as_of = 1) # Also a bug (issue #213) expect_identical(xx1,xx2) # * From 63e01d8dc0c921c807f47d1f94125c7ebb6f6b4b Mon Sep 17 00:00:00 2001 From: kenmawer Date: Fri, 19 Aug 2022 17:02:55 -0700 Subject: [PATCH 12/16] This should pass. --- tests/testthat/test-epix_slide.R | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/testthat/test-epix_slide.R b/tests/testthat/test-epix_slide.R index 36e5f136..6fd82c67 100644 --- a/tests/testthat/test-epix_slide.R +++ b/tests/testthat/test-epix_slide.R @@ -25,7 +25,7 @@ test_that("epix_slide works as intended",{ # 7 should also be there below; this is a bug on issue #153 time_value = c(4,5,6), sum_binary = c(2^3+2^2, - 2^6+2^5, + 2^6+2^3, 2^10+2^9)) %>% as_epi_df(as_of = 1) # Also a bug (issue #213) From 9e726515e2945a14fd614b0c05d77b1cc2256c0f Mon Sep 17 00:00:00 2001 From: kenmawer Date: Fri, 19 Aug 2022 17:24:32 -0700 Subject: [PATCH 13/16] Updated `before` that was a number too low. --- R/archive.R | 4 ++-- R/methods-epi_archive.R | 6 +++--- man/epix_slide.Rd | 6 +++--- tests/testthat/test-epix_slide.R | 4 ++-- tests/testthat/test-methods-epi_archive.R | 18 +++++++++--------- vignettes/advanced.Rmd | 8 ++++---- vignettes/archive.Rmd | 4 ++-- 7 files changed, 25 insertions(+), 25 deletions(-) diff --git a/R/archive.R b/R/archive.R index d5e57daf..81c5662b 100644 --- a/R/archive.R +++ b/R/archive.R @@ -599,8 +599,8 @@ epi_archive = } # If a custom time step is specified, then redefine units - before_num = before-1 - if (!missing(time_step)) before_num = time_step(before-1) + before_num = before + if (!missing(time_step)) before_num = time_step(before) # What to group by? If missing, set according to internal keys; # otherwise, tidyselect. diff --git a/R/methods-epi_archive.R b/R/methods-epi_archive.R index 01e08517..8addb2ad 100644 --- a/R/methods-epi_archive.R +++ b/R/methods-epi_archive.R @@ -422,11 +422,11 @@ epix_merge = function(x, y, #' Finally, this is simply a wrapper around the `slide()` method of the #' `epi_archive` class, so if `x` is an `epi_archive` object, then: #' ``` -#' epix_slide(x, new_var = comp(old_var), before = 120) +#' epix_slide(x, new_var = comp(old_var), before = 119) #' ``` #' is equivalent to: #' ``` -#' x$slide(new_var = comp(old_var), before = 120) +#' x$slide(new_var = comp(old_var), before = 119) #' ``` #' #' @importFrom rlang enquo @@ -444,7 +444,7 @@ epix_merge = function(x, y, #' by = "1 day") #' epix_slide(x = archive_cases_dv_subset, #' f = ~ mean(.x$case_rate_7d_av), -#' before = 3, +#' before = 2, #' group_by = geo_value, #' ref_time_values = time_values, #' new_col_name = 'case_rate_3d_av') diff --git a/man/epix_slide.Rd b/man/epix_slide.Rd index 13dab737..b698a66d 100644 --- a/man/epix_slide.Rd +++ b/man/epix_slide.Rd @@ -117,12 +117,12 @@ version-aware sliding is necessary (as it its purpose). Finally, this is simply a wrapper around the \code{slide()} method of the \code{epi_archive} class, so if \code{x} is an \code{epi_archive} object, then: -\if{html}{\out{
}}\preformatted{epix_slide(x, new_var = comp(old_var), before = 120) +\if{html}{\out{
}}\preformatted{epix_slide(x, new_var = comp(old_var), before = 119) }\if{html}{\out{
}} is equivalent to: -\if{html}{\out{
}}\preformatted{x$slide(new_var = comp(old_var), before = 120) +\if{html}{\out{
}}\preformatted{x$slide(new_var = comp(old_var), before = 119) }\if{html}{\out{
}} } \examples{ @@ -138,7 +138,7 @@ time_values <- seq(as.Date("2020-06-01"), by = "1 day") epix_slide(x = archive_cases_dv_subset, f = ~ mean(.x$case_rate_7d_av), - before = 3, + before = 2, group_by = geo_value, ref_time_values = time_values, new_col_name = 'case_rate_3d_av') diff --git a/tests/testthat/test-epix_slide.R b/tests/testthat/test-epix_slide.R index 6fd82c67..6e93401f 100644 --- a/tests/testthat/test-epix_slide.R +++ b/tests/testthat/test-epix_slide.R @@ -17,7 +17,7 @@ test_that("epix_slide works as intended",{ xx1 <- epix_slide(x = xx, f = ~ sum(.x$binary), - before = 3, + before = 2, group_by = geo_value, new_col_name = "sum_binary") @@ -32,7 +32,7 @@ test_that("epix_slide works as intended",{ expect_identical(xx1,xx2) # * xx3 <- xx$slide(f = ~ sum(.x$binary), - before = 3, + before = 2, group_by = "geo_value", new_col_name = 'sum_binary') diff --git a/tests/testthat/test-methods-epi_archive.R b/tests/testthat/test-methods-epi_archive.R index 2a3292ad..02630595 100644 --- a/tests/testthat/test-methods-epi_archive.R +++ b/tests/testthat/test-methods-epi_archive.R @@ -71,13 +71,13 @@ test_that("quosure passing issue in epix_slide is resolved + other potential iss compactify = TRUE) reference_by_modulus = epix_slide(x = ea, f = ~ mean(.x$case_rate_7d_av), - before = 3, + before = 2, group_by = modulus, ref_time_values = time_values, new_col_name = 'case_rate_3d_av') reference_by_both = epix_slide(x = ea, f = ~ mean(.x$case_rate_7d_av), - before = 3, + before = 2, group_by = c(geo_value, modulus), ref_time_values = time_values, new_col_name = 'case_rate_3d_av') @@ -85,7 +85,7 @@ test_that("quosure passing issue in epix_slide is resolved + other potential iss expect_identical( ea$slide( f = ~ mean(.x$case_rate_7d_av), - before = 3, + before = 2, group_by = modulus, ref_time_values = time_values, new_col_name = 'case_rate_3d_av' @@ -96,7 +96,7 @@ test_that("quosure passing issue in epix_slide is resolved + other potential iss expect_identical( epix_slide(x = ea, f = ~ mean(.x$case_rate_7d_av), - before = 3, + before = 2, group_by = "modulus", ref_time_values = time_values, new_col_name = 'case_rate_3d_av'), @@ -105,7 +105,7 @@ test_that("quosure passing issue in epix_slide is resolved + other potential iss expect_identical( ea$slide( f = ~ mean(.x$case_rate_7d_av), - before = 3, + before = 2, group_by = "modulus", ref_time_values = time_values, new_col_name = 'case_rate_3d_av' @@ -121,7 +121,7 @@ test_that("quosure passing issue in epix_slide is resolved + other potential iss expect_identical( epix_slide(x = ea, f = ~ mean(.x$case_rate_7d_av), - before = 3, + before = 2, group_by = tidyselect::all_of(my_group_by), ref_time_values = time_values, new_col_name = 'case_rate_3d_av'), @@ -130,7 +130,7 @@ test_that("quosure passing issue in epix_slide is resolved + other potential iss expect_identical( ea$slide( f = ~ mean(.x$case_rate_7d_av), - before = 3, + before = 2, group_by = tidyselect::all_of(my_group_by), ref_time_values = time_values, new_col_name = 'case_rate_3d_av' @@ -141,7 +141,7 @@ test_that("quosure passing issue in epix_slide is resolved + other potential iss expect_identical( epix_slide(x = ea, f = ~ mean(.x$case_rate_7d_av), - before = 3, + before = 2, ref_time_values = time_values, new_col_name = 'case_rate_3d_av'), reference_by_both @@ -149,7 +149,7 @@ test_that("quosure passing issue in epix_slide is resolved + other potential iss expect_identical( ea$slide( f = ~ mean(.x$case_rate_7d_av), - before = 3, + before = 2, ref_time_values = time_values, new_col_name = 'case_rate_3d_av' ), diff --git a/vignettes/advanced.Rmd b/vignettes/advanced.Rmd index a1799041..8b2eb16f 100644 --- a/vignettes/advanced.Rmd +++ b/vignettes/advanced.Rmd @@ -61,12 +61,12 @@ df %>% df %>% mutate(version = time_value) %>% as_epi_archive() %>% - epix_slide(x_2dav = mean(x), before = 2, ref_time_values = as.Date("2020-06-02")) + epix_slide(x_2dav = mean(x), before = 1, ref_time_values = as.Date("2020-06-02")) df %>% mutate(version = time_value) %>% as_epi_archive() %>% - epix_slide(~ mean(.x$x), before = 2, ref_time_values = as.Date("2020-06-02")) + epix_slide(~ mean(.x$x), before = 1, ref_time_values = as.Date("2020-06-02")) ``` When the slide computation returns an atomic vector (rather than a single value) @@ -153,7 +153,7 @@ df %>% as_epi_archive() %>% epix_slide(a = data.frame(x_2dav = mean(x), x_2dma = mad(x)), ref_time_values = as.Date("2020-06-02"), - before = 2, as_list_col = FALSE, names_sep = NULL) + before = 1, as_list_col = FALSE, names_sep = NULL) ``` ## Multi-row outputs @@ -354,7 +354,7 @@ k_week_ahead <- function(x, ahead = 7, as_of = TRUE) { x %>% epix_slide(fc = prob_arx(percent_cli, case_rate_7d_av, geo_value, time_value, args = prob_arx_args(ahead = ahead)), - before = 120, ref_time_values = fc_time_values) %>% + before = 119, ref_time_values = fc_time_values) %>% mutate(target_date = time_value + ahead, as_of = TRUE, geo_value = fc_geo_value) } diff --git a/vignettes/archive.Rmd b/vignettes/archive.Rmd index c89f28a3..588b0c88 100644 --- a/vignettes/archive.Rmd +++ b/vignettes/archive.Rmd @@ -358,7 +358,7 @@ fc_time_values <- seq(as.Date("2020-08-01"), by = "1 month") z <- epix_slide(x, fc = prob_arx(x = percent_cli, y = case_rate_7d_av), - before = 120, ref_time_values = fc_time_values, + before = 119, ref_time_values = fc_time_values, group_by = geo_value) head(z, 10) @@ -388,7 +388,7 @@ x_latest <- epix_as_of(x, max_version = max(x$DT$version)) k_week_ahead <- function(x, ahead = 7, as_of = TRUE) { if (as_of) { x %>% - epix_slide(fc = prob_arx(percent_cli, case_rate_7d_av, ahead = ahead), before = 120, + epix_slide(fc = prob_arx(percent_cli, case_rate_7d_av, ahead = ahead), before = 119, ref_time_values = fc_time_values, group_by = geo_value) %>% mutate(target_date = time_value + ahead, as_of = TRUE) } From dd6ef488d5ff2000bf5e76893587229364275da2 Mon Sep 17 00:00:00 2001 From: "Logan C. Brooks" Date: Tue, 23 Aug 2022 11:09:30 -0700 Subject: [PATCH 14/16] Refactor out `before_num` from `epix_slide` --- R/archive.R | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/R/archive.R b/R/archive.R index 81c5662b..41bd463f 100644 --- a/R/archive.R +++ b/R/archive.R @@ -599,8 +599,7 @@ epi_archive = } # If a custom time step is specified, then redefine units - before_num = before - if (!missing(time_step)) before_num = time_step(before) + if (!missing(time_step)) before <- time_step(before) # What to group by? If missing, set according to internal keys; # otherwise, tidyselect. @@ -675,7 +674,7 @@ epi_archive = x = purrr::map_dfr(ref_time_values, function(ref_time_value) { self$as_of(ref_time_value, - min_time_value = ref_time_value - before_num) %>% + min_time_value = ref_time_value - before) %>% dplyr::group_by(!!!group_by) %>% dplyr::group_modify(comp_one_grp, f = f, ..., @@ -703,7 +702,7 @@ epi_archive = x = purrr::map_dfr(ref_time_values, function(ref_time_value) { self$as_of(ref_time_value, - min_time_value = ref_time_value - before_num) %>% + min_time_value = ref_time_value - before) %>% dplyr::group_by(!!!group_by) %>% dplyr::group_modify(comp_one_grp, f = f, quo = quo, From 6c562084898ba9e5e5301db6c164a70b361ca84a Mon Sep 17 00:00:00 2001 From: "Logan C. Brooks" Date: Wed, 24 Aug 2022 03:00:28 -0700 Subject: [PATCH 15/16] Fix `local_edition` ambiguity in tests This appears to cause errors when running tests within some interactive sessions, e.g., from running tests (attaching `testthat`), then `library(tidyverse)` (attaching `readr` and masking `local_edition` even if `testthat` is re-`library`-ied in), then attempting to run tests again. --- tests/testthat/test-epix_fill_through_version.R | 13 +++++++------ tests/testthat/test-epix_merge.R | 2 +- 2 files changed, 8 insertions(+), 7 deletions(-) diff --git a/tests/testthat/test-epix_fill_through_version.R b/tests/testthat/test-epix_fill_through_version.R index 03e9c504..1d78bf49 100644 --- a/tests/testthat/test-epix_fill_through_version.R +++ b/tests/testthat/test-epix_fill_through_version.R @@ -11,9 +11,10 @@ test_that("epix_fill_through_version mirrors input when it is sufficiently up to # edition 3, which is based on `waldo::compare` rather than `base::identical`; # `waldo::compare` in waldo >=0.3.1 appears (as of 0.4.0) to compare R6 # objects by contents rather than address (in a way that is tested but maybe - # not guaranteed via user docs). Use `local_edition` to ensure we use edition - # 3 here. - local_edition(3) + # not guaranteed via user docs). Use `testthat::local_edition` to ensure we + # use testthat edition 3 here (use `testthat::` to prevent ambiguity with + # `readr`). + testthat::local_edition(3) expect_identical(ea_orig, ea_trivial_fill_na1) expect_identical(ea_orig, ea_trivial_fill_na2) expect_identical(ea_orig, ea_trivial_fill_locf) @@ -30,9 +31,9 @@ test_that("epix_fill_through_version can extend observed versions, gives expecte ea_fill_na = epix_fill_through_version(ea_orig, later_unobserved_version, "na") ea_fill_locf = epix_fill_through_version(ea_orig, later_unobserved_version, "locf") - # We use edition 3 features here, passing `ignore_attr` to `waldo::compare`. - # Ensure we are using edition 3: - local_edition(3) + # We use testthat edition 3 features here, passing `ignore_attr` to + # `waldo::compare`. Ensure we are using edition 3: + testthat::local_edition(3) withCallingHandlers({ expect_identical(ea_fill_na$versions_end, later_unobserved_version) expect_identical(tibble::as_tibble(ea_fill_na$as_of(first_unobserved_version)), diff --git a/tests/testthat/test-epix_merge.R b/tests/testthat/test-epix_merge.R index 51f2c3c6..8a873926 100644 --- a/tests/testthat/test-epix_merge.R +++ b/tests/testthat/test-epix_merge.R @@ -58,7 +58,7 @@ test_that("epix_merge merges and carries forward updates properly", { ) # We rely on testthat edition 3 expect_identical using waldo, not identical. See # test-epix_fill_through_version.R comments for details. - local_edition(3) + testthat::local_edition(3) expect_identical(xy, xy_expected) }) From d6489103d489a3c5601ff556d02f6ebdeb5cb796 Mon Sep 17 00:00:00 2001 From: "Logan C. Brooks" Date: Wed, 24 Aug 2022 03:29:11 -0700 Subject: [PATCH 16/16] Update `epix_slide` `before` docs, add validation - Try to describe `before` in a similar manner as the PR for `epi_slide` `before`&`after` currently does. - Fix some example discussion not yet adjusted for `before` being off by one relative to `n`. - Update some discussion about differences between `epix_slide` and `epi_slide` `time_value` windows. - Add tests for the `before` validation code --- R/archive.R | 15 ++++++++++- R/methods-epi_archive.R | 31 ++++++++++++++++------ man/epix_slide.Rd | 31 ++++++++++++++++------ tests/testthat/test-epix_slide.R | 45 +++++++++++++++++++++++++------- 4 files changed, 95 insertions(+), 27 deletions(-) diff --git a/R/archive.R b/R/archive.R index 41bd463f..61eca1af 100644 --- a/R/archive.R +++ b/R/archive.R @@ -597,7 +597,20 @@ epi_archive = ref_time_values = ref_time_values[ref_time_values %in% unique(self$DT$time_value)] } - + + # Validate and pre-process `before`: + if (missing(before)) { + Abort("`before` is required (and must be passed by name); + if you did not want to apply a sliding window but rather + to map `as_of` and `f` across various `ref_time_values`, + pass a large `before` value (e.g., if time steps are days, + `before=365000`).") + } + before <- vctrs::vec_cast(before, integer()) + if (length(before) != 1L || is.na(before) || before < 0L) { + Abort("`before` must be length-1, non-NA, non-negative") + } + # If a custom time step is specified, then redefine units if (!missing(time_step)) before <- time_step(before) diff --git a/R/methods-epi_archive.R b/R/methods-epi_archive.R index 8addb2ad..6cffa144 100644 --- a/R/methods-epi_archive.R +++ b/R/methods-epi_archive.R @@ -358,10 +358,21 @@ epix_merge = function(x, y, #' @param ... Additional arguments to pass to the function or formula specified #' via `f`. Alternatively, if `f` is missing, then the current argument is #' interpreted as an expression for tidy evaluation. -#' @param before Number of time steps to use in the running window. For example, -#' if `before = 7`, and one time step is one day, then to produce a value on -#' January 7 we apply the given function or formula to data in between January -#' 1 and 7. +#' @param before How far `before` each `ref_time_value` should the sliding +#' window extend? If provided, should be a single, non-NA, +#' [integer-compatible][vctrs::vec_cast] number of time steps. This window +#' endpoint is inclusive. For example, if `before = 7`, and one time step is +#' one day, then to produce a value for a `ref_time_value` of January 8, we +#' apply the given function or formula to data (for each group present) with +#' `time_value`s from January 1 onward, as they were reported on January 8. +#' For typical disease surveillance sources, this will not include any data +#' with a `time_value` of January 8, and, depending on the amount of reporting +#' latency, may not include January 7 or even earlier `time_value`s. (If +#' instead the archive were to hold nowcasts instead of regular surveillance +#' data, then we would indeed expect data for `time_value` January 8. If it +#' were to hold forecasts, then we would expect data for `time_value`s after +#' January 8, and the sliding window would extend as far after each +#' `ref_time_value` as needed to include all such `time_value`s.) #' @param group_by The variable(s) to group by before slide computation. If #' missing, then the keys in the underlying data table, excluding `time_value` #' and `version`, will be used for grouping. To omit a grouping entirely, use @@ -396,10 +407,14 @@ epix_merge = function(x, y, #' values. #' #' @details Two key distinctions between inputs to the current function and -#' `epi_slide()`: -#' 1. `epix_slide()` uses windows that are **always right-aligned** (in -#' `epi_slide()`, custom alignments could be specified using the `align` or -#' `before` arguments). +#' [`epi_slide()`]: +#' 1. `epix_slide()` doesn't accept an `after` argument; its windows extend +#' from `before` time steps before a given `ref_time_value` through the last +#' `time_value` available as of version `ref_time_value` (typically, this +#' won't include `ref_time_value` itself, as observations about a particular +#' time interval (e.g., day) are only published after that time interval ends); +#' `epi_slide` windows extend from `before` time steps before a +#' `ref_time_value` through `after` time steps after `ref_time_value`. #' 2. `epix_slide()` uses a `group_by` to specify the grouping upfront (in #' `epi_slide()`, this would be accomplished by a preceding function call to #' `dplyr::group_by()`). diff --git a/man/epix_slide.Rd b/man/epix_slide.Rd index b698a66d..bbeb51d4 100644 --- a/man/epix_slide.Rd +++ b/man/epix_slide.Rd @@ -34,10 +34,21 @@ sliding window of \code{n} time steps.} via \code{f}. Alternatively, if \code{f} is missing, then the current argument is interpreted as an expression for tidy evaluation.} -\item{before}{Number of time steps to use in the running window. For example, -if \code{before = 7}, and one time step is one day, then to produce a value on -January 7 we apply the given function or formula to data in between January -1 and 7.} +\item{before}{How far \code{before} each \code{ref_time_value} should the sliding +window extend? If provided, should be a single, non-NA, +\link[vctrs:vec_cast]{integer-compatible} number of time steps. This window +endpoint is inclusive. For example, if \code{before = 7}, and one time step is +one day, then to produce a value for a \code{ref_time_value} of January 8, we +apply the given function or formula to data (for each group present) with +\code{time_value}s from January 1 onward, as they were reported on January 8. +For typical disease surveillance sources, this will not include any data +with a \code{time_value} of January 8, and, depending on the amount of reporting +latency, may not include January 7 or even earlier \code{time_value}s. (If +instead the archive were to hold nowcasts instead of regular surveillance +data, then we would indeed expect data for \code{time_value} January 8. If it +were to hold forecasts, then we would expect data for \code{time_value}s after +January 8, and the sliding window would extend as far after each +\code{ref_time_value} as needed to include all such \code{time_value}s.)} \item{group_by}{The variable(s) to group by before slide computation. If missing, then the keys in the underlying data table, excluding \code{time_value} @@ -89,11 +100,15 @@ examples. } \details{ Two key distinctions between inputs to the current function and -\code{epi_slide()}: +\code{\link[=epi_slide]{epi_slide()}}: \enumerate{ -\item \code{epix_slide()} uses windows that are \strong{always right-aligned} (in -\code{epi_slide()}, custom alignments could be specified using the \code{align} or -\code{before} arguments). +\item \code{epix_slide()} doesn't accept an \code{after} argument; its windows extend +from \code{before} time steps before a given \code{ref_time_value} through the last +\code{time_value} available as of version \code{ref_time_value} (typically, this +won't include \code{ref_time_value} itself, as observations about a particular +time interval (e.g., day) are only published after that time interval ends); +\code{epi_slide} windows extend from \code{before} time steps before a +\code{ref_time_value} through \code{after} time steps after \code{ref_time_value}. \item \code{epix_slide()} uses a \code{group_by} to specify the grouping upfront (in \code{epi_slide()}, this would be accomplished by a preceding function call to \code{dplyr::group_by()}). diff --git a/tests/testthat/test-epix_slide.R b/tests/testthat/test-epix_slide.R index 6e93401f..9a11c64a 100644 --- a/tests/testthat/test-epix_slide.R +++ b/tests/testthat/test-epix_slide.R @@ -4,17 +4,17 @@ test_that("epix_slide only works on an epi_archive",{ expect_error(epix_slide(data.frame(x=1))) }) +x <- tibble::tribble(~version, ~time_value, ~binary, + 4, c(1:3), 2^(1:3), + 5, c(1:2,4), 2^(4:6), + 6, c(1:2,4:5), 2^(7:10), + 7, 2:6, 2^(11:15)) %>% + tidyr::unnest(c(time_value,binary)) + +xx <- bind_cols(geo_value = rep("x",15), x) %>% + as_epi_archive() + test_that("epix_slide works as intended",{ - x <- tibble::tribble(~version, ~time_value, ~binary, - 4, c(1:3), 2^(1:3), - 5, c(1:2,4), 2^(4:6), - 6, c(1:2,4:5), 2^(7:10), - 7, 2:6, 2^(11:15)) %>% - tidyr::unnest(c(time_value,binary)) - - xx <- bind_cols(geo_value = rep("x",15), x) %>% - as_epi_archive() - xx1 <- epix_slide(x = xx, f = ~ sum(.x$binary), before = 2, @@ -38,3 +38,28 @@ test_that("epix_slide works as intended",{ expect_identical(xx1,xx3) # This and * Imply xx2 and xx3 are identical }) + +test_that("epix_slide `before` validation works", { + expect_error(xx$slide(f = ~ sum(.x$binary)), + "`before` is required") + expect_error(xx$slide(f = ~ sum(.x$binary), before=NA), + "`before`.*NA") + expect_error(xx$slide(f = ~ sum(.x$binary), before=-1), + "`before`.*negative") + expect_error(xx$slide(f = ~ sum(.x$binary), before=1.5), + regexp="before", + class="vctrs_error_incompatible_type") + # We might want to allow this at some point (issue #219): + expect_error(xx$slide(f = ~ sum(.x$binary), before=Inf), + regexp="before", + class="vctrs_error_incompatible_type") + # (wrapper shouldn't introduce a value:) + expect_error(epix_slide(xx, f = ~ sum(.x$binary)), "`before` is required") + # These `before` values should be accepted: + expect_error(xx$slide(f = ~ sum(.x$binary), before=0), + NA) + expect_error(xx$slide(f = ~ sum(.x$binary), before=2L), + NA) + expect_error(xx$slide(f = ~ sum(.x$binary), before=365000), + NA) +})