diff --git a/R/archive.R b/R/archive.R index e81315a7..79d08ddf 100644 --- a/R/archive.R +++ b/R/archive.R @@ -286,23 +286,24 @@ epi_archive = #' details. #' @importFrom data.table key #' @importFrom rlang !! !!! enquo enquos is_quosure sym syms - slide = function(f, ..., n = 7, group_by, ref_time_values, + slide = function(f, ..., max_version_gap, group_by, + ref_versions, time_step, new_col_name = "slide_value", as_list_col = FALSE, names_sep = "_", all_rows = FALSE) { # If missing, then set ref time values to be everything; else make # sure we intersect with observed time values - if (missing(ref_time_values)) { - ref_time_values = unique(self$DT$time_value) + if (missing(ref_versions)) { + ref_versions = unique(self$DT$version) } else { - ref_time_values = ref_time_values[ref_time_values %in% + ref_versions = ref_versions[ref_versions %in% unique(self$DT$time_value)] } # If a custom time step is specified, then redefine units - before_num = n-1 - if (!missing(time_step)) before_num = time_step(n-1) + before_num = max_version_gap-1 + if (!missing(time_step)) before_num = time_step(max_version_gap-1) # What to group by? If missing, set according to internal keys if (missing(group_by)) { @@ -324,7 +325,7 @@ epi_archive = # Computation for one group, one time value comp_one_grp = function(.data_group, f, ..., - time_value, + version, key_vars, new_col) { # Carry out the specified computation @@ -370,21 +371,22 @@ epi_archive = # Note that we've already recycled comp value to make size stable, # so tibble() will just recycle time value appropriately - return(tibble::tibble(time_value = time_value, + return(tibble::tibble(version = version, !!new_col := comp_value)) } # If f is not missing, then just go ahead, slide by group if (!missing(f)) { + if (rlang::is_formula(f)) f = rlang::as_function(f) - x = purrr::map_dfr(ref_time_values, function(t) { + x = purrr::map_dfr(ref_versions, function(t) { self$as_of(t, min_time_value = t - before_num) %>% tibble::as_tibble() %>% dplyr::group_by(!!!group_by) %>% dplyr::group_modify(comp_one_grp, f = f, ..., - time_value = t, + version = t, key_vars = key_vars, new_col = new_col, .keep = TRUE) %>% @@ -406,13 +408,13 @@ epi_archive = f = function(x, quo, ...) rlang::eval_tidy(quo, x) new_col = sym(names(rlang::quos_auto_name(quos))) - x = purrr::map_dfr(ref_time_values, function(t) { + x = purrr::map_dfr(ref_versions, function(t) { self$as_of(t, min_time_value = t - before_num) %>% tibble::as_tibble() %>% dplyr::group_by(!!!group_by) %>% dplyr::group_modify(comp_one_grp, f = f, quo = quo, - time_value = t, + version = t, key_vars = key_vars, new_col = new_col, .keep = TRUE) %>% diff --git a/R/methods-epi_archive.R b/R/methods-epi_archive.R index 4a1cd375..3a5af3e0 100644 --- a/R/methods-epi_archive.R +++ b/R/methods-epi_archive.R @@ -112,15 +112,17 @@ epix_merge = function(x, y, ..., locf = TRUE, nan = NA) { #' @param ... Additional arguments to pass to the function or formula specified #' via `f`. Alternatively, if `f` is missing, then the current argument is #' interpreted as an expression for tidy evaluation. -#' @param n Number of time steps to use in the running window. For example, if -#' `n = 7`, and one time step is one day, then to produce a value on January 7 +#' @param max_version_gap Number of time steps to use in the running window. +#' For example, if +#' `max_version_gap = 7`, and one time step is one day, then to produce a +#' value on January 7 #' we apply the given function or formula to data in between January 1 and #' 7. Default is 7. #' @param group_by The variable(s) to group by before slide computation. If #' missing, then the keys in the underlying data table, excluding `time_value` #' and `version`, will be used for grouping. To omit a grouping entirely, use #' `group_by = NULL`. -#' @param ref_time_values Time values for sliding computations, meaning, each +#' @param ref_versions Time values for sliding computations, meaning, each #' element of this vector serves as the reference time point for one sliding #' window. If missing, then this will be set to all unique time values in the #' underlying data table, by default. @@ -176,11 +178,11 @@ epix_merge = function(x, y, ..., locf = TRUE, nan = NA) { #' Finally, this is simply a wrapper around the `slide()` method of the #' `epi_archive` class, so if `x` is an `epi_archive` object, then: #' ``` -#' epix_slide(x, new_var = comp(old_var), n = 120) +#' epix_slide(x, new_var = comp(old_var), max_version_gap = 120) #' ``` #' is equivalent to: #' ``` -#' x$slide(x, new_var = comp(old_var), n = 120) +#' x$slide(new_var = comp(old_var), max_version_gap = 120) #' ``` #' #' @importFrom rlang enquo @@ -191,24 +193,25 @@ epix_merge = function(x, y, ..., locf = TRUE, nan = NA) { #' # 0 day which has no results, for 2020-06-01 #' # 1 day, for 2020-06-02 #' # 2 days, for the rest of the results -#' # never 3 days dur to data latency +#' # never 3 days due to data latency #' -#' time_values <- seq(as.Date("2020-06-01"), +#' versions <- seq(as.Date("2020-06-01"), #' as.Date("2020-06-15"), #' by = "1 day") #' epix_slide(x = archive_cases_dv_subset, #' f = ~ mean(.x$case_rate_7d_av), #' n = 3, #' group_by = geo_value, -#' ref_time_values = time_values, +#' ref_versions = versions, #' new_col_name = 'case_rate_3d_av') -epix_slide = function(x, f, ..., n = 7, group_by, ref_time_values, +epix_slide = function(x, f, ..., max_version_gap, group_by, ref_versions, time_step, new_col_name = "slide_value", as_list_col = FALSE, names_sep = "_", all_rows = FALSE) { if (!inherits(x, "epi_archive")) Abort("`x` must be of class `epi_archive`.") - return(x$slide(f, ..., n = n, + return(x$slide(f, ..., + max_version_gap = max_version_gap, group_by = enquo(group_by), - ref_time_values = ref_time_values, + ref_versions = ref_versions, time_step = time_step, new_col_name = new_col_name, as_list_col = as_list_col, diff --git a/man/epi_archive.Rd b/man/epi_archive.Rd index 84ac9406..8b0cfb2a 100644 --- a/man/epi_archive.Rd +++ b/man/epi_archive.Rd @@ -193,9 +193,9 @@ details. \if{html}{\out{