Skip to content

Commit adb8504

Browse files
committed
feat: refactor epi_slide
* works with grouped epi_dfs only * add .complete_only parameter * correct deprecation messages * add huge amounts of tests * add aggregate_epi_df * single data point per group epi_df now defaults to day time type
1 parent 2f91a90 commit adb8504

11 files changed

+810
-735
lines changed

NAMESPACE

+1
Original file line numberDiff line numberDiff line change
@@ -45,6 +45,7 @@ S3method(ungroup,epi_df)
4545
S3method(ungroup,grouped_epi_archive)
4646
S3method(unnest,epi_df)
4747
export("%>%")
48+
export(aggregate_epi_df)
4849
export(archive_cases_dv_subset)
4950
export(arrange)
5051
export(arrange_canonical)

R/epi_df.R

+20-2
Original file line numberDiff line numberDiff line change
@@ -245,10 +245,10 @@ as_epi_df.tbl_df <- function(
245245
)
246246
}
247247
if (lifecycle::is_present(geo_type)) {
248-
cli_warn("epi_archive constructor argument `geo_type` is now ignored. Consider removing.")
248+
cli_warn("epi_df constructor argument `geo_type` is now ignored. Consider removing.")
249249
}
250250
if (lifecycle::is_present(time_type)) {
251-
cli_warn("epi_archive constructor argument `time_type` is now ignored. Consider removing.")
251+
cli_warn("epi_df constructor argument `time_type` is now ignored. Consider removing.")
252252
}
253253

254254
# If geo type is missing, then try to guess it
@@ -277,6 +277,20 @@ as_epi_df.tbl_df <- function(
277277
}
278278

279279
assert_character(other_keys)
280+
281+
# Check one time_value per group
282+
duplicated_time_values <- x %>%
283+
group_by(across(all_of(c("geo_value", "time_value", other_keys)))) %>%
284+
dplyr::summarize(n = dplyr::n(), .groups = "drop") %>%
285+
filter(n > 1)
286+
if (nrow(duplicated_time_values) > 0) {
287+
bad_data <- capture.output(duplicated_time_values)
288+
cli_abort(
289+
"as_epi_df: some groups in the data have duplicated time values. epi_df requires a unique time_value per group.",
290+
body = c("Sample groups:", bad_data)
291+
)
292+
}
293+
280294
new_epi_df(x, geo_type, time_type, as_of, other_keys)
281295
}
282296

@@ -309,3 +323,7 @@ as_epi_df.tbl_ts <- function(x, as_of, other_keys = character(), ...) {
309323
is_epi_df <- function(x) {
310324
inherits(x, "epi_df")
311325
}
326+
327+
group_epi_df <- function(x) {
328+
x %>% group_by(group_by(across(all_of(kill_time_value(key_colnames(.))))))
329+
}

R/grouped_epi_archive.R

+5-2
Original file line numberDiff line numberDiff line change
@@ -270,11 +270,14 @@ epix_slide.grouped_epi_archive <- function(
270270
ref_time_values <- sort(.ref_time_values)
271271
}
272272

273-
validate_slide_window_arg(.before, .x$private$ungrouped$time_type)
273+
validate_slide_window_arg(.before, .x$private$ungrouped$time_type, lower = 0) # nolint: object_usage_linter
274274

275275
checkmate::assert_string(.new_col_name, null.ok = TRUE)
276276
if (identical(.new_col_name, "time_value")) {
277-
cli_abort('`new_col_name` must not be `"time_value"`; `epix_slide()` uses that column name to attach the `ref_time_value` associated with each slide computation') # nolint: line_length_linter
277+
cli_abort(
278+
'`new_col_name` must not be `"time_value"`; `epix_slide()` uses that column name
279+
to attach the `ref_time_value` associated with each slide computation'
280+
)
278281
}
279282

280283
assert_logical(.all_versions, len = 1L)

R/methods-epi_df.R

+33
Original file line numberDiff line numberDiff line change
@@ -383,3 +383,36 @@ arrange_canonical.epi_df <- function(x, ...) {
383383
dplyr::relocate(dplyr::all_of(keys), .before = 1) %>%
384384
dplyr::arrange(dplyr::across(dplyr::all_of(keys)))
385385
}
386+
387+
#' Aggregate an `epi_df` object
388+
#'
389+
#' Aggregates an `epi_df` object by the specified group columns, summing the
390+
#' `value` column, and returning an `epi_df`. If aggregating over `geo_value`,
391+
#' the resulting `epi_df` will have `geo_value` set to `"total"`.
392+
#'
393+
#' @param .x an `epi_df`
394+
#' @param value_col character name of the column to aggregate
395+
#' @param group_cols character vector of column names to group by
396+
#' @return an `epi_df` object
397+
#'
398+
#' @export
399+
aggregate_epi_df <- function(.x, value_col = "value", group_cols = "time_value") {
400+
assert_class(.x, "epi_df")
401+
assert_character(value_col, len = 1)
402+
assert_character(group_cols)
403+
checkmate::assert_subset(value_col, names(.x))
404+
checkmate::assert_subset(group_cols, names(.x))
405+
406+
.x %>%
407+
group_by(across(all_of(group_cols))) %>%
408+
dplyr::summarize(!!(value_col) := sum(!!sym(value_col))) %>%
409+
ungroup() %>%
410+
{
411+
if (!"geo_value" %in% group_cols) {
412+
mutate(., geo_value = "total") %>% relocate(geo_value, .before = 1)
413+
} else {
414+
.
415+
}
416+
} %>%
417+
as_epi_df(as_of = attr(.x, "metadata")$as_of)
418+
}

0 commit comments

Comments
 (0)