cmu-delphi
diff --git a/‎NAMESPACE
+1 b/‎NAMESPACE
+1
diff --git a/‎R/epi_df.R
+20-2 b/‎R/epi_df.R
+20-2
diff --git a/‎R/grouped_epi_archive.R
+5-2 b/‎R/grouped_epi_archive.R
+5-2
diff --git a/‎R/methods-epi_df.R
+33 b/‎R/methods-epi_df.R
+33
@@ -45,6 +45,7 @@ S3method(ungroup,epi_df)
 S3method(ungroup,grouped_epi_archive)
 S3method(unnest,epi_df)
 export("%>%")
+export(aggregate_epi_df)
 export(archive_cases_dv_subset)
 export(arrange)
 export(arrange_canonical)
 
@@ -245,10 +245,10 @@ as_epi_df.tbl_df <- function(
     )
   }
   if (lifecycle::is_present(geo_type)) {
-    cli_warn("epi_archive constructor argument `geo_type` is now ignored. Consider removing.")
+    cli_warn("epi_df constructor argument `geo_type` is now ignored. Consider removing.")
   }
   if (lifecycle::is_present(time_type)) {
-    cli_warn("epi_archive constructor argument `time_type` is now ignored. Consider removing.")
+    cli_warn("epi_df constructor argument `time_type` is now ignored. Consider removing.")
   }
 
   # If geo type is missing, then try to guess it
@@ -277,6 +277,20 @@ as_epi_df.tbl_df <- function(
   }
 
   assert_character(other_keys)
+
+  # Check one time_value per group
+  duplicated_time_values <- x %>%
+    group_by(across(all_of(c("geo_value", "time_value", other_keys)))) %>%
+    dplyr::summarize(n = dplyr::n(), .groups = "drop") %>%
+    filter(n > 1)
+  if (nrow(duplicated_time_values) > 0) {
+    bad_data <- capture.output(duplicated_time_values)
+    cli_abort(
+      "as_epi_df: some groups in the data have duplicated time values. epi_df requires a unique time_value per group.",
+      body = c("Sample groups:", bad_data)
+    )
+  }
+
   new_epi_df(x, geo_type, time_type, as_of, other_keys)
 }
 
@@ -309,3 +323,7 @@ as_epi_df.tbl_ts <- function(x, as_of, other_keys = character(), ...) {
 is_epi_df <- function(x) {
   # Pure S3 class-membership test: TRUE when "epi_df" is among the classes of
   # `x`. No columns or metadata are inspected here.
   inherits(x = x, what = "epi_df")
 }
+
+# Group `x` by the columns named by `kill_time_value(key_colnames(x))`.
+#
+# NOTE(review): presumably that is every key column of the `epi_df` except
+# `time_value` -- confirm against the definitions of the two helpers.
+#
+# `group_by()` is called exactly once: nesting a second `group_by()` inside it
+# would hand the `across()` selection to the inner call as its data argument,
+# which fails at runtime.
+group_epi_df <- function(x) {
+  x %>% group_by(across(all_of(kill_time_value(key_colnames(x)))))
+}
@@ -270,11 +270,14 @@ epix_slide.grouped_epi_archive <- function(
     ref_time_values <- sort(.ref_time_values)
   }
 
-  validate_slide_window_arg(.before, .x$private$ungrouped$time_type)
+  validate_slide_window_arg(.before, .x$private$ungrouped$time_type, lower = 0) # nolint: object_usage_linter
 
   checkmate::assert_string(.new_col_name, null.ok = TRUE)
   if (identical(.new_col_name, "time_value")) {
-    cli_abort('`new_col_name` must not be `"time_value"`; `epix_slide()` uses that column name to attach the `ref_time_value` associated with each slide computation') # nolint: line_length_linter
+    cli_abort(
+      '`new_col_name` must not be `"time_value"`; `epix_slide()` uses that column name
+      to attach the `ref_time_value` associated with each slide computation'
+    )
   }
 
   assert_logical(.all_versions, len = 1L)
 
@@ -383,3 +383,36 @@ arrange_canonical.epi_df <- function(x, ...) {
     dplyr::relocate(dplyr::all_of(keys), .before = 1) %>%
     dplyr::arrange(dplyr::across(dplyr::all_of(keys)))
 }
+
+#' Aggregate an `epi_df` object
+#'
+#' Sums the `value_col` column of an `epi_df` within each combination of the
+#' `group_cols` columns and returns the result as an `epi_df`. If `geo_value`
+#' is not one of `group_cols` (i.e. the data is aggregated over `geo_value`),
+#' the resulting `epi_df` will have `geo_value` set to `"total"`.
+#'
+#' @param .x an `epi_df`
+#' @param value_col character name of the column to aggregate
+#' @param group_cols character vector of column names to group by. Grouping
+#'   columns other than `geo_value` and `time_value` are passed to
+#'   `as_epi_df()` as the `other_keys` of the result.
+#' @return an `epi_df` object with one row per combination of `group_cols`,
+#'   created with the same `as_of` as `.x`
+#'
+#' @export
+aggregate_epi_df <- function(.x, value_col = "value", group_cols = "time_value") {
+  assert_class(.x, "epi_df")
+  assert_character(value_col, len = 1)
+  assert_character(group_cols)
+  checkmate::assert_subset(value_col, names(.x))
+  checkmate::assert_subset(group_cols, names(.x))
+
+  # `.groups = "drop"` yields ungrouped output directly, so no separate
+  # `ungroup()` is needed and `summarize()` has no grouping choice to report.
+  aggregated <- .x %>%
+    group_by(across(all_of(group_cols))) %>%
+    dplyr::summarize(across(all_of(value_col), sum), .groups = "drop")
+
+  # Aggregating over geographies: label the combined rows and keep
+  # `geo_value` as the leading column.
+  if (!"geo_value" %in% group_cols) {
+    aggregated <- aggregated %>%
+      mutate(geo_value = "total") %>%
+      relocate(geo_value, .before = 1)
+  }
+
+  # Rows are unique per combination of `group_cols`, so every grouping column
+  # beyond the standard keys has to be declared as a key; otherwise the
+  # duplicate (geo_value, time_value) check in `as_epi_df()` rejects the result.
+  as_epi_df(
+    aggregated,
+    as_of = attr(.x, "metadata")$as_of,
+    other_keys = setdiff(group_cols, c("geo_value", "time_value"))
+  )
+}
Original file line number	Diff line number	Diff line change
`@@ -245,10 +245,10 @@ as_epi_df.tbl_df <- function(`
`245`	`245`	`)`
`246`	`246`	`}`
`247`	`247`	`if (lifecycle::is_present(geo_type)) {`
`248`		- cli_warn("epi_archive constructor argument `geo_type` is now ignored. Consider removing.")
	`248`	+ cli_warn("epi_df constructor argument `geo_type` is now ignored. Consider removing.")
`249`	`249`	`}`
`250`	`250`	`if (lifecycle::is_present(time_type)) {`
`251`		- cli_warn("epi_archive constructor argument `time_type` is now ignored. Consider removing.")
	`251`	+ cli_warn("epi_df constructor argument `time_type` is now ignored. Consider removing.")
`252`	`252`	`}`
`253`	`253`
`254`	`254`	`# If geo type is missing, then try to guess it`
`@@ -277,6 +277,20 @@ as_epi_df.tbl_df <- function(`
`277`	`277`	`}`
`278`	`278`
`279`	`279`	`assert_character(other_keys)`
	`280`	`+`
	`281`	`+ # Check one time_value per group`
	`282`	`+ duplicated_time_values <- x %>%`
	`283`	`+ group_by(across(all_of(c("geo_value", "time_value", other_keys)))) %>%`
	`284`	`+ dplyr::summarize(n = dplyr::n(), .groups = "drop") %>%`
	`285`	`+ filter(n > 1)`
	`286`	`+ if (nrow(duplicated_time_values) > 0) {`
	`287`	`+ bad_data <- capture.output(duplicated_time_values)`
	`288`	`+ cli_abort(`
	`289`	`+ "as_epi_df: some groups in the data have duplicated time values. epi_df requires a unique time_value per group.",`
	`290`	`+ body = c("Sample groups:", bad_data)`
	`291`	`+ )`
	`292`	`+ }`
	`293`	`+`
`280`	`294`	`new_epi_df(x, geo_type, time_type, as_of, other_keys)`
`281`	`295`	`}`
`282`	`296`
`@@ -309,3 +323,7 @@ as_epi_df.tbl_ts <- function(x, as_of, other_keys = character(), ...) {`
`309`	`323`	`is_epi_df <- function(x) {`
`310`	`324`	`inherits(x, "epi_df")`
`311`	`325`	`}`
	`326`	`+`
	`327`	`+group_epi_df <- function(x) {`
	`328`	`+ x %>% group_by(group_by(across(all_of(kill_time_value(key_colnames(.))))))`
	`329`	`+}`
Original file line number	Diff line number	Diff line change
`@@ -270,11 +270,14 @@ epix_slide.grouped_epi_archive <- function(`
`270`	`270`	`ref_time_values <- sort(.ref_time_values)`
`271`	`271`	`}`
`272`	`272`
`273`		`- validate_slide_window_arg(.before, .x$private$ungrouped$time_type)`
	`273`	`+ validate_slide_window_arg(.before, .x$private$ungrouped$time_type, lower = 0) # nolint: object_usage_linter`
`274`	`274`
`275`	`275`	`checkmate::assert_string(.new_col_name, null.ok = TRUE)`
`276`	`276`	`if (identical(.new_col_name, "time_value")) {`
`277`		- cli_abort('`new_col_name` must not be `"time_value"`; `epix_slide()` uses that column name to attach the `ref_time_value` associated with each slide computation') # nolint: line_length_linter
	`277`	`+ cli_abort(`
	`278`	+ '`new_col_name` must not be `"time_value"`; `epix_slide()` uses that column name
	`279`	+ to attach the `ref_time_value` associated with each slide computation'
	`280`	`+ )`
`278`	`281`	`}`
`279`	`282`
`280`	`283`	`assert_logical(.all_versions, len = 1L)`