diff --git a/DESCRIPTION b/DESCRIPTION index dc48eb86..339d5681 100755 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -76,6 +76,7 @@ Collate: 'data.R' 'epi_df.R' 'epiprocess.R' + 'group_by_epi_df_methods.R' 'methods-epi_archive.R' 'grouped_epi_archive.R' 'growth_rate.R' diff --git a/NAMESPACE b/NAMESPACE index 73db3483..c59004c8 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -23,6 +23,7 @@ S3method(groups,grouped_epi_archive) S3method(next_after,Date) S3method(next_after,integer) S3method(print,epi_df) +S3method(select,epi_df) S3method(summary,epi_df) S3method(ungroup,epi_df) S3method(ungroup,grouped_epi_archive) diff --git a/R/epi_df.R b/R/epi_df.R index 045c4aaf..53dca62b 100644 --- a/R/epi_df.R +++ b/R/epi_df.R @@ -122,6 +122,9 @@ new_epi_df = function(x = tibble::tibble(), geo_type, time_type, as_of, if (!is.list(additional_metadata)) { Abort("`additional_metadata` must be a list type.") } + if (is.null(additional_metadata[["other_keys"]])) { + additional_metadata[["other_keys"]] <- character(0L) + } # If geo type is missing, then try to guess it if (missing(geo_type)) { @@ -334,4 +337,4 @@ as_epi_df.tbl_ts = function(x, geo_type, time_type, as_of, #' @export is_epi_df = function(x) { inherits(x, "epi_df") -} \ No newline at end of file +} diff --git a/R/group_by_epi_df_methods.R b/R/group_by_epi_df_methods.R new file mode 100644 index 00000000..b531178f --- /dev/null +++ b/R/group_by_epi_df_methods.R @@ -0,0 +1,17 @@ +# These methods (and maybe some others in methods-epi_df.R) are here to augment +# `?dplyr_extending` implementations to get the correct behavior on grouped +# `epi_df`s. It would be nice if there were a way to replace these with a +# generic core that automatically fixed all misbehaving methods; see +# brainstorming within Issue #223. + +#' @importFrom dplyr select +#' @export +select.epi_df <- function(.data, ...) { + selected <- NextMethod(.data) + might_decay <- reclass(selected, attr(selected, "metadata")) + return(dplyr_reconstruct(might_decay, might_decay)) +} + +# others to consider: +# - arrange +# - diff --git a/R/methods-epi_df.R b/R/methods-epi_df.R index 6429b867..6e4666e7 100644 --- a/R/methods-epi_df.R +++ b/R/methods-epi_df.R @@ -179,10 +179,14 @@ dplyr_row_slice.epi_df = function(data, i, ...) { #' @export `names<-.epi_df` = function(x, value) { - old_names = names(x) - old_other_keys = attributes(x)$metadata$other_keys - result = NextMethod() - attributes(x)$metadata$other_keys <- value[match(old_other_keys, old_names)] + old_names <- names(x) + old_metadata <- attr(x, "metadata") + old_other_keys <- old_metadata[["other_keys"]] + new_other_keys <- value[match(old_other_keys, old_names)] + new_metadata <- old_metadata + new_metadata[["other_keys"]] <- new_other_keys + result <- reclass(NextMethod(), new_metadata) + # decay to non-`epi_df` if needed: dplyr::dplyr_reconstruct(result, result) } diff --git a/tests/testthat/test-epi_df.R b/tests/testthat/test-epi_df.R index 10b0015e..decd6fd7 100644 --- a/tests/testthat/test-epi_df.R +++ b/tests/testthat/test-epi_df.R @@ -1,23 +1,27 @@ test_that("new_epi_df works as intended", { # Empty tibble - wmsg = capture_warnings(a <- new_epi_df()) - expect_match(wmsg[1], - "Unknown or uninitialised column: `geo_value`.") - expect_match(wmsg[2], - "Unknown or uninitialised column: `time_value`.") + wmsg <- capture_warnings(a <- new_epi_df()) + expect_match( + wmsg[1], + "Unknown or uninitialised column: `geo_value`." + ) + expect_match( + wmsg[2], + "Unknown or uninitialised column: `time_value`." + ) expect_true(is_epi_df(a)) expect_identical(attributes(a)$metadata$geo_type, "custom") expect_identical(attributes(a)$metadata$time_type, "custom") expect_true(lubridate::is.POSIXt(attributes(a)$metadata$as_of)) - + # Simple non-empty tibble with geo_value and time_value cols tib <- tibble::tibble( x = 1:10, y = 1:10, time_value = rep(seq(as.Date("2020-01-01"), by = 1, length.out = 5), times = 2), geo_value = rep(c("ca", "hi"), each = 5) ) - - epi_tib = new_epi_df(tib) + + epi_tib <- new_epi_df(tib) expect_true(is_epi_df(epi_tib)) expect_length(epi_tib, 4L) expect_identical(attributes(epi_tib)$metadata$geo_type, "state") @@ -32,10 +36,72 @@ test_that("as_epi_df errors when additional_metadata is not a list", { dplyr::slice_tail(n = 6) %>% tsibble::as_tsibble() %>% dplyr::mutate( - state = rep("MA",6), - pol = rep(c("blue", "swing", "swing"), each = 2)) - + state = rep("MA", 6), + pol = rep(c("blue", "swing", "swing"), each = 2) + ) + expect_error( - as_epi_df(ex_input, additional_metadata = c(other_keys = "state", "pol")), - "`additional_metadata` must be a list type.") -}) \ No newline at end of file + as_epi_df(ex_input, additional_metadata = c(other_keys = "state", "pol")), + "`additional_metadata` must be a list type." + ) +}) + +# select fixes + +tib <- tibble::tibble( + x = 1:10, y = 1:10, + time_value = rep(seq(as.Date("2020-01-01"), + by = 1, length.out = 5 + ), times = 2), + geo_value = rep(c("ca", "hi"), each = 5) +) +epi_tib <- epiprocess::new_epi_df(tib) +test_that("grouped epi_df maintains type for select", { + grouped_epi <- epi_tib %>% group_by(geo_value) + selected_df <- grouped_epi %>% select(-y) + expect_true(inherits(selected_df, "epi_df")) + # make sure that the attributes are right + epi_attr <- attributes(selected_df) + expect_identical(epi_attr$names, c("geo_value", "time_value", "x")) + expect_identical(epi_attr$row.names, seq(1, 10)) + expect_identical(epi_attr$groups, attributes(grouped_epi)$groups) + expect_identical(epi_attr$metadata, attributes(epi_tib)$metadata) + expect_identical(selected_df, epi_tib %>% select(-y) %>% group_by(geo_value)) +}) + +test_that("grouped epi_df drops type when dropping keys", { + grouped_epi <- epi_tib %>% group_by(geo_value) + selected_df <- grouped_epi %>% select(geo_value) + expect_true(!inherits(selected_df, "epi_df")) +}) + +test_that("grouped epi_df handles extra keys correctly", { + tib <- tibble::tibble( + x = 1:10, y = 1:10, + time_value = rep(seq(as.Date("2020-01-01"), + by = 1, length.out = 5 + ), times = 2), + geo_value = rep(c("ca", "hi"), each = 5), + extra_key = rep(seq(as.Date("2020-01-01"), + by = 1, length.out = 5 + ), times = 2) + ) + epi_tib <- epiprocess::new_epi_df(tib, + additional_metadata = list(other_keys = "extra_key") + ) + attributes(epi_tib) + grouped_epi <- epi_tib %>% group_by(geo_value) + selected_df <- grouped_epi %>% select(-extra_key) + expect_true(inherits(selected_df, "epi_df")) + # make sure that the attributes are right + old_attr <- attributes(epi_tib) + epi_attr <- attributes(selected_df) + expect_identical(epi_attr$names, c("geo_value", "time_value", "x", "y")) + expect_identical(epi_attr$row.names, seq(1, 10)) + expect_identical(epi_attr$groups, attributes(grouped_epi)$groups) + expect_identical(epi_attr$metadata, list( + geo_type = "state", time_type = "day", + as_of = old_attr$metadata$as_of, + other_keys = character(0) + )) +}) diff --git a/tests/testthat/test-methods-epi_df.R b/tests/testthat/test-methods-epi_df.R index 9d03cf93..aeb08ced 100644 --- a/tests/testthat/test-methods-epi_df.R +++ b/tests/testthat/test-methods-epi_df.R @@ -124,3 +124,40 @@ test_that("Metadata and grouping are dropped by `as_tibble`", { !any(c("metadata", "groups") %in% names(attributes(grouped_converted))) ) }) + +test_that("Renaming columns gives appropriate colnames and metadata", { + edf <- tibble::tibble(geo_value = 1, time_value = 1, age = 1, value = 1) %>% + as_epi_df(additional_metadata = list(other_keys = "age")) + # renaming using base R + renamed_edf1 <- edf %>% + `[`(c("geo_value", "time_value", "age", "value")) %>% + `names<-`(c("geo_value", "time_value", "age_group", "value")) + expect_identical(names(renamed_edf1), c("geo_value", "time_value", "age_group", "value")) + expect_identical(attr(renamed_edf1, "metadata")$other_keys, c("age_group")) + # renaming using select + renamed_edf2 <- edf %>% + as_epi_df(additional_metadata = list(other_keys = "age")) %>% + select(geo_value, time_value, age_group = age, value) + expect_identical(renamed_edf1, renamed_edf2) +}) + +test_that("Renaming columns while grouped gives appropriate colnames and metadata", { + gedf <- tibble::tibble(geo_value = 1, time_value = 1, age = 1, value = 1) %>% + as_epi_df(additional_metadata = list(other_keys = "age")) %>% + group_by(geo_value) + # renaming using base R + renamed_gedf1 <- gedf %>% + `[`(c("geo_value", "time_value", "age", "value")) %>% + `names<-`(c("geo_value", "time_value", "age_group", "value")) + # tets type preservation + expect_true(inherits(renamed_gedf1, "epi_df")) + expect_true(inherits(renamed_gedf1, "grouped_df")) + # the names are right + expect_identical(names(renamed_gedf1), c("geo_value", "time_value", "age_group", "value")) + expect_identical(attr(renamed_gedf1, "metadata")$other_keys, c("age_group")) + # renaming using select + renamed_gedf2 <- gedf %>% + as_epi_df(additional_metadata = list(other_keys = "age")) %>% + select(geo_value, time_value, age_group = age, value) + expect_identical(renamed_gedf1, renamed_gedf2) +})