Skip to content

Commit eb4b355

Browse files
authored
Merge pull request #443 from cmu-delphi/ds/r6-clean-lcb/suggests2
Edit R6 refactor
2 parents 92ce8f5 + cf6a9e3 commit eb4b355

10 files changed

+78
-76
lines changed

NAMESPACE

+3-1
Original file line numberDiff line numberDiff line change
@@ -93,13 +93,15 @@ importFrom(checkmate,assert_scalar)
9393
importFrom(checkmate,checkInt)
9494
importFrom(checkmate,check_atomic)
9595
importFrom(checkmate,check_data_frame)
96+
importFrom(checkmate,check_names)
9697
importFrom(checkmate,test_set_equal)
9798
importFrom(checkmate,test_subset)
9899
importFrom(checkmate,vname)
100+
importFrom(cli,cat_line)
99101
importFrom(cli,cli_abort)
100-
importFrom(cli,cli_inform)
101102
importFrom(cli,cli_vec)
102103
importFrom(cli,cli_warn)
104+
importFrom(cli,format_message)
103105
importFrom(data.table,":=")
104106
importFrom(data.table,address)
105107
importFrom(data.table,as.data.table)

R/archive.R

+15-9
Original file line numberDiff line numberDiff line change
@@ -75,6 +75,8 @@ validate_version_bound <- function(version_bound, x, na_ok = FALSE,
7575
#' @return `max(x$version)` if it has any rows; raises error if it has 0 rows or
7676
#' an `NA` version value
7777
#'
78+
#' @importFrom checkmate check_names
79+
#'
7880
#' @export
7981
max_version_with_row_in <- function(x) {
8082
if (nrow(x) == 0L) {
@@ -87,7 +89,8 @@ max_version_with_row_in <- function(x) {
8789
class = "epiprocess__max_version_cannot_be_used"
8890
)
8991
} else {
90-
version_col <- purrr::pluck(x, "version") # error not NULL if doesn't exist
92+
check_names(names(x), must.include = "version")
93+
version_col <- x[["version"]]
9194
if (anyNA(version_col)) {
9295
cli_abort("version values cannot be NA",
9396
class = "epiprocess__version_values_must_not_be_na"
@@ -364,8 +367,7 @@ new_epi_archive <- function(
364367
DT <- as.data.table(x, key = key_vars) # nolint: object_name_linter
365368
if (!identical(key_vars, key(DT))) setkeyv(DT, cols = key_vars)
366369

367-
maybe_first_duplicate_key_row_index <- anyDuplicated(DT, by = key(DT))
368-
if (maybe_first_duplicate_key_row_index != 0L) {
370+
if (anyDuplicated(DT, by = key(DT)) != 0L) {
369371
cli_abort("`x` must have one row per unique combination of the key variables. If you
370372
have additional key variables other than `geo_value`, `time_value`, and
371373
`version`, such as an age group column, please specify them in `other_keys`.
@@ -466,7 +468,7 @@ as_epi_archive <- function(x, geo_type = NULL, time_type = NULL, other_keys = NU
466468
#' @param methods Boolean; whether to print all available methods of
467469
#' the archive
468470
#'
469-
#' @importFrom cli cli_inform
471+
#' @importFrom cli cat_line format_message
470472
#' @importFrom rlang check_dots_empty
471473
#' @export
472474
print.epi_archive <- function(x, ..., class = TRUE, methods = TRUE) {
@@ -477,23 +479,27 @@ print.epi_archive <- function(x, ..., class = TRUE, methods = TRUE) {
477479
))
478480
}
479481

480-
cli_inform(
482+
cat_line(format_message(
481483
c(
482484
">" = if (class) "An `epi_archive` object, with metadata:",
483485
"i" = if (length(setdiff(key(x$DT), c("geo_value", "time_value", "version"))) > 0) {
484486
"Non-standard DT keys: {setdiff(key(x$DT), c('geo_value', 'time_value', 'version'))}"
485487
},
486-
"i" = "Min/max time values: {min(x$DT$time_value)} / {max(x$DT$time_value)}",
487-
"i" = "First/last version with update: {min(x$DT$version)} / {max(x$DT$version)}",
488+
"i" = if (nrow(x$DT) != 0L) {
489+
"Min/max time values: {min(x$DT$time_value)} / {max(x$DT$time_value)}"
490+
},
491+
"i" = if (nrow(x$DT) != 0L) {
492+
"First/last version with update: {min(x$DT$version)} / {max(x$DT$version)}"
493+
},
488494
"i" = if (!is.na(x$clobberable_versions_start)) {
489495
"Clobberable versions start: {x$clobberable_versions_start}"
490496
},
491497
"i" = "Versions end: {x$versions_end}",
492498
"i" = "A preview of the table ({nrow(x$DT)} rows x {ncol(x$DT)} columns):"
493499
)
494-
)
495-
500+
))
496501
print(x$DT[])
502+
497503
return(invisible(x))
498504
}
499505

R/epiprocess.R

+1-1
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,7 @@
88
#' assert_logical assert_list assert_character assert_class
99
#' assert_int assert_numeric check_data_frame vname check_atomic
1010
#' anyInfinite test_subset test_set_equal checkInt
11-
#' @importFrom cli cli_abort cli_inform cli_warn
11+
#' @importFrom cli cli_abort cli_warn
1212
#' @importFrom rlang %||%
1313
#' @name epiprocess
1414
"_PACKAGE"

R/grouped_epi_archive.R

+5-5
Original file line numberDiff line numberDiff line change
@@ -80,7 +80,7 @@ new_grouped_epi_archive <- function(x, vars, drop) {
8080
list(
8181
private = private
8282
),
83-
class = c("grouped_epi_archive", "epi_archive")
83+
class = "grouped_epi_archive"
8484
))
8585
}
8686

@@ -216,7 +216,7 @@ epix_slide.grouped_epi_archive <- function(x, f, ..., before, ref_time_values,
216216
if ("group_by" %in% nse_dots_names(...)) {
217217
cli_abort("
218218
The `group_by` argument to `slide` has been removed; please use
219-
the `group_by` S3 generic function or `$group_by` R6 method
219+
the `group_by()` S3 generic function
220220
before the slide instead. (If you were instead trying to pass a
221221
`group_by` argument to `f` or create a column named `group_by`,
222222
this check is a false positive, but you will still need to use a
@@ -370,7 +370,7 @@ epix_slide.grouped_epi_archive <- function(x, f, ..., before, ref_time_values,
370370
# DT; convert and wrap:
371371
data.table::setattr(.data_group, "sorted", dt_key)
372372
data.table::setDT(.data_group, key = dt_key)
373-
.data_group_archive <- as_of_archive %>% clone()
373+
.data_group_archive <- as_of_archive
374374
.data_group_archive$DT <- .data_group
375375
comp_one_grp(.data_group_archive, .group_key,
376376
f = f, ...,
@@ -437,8 +437,8 @@ is_grouped_epi_archive <- function(x) {
437437

438438
#' @export
439439
clone.grouped_epi_archive <- function(x, ...) {
440-
ungrouped <- x$private$ungrouped %>% clone()
441-
new_grouped_epi_archive(ungrouped, x$private$vars, x$private$drop)
440+
x$private$ungrouped <- x$private$ungrouped %>% clone()
441+
x
442442
}
443443

444444

R/methods-epi_archive.R

+24-24
Original file line numberDiff line numberDiff line change
@@ -23,38 +23,38 @@
2323
#' @return An `epi_df` object.
2424
#'
2525
#' @examples
26-
#' # warning message of data latency shown
2726
#' epix_as_of(
2827
#' archive_cases_dv_subset,
2928
#' max_version = max(archive_cases_dv_subset$DT$version)
3029
#' )
3130
#'
3231
#' range(archive_cases_dv_subset$DT$version) # 2020-06-02 -- 2021-12-01
3332
#'
34-
#' epix_as_of(
35-
#' archive_cases_dv_subset,
36-
#' max_version = as.Date("2020-06-12")
37-
#' )
33+
#' epix_as_of(archive_cases_dv_subset, as.Date("2020-06-12"))
3834
#'
39-
#' # When fetching a snapshot as of the latest version with update data in the
40-
#' # archive, a warning is issued by default, as this update data might not yet
41-
#' # be finalized (for example, if data versions are labeled with dates, these
42-
#' # versions might be overwritten throughout the corresponding days with
43-
#' # additional data or "hotfixes" of erroroneous data; when we build an archive
44-
#' # based on database queries, the latest available update might still be
45-
#' # subject to change, but previous versions should be finalized). We can
46-
#' # muffle such warnings with the following pattern:
47-
#' withCallingHandlers(
48-
#' {
49-
#' epix_as_of(
50-
#' archive_cases_dv_subset,
51-
#' max_version = max(archive_cases_dv_subset$DT$version)
52-
#' )
53-
#' },
54-
#' epiprocess__snapshot_as_of_clobberable_version = function(wrn) invokeRestart("muffleWarning")
35+
#' # --- Advanced: ---
36+
#'
37+
#' # When requesting recent versions of a data set, there can be some
38+
#' # reproducibility issues. For example, requesting data as of the current date
39+
#' # may return different values based on whether today's data is available yet
40+
#' # or not. Other factors include the time it takes between data becoming
41+
#' # available and when you download the data, and whether the data provider
42+
#' # will overwrite ("clobber") version data rather than just publishing new
43+
#' # versions. You can include information about these factors by setting the
44+
#' # `clobberable_versions_start` and `versions_end` of an `epi_archive`, in
45+
#' # which case you will get warnings about potential reproducibility issues:
46+
#'
47+
#' archive_cases_dv_subset2 <- as_epi_archive(
48+
#' archive_cases_dv_subset$DT,
49+
#' # Suppose last version with an update could potentially be rewritten
50+
#' # (a.k.a. "hotfixed", "clobbered", etc.):
51+
#' clobberable_versions_start = max(archive_cases_dv_subset$DT$version),
52+
#' # Suppose today is the following day, and there are no updates out yet:
53+
#' versions_end <- max(archive_cases_dv_subset$DT$version) + 1L,
54+
#' compactify = TRUE
5555
#' )
56-
#' # Since R 4.0, there is a `globalCallingHandlers` function that can be used
57-
#' # to globally toggle these warnings.
56+
#'
57+
#' epix_as_of(archive_cases_dv_subset2, max(archive_cases_dv_subset$DT$version))
5858
#'
5959
#' @importFrom data.table between key
6060
#' @export
@@ -533,7 +533,7 @@ epix_detailed_restricted_mutate <- function(.data, ...) {
533533
out_dt <- dplyr::dplyr_col_modify(in_tbl, col_modify_cols) %>%
534534
data.table::setattr("sorted", data.table::key(.data$DT)) %>%
535535
data.table::setDT(key = key(.data$DT))
536-
out_archive <- .data %>% clone()
536+
out_archive <- .data
537537
out_archive$DT <- out_dt
538538
request_names <- names(col_modify_cols)
539539
return(list(

man/epix_as_of.Rd

+23-23
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

tests/testthat/test-epix_fill_through_version.R

-3
Original file line numberDiff line numberDiff line change
@@ -59,16 +59,13 @@ test_that("epix_fill_through_version does not mutate x", {
5959
as_epi_archive(tibble::tibble(geo_value = 1L, time_value = 1L, version = 1L, value = 10L))
6060
)) {
6161
ea_orig_before <- clone(ea_orig)
62-
ea_orig_dt_before_copy <- data.table::copy(ea_orig$DT)
6362
some_unobserved_version <- 8L
6463

6564
ea_fill_na <- epix_fill_through_version(ea_orig, some_unobserved_version, "na")
6665
expect_identical(ea_orig_before, ea_orig)
67-
expect_identical(ea_orig_dt_before_copy, ea_orig$DT)
6866

6967
ea_fill_locf <- epix_fill_through_version(ea_orig, some_unobserved_version, "locf")
7068
expect_identical(ea_orig_before, ea_orig)
71-
expect_identical(ea_orig_dt_before_copy, ea_orig$DT)
7269
}
7370
})
7471

tests/testthat/test-epix_merge.R

+1-2
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,5 @@
11
test_that("epix_merge requires forbids on invalid `y`", {
2-
ea <- archive_cases_dv_subset %>%
3-
clone()
2+
ea <- archive_cases_dv_subset
43
expect_error(epix_merge(ea, data.frame(x = 1)))
54
})
65

tests/testthat/test-epix_slide.R

+2-3
Original file line numberDiff line numberDiff line change
@@ -371,7 +371,6 @@ test_that("epix_slide with all_versions option has access to all older versions"
371371
}
372372

373373
ea_orig_mirror <- ea %>% clone()
374-
ea_orig_mirror$DT <- data.table::copy(ea_orig_mirror$DT)
375374

376375
result1 <- ea %>%
377376
group_by() %>%
@@ -485,7 +484,7 @@ test_that("epix_as_of and epix_slide with long enough window are compatible", {
485484
)
486485

487486
# Test the same sort of thing when grouping by geo in an archive with multiple geos.
488-
ea_multigeo <- ea %>% clone()
487+
ea_multigeo <- ea
489488
ea_multigeo$DT <- rbind(
490489
ea_multigeo$DT,
491490
copy(ea_multigeo$DT)[, geo_value := "y"][, binary := -binary][]
@@ -565,7 +564,7 @@ test_that("epix_slide with all_versions option works as intended", {
565564
# back depending on the decisions there:
566565
#
567566
# test_that("`epix_slide` uses `versions_end` as a resulting `epi_df`'s `as_of`", {
568-
# ea_updated_stale = ea %>% clone()
567+
# ea_updated_stale = ea
569568
# ea_updated_stale$versions_end <- ea_updated_stale$versions_end + 3 # (dbl)
570569
# #
571570
# expect_identical(

tests/testthat/test-methods-epi_archive.R

+4-5
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,6 @@
11
library(dplyr)
22

3-
ea <- archive_cases_dv_subset %>%
4-
clone()
3+
ea <- archive_cases_dv_subset
54

65
ea2_data <- tibble::tribble(
76
~geo_value, ~time_value, ~version, ~cases,
@@ -32,7 +31,7 @@ test_that("Warning against max_version being clobberable", {
3231
expect_warning(regexp = NA, ea %>% epix_as_of(max_version = max(ea$DT$version)))
3332
expect_warning(regexp = NA, ea %>% epix_as_of(max_version = min(ea$DT$version)))
3433
# but with `clobberable_versions_start` non-`NA`, yes
35-
ea_with_clobberable <- ea %>% clone()
34+
ea_with_clobberable <- ea
3635
ea_with_clobberable$clobberable_versions_start <- max(ea_with_clobberable$DT$version)
3736
expect_warning(ea_with_clobberable %>% epix_as_of(max_version = max(ea$DT$version)))
3837
expect_warning(regexp = NA, ea_with_clobberable %>% epix_as_of(max_version = min(ea$DT$version)))
@@ -92,7 +91,7 @@ test_that("epix_truncate_version_after doesn't filter if max_verion at latest ve
9291
ea2 <- ea2_data %>%
9392
as_epi_archive()
9493

95-
ea_expected <- ea2 %>% clone()
94+
ea_expected <- ea2
9695

9796
ea_as_of <- ea2 %>%
9897
epix_truncate_versions_after(max_version = as.Date("2020-06-04"))
@@ -120,7 +119,7 @@ test_that("epix_truncate_version_after returns the same groups as input grouped_
120119
as_epi_archive()
121120
ea2 <- ea2 %>% group_by(geo_value)
122121

123-
ea_expected <- ea2 %>% clone()
122+
ea_expected <- ea2
124123

125124
ea_as_of <- ea2 %>%
126125
epix_truncate_versions_after(max_version = as.Date("2020-06-04"))

0 commit comments

Comments
 (0)