
Commit dce69d4

Author: admin (committed)
Added an error message to the epi_slide function to address issue cmu-delphi#65.
Merge branch 'main' into epi_slide-error_message
2 parents 7c636a7 + 5265efa commit dce69d4

10 files changed: +62 −60 lines
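The epi_slide() change referenced in the commit message is in a file that is not rendered below. As a rough, hypothetical sketch only (the helper name and message are assumptions, not the actual diff), input validation in this codebase raises errors via an internal Abort() helper, as seen in R/archive.R; a self-contained stand-in using plain stop():

# Hypothetical sketch only -- not the actual change from this commit.
# Errors elsewhere in the package go through an internal Abort() helper;
# plain stop() is used here so the sketch runs on its own.
check_ref_time_values = function(x, ref_time_values) {
  if (!all(ref_time_values %in% unique(x$time_value))) {
    stop("All `ref_time_values` must appear in `x$time_value`.")
  }
  invisible(ref_time_values)
}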

.Rbuildignore

Lines changed: 1 addition & 2 deletions
@@ -4,5 +4,4 @@
 ^\.github$
 ^docs$
 ^_pkgdown.yml
-^vignettes$
-^index\.md$
+^index\.md$

DESCRIPTION

Lines changed: 2 additions & 1 deletion
@@ -18,7 +18,6 @@ Description: This package introduces a common data structure for
 License: MIT + file LICENSE
 Imports:
     data.table,
-    delphi.epidata,
     dplyr,
     fabletools,
     feasts,
@@ -37,7 +36,9 @@ Imports:
     utils
 Suggests:
     covidcast,
+    delphi.epidata,
     gginnards,
+    ggplot2,
     knitr,
     outbreaks,
     rmarkdown,
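Moving delphi.epidata from Imports to Suggests means it is no longer installed automatically; code or vignettes that use a suggested package generally guard the call. A generic sketch of that pattern (illustration only, not code from this commit):

# Generic pattern for a package listed under Suggests.
if (requireNamespace("delphi.epidata", quietly = TRUE)) {
  # fetch data with delphi.epidata here
} else {
  message("Install the `delphi.epidata` package to run this example.")
}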

NAMESPACE

Lines changed: 1 addition & 0 deletions
@@ -45,6 +45,7 @@ export(rename)
 export(slice)
 export(ungroup)
 export(unnest)
+importFrom(R6,R6Class)
 importFrom(data.table,as.data.table)
 importFrom(data.table,between)
 importFrom(data.table,key)
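The new importFrom(R6,R6Class) directive corresponds to the #' @importFrom R6 R6Class tag added in R/archive.R below; NAMESPACE is regenerated by roxygen2 rather than edited by hand. A minimal, hypothetical example of the same mechanism:

# Hypothetical example (not from this repo): tagging an R6 generator with
# roxygen comments and re-documenting (e.g., devtools::document()) writes
# export(counter) and importFrom(R6,R6Class) into NAMESPACE.
#' A tiny counter class.
#' @importFrom R6 R6Class
#' @export
counter = R6::R6Class("counter",
  public = list(
    n = 0,
    add = function() {
      self$n = self$n + 1
      invisible(self)
    }
  )
)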

R/archive.R

Lines changed: 50 additions & 49 deletions
@@ -16,7 +16,7 @@
 #' @details An `epi_archive` is an R6 class which contains a data table `DT`, of
 #' class `data.table` from the `data.table` package, with (at least) the
 #' following columns:
-#'
+#'
 #' * `geo_value`: the geographic value associated with each row of measurements.
 #' * `time_value`: the time value associated with each row of measurements.
 #' * `version`: the time value specifying the version for each row of
@@ -31,7 +31,7 @@
 #' on `DT` directly). There can only be a single row per unique combination of
 #' key variables, and thus the key variables are critical for figuring out how
 #' to generate a snapshot of data from the archive, as of a given version.
-#'
+#'
 #' In general, last observation carried forward (LOCF) is used to data in
 #' between recorded versions. Currently, deletions must be represented as
 #' revising a row to a special state (e.g., making the entries `NA` or
@@ -43,7 +43,7 @@
 #' reference semantics. A primary consequence of this is that objects are not
 #' copied when modified. You can read more about this in Hadley Wickham's
 #' [Advanced R](https://adv-r.hadley.nz/r6.html#r6-semantics) book.
-#'
+#'
 #' @section Metadata:
 #' The following pieces of metadata are included as fields in an `epi_archive`
 #' object:
@@ -75,7 +75,8 @@
 #' sliding computation at any given reference time point t is performed on
 #' **data that would have been available as of t**. More details on `slide()`
 #' are documented in the wrapper function `epix_slide()`.
-#'
+#'
+#' @importFrom R6 R6Class
 #' @export
 epi_archive =
 R6::R6Class(
@@ -88,7 +89,7 @@ epi_archive =
 additional_metadata = NULL,
 #' @description Creates a new `epi_archive` object.
 #' @param x A data frame, data table, or tibble, with columns `geo_value`,
-#' `time_value`, `version`, and then any additional number of columns.
+#' `time_value`, `version`, and then any additional number of columns.
 #' @param geo_type Type for the geo values. If missing, then the function will
 #' attempt to infer it from the geo values present; if this fails, then it
 #' will be set to "custom".
@@ -104,12 +105,12 @@ epi_archive =
 #' @return An `epi_archive` object.
 #' @importFrom data.table as.data.table key setkeyv
 initialize = function(x, geo_type, time_type, other_keys,
-additional_metadata) {
+additional_metadata) {
 # Check that we have a data frame
 if (!is.data.frame(x)) {
 Abort("`x` must be a data frame.")
 }
-
+
 # Check that we have geo_value, time_value, version columns
 if (!("geo_value" %in% names(x))) {
 Abort("`x` must contain a `geo_value` column.")
@@ -120,7 +121,7 @@ epi_archive =
 if (!("version" %in% names(x))) {
 Abort("`x` must contain a `version` column.")
 }
-
+
 # If geo type is missing, then try to guess it
 if (missing(geo_type)) {
 geo_type = guess_geo_type(x$geo_value)
@@ -130,7 +131,7 @@ epi_archive =
 if (missing(time_type)) {
 time_type = guess_time_type(x$time_value)
 }
-
+
 # Finish off with small checks on keys variables and metadata
 if (missing(other_keys)) other_keys = NULL
 if (missing(additional_metadata)) additional_metadata = list()
@@ -144,7 +145,7 @@ epi_archive =
 c("geo_type", "time_type"))) {
 Warn("`additional_metadata` names overlap with existing metadata fields \"geo_type\", \"time_type\".")
 }
-
+
 # Create the data table; if x was an un-keyed data.table itself,
 # then the call to as.data.table() will fail to set keys, so we
 # need to check this, then do it manually if needed
@@ -162,8 +163,8 @@ epi_archive =
 cat("An `epi_archive` object, with metadata:\n")
 cat(sprintf("* %-9s = %s\n", "geo_type", self$geo_type))
 cat(sprintf("* %-9s = %s\n", "time_type", self$time_type))
-if (!is.null(self$additional_metadata)) {
-sapply(self$additional_metadata, function(m) {
+if (!is.null(self$additional_metadata)) {
+sapply(self$additional_metadata, function(m) {
 cat(sprintf("* %-9s = %s\n", names(m), m))
 })
 }
@@ -177,7 +178,7 @@ epi_archive =
 cat(sprintf("* %-14s = %s\n", "max version",
 max(self$DT$version)))
 cat("----------\n")
-cat(sprintf("Data archive (stored in DT field): %i x %i\n",
+cat(sprintf("Data archive (stored in DT field): %i x %i\n",
 nrow(self$DT), ncol(self$DT)))
 cat("----------\n")
 cat(sprintf("Public methods: %s",
@@ -194,7 +195,7 @@ epi_archive =
 other_keys = setdiff(key(self$DT),
 c("geo_value", "time_value", "version"))
 if (length(other_keys) == 0) other_keys = NULL
-
+
 # Check a few things on max_version
 if (!identical(class(max_version), class(self$DT$version))) {
 Abort("`max_version` and `DT$version` must have same class.")
@@ -208,25 +209,25 @@ epi_archive =
 if (max_version == self_max) {
 Warn("Getting data as of the latest version possible. For a variety of reasons, it is possible that we only have a preliminary picture of this version (e.g., the upstream source has updated it but we have not seen it due to latency in synchronization). Thus, the snapshot that we produce here might not be reproducible at a later time (e.g., when the archive has caught up in terms of synchronization).")
 }
-
+
 # Filter by version and return
 return(
-# Make sure to use data.table ways of filtering and selecting
+# Make sure to use data.table ways of filtering and selecting
 self$DT[between(time_value,
 min_time_value,
 max_version) &
 version <= max_version, ] %>%
 unique(by = c("geo_value", "time_value", other_keys),
 fromLast = TRUE) %>%
-tibble::as_tibble() %>%
+tibble::as_tibble() %>%
 dplyr::select(-.data$version) %>%
 as_epi_df(geo_type = self$geo_type,
 time_type = self$time_type,
 as_of = max_version,
 additional_metadata = c(self$additional_metadata,
 other_keys = other_keys))
 )
-},
+},
 #####
 #' @description Merges another `data.table` with the current one, and allows for
 #' a post-filling of `NA` values by last observation carried forward (LOCF).
@@ -235,7 +236,7 @@ epi_archive =
 merge = function(y, ..., locf = TRUE, nan = NA) {
 # Check we have a `data.table` object
 if (!(inherits(y, "data.table") || inherits(y, "epi_archive"))) {
-Abort("`y` must be of class `data.table` or `epi_archive`.")
+Abort("`y` must be of class `data.table` or `epi_archive`.")
 }
 
 # Use the data.table merge function, carrying through ... args
@@ -250,42 +251,42 @@ epi_archive =
 
 # Important: use nafill and not setnafill because the latter
 # returns the entire data frame by reference, and the former can
-# be set to act on particular columns by reference using :=
+# be set to act on particular columns by reference using :=
 self$DT[,
-(cols) := nafill(.SD, type = "locf", nan = nan),
-.SDcols = cols,
+(cols) := nafill(.SD, type = "locf", nan = nan),
+.SDcols = cols,
 by = by]
 }
-},
+},
 #####
 #' @description Slides a given function over variables in an `epi_archive`
 #' object. See the documentation for the wrapper function `epix_as_of()` for
-#' details.
+#' details.
 #' @importFrom data.table key
 #' @importFrom rlang !! !!! enquo enquos is_quosure sym syms
-slide = function(f, ..., n = 7, group_by, ref_time_values,
+slide = function(f, ..., n = 7, group_by, ref_time_values,
 time_step, new_col_name = "slide_value",
 as_list_col = FALSE, names_sep = "_",
-all_rows = FALSE) {
+all_rows = FALSE) {
 # If missing, then set ref time values to be everything; else make
-# sure we intersect with observed time values
+# sure we intersect with observed time values
 if (missing(ref_time_values)) {
 ref_time_values = unique(self$DT$time_value)
 }
 else {
 ref_time_values = ref_time_values[ref_time_values %in%
 unique(self$DT$time_value)]
 }
-
-# If a custom time step is specified, then redefine units
+
+# If a custom time step is specified, then redefine units
 before_num = n-1
 if (!missing(time_step)) before_num = time_step(n-1)
-
+
 # What to group by? If missing, set according to internal keys
 if (missing(group_by)) {
 group_by = setdiff(key(self$DT), c("time_value", "version"))
 }
-
+
 # Symbolize column name, defuse grouping variables. We have to do
 # the middle step here which is a bit complicated (unfortunately)
 # since the function epix_slide() could have called the current one,
@@ -297,20 +298,20 @@ epi_archive =
 
 # Key variable names, apart from time value and version
 key_vars = setdiff(key(self$DT), c("time_value", "version"))
-
+
 # Computation for one group, one time value
 comp_one_grp = function(.data_group,
-f, ...,
+f, ...,
 time_value,
 key_vars,
 new_col) {
-# Carry out the specified computation
+# Carry out the specified computation
 comp_value = f(.data_group, ...)
 
 # Count the number of appearances of the reference time value.
 # Note: ideally, we want to directly count occurrences of the ref
 # time value but due to latency, this will often not appear in the
-# data group. So we count the number of unique key values, outside
+# data group. So we count the number of unique key values, outside
 # of the time value column
 count = sum(!duplicated(.data_group[, key_vars]))
 
@@ -344,23 +345,23 @@ epi_archive =
 else {
 Abort("The slide computation must return an atomic vector or a data frame.")
 }
-
+
 # Note that we've already recycled comp value to make size stable,
 # so tibble() will just recycle time value appropriately
-return(tibble::tibble(time_value = time_value,
+return(tibble::tibble(time_value = time_value,
 !!new_col := comp_value))
 }
-
+
 # If f is not missing, then just go ahead, slide by group
 if (!missing(f)) {
 if (rlang::is_formula(f)) f = rlang::as_function(f)
-
+
 x = purrr::map_dfr(ref_time_values, function(t) {
 self$as_of(t, min_time_value = t - before_num) %>%
-tibble::as_tibble() %>%
+tibble::as_tibble() %>%
 dplyr::group_by(!!!group_by) %>%
 dplyr::group_modify(comp_one_grp,
-f = f, ...,
+f = f, ...,
 time_value = t,
 key_vars = key_vars,
 new_col = new_col,
@@ -378,14 +379,14 @@ epi_archive =
 if (length(quos) > 1) {
 Abort("If `f` is missing then only a single computation can be specified via `...`.")
 }
-
+
 quo = quos[[1]]
 f = function(x, quo, ...) rlang::eval_tidy(quo, x)
 new_col = sym(names(rlang::quos_auto_name(quos)))
 
 x = purrr::map_dfr(ref_time_values, function(t) {
 self$as_of(t, min_time_value = t - before_num) %>%
-tibble::as_tibble() %>%
+tibble::as_tibble() %>%
 dplyr::group_by(!!!group_by) %>%
 dplyr::group_modify(comp_one_grp,
 f = f, quo = quo,
@@ -396,12 +397,12 @@ epi_archive =
 dplyr::ungroup()
 })
 }
-
+
 # Unnest if we need to
 if (!as_list_col) {
 x = tidyr::unnest(x, !!new_col, names_sep = names_sep)
 }
-
+
 # Join to get all rows, if we need to, then return
 if (all_rows) {
 cols = c(as.character(group_by), "time_value")
@@ -412,7 +413,7 @@ epi_archive =
 }
 )
 )
-
+
 #' Convert to `epi_archive` format
 #'
 #' Converts a data frame, data table, or tibble into an `epi_archive`
@@ -448,15 +449,15 @@ epi_archive =
 #'
 #' @export
 as_epi_archive = function(x, geo_type, time_type, other_keys,
-additional_metadata = list()) {
-epi_archive$new(x, geo_type, time_type, other_keys, additional_metadata)
+additional_metadata = list()) {
+epi_archive$new(x, geo_type, time_type, other_keys, additional_metadata)
 }
 
 #' Test for `epi_archive` format
 #'
 #' @param x An object.
 #' @return `TRUE` if the object inherits from `epi_archive`.
-#'
+#'
 #' @export
 is_epi_archive = function(x) {
 inherits(x, "epi_archive")
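
A minimal usage sketch of the archive described in the roxygen comments above, assuming the epiprocess package is installed and that the epix_as_of() wrapper mentioned there takes a max_version argument; the toy values are made up:

# Build a toy archive: each row records the value of `cases` for one
# (geo_value, time_value) pair as it looked at a given `version`.
library(epiprocess)

x = data.frame(
  geo_value  = "ca",
  time_value = as.Date("2020-06-01") + 0:2,
  version    = as.Date("2020-06-02") + 0:2,
  cases      = c(1, 3, 5)
)

arch = as_epi_archive(x)  # geo_type/time_type are guessed when missing

# Snapshot as of a given version: for each key, keep the latest row with
# version <= max_version (the LOCF semantics described above), as an epi_df.
snap = epix_as_of(arch, max_version = as.Date("2020-06-03"))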

man/as_tsibble.epi_df.Rd

Lines changed: 1 addition & 1 deletion
Some generated files are not rendered by default.

man/epix_as_of.Rd

Lines changed: 1 addition & 1 deletion
Some generated files are not rendered by default.

man/epix_merge.Rd

Lines changed: 1 addition & 1 deletion
Some generated files are not rendered by default.

man/epix_slide.Rd

Lines changed: 1 addition & 1 deletion
Some generated files are not rendered by default.

man/print.epi_df.Rd

Lines changed: 1 addition & 1 deletion
Some generated files are not rendered by default.
