Skip to content

Commit a2c9aa1

Browse files
authored
Merge pull request #317 from cmu-delphi/ndefries/epix-slide-dots-rtv
Pass ref time value and group key to epix_slide for tidy computations
2 parents 91d270a + e047a4a commit a2c9aa1

14 files changed

+444
-54
lines changed

.Rbuildignore

+2
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,5 @@
1+
^renv$
2+
^renv\.lock$
13
^.*\.Rproj$
24
^\.Rproj\.user$
35
^LICENSE\.md$

.github/workflows/R-CMD-check.yaml

+4
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,7 @@
11
# Workflow derived from https://github.com/r-lib/actions/tree/v2/examples
22
# Need help debugging build failures? Start at https://github.com/r-lib/actions#where-to-find-help
3+
#
4+
# Created with usethis + edited to run on PRs to dev, use API key.
35
on:
46
push:
57
branches: [main, master]
@@ -27,3 +29,5 @@ jobs:
2729
needs: check
2830

2931
- uses: r-lib/actions/check-r-package@v2
32+
env:
33+
DELPHI_EPIDATA_KEY: ${{ secrets.SECRET_EPIPROCESS_GHACTIONS_DELPHI_EPIDATA_KEY }}

.github/workflows/pkgdown.yaml

+4
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,7 @@
11
# Workflow derived from https://github.com/r-lib/actions/tree/v2/examples
22
# Need help debugging build failures? Start at https://github.com/r-lib/actions#where-to-find-help
3+
#
4+
# Created with usethis + edited to run on PRs to dev, use API key.
35
on:
46
push:
57
branches: [main, master]
@@ -34,6 +36,8 @@ jobs:
3436
needs: website
3537

3638
- name: Build site
39+
env:
40+
DELPHI_EPIDATA_KEY: ${{ secrets.SECRET_EPIPROCESS_GHACTIONS_DELPHI_EPIDATA_KEY }}
3741
run: pkgdown::build_site_github_pages(new_process = FALSE, install = FALSE)
3842
shell: Rscript {0}
3943

NAMESPACE

+3
Original file line numberDiff line numberDiff line change
@@ -69,13 +69,15 @@ importFrom(data.table,key)
6969
importFrom(data.table,set)
7070
importFrom(data.table,setkeyv)
7171
importFrom(dplyr,arrange)
72+
importFrom(dplyr,bind_rows)
7273
importFrom(dplyr,dplyr_col_modify)
7374
importFrom(dplyr,dplyr_reconstruct)
7475
importFrom(dplyr,dplyr_row_slice)
7576
importFrom(dplyr,filter)
7677
importFrom(dplyr,group_by)
7778
importFrom(dplyr,group_by_drop_default)
7879
importFrom(dplyr,group_modify)
80+
importFrom(dplyr,group_vars)
7981
importFrom(dplyr,groups)
8082
importFrom(dplyr,mutate)
8183
importFrom(dplyr,relocate)
@@ -97,6 +99,7 @@ importFrom(rlang,caller_env)
9799
importFrom(rlang,check_dots_empty0)
98100
importFrom(rlang,enquo)
99101
importFrom(rlang,enquos)
102+
importFrom(rlang,env)
100103
importFrom(rlang,f_env)
101104
importFrom(rlang,f_rhs)
102105
importFrom(rlang,global_env)

NEWS.md

+20-10
Original file line numberDiff line numberDiff line change
@@ -6,16 +6,31 @@ inter-release development versions will include an additional ".9999" suffix.
66

77
## Breaking changes:
88

9-
* Changes to `epix_slide`:
10-
* The `f` computation is now required to take at least three arguments. `f`
11-
must take an `epi_df` with the same column names as the archive's `DT`,
9+
* Changes to `epi_slide` and `epix_slide`:
10+
* If `f` is a function, it is now required to take at least three arguments.
11+
`f` must take an `epi_df` with the same column names as the archive's `DT`,
1212
minus the `version` column; followed by a one-row tibble containing the
1313
values of the grouping variables for the associated group; followed by a
14-
reference time value, usually as a `Date` object; followed by any number
15-
of named arguments.
14+
reference time value, usually as a `Date` object. Optionally, it can take
15+
any number of additional arguments after that, and forward values for those
16+
arguments through `epi[x]_slide`'s `...` args.
17+
* To make your existing slide computations work, add a third argument to
18+
your `f` function to accept this new input: e.g., change `f = function(x,
19+
g, <any other arguments>) { <body> }` to `f = function(x, g, rt, <any
20+
other arguments>) { <body> }`.
1621

1722
## New features:
1823

24+
* `epi_slide` and `epix_slide` also make the window data, group key and reference
25+
time value available to slide computations specified as formulas or tidy
26+
evaluation expressions, in additional or completely new ways.
27+
* If `f` is a formula, it can now access the reference time value via `.z` or
28+
`.ref_time_value`.
29+
* If `f` is missing, the tidy evaluation expression in `...` can now refer to
30+
the window data as an `epi_df` or `tibble` with `.x`, the group key with
31+
`.group_key`, and the reference time value with `.ref_time_value`. The usual
32+
`.data` and `.env` pronouns also work, but`pick()` and `cur_data()` are not;
33+
work off of `.x` instead.
1934
* `epix_slide` has been made more like `dplyr::group_modify`. It will no longer
2035
perform element/row recycling for size stability, accepts slide computation
2136
outputs containing any number of rows, and no longer supports `all_rows`.
@@ -29,11 +44,6 @@ inter-release development versions will include an additional ".9999" suffix.
2944
more closely whether/when/how to output an `epi_df`.
3045
* To keep the old behavior, convert the output of `epix_slide()` to `epi_df`
3146
when desired and set the metadata appropriately.
32-
* `epix_slide` `f` computations passed as functions or formulas now have
33-
access to the reference time value. If `f` is a function, it is passed a
34-
Date containing the reference time value as the third argument. If a
35-
formula, `f` can access the reference time value via `.z` or
36-
`.ref_time_value`.
3747

3848
## Improvements:
3949

R/epiprocess.R

+1
Original file line numberDiff line numberDiff line change
@@ -7,3 +7,4 @@
77
#' @docType package
88
#' @name epiprocess
99
NULL
10+
utils::globalVariables(c(".x", ".group_key", ".ref_time_value"))

R/grouped_epi_archive.R

+16-2
Original file line numberDiff line numberDiff line change
@@ -186,7 +186,7 @@ grouped_epi_archive =
186186
#' object. See the documentation for the wrapper function [`epix_slide()`] for
187187
#' details.
188188
#' @importFrom data.table key address
189-
#' @importFrom rlang !! !!! enquo quo_is_missing enquos is_quosure sym syms
189+
#' @importFrom rlang !! !!! enquo quo_is_missing enquos is_quosure sym syms env
190190
slide = function(f, ..., before, ref_time_values,
191191
time_step, new_col_name = "slide_value",
192192
as_list_col = FALSE, names_sep = "_",
@@ -370,7 +370,21 @@ grouped_epi_archive =
370370
}
371371

372372
quo = quos[[1]]
373-
f = function(x, quo, ...) rlang::eval_tidy(quo, x)
373+
f = function(.x, .group_key, .ref_time_value, quo, ...) {
374+
# Convert to environment to standardize between tibble and R6
375+
# based inputs. In both cases, we should get a simple
376+
# environment with the empty environment as its parent.
377+
data_env = rlang::as_environment(.x)
378+
data_mask = rlang::new_data_mask(bottom = data_env, top = data_env)
379+
data_mask$.data <- rlang::as_data_pronoun(data_mask)
380+
# We'll also install `.x` directly, not as an
381+
# `rlang_data_pronoun`, so that we can, e.g., use more dplyr and
382+
# epiprocess operations.
383+
data_mask$.x = .x
384+
data_mask$.group_key = .group_key
385+
data_mask$.ref_time_value = .ref_time_value
386+
rlang::eval_tidy(quo, data_mask)
387+
}
374388
new_col = sym(names(rlang::quos_auto_name(quos)))
375389

376390
x = purrr::map_dfr(ref_time_values, function(ref_time_value) {

R/methods-epi_archive.R

+4-1
Original file line numberDiff line numberDiff line change
@@ -678,7 +678,10 @@ group_by.epi_archive = function(.data, ..., .add=FALSE, .drop=dplyr::group_by_dr
678678
#' computation.
679679
#' @param ... Additional arguments to pass to the function or formula specified
680680
#' via `f`. Alternatively, if `f` is missing, then `...` is interpreted as an
681-
#' expression for tidy evaluation. See details of [`epi_slide`].
681+
#' expression for tidy evaluation; in addition to referring to columns
682+
#' directly by name, the expression has access to `.data` and `.env` pronouns
683+
#' as in `dplyr` verbs, and can also refer to the `.group_key` and
684+
#' `.ref_time_value`. See details of [`epi_slide`].
682685
#' @param before How far `before` each `ref_time_value` should the sliding
683686
#' window extend? If provided, should be a single, non-NA,
684687
#' [integer-compatible][vctrs::vec_cast] number of time steps. This window

R/slide.R

+71-10
Original file line numberDiff line numberDiff line change
@@ -23,7 +23,10 @@
2323
#' If `f` is missing, then `...` will specify the computation.
2424
#' @param ... Additional arguments to pass to the function or formula specified
2525
#' via `f`. Alternatively, if `f` is missing, then the `...` is interpreted as
26-
#' an expression for tidy evaluation. See details.
26+
#' an expression for tidy evaluation; in addition to referring to columns
27+
#' directly by name, the expression has access to `.data` and `.env` pronouns
28+
#' as in `dplyr` verbs, and can also refer to `.x`, `.group_key`, and
29+
#' `.ref_time_value`. See details.
2730
#' @param before,after How far `before` and `after` each `ref_time_value` should
2831
#' the sliding window extend? At least one of these two arguments must be
2932
#' provided; the other's default will be 0. Any value provided for either
@@ -119,7 +122,8 @@
119122
#' through the `new_col_name` argument.
120123
#'
121124
#' @importFrom lubridate days weeks
122-
#' @importFrom rlang .data .env !! enquo enquos sym
125+
#' @importFrom dplyr bind_rows group_vars filter select
126+
#' @importFrom rlang .data .env !! enquo enquos sym env
123127
#' @export
124128
#' @examples
125129
#' # slide a 7-day trailing average formula on cases
@@ -166,11 +170,8 @@ epi_slide = function(x, f, ..., before, after, ref_time_values,
166170

167171
# Check that `f` takes enough args
168172
if (!missing(f) && is.function(f)) {
169-
assert_sufficient_f_args(f, ...)
173+
assert_sufficient_f_args(f, ..., n_mandatory_f_args = 3L)
170174
}
171-
172-
# Arrange by increasing time_value
173-
x = arrange(x, time_value)
174175

175176
if (missing(ref_time_values)) {
176177
ref_time_values = unique(x$time_value)
@@ -231,6 +232,35 @@ epi_slide = function(x, f, ..., before, after, ref_time_values,
231232
after <- time_step(after)
232233
}
233234

235+
min_ref_time_values = ref_time_values - before
236+
min_ref_time_values_not_in_x <- min_ref_time_values[!(min_ref_time_values %in% unique(x$time_value))]
237+
238+
# Do set up to let us recover `ref_time_value`s later.
239+
# A helper column marking real observations.
240+
x$.real = TRUE
241+
242+
# Create df containing phony data. Df has the same columns and attributes as
243+
# `x`, but filled with `NA`s aside from grouping columns. Number of rows is
244+
# equal to the number of `min_ref_time_values_not_in_x` we have * the
245+
# number of unique levels seen in the grouping columns.
246+
before_time_values_df = data.frame(time_value=min_ref_time_values_not_in_x)
247+
if (length(group_vars(x)) != 0) {
248+
before_time_values_df = dplyr::cross_join(
249+
# Get unique combinations of grouping columns seen in real data.
250+
unique(x[, group_vars(x)]),
251+
before_time_values_df
252+
)
253+
}
254+
# Automatically fill in all other columns from `x` with `NA`s, and carry
255+
# attributes over to new df.
256+
before_time_values_df <- bind_rows(x[0,], before_time_values_df)
257+
before_time_values_df$.real <- FALSE
258+
259+
x <- bind_rows(before_time_values_df, x)
260+
261+
# Arrange by increasing time_value
262+
x = arrange(x, time_value)
263+
234264
# Now set up starts and stops for sliding/hopping
235265
time_range = range(unique(x$time_value))
236266
starts = in_range(ref_time_values - before, time_range)
@@ -272,7 +302,9 @@ epi_slide = function(x, f, ..., before, after, ref_time_values,
272302
o = .data_group$time_value %in% time_values
273303
num_ref_rows = sum(o)
274304

275-
# Count the number of appearances of each reference time value
305+
# Count the number of appearances of each reference time value (these
306+
# appearances should all be real for now, but if we allow ref time values
307+
# outside of .data_group's time values):
276308
counts = .data_group %>%
277309
dplyr::filter(.data$time_value %in% time_values) %>%
278310
dplyr::count(.data$time_value) %>%
@@ -282,7 +314,7 @@ epi_slide = function(x, f, ..., before, after, ref_time_values,
282314
!all(purrr::map_lgl(slide_values_list, is.data.frame))) {
283315
Abort("The slide computations must return always atomic vectors or data frames (and not a mix of these two structures).")
284316
}
285-
317+
286318
# Unlist if appropriate:
287319
slide_values =
288320
if (as_list_col) {
@@ -318,16 +350,24 @@ epi_slide = function(x, f, ..., before, after, ref_time_values,
318350
# fills with NA equivalent.
319351
vctrs::vec_slice(slide_values, o) = orig_values
320352
} else {
353+
# This implicitly removes phony (`.real` == FALSE) observations.
321354
.data_group = filter(.data_group, o)
322355
}
323356
return(mutate(.data_group, !!new_col := slide_values))
324357
}
325358

326359
# If f is not missing, then just go ahead, slide by group
327360
if (!missing(f)) {
361+
if (rlang::is_formula(f)) f = as_slide_computation(f)
362+
f_rtv_wrapper = function(x, g, ...) {
363+
ref_time_value = min(x$time_value) + before
364+
x <- x[x$.real,]
365+
x$.real <- NULL
366+
f(x, g, ref_time_value, ...)
367+
}
328368
x = x %>%
329369
group_modify(slide_one_grp,
330-
f = f, ...,
370+
f = f_rtv_wrapper, ...,
331371
starts = starts,
332372
stops = stops,
333373
time_values = ref_time_values,
@@ -347,7 +387,18 @@ epi_slide = function(x, f, ..., before, after, ref_time_values,
347387
}
348388

349389
quo = quos[[1]]
350-
f = function(x, quo, ...) rlang::eval_tidy(quo, x)
390+
f = function(.x, .group_key, quo, ...) {
391+
.ref_time_value = min(.x$time_value) + before
392+
.x <- .x[.x$.real,]
393+
.x$.real <- NULL
394+
data_mask = rlang::as_data_mask(.x)
395+
# We'll also install `.x` directly, not as an `rlang_data_pronoun`, so
396+
# that we can, e.g., use more dplyr and epiprocess operations.
397+
data_mask$.x = .x
398+
data_mask$.group_key = .group_key
399+
data_mask$.ref_time_value = .ref_time_value
400+
rlang::eval_tidy(quo, data_mask)
401+
}
351402
new_col = sym(names(rlang::quos_auto_name(quos)))
352403

353404
x = x %>%
@@ -365,5 +416,15 @@ epi_slide = function(x, f, ..., before, after, ref_time_values,
365416
if (!as_list_col) {
366417
x = unnest(x, !!new_col, names_sep = names_sep)
367418
}
419+
420+
# Remove any remaining phony observations. When `all_rows` is TRUE, phony
421+
# observations aren't necessarily removed in `slide_one_grp`.
422+
if (all_rows) {
423+
x <- x[x$.real,]
424+
}
425+
426+
# Drop helper column `.real`.
427+
x$.real <- NULL
428+
368429
return(x)
369430
}

man/epi_slide.Rd

+6-3
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

man/epix_slide.Rd

+6-3
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

man/reexports.Rd

+1-1
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

0 commit comments

Comments
 (0)