Skip to content

Commit 492234b

Browse files
authored
Merge pull request #7 from kenmawer/km-issue_22_fix
Km issue 22 fix
2 parents 5527bcb + 7912dd3 commit 492234b

File tree

3 files changed

+170
-10
lines changed

3 files changed

+170
-10
lines changed

R/epi_ahead.R

+163
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,163 @@
1+
#' Create a leading outcome
2+
#'
3+
#' `step_epi_ahead` creates a *specification* of a recipe step that
4+
#' will add new columns of leading data. Leading data will
5+
#' by default include NA values where the lag was induced.
6+
#' These can be removed with [step_naomit()], or you may
7+
#' specify an alternative filler value with the `default`
8+
#' argument.
9+
#'
10+
#' @param recipe A recipe object. The step will be added to the
11+
#' sequence of operations for this recipe.
12+
#' @param ... One or more selector functions to choose variables
13+
#' for this step. See [selections()] for more details.
14+
#' @param role For model terms created by this step, what analysis role should
15+
#' they be assigned?
16+
#' @param trained A logical to indicate if the quantities for
17+
#' preprocessing have been estimated.
18+
#' @param ahead A vector of positive integers. Each specified column will be
19+
#' lead for each value in the vector.
20+
#' @param prefix A prefix for generated column names, default to "ahead_".
21+
#' @param default Determines what fills empty rows
22+
#' left by leading/lagging (defaults to NA).
23+
#' @param keys A character vector of the keys in an epi_df
24+
#' @param columns A character string of variable names that will
25+
#' be populated (eventually) by the `terms` argument.
26+
#' @param skip A logical. Should the step be skipped when the
27+
#' recipe is baked by [bake()]? While all operations are baked
28+
#' when [prep()] is run, some operations may not be able to be
29+
#' conducted on new data (e.g. processing the outcome variable(s)).
30+
#' Care should be taken when using `skip = TRUE` as it may affect
31+
#' the computations for subsequent operations.
32+
#' @param id A character string that is unique to this step to identify it.
33+
#' @template step-return
34+
#'
35+
#' @details The step assumes that the data are already _in the proper sequential
36+
#' order_ for leading.
37+
#'
38+
#' @family row operation steps
39+
#' @export
40+
#'
41+
#' @examples
42+
#' tib <- tibble::tibble(
43+
#' x = 1:5, y = 1:5,
44+
#' time_value = seq(as.Date("2020-01-01"), by = 1, length.out = 5),
45+
#' geo_value = "ca"
46+
#' ) %>% epiprocess::as_epi_df()
47+
#'
48+
#' library(recipes)
49+
#' epi_recipe(y ~ x, data = tib) %>%
50+
#' step_epi_lag(x, lag = 2:3) %>%
51+
#' step_epi_ahead(y, ahead = 1) %>%
52+
#' prep(tib) %>%
53+
#' bake(tib)
54+
step_epi_ahead <-
55+
function(recipe,
56+
...,
57+
role = "outcome",
58+
trained = FALSE,
59+
ahead = 1,
60+
prefix = "ahead_",
61+
default = NA,
62+
keys = epi_keys(recipe),
63+
columns = NULL,
64+
skip = FALSE,
65+
id = rand_id("epi_ahead")) {
66+
add_step(
67+
recipe,
68+
step_epi_ahead_new(
69+
terms = dplyr::enquos(...),
70+
role = role,
71+
trained = trained,
72+
ahead = ahead,
73+
prefix = prefix,
74+
default = default,
75+
keys = keys,
76+
columns = columns,
77+
skip = skip,
78+
id = id
79+
)
80+
)
81+
}
82+
83+
step_epi_ahead_new <-
84+
function(terms, role, trained, ahead, prefix, default, keys,
85+
columns, skip, id) {
86+
step(
87+
subclass = "epi_ahead",
88+
terms = terms,
89+
role = role,
90+
trained = trained,
91+
ahead = ahead,
92+
prefix = prefix,
93+
default = default,
94+
keys = keys,
95+
columns = columns,
96+
skip = skip,
97+
id = id
98+
)
99+
}
100+
101+
#' @export
102+
prep.step_epi_ahead <- function(x, training, info = NULL, ...) {
103+
step_epi_ahead_new(
104+
terms = x$terms,
105+
role = x$role,
106+
trained = TRUE,
107+
ahead = x$ahead,
108+
prefix = x$prefix,
109+
default = x$default,
110+
keys = x$keys,
111+
columns = recipes_eval_select(x$terms, training, info),
112+
skip = x$skip,
113+
id = x$id
114+
)
115+
}
116+
117+
#' @export
118+
bake.step_epi_ahead <- function(object, new_data, ...) {
119+
if (!all(object$ahead == as.integer(object$ahead))) {
120+
rlang::abort("step_epi_ahead requires 'ahead' argument to be integer valued.")
121+
}
122+
123+
grid <- tidyr::expand_grid(
124+
col = object$columns, lag_val = -object$ahead) %>%
125+
dplyr::mutate(
126+
ahead_val = -lag_val,
127+
newname = glue::glue("{object$prefix}{ahead_val}_{col}")
128+
) %>%
129+
dplyr::select(-ahead_val)
130+
131+
## ensure no name clashes
132+
new_data_names <- colnames(new_data)
133+
intersection <- new_data_names %in% grid$newname
134+
if (any(intersection)) {
135+
rlang::abort(
136+
paste0("Name collision occured in `", class(object)[1],
137+
"`. The following variable names already exists: ",
138+
paste0(new_data_names[intersection], collapse = ", "),
139+
"."))
140+
}
141+
142+
ok <- object$keys
143+
lagged <- purrr::reduce(
144+
purrr::pmap(grid, epi_shift_single, x = new_data, key_cols = ok),
145+
dplyr::full_join,
146+
by = ok
147+
)
148+
149+
dplyr::full_join(new_data, lagged, by = ok) %>%
150+
dplyr::group_by(dplyr::across(dplyr::all_of(ok[-1]))) %>%
151+
dplyr::arrange(time_value) %>%
152+
dplyr::ungroup()
153+
154+
}
155+
156+
#' @export
157+
print.step_epi_ahead <-
158+
function(x, width = max(20, options()$width - 30), ...) {
159+
## TODO add printing of the lags
160+
title <- "Leading "
161+
recipes::print_step(x$columns, x$terms, x$trained, title, width)
162+
invisible(x)
163+
}

R/step_epi_lag.R renamed to R/epi_lag.R

+6-9
Original file line numberDiff line numberDiff line change
@@ -16,19 +16,19 @@
1616
#'
1717
#' @family row operation steps
1818
#' @export
19-
#' @rdname step_epi_lag
19+
#' @rdname step_epi_ahead
2020
step_epi_lag <-
2121
function(recipe,
2222
...,
2323
role = "predictor",
2424
trained = FALSE,
2525
lag = 1,
26-
prefix = ifelse(lag >= 0, "lag_","ahead_"),
26+
prefix = "lag_",
2727
default = NA,
2828
keys = epi_keys(recipe),
2929
columns = NULL,
3030
skip = FALSE,
31-
id = rand_id(ifelse(lag >= 0, "epi_lag","epi_ahead"))) {
31+
id = rand_id("epi_lag")) {
3232
add_step(
3333
recipe,
3434
step_epi_lag_new(
@@ -50,7 +50,7 @@ step_epi_lag_new <-
5050
function(terms, role, trained, lag, prefix, default, keys,
5151
columns, skip, id) {
5252
step(
53-
subclass = "step_epi_lag",
53+
subclass = "epi_lag",
5454
terms = terms,
5555
role = role,
5656
trained = trained,
@@ -86,10 +86,7 @@ bake.step_epi_lag <- function(object, new_data, ...) {
8686
rlang::abort("step_epi_lag requires 'lag' argument to be integer valued.")
8787
}
8888

89-
is_neg <- object$lag < 0
90-
91-
grid <- tidyr::expand_grid(col = object$columns,
92-
lag_val = object$lag) %>%
89+
grid <- tidyr::expand_grid(col = object$columns, lag_val = object$lag) %>%
9390
dplyr::mutate(newname = glue::glue("{object$prefix}{lag_val}_{col}"))
9491

9592
## ensure no name clashes
@@ -120,7 +117,7 @@ bake.step_epi_lag <- function(object, new_data, ...) {
120117
print.step_epi_lag <-
121118
function(x, width = max(20, options()$width - 30), ...) {
122119
## TODO add printing of the lags
123-
title <- ifelse(x$lag >= 0, "Lagging", "Leading")
120+
title <- "Lagging "
124121
recipes::print_step(x$columns, x$terms, x$trained, title, width)
125122
invisible(x)
126123
}

musings/example-recipe.R

+1-1
Original file line numberDiff line numberDiff line change
@@ -36,7 +36,7 @@ xx <- x %>% filter(time_value > "2021-12-01")
3636
# Baseline AR3
3737
r <- epi_recipe(x) %>% # if we add this as a class, maybe we get better
3838
# behaviour downstream?
39-
step_epi_lag(death_rate, lag = -7) %>%
39+
step_epi_ahead(death_rate, ahead = 7) %>%
4040
step_epi_lag(death_rate, lag = c(0, 7, 14)) %>%
4141
step_epi_lag(case_rate, lag = c(0, 7, 14)) %>%
4242
step_naomit(all_predictors()) %>%

0 commit comments

Comments
 (0)