Skip to content

Commit b4a45e6

Browse files
authored
Merge branch 'main' into main
2 parents 194a911 + c5b3715 commit b4a45e6

19 files changed

+1183
-26
lines changed

.Rbuildignore

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -8,3 +8,4 @@
88
^_pkgdown\.yml$
99
^docs$
1010
^pkgdown$
11+
^musings$

.gitignore

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -4,3 +4,4 @@
44
.Ruserdata
55
docs
66
inst/doc
7+
.DS_Store

DESCRIPTION

Lines changed: 22 additions & 26 deletions
Original file line numberDiff line numberDiff line change
@@ -1,48 +1,44 @@
11
Package: epipredict
22
Title: Basic epidemiology forecasting methods
33
Version: 0.0.0.9000
4-
Authors@R:
5-
c(
6-
person(given = "Jacob",
7-
family = "Bien",
8-
role = "aut"),
9-
person(given = "Daniel",
10-
family = "McDonald",
11-
role = "aut"),
12-
person(given = "Ryan",
13-
family = "Tibshirani",
14-
role = c("aut", "cre"),
15-
email = "[email protected]"))
4+
Authors@R: c(
5+
person("Jacob", "Bien", role = "aut"),
6+
person("Daniel", "McDonald", role = "aut"),
7+
person("Ryan", "Tibshirani", , "[email protected]", role = c("aut", "cre"))
8+
)
169
Description: What the package does (one paragraph).
1710
License: MIT + file LICENSE
18-
Encoding: UTF-8
19-
Roxygen: list(markdown = TRUE)
20-
RoxygenNote: 7.1.2
21-
Remotes:
22-
cmu-delphi/epiprocess#58
11+
URL: https://github.com/cmu-delphi/epipredict/,
12+
https://cmu-delphi.github.io/epiprocess
2313
Imports:
14+
assertthat,
15+
cli,
2416
dplyr,
17+
glue,
2518
magrittr,
26-
tibble,
27-
rlang,
2819
purrr,
29-
cli,
20+
recipes,
21+
rlang,
3022
stats,
23+
tibble,
3124
tidyr,
32-
assertthat,
3325
tidyselect,
3426
tensr
3527
Suggests:
36-
epiprocess,
37-
data.table,
3828
covidcast,
29+
data.table,
30+
epiprocess,
3931
ggplot2,
4032
knitr,
4133
lubridate,
4234
RcppRoll,
4335
rmarkdown,
4436
testthat (>= 3.0.0)
37+
VignetteBuilder:
38+
knitr
39+
Remotes:
40+
dajmcdon/epiprocess
4541
Config/testthat/edition: 3
46-
URL: https://github.com/cmu-delphi/epipredict/,
47-
https://cmu-delphi.github.io/epiprocess
48-
VignetteBuilder: knitr
42+
Encoding: UTF-8
43+
Roxygen: list(markdown = TRUE)
44+
RoxygenNote: 7.2.0

NAMESPACE

Lines changed: 19 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,10 +1,24 @@
11
# Generated by roxygen2: do not edit by hand
22

3+
S3method(bake,step_epi_ahead)
4+
S3method(bake,step_epi_lag)
5+
S3method(epi_keys,default)
6+
S3method(epi_keys,epi_df)
7+
S3method(epi_keys,recipe)
8+
S3method(epi_recipe,default)
9+
S3method(epi_recipe,epi_df)
10+
S3method(epi_recipe,formula)
11+
S3method(prep,step_epi_ahead)
12+
S3method(prep,step_epi_lag)
13+
S3method(print,step_epi_ahead)
14+
S3method(print,step_epi_lag)
315
export("%>%")
416
export(arx_args_list)
517
export(arx_forecaster)
618
export(create_lags_and_leads)
719
export(df_mat_mul)
20+
export(epi_keys)
21+
export(epi_recipe)
822
export(get_precision)
923
export(grab_names)
1024
export(knn_iteraive_ar_args_list)
@@ -13,10 +27,15 @@ export(knnarx_args_list)
1327
export(knnarx_forecaster)
1428
export(smooth_arx_args_list)
1529
export(smooth_arx_forecaster)
30+
export(step_epi_ahead)
31+
export(step_epi_lag)
32+
import(recipes)
1633
importFrom(magrittr,"%>%")
1734
importFrom(rlang,"!!")
1835
importFrom(rlang,":=")
36+
importFrom(stats,as.formula)
1937
importFrom(stats,lm)
38+
importFrom(stats,model.frame)
2039
importFrom(stats,poly)
2140
importFrom(stats,predict)
2241
importFrom(stats,quantile)

R/compat-recipes.R

Lines changed: 96 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,96 @@
1+
# These are copied from `recipes` where they are unexported
2+
3+
fun_calls <- function (f) {
4+
if (is.function(f)) fun_calls(body(f))
5+
else if (rlang::is_quosure(f)) fun_calls(rlang::quo_get_expr(f))
6+
else if (is.call(f)) {
7+
fname <- as.character(f[[1]])
8+
if (identical(fname, ".Internal"))
9+
return(fname)
10+
unique(c(fname, unlist(lapply(f[-1], fun_calls), use.names = FALSE)))
11+
}
12+
}
13+
14+
inline_check <- function(x) {
15+
funs <- fun_calls(x)
16+
funs <- funs[!(funs %in% c("~", "+", "-"))]
17+
if (length(funs) > 0) {
18+
rlang::abort(paste0(
19+
"No in-line functions should be used here; ",
20+
"use steps to define baking actions."
21+
))
22+
}
23+
invisible(x)
24+
}
25+
26+
#' @importFrom stats as.formula
27+
get_lhs_vars <- function(formula, data) {
28+
if (!rlang::is_formula(formula)) {
29+
formula <- as.formula(formula)
30+
}
31+
## Want to make sure that multiple outcomes can be expressed as
32+
## additions with no cbind business and that `.` works too (maybe)
33+
new_formula <- rlang::new_formula(lhs = NULL, rhs = rlang::f_lhs(formula))
34+
get_rhs_vars(new_formula, data)
35+
}
36+
37+
#' @importFrom stats model.frame
38+
get_rhs_vars <- function(formula, data, no_lhs = FALSE) {
39+
if (!rlang::is_formula(formula)) {
40+
formula <- as.formula(formula)
41+
}
42+
if (no_lhs) {
43+
formula <- rlang::new_formula(lhs = NULL, rhs = rlang::f_rhs(formula))
44+
}
45+
46+
## This will need a lot of work to account for cases with `.`
47+
## or embedded functions like `Sepal.Length + poly(Sepal.Width)`.
48+
## or should it? what about Y ~ log(x)?
49+
## Answer: when called from `form2args`, the function
50+
## `inline_check` stops when in-line functions are used.
51+
data_info <- attr(model.frame(formula, data[1, ]), "terms")
52+
response_info <- attr(data_info, "response")
53+
predictor_names <- names(attr(data_info, "dataClasses"))
54+
if (length(response_info) > 0 && all(response_info > 0)) {
55+
predictor_names <- predictor_names[-response_info]
56+
}
57+
predictor_names
58+
}
59+
60+
## Buckets variables into discrete, mutally exclusive types
61+
get_types <- function(x) {
62+
var_types <-
63+
c(
64+
character = "nominal",
65+
factor = "nominal",
66+
ordered = "nominal",
67+
integer = "numeric",
68+
numeric = "numeric",
69+
double = "numeric",
70+
Surv = "censored",
71+
logical = "logical",
72+
Date = "date",
73+
POSIXct = "date",
74+
list = "list",
75+
textrecipes_tokenlist = "tokenlist"
76+
)
77+
78+
classes <- lapply(x, class)
79+
res <- lapply(
80+
classes,
81+
function(x, types) {
82+
in_types <- x %in% names(types)
83+
if (sum(in_types) > 0) {
84+
# not sure what to do with multiple matches; right now
85+
## pick the first match which favors "factor" over "ordered"
86+
out <- unname(types[min(which(names(types) %in% x))])
87+
} else {
88+
out <- "other"
89+
}
90+
out
91+
},
92+
types = var_types
93+
)
94+
res <- unlist(res)
95+
tibble(variable = names(res), type = unname(res))
96+
}

R/epi_ahead.R

Lines changed: 163 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,163 @@
1+
#' Create a leading outcome
2+
#'
3+
#' `step_epi_ahead` creates a *specification* of a recipe step that
4+
#' will add new columns of leading data. Leading data will
5+
#' by default include NA values where the lag was induced.
6+
#' These can be removed with [step_naomit()], or you may
7+
#' specify an alternative filler value with the `default`
8+
#' argument.
9+
#'
10+
#' @param recipe A recipe object. The step will be added to the
11+
#' sequence of operations for this recipe.
12+
#' @param ... One or more selector functions to choose variables
13+
#' for this step. See [selections()] for more details.
14+
#' @param role For model terms created by this step, what analysis role should
15+
#' they be assigned?
16+
#' @param trained A logical to indicate if the quantities for
17+
#' preprocessing have been estimated.
18+
#' @param ahead A vector of positive integers. Each specified column will be
19+
#' lead for each value in the vector.
20+
#' @param prefix A prefix for generated column names, default to "ahead_".
21+
#' @param default Determines what fills empty rows
22+
#' left by leading/lagging (defaults to NA).
23+
#' @param keys A character vector of the keys in an epi_df
24+
#' @param columns A character string of variable names that will
25+
#' be populated (eventually) by the `terms` argument.
26+
#' @param skip A logical. Should the step be skipped when the
27+
#' recipe is baked by [bake()]? While all operations are baked
28+
#' when [prep()] is run, some operations may not be able to be
29+
#' conducted on new data (e.g. processing the outcome variable(s)).
30+
#' Care should be taken when using `skip = TRUE` as it may affect
31+
#' the computations for subsequent operations.
32+
#' @param id A character string that is unique to this step to identify it.
33+
#' @template step-return
34+
#'
35+
#' @details The step assumes that the data are already _in the proper sequential
36+
#' order_ for leading.
37+
#'
38+
#' @family row operation steps
39+
#' @export
40+
#'
41+
#' @examples
42+
#' tib <- tibble::tibble(
43+
#' x = 1:5, y = 1:5,
44+
#' time_value = seq(as.Date("2020-01-01"), by = 1, length.out = 5),
45+
#' geo_value = "ca"
46+
#' ) %>% epiprocess::as_epi_df()
47+
#'
48+
#' library(recipes)
49+
#' epi_recipe(y ~ x, data = tib) %>%
50+
#' step_epi_lag(x, lag = 2:3) %>%
51+
#' step_epi_ahead(y, ahead = 1) %>%
52+
#' prep(tib) %>%
53+
#' bake(tib)
54+
step_epi_ahead <-
55+
function(recipe,
56+
...,
57+
role = "outcome",
58+
trained = FALSE,
59+
ahead = 1,
60+
prefix = "ahead_",
61+
default = NA,
62+
keys = epi_keys(recipe),
63+
columns = NULL,
64+
skip = FALSE,
65+
id = rand_id("epi_ahead")) {
66+
add_step(
67+
recipe,
68+
step_epi_ahead_new(
69+
terms = dplyr::enquos(...),
70+
role = role,
71+
trained = trained,
72+
ahead = ahead,
73+
prefix = prefix,
74+
default = default,
75+
keys = keys,
76+
columns = columns,
77+
skip = skip,
78+
id = id
79+
)
80+
)
81+
}
82+
83+
step_epi_ahead_new <-
84+
function(terms, role, trained, ahead, prefix, default, keys,
85+
columns, skip, id) {
86+
step(
87+
subclass = "epi_ahead",
88+
terms = terms,
89+
role = role,
90+
trained = trained,
91+
ahead = ahead,
92+
prefix = prefix,
93+
default = default,
94+
keys = keys,
95+
columns = columns,
96+
skip = skip,
97+
id = id
98+
)
99+
}
100+
101+
#' @export
102+
prep.step_epi_ahead <- function(x, training, info = NULL, ...) {
103+
step_epi_ahead_new(
104+
terms = x$terms,
105+
role = x$role,
106+
trained = TRUE,
107+
ahead = x$ahead,
108+
prefix = x$prefix,
109+
default = x$default,
110+
keys = x$keys,
111+
columns = recipes_eval_select(x$terms, training, info),
112+
skip = x$skip,
113+
id = x$id
114+
)
115+
}
116+
117+
#' @export
118+
bake.step_epi_ahead <- function(object, new_data, ...) {
119+
if (!all(object$ahead == as.integer(object$ahead))) {
120+
rlang::abort("step_epi_ahead requires 'ahead' argument to be integer valued.")
121+
}
122+
123+
grid <- tidyr::expand_grid(
124+
col = object$columns, lag_val = -object$ahead) %>%
125+
dplyr::mutate(
126+
ahead_val = -lag_val,
127+
newname = glue::glue("{object$prefix}{ahead_val}_{col}")
128+
) %>%
129+
dplyr::select(-ahead_val)
130+
131+
## ensure no name clashes
132+
new_data_names <- colnames(new_data)
133+
intersection <- new_data_names %in% grid$newname
134+
if (any(intersection)) {
135+
rlang::abort(
136+
paste0("Name collision occured in `", class(object)[1],
137+
"`. The following variable names already exists: ",
138+
paste0(new_data_names[intersection], collapse = ", "),
139+
"."))
140+
}
141+
142+
ok <- object$keys
143+
lagged <- purrr::reduce(
144+
purrr::pmap(grid, epi_shift_single, x = new_data, key_cols = ok),
145+
dplyr::full_join,
146+
by = ok
147+
)
148+
149+
dplyr::full_join(new_data, lagged, by = ok) %>%
150+
dplyr::group_by(dplyr::across(dplyr::all_of(ok[-1]))) %>%
151+
dplyr::arrange(time_value) %>%
152+
dplyr::ungroup()
153+
154+
}
155+
156+
#' @export
157+
print.step_epi_ahead <-
158+
function(x, width = max(20, options()$width - 30), ...) {
159+
## TODO add printing of the lags
160+
title <- "Leading "
161+
recipes::print_step(x$columns, x$terms, x$trained, title, width)
162+
invisible(x)
163+
}

0 commit comments

Comments
 (0)