Skip to content

Commit 91bd0b1

Browse files
authored
Merge pull request #148 from cmu-delphi/fix-epirecipe-var_info-bug
Fix epirecipe var info bug
2 parents 3011fc6 + 8fe414b commit 91bd0b1

File tree

5 files changed

+34
-28
lines changed

5 files changed

+34
-28
lines changed

DESCRIPTION

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -67,4 +67,4 @@ Config/testthat/edition: 3
6767
Encoding: UTF-8
6868
LazyData: true
6969
Roxygen: list(markdown = TRUE)
70-
RoxygenNote: 7.2.1
70+
RoxygenNote: 7.2.2

R/epi_recipe.R

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -113,7 +113,7 @@ epi_recipe.epi_df <-
113113
)
114114

115115
## Add types
116-
var_info <- dplyr::full_join(get_types(x), var_info, by = "variable")
116+
var_info <- dplyr::full_join(recipes:::get_types(x), var_info, by = "variable")
117117
var_info$source <- "original"
118118

119119
## arrange to easy order
@@ -371,7 +371,7 @@ prep.epi_recipe <- function(
371371
dplyr::group_by(variable) %>%
372372
dplyr::arrange(dplyr::desc(number)) %>%
373373
dplyr::summarise(
374-
type = dplyr::first(type),
374+
type = list(dplyr::first(type)),
375375
role = as.list(unique(unlist(role))),
376376
source = dplyr::first(source),
377377
number = dplyr::first(number),

tests/testthat/test-epi_recipe.R

Lines changed: 21 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -36,19 +36,21 @@ test_that("epi_recipe formula works", {
3636
r <- epi_recipe(y ~ x, tib)
3737
ref_var_info <- tibble::tribble(
3838
~ variable, ~ type, ~ role, ~ source,
39-
"x", "numeric", "predictor", "original",
40-
"y", "numeric", "outcome", "original",
39+
"x", c("integer", "numeric"), "predictor", "original",
40+
"y", c("integer", "numeric"), "outcome", "original",
4141
"time_value", "date", "time_value", "original",
42-
"geo_value", "nominal", "geo_value", "original"
42+
"geo_value", c("string", "unordered", "nominal"), "geo_value", "original"
4343
)
4444
expect_identical(r$var_info, ref_var_info)
4545
expect_equal(nrow(r$template), 1L)
4646

4747
# with an epi_key as a predictor
4848
r <- epi_recipe(y ~ x + geo_value, tib)
49-
ref_var_info <- ref_var_info %>% tibble::add_row(
50-
variable = "geo_value", type = "nominal", role = "predictor",
51-
source = "original", .after = 1)
49+
ref_var_info <- ref_var_info %>%
50+
tibble::add_row(
51+
variable = "geo_value", type = list(c("string", "unordered", "nominal")),
52+
role = "predictor",
53+
source = "original", .after = 1)
5254
expect_identical(r$var_info, ref_var_info)
5355
expect_equal(nrow(r$template), 1L)
5456

@@ -61,11 +63,13 @@ test_that("epi_recipe formula works", {
6163

6264
# with an additional key
6365
r <- epi_recipe(y ~ x + geo_value, tib)
64-
ref_var_info <- ref_var_info %>% tibble::add_row(
65-
variable = "z", type = "nominal", role = "key",
66-
source = "original")
66+
ref_var_info <- ref_var_info %>%
67+
tibble::add_row(
68+
variable = "z", type = list(c("string", "unordered", "nominal")),
69+
role = "key",
70+
source = "original")
6771

68-
#expect_identical(r$var_info, ref_var_info)
72+
expect_identical(r$var_info, ref_var_info)
6973

7074
})
7175

@@ -81,20 +85,20 @@ test_that("epi_recipe epi_df works", {
8185
ref_var_info <- tibble::tribble(
8286
~ variable, ~ type, ~ role, ~ source,
8387
"time_value", "date", "time_value", "original",
84-
"geo_value", "nominal", "geo_value", "original",
85-
"x", "numeric", "raw", "original",
86-
"y", "numeric", "raw", "original"
88+
"geo_value", c("string", "unordered", "nominal"), "geo_value", "original",
89+
"x", c("integer", "numeric"), "raw", "original",
90+
"y", c("integer", "numeric"), "raw", "original"
8791
)
8892
expect_identical(r$var_info, ref_var_info)
8993
expect_equal(nrow(r$template), 1L)
9094

9195
r <- epi_recipe(tib, formula = y ~ x)
9296
ref_var_info <- tibble::tribble(
9397
~ variable, ~ type, ~ role, ~ source,
94-
"x", "numeric", "predictor", "original",
95-
"y", "numeric", "outcome", "original",
98+
"x", c("integer", "numeric"), "predictor", "original",
99+
"y", c("integer", "numeric"), "outcome", "original",
96100
"time_value", "date", "time_value", "original",
97-
"geo_value", "nominal", "geo_value", "original"
101+
"geo_value", c("string", "unordered", "nominal"), "geo_value", "original"
98102
)
99103
expect_identical(r$var_info, ref_var_info)
100104
expect_equal(nrow(r$template), 1L)
@@ -106,7 +110,7 @@ test_that("epi_recipe epi_df works", {
106110
)
107111
ref_var_info <- ref_var_info %>%
108112
tibble::add_row(
109-
variable = "time_value", type = "date", role = "funny_business",
113+
variable = "time_value", type = list("date"), role = "funny_business",
110114
source = "original"
111115
)
112116
expect_identical(r$var_info, ref_var_info)

vignettes/epipredict.Rmd

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -154,7 +154,7 @@ out_q <- arx_forecaster(jhu, "death_rate", c("case_rate", "death_rate"),
154154
The column `.pred_distn` in the `predictions` object is actually a "distribution" here parameterized by its quantiles. For this default forecaster, these are created using the quantiles of the residuals of the predictive model (possibly symmetrized). Here, we used 23 quantiles, but one can grab a particular quantile:
155155

156156
```{r q1}
157-
quantile(out_q$predictions$.pred_distn, p = .4)
157+
head(quantile(out_q$predictions$.pred_distn, p = .4))
158158
```
159159

160160
Or extract the entire distribution into a "long" `epi_df`, with `tau` being the probability and `q` being the value associated with that quantile.
@@ -172,7 +172,7 @@ Further simple adjustments can be made using the function.
172172
arx_args_list(
173173
lags = c(0L, 7L, 14L), ahead = 7L, min_train_window = 20L,
174174
forecast_date = NULL, target_date = NULL, levels = c(0.05, 0.95),
175-
symmetrize = TRUE, nonneg = TRUE, quantile_by_key = TRUE
175+
symmetrize = TRUE, nonneg = TRUE, quantile_by_key = "geo_value"
176176
)
177177
```
178178

vignettes/preprocessing-and-models.Rmd

Lines changed: 8 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -549,14 +549,16 @@ Notice the difference in number of rows `b1` and `b2` returns. This is because
549549
the second version, the one that doesn't use `step_epi_ahead` and `step_epi_lag`,
550550
has omitted dates compared to the one that used the `epipredict` functions.
551551
```{r}
552-
dates_used_in_training1 <-
553-
b1 %>% select(- ahead_7_death_rate) %>% na.omit() %>% select(time_value)
554-
552+
dates_used_in_training1 <- b1 %>%
553+
select(- ahead_7_death_rate) %>%
554+
na.omit() %>%
555+
select(time_value)
555556
dates_used_in_training1
556557
557-
dates_used_in_training2 <-
558-
b2 %>% select(- ahead7death_rate) %>% na.omit() %>% select(time_value)
559-
558+
dates_used_in_training2 <- b2 %>%
559+
select(- ahead7death_rate) %>%
560+
na.omit() %>%
561+
select(time_value)
560562
dates_used_in_training2
561563
```
562564

0 commit comments

Comments
 (0)