Commit 9fe4f85

dsweber2 authored and dshemetov committed
test+fix: drop_na in data counting, default as_of
1 parent 275b7af commit 9fe4f85

File tree

3 files changed: +61 −9 lines changed

R/forecaster.R

Lines changed: 6 additions & 5 deletions
@@ -62,13 +62,10 @@ confirm_sufficient_data <- function(epi_data, ahead, args_input, buffer = 15) {
   } else {
     lag_max <- 14 # default value of 2 weeks
   }
-
   return(
     !is.infinite(ahead) &&
       epi_data %>%
-      # TODO: This isn't generalizable to other signals.
-      filter(!is.na(hhs) & !is.na(chng)) %>%
-      # TODO: Quitting forecasting because of one geo_value is bad.
+      drop_na() %>%
       group_by(geo_value) %>%
       summarise(has_enough_data = n_distinct(time_value) >= lag_max + ahead + buffer) %>%
       pull(has_enough_data) %>%
@@ -159,7 +156,11 @@ run_workflow_and_format <- function(preproc, postproc, trainer, epi_data) {
   latest <- get_test_data(recipe = preproc, x = epi_data)
   pred <- predict(workflow, latest)
   # the forecast_date may currently be the max time_value
-  true_forecast_date <- attributes(epi_data)$metadata$as_of
+  as_of <- attributes(epi_data)$metadata$as_of
+  if (is.null(as_of)) {
+    as_of <- max(epi_data$time_value)
+  }
+  true_forecast_date <- as_of
   return(format_storage(pred, true_forecast_date))
 }
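
A minimal sketch (not part of the commit) of the two behavior changes above, run on a hypothetical two-signal tibble; the hhs and chng column names only mirror the old hard-coded filter, and everything here is illustrative:

library(dplyr)
library(tidyr)

# Hypothetical two-signal snippet; real callers pass an epiprocess::epi_df.
epi_data <- tibble(
  geo_value  = c("ca", "ca", "tx"),
  time_value = as.Date("2022-01-01") + c(0, 1, 0),
  hhs        = c(1.2, NA, 3.4),
  chng       = c(0.5, 0.7, NA)
)

# Old, signal-specific row filter: breaks when other signal columns are used.
epi_data %>% filter(!is.na(hhs) & !is.na(chng))

# New, signal-agnostic version: drop any row with an NA in any column.
epi_data %>% drop_na()
# Both keep only the ("ca", 2022-01-01) row for this toy input.

# Default as_of: when the metadata carries no as_of, fall back to the latest
# observed time_value as the forecast date.
as_of <- attributes(epi_data)$metadata$as_of # NULL for a plain tibble
if (is.null(as_of)) {
  as_of <- max(epi_data$time_value)
}
as_of # "2022-01-02"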

R/latency_adjusting.R

Lines changed: 6 additions & 3 deletions
@@ -11,10 +11,13 @@
 extend_ahead <- function(epi_data, ahead) {
   time_values <- epi_data$time_value
   if (length(time_values) > 0) {
+    as_of <- attributes(epi_data)$metadata$as_of
+    max_time <- max(time_values)
+    if (is.null(as_of)) {
+      as_of <- max_time
+    }
     effective_ahead <- as.integer(
-      as.Date(attributes(epi_data)$metadata$as_of) -
-        max(time_values) +
-        ahead
+      as.Date(as_of) - max_time + ahead
     )
   } else {
     effective_ahead <- Inf
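
A worked example (illustrative only) of the effective_ahead arithmetic with the new fallback, assuming data through 2022-01-01 and a forecast made as of 2022-01-04:

time_values <- as.Date("2021-12-28") + 0:4 # last observation is 2022-01-01
as_of <- as.Date("2022-01-04")
ahead <- 2L

max_time <- max(time_values)
as.integer(as.Date(as_of) - max_time + ahead) # 5: the data lags the as_of date by 3 days

# With no as_of metadata the fallback sets as_of <- max_time,
# so the effective ahead reduces to the nominal ahead:
as.integer(as.Date(max_time) - max_time + ahead) # 2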

tests/testthat/test-forecasters-basics.R

Lines changed: 49 additions & 1 deletion
@@ -5,7 +5,7 @@ forecasters <- list(
   c("flatline_fc", flatline_fc)
 )
 for (forecaster in forecasters) {
-  test_that(forecaster[[1]], {
+  test_that(paste(forecaster[[1]], "gets the date and columns right"), {
     jhu <- epipredict::case_death_rate_subset %>%
       dplyr::filter(time_value >= as.Date("2021-12-01"))
     # the as_of for this is wildly far in the future
@@ -19,8 +19,50 @@
       res$target_end_date ==
         as.Date("2022-01-01")
     ))
+  })
+
+  test_that(paste(forecaster[[1]], "deals with no as_of"), {
+    jhu <- epipredict::case_death_rate_subset %>%
+      dplyr::filter(time_value >= as.Date("2021-12-01"))
+    # what if we have no as_of date? assume they mean the last available data
+    attributes(jhu)$metadata$as_of <- NULL
+    expect_no_error(res <- forecaster[[2]](jhu, "case_rate", c("death_rate"), 2L))
+    expect_equal(res$target_end_date %>% unique(), max(jhu$time_value) + 2)
+  })
+
+  test_that(paste(forecaster[[1]], "handles last second NA's"), {
+    # if the last entries are NA, we should still predict
+    # TODO: currently this checks that we DON'T predict
+    jhu <- epipredict::case_death_rate_subset %>%
+      dplyr::filter(time_value >= as.Date("2021-12-01"))
+    geo_values <- jhu$geo_value %>% unique()
+    one_day_nas <- tibble(
+      geo_value = geo_values,
+      time_value = as.Date("2022-01-01"),
+      case_rate = NA,
+      death_rate = runif(length(geo_values))
+    )
+    second_day_nas <- one_day_nas %>%
+      mutate(time_value = as.Date("2022-01-02"))
+    jhu_nad <- jhu %>%
+      as_tibble() %>%
+      bind_rows(one_day_nas, second_day_nas) %>%
+      as_epi_df()
+    attributes(jhu_nad)$metadata$as_of <- max(jhu_nad$time_value) + 3
+    expect_no_error(nas_forecast <- forecaster[[2]](jhu_nad, "case_rate", c("death_rate")))
+    # TODO: this shouldn't actually be null, it should be a bit further delayed
+    expect_equal(nrow(nas_forecast), 0)
+  })
+
+  #################################
   # any forecaster specific tests
   if (forecaster[[1]] == "scaled_pop") {
+    test_that(paste(forecaster[[1]], "scaled and unscaled don't make the same predictions"), {
+      jhu <- epipredict::case_death_rate_subset %>%
+        dplyr::filter(time_value >= as.Date("2021-12-01"))
+      # the as_of for this is wildly far in the future
+      attributes(jhu)$metadata$as_of <- max(jhu$time_value) + 3
+      res <- forecaster[[2]](jhu, "case_rate", c("death_rate"), -2L)
     # confirm scaling produces different results
     res_unscaled <- forecaster[[2]](jhu,
       "case_rate",
@@ -35,10 +77,16 @@
     ) %>%
       mutate(equal = value.unscaled == value.scaled) %>%
       summarize(all(equal)) %>% pull(`all(equal)`))
+    })
   }
   # TODO confirming that it produces exactly the same result as arx_forecaster
   # test case where extra_sources is "empty"
   # test case where the epi_df is empty
+  test_that(paste(forecaster[[1]], "scaled and unscaled don't make the same predictions"), {
+    jhu <- epipredict::case_death_rate_subset %>%
+      dplyr::filter(time_value >= as.Date("2021-12-01"))
+    # the as_of for this is wildly far in the future
+    attributes(jhu)$metadata$as_of <- max(jhu$time_value) + 3
   null_jhu <- jhu %>% filter(time_value < as.Date("0009-01-01"))
   expect_no_error(null_res <- forecaster[[2]](null_jhu, "case_rate", c("death_rate")))
   expect_identical(names(null_res), names(res))
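
To run just this test file locally (assuming the repository follows the standard testthat layout; the filter string below is only an illustration):

# from the package root
testthat::test_file("tests/testthat/test-forecasters-basics.R")

# or, via devtools, filtering on the test file name
devtools::test(filter = "forecasters-basics")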
