@@ -43,29 +43,39 @@ perform_sanity_checks <- function(epi_data,
43
43
# ' confirm that there's enough data to run this model
44
44
# ' @description
45
45
# ' epipredict is a little bit fragile about having enough data to train; we want
46
- # ' to be able to return a null result rather than error out; this check say to
47
- # ' return a null
46
+ # ' to be able to return a null result rather than error out.
48
47
# ' @param epi_data the input data
49
- # ' @param buffer how many training data to insist on having (e.g. if `buffer=1`,
50
- # ' this trains on one sample; the default is set so that `linear_reg` isn't
51
- # ' rank deficient)
52
48
# ' @param ahead the effective ahead; may be infinite if there isn't enough data.
53
49
# ' @param args_input the input as supplied to `forecaster_pred`; lags is the
54
50
# ' important argument, which may or may not be defined, with the default
55
51
# ' coming from `arx_args_list`
52
+ # ' @param buffer how many training data to insist on having (e.g. if `buffer=1`,
53
+ # ' this trains on one sample; the default is set so that `linear_reg` isn't
54
+ # ' rank deficient)
55
+ # ' @importFrom tidyr drop_na
56
56
# ' @export
57
- confirm_insufficient_data <- function (epi_data , ahead , args_input , buffer = 9 ) {
57
+ confirm_sufficient_data <- function (epi_data , ahead , args_input , buffer = 9 ) {
58
58
if (! is.null(args_input $ lags )) {
59
59
lag_max <- max(args_input $ lags )
60
60
} else {
61
61
lag_max <- 14 # default value of 2 weeks
62
62
}
63
+
64
+ # TODO: Buffer should probably be 2 * n(lags) * n(predictors). But honestly,
65
+ # this needs to be fixed in epipredict itself, see
66
+ # https://github.com/cmu-delphi/epipredict/issues/106.
67
+
63
68
return (
64
- is.infinite(ahead ) ||
65
- as.integer(max(epi_data $ time_value ) - min(epi_data $ time_value )) < =
66
- lag_max + ahead + buffer
69
+ ! is.infinite(ahead ) &&
70
+ epi_data %> %
71
+ drop_na() %> %
72
+ group_by(geo_value ) %> %
73
+ summarise(has_enough_data = n_distinct(time_value ) > = lag_max + ahead + buffer ) %> %
74
+ pull(has_enough_data ) %> %
75
+ any()
67
76
)
68
77
}
78
+
69
79
# TODO replace with `step_arx_forecaster`
70
80
# ' add the default steps for arx_forecaster
71
81
# ' @description
@@ -149,7 +159,11 @@ run_workflow_and_format <- function(preproc, postproc, trainer, epi_data) {
149
159
latest <- get_test_data(recipe = preproc , x = epi_data )
150
160
pred <- predict(workflow , latest )
151
161
# the forecast_date may currently be the max time_value
152
- true_forecast_date <- attributes(epi_data )$ metadata $ as_of
162
+ as_of <- attributes(epi_data )$ metadata $ as_of
163
+ if (is.null(as_of )) {
164
+ as_of <- max(epi_data $ time_value )
165
+ }
166
+ true_forecast_date <- as_of
153
167
return (format_storage(pred , true_forecast_date ))
154
168
}
155
169
@@ -176,6 +190,8 @@ run_workflow_and_format <- function(preproc, postproc, trainer, epi_data) {
176
190
# ' contain `ahead`
177
191
# ' @param forecaster_args_names a bit of a hack around targets, it contains
178
192
# ' the names of the `forecaster_args`.
193
+ # ' @param date_range_step_size the step size (in days) to use when generating
194
+ # ' the forecast dates.
179
195
# ' @importFrom epiprocess epix_slide
180
196
# ' @importFrom cli cli_abort
181
197
# ' @importFrom rlang !!
@@ -187,7 +203,8 @@ forecaster_pred <- function(data,
187
203
slide_training = 0 ,
188
204
n_training_pad = 5 ,
189
205
forecaster_args = list (),
190
- forecaster_args_names = list ()) {
206
+ forecaster_args_names = list (),
207
+ date_range_step_size = 1L ) {
191
208
archive <- data
192
209
if (length(forecaster_args ) > 0 ) {
193
210
names(forecaster_args ) <- forecaster_args_names
@@ -210,25 +227,47 @@ forecaster_pred <- function(data,
210
227
# restrict the dataset to areas where training is possible
211
228
start_date <- min(archive $ DT $ time_value ) + net_slide_training
212
229
end_date <- max(archive $ DT $ time_value ) - forecaster_args $ ahead
213
- valid_predict_dates <- seq.Date(from = start_date , to = end_date , by = 1 )
230
+ valid_predict_dates <- seq.Date(from = start_date , to = end_date , by = date_range_step_size )
214
231
215
232
# first generate the forecasts
216
233
before <- n_training + n_training_pad - 1
217
- # # TODO epix_slide doesn't support infinite `before`
234
+ # # TODO: epix_slide doesn't support infinite `before`
218
235
# # https://github.com/cmu-delphi/epiprocess/issues/219
219
236
if (before == Inf ) before <- 365L * 10000
220
237
res <- epix_slide(archive ,
221
238
function (data , gk , rtv , ... ) {
222
- do.call(
223
- forecaster ,
224
- append(
225
- list (
226
- epi_data = data ,
227
- outcome = outcome ,
228
- extra_sources = extra_sources
229
- ),
230
- forecaster_args
231
- )
239
+ # TODO: Can we get rid of this tryCatch and instead hook it up to targets
240
+ # error handling or something else?
241
+ # https://github.com/cmu-delphi/exploration-tooling/issues/41
242
+ tryCatch(
243
+ {
244
+ do.call(
245
+ forecaster ,
246
+ append(
247
+ list (
248
+ epi_data = data ,
249
+ outcome = outcome ,
250
+ extra_sources = extra_sources
251
+ ),
252
+ forecaster_args
253
+ )
254
+ )
255
+ },
256
+ error = function (e ) {
257
+ if (interactive()) {
258
+ browser()
259
+ } else {
260
+ dump_vars <- list (
261
+ data = data ,
262
+ rtv = rtv ,
263
+ forecaster = forecaster ,
264
+ forecaster_args = forecaster_args ,
265
+ e = e
266
+ )
267
+ saveRDS(dump_vars , " forecaster_pred_error.rds" )
268
+ e
269
+ }
270
+ }
232
271
)
233
272
},
234
273
before = before ,
0 commit comments