#' to the max time value of the `epi_df`. how to handle this is a modelling
#' question left up to each forecaster; see latency_adjusting.R for the
#' existing examples)
- #' @param pop_scaling an example extra parameter unique to this forecaster
- #' @param trainer an example extra parameter that is fairly common
+ #' @param pop_scaling bool; if `TRUE`, assume all numeric columns are on the
+ #'   count scale and translate them to a rate scale for model fitting.
+ #'   Predictions will be translated back to count scale. Any
+ #'   `layer_residual_quantiles` (for non-`"quantile_reg"` `trainer`s) will be
+ #'   done on the rate scale. When specifying predictor lags, note that rate
+ #'   variables will use the same names as and overwrite the count variables.
+ #'   Rates here will be counts per 100k population, based on
+ #'   `epipredict::state_census`.
+ #' @param trainer optional; parsnip model specification to use for the core
+ #'   fitting & prediction (the `spec` of the internal
+ #'   [`epipredict::epi_workflow`]). Default is `parsnip::linear_reg()`.
#' @param smooth_width the number of days over which to do smoothing. If `NULL`,
#'   then no smoothing is applied.
#' @param smooth_cols the names of the columns to smooth. If `NULL` it smooths
#' @importFrom epipredict epi_recipe step_population_scaling frosting arx_args_list layer_population_scaling
#' @importFrom tibble tibble
#' @importFrom recipes all_numeric
+ #' @importFrom zeallot %<-%
#' @export
smoothed_scaled <- function(epi_data,
-                            outcome,
-                            extra_sources = "",
-                            ahead = 1,
-                            pop_scaling = TRUE,
-                            trainer = parsnip::linear_reg(),
-                            quantile_levels = covidhub_probs(),
-                            smooth_width = 7,
-                            smooth_cols = NULL,
-                            sd_width = 28,
-                            sd_mean_width = 14,
-                            sd_cols = NULL,
-                            ...) {
+                            outcome,
+                            extra_sources = "",
+                            ahead = 1,
+                            pop_scaling = TRUE,
+                            trainer = parsnip::linear_reg(),
+                            quantile_levels = covidhub_probs(),
+                            smooth_width = 7,
+                            smooth_cols = NULL,
+                            sd_width = 28,
+                            sd_mean_width = 14,
+                            sd_cols = NULL,
+                            ...) {
  # perform any preprocessing not supported by epipredict
  # this is a temp fix until a real fix gets put into epipredict
  epi_data <- clear_lastminute_nas(epi_data)
  # one that every forecaster will need to handle: how to manage max(time_value)
  # that's older than the `as_of` date
- epidataAhead <- extend_ahead(epi_data, ahead)
+ c(epi_data, effective_ahead) %<-% extend_ahead(epi_data, ahead)
  # see latency_adjusting for other examples
- # this next part is basically unavoidable boilerplate you'll want to copy
- epi_data <- epidataAhead[[1]]
- effective_ahead <- epidataAhead[[2]]
  args_input <- list(...)
  # edge case where there is no data or less data than the lags; eventually epipredict will handle this
  if (!confirm_sufficient_data(epi_data, effective_ahead, args_input)) {
-   null_result <- tibble(
-     geo_value = character(),
-     forecast_date = lubridate::Date(),
-     target_end_date = lubridate::Date(),
-     quantile = numeric(),
-     value = numeric()
-   )
+   null_result <- epi_data[0L, c("geo_value", attr(epi_data, "metadata", exact = TRUE)[["other_keys"]])] %>%
+     mutate(
+       forecast_date = epi_data$time_value[0],
+       target_end_date = epi_data$time_value[0],
+       quantile = numeric(),
+       value = numeric()
+     )
    return(null_result)
  }
  args_input[["ahead"]] <- effective_ahead
  args_input[["quantile_levels"]] <- quantile_levels
  args_list <- do.call(arx_args_list, args_input)
- # if you want to ignore extra_sources, setting predictors is the way to do it
+ # `extra_sources` sets which variables beyond the outcome are lagged and used as predictors
+ # any which are modified by `rolling_mean` or `rolling_sd` have their original values dropped later
  predictors <- c(outcome, extra_sources)
- # TODO: Partial match quantile_level coming from here (on Dmitry's machine)
- argsPredictorsTrainer <- perform_sanity_checks(epi_data, outcome, predictors, trainer, args_list)
- args_list <- argsPredictorsTrainer[[1]]
- predictors <- argsPredictorsTrainer[[2]]
- trainer <- argsPredictorsTrainer[[3]]
  # end of the copypasta
  # finally, any other pre-processing (e.g. smoothing) that isn't performed by
  # epipredict
  # smoothing
- keep_mean <- (smooth_width == sd_mean_width) # do we need to do the mean separately?
+ keep_mean <- !is.null(smooth_width) && !is.null(sd_mean_width) &&
+   smooth_width == sd_mean_width # do we (not) need to do the mean separately?
  if (!is.null(smooth_width) && !keep_mean) {
    epi_data %<>% rolling_mean(
      width = smooth_width,
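Aside: the `%<-%` operator that the new `@importFrom zeallot %<-%` line brings in is zeallot's destructuring assignment, which is what lets the two values returned by `extend_ahead` land directly in `epi_data` and `effective_ahead`. A minimal sketch, using a hypothetical `extend_ahead_stub` in place of the real `extend_ahead`:

library(zeallot)

# hypothetical stand-in for extend_ahead(): returns the (possibly trimmed) data
# and the latency-adjusted ahead as a two-element list
extend_ahead_stub <- function(epi_data, ahead) {
  list(epi_data, ahead + 2L)
}

# %<-% unpacks both returned values into separate variables in one assignment
c(toy_data, toy_ahead) %<-% extend_ahead_stub(data.frame(x = 1:3), 1L)
toy_ahead # 3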
@@ -101,8 +105,10 @@ smoothed_scaled <- function(epi_data,
      keep_mean = keep_mean
    )
  }
- # and need to make sure we exclude the original varialbes as predictors
+ # and need to make sure we exclude the original variables as predictors
  predictors <- update_predictors(epi_data, c(smooth_cols, sd_cols), predictors)
+ # TODO: Partial match quantile_level coming from here (on Dmitry's machine)
+ c(args_list, predictors, trainer) %<-% perform_sanity_checks(epi_data, outcome, predictors, trainer, args_list)
  # preprocessing supported by epipredict
  preproc <- epi_recipe(epi_data)
  if (pop_scaling) {
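Aside: the body of the `if (pop_scaling)` branch is cut off above. As a rough, hedged sketch of what count-to-rate scaling with epipredict's `step_population_scaling` / `layer_population_scaling` generally looks like — not the code from this commit, and using a hypothetical `pop_df` lookup table rather than `epipredict::state_census`:

# Sketch only, not this commit's implementation. `pop_df` is a hypothetical
# population lookup table with columns `geo_value` and `population`; `preproc`
# continues from the `epi_recipe(epi_data)` created above.
library(epipredict)
library(recipes)

pop_df <- tibble::tibble(
  geo_value  = c("ca", "ny"),
  population = c(39e6, 19.5e6)
)

# fit on rates per 100k rather than raw counts, as the roxygen above describes
preproc <- preproc %>%
  step_population_scaling(
    all_numeric(),
    df = pop_df,
    df_pop_col = "population",
    by = "geo_value",
    create_new = FALSE,
    rate_rescaling = 1e5
  )

# translate predictions back to the count scale in post-processing
postproc <- frosting() %>%
  layer_predict() %>%
  layer_population_scaling(
    .pred,
    df = pop_df,
    df_pop_col = "population",
    by = "geo_value",
    create_new = FALSE,
    rate_rescaling = 1e5
  )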