10
10
extend_either <- function (new_data , shift_cols , keys ) {
11
11
shifted <-
12
12
shift_cols %> %
13
- select(- any_of(c( " shifts " , " effective_shift " , " type " , " role " , " source " )) ) %> %
14
- pmap(\ (original_name , latency , new_name ) {
13
+ select(original_name , latency , new_name ) %> %
14
+ pmap(function (original_name , latency , new_name ) {
15
15
epi_shift_single(
16
16
x = new_data ,
17
17
col = original_name ,
@@ -20,7 +20,7 @@ extend_either <- function(new_data, shift_cols, keys) {
20
20
key_cols = keys
21
21
)
22
22
}) %> %
23
- map(\ (x ) zoo :: na.trim(x )) %> %
23
+ map(function (x ) zoo :: na.trim(x )) %> %
24
24
reduce(
25
25
dplyr :: full_join ,
26
26
by = keys
@@ -34,60 +34,66 @@ extend_either <- function(new_data, shift_cols, keys) {
34
34
dplyr :: ungroup())
35
35
}
36
36
37
+ # ' create a table of the columns to modify, their shifts, and their prefixes
38
+ # ' @keywords internal
39
+ # ' @importFrom dplyr tibble
40
+ # ' @importFrom tidyr unnest
41
+ construct_shift_tibble <- function (terms_used , recipe , rel_step_type , shift_name ) {
42
+ # for the right step types (either "step_epi_lag" or "step_epi_shift"), grab
43
+ # the useful parameters, including the evaluated column names
44
+ extract_named_rates <- function (recipe_step ) {
45
+ if (inherits(recipe_step , rel_step_type )) {
46
+ recipe_columns <- recipes_eval_select(recipe_step $ terms , recipe $ template , recipe $ term_info )
47
+ if (any(recipe_columns %in% terms_used )) {
48
+ return (list (term = recipe_columns , shift = recipe_step [shift_name ], prefix = recipe_step $ prefix ))
49
+ }
50
+ }
51
+ return (NULL )
52
+ }
53
+ rel_list <- recipe $ steps %> %
54
+ purrr :: map(extract_named_rates ) %> %
55
+ unlist(recursive = FALSE ) %> %
56
+ split(c(" term" , " shift" , " prefix" ))
57
+ relevant_shifts <- tibble(
58
+ terms = lapply(rel_list $ term , unname ),
59
+ shift = lapply(rel_list $ shift , unname ),
60
+ prefix = unname(unlist(rel_list $ prefix ))
61
+ ) %> %
62
+ unnest(c(terms , shift )) %> %
63
+ unnest(shift )
64
+ return (relevant_shifts )
65
+ }
66
+
37
67
# ' find the columns added with the lags or aheads, and the amounts they have
38
68
# ' been changed
39
69
# ' @param prefix the prefix indicating if we are adjusting lags or aheads
40
70
# ' @param new_data the data transformed so far
41
71
# ' @return a tibble with columns `column` (relevant shifted names), `shift` (the
42
72
# ' amount that one is shifted), `latency` (original columns difference between
43
73
# ' max_time_value and as_of (on a per-initial column basis)),
44
- # ' `effective_shift` (shifts +latency), and `new_name` (adjusted names with the
74
+ # ' `effective_shift` (shift +latency), and `new_name` (adjusted names with the
45
75
# ' effective_shift)
46
76
# ' @keywords internal
47
- # ' @importFrom stringr str_match
48
77
# ' @importFrom dplyr rowwise %>%
49
- # ' @importFrom magrittr %<>%
50
- get_shifted_column_tibble <- function (
51
- prefix , new_data , terms_used , as_of , latency ,
78
+ # ' @importFrom purrr map_lgl
79
+ # ' @importFrom glue glue
80
+ get_latent_column_tibble <- function (
81
+ shift_cols , new_data , as_of , latency ,
52
82
sign_shift , info , call = caller_env()) {
53
- relevant_columns <- names(new_data )[grepl(prefix , names(new_data ))]
54
- to_keep <- rep(FALSE , length(relevant_columns ))
55
- for (col_name in terms_used ) {
56
- to_keep <- to_keep | grepl(col_name , relevant_columns )
57
- }
58
- relevant_columns <- relevant_columns [to_keep ]
59
- if (length(relevant_columns ) == 0 ) {
60
- cli :: cli_abort(" There is no column(s) {terms_used}." ,
61
- current_column_names = names(new_data ),
62
- class = " epipredict_adjust_latency_nonexistent_column_used" ,
63
- call = call
64
- )
65
- }
66
- # this pulls text that is any number of digits between two _, e.g. _3557_, and
67
- # converts them to an integer
68
- shift_amounts <- stringr :: str_match(relevant_columns , " _(\\ d+)_" ) %> %
69
- `[`(, 2 ) %> %
70
- as.integer()
71
-
72
- shift_cols <- dplyr :: tibble(
73
- original_name = relevant_columns ,
74
- shifts = shift_amounts
75
- )
83
+ shift_cols <- shift_cols %> % mutate(original_name = glue(" {prefix}{shift}_{terms}" ))
76
84
if (is.null(latency )) {
77
85
shift_cols <- shift_cols %> %
78
86
rowwise() %> %
79
87
# add the latencies to shift_cols
80
88
mutate(latency = get_latency(
81
- new_data , as_of , original_name , shifts , sign_shift
89
+ new_data , as_of , original_name , shift , sign_shift
82
90
)) %> %
83
91
ungroup()
84
92
} else if (length(latency ) > 1 ) {
93
+ # if latency has a length, we assign based on comparing the name in the list with the `terms` column
85
94
shift_cols <- shift_cols %> %
86
95
rowwise() %> %
87
- mutate(latency = unname(latency [purrr :: map_lgl(
88
- names(latency ),
89
- \(x ) grepl(x , original_name )
90
- )])) %> %
96
+ mutate(latency = unname(latency [names(latency ) == terms ])) %> %
91
97
ungroup()
92
98
} else {
93
99
shift_cols <- shift_cols %> % mutate(latency = latency )
@@ -96,10 +102,10 @@ get_shifted_column_tibble <- function(
96
102
# add the updated names to shift_cols
97
103
shift_cols <- shift_cols %> %
98
104
mutate(
99
- effective_shift = shifts + abs(latency )
105
+ effective_shift = shift + abs(latency )
100
106
) %> %
101
107
mutate(
102
- new_name = adjust_name( prefix , original_name , effective_shift )
108
+ new_name = glue( " { prefix}{ effective_shift}_{terms} " )
103
109
)
104
110
info <- info %> % select(variable , type , role )
105
111
shift_cols <- left_join(shift_cols , info , by = join_by(original_name == variable ))
@@ -166,19 +172,6 @@ set_asof <- function(new_data, info) {
166
172
return (as_of )
167
173
}
168
174
169
- # ' adjust the shifts by latency for the names in column assumes e.g.
170
- # ' `"lag_6_case_rate"` and returns something like `"lag_10_case_rate"`
171
- # ' @keywords internal
172
- # ' @importFrom stringi stri_replace_all_regex
173
- adjust_name <- function (prefix , column , effective_shift ) {
174
- pattern <- paste0(prefix , " \\ d+" , " _" )
175
- adjusted_shifts <- paste0(prefix , effective_shift , " _" )
176
- stringi :: stri_replace_all_regex(
177
- column ,
178
- pattern , adjusted_shifts
179
- )
180
- }
181
-
182
175
# ' the latency is also the amount the shift is off by
183
176
# ' @param sign_shift integer. 1 if lag and -1 if ahead. These represent how you
184
177
# ' need to shift the data to bring the 3 day lagged value to today.
0 commit comments