Skip to content

Commit 62375f8

Browse files
committed
fix: output filter w/ before=Inf, wrong before/after types
Also change a map -> lapply so we can get srcref to internal error immediately, add a missed .window_size-missingness check, delete some helper functions that were sort of helpful but also tacked on significant run time when used in a natural way.
1 parent 4792aea commit 62375f8

5 files changed

+57
-90
lines changed

R/epi_slide_opt_archive.R

+19-6
Original file line numberDiff line numberDiff line change
@@ -47,15 +47,16 @@
4747
epi_slide_opt_archive_one_epikey <- function(
4848
grp_updates,
4949
in_colnames,
50-
f_dots_baked, f_from_package, before, after, time_type,
50+
f_dots_baked, f_from_package,
51+
before_steps, after_steps, time_type,
5152
out_colnames) {
5253
grp_updates_by_version <- grp_updates %>%
5354
nest(.by = version, .key = "subtbl") %>%
5455
arrange(version)
5556
unit_step <- unit_time_delta(time_type, format = "fast")
5657
prev_inp_snapshot <- NULL
5758
prev_out_snapshot <- NULL
58-
result <- map(seq_len(nrow(grp_updates_by_version)), function(version_i) {
59+
result <- lapply(seq_len(nrow(grp_updates_by_version)), function(version_i) {
5960
version <- grp_updates_by_version$version[[version_i]]
6061
inp_update <- grp_updates_by_version$subtbl[[version_i]]
6162
inp_snapshot <- tbl_patch(prev_inp_snapshot, inp_update, "time_value")
@@ -67,9 +68,13 @@ epi_slide_opt_archive_one_epikey <- function(
6768
# inp_update_min_t - after, or anything in between these two bounds. If
6869
# before == Inf, we need to update outputs all the way to the end of the
6970
# input *snapshot*.
70-
out_update_min_t <- time_minus_slide_window_arg(inp_update_min_t, after, time_type)
71-
out_update_max_t <- time_plus_slide_window_arg(inp_update_max_t, before, time_type, max(inp_snapshot$time_value))
72-
out_update <- epi_slide_opt_one_epikey(inp_snapshot, f_dots_baked, f_from_package, before, after, unit_step, time_type, c(out_update_min_t, out_update_max_t), NULL, in_colnames, out_colnames)
71+
out_update_min_t <- inp_update_min_t - after_steps * unit_step
72+
if (before_steps == Inf) {
73+
out_update_max_t <- max(inp_snapshot$time_value)
74+
} else {
75+
out_update_max_t <- inp_update_max_t + before_steps * unit_step
76+
}
77+
out_update <- epi_slide_opt_one_epikey(inp_snapshot, f_dots_baked, f_from_package, before_steps, after_steps, unit_step, time_type, c(out_update_min_t, out_update_max_t), NULL, in_colnames, out_colnames)
7378
out_diff <- tbl_diff2(prev_out_snapshot, out_update, "time_value", "update")
7479
prev_inp_snapshot <<- inp_snapshot
7580
prev_out_snapshot <<- tbl_patch(prev_out_snapshot, out_diff, "time_value")
@@ -118,11 +123,19 @@ epi_slide_opt.epi_archive <-
118123
purrr::partial(.f, ...)
119124
}
120125
col_names_quo <- enquo(.col_names)
126+
if (is.null(.window_size)) {
127+
cli_abort(
128+
"epi_slide_opt: `.window_size` must be specified.",
129+
class = "epiprocess__epi_slide_opt__window_size_missing"
130+
)
131+
}
121132
names_info <- across_ish_names_info(
122133
.x$DT, time_type, col_names_quo, .f_info$namer,
123134
.window_size, .align, .prefix, .suffix, .new_col_names
124135
)
125136
window_args <- get_before_after_from_window(.window_size, .align, time_type)
137+
before_steps <- time_delta_to_n_steps(window_args$before, time_type)
138+
after_steps <- time_delta_to_n_steps(window_args$after, time_type)
126139
if (!is.null(.ref_time_values)) {
127140
cli_abort("epi_slide.epi_archive does not support the `.ref_time_values` argument",
128141
class = "epiprocess__epi_slide_opt_archive__ref_time_values_unsupported"
@@ -154,7 +167,7 @@ epi_slide_opt.epi_archive <-
154167
res <- epi_slide_opt_archive_one_epikey(
155168
group_values,
156169
names_info$input_col_names,
157-
.f_dots_baked, .f_info$from_package, window_args$before, window_args$after, time_type,
170+
.f_dots_baked, .f_info$from_package, before_steps, after_steps, time_type,
158171
names_info$output_col_names
159172
)
160173
if (use_progress) cli::cli_progress_update(id = progress_bar_id)

R/epi_slide_opt_edf.R

+31-15
Original file line numberDiff line numberDiff line change
@@ -168,9 +168,12 @@ across_ish_names_info <- function(.x, time_type, col_names_quo, .f_namer,
168168
}
169169

170170
epi_slide_opt_one_epikey <- function(inp_tbl,
171-
f_dots_baked, f_from_package, before, after, unit_step, time_type,
171+
f_dots_baked, f_from_package,
172+
before_steps, after_steps, unit_step, time_type,
172173
out_filter_time_range, out_filter_time_set,
173174
in_colnames, out_colnames) {
175+
# TODO rename function, reorder args, roxygen2
176+
#
174177
# TODO try converting time values to reals, do work on reals, convert back at very end?
175178
#
176179
# TODO loosen restrictions here. each filter optional?
@@ -187,8 +190,14 @@ epi_slide_opt_one_epikey <- function(inp_tbl,
187190
} else {
188191
cli_abort("Exactly one of `out_filter_time_range` and `out_filter_time_set` must be non-`NULL`.")
189192
}
190-
slide_t_min <- time_minus_slide_window_arg(out_t_min, before, time_type, min(inp_tbl$time_value))
191-
slide_t_max <- time_plus_slide_window_arg(out_t_max, after, time_type)
193+
if (before_steps == Inf) {
194+
slide_t_min <- min(inp_tbl$time_value)
195+
slide_start_padding_n <- time_minus_time_in_n_steps(out_t_min, slide_t_min, time_type)
196+
} else {
197+
slide_t_min <- out_t_min - before_steps * unit_step
198+
slide_start_padding_n <- before_steps # perf: avoid time_minus_time_in_n_steps
199+
}
200+
slide_t_max <- out_t_max + after_steps * unit_step
192201
slide_nrow <- time_delta_to_n_steps(slide_t_max - slide_t_min, time_type) + 1L
193202
slide_time_values <- slide_t_min + 0L:(slide_nrow - 1L) * unit_step
194203
slide_inp_backrefs <- vec_match(slide_time_values, inp_tbl$time_value)
@@ -200,37 +209,41 @@ epi_slide_opt_one_epikey <- function(inp_tbl,
200209
# try removing time_value column before slice?
201210
slide$time_value <- slide_time_values
202211
if (f_from_package == "data.table") {
203-
if (before == Inf) {
212+
if (before_steps == Inf) {
204213
slide[, out_colnames] <-
205214
f_dots_baked(slide[, in_colnames], seq_len(slide_nrow), adaptive = TRUE)
206215
} else {
207-
out_cols <- f_dots_baked(slide[, in_colnames], before + after + 1L)
208-
if (after != 0L) {
216+
out_cols <- f_dots_baked(slide[, in_colnames], before_steps + after_steps + 1L)
217+
if (after_steps != 0L) {
209218
# Shift an appropriate amount of NA padding from the start to the end.
210219
# (This padding will later be cut off when we filter down to the
211220
# original time_values.)
212221
out_cols <- lapply(out_cols, function(out_col) {
213-
c(out_col[(after + 1L):length(out_col)], rep(NA, after))
222+
c(out_col[(after_steps + 1L):length(out_col)], rep(NA, after_steps))
214223
})
215224
}
216225
slide[, out_colnames] <- out_cols
217226
}
218227
} else if (f_from_package == "slider") {
219228
for (col_i in seq_along(in_colnames)) {
220-
slide[[out_colnames[[col_i]]]] <- f_dots_baked(slide[[in_colnames[[col_i]]]], before = before, after = after)
229+
slide[[out_colnames[[col_i]]]] <- f_dots_baked(slide[[in_colnames[[col_i]]]], before = before_steps, after = after_steps)
221230
}
222231
} else {
223232
cli_abort(
224233
"epiprocess internal error: `f_from_package` was {format_chr_deparse(f_from_package)}, which is unsupported",
225234
class = "epiprocess__epi_slide_opt_archive__f_from_package_invalid"
226235
)
227236
}
237+
# We should filter down the slide time values to ones in the input time values
238+
# when preparing the output:
228239
rows_should_keep1 <- !is.na(slide_inp_backrefs)
229-
rows_should_keep2 <- switch(
230-
out_filter_time_style,
240+
# We also need to apply the out_filter.
241+
#
242+
# TODO comments + test vs. just using inequality
243+
rows_should_keep2 <- switch(out_filter_time_style,
231244
range = vec_rep_each(
232245
c(FALSE, TRUE, FALSE),
233-
c(before, slide_nrow - before - after, after),
246+
c(slide_start_padding_n, slide_nrow - slide_start_padding_n - after_steps, after_steps),
234247
),
235248
set = vec_in(slide_time_values, out_time_values)
236249
)
@@ -493,12 +506,15 @@ epi_slide_opt.epi_df <- function(.x, .col_names, .f, ...,
493506
.align <- rlang::arg_match(.align)
494507
time_type <- attr(.x, "metadata")$time_type
495508
if (is.null(.window_size)) {
496-
cli_abort("epi_slide_opt: `.window_size` must be specified.")
509+
cli_abort(
510+
"epi_slide_opt: `.window_size` must be specified.",
511+
class = "epiprocess__epi_slide_opt__window_size_missing"
512+
)
497513
}
498514
validate_slide_window_arg(.window_size, time_type)
499515
window_args <- get_before_after_from_window(.window_size, .align, time_type)
500-
before <- time_delta_to_n_steps(window_args$before, time_type)
501-
after <- time_delta_to_n_steps(window_args$after, time_type)
516+
before_steps <- time_delta_to_n_steps(window_args$before, time_type)
517+
after_steps <- time_delta_to_n_steps(window_args$after, time_type)
502518
unit_step <- unit_time_delta(time_type, format = "fast")
503519

504520
# Handle output naming:
@@ -520,7 +536,7 @@ epi_slide_opt.epi_df <- function(.x, .col_names, .f, ...,
520536

521537
result <- .x %>%
522538
group_modify(function(grp_data, grp_key) {
523-
epi_slide_opt_one_epikey(grp_data, f_dots_baked, f_from_package, before, after, unit_step, time_type, NULL, ref_time_values, names_info$input_col_names, names_info$output_col_names)
539+
epi_slide_opt_one_epikey(grp_data, f_dots_baked, f_from_package, before_steps, after_steps, unit_step, time_type, NULL, ref_time_values, names_info$input_col_names, names_info$output_col_names)
524540
}) %>%
525541
arrange_col_canonical()
526542

R/time-utils.R

-29
Original file line numberDiff line numberDiff line change
@@ -373,32 +373,3 @@ time_plus_n_steps <- function(x, y, time_type) {
373373
time_minus_n_steps <- function(x, y, time_type) {
374374
x - y * unit_time_delta(time_type, "fast")
375375
}
376-
377-
#' Advance/retreat time_value(s) by specified amount (slide window arg)
378-
#'
379-
#' @param x a time_value (vector) of time type `time_type`
380-
#' @param y a (scalar) slide window arg; should pass [`validate_slide_window_arg()`]
381-
#' @param time_type as in [`validate_slide_window_arg()`]
382-
#' @param max_time_value when `y == Inf`, what should be the result of adding `y`?
383-
#' @param min_time_value when `y == Inf`, what should be the result of subtracting `y`?
384-
#' @return a time_value (vector) of time type `time_type`
385-
#'
386-
#' @keywords internal
387-
#' @seealso [`time_plus_n_steps`], if you're working with an integerish vector
388-
#' number of time steps `y` (output from other `*n_steps` functions) instead.
389-
time_plus_slide_window_arg <- function(x, y, time_type, max_time_value) {
390-
if (y == Inf) {
391-
rep(max_time_value, vec_size(x))
392-
} else {
393-
x + y
394-
}
395-
}
396-
397-
#' @rdname time_plus_slide_window_arg
398-
time_minus_slide_window_arg <- function(x, y, time_type, min_time_value) {
399-
if (y == Inf) {
400-
rep(min_time_value, vec_size(x))
401-
} else {
402-
x - y
403-
}
404-
}

man/epi_slide_opt_archive_one_epikey.Rd

+7-7
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

man/time_plus_slide_window_arg.Rd

-33
This file was deleted.

0 commit comments

Comments
 (0)