Skip to content

Commit 0080425

Browse files
committed
Fix guess_period on datetimes, make it more precise + generic
- Don't discard units and effectively replace them with seconds - Don't allow any tolerance in judging a remainder to be zero, since when we use it to generate the default `ref_time_values` that means we could miss reproducing some of the actual input time values. - Make it into an S3 generic so it can be extended for more time classes.
1 parent 5fb62f3 commit 0080425

File tree

6 files changed

+119
-37
lines changed

6 files changed

+119
-37
lines changed

DESCRIPTION

+1-1
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
Type: Package
22
Package: epiprocess
33
Title: Tools for basic signal processing in epidemiology
4-
Version: 0.7.12
4+
Version: 0.7.13
55
Authors@R: c(
66
person("Jacob", "Bien", role = "ctb"),
77
person("Logan", "Brooks", email = "[email protected]", role = c("aut", "cre")),

NAMESPACE

+4
Original file line numberDiff line numberDiff line change
@@ -25,6 +25,9 @@ S3method(group_by,grouped_epi_archive)
2525
S3method(group_by_drop_default,grouped_epi_archive)
2626
S3method(group_modify,epi_df)
2727
S3method(groups,grouped_epi_archive)
28+
S3method(guess_period,Date)
29+
S3method(guess_period,POSIXt)
30+
S3method(guess_period,default)
2831
S3method(key_colnames,data.frame)
2932
S3method(key_colnames,default)
3033
S3method(key_colnames,epi_archive)
@@ -64,6 +67,7 @@ export(filter)
6467
export(group_by)
6568
export(group_modify)
6669
export(growth_rate)
70+
export(guess_period)
6771
export(is_epi_df)
6872
export(is_grouped_epi_archive)
6973
export(key_colnames)

NEWS.md

+2
Original file line numberDiff line numberDiff line change
@@ -35,6 +35,8 @@ Pre-1.0.0 numbering scheme: 0.x will indicate releases, while 0.x.y will indicat
3535
- Improved documentation web site landing page's introduction.
3636
- Fixed documentation referring to old `epi_slide()` interface (#466, thanks
3737
@XuedaShen!).
38+
- Fixed bug where `epix_slide_ref_time_values_default()` on datetimes would
39+
output a huge number of `ref_time_values` spaced apart by mere seconds.
3840

3941
## Cleanup
4042
- Resolved some linting messages in package checks (#468).

R/utils.R

+47-23
Original file line numberDiff line numberDiff line change
@@ -670,28 +670,52 @@ gcd_num <- function(dividends, ..., rrtol = 1e-6, pqlim = 1e6, irtol = 1e-6) {
670670
vctrs::vec_cast(numeric_gcd, dividends)
671671
}
672672

673-
#' Use max valid period as guess for `period` of `ref_time_values`
674-
#'
675-
#' @param ref_time_values Vector containing time-interval-like or time-like
676-
#' data, with at least two distinct values, [`diff`]-able (e.g., a
677-
#' `time_value` or `version` column), and should have a sensible result from
678-
#' adding `is.numeric` versions of its `diff` result (via `as.integer` if its
679-
#' `typeof` is `"integer"`, otherwise via `as.numeric`).
680-
#' @param ref_time_values_arg Optional, string; name to give `ref_time_values`
681-
#' in error messages. Defaults to quoting the expression the caller fed into
682-
#' the `ref_time_values` argument.
683-
#' @return `is.numeric`, length 1; attempts to match `typeof(ref_time_values)`
684-
guess_period <- function(ref_time_values, ref_time_values_arg = rlang::caller_arg(ref_time_values)) {
685-
sorted_distinct_ref_time_values <- sort(unique(ref_time_values))
686-
if (length(sorted_distinct_ref_time_values) < 2L) {
687-
cli_abort("Not enough distinct values in {.code {ref_time_values_arg}} to guess the period.", ref_time_values_arg)
673+
#' Use max valid period as guess for `period` of `time_values`
674+
#'
675+
#' `r lifecycle::badge("experimental")`
676+
#'
677+
#' @param time_values Vector containing time-interval-like or time-point-like
678+
#' data, with at least two distinct values.
679+
#' @param time_values_arg Optional, string; name to give `time_values` in error
680+
#' messages. Defaults to quoting the expression the caller fed into the
681+
#' `time_values` argument.
682+
#' @return length-1 vector; `r lifecycle::badge("experimental")` class will
683+
#' either be the same class as [`base::diff()`] on such time values, an
684+
#' integer, or a double, such that all `time_values` can be exactly obtained
685+
#' by adding `k * result` for an integer k, and such that there is no smaller
686+
#' `result` that can achieve this.
687+
#' @export
688+
guess_period <- function(time_values, time_values_arg = rlang::caller_arg(time_values), ...) {
689+
UseMethod("guess_period")
690+
}
691+
692+
#' @export
693+
guess_period.default <- function(time_values, time_values_arg = rlang::caller_arg(time_values), ...) {
694+
rlang::check_dots_empty()
695+
sorted_distinct_time_values <- sort(unique(time_values))
696+
if (length(sorted_distinct_time_values) < 2L) {
697+
cli_abort("Not enough distinct values in {.code {time_values_arg}} to guess the period.",
698+
class = "epiprocess__guess_period__not_enough_times",
699+
time_values = time_values
700+
)
688701
}
689-
skips <- diff(sorted_distinct_ref_time_values)
690-
decayed_skips <-
691-
if (typeof(skips) == "integer") {
692-
as.integer(skips)
693-
} else {
694-
as.numeric(skips)
695-
}
696-
gcd_num(decayed_skips)
702+
skips <- diff(sorted_distinct_time_values)
703+
# Certain diff results have special classes or attributes; use vctrs to try to
704+
# appropriately destructure for gcd_num, then restore to their original class
705+
# & attributes.
706+
skips_data <- vctrs::vec_data(skips)
707+
period_data <- gcd_num(skips_data, rrtol = 0)
708+
vctrs::vec_restore(period_data, skips)
709+
}
710+
711+
# `full_seq()` doesn't like difftimes, so convert to the natural units of some time types:
712+
713+
#' @export
714+
guess_period.Date <- function(time_values, time_values_arg = rlang::caller_arg(time_values), ...) {
715+
as.numeric(NextMethod(), units = "days")
716+
}
717+
718+
#' @export
719+
guess_period.POSIXt <- function(time_values, time_values_arg = rlang::caller_arg(time_values), ...) {
720+
as.numeric(NextMethod(), units = "secs")
697721
}

man/guess_period.Rd

+15-13
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

tests/testthat/test-utils.R

+50
Original file line numberDiff line numberDiff line change
@@ -231,3 +231,53 @@ test_that("as_slide_computation raises errors as expected", {
231231
class = "epiprocess__as_slide_computation__cant_convert_catchall"
232232
)
233233
})
234+
235+
test_that("guess_period works", {
236+
# Error cases:
237+
expect_error(guess_period(numeric(0L)), class = "epiprocess__guess_period__not_enough_times")
238+
expect_error(guess_period(c(1)), class = "epiprocess__guess_period__not_enough_times")
239+
# Different numeric classes and cases:
240+
expect_identical(guess_period(c(1, 8)), 7)
241+
expect_identical(guess_period(c(1, 8, 15)), 7)
242+
expect_identical(guess_period(c(1L, 8L, 15L)), 7L)
243+
expect_identical(guess_period(c(0, 7, 14, 15)), 1)
244+
# We currently allow the guessed frequency to no appear in the diffs, but this
245+
# might not be a good idea as it likely indicates an issue with the data. If
246+
# we drop this behavior we could also drop the gcd algorithm by just checking
247+
# the validity of the smallest diff:
248+
expect_identical(guess_period(c(0, 2, 5)), 1)
249+
expect_identical(guess_period(c(0, 4, 10)), 2)
250+
# On Dates:
251+
daily_dates <- seq(as.Date("2020-01-01"), as.Date("2020-01-15"), by = "day")
252+
weekly_dates <- seq(as.Date("2020-01-01"), as.Date("2020-01-15"), by = "week")
253+
expect_identical(
254+
daily_dates[[1L]] + guess_period(daily_dates) * (seq_along(daily_dates) - 1L),
255+
daily_dates
256+
)
257+
expect_identical(
258+
weekly_dates[[1L]] + guess_period(weekly_dates) * (seq_along(weekly_dates) - 1L),
259+
weekly_dates
260+
)
261+
# On POSIXcts:
262+
daily_posixcts <- as.POSIXct(daily_dates, tz = "ET") + 3600
263+
weekly_posixcts <- as.POSIXct(weekly_dates, tz = "ET") + 3600
264+
expect_identical(
265+
daily_posixcts[[1L]] + guess_period(daily_posixcts) * (seq_along(daily_posixcts) - 1L),
266+
daily_posixcts
267+
)
268+
expect_identical(
269+
weekly_posixcts[[1L]] + guess_period(weekly_posixcts) * (seq_along(weekly_posixcts) - 1L),
270+
weekly_posixcts
271+
)
272+
# On POSIXlts:
273+
daily_posixlts <- as.POSIXlt(daily_dates, tz = "ET") + 3600
274+
weekly_posixlts <- as.POSIXlt(weekly_dates, tz = "ET") + 3600
275+
expect_identical(
276+
daily_posixlts[[1L]] + guess_period(daily_posixlts) * (seq_along(daily_posixlts) - 1L),
277+
daily_posixlts
278+
)
279+
expect_identical(
280+
weekly_posixlts[[1L]] + guess_period(weekly_posixlts) * (seq_along(weekly_posixlts) - 1L),
281+
weekly_posixlts
282+
)
283+
})

0 commit comments

Comments
 (0)