add math ops and related tests

dajmcdon · dsweber2 · commit 7a87ae62465e · 2025-03-12T13:29:42.000-05:00
diff --git a/NAMESPACE b/NAMESPACE
@@ -39,9 +39,7 @@ S3method(extract_frosting,default)
 S3method(extract_frosting,epi_workflow)
 S3method(extract_layers,frosting)
 S3method(extract_layers,workflow)
-S3method(extrapolate_quantiles,dist_default)
-S3method(extrapolate_quantiles,dist_quantiles)
-S3method(extrapolate_quantiles,distribution)
+S3method(extrapolate_quantiles,quantile_pred)
 S3method(fit,epi_workflow)
 S3method(flusight_hub_formatter,canned_epipred)
 S3method(flusight_hub_formatter,data.frame)
@@ -119,6 +117,10 @@ S3method(tidy,check_enough_train_data)
 S3method(tidy,frosting)
 S3method(tidy,layer)
 S3method(update,layer)
+S3method(vec_arith,quantile_pred)
+S3method(vec_arith.numeric,quantile_pred)
+S3method(vec_arith.quantile_pred,numeric)
+S3method(vec_math,quantile_pred)
 S3method(weighted_interval_score,default)
 S3method(weighted_interval_score,dist_default)
 S3method(weighted_interval_score,dist_quantiles)
@@ -333,6 +335,8 @@ importFrom(tidyr,pivot_wider)
 importFrom(tidyr,unnest)
 importFrom(tidyselect,all_of)
 importFrom(utils,capture.output)
+importFrom(vctrs,vec_arith)
+importFrom(vctrs,vec_arith.numeric)
 importFrom(vctrs,vec_cast)
-importFrom(vctrs,vec_data)
+importFrom(vctrs,vec_math)
 importFrom(workflows,extract_preprocessor)
diff --git a/R/extrapolate_quantiles.R b/R/extrapolate_quantiles.R
@@ -32,34 +32,28 @@ extrapolate_quantiles <- function(x, probs, replace_na = TRUE, ...) {
 }
 
 #' @export
-#' @importFrom vctrs vec_data
-extrapolate_quantiles.distribution <- function(x, probs, replace_na = TRUE, ...) {
-  rlang::check_dots_empty()
+extrapolate_quantiles.quantile_pred <- function(x, probs, replace_na = TRUE, ...) {
   arg_is_lgl_scalar(replace_na)
   arg_is_probabilities(probs)
   if (is.unsorted(probs)) probs <- sort(probs)
-  dstn <- lapply(vec_data(x), extrapolate_quantiles, probs = probs, replace_na = replace_na)
-  new_vctr(dstn, vars = NULL, class = "distribution")
-}
-
-#' @export
-extrapolate_quantiles.dist_default <- function(x, probs, replace_na = TRUE, ...) {
-  values <- quantile(x, probs, ...)
-  new_quantiles(values = values, quantile_levels = probs)
-}
+  orig_probs <- x %@% "quantile_levels" # levels already attached to `x`
+  orig_values <- as.matrix(x) # one column per level is assumed by the cbind below
 
-#' @export
-extrapolate_quantiles.dist_quantiles <- function(x, probs, replace_na = TRUE, ...) {
-  orig_probs <- field(x, "quantile_levels")
-  orig_values <- field(x, "values")
-  new_probs <- c(orig_probs, probs)
-  dups <- duplicated(new_probs)
   if (!replace_na || !anyNA(orig_values)) {
-    new_values <- c(orig_values, quantile(x, probs, ...))
+    all_values <- cbind(orig_values, quantile(x, probs, ...)) # stored values kept as-is
   } else {
-    nas <- is.na(orig_values)
-    orig_values[nas] <- quantile(x, orig_probs[nas], ...)
-    new_values <- c(orig_values, quantile(x, probs, ...))
+    newx <- quantile(x, orig_probs, ...) %>% # NAs present: re-evaluate at stored levels
+      hardhat::quantile_pred(orig_probs)
+    all_values <- cbind(as.matrix(newx), quantile(newx, probs, ...))
   }
-  new_quantiles(new_values[!dups], new_probs[!dups])
+  all_probs <- c(orig_probs, probs)
+  dups <- duplicated(all_probs) # later repeats are flagged, so stored levels win
+  all_values <- all_values[, !dups, drop = FALSE]
+  all_probs <- all_probs[!dups]
+  o <- order(all_probs) # permutation that sorts the combined levels ascending
+
+  hardhat::quantile_pred(
+    all_values[, o, drop = FALSE],
+    quantile_levels = all_probs[o]
+  )
 }
diff --git a/R/quantile_pred-methods.R b/R/quantile_pred-methods.R
@@ -5,26 +5,29 @@ mean.quantile_pred <- function(x, na.rm = FALSE, ...) {
   median(x, ...)
 }
 
+
+# quantiles by treating quantile_pred like a distribution -----------------
+
+
 #' @export
 #' @importFrom stats quantile
-quantile.quantile_pred <- function(x, p, ..., middle = c("cubic", "linear")) {
+quantile.quantile_pred <- function(x, p, na.rm = FALSE, ...,
+                                   middle = c("cubic", "linear")) { # `na.rm` is not read below
   arg_is_probabilities(p)
   p <- sort(p)
   middle <- rlang::arg_match(middle)
-  quantile_extrapolate(x, p, middle)
+  quantile_internal(x, p, middle) # called with the sorted `p`; its result is returned as-is
 }
 
 
-quantile_extrapolate <- function(x, tau_out, middle) {
+quantile_internal <- function(x, tau_out, middle) {
   tau <- x %@% "quantile_levels"
   qvals <- as.matrix(x)
 
   # short circuit if we aren't actually extrapolating
   # matches to ~15 decimals
-  if (all(tau_out %in% tau)) {
-    return(hardhat::quantile_pred(
-      qvals[ ,match(tau_out, tau), drop = FALSE], tau_out
-    ))
+  if (all(tau_out %in% tau) && !anyNA(qvals)) {
+    return(qvals[ , match(tau_out, tau), drop = FALSE])
   }
   if (length(tau) < 2) {
     cli_abort(paste(
@@ -36,15 +39,26 @@ quantile_extrapolate <- function(x, tau_out, middle) {
     vctrs::vec_chop(qvals),
     ~ extrapolate_quantiles_single(.x, tau, tau_out, middle)
   )
-
-  hardhat::quantile_pred(qvals_out, tau_out)
+  qvals_out <- do.call(rbind, qvals_out) # ensure a matrix of the proper dims
+  qvals_out
 }
 
 extrapolate_quantiles_single <- function(qvals, tau, tau_out, middle) {
+  qvals_out <- rep(NA, length(tau_out))
+  good <- !is.na(qvals)
+  qvals <- qvals[good]
+  tau <- tau[good]
+
+  # in case we only have one point, and it matches something we wanted
+  if (length(good) < 2) {
+    matched_one <- tau_out %in% tau
+    qvals_out[matched_one] <- qvals[matched_one]
+    return(qvals_out)
+  }
+
   indl <- tau_out < min(tau)
   indr <- tau_out > max(tau)
   indm <- !indl & !indr
-  qvals_out <- rep(NA, length(tau_out))
 
   if (middle == "cubic") {
     method <- "cubic"
@@ -101,3 +115,44 @@ tail_extrapolate <- function(tau_out, qv) {
   m <- diff(y) / diff(x)
   m * (x0 - x[1]) + y[1]
 }
+
+
+# mathematical operations on the values -----------------------------------
+
+
+#' @importFrom vctrs vec_math
+#' @export
+#' @method vec_math quantile_pred
+vec_math.quantile_pred <- function(.fn, .x, ...) {
+  fn <- .fn # keep the name; `.fn` is rebound to the function itself below
+  if (fn %in% c("any", "all", "prod", "sum", "cumsum", "cummax", "cummin", "cumprod")) {
+    cli_abort("{.fn {fn}} is not a supported operation for {.cls quantile_pred}.")
+  }
+  .fn <- getExportedValue("base", fn)
+  quantile_levels <- .x %@% "quantile_levels"
+  # forward `...` so that e.g. `round(x, digits)` / `log(x, base)` are honoured
+  hardhat::quantile_pred(.fn(as.matrix(.x), ...), quantile_levels)
+}
+
+#' @importFrom vctrs vec_arith vec_arith.numeric
+#' @export
+#' @method vec_arith quantile_pred
+vec_arith.quantile_pred <- function(op, x, y, ...) {
+  UseMethod("vec_arith.quantile_pred", y) # second dispatch, on the class of `y`
+}
+
+#' @export
+#' @method vec_arith.quantile_pred numeric
+vec_arith.quantile_pred.numeric <- function(op, x, y, ...) {
+  args <- vctrs::vec_recycle_common(x = x, y = y) # errors on incompatible sizes
+  out <- getExportedValue("base", op)(as.matrix(args$x), args$y) # y[i] meets row i
+  hardhat::quantile_pred(out, x %@% "quantile_levels")
+}
+
+#' @export
+#' @method vec_arith.numeric quantile_pred
+vec_arith.numeric.quantile_pred <- function(op, x, y, ...) {
+  args <- vctrs::vec_recycle_common(x = x, y = y) # errors on incompatible sizes
+  out <- getExportedValue("base", op)(args$x, as.matrix(args$y)) # x[i] meets row i
+  hardhat::quantile_pred(out, y %@% "quantile_levels")
+}
diff --git a/tests/testthat/test-dist_quantiles.R b/tests/testthat/test-dist_quantiles.R
@@ -4,53 +4,38 @@ test_that("single quantile_pred works, quantiles are accessible", {
     quantile_levels = c(.2, .4, .5, .6, .8)
   )
   expect_equal(median(z), 3)
-  expect_equal(
-    quantile(z, c(.2, .4, .5, .6, .8)),
-    hardhat::quantile_pred(matrix(1:5, nrow = 1), c(.2, .4, .5, .6, .8))
-  )
+  expect_equal(quantile(z, c(.2, .4, .5, .6, .8)), matrix(1:5, nrow = 1))
   expect_equal(
     quantile(z, c(.3, .7), middle = "linear"),
-    hardhat::quantile_pred(matrix(c(1.5, 4.5), nrow = 1), c(.3, .7))
+    matrix(c(1.5, 4.5), nrow = 1)
   )
 
   Q <- stats::splinefun(c(.2, .4, .5, .6, .8), 1:5, method = "hyman")
-  expect_equal(quantile(z, c(.3, .7), middle = "cubic"), Q(c(.3, .7)))
+  expect_equal(quantile(z, c(.3, .7)), Q(c(.3, .7)))
   expect_identical(
     extrapolate_quantiles(z, c(.3, .7), middle = "linear"),
-    hardhat::quantile_pred(c(1, 1.5, 2, 3, 4, 4.5, 5), 2:8 / 10)
+    hardhat::quantile_pred(matrix(c(1, 1.5, 2, 3, 4, 4.5, 5), nrow = 1), 2:8 / 10)
   )
-  # empty values slot results in a length zero distribution
-  # see issue #361
-  # expect_length(dist_quantiles(list(), c(.1, .9)), 0L)
-  # expect_identical(
-  #   dist_quantiles(list(), c(.1, .9)),
-  #   distributional::dist_degenerate(double())
-  # )
 })
 
 
 test_that("quantile extrapolator works", {
-  dstn <- dist_normal(c(10, 2), c(5, 10))
-  qq <- extrapolate_quantiles(dstn, probs = c(.25, 0.5, .75))
-  expect_s3_class(qq, "distribution")
-  expect_s3_class(vctrs::vec_data(qq[1])[[1]], "dist_quantiles")
-  expect_length(parameters(qq[1])$quantile_levels[[1]], 3L)
-
-
-  dstn <- dist_quantiles(list(1:4, 8:11), list(c(.2, .4, .6, .8)))
+  dstn <- hardhat::quantile_pred(
+    matrix(c(1:4, 8:11), nrow = 2, byrow = TRUE), # rows are 1:4 and 8:11
+    c(.2, .4, .6, .8)
+  )
   qq <- extrapolate_quantiles(dstn, probs = c(.25, 0.5, .75))
-  expect_s3_class(qq, "distribution")
-  expect_s3_class(vctrs::vec_data(qq[1])[[1]], "dist_quantiles")
-  expect_length(parameters(qq[1])$quantile_levels[[1]], 7L)
+  expect_s3_class(qq, c("quantile_pred", "vctrs_vctr", "list"))
+  expect_length(qq %@% "quantile_levels", 7L) # 4 stored + 3 requested, none shared
 
-  dstn <- dist_quantiles(1:4, 1:4 / 5)
+  dstn <- hardhat::quantile_pred(matrix(1:4, nrow = 1), 1:4 / 5)
   qq <- extrapolate_quantiles(dstn, 1:9 / 10)
-  dstn_na <- dist_quantiles(c(1, 2, NA, 4), 1:4 / 5)
+  dstn_na <- hardhat::quantile_pred(matrix(c(1, 2, NA, 4), nrow = 1), 1:4 / 5) # NA at level 0.6
   qq2 <- extrapolate_quantiles(dstn_na, 1:9 / 10)
   expect_equal(qq, qq2)
   qq3 <- extrapolate_quantiles(dstn_na, 1:9 / 10, replace_na = FALSE)
-  qq2_vals <- field(vec_data(qq2)[[1]], "values")
-  qq3_vals <- field(vec_data(qq3)[[1]], "values")
+  qq2_vals <- unlist(qq2)
+  qq3_vals <- unlist(qq3) # compared below after blanking position 6 (level 0.6) in qq2_vals
   qq2_vals[6] <- NA
   expect_equal(qq2_vals, qq3_vals)
 })
@@ -60,7 +45,7 @@ test_that("small deviations of quantile requests work", {
   v <- c(0.0890306, 0.1424997, 0.1971793, 0.2850978, 0.3832912, 0.4240479)
   badl <- l
   badl[1] <- badl[1] - 1e-14
-  distn <- dist_quantiles(list(v), list(l))
+  distn <- hardhat::quantile_pred(matrix(v, nrow = 1), l)
 
   # was broken before, now works
   expect_equal(quantile(distn, l), quantile(distn, badl))
@@ -69,50 +54,51 @@ test_that("small deviations of quantile requests work", {
   # the smallest (largest) values or we could end up unsorted
   l <- 1:9 / 10
   v <- 1:9
-  distn <- dist_quantiles(list(v), list(l))
-  expect_equal(quantile(distn, c(.25, .75)), list(c(2.5, 7.5)))
-  expect_equal(quantile(distn, c(.1, .9)), list(c(1, 9)))
+  distn <- hardhat::quantile_pred(matrix(v, nrow = 1), l)
+  expect_equal(quantile(distn, c(.25, .75)), matrix(c(2.5, 7.5), nrow = 1))
+  expect_equal(quantile(distn, c(.1, .9)), matrix(c(1, 9), nrow = 1))
   qv <- data.frame(q = l, v = v)
   expect_equal(
-    unlist(quantile(distn, c(.01, .05))),
+    drop(quantile(distn, c(.01, .05))),
     tail_extrapolate(c(.01, .05), head(qv, 2))
   )
   expect_equal(
-    unlist(quantile(distn, c(.99, .95))),
+    drop(quantile(distn, c(.99, .95))),
     tail_extrapolate(c(.95, .99), tail(qv, 2))
   )
 })
 
 test_that("unary math works on quantiles", {
-  dstn <- dist_quantiles(list(1:4, 8:11), list(c(.2, .4, .6, .8)))
-  dstn2 <- dist_quantiles(list(log(1:4), log(8:11)), list(c(.2, .4, .6, .8)))
+  dstn <- hardhat::quantile_pred(
+    matrix(c(1:4, 8:11), nrow = 2, byrow = TRUE), # rows are 1:4 and 8:11
+    1:4 / 5
+  )
+  dstn2 <- hardhat::quantile_pred(
+    log(matrix(c(1:4, 8:11), nrow = 2, byrow = TRUE)), # expected: log of every value
+    1:4 / 5
+  )
   expect_identical(log(dstn), dstn2)
 
-  dstn2 <- dist_quantiles(list(cumsum(1:4), cumsum(8:11)), list(c(.2, .4, .6, .8)))
-  expect_identical(cumsum(dstn), dstn2)
 })
 
 test_that("arithmetic works on quantiles", {
-  dstn <- dist_quantiles(list(1:4, 8:11), list(c(.2, .4, .6, .8)))
-  dstn2 <- dist_quantiles(list(1:4 + 1, 8:11 + 1), list(c(.2, .4, .6, .8)))
+  dstn <- hardhat::quantile_pred(
+    matrix(c(1:4, 8:11), nrow = 2, byrow = TRUE), # rows are 1:4 and 8:11
+    1:4 / 5
+  )
+  dstn2 <- hardhat::quantile_pred(
+    matrix(c(1:4, 8:11), nrow = 2, byrow = TRUE) + 1, # expected: every value shifted by 1
+    1:4 / 5
+  )
   expect_identical(dstn + 1, dstn2)
   expect_identical(1 + dstn, dstn2)
 
-  dstn2 <- dist_quantiles(list(1:4 / 4, 8:11 / 4), list(c(.2, .4, .6, .8)))
+  dstn2 <- hardhat::quantile_pred(
+    matrix(c(1:4, 8:11), nrow = 2, byrow = TRUE) / 4, # expected: every value divided by 4
+    1:4 / 5
+  )
   expect_identical(dstn / 4, dstn2)
   expect_identical((1 / 4) * dstn, dstn2)
 
-  expect_snapshot(error = TRUE, sum(dstn))
-  expect_snapshot(error = TRUE, suppressWarnings(dstn + distributional::dist_normal()))
-})
-
-test_that("quantile.dist_quantile works for NA vectors", {
-  distn <- dist_quantiles(
-    list(c(NA, NA)),
-    list(1:2 / 3)
-  )
-  expect_true(is.na(quantile(distn, p = 0.5)))
-  expect_true(is.na(median(distn)))
-  expect_true(is.na(mean(distn)))
-  expect_equal(format(distn), "quantiles(NA)[2]")
+  expect_error(sum(dstn)) # any error passes: no message or class is pinned here
 })