Skip to content

Smooth quant reg #183

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 14 commits into from
Jun 16, 2023
4 changes: 3 additions & 1 deletion DESCRIPTION
Original file line number Diff line number Diff line change
Expand Up @@ -37,6 +37,7 @@ Imports:
quantreg,
recipes (>= 1.0.4),
rlang,
smoothqr,
stats,
tibble,
tidyr,
Expand All @@ -61,7 +62,8 @@ VignetteBuilder:
knitr
Remotes:
cmu-delphi/epidatr,
cmu-delphi/epiprocess@dev
cmu-delphi/epiprocess@dev,
dajmcdon/smoothqr
Config/testthat/edition: 3
Encoding: UTF-8
LazyData: true
Expand Down
6 changes: 6 additions & 0 deletions NAMESPACE
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,8 @@ S3method(extrapolate_quantiles,dist_default)
S3method(extrapolate_quantiles,dist_quantiles)
S3method(extrapolate_quantiles,distribution)
S3method(format,dist_quantiles)
S3method(is.na,dist_quantiles)
S3method(is.na,distribution)
S3method(mean,dist_quantiles)
S3method(median,dist_quantiles)
S3method(predict,epi_workflow)
Expand Down Expand Up @@ -61,6 +63,7 @@ S3method(print,layer_predictive_distn)
S3method(print,layer_quantile_distn)
S3method(print,layer_residual_quantiles)
S3method(print,layer_threshold)
S3method(print,layer_unnest)
S3method(print,step_epi_ahead)
S3method(print,step_epi_lag)
S3method(print,step_growth_rate)
Expand All @@ -81,6 +84,7 @@ S3method(slather,layer_predictive_distn)
S3method(slather,layer_quantile_distn)
S3method(slather,layer_residual_quantiles)
S3method(slather,layer_threshold)
S3method(slather,layer_unnest)
S3method(snap,default)
S3method(snap,dist_default)
S3method(snap,dist_quantiles)
Expand Down Expand Up @@ -132,6 +136,7 @@ export(layer_predictive_distn)
export(layer_quantile_distn)
export(layer_residual_quantiles)
export(layer_threshold)
export(layer_unnest)
export(nested_quantiles)
export(new_default_epi_recipe_blueprint)
export(new_epi_recipe_blueprint)
Expand All @@ -140,6 +145,7 @@ export(prep)
export(quantile_reg)
export(remove_frosting)
export(slather)
export(smooth_quantile_reg)
export(step_epi_ahead)
export(step_epi_lag)
export(step_epi_naomit)
Expand Down
8 changes: 8 additions & 0 deletions R/create-layer.R
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,14 @@
#'
create_layer <- function(name = NULL, open = rlang::is_interactive()) {
name <- name %||% usethis:::get_active_r_file(path = "R")
if (substr(name, 1, 5) == "layer") {
nn <- substring(name, 6)
if (substr(nn, 1, 1) == "_") nn <- substring(nn, 2)
cli::cli_abort(
c('`name` should not begin with "layer" or "layer_".',
i = 'Did you mean to use `create_layer("{ nn }")`?')
)
}
layer_name <- name
name <- paste0("layer_", name)
name <- usethis:::slug(name, "R")
Expand Down
12 changes: 12 additions & 0 deletions R/dist_quantiles.R
Original file line number Diff line number Diff line change
Expand Up @@ -360,3 +360,15 @@ Ops.dist_quantiles <- function(e1, e2) {
new_quantiles(q = q, tau = tau)
}

#' @method is.na distribution
#' @export
is.na.distribution <- function(x) {
sapply(vctrs::vec_data(x), is.na)
}

#' @method is.na dist_quantiles
#' @export
is.na.dist_quantiles <- function(x) {
q <- field(x, "q")
all(is.na(q))
}
46 changes: 46 additions & 0 deletions R/layer_unnest.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,46 @@
#' Unnest prediction list-cols
#'
#' @param frosting a `frosting` postprocessor
#' @param ... <[`tidy-select`][dplyr::dplyr_tidy_select]> One or more unquoted
#' expressions separated by commas. Variable names can be used as if they
#' were positions in the data frame, so expressions like `x:y` can
#' be used to select a range of variables.
#' @param id a random id string
#'
#' @return an updated `frosting` postprocessor
#' @export
layer_unnest <- function(frosting, ..., id = rand_id("unnest")) {
arg_is_chr_scalar(id)

add_layer(
frosting,
layer_unnest_new(
terms = dplyr::enquos(...),
id = id
)
)
}

layer_unnest_new <- function(terms, id) {
layer("unnest", terms = terms, id = id)
}

#' @export
slather.layer_unnest <-
function(object, components, the_fit, the_recipe, ...) {
exprs <- rlang::expr(c(!!!object$terms))
pos <- tidyselect::eval_select(exprs, components$predictions)
col_names <- names(pos)
components$predictions <- components$predictions %>%
tidyr::unnest(col_names)

components
}

#' @export
print.layer_unnest <- function(
x, width = max(20, options()$width - 30), ...) {

title <- "Unnesting prediction list-cols"
print_layer(x$terms, title = title, width = width)
}
201 changes: 201 additions & 0 deletions R/make_smooth_quantile_reg.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,201 @@

#' Smooth quantile regression
#'
#' @description
#' `smooth_quantile_reg()` generates a quantile regression model _specification_ for
#' the [tidymodels](https://www.tidymodels.org/) framework. Currently, the
#' only supported engine is [smoothqr::smooth_qr()].
#'
#' @param mode A single character string for the type of model.
#' The only possible value for this model is "regression".
#' @param engine Character string naming the fitting function. Currently, only
#' "smooth_qr" is supported.
#' @param tau A scalar or vector of values in (0, 1) to determine which
#' quantiles to estimate (default is 0.5).
#' @param outcome_locations Defaults to the vector `1:ncol(y)` but if the
#' responses are observed at a different spacing (or appear in a different
#' order), that information should be used here. This
#' argument will be mapped to the `ahead` argument of [smoothqr::smooth_qr()].
#' @param degree the number of polynomials used for response smoothing. Must
#' be no more than the number of responses.
#' @export
#'
#' @seealso [fit.model_spec()], [set_engine()]
#'
#' @importFrom quantreg rq
#' @examples
#' tib <- data.frame(
#' y1 = rnorm(100), y2 = rnorm(100), y3 = rnorm(100),
#' y4 = rnorm(100), y5 = rnorm(100), y6 = rnorm(100),
#' x1 = rnorm(100), x2 = rnorm(100))
#' qr_spec <- smooth_quantile_reg(tau = c(.2, .5, .8), outcome_locations = 1:6)
#' ff <- qr_spec %>% fit(cbind(y1, y2 , y3 , y4 , y5 , y6) ~ ., data = tib)
#' p <- predict(ff, new_data = tib)
#'
#' x <- -99:99 / 100 * 2 * pi
#' y <- sin(x) + rnorm(length(x), sd = .1)
#' fd <- x[length(x) - 20]
#' XY <- smoothqr::lagmat(y[1:(length(y) - 20)], c(-20:20))
#' XY <- tibble::as_tibble(XY)
#' qr_spec <- smooth_quantile_reg(tau = c(.2, .5, .8), outcome_locations = 20:1)
#' tt <- qr_spec %>% fit_xy(x = XY[,21:41], y = XY[,1:20])
#'
#' library(tidyr)
#' library(dplyr)
#' pl <- predict(
#' object = tt,
#' new_data = XY[max(which(complete.cases(XY[,21:41]))), 21:41]
#' )
#' pl <- pl %>%
#' unnest(.pred) %>%
#' mutate(distn = nested_quantiles(distn)) %>%
#' unnest(distn) %>%
#' mutate(x = x[length(x) - 20] + ahead / 100 * 2 * pi,
#' ahead = NULL) %>%
#' pivot_wider(names_from = tau, values_from = q)
#' plot(x, y, pch = 16, xlim = c(pi, 2 * pi), col = "lightgrey")
#' curve(sin(x), add = TRUE)
#' abline(v = fd, lty = 2)
#' lines(pl$x, pl$`0.2`, col = "blue")
#' lines(pl$x, pl$`0.8`, col = "blue")
#' lines(pl$x, pl$`0.5`, col = "red")
#' \dontrun{
#' ggplot(data.frame(x = x, y = y), aes(x)) +
#' geom_ribbon(data = pl, aes(ymin = `0.2`, ymax = `0.8`), fill = "lightblue") +
#' geom_point(aes(y = y), colour = "grey") + # observed data
#' geom_function(fun = sin, colour = "black") + # truth
#' geom_vline(xintercept = fd, linetype = "dashed") + # end of training data
#' geom_line(data = pl, aes(y = `0.5`), colour = "red") + # median prediction
#' theme_bw() +
#' coord_cartesian(xlim = c(0, NA)) +
#' ylab("y")
#' }
smooth_quantile_reg <- function(
mode = "regression",
engine = "smoothqr",
outcome_locations = NULL,
tau = 0.5,
degree = 3L) {

# Check for correct mode
if (mode != "regression") rlang::abort("`mode` must be 'regression'")
if (engine != "smoothqr") rlang::abort("`engine` must be 'smoothqr'")

arg_is_probabilities(tau)
arg_is_pos_int(degree)
arg_is_scalar(degree)
arg_is_numeric(outcome_locations, allow_null = TRUE)
if (is.unsorted(tau)) {
rlang::warn("Sorting tau to increasing order.")
tau <- sort(tau)
}

args <- list(tau = rlang::enquo(tau), degree = rlang::enquo(degree),
outcome_locations = rlang::enquo(outcome_locations))

# Save some empty slots for future parts of the specification
parsnip::new_model_spec(
"smooth_quantile_reg",
args = args,
eng_args = NULL,
mode = mode,
method = NULL,
engine = engine
)
}


make_smooth_quantile_reg <- function() {
parsnip::set_new_model("smooth_quantile_reg")
parsnip::set_model_mode("smooth_quantile_reg", "regression")
parsnip::set_model_engine("smooth_quantile_reg", "regression", eng = "smoothqr")
parsnip::set_dependency(
"smooth_quantile_reg",
eng = "smoothqr",
pkg = "smoothqr",
mode = "regression"
)

parsnip::set_model_arg(
model = "smooth_quantile_reg",
eng = "smoothqr",
parsnip = "tau",
original = "tau",
func = list(pkg = "smoothqr", fun = "smooth_qr"),
has_submodel = FALSE
)
parsnip::set_model_arg(
model = "smooth_quantile_reg",
eng = "smoothqr",
parsnip = "degree",
original = "degree",
func = list(pkg = "smoothqr", fun = "smooth_qr"),
has_submodel = FALSE
)
parsnip::set_model_arg(
model = "smooth_quantile_reg",
eng = "smoothqr",
parsnip = "outcome_locations",
original = "aheads",
func = list(pkg = "smoothqr", fun = "smooth_qr"),
has_submodel = FALSE
)

parsnip::set_fit(
model = "smooth_quantile_reg",
eng = "smoothqr",
mode = "regression",
value = list(
interface = "data.frame",
protect = c("x", "y"), # prevent user from touching these
func = c(pkg = "smoothqr", fun = "smooth_qr"),
defaults = list(intercept = TRUE)
)
)

parsnip::set_encoding(
model = "smooth_quantile_reg",
eng = "smoothqr",
mode = "regression",
options = list(
predictor_indicators = "traditional", # factor -> dummy conversion w/ baseline
compute_intercept = TRUE, # put an intercept into the design matrix
remove_intercept = TRUE, # but then remove it, we'll put it back in the function
allow_sparse_x = FALSE # quantgen::rq can't handle sparse x, unfortunately
)
)

process_smooth_qr_preds <- function(x, object) {
object <- parsnip::extract_fit_engine(object)
list_of_pred_distns <- lapply(x, function(p) {
x <- unname(apply(
p, 1, function(q) unname(sort(q, na.last = TRUE)), simplify = FALSE
))
dist_quantiles(x, list(object$tau))
})
n_preds <- length(list_of_pred_distns[[1]])
nout <- length(list_of_pred_distns)
tib <- tibble::tibble(
ids = rep(seq(n_preds), times = nout),
ahead = rep(object$aheads, each = n_preds),
distn = do.call(c, unname(list_of_pred_distns))) %>%
tidyr::nest(.pred = c(ahead, distn))

return(tib[".pred"])
}


parsnip::set_pred(
model = "smooth_quantile_reg",
eng = "smoothqr",
mode = "regression",
type = "numeric",
value = list(
pre = NULL,
post = process_smooth_qr_preds,
func = c(fun = "predict"),
args = list(object = quote(object$fit), newdata = quote(new_data))
)
)
}

Loading