-
Notifications
You must be signed in to change notification settings - Fork 10
240 quantile pivot #241
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
240 quantile pivot #241
Changes from 4 commits
8a1e2d6
cea1599
d606741
7294c00
f18e88f
d6a28f3
237ec50
c13b83e
16f6c2c
ce0b180
f97166b
9dd0a2c
16139ff
c9b4667
d59a691
21b4c85
965155d
1cf5dff
1458ab0
0b9b537
7358c13
169f764
8d1e47d
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -12,3 +12,4 @@ | |
^musings$ | ||
^data-raw$ | ||
^vignettes/articles$ | ||
^.git-blame-ignore-revs$ |
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -32,6 +32,7 @@ Imports: | |
generics, | ||
glue, | ||
hardhat (>= 1.3.0), | ||
lifecycle, | ||
magrittr, | ||
methods, | ||
quantreg, | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,5 +1,8 @@ | ||
## usethis namespace: start | ||
#' @importFrom tibble tibble | ||
#' @importFrom rlang := !! | ||
#' @importFrom stats poly predict lm residuals quantile | ||
#' @importFrom lifecycle deprecated | ||
#' @import epiprocess parsnip | ||
## usethis namespace: end | ||
NULL |
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,160 @@ | ||
#' Turn a vector of quantile distributions into a list-col | ||
#' | ||
#' @param x a `distribution` containing `dist_quantiles` | ||
#' | ||
#' @return a list-col | ||
#' @export | ||
#' | ||
#' @examples | ||
#' edf <- case_death_rate_subset[1:3, ] | ||
#' edf$q <- dist_quantiles(list(1:5, 2:4, 3:10), list(1:5 / 6, 2:4 / 5, 3:10 / 11)) | ||
#' | ||
#' edf_nested <- edf %>% dplyr::mutate(q = nested_quantiles(q)) | ||
#' edf_nested %>% tidyr::unnest(q) | ||
nested_quantiles <- function(x) { | ||
stopifnot(is_dist_quantiles(x)) | ||
distributional:::dist_apply(x, .f = function(z) { | ||
tibble::as_tibble(vec_data(z)) %>% | ||
dplyr::mutate(dplyr::across(tidyselect::everything(), as.double)) %>% | ||
list_of() | ||
}) | ||
} | ||
|
||
|
||
#' Pivot columns containing `dist_quantile` longer | ||
#' | ||
#' Selected columns that contains `dist_quantiles` will be "lengthened" with | ||
#' the "taus" (quantile) serving as 1 column and the values as another. If | ||
#' multiple columns are selected, these will be prefixed the the column name. | ||
dajmcdon marked this conversation as resolved.
Show resolved
Hide resolved
|
||
#' | ||
#' @param .data A data frame, or a data frame extension such as a tibble or | ||
#' epi_df. | ||
#' @param ... <[`tidy-select`][dplyr::dplyr_tidy_select]> One or more unquoted | ||
#' expressions separated by commas. Variable names can be used as if they | ||
#' were positions in the data frame, so expressions like `x:y` can | ||
#' be used to select a range of variables. | ||
#' @param .ignore_length_check If multiple columns are selected, as long as | ||
#' each row has contains the same number of quantiles, the result will be | ||
#' reasonable. But if, for example, `var1[1]` has 5 quantiles while `var2[1]` | ||
#' has 7, then the only option would be to recycle everything, creating a | ||
#' _very_ long result. By default, this would throw an error. But if this is | ||
#' really the goal, then the error can be bypassed by setting this argument | ||
#' to `TRUE`. The first selected column will vary fastest. | ||
nmdefries marked this conversation as resolved.
Show resolved
Hide resolved
|
||
#' | ||
#' @return An object of the same class as `.data`. | ||
#' @export | ||
#' | ||
#' @examples | ||
#' d1 <- c(dist_quantiles(1:3, 1:3 / 4), dist_quantiles(2:4, 1:3 / 4)) | ||
#' d2 <- c(dist_quantiles(2:4, 2:4 / 5), dist_quantiles(3:5, 2:4 / 5)) | ||
#' tib <- tibble::tibble(g = c("a", "b"), d1 = d1, d2 = d2) | ||
#' | ||
#' pivot_quantiles_longer(tib, "d1") | ||
nmdefries marked this conversation as resolved.
Show resolved
Hide resolved
|
||
#' pivot_quantiles_longer(tib, tidyselect::ends_with("1")) | ||
#' pivot_quantiles_longer(tib, d1, d2) | ||
pivot_quantiles_longer <- function(.data, ..., .ignore_length_check = FALSE) { | ||
cols <- validate_pivot_quantiles(.data, ...) | ||
.data <- .data %>% | ||
dplyr::mutate(dplyr::across(tidyselect::all_of(cols), nested_quantiles)) | ||
if (length(cols) > 1L) { | ||
lengths_check <- .data %>% | ||
dplyr::transmute(dplyr::across( | ||
tidyselect::all_of(cols), | ||
~ map_int(.x, vctrs::vec_size) | ||
)) %>% | ||
as.matrix() %>% | ||
apply(1, function(x) dplyr::n_distinct(x) == 1L) %>% | ||
all() | ||
if (lengths_check) { | ||
.data <- tidyr::unnest(.data, tidyselect::all_of(cols), names_sep = "_") | ||
} else { | ||
if (.ignore_length_check) { | ||
for (col in cols) { | ||
.data <- .data %>% | ||
tidyr::unnest(tidyselect::all_of(col), names_sep = "_") | ||
} | ||
} else { | ||
cli::cli_abort(c( | ||
"Some selected columns contain different numbers of quantiles.", | ||
"The result would be a {.emph very} long {.cls tibble}.", | ||
"To do this anyway, rerun with `.ignore_length_check = TRUE`." | ||
)) | ||
} | ||
} | ||
} else { | ||
.data <- .data %>% tidyr::unnest(tidyselect::all_of(cols)) | ||
} | ||
.data | ||
} | ||
|
||
#' Pivot columns containing `dist_quantile` wider | ||
#' | ||
#' Any selected columns that contain `dist_quantiles` will be "widened" with | ||
#' the "taus" (quantile) serving as names and the values in the data frame. | ||
#' When pivoting multiple columns, the original column name will be used as | ||
#' a prefix. | ||
#' | ||
#' @param .data A data frame, or a data frame extension such as a tibble or | ||
#' epi_df. | ||
#' @param ... <[`tidy-select`][dplyr::dplyr_tidy_select]> One or more unquoted | ||
#' expressions separated by commas. Variable names can be used as if they | ||
#' were positions in the data frame, so expressions like `x:y` can | ||
#' be used to select a range of variables. | ||
#' | ||
#' @return An object of the same class as `.data` | ||
#' @export | ||
#' | ||
#' @examples | ||
#' d1 <- c(dist_quantiles(1:3, 1:3 / 4), dist_quantiles(2:4, 1:3 / 4)) | ||
#' d2 <- c(dist_quantiles(2:4, 2:4 / 5), dist_quantiles(3:5, 2:4 / 5)) | ||
#' tib <- tibble::tibble(g = c("a", "b"), d1 = d1, d2 = d2) | ||
#' | ||
#' pivot_quantiles_wider(tib, c("d1", "d2")) | ||
#' pivot_quantiles_wider(tib, tidyselect::starts_with("d")) | ||
#' pivot_quantiles_wider(tib, d2) | ||
pivot_quantiles_wider <- function(.data, ...) { | ||
cols <- validate_pivot_quantiles(.data, ...) | ||
.data <- .data %>% | ||
dplyr::mutate(dplyr::across(tidyselect::all_of(cols), nested_quantiles)) | ||
checks <- map_lgl(cols, ~ diff(range(vctrs::list_sizes(.data[[.x]]))) == 0L) | ||
if (!all(checks)) { | ||
nms <- cols[!checks] | ||
cli::cli_abort( | ||
c("Quantiles must be the same length and have the same set of taus.", | ||
i = "Check failed for variables(s) {.var {nms}}." | ||
) | ||
) | ||
} | ||
if (length(cols) > 1L) { | ||
for (col in cols) { | ||
.data <- .data %>% | ||
tidyr::unnest(tidyselect::all_of(col)) %>% | ||
tidyr::pivot_wider( | ||
names_from = "tau", values_from = "q", | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Definitely hairy, unfortunately. |
||
names_prefix = paste0(col, "_") | ||
) | ||
} | ||
} else { | ||
.data <- .data %>% | ||
tidyr::unnest(tidyselect::all_of(cols)) %>% | ||
tidyr::pivot_wider(names_from = "tau", values_from = "q") | ||
} | ||
.data | ||
} | ||
|
||
pivot_quantiles <- function(.data, ...) { | ||
lifecycle::deprecate_stop("0.0.6", "pivot_quantiles()", "pivot_quantiles_wider()") | ||
} | ||
|
||
validate_pivot_quantiles <- function(.data, ...) { | ||
expr <- rlang::expr(c(...)) | ||
cols <- names(tidyselect::eval_select(expr, .data)) | ||
dqs <- map_lgl(cols, ~ is_dist_quantiles(.data[[.x]])) | ||
if (!all(dqs)) { | ||
nms <- cols[!dqs] | ||
cli::cli_abort( | ||
"Variables(s) {.var {nms}} are not `dist_quantiles`. Cannot pivot them." | ||
) | ||
} | ||
cols | ||
} |
Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.
Uh oh!
There was an error while loading. Please reload this page.