Skip to content

Commit 7eeb636

Browse files
authored
Sanitise bin calculations (#6212)
* capture bin argument fixup in function * capture binning logic in function * helper for cutting bins * consistency of `stat_bin2d()` * necromancy: resurrect `stat_bin(drop)` by sacrificing `stat_bin(keep.zeroes)` * fix `boundary = 0` * same treatment for `stat_summary2d()` * Implement `StatBin2d` as subclass of `StatSummary2d` * consistency for `stat_summary_bin()` * document * collect bin utilities in one place * remove vestigial `bin2d_breaks()` * discard superfluous `self` * bring `stat_bindot()` into the fold * centralise argument checking in `compute_bins()` * `stat_bin_2d(boundary)` internally defaults to 0 * add news bullets * allow lambda notation for breaks/binwidth/bins * improve docs
1 parent 321b300 commit 7eeb636

19 files changed

+327
-247
lines changed

DESCRIPTION

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -242,6 +242,7 @@ Collate:
242242
'scales-.R'
243243
'stat-align.R'
244244
'stat-bin.R'
245+
'stat-summary-2d.R'
245246
'stat-bin2d.R'
246247
'stat-bindot.R'
247248
'stat-binhex.R'
@@ -263,7 +264,6 @@ Collate:
263264
'stat-smooth-methods.R'
264265
'stat-smooth.R'
265266
'stat-sum.R'
266-
'stat-summary-2d.R'
267267
'stat-summary-bin.R'
268268
'stat-summary-hex.R'
269269
'stat-summary.R'

NEWS.md

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -254,7 +254,7 @@
254254
* The ellipsis argument is now checked in `fortify()`, `get_alt_text()`,
255255
`labs()` and several guides (@teunbrand, #3196).
256256
* `stat_summary_bin()` no longer ignores `width` parameter (@teunbrand, #4647).
257-
* Added `keep.zeroes` argument to `stat_bin()` (@teunbrand, #3449)
257+
* Reintroduced `drop` argument to `stat_bin()` (@teunbrand, #3449)
258258
* (internal) removed barriers for using 2D structures as aesthetics
259259
(@teunbrand, #4189).
260260
* `coord_sf()` no longer errors when dealing with empty graticules (@teunbrand, #6052)
@@ -272,6 +272,10 @@
272272
* Stricter check on `register_theme_elements(element_tree)` (@teunbrand, #6162)
273273
* Added `weight` aesthetic for `stat_ellipse()` (@teunbrand, #5272)
274274
* Fixed a bug where the `guide_custom(order)` wasn't working (@teunbrand, #6195)
275+
* All binning stats now use the `boundary`/`center` parametrisation rather
276+
than `origin`, following in `stat_bin()`'s footsteps (@teunbrand).
277+
* `stat_summary_2d()` and `stat_bin_2d()` now deal with zero-range data
278+
more elegantly (@teunbrand, #6207).
275279

276280
# ggplot2 3.5.1
277281

R/bin.R

Lines changed: 94 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -54,19 +54,12 @@ bin_breaks <- function(breaks, closed = c("right", "left")) {
5454

5555
bin_breaks_width <- function(x_range, width = NULL, center = NULL,
5656
boundary = NULL, closed = c("right", "left")) {
57-
check_length(x_range, 2L)
5857

59-
# binwidth seems to be the argument name supplied to width. (stat-bin and stat-bindot)
60-
check_number_decimal(width, min = 0, allow_infinite = FALSE, arg = "binwidth")
61-
62-
if (!is.null(boundary) && !is.null(center)) {
63-
cli::cli_abort("Only one of {.arg boundary} and {.arg center} may be specified.")
64-
} else if (is.null(boundary)) {
58+
if (is.null(boundary)) {
6559
if (is.null(center)) {
6660
# If neither edge nor center given, compute both using tile layer's
6761
# algorithm. This puts min and max of data in outer half of their bins.
6862
boundary <- width / 2
69-
7063
} else {
7164
# If center given but not boundary, compute boundary.
7265
boundary <- center - width / 2
@@ -75,9 +68,6 @@ bin_breaks_width <- function(x_range, width = NULL, center = NULL,
7568

7669
# Find the left side of left-most bin: inputs could be Dates or POSIXct, so
7770
# coerce to numeric first.
78-
x_range <- as.numeric(x_range)
79-
width <- as.numeric(width)
80-
boundary <- as.numeric(boundary)
8171
shift <- floor((x_range[1] - boundary) / width)
8272
origin <- boundary + shift * width
8373

@@ -104,9 +94,7 @@ bin_breaks_width <- function(x_range, width = NULL, center = NULL,
10494

10595
bin_breaks_bins <- function(x_range, bins = 30, center = NULL,
10696
boundary = NULL, closed = c("right", "left")) {
107-
check_length(x_range, 2L)
10897

109-
check_number_whole(bins, min = 1)
11098
if (zero_range(x_range)) {
11199
# 0.1 is the same width as the expansion `default_expansion()` gives for 0-width data
112100
width <- 0.1
@@ -128,6 +116,56 @@ bin_breaks_bins <- function(x_range, bins = 30, center = NULL,
128116

129117
# Compute bins ------------------------------------------------------------
130118

119+
compute_bins <- function(x, scale = NULL, breaks = NULL, binwidth = NULL, bins = NULL,
120+
center = NULL, boundary = NULL,
121+
closed = c("right", "left")) {
122+
123+
range <- if (is.scale(scale)) scale$dimension() else range(x)
124+
check_length(range, 2L)
125+
126+
if (!is.null(breaks)) {
127+
breaks <- allow_lambda(breaks)
128+
if (is.function(breaks)) {
129+
breaks <- breaks(x)
130+
}
131+
if (is.scale(scale) && !scale$is_discrete()) {
132+
breaks <- scale$transform(breaks)
133+
}
134+
check_numeric(breaks)
135+
bins <- bin_breaks(breaks, closed)
136+
return(bins)
137+
}
138+
139+
check_number_decimal(boundary, allow_infinite = FALSE, allow_null = TRUE)
140+
check_number_decimal(center, allow_infinite = FALSE, allow_null = TRUE)
141+
if (!is.null(boundary) && !is.null(center)) {
142+
cli::cli_abort("Only one of {.arg boundary} and {.arg center} may be specified.")
143+
}
144+
145+
if (!is.null(binwidth)) {
146+
binwidth <- allow_lambda(binwidth)
147+
if (is.function(binwidth)) {
148+
binwidth <- binwidth(x)
149+
}
150+
check_number_decimal(binwidth, min = 0, allow_infinite = FALSE)
151+
bins <- bin_breaks_width(
152+
range, binwidth,
153+
center = center, boundary = boundary, closed = closed
154+
)
155+
return(bins)
156+
}
157+
158+
bins <- allow_lambda(bins)
159+
if (is.function(bins)) {
160+
bins <- bins(x)
161+
}
162+
check_number_whole(bins, min = 1, allow_infinite = FALSE)
163+
bin_breaks_bins(
164+
range, bins,
165+
center = center, boundary = boundary, closed = closed
166+
)
167+
}
168+
131169
bin_vector <- function(x, bins, weight = NULL, pad = FALSE) {
132170
check_object(bins, is_bins, "a {.cls ggplot2_bins} object")
133171

@@ -141,8 +179,7 @@ bin_vector <- function(x, bins, weight = NULL, pad = FALSE) {
141179
weight[is.na(weight)] <- 0
142180
}
143181

144-
bin_idx <- cut(x, bins$fuzzy, right = bins$right_closed,
145-
include.lowest = TRUE)
182+
bin_idx <- bin_cut(x, bins)
146183
bin_count <- as.numeric(tapply(weight, bin_idx, sum, na.rm = TRUE))
147184
bin_count[is.na(bin_count)] <- 0
148185

@@ -170,6 +207,10 @@ bin_vector <- function(x, bins, weight = NULL, pad = FALSE) {
170207
bin_out(bin_count, bin_x, bin_widths)
171208
}
172209

210+
bin_cut <- function(x, bins) {
211+
cut(x, bins$fuzzy, right = bins$right_closed, include.lowest = TRUE)
212+
}
213+
173214
bin_out <- function(count = integer(0), x = numeric(0), width = numeric(0),
174215
xmin = x - width / 2, xmax = x + width / 2) {
175216
density <- count / width / sum(abs(count))
@@ -186,3 +227,41 @@ bin_out <- function(count = integer(0), x = numeric(0), width = numeric(0),
186227
.size = length(count)
187228
)
188229
}
230+
231+
bin_loc <- function(x, id) {
232+
left <- x[-length(x)]
233+
right <- x[-1]
234+
235+
list(
236+
left = left[id],
237+
right = right[id],
238+
mid = ((left + right) / 2)[id],
239+
length = diff(x)[id]
240+
)
241+
}
242+
243+
fix_bin_params = function(params, fun, version) {
244+
245+
if (!is.null(params$origin)) {
246+
args <- paste0(fun, c("(origin)", "(boundary)"))
247+
deprecate_warn0(version, args[1], args[2])
248+
params$boudnary <- params$origin
249+
params$origin <- NULL
250+
}
251+
252+
if (!is.null(params$right)) {
253+
args <- paste0(fun, c("(right)", "(closed)"))
254+
deprecate_warn0(version, args[1], args[2])
255+
params$closed <- if (isTRUE(params$right)) "right" else "left"
256+
params$right <- NULL
257+
}
258+
259+
if (is.null(params$breaks %||% params$binwidth %||% params$bins)) {
260+
cli::cli_inform(
261+
"{.fn {fun}} using {.code bins = 30}. Pick better value {.arg binwidth}."
262+
)
263+
params$bins <- 30
264+
}
265+
266+
params
267+
}

R/geom-bin2d.R

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -25,7 +25,7 @@ NULL
2525
#' # You can control the size of the bins by specifying the number of
2626
#' # bins in each direction:
2727
#' d + geom_bin_2d(bins = 10)
28-
#' d + geom_bin_2d(bins = 30)
28+
#' d + geom_bin_2d(bins = list(x = 30, y = 10))
2929
#'
3030
#' # Or by specifying the width of the bins
3131
#' d + geom_bin_2d(binwidth = c(0.1, 0.1))

R/stat-bin.R

Lines changed: 21 additions & 51 deletions
Original file line numberDiff line numberDiff line change
@@ -26,10 +26,11 @@
2626
#' or left edges of bins are included in the bin.
2727
#' @param pad If `TRUE`, adds empty bins at either end of x. This ensures
2828
#' frequency polygons touch 0. Defaults to `FALSE`.
29-
#' @param keep.zeroes Treatment of zero count bins. If `"all"` (default), such
29+
#' @param drop Treatment of zero count bins. If `"all"` (default), such
3030
#' bins are kept as-is. If `"none"`, all zero count bins are filtered out.
3131
#' If `"inner"` only zero count bins at the flanks are filtered out, but not
32-
#' in the middle.
32+
#' in the middle. `TRUE` is shorthand for `"all"` and `FALSE` is shorthand
33+
#' for `"none"`.
3334
#' @eval rd_computed_vars(
3435
#' count = "number of points in bin.",
3536
#' density = "density of points in bin, scaled to integrate to 1.",
@@ -59,7 +60,7 @@ stat_bin <- function(mapping = NULL, data = NULL,
5960
closed = c("right", "left"),
6061
pad = FALSE,
6162
na.rm = FALSE,
62-
keep.zeroes = "all",
63+
drop = "all",
6364
orientation = NA,
6465
show.legend = NA,
6566
inherit.aes = TRUE) {
@@ -82,7 +83,7 @@ stat_bin <- function(mapping = NULL, data = NULL,
8283
pad = pad,
8384
na.rm = na.rm,
8485
orientation = orientation,
85-
keep.zeroes = keep.zeroes,
86+
drop = drop,
8687
...
8788
)
8889
)
@@ -95,9 +96,13 @@ stat_bin <- function(mapping = NULL, data = NULL,
9596
StatBin <- ggproto("StatBin", Stat,
9697
setup_params = function(self, data, params) {
9798
params$flipped_aes <- has_flipped_aes(data, params, main_is_orthogonal = FALSE)
98-
params$keep.zeroes <- arg_match0(
99-
params$keep.zeroes %||% "all",
100-
c("all", "none", "inner"), arg_nm = "keep.zeroes"
99+
100+
if (is.logical(params$drop)) {
101+
params$drop <- if (isTRUE(params$drop)) "all" else "none"
102+
}
103+
params$drop <- arg_match0(
104+
params$drop %||% "all",
105+
c("all", "none", "inner"), arg_nm = "drop"
101106
)
102107

103108
has_x <- !(is.null(data$x) && is.null(params$x))
@@ -118,29 +123,7 @@ StatBin <- ggproto("StatBin", Stat,
118123
))
119124
}
120125

121-
if (!is.null(params$drop)) {
122-
deprecate_warn0("2.1.0", "stat_bin(drop)", "stat_bin(pad)")
123-
params$drop <- NULL
124-
}
125-
if (!is.null(params$origin)) {
126-
deprecate_warn0("2.1.0", "stat_bin(origin)", "stat_bin(boundary)")
127-
params$boundary <- params$origin
128-
params$origin <- NULL
129-
}
130-
if (!is.null(params$right)) {
131-
deprecate_warn0("2.1.0", "stat_bin(right)", "stat_bin(closed)")
132-
params$closed <- if (params$right) "right" else "left"
133-
params$right <- NULL
134-
}
135-
if (!is.null(params$boundary) && !is.null(params$center)) {
136-
cli::cli_abort("Only one of {.arg boundary} and {.arg center} may be specified in {.fn {snake_class(self)}}.")
137-
}
138-
139-
if (is.null(params$breaks) && is.null(params$binwidth) && is.null(params$bins)) {
140-
cli::cli_inform("{.fn {snake_class(self)}} using {.code bins = 30}. Pick better value with {.arg binwidth}.")
141-
params$bins <- 30
142-
}
143-
126+
params <- fix_bin_params(params, fun = snake_class(self), version = "2.1.0")
144127
params
145128
},
146129

@@ -149,33 +132,20 @@ StatBin <- ggproto("StatBin", Stat,
149132
compute_group = function(data, scales, binwidth = NULL, bins = NULL,
150133
center = NULL, boundary = NULL,
151134
closed = c("right", "left"), pad = FALSE,
152-
breaks = NULL, flipped_aes = FALSE, keep.zeroes = "all",
135+
breaks = NULL, flipped_aes = FALSE, drop = "all",
153136
# The following arguments are not used, but must
154137
# be listed so parameters are computed correctly
155-
origin = NULL, right = NULL, drop = NULL) {
138+
origin = NULL, right = NULL) {
156139
x <- flipped_names(flipped_aes)$x
157-
if (!is.null(breaks)) {
158-
if (is.function(breaks)) {
159-
breaks <- breaks(data[[x]])
160-
}
161-
if (!scales[[x]]$is_discrete()) {
162-
breaks <- scales[[x]]$transform(breaks)
163-
}
164-
bins <- bin_breaks(breaks, closed)
165-
} else if (!is.null(binwidth)) {
166-
if (is.function(binwidth)) {
167-
binwidth <- binwidth(data[[x]])
168-
}
169-
bins <- bin_breaks_width(scales[[x]]$dimension(), binwidth,
170-
center = center, boundary = boundary, closed = closed)
171-
} else {
172-
bins <- bin_breaks_bins(scales[[x]]$dimension(), bins, center = center,
173-
boundary = boundary, closed = closed)
174-
}
140+
bins <- compute_bins(
141+
data[[x]], scales[[x]],
142+
breaks = breaks, binwidth = binwidth, bins = bins,
143+
center = center, boundary = boundary, closed = closed
144+
)
175145
bins <- bin_vector(data[[x]], bins, weight = data$weight, pad = pad)
176146

177147
keep <- switch(
178-
keep.zeroes,
148+
drop,
179149
none = bins$count != 0,
180150
inner = inner_runs(bins$count != 0),
181151
TRUE

0 commit comments

Comments
 (0)