Skip to content

Commit 60f8654

Browse files
authored
Check for dropped aesthetics (#4866)
* Check for dropped aesthetics and issue a warning if aesthetics are dropped that shouldn't be. Fixes #3250.
* Update documentation.
1 parent 7484bd7 commit 60f8654

14 files changed

+123
-14
lines changed

NEWS.md

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,9 @@
11
# ggplot2 (development version)
22

3+
* ggplot2 now checks during statistical transformations whether any data
4+
columns were dropped and warns about this. If stats intend to drop
5+
data columns they can declare them in the new field `dropped_aes`.
6+
(@clauswilke, #3250)
37
* Added `stat_align()` to align data without common x-coordinates prior to
48
stacking. This is now the default stat for `geom_area()` (@thomasp85, #4850)
59

R/stat-.r

Lines changed: 19 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -61,6 +61,11 @@ Stat <- ggproto("Stat",
6161

6262
non_missing_aes = character(),
6363

64+
# Any aesthetics that are dropped from the data frame during the
65+
# statistical transformation should be listed here to suppress a
66+
# warning about dropped aesthetics
67+
dropped_aes = character(),
68+
6469
optional_aes = character(),
6570

6671
setup_params = function(data, params) {
@@ -125,7 +130,20 @@ Stat <- ggproto("Stat",
125130
)
126131
}, stats, groups, SIMPLIFY = FALSE)
127132

128-
vec_rbind(!!!stats)
133+
data_new <- vec_rbind(!!!stats)
134+
135+
# The above code will drop columns that are not constant within groups and not
136+
# carried over/recreated by the stat. This can produce unexpected results,
137+
# and hence we warn about it.
138+
dropped <- base::setdiff(names(data), base::union(self$dropped_aes, names(data_new)))
139+
if (length(dropped) > 0) {
140+
cli::cli_warn(c(
141+
"The following aesthetics were dropped during statistical transformation: {.field {glue_collapse(dropped, sep = ', ')}}",
142+
"i" = "This can happen when ggplot fails to infer the correct grouping structure in the data.",
143+
"i" = "Did you forget to specify a {.code group} aesthetic or to convert a numerical variable into a factor?"
144+
))
145+
}
146+
data_new
129147
},
130148

131149
compute_group = function(self, data, scales) {

R/stat-bin.r

Lines changed: 13 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -29,11 +29,16 @@
2929
#' frequency polygons touch 0. Defaults to `FALSE`.
3030
#' @section Computed variables:
3131
#' \describe{
32-
#' \item{count}{number of points in bin}
33-
#' \item{density}{density of points in bin, scaled to integrate to 1}
34-
#' \item{ncount}{count, scaled to maximum of 1}
35-
#' \item{ndensity}{density, scaled to maximum of 1}
36-
#' \item{width}{widths of bins}
32+
#' \item{`count`}{number of points in bin}
33+
#' \item{`density`}{density of points in bin, scaled to integrate to 1}
34+
#' \item{`ncount`}{count, scaled to maximum of 1}
35+
#' \item{`ndensity`}{density, scaled to maximum of 1}
36+
#' \item{`width`}{widths of bins}
37+
#' }
38+
#'
39+
#' @section Dropped variables:
40+
#' \describe{
41+
#' \item{`weight`}{After binning, weights of individual data points (if supplied) are no longer available.}
3742
#' }
3843
#'
3944
#' @seealso [stat_count()], which counts the number of cases at each x
@@ -167,6 +172,8 @@ StatBin <- ggproto("StatBin", Stat,
167172

168173
default_aes = aes(x = after_stat(count), y = after_stat(count), weight = 1),
169174

170-
required_aes = "x|y"
175+
required_aes = "x|y",
176+
177+
dropped_aes = "weight" # after statistical transformation, weights are no longer available
171178
)
172179

R/stat-bindot.r

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,7 @@ StatBindot <- ggproto("StatBindot", Stat,
66
required_aes = "x",
77
non_missing_aes = "weight",
88
default_aes = aes(y = after_stat(count)),
9+
dropped_aes = c("bin", "bincenter"), # these are temporary variables that are created and then removed by the stat
910

1011
setup_params = function(data, params) {
1112
if (is.null(params$binwidth)) {

R/stat-boxplot.r

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -47,6 +47,9 @@ stat_boxplot <- function(mapping = NULL, data = NULL,
4747
StatBoxplot <- ggproto("StatBoxplot", Stat,
4848
required_aes = c("y|x"),
4949
non_missing_aes = "weight",
50+
# either the x or y aesthetic will get dropped during
51+
# statistical transformation, depending on the orientation
52+
dropped_aes = c("x", "y"),
5053
setup_data = function(self, data, params) {
5154
data <- flip_data(data, params$flipped_aes)
5255
data$x <- data$x %||% 0

R/stat-contour.r

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -18,6 +18,13 @@
1818
#' \item{`nlevel`}{Height of contour, scaled to maximum of 1.}
1919
#' \item{`piece`}{Contour piece (an integer).}
2020
#' }
21+
#'
22+
#' @section Dropped variables:
23+
#' \describe{
24+
#' \item{`z`}{After contouring, the z values of individual data points are no longer available.}
25+
#' }
26+
#'
27+
#'
2128
#' @rdname geom_contour
2229
stat_contour <- function(mapping = NULL, data = NULL,
2330
geom = "contour", position = "identity",
@@ -83,6 +90,7 @@ StatContour <- ggproto("StatContour", Stat,
8390

8491
required_aes = c("x", "y", "z"),
8592
default_aes = aes(order = after_stat(level)),
93+
dropped_aes = "z", # z gets dropped during statistical transformation
8694

8795
setup_params = function(data, params) {
8896
params$z.range <- range(data$z, na.rm = TRUE, finite = TRUE)
@@ -112,6 +120,7 @@ StatContourFilled <- ggproto("StatContourFilled", Stat,
112120

113121
required_aes = c("x", "y", "z"),
114122
default_aes = aes(order = after_stat(level), fill = after_stat(level)),
123+
dropped_aes = "z", # z gets dropped during statistical transformation
115124

116125
setup_params = function(data, params) {
117126
params$z.range <- range(data$z, na.rm = TRUE, finite = TRUE)

R/stat-density-2d.r

Lines changed: 17 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -32,6 +32,15 @@
3232
#' Contours are calculated for one of the three types of density estimates
3333
#' obtained before contouring, `density`, `ndensity`, and `count`. Which
3434
#' of those should be used is determined by the `contour_var` parameter.
35+
#'
36+
#' @section Dropped variables:
37+
#' \describe{
38+
#' \item{`z`}{After density estimation, the z values of individual data points are no longer available.}
39+
#' }
40+
#'
41+
#' If contouring is enabled, then similarly `density`, `ndensity`, and `count`
42+
#' are no longer available after the contouring pass.
43+
#'
3544
stat_density_2d <- function(mapping = NULL, data = NULL,
3645
geom = "density_2d", position = "identity",
3746
...,
@@ -115,6 +124,10 @@ StatDensity2d <- ggproto("StatDensity2d", Stat,
115124
default_aes = aes(colour = "#3366FF", size = 0.5),
116125

117126
required_aes = c("x", "y"),
127+
# because of the chained calculation in compute_panel(),
128+
# which calls compute_panel() of a different stat, we declare
129+
# dropped aesthetics on the chained contour stat instead
130+
dropped_aes = character(0),
118131

119132
extra_params = c(
120133
"na.rm", "contour", "contour_var",
@@ -145,10 +158,12 @@ StatDensity2d <- ggproto("StatDensity2d", Stat,
145158
params$z.range <- z.range
146159

147160
if (isTRUE(self$contour_type == "bands")) {
148-
contour_stat <- StatContourFilled
161+
contour_stat <- ggproto(NULL, StatContourFilled)
149162
} else { # lines is the default
150-
contour_stat <- StatContour
163+
contour_stat <- ggproto(NULL, StatContour)
151164
}
165+
# update dropped aes
166+
contour_stat$dropped_aes <- c(contour_stat$dropped_aes, "density", "ndensity", "count")
152167

153168
dapply(data, "PANEL", function(data) {
154169
scales <- layout$get_scales(data$PANEL[1])

R/stat-summary-2d.r

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,11 @@
1515
#' \item{x,y}{Location}
1616
#' \item{value}{Value of summary statistic.}
1717
#' }
18+
#'
19+
#' @section Dropped variables:
20+
#' \describe{
21+
#' \item{`z`}{After binning, the z values of individual data points are no longer available.}
22+
#' }
1823
#' @seealso [stat_summary_hex()] for hexagonal summarization.
1924
#' [stat_bin2d()] for the binning options.
2025
#' @inheritParams layer
@@ -85,6 +90,7 @@ StatSummary2d <- ggproto("StatSummary2d", Stat,
8590
default_aes = aes(fill = after_stat(value)),
8691

8792
required_aes = c("x", "y", "z"),
93+
dropped_aes = "z", # z gets dropped during statistical transformation
8894

8995
compute_group = function(data, scales, binwidth = NULL, bins = 30,
9096
breaks = NULL, origin = NULL, drop = TRUE,

R/stat-summary-hex.r

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -41,6 +41,8 @@ StatSummaryHex <- ggproto("StatSummaryHex", Stat,
4141

4242
required_aes = c("x", "y", "z"),
4343

44+
dropped_aes = "z", # z gets dropped during statistical transformation
45+
4446
compute_group = function(data, scales, binwidth = NULL, bins = 30, drop = TRUE,
4547
fun = "mean", fun.args = list()) {
4648
check_installed("hexbin", reason = "for `stat_summary_hex()`")

man/geom_contour.Rd

Lines changed: 7 additions & 0 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

man/geom_density_2d.Rd

Lines changed: 10 additions & 0 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

man/geom_histogram.Rd

Lines changed: 12 additions & 5 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

man/stat_summary_2d.Rd

Lines changed: 7 additions & 0 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

tests/testthat/test-stats.r

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -19,3 +19,16 @@ test_that("error message is thrown when aesthetics are missing", {
1919
p <- ggplot(mtcars) + stat_sum()
2020
expect_error(ggplot_build(p), "x and y$")
2121
})
22+
23+
test_that("erroneously dropped aesthetics are found and issue a warning", {
24+
df <- data_frame(
25+
x = c( # arbitrary random numbers
26+
0.42986445, 1.11153170, -1.22318013, 0.90982003,
27+
0.46454276, -0.42300004, -1.76139834, -0.75060412,
28+
0.01635474, -0.63202159
29+
),
30+
g = rep(1:2, each = 5)
31+
)
32+
p <- ggplot(df, aes(x, fill = g)) + geom_density()
33+
expect_warning(ggplot_build(p), "aesthetics were dropped")
34+
})

0 commit comments

Comments
 (0)