|
17 | 17 | #' one change at a time. You may need to look at a few options to uncover
|
18 | 18 | #' the full story behind your data.
|
19 | 19 | #'
|
| 20 | +#' By default, the _height_ of the bars represents the counts within each bin. |
| 21 | +#' However, there are situations where this behavior might produce misleading |
| 22 | +#' plots (e.g., when non-equal-width bins are used), in which case it might be |
| 23 | +#' preferable to have the _area_ of the bars represent the counts (by setting |
| 24 | +#' `aes(y = after_stat(count / width))`). See the example below. |
| 25 | +#' |
20 | 26 | #' In addition to `geom_histogram()`, you can create a histogram plot by using
|
21 | 27 | #' `scale_x_binned()` with [geom_bar()]. This method by default plots tick marks
|
22 | 28 | #' in between each bar.
|
|
63 | 69 | #' ggplot(diamonds, aes(price, after_stat(density), colour = cut)) +
|
64 | 70 | #' geom_freqpoly(binwidth = 500)
|
65 | 71 | #'
|
| 72 | +#' |
| 73 | +#' # When using non-equal-width bins, we should set the area of the bars to |
| 74 | +#' # represent the counts (not the height). |
| 75 | +#' # Here we're using 10 equi-probable bins: |
| 76 | +#' price_bins <- quantile(diamonds$price, probs = seq(0, 1, length = 11)) |
| 77 | +#' |
| 78 | +#' ggplot(diamonds, aes(price)) + |
| 79 | +#' geom_histogram(breaks = price_bins, color = "black") # misleading (height = count) |
| 80 | +#' |
| 81 | +#' ggplot(diamonds, aes(price, after_stat(count / width))) + |
| 82 | +#' geom_histogram(breaks = price_bins, color = "black") # area = count |
| 83 | +#' |
66 | 84 | #' if (require("ggplot2movies")) {
|
67 | 85 | #' # Often we don't want the height of the bar to represent the
|
68 | 86 | #' # count of observations, but the sum of some other variable.
|
|
0 commit comments