Skip to content

Commit 35a8122

Browse files
committed
Merge pull request #293 from wch/feature/violin-clean
Feature/violin-clean
2 parents dced3a3 + 0a7530d commit 35a8122

File tree

7 files changed

+259
-2
lines changed

7 files changed

+259
-2
lines changed

DESCRIPTION

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -102,6 +102,7 @@ Collate:
102102
'geom-smooth.r'
103103
'geom-text.r'
104104
'geom-tile.r'
105+
'geom-violin.r'
105106
'geom-vline.r'
106107
'ggplot2.r'
107108
'grob-absolute.r'
@@ -170,6 +171,7 @@ Collate:
170171
'stat-summary.r'
171172
'stat-unique.r'
172173
'stat-vline.r'
174+
'stat-ydensity.r'
173175
'summary.r'
174176
'templates.r'
175177
'theme-defaults.r'

NAMESPACE

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -60,6 +60,7 @@ export(geom_smooth)
6060
export(geom_step)
6161
export(geom_text)
6262
export(geom_tile)
63+
export(geom_violin)
6364
export(geom_vline)
6465
export(ggfluctuation)
6566
export(ggmissing)
@@ -185,6 +186,7 @@ export(stat_sum)
185186
export(stat_summary)
186187
export(stat_unique)
187188
export(stat_vline)
189+
export(stat_ydensity)
188190
export(theme_blank)
189191
export(theme_bw)
190192
export(theme_get)

NEWS

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -13,6 +13,9 @@ NEW FEATURES
1313
`annotation_raster`, an even faster special case, for when you want to draw
1414
the same raster in each panel.
1515

16+
* `geom_violin`: an implementation violin plots, which are a way of visualizing
17+
kernel density estimates. (Thanks to Winston Chang)
18+
1619
* New fortify methods have been added for objects produced by the `multcomp`
1720
package.
1821

R/geom-violin.r

Lines changed: 119 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,119 @@
1+
#' Violin plot.
2+
#'
3+
#' @param trim If \code{TRUE} (default), trim the tails of the violins
4+
#' to the range of the data. If \code{FALSE}, don't trim the tails.
5+
#' @param scale if "equal" (default), all violins have the same area (to be
6+
#' precise, they would have the same area if tails are not trimmed). If
7+
#' "count", the areas are scaled proportionally to the number of observations.
8+
#' @export
9+
#' @examples
10+
#' p <- ggplot(mtcars, aes(factor(cyl), mpg))
11+
#'
12+
#' p + geom_violin()
13+
#' qplot(factor(cyl), mpg, data = mtcars, geom = "violin")
14+
#'
15+
#' p + geom_violin() + geom_jitter(height = 0)
16+
#' p + geom_violin() + coord_flip()
17+
#' qplot(factor(cyl), mpg, data = mtcars, geom = "violin") +
18+
#' coord_flip()
19+
#'
20+
#' # Scale maximum width proportional to sample size:
21+
#' p + geom_violin(scale = "count")
22+
#'
23+
#' # Default is to trim violins to the range of the data. To disable:
24+
#' p + geom_violin(trim = FALSE)
25+
#'
26+
#' # Use a smaller bandwidth for closer density fit (default is 1).
27+
#' p + geom_violin(adjust = .5)
28+
#'
29+
#' # Add aesthetic mappings
30+
#' # Note that violins are automatically dodged when any aesthetic is
31+
#' # a factor
32+
#' p + geom_violin(aes(fill = cyl))
33+
#' p + geom_violin(aes(fill = factor(cyl)))
34+
#' p + geom_violin(aes(fill = factor(vs)))
35+
#' p + geom_violin(aes(fill = factor(am)))
36+
#'
37+
#' # Set aesthetics to fixed value
38+
#' p + geom_violin(fill = "grey80", colour = "#3366FF")
39+
#' qplot(factor(cyl), mpg, data = mtcars, geom = "violin",
40+
#' colour = I("#3366FF"))
41+
#'
42+
#' # Scales vs. coordinate transforms -------
43+
#' # Scale transformations occur before the density statistics are computed.
44+
#' # Coordinate transformations occur afterwards. Observe the effect on the
45+
#' # number of outliers.
46+
#' m <- ggplot(movies, aes(y = votes, x = rating,
47+
#' group = round_any(rating, 0.5)))
48+
#' m + geom_violin()
49+
#' m + geom_violin() + scale_y_log10()
50+
#' m + geom_violin() + coord_trans(y = "log10")
51+
#' m + geom_violin() + scale_y_log10() + coord_trans(y = "log10")
52+
#'
53+
#' # Violin plots with continuous x:
54+
#' # Use the group aesthetic to group observations in violins
55+
#' qplot(year, budget, data = movies, geom = "violin")
56+
#' qplot(year, budget, data = movies, geom = "violin",
57+
#' group = round_any(year, 10, floor))
58+
#'
59+
geom_violin <- function (mapping = NULL, data = NULL, stat = "ydensity", position = "dodge",
60+
trim = TRUE, scale = "equal", ...) {
61+
GeomViolin$new(mapping = mapping, data = data, stat = stat,
62+
position = position, trim = trim, scale = scale, ...)
63+
}
64+
65+
GeomViolin <- proto(Geom, {
66+
objname <- "violin"
67+
68+
reparameterise <- function(., df, params) {
69+
df$width <- df$width %||%
70+
params$width %||% (resolution(df$x, FALSE) * 0.9)
71+
72+
# ymin, ymax, xmin, and xmax define the bounding rectangle for each group
73+
ddply(df, .(group), transform,
74+
ymin = min(y),
75+
ymax = max(y),
76+
xmin = x - width / 2,
77+
xmax = x + width / 2)
78+
79+
}
80+
81+
draw <- function(., data, ...) {
82+
83+
# Find the points for the line to go all the way around
84+
data <- transform(data, xminv = x - scaled * (x-xmin),
85+
xmaxv = x + scaled * (xmax-x))
86+
87+
# Make sure it's sorted properly to draw the outline
88+
newdata <- rbind(arrange(transform(data, x = xminv), y),
89+
arrange(transform(data, x = xmaxv), -y))
90+
91+
# Close the polygon: set first and last point the same
92+
# Needed for coord_polar and such
93+
newdata <- rbind(newdata, newdata[1,])
94+
95+
ggname(.$my_name(), GeomPolygon$draw(newdata, ...))
96+
}
97+
98+
guide_geom <- function(.) "polygon"
99+
100+
icon <- function(.) {
101+
y <- seq(-.3, .3, length=40)
102+
x1 <- dnorm(y, mean = -.15, sd = 0.05) + 1.5*dnorm(y, mean = 0.1, sd = 0.1)
103+
x2 <- dnorm(y, mean = -.1, sd = 0.1) + dnorm(y, mean = 0.1, sd = 0.1)
104+
105+
y <- c(y, rev(y))
106+
x1 <- c(x1, -rev(x1)) / max(8 * x1)
107+
x2 <- c(x2, -rev(x2)) / max(8 * x2)
108+
gTree(children = gList(
109+
polygonGrob(x1 + .30, y + .35, default = "npc", gp = gpar(fill = "black")),
110+
polygonGrob(x2 + .70, y + .55, default = "npc", gp = gpar(fill = "black"))
111+
))
112+
}
113+
114+
default_stat <- function(.) StatYdensity
115+
default_pos <- function(.) PositionDodge
116+
default_aes <- function(.) aes(weight=1, colour="grey20", fill="white", size=0.5, alpha = 1, linetype = "solid")
117+
required_aes <- c("x", "y")
118+
119+
})

R/position-collide.r

Lines changed: 6 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -85,7 +85,7 @@ pos_fill <- function(df, width) {
8585
# Dodge overlapping interval.
8686
# Assumes that each set has the same horizontal position.
8787
pos_dodge <- function(df, width) {
88-
n <- nrow(df)
88+
n <- length(unique(df$group))
8989
if (n == 1) return(df)
9090

9191
if (!all(c("xmin", "xmax") %in% names(df))) {
@@ -99,8 +99,12 @@ pos_dodge <- function(df, width) {
9999
# df <- data.frame(n = c(2:5, 10, 26), div = c(4, 3, 2.666666, 2.5, 2.2, 2.1))
100100
# qplot(n, div, data = df)
101101

102+
# Have a new group index from 1 to number of groups.
103+
# This might be needed if the group numbers in this set don't include all of 1:n
104+
groupidx <- match(df$group, sort(unique(df$group)))
105+
102106
within(df, {
103-
xmin <- xmin + width / n * (seq_len(n) - 1) - diff * (n - 1) / (2 * n)
107+
xmin <- xmin + width / n * (groupidx - 1) - diff * (n - 1) / (2 * n)
104108
xmax <- xmin + d_width / n
105109
x <- (xmin + xmax) / 2
106110
})

R/stat-ydensity.r

Lines changed: 81 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,81 @@
1+
#' 1d kernel density estimate along y axis, for violin plot.
2+
#'
3+
#' @inheritParams stat_density
4+
#' @param trim If \code{TRUE} (default), trim the tails of the violins
5+
#' to the range of the data. If \code{FALSE}, don't trim the tails.
6+
#' @param scale if "equal" (default), all violins have the same area (to be
7+
#' precise, they would have the same area if tails are not trimmed). If
8+
#' "count", the areas are scaled proportionally to the number of observations.
9+
#'
10+
#' @return A data frame with additional columns:
11+
#' \item{width}{width of violin bounding box}
12+
#' \item{ydensity}{density estimate}
13+
#' \item{scaled}{density estimate, scaled depending on scalearea and scalecount}
14+
#' \item{count}{ydensity * number of points}
15+
#' \item{counttotal}{number of points}
16+
#' @examples
17+
#' # See geom_violin for examples
18+
#' # Also see stat_density for similar examples with data along x axis
19+
stat_ydensity <- function (mapping = NULL, data = NULL, geom = "violin", position = "dodge",
20+
adjust = 1, kernel = "gaussian", trim = TRUE, scale = "equal", na.rm = FALSE, ...) {
21+
StatYdensity$new(mapping = mapping, data = data, geom = geom, position = position,
22+
adjust = adjust, kernel = kernel, trim = trim, scale = scale,
23+
na.rm = na.rm, ...)
24+
}
25+
26+
StatYdensity <- proto(Stat, {
27+
objname <- "ydensity"
28+
29+
calculate_groups <- function(., data, na.rm = FALSE, width = NULL,
30+
scale = "equal", ...) {
31+
data <- remove_missing(data, na.rm, "y", name = "stat_ydensity", finite = TRUE)
32+
data <- .super$calculate_groups(., data, na.rm = na.rm, width = width, ...)
33+
34+
# Scale to have equal areas
35+
data$scaled <- data$ydensity / max(data$ydensity)
36+
37+
if (scale == "count") {
38+
data$scaled <- data$scaled * data$counttotal/max(data$counttotal)
39+
} else if (scale != "equal") {
40+
stop('scale must be "equal" or "count".')
41+
}
42+
43+
data
44+
}
45+
46+
calculate <- function(., data, scales, width=NULL, adjust=1, kernel="gaussian",
47+
trim=TRUE, na.rm = FALSE, ...) {
48+
49+
n <- nrow(data)
50+
51+
# If just 1 point, return a flat violin
52+
if (n < 2) { return(data.frame(data, scaled = 1, ydensity = 1, count = 1)) }
53+
if (is.null(data$weight)) { data$weight <- rep(1, n) / n }
54+
55+
if(trim) {
56+
dens <- density(data$y, adjust = adjust, kernel = kernel, weight = data$weight,
57+
n = 200, from = min(data$y), to = max(data$y))
58+
} else {
59+
dens <- density(data$y, adjust = adjust, kernel = kernel, weight = data$weight,
60+
n = 200)
61+
}
62+
63+
# We predict ydensity from y ('density' calls them y and x, respectively)
64+
densdf <- data.frame(ydensity = dens$y, y = dens$x)
65+
densdf$scaled <- densdf$ydensity / max(densdf$ydensity, na.rm = TRUE)
66+
67+
if (length(unique(data$x)) > 1) { width <- diff(range(data$x)) * 0.9 }
68+
69+
densdf$x <- mean(range(data$x))
70+
densdf$count <- densdf$ydensity * n
71+
densdf$counttotal <- n
72+
densdf$width <- width
73+
74+
densdf
75+
}
76+
77+
icon <- function(.) GeomViolin$icon()
78+
default_geom <- function(.) GeomViolin
79+
required_aes <- c("x", "y")
80+
81+
})

inst/tests/visual-violin.r

Lines changed: 46 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,46 @@
1+
set.seed(111)
2+
dat <- data.frame(x=LETTERS[1:3], y=rnorm(90))
3+
dat <- dat[ dat$x!="C" | c(T,F), ] # Keep half the C's
4+
5+
6+
ggplot(dat, aes(x=x, y=y)) + geom_violin() +
7+
opts(title="Basic violin")
8+
9+
ggplot(dat, aes(x=x, y=y)) + geom_violin() + geom_point(shape=21) +
10+
opts(title="Basic violin with overlaid points")
11+
12+
ggplot(dat, aes(x=x, y=y)) + geom_violin(scale="count") +
13+
opts(title="Scale area to sample size (C is smaller)")
14+
15+
ggplot(dat, aes(x=x, y=y)) + geom_violin(width=.5) +
16+
opts(title="Narrower (width=.5)")
17+
18+
19+
ggplot(dat, aes(x=x, y=y)) + geom_violin(trim=FALSE) + geom_point(shape=21) +
20+
opts(title="With tails and points")
21+
22+
ggplot(dat, aes(x=x, y=y)) + geom_violin(adjust=.3) + geom_point(shape=21) +
23+
opts(title="With smaller bandwidth and points")
24+
25+
26+
ggplot(dat, aes(x="foo", y=y, fill=x)) + geom_violin() +
27+
opts(title="Dodging")
28+
29+
ggplot(dat, aes(x=x, y=y)) + geom_violin() + coord_polar() +
30+
opts(title="coord_polar")
31+
32+
ggplot(dat, aes(x=x, y=y)) + geom_violin() + coord_flip() +
33+
opts(title="coord_flip")
34+
35+
ggplot(dat, aes(x="foo", y=y, fill=x)) + geom_violin() + coord_flip() +
36+
opts(title="Dodging and coord_flip")
37+
38+
ggplot(dat, aes(x=as.numeric(x), y=y)) + geom_violin() +
39+
opts(title="Continuous x axis, multiple groups\n(center should be 2)")
40+
41+
ggplot(dat, aes(x=as.numeric(1), y=y)) + geom_violin() +
42+
opts(title="Continuous x axis, single group")
43+
44+
ggplot(d2, aes(x=x, y=y, fill=g)) + geom_violin() +
45+
opts(title="Grouping on x and fill")
46+

0 commit comments

Comments
 (0)