Skip to content

Let ggplot handle histogran binning. Fix #198 #200

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 7 commits into from
Apr 16, 2015
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion DESCRIPTION
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
Package: plotly
Type: Package
Title: Interactive, publication-quality graphs online.
Version: 0.5.27
Version: 0.5.28
Authors@R: c(person("Chris", "Parmer", role = c("aut", "cre"),
email = "[email protected]"),
person("Scott", "Chamberlain", role = "aut",
Expand Down
4 changes: 4 additions & 0 deletions NEWS
Original file line number Diff line number Diff line change
@@ -1,3 +1,7 @@
0.5.27 -- 15 April 2015

Let ggplot handle histogram binning. Fix #198

0.5.27 -- 19 Mar 2015

Reimplement geom_ribbon as a basic polygon. Fix #191. Fix #192.
Expand Down
35 changes: 35 additions & 0 deletions R/ggplotly.R
Original file line number Diff line number Diff line change
Expand Up @@ -242,6 +242,41 @@ gg2list <- function(p){
if (!all(barmodes == barmodes[1]))
warning(paste0("You have multiple barcharts or histograms with different positions; ",
"Plotly's layout barmode will be '", layout$barmode, "'."))
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

want to add a test with expect_warning?

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This warning existed before and I'm not sure of an example where it's applicable, but it might be worth keeping in case we run across an edge case...

# for stacked bar charts, plotly cumulates bar heights, but ggplot doesn't
if (layout$barmode == "stack") {
# could speed up this function with environments or C/C++
unStack <- function(vec) {
n <- length(vec)
if (n == 1) return(vec)
seq.n <- seq_len(n)
names(vec) <- seq.n
vec <- sort(vec)
for (k in seq(2, n)) {
vec[k] <- vec[k] - sum(vec[seq(1, k-1)])
}
as.numeric(vec[as.character(seq.n)])
}
ys <- lapply(trace.list, "[[", "y")
xs <- lapply(trace.list, "[[", "x")
x.vals <- unique(unlist(xs))
# if there are two or more y-values (for a particular x value),
# then modify those y-values so they *add up* to the correct value(s)
for (val in x.vals) {
zs <- lapply(xs, function(x) which(x == val))
ys.given.x <- Map(function(x, y) y[x], zs, ys)
if (length(unlist(ys.given.x)) < 2) next
st <- unStack(unlist(ys.given.x))
lens <- sapply(ys.given.x, length)
trace.seq <- seq_along(trace.list)
ws <- split(st, rep(trace.seq, lens))
for (tr in seq_along(ws)) {
idx <- zs[[tr]]
replacement <- ws[[tr]]
if (length(idx) > 0 && length(replacement) > 0)
trace.list[[tr]]$y[idx] <- replacement
}
}
}
}

# Bar Gap for histograms should be 0
Expand Down
99 changes: 32 additions & 67 deletions R/trace_generation.R
Original file line number Diff line number Diff line change
Expand Up @@ -41,25 +41,9 @@ layer2traces <- function(l, d, misc) {
g$geom <- "smoothLine"
}
}
# Barmode and bargap
barmode <- "group"
if (g$geom == "bar" || g$geom == "histogram") {
if (l$stat$objname == "bin") {
if (g$geom != "histogram") {
warning("You may want to use geom_histogram.")
}
} else {
bargap <- "default"
}
g$geom <- "bar" # histogram is just an alias for geom_bar + stat_bin
pos <- l$position$.super$objname
if (pos == "identity") {
barmode <- "overlay"
} else if (pos == "stack") {
barmode <- "stack"
}
}
if (g$geom == "density") {
# histogram is essentially a bar chart with no gaps (after stats are computed)
if (g$geom == "histogram") {
g$geom <- "bar"
bargap <- 0
}

Expand Down Expand Up @@ -182,12 +166,11 @@ layer2traces <- function(l, d, misc) {
lapply(df.list, function(df){
params <- basic$params
params[invariable.names] <- if (ncol(x <- df[1, invariable.names]) > 0) x else NULL
list(data=df[other.names],
list(data=df[other.names],
params=params)
})
}
}

# Split hline and vline when multiple panels or intercepts:
# Need multiple traces accordingly.
if (g$geom == "hline" || g$geom == "vline") {
Expand Down Expand Up @@ -216,7 +199,6 @@ layer2traces <- function(l, d, misc) {
}
traces <- NULL
names.in.legend <- NULL

for (data.i in seq_along(data.list)) {
data.params <- data.list[[data.i]]
data.params$params$stat.type <- l$stat$objname
Expand Down Expand Up @@ -260,18 +242,23 @@ layer2traces <- function(l, d, misc) {
if (is.null(tr$name) || tr$name %in% names.in.legend)
tr$showlegend <- FALSE
names.in.legend <- c(names.in.legend, tr$name)

if (g$geom == "bar")
tr$barmode <- barmode

# Bar Gap
if (exists("bargap")) {
tr$bargap <- bargap

# special handling for bars
if (g$geom == "bar") {
tr$bargap <- if (exists("bargap")) bargap else "default"
pos <- l$position$.super$objname
tr$barmode <- if (pos %in% c("identity", "stack", "fill")) {
"stack"
} else "group"
}
# TODO: remove this once we reimplement density as area
if (g$geom == "density") {
tr$bargap <- 0
}

traces <- c(traces, list(tr))
}


sort.val <- sapply(traces, function(tr){
rank.val <- unlist(tr$sort)
if(is.null(rank.val)){
Expand Down Expand Up @@ -357,16 +344,9 @@ toBasic <- list(
g$data <- g$prestats.data
g
},
bar=function(g) {
if (any(is.na(g$prestats.data$x)))
g$prestats.data$x <- g$prestats.data$x.name
for(a in c("fill", "colour")){
g$prestats.data[[a]] <-
g$data[[a]][match(g$prestats.data$group, g$data$group)]
}
g$params$xstart <- min(g$data$xmin)
g$params$xend <- max(g$data$xmax)
g$data <- g$prestats.data
bar=function(g){
g <- group2NA(g, "bar")
g$data <- g$data[!is.na(g$data$y), ]
g
},
contour=function(g) {
Expand Down Expand Up @@ -591,40 +571,25 @@ geom2trace <- list(
L
},
bar=function(data, params) {
L <- list(x=data$x,
x <- if ("x.name" %in% names(data)) data$x.name else data$x
if (inherits(x, "POSIXt")) {
# Convert seconds into milliseconds
x <- as.numeric(x) * 1000
} else if (inherits(x, "Date")) {
# Convert days into milliseconds
x <- as.numeric(x) * 24 * 60 * 60 * 1000
}
L <- list(x=x,
y=data$y,
type="bar",
name=params$name,
text=data$text,
marker=list(color=toRGB(params$fill)))

if (!is.null(params$colour)) {
L$marker$line <- list(color=toRGB(params$colour))
L$marker$line$width <- if (is.null(params$size)) 1 else params$size
}

if (!is.null(params$alpha))
L$opacity <- params$alpha

if (params$stat.type == "bin") {
L$type <- "histogram"
if (is.null(params$binwidth)) {
L$autobinx <- TRUE
} else {
L$autobinx <- FALSE
L$xbins=list(start=params$xstart,
end=params$xend,
size=params$binwidth)
if (inherits(data$x.name, "POSIXt")) {
# Convert seconds into milliseconds
L$xbins <- lapply(L$xbins, function(x) x * 1000)
} else if (inherits(data$x.name, "Date")) {
# Convert days into milliseconds
L$xbins <- lapply(L$xbins, function(x) x * 24 * 60 * 60 * 1000)
}
}
} else {
L$y <- data$y
L$type <- "bar"
}
if (!is.null(params$alpha)) L$opacity <- params$alpha
L
},
step=function(data, params) {
Expand Down
107 changes: 58 additions & 49 deletions tests/testthat/test-ggplot-bar.R
Original file line number Diff line number Diff line change
@@ -1,5 +1,20 @@
context("bar")

expect_traces <- function(gg, n.traces, name){
stopifnot(is.ggplot(gg))
stopifnot(is.numeric(n.traces))
save_outputs(gg, paste0("bar-", name))
L <- gg2list(gg)
is.trace <- names(L) == ""
all.traces <- L[is.trace]
no.data <- sapply(all.traces, function(tr) {
is.null(tr[["x"]]) && is.null(tr[["y"]])
})
has.data <- all.traces[!no.data]
expect_equal(length(has.data), n.traces)
list(traces=has.data, kwargs=L$kwargs)
}

researchers <-
data.frame(country=c("Canada", "Canada", "Germany", "USA"),
name=c("Warren", "Andreanne", "Stefan", "Toby"),
Expand All @@ -10,75 +25,47 @@ gg <- ggplot(researchers, aes(country, papers, fill=field))

test_that("position_dodge is translated to barmode=group", {
gg.dodge <- gg + geom_bar(stat="identity", position="dodge")
L <- gg2list(gg.dodge)
expect_equal(length(L), 3)
trace.names <- sapply(L[1:2], "[[", "name")
info <- expect_traces(gg.dodge, 2, "dodge")
trs <- info$traces
trace.names <- sapply(trs[1:2], "[[", "name")
expect_true(all(c("Math", "Bio") %in% trace.names))
expect_identical(L$kwargs$layout$barmode, "group")
expect_identical(info$kwargs$layout$barmode, "group")
# Check x values
expect_identical(as.character(L[[1]]$x[1]), "Canada")
expect_identical(as.character(L[[1]]$x[2]), "Germany")
expect_identical(as.character(L[[2]]$x[1]), "Canada")
expect_identical(as.character(L[[2]]$x[2]), "USA")

save_outputs(gg.dodge, "bar-dodge")
expect_identical(as.character(trs[[1]]$x), c("Canada", "Germany"))
expect_identical(as.character(trs[[2]]$x), c("Canada", "USA"))
})

test_that("position_stack is translated to barmode=stack", {
gg.stack <- gg + geom_bar(stat="identity", position="stack")
L <- gg2list(gg.stack)
expect_equal(length(L), 3)
trace.names <- sapply(L[1:2], "[[", "name")
info <- expect_traces(gg.stack, 2, "stack")
trs <- info$traces
trace.names <- sapply(trs[1:2], "[[", "name")
expect_true(all(c("Math", "Bio") %in% trace.names))
expect_identical(L$kwargs$layout$barmode, "stack")

save_outputs(gg.stack, "bar-stack")
expect_identical(info$kwargs$layout$barmode, "stack")
})

test_that("position_identity is translated to barmode=overlay", {
test_that("position_identity is translated to barmode=stack", {
gg.identity <- gg + geom_bar(stat="identity", position="identity")
L <- gg2list(gg.identity)
expect_equal(length(L), 3)
trace.names <- sapply(L[1:2], "[[", "name")
info <- expect_traces(gg.identity, 2, "identity")
trs <- info$traces
trace.names <- sapply(trs[1:2], "[[", "name")
expect_true(all(c("Math", "Bio") %in% trace.names))
expect_identical(L$kwargs$layout$barmode, "overlay")

save_outputs(gg.identity, "bar-identity")
expect_identical(info$kwargs$layout$barmode, "stack")
})

test_that("dates work well with bar charts", {

researchers$month <- c("2012-01-01", "2012-01-01", "2012-02-01", "2012-02-01")
researchers$month <- as.Date(researchers$month)

gd <- ggplot(researchers, aes(month, papers, fill=field)) +
geom_bar(stat="identity")

L <- gg2list(gd)

expect_equal(length(L), 3) # 2 traces + layout
expect_identical(L$kwargs$layout$xaxis$type, "date")
expect_identical(L[[1]]$x[1], "2012-01-01 00:00:00")
expect_identical(L[[1]]$x[2], "2012-02-01 00:00:00")

save_outputs(gd, "bar-dates")
info <- expect_traces(gd, 2, "dates")
trs <- info$traces
expect_identical(info$kwargs$layout$xaxis$type, "date")
# plotly likes time in milliseconds
t <- as.numeric(unique(researchers$month)) * 24 * 60 * 60 * 1000
expect_identical(trs[[1]]$x, t)
})

expect_traces <- function(gg, n.traces, name){
stopifnot(is.ggplot(gg))
stopifnot(is.numeric(n.traces))
save_outputs(gg, paste0("bar-", name))
L <- gg2list(gg)
is.trace <- names(L) == ""
all.traces <- L[is.trace]
no.data <- sapply(all.traces, function(tr) {
is.null(tr[["x"]]) && is.null(tr[["y"]])
})
has.data <- all.traces[!no.data]
expect_equal(length(has.data), n.traces)
list(traces=has.data, kwargs=L$kwargs)
}

## http://www.cookbook-r.com/Graphs/Bar_and_line_graphs_%28ggplot2%29/
df <- data.frame(time = factor(c("Lunch","Dinner"), levels=c("Lunch","Dinner")),
total_bill = c(14.89, 17.23))
Expand Down Expand Up @@ -182,3 +169,25 @@ test_that("guides(fill=FALSE) does not affect colour legend", {
expect_true(info$kwargs$layout$showlegend)
})


base <- ggplot(mtcars, aes(factor(vs), fill=factor(cyl)))

test_that("geom_bar() stacks counts", {
info <- expect_traces(base + geom_bar(), 3, "position-stack")
expect_identical(info$kwargs$layout$barmode, "stack")
trs <- info$traces
# sum of y values for each trace
test <- as.numeric(sort(sapply(trs, function(x) sum(x$y))))
true <- as.numeric(sort(table(mtcars$cyl)))
expect_identical(test, true)
})

test_that("geom_bar(position = 'fill') stacks proportions", {
info <- expect_traces(base + geom_bar(position = "fill"), 3, "position-fill")
expect_identical(info$kwargs$layout$barmode, "stack")
trs <- info$traces
# sum of y-values *conditioned* on a x-value
prop <- sum(sapply(sapply(trs, "[[", "y"), "[", 1))
expect_identical(prop, 1)
})

Loading