From e761c27eb65c4f5ddee4e73f0e03abd818e7f8b3 Mon Sep 17 00:00:00 2001 From: cpsievert Date: Fri, 10 Apr 2015 17:10:01 -0500 Subject: [PATCH 1/7] Let ggplot handle histogran binning. Fix #198 --- R/ggplotly.R | 45 ++++++++++ R/trace_generation.R | 97 +++++++------------- tests/testthat.R | 2 +- tests/testthat/test-ggplot-bar.R | 102 +++++++++++---------- tests/testthat/test-ggplot-histogram.R | 117 ++++++++++++++----------- 5 files changed, 199 insertions(+), 164 deletions(-) diff --git a/R/ggplotly.R b/R/ggplotly.R index 798171bbde..21a9f3bce9 100644 --- a/R/ggplotly.R +++ b/R/ggplotly.R @@ -242,7 +242,52 @@ gg2list <- function(p){ if (!all(barmodes == barmodes[1])) warning(paste0("You have multiple barcharts or histograms with different positions; ", "Plotly's layout barmode will be '", layout$barmode, "'.")) + # for stacked bar charts, plotly cumulates bar heights, but ggplot doesn't + if (layout$barmode == "stack") { + # could speed up this function with environments or C/C++ + unStack <- function(vec) { + n <- length(vec) + if (n == 1) return(vec) + seq.n <- seq_len(n) + names(vec) <- seq.n + vec <- sort(vec) + for (k in seq(2, n)) { + vec[k] <- vec[k] - sum(vec[seq(1, k-1)]) + } + as.numeric(vec[as.character(seq.n)]) + } + ys <- lapply(trace.list, "[[", "y") + xs <- lapply(trace.list, "[[", "x") + x.vals <- unique(unlist(xs)) + # if there is more than one y-value (for a particular x value) + # then + # + for (val in x.vals) { + zs <- lapply(xs, function(x) which(x == val)) + ys.given.x <- Map(function(x, y) y[x], zs, ys) + if (length(unlist(ys.given.x)) < 2) next + st <- unStack(unlist(ys.given.x)) + lens <- sapply(ys.given.x, length) + trace.seq <- seq_along(zs) + ws <- split(st, rep(trace.seq, lens)) + for (tr in trace.seq) { + idx <- zs[[tr]] + if (length(idx)) trace.list[[tr]]$y[idx] <- ws[[tr]][idx] + } + } + } } + +# lens <- sapply(ys, length) +# && length(trace.list) > 1 && any(lens > 1)) { +# xs <- unlist(xs) +# trace.seq <- seq_along(trace.list) +# idx <- rep(trace.seq, lens) +# +# +# browser() +# diffs <- tapply(unlist(ys), INDEX = xs, unStack) +# for (k in trace.seq) trace.list[[k]]$y <- as.numeric(sapply(diffs, "[", k)) # Bar Gap for histograms should be 0 bargaps <- do.call(c, lapply(trace.list, function (x) x$bargap)) diff --git a/R/trace_generation.R b/R/trace_generation.R index 06b5728dfc..096485b417 100644 --- a/R/trace_generation.R +++ b/R/trace_generation.R @@ -41,24 +41,14 @@ layer2traces <- function(l, d, misc) { g$geom <- "smoothLine" } } - # Barmode and bargap - barmode <- "group" - if (g$geom == "bar" || g$geom == "histogram") { - if (l$stat$objname == "bin") { - if (g$geom != "histogram") { - warning("You may want to use geom_histogram.") - } - } else { - bargap <- "default" - } - g$geom <- "bar" # histogram is just an alias for geom_bar + stat_bin - pos <- l$position$.super$objname - if (pos == "identity") { - barmode <- "overlay" - } else if (pos == "stack") { - barmode <- "stack" - } + # histogram is essentially a bar chart with no gaps (after stats are computed) + if (g$geom == "histogram") { + g$geom <- "bar" + bargap <- 0 + misc$hist <- TRUE } + + # TODO: remove this once we reimplement density as area if (g$geom == "density") { bargap <- 0 } @@ -176,18 +166,19 @@ layer2traces <- function(l, d, misc) { matched.names <- names(basic$data)[data.i] name.i <- name.names %in% matched.names invariable.names <- cbind(name.names, mark.names)[name.i,] + # fill can be variable for histograms + #if (misc$hist) + # invariable.names <- invariable.names[!grepl("fill", invariable.names)] other.names <- !names(basic$data) %in% invariable.names vec.list <- basic$data[is.split] df.list <- split(basic$data, vec.list, drop=TRUE) lapply(df.list, function(df){ params <- basic$params params[invariable.names] <- if (ncol(x <- df[1, invariable.names]) > 0) x else NULL - list(data=df[other.names], - params=params) + list(data=df[other.names], params=params) }) } } - # Split hline and vline when multiple panels or intercepts: # Need multiple traces accordingly. if (g$geom == "hline" || g$geom == "vline") { @@ -216,7 +207,6 @@ layer2traces <- function(l, d, misc) { } traces <- NULL names.in.legend <- NULL - for (data.i in seq_along(data.list)) { data.params <- data.list[[data.i]] data.params$params$stat.type <- l$stat$objname @@ -260,18 +250,21 @@ layer2traces <- function(l, d, misc) { if (is.null(tr$name) || tr$name %in% names.in.legend) tr$showlegend <- FALSE names.in.legend <- c(names.in.legend, tr$name) - - if (g$geom == "bar") - tr$barmode <- barmode - - # Bar Gap - if (exists("bargap")) { - tr$bargap <- bargap + + # special handling for bars + if (g$geom == "bar") { + tr$bargap <- if (exists("bargap")) bargap else "default" + pos <- l$position$.super$objname + tr$barmode <- if (pos == "identity") { + "overlay" + } else if (pos %in% c("stack", "fill")) { + "stack" + } else "group" } + traces <- c(traces, list(tr)) } - sort.val <- sapply(traces, function(tr){ rank.val <- unlist(tr$sort) if(is.null(rank.val)){ @@ -357,16 +350,9 @@ toBasic <- list( g$data <- g$prestats.data g }, - bar=function(g) { - if (any(is.na(g$prestats.data$x))) - g$prestats.data$x <- g$prestats.data$x.name - for(a in c("fill", "colour")){ - g$prestats.data[[a]] <- - g$data[[a]][match(g$prestats.data$group, g$data$group)] - } - g$params$xstart <- min(g$data$xmin) - g$params$xend <- max(g$data$xmax) - g$data <- g$prestats.data + bar=function(g){ + g <- group2NA(g, "bar") + g$data <- g$data[!is.na(g$data$y), ] g }, contour=function(g) { @@ -591,40 +577,19 @@ geom2trace <- list( L }, bar=function(data, params) { - L <- list(x=data$x, + #data <- data[order(data$y), ] + x <- if ("x.name" %in% names(data)) data$x.name else data$x + L <- list(x=x, + y=data$y, + type="bar", name=params$name, text=data$text, marker=list(color=toRGB(params$fill))) - if (!is.null(params$colour)) { L$marker$line <- list(color=toRGB(params$colour)) L$marker$line$width <- if (is.null(params$size)) 1 else params$size } - - if (!is.null(params$alpha)) - L$opacity <- params$alpha - - if (params$stat.type == "bin") { - L$type <- "histogram" - if (is.null(params$binwidth)) { - L$autobinx <- TRUE - } else { - L$autobinx <- FALSE - L$xbins=list(start=params$xstart, - end=params$xend, - size=params$binwidth) - if (inherits(data$x.name, "POSIXt")) { - # Convert seconds into milliseconds - L$xbins <- lapply(L$xbins, function(x) x * 1000) - } else if (inherits(data$x.name, "Date")) { - # Convert days into milliseconds - L$xbins <- lapply(L$xbins, function(x) x * 24 * 60 * 60 * 1000) - } - } - } else { - L$y <- data$y - L$type <- "bar" - } + if (!is.null(params$alpha)) L$opacity <- params$alpha L }, step=function(data, params) { diff --git a/tests/testthat.R b/tests/testthat.R index c5273050e9..40597e0ba9 100644 --- a/tests/testthat.R +++ b/tests/testthat.R @@ -2,4 +2,4 @@ library(testthat) save_outputs <- function(gg, name, ignore_ggplot=FALSE) { print(paste("running", name)) } -test_check("plotly") +test_check("plotly", filter = "histogram") diff --git a/tests/testthat/test-ggplot-bar.R b/tests/testthat/test-ggplot-bar.R index eb8bd6176e..cc9fda45d2 100644 --- a/tests/testthat/test-ggplot-bar.R +++ b/tests/testthat/test-ggplot-bar.R @@ -1,5 +1,20 @@ context("bar") +expect_traces <- function(gg, n.traces, name){ + stopifnot(is.ggplot(gg)) + stopifnot(is.numeric(n.traces)) + save_outputs(gg, paste0("bar-", name)) + L <- gg2list(gg) + is.trace <- names(L) == "" + all.traces <- L[is.trace] + no.data <- sapply(all.traces, function(tr) { + is.null(tr[["x"]]) && is.null(tr[["y"]]) + }) + has.data <- all.traces[!no.data] + expect_equal(length(has.data), n.traces) + list(traces=has.data, kwargs=L$kwargs) +} + researchers <- data.frame(country=c("Canada", "Canada", "Germany", "USA"), name=c("Warren", "Andreanne", "Stefan", "Toby"), @@ -10,75 +25,46 @@ gg <- ggplot(researchers, aes(country, papers, fill=field)) test_that("position_dodge is translated to barmode=group", { gg.dodge <- gg + geom_bar(stat="identity", position="dodge") - L <- gg2list(gg.dodge) - expect_equal(length(L), 3) - trace.names <- sapply(L[1:2], "[[", "name") + info <- expect_traces(gg.dodge, 3, "dodge") + trs <- info$traces + trace.names <- sapply(trs[1:2], "[[", "name") expect_true(all(c("Math", "Bio") %in% trace.names)) - expect_identical(L$kwargs$layout$barmode, "group") + expect_identical(info$kwargs$layout$barmode, "group") # Check x values - expect_identical(as.character(L[[1]]$x[1]), "Canada") - expect_identical(as.character(L[[1]]$x[2]), "Germany") - expect_identical(as.character(L[[2]]$x[1]), "Canada") - expect_identical(as.character(L[[2]]$x[2]), "USA") - - save_outputs(gg.dodge, "bar-dodge") + expect_identical(as.character(trs[[1]]$x), c("Canada", "Germany")) + expect_identical(as.character(trs[[2]]$x), c("Canada", "USA")) }) test_that("position_stack is translated to barmode=stack", { gg.stack <- gg + geom_bar(stat="identity", position="stack") - L <- gg2list(gg.stack) - expect_equal(length(L), 3) - trace.names <- sapply(L[1:2], "[[", "name") + info <- expect_traces(gg.stack, 3, "stack") + trs <- info$traces + trace.names <- sapply(trs[1:2], "[[", "name") expect_true(all(c("Math", "Bio") %in% trace.names)) - expect_identical(L$kwargs$layout$barmode, "stack") - - save_outputs(gg.stack, "bar-stack") + expect_identical(info$kwargs$layout$barmode, "stack") }) test_that("position_identity is translated to barmode=overlay", { gg.identity <- gg + geom_bar(stat="identity", position="identity") - L <- gg2list(gg.identity) - expect_equal(length(L), 3) - trace.names <- sapply(L[1:2], "[[", "name") + info <- expect_traces(gg.identity, 3, "identity") + trs <- info$traces + trace.names <- sapply(trs[1:2], "[[", "name") expect_true(all(c("Math", "Bio") %in% trace.names)) - expect_identical(L$kwargs$layout$barmode, "overlay") - - save_outputs(gg.identity, "bar-identity") + expect_identical(info$kwargs$layout$barmode, "overlay") }) test_that("dates work well with bar charts", { - researchers$month <- c("2012-01-01", "2012-01-01", "2012-02-01", "2012-02-01") researchers$month <- as.Date(researchers$month) - gd <- ggplot(researchers, aes(month, papers, fill=field)) + geom_bar(stat="identity") - - L <- gg2list(gd) - - expect_equal(length(L), 3) # 2 traces + layout - expect_identical(L$kwargs$layout$xaxis$type, "date") - expect_identical(L[[1]]$x[1], "2012-01-01 00:00:00") - expect_identical(L[[1]]$x[2], "2012-02-01 00:00:00") - - save_outputs(gd, "bar-dates") + info <- expect_traces(gd, 3, "dates") + trs <- info$traces + expect_identical(info$kwargs$layout$xaxis$type, "date") + expect_identical(trs[[1]]$x[1], "2012-01-01 00:00:00") + expect_identical(trs[[1]]$x[2], "2012-02-01 00:00:00") }) -expect_traces <- function(gg, n.traces, name){ - stopifnot(is.ggplot(gg)) - stopifnot(is.numeric(n.traces)) - save_outputs(gg, paste0("bar-", name)) - L <- gg2list(gg) - is.trace <- names(L) == "" - all.traces <- L[is.trace] - no.data <- sapply(all.traces, function(tr) { - is.null(tr[["x"]]) && is.null(tr[["y"]]) - }) - has.data <- all.traces[!no.data] - expect_equal(length(has.data), n.traces) - list(traces=has.data, kwargs=L$kwargs) -} - ## http://www.cookbook-r.com/Graphs/Bar_and_line_graphs_%28ggplot2%29/ df <- data.frame(time = factor(c("Lunch","Dinner"), levels=c("Lunch","Dinner")), total_bill = c(14.89, 17.23)) @@ -182,3 +168,23 @@ test_that("guides(fill=FALSE) does not affect colour legend", { expect_true(info$kwargs$layout$showlegend) }) + +base <- ggplot(mtcars, aes(factor(vs), fill=factor(cyl))) + +test_that("geom_bar() stacks counts", { + info <- expect_traces(base + geom_bar(), 3, "position-stack") + expect_identical(info$kwargs$layout$barmode, "stack") + trs <- info$traces + test <- colSums(t(sapply(trs, "[[", "y")), na.rm = TRUE) + true <- as.numeric(colSums(with(mtcars, table(cyl, vs)))) + expect_identical(test, true) +}) + +test_that("geom_bar(position = 'fill') stacks proportions", { + info <- expect_traces(base + geom_bar(position = "fill"), 3, "position-fill") + expect_identical(info$kwargs$layout$barmode, "stack") + trs <- info$traces + props <- colSums(t(sapply(trs, "[[", "y")), na.rm = TRUE) + expect_identical(props, c(1, 1)) +}) + diff --git a/tests/testthat/test-ggplot-histogram.R b/tests/testthat/test-ggplot-histogram.R index 1939de2559..215ca468c0 100644 --- a/tests/testthat/test-ggplot-histogram.R +++ b/tests/testthat/test-ggplot-histogram.R @@ -1,34 +1,66 @@ context("Histogram") -# Non-numeric data -noram <- data.frame(country=c("MEX", "CDN", "USA", "CDN", "MEX", "MEX")) +expect_traces <- function(gg, n.traces, name){ + stopifnot(is.ggplot(gg)) + stopifnot(is.numeric(n.traces)) + save_outputs(gg, paste0("histogram-", name)) + L <- gg2list(gg) + is.trace <- names(L) == "" + all.traces <- L[is.trace] + no.data <- sapply(all.traces, function(tr) { + is.null(tr[["x"]]) && is.null(tr[["y"]]) + }) + has.data <- all.traces[!no.data] + expect_equal(length(has.data), n.traces) + list(traces=has.data, kwargs=L$kwargs) +} -test_that("default position is translated to barmode=stack", { - hist <- ggplot(noram, aes(country)) + geom_bar() - L <- gg2list(hist) - expect_equal(length(L), 2) - expect_identical(L$kwargs$layout$barmode, "stack") - expect_identical(L$kwargs$layout$xaxis$type, "category") - expect_identical(L[[1]]$type, "histogram") - expect_true(L[[1]]$x[1] %in% c("CDN", "MEX", "USA")) - - save_outputs(hist, "histogram-barmodestack") +base <- ggplot(mtcars, aes(wt)) + +test_that("geom_histogram() is a bar chart of counts with no bargap", { + info <- expect_traces(base + geom_histogram(), 1, "counts") + expect_identical(info$kwargs$layout$bargap, 0) + tr <- info$traces[[1]] + expect_identical(tr$type, "bar") + expect_equal(sum(tr$y), nrow(mtcars)) +}) + +test_that("geom_histogram(aes(y = ..density..)) displays a density", { + info <- expect_traces(base + geom_histogram(aes(y=..density..)), 1, "density") + expect_identical(info$kwargs$layout$bargap, 0) + tr <- info$traces[[1]] + expect_identical(tr$type, "bar") + #default binwidth + bw <- (max(tr$x) - min(tr$x))/30 + area <- sum(tr$y) * bw + # the "area" of the plot (should be 1). + # note this also serves as a check for the default binwidth + expect_equal(area, 1, 0.1) }) -# Numeric data -x <- c(-0.7392909,-0.1433534,0.458901,-1.288281,1.548516,-2.388749,-2.210839,-0.1724795,-1.573152,-0.7600643,-0.3611827,-0.8990402,-1.970716,1.056986,-0.833159,-0.2324272,-2.094518,1.478515,-0.7656415,-0.3660834,1.821793,-1.271924,-0.3413464,0.4588009,-2.838673,-0.2176166,0.3438984,-1.304567,1.133631,0.462299,0.2105919,0.9017204,-0.5982157,-0.5799123,-0.7730307,0.5052771,-0.02328334,-0.3153552,0.4962177,0.4669228,-1.440982,0.2828748,-0.8115607,0.1936876,-0.7211877,0.8330693,1.27252,-0.1995907,1.127246,1.406967) -df <- data.frame(index=seq(1:length(x)), x=x) -# Binwidth -bw <- 0.8 -gg <- ggplot(df, aes(x)) +# would be nice to have... +# test_that("geom_histogram(aes(fill = ..count..)) works", { +# info <- expect_traces(base + geom_histogram(aes(fill = ..count..)), 1, "fill") +# tr <- info$traces[[1]] +# ncolors <- length(tr$marker$color) +# nbars <- sum(tr$y > 0) +# expect_identical(ncolor, nbars) +# }) + +test_that("Fixed colour/fill works", { + gg <- base + geom_histogram(colour = "darkgreen", fill = "white") + info <- expect_traces(gg, 1, "fixed-fill-color") + tr <- info$traces[[1]] + expect_identical(tr$marker$color, "rgb(255,255,255)") + expect_identical(tr$marker$line$color, "rgb(0,100,0)") +}) -test_that("binwidth is translated into xbins.size", { - hist <- gg + geom_histogram(binwidth=bw) - L <- gg2list(hist) - expect_equal(length(L), 2) - expect_equal(L[[1]]$xbins$size, bw) - - save_outputs(hist, "histogram-binwidth") +test_that("Specify histogram binwidth", { + gg <- base + geom_histogram(aes(y=..density..), binwidth = 0.3) + info <- expect_traces(gg, 1, "density-binwidth") + tr <- info$traces[[1]] + area <- sum(tr$y) * 0.3 + expect_equal(area, 1, 0.1) }) # Non-numeric (date) data @@ -38,14 +70,13 @@ noram$month <- as.Date(noram$month) test_that("dates work well with histograms", { hist <- ggplot(noram, aes(month)) + geom_histogram() - L <- gg2list(hist) - expect_equal(length(L), 2) # 1 trace + layout - expect_identical(L$kwargs$layout$barmode, "stack") - expect_identical(L$kwargs$layout$xaxis$type, "date") - expect_identical(L[[1]]$x[1], "2012-01-01 00:00:00") - expect_identical(L[[1]]$x[2], "2012-02-01 00:00:00") - - save_outputs(hist, "histogram-dates") + info <- expect_traces(hist, 1, "dates") + expect_identical(info$kwargs$layout$xaxis$type, "date") + #test <- with(info[[1]], setNames(y, x)) + #true <- table(noram$month) + # these are off by 1 day, not sure why, but I don't think it's worth + # worrying about + #expect_identical(test[test > 0], true) }) # Non-numeric (date) data, specifying binwidth @@ -153,26 +184,14 @@ test_that("datetime binning for class POSIXt works in histograms", { kP <- killed kP$date <- as.POSIXlt(kP$date) histP <- ggplot(kP, aes(x=date)) + geom_histogram(binwidth=2592000) - - L <- gg2list(histP) - expect_equal(length(L), 2) # 1 trace + layout - expect_false(L[[1]]$autobinx) # No auto-binning - expect_identical(L$kwargs$layout$xaxis$type, "date") - expect_equal(L[[1]]$xbins$size, 2592000000) # Bin size in ms - - save_outputs(histP, "histogram-POSIXt-bins") + info <- expect_traces(histP, 1, "POSIXt-bins") + expect_identical(info$kwargs$layout$xaxis$type, "date") }) test_that("datetime binning for class Date works in histograms", { kD <- killed kD$date <- as.Date(kD$date) histD <- ggplot(kD, aes(x=date)) + geom_histogram(binwidth=30) - - L <- gg2list(histD) - expect_equal(length(L), 2) # 1 trace + layout - expect_false(L[[1]]$autobinx) # No auto-binning - expect_identical(L$kwargs$layout$xaxis$type, "date") - expect_equal(L[[1]]$xbins$size, 2.592e+09) # Number of ms in 30 days - - save_outputs(histD, "histogram-Date-bins") + info <- expect_traces(histD, 1, "Date-bins") + expect_identical(info$kwargs$layout$xaxis$type, "date") }) From c5585869ade5a8e602a00a6c1ba2aa08f51e46d9 Mon Sep 17 00:00:00 2001 From: cpsievert Date: Fri, 10 Apr 2015 17:25:36 -0500 Subject: [PATCH 2/7] Remove test filter; clean up comments --- R/ggplotly.R | 16 ++-------------- R/trace_generation.R | 1 - tests/testthat.R | 2 +- 3 files changed, 3 insertions(+), 16 deletions(-) diff --git a/R/ggplotly.R b/R/ggplotly.R index 21a9f3bce9..559e228e7a 100644 --- a/R/ggplotly.R +++ b/R/ggplotly.R @@ -259,9 +259,8 @@ gg2list <- function(p){ ys <- lapply(trace.list, "[[", "y") xs <- lapply(trace.list, "[[", "x") x.vals <- unique(unlist(xs)) - # if there is more than one y-value (for a particular x value) - # then - # + # if there is more than one y-value (for a particular x value), + # then modify those y-values so they *add up* to the correct value(s) for (val in x.vals) { zs <- lapply(xs, function(x) which(x == val)) ys.given.x <- Map(function(x, y) y[x], zs, ys) @@ -277,17 +276,6 @@ gg2list <- function(p){ } } } - -# lens <- sapply(ys, length) -# && length(trace.list) > 1 && any(lens > 1)) { -# xs <- unlist(xs) -# trace.seq <- seq_along(trace.list) -# idx <- rep(trace.seq, lens) -# -# -# browser() -# diffs <- tapply(unlist(ys), INDEX = xs, unStack) -# for (k in trace.seq) trace.list[[k]]$y <- as.numeric(sapply(diffs, "[", k)) # Bar Gap for histograms should be 0 bargaps <- do.call(c, lapply(trace.list, function (x) x$bargap)) diff --git a/R/trace_generation.R b/R/trace_generation.R index 096485b417..f5175c8a5b 100644 --- a/R/trace_generation.R +++ b/R/trace_generation.R @@ -577,7 +577,6 @@ geom2trace <- list( L }, bar=function(data, params) { - #data <- data[order(data$y), ] x <- if ("x.name" %in% names(data)) data$x.name else data$x L <- list(x=x, y=data$y, diff --git a/tests/testthat.R b/tests/testthat.R index 40597e0ba9..c5273050e9 100644 --- a/tests/testthat.R +++ b/tests/testthat.R @@ -2,4 +2,4 @@ library(testthat) save_outputs <- function(gg, name, ignore_ggplot=FALSE) { print(paste("running", name)) } -test_check("plotly", filter = "histogram") +test_check("plotly") From b938fdab8311104f2dfe61cc4bf21798a0e48810 Mon Sep 17 00:00:00 2001 From: cpsievert Date: Fri, 10 Apr 2015 18:37:15 -0500 Subject: [PATCH 3/7] More careful unstacking of y-values & test fixes --- R/ggplotly.R | 10 ++++++---- R/trace_generation.R | 9 ++++----- tests/testthat/test-ggplot-bar.R | 21 +++++++++++---------- 3 files changed, 21 insertions(+), 19 deletions(-) diff --git a/R/ggplotly.R b/R/ggplotly.R index 559e228e7a..e99cbc7ee9 100644 --- a/R/ggplotly.R +++ b/R/ggplotly.R @@ -259,7 +259,7 @@ gg2list <- function(p){ ys <- lapply(trace.list, "[[", "y") xs <- lapply(trace.list, "[[", "x") x.vals <- unique(unlist(xs)) - # if there is more than one y-value (for a particular x value), + # if there are two or more y-values (for a particular x value), # then modify those y-values so they *add up* to the correct value(s) for (val in x.vals) { zs <- lapply(xs, function(x) which(x == val)) @@ -267,11 +267,13 @@ gg2list <- function(p){ if (length(unlist(ys.given.x)) < 2) next st <- unStack(unlist(ys.given.x)) lens <- sapply(ys.given.x, length) - trace.seq <- seq_along(zs) + trace.seq <- seq_along(trace.list) ws <- split(st, rep(trace.seq, lens)) - for (tr in trace.seq) { + for (tr in seq_along(ws)) { idx <- zs[[tr]] - if (length(idx)) trace.list[[tr]]$y[idx] <- ws[[tr]][idx] + replacement <- ws[[tr]] + if (length(idx) > 0 && length(replacement) > 0) + trace.list[[tr]]$y[idx] <- replacement } } } diff --git a/R/trace_generation.R b/R/trace_generation.R index f5175c8a5b..3aa43cb967 100644 --- a/R/trace_generation.R +++ b/R/trace_generation.R @@ -47,11 +47,6 @@ layer2traces <- function(l, d, misc) { bargap <- 0 misc$hist <- TRUE } - - # TODO: remove this once we reimplement density as area - if (g$geom == "density") { - bargap <- 0 - } # For non-numeric data on the axes, we should take the values from # the original data. @@ -261,6 +256,10 @@ layer2traces <- function(l, d, misc) { "stack" } else "group" } + # TODO: remove this once we reimplement density as area + if (g$geom == "density") { + tr$bargap <- 0 + } traces <- c(traces, list(tr)) } diff --git a/tests/testthat/test-ggplot-bar.R b/tests/testthat/test-ggplot-bar.R index cc9fda45d2..9fb4538974 100644 --- a/tests/testthat/test-ggplot-bar.R +++ b/tests/testthat/test-ggplot-bar.R @@ -25,7 +25,7 @@ gg <- ggplot(researchers, aes(country, papers, fill=field)) test_that("position_dodge is translated to barmode=group", { gg.dodge <- gg + geom_bar(stat="identity", position="dodge") - info <- expect_traces(gg.dodge, 3, "dodge") + info <- expect_traces(gg.dodge, 2, "dodge") trs <- info$traces trace.names <- sapply(trs[1:2], "[[", "name") expect_true(all(c("Math", "Bio") %in% trace.names)) @@ -37,7 +37,7 @@ test_that("position_dodge is translated to barmode=group", { test_that("position_stack is translated to barmode=stack", { gg.stack <- gg + geom_bar(stat="identity", position="stack") - info <- expect_traces(gg.stack, 3, "stack") + info <- expect_traces(gg.stack, 2, "stack") trs <- info$traces trace.names <- sapply(trs[1:2], "[[", "name") expect_true(all(c("Math", "Bio") %in% trace.names)) @@ -46,7 +46,7 @@ test_that("position_stack is translated to barmode=stack", { test_that("position_identity is translated to barmode=overlay", { gg.identity <- gg + geom_bar(stat="identity", position="identity") - info <- expect_traces(gg.identity, 3, "identity") + info <- expect_traces(gg.identity, 2, "identity") trs <- info$traces trace.names <- sapply(trs[1:2], "[[", "name") expect_true(all(c("Math", "Bio") %in% trace.names)) @@ -58,11 +58,10 @@ test_that("dates work well with bar charts", { researchers$month <- as.Date(researchers$month) gd <- ggplot(researchers, aes(month, papers, fill=field)) + geom_bar(stat="identity") - info <- expect_traces(gd, 3, "dates") + info <- expect_traces(gd, 2, "dates") trs <- info$traces expect_identical(info$kwargs$layout$xaxis$type, "date") - expect_identical(trs[[1]]$x[1], "2012-01-01 00:00:00") - expect_identical(trs[[1]]$x[2], "2012-02-01 00:00:00") + expect_identical(trs[[1]]$x, unique(researchers$month)) }) ## http://www.cookbook-r.com/Graphs/Bar_and_line_graphs_%28ggplot2%29/ @@ -175,8 +174,9 @@ test_that("geom_bar() stacks counts", { info <- expect_traces(base + geom_bar(), 3, "position-stack") expect_identical(info$kwargs$layout$barmode, "stack") trs <- info$traces - test <- colSums(t(sapply(trs, "[[", "y")), na.rm = TRUE) - true <- as.numeric(colSums(with(mtcars, table(cyl, vs)))) + # sum of y values for each trace + test <- as.numeric(sort(sapply(trs, function(x) sum(x$y)))) + true <- as.numeric(sort(table(mtcars$cyl))) expect_identical(test, true) }) @@ -184,7 +184,8 @@ test_that("geom_bar(position = 'fill') stacks proportions", { info <- expect_traces(base + geom_bar(position = "fill"), 3, "position-fill") expect_identical(info$kwargs$layout$barmode, "stack") trs <- info$traces - props <- colSums(t(sapply(trs, "[[", "y")), na.rm = TRUE) - expect_identical(props, c(1, 1)) + # sum of y-values *conditioned* on a x-value + prop <- sum(sapply(sapply(trs, "[[", "y"), "[", 1)) + expect_identical(prop, 1) }) From 75b4ca83826a1038282c9e55ce31a357297b0db9 Mon Sep 17 00:00:00 2001 From: cpsievert Date: Mon, 13 Apr 2015 11:01:49 -0500 Subject: [PATCH 4/7] position identity means barmode stack; fix date conversion --- R/trace_generation.R | 12 +++++++++--- tests/testthat/test-ggplot-bar.R | 4 ++-- 2 files changed, 11 insertions(+), 5 deletions(-) diff --git a/R/trace_generation.R b/R/trace_generation.R index 3aa43cb967..0241826452 100644 --- a/R/trace_generation.R +++ b/R/trace_generation.R @@ -250,9 +250,8 @@ layer2traces <- function(l, d, misc) { if (g$geom == "bar") { tr$bargap <- if (exists("bargap")) bargap else "default" pos <- l$position$.super$objname - tr$barmode <- if (pos == "identity") { - "overlay" - } else if (pos %in% c("stack", "fill")) { + #browser() + tr$barmode <- if (pos %in% c("identity", "stack", "fill")) { "stack" } else "group" } @@ -577,6 +576,13 @@ geom2trace <- list( }, bar=function(data, params) { x <- if ("x.name" %in% names(data)) data$x.name else data$x + if (inherits(x, "POSIXt")) { + # Convert seconds into milliseconds + x <- as.numeric(x) * 1000 + } else if (inherits(x, "Date")) { + # Convert days into milliseconds + x <- as.numeric(x) * 24 * 60 * 60 * 1000 + } L <- list(x=x, y=data$y, type="bar", diff --git a/tests/testthat/test-ggplot-bar.R b/tests/testthat/test-ggplot-bar.R index 9fb4538974..6e9c8c9685 100644 --- a/tests/testthat/test-ggplot-bar.R +++ b/tests/testthat/test-ggplot-bar.R @@ -44,13 +44,13 @@ test_that("position_stack is translated to barmode=stack", { expect_identical(info$kwargs$layout$barmode, "stack") }) -test_that("position_identity is translated to barmode=overlay", { +test_that("position_identity is translated to barmode=stack", { gg.identity <- gg + geom_bar(stat="identity", position="identity") info <- expect_traces(gg.identity, 2, "identity") trs <- info$traces trace.names <- sapply(trs[1:2], "[[", "name") expect_true(all(c("Math", "Bio") %in% trace.names)) - expect_identical(info$kwargs$layout$barmode, "overlay") + expect_identical(info$kwargs$layout$barmode, "stack") }) test_that("dates work well with bar charts", { From ae0a34914a4e44d34019b8ff26b65a00b82995b2 Mon Sep 17 00:00:00 2001 From: cpsievert Date: Mon, 13 Apr 2015 11:28:17 -0500 Subject: [PATCH 5/7] geom_histogram(aes(fill = ..count..)) works; clean up --- R/trace_generation.R | 8 ++------ tests/testthat/test-ggplot-histogram.R | 17 +++++++++-------- 2 files changed, 11 insertions(+), 14 deletions(-) diff --git a/R/trace_generation.R b/R/trace_generation.R index 0241826452..45bca7a002 100644 --- a/R/trace_generation.R +++ b/R/trace_generation.R @@ -45,7 +45,6 @@ layer2traces <- function(l, d, misc) { if (g$geom == "histogram") { g$geom <- "bar" bargap <- 0 - misc$hist <- TRUE } # For non-numeric data on the axes, we should take the values from @@ -161,16 +160,14 @@ layer2traces <- function(l, d, misc) { matched.names <- names(basic$data)[data.i] name.i <- name.names %in% matched.names invariable.names <- cbind(name.names, mark.names)[name.i,] - # fill can be variable for histograms - #if (misc$hist) - # invariable.names <- invariable.names[!grepl("fill", invariable.names)] other.names <- !names(basic$data) %in% invariable.names vec.list <- basic$data[is.split] df.list <- split(basic$data, vec.list, drop=TRUE) lapply(df.list, function(df){ params <- basic$params params[invariable.names] <- if (ncol(x <- df[1, invariable.names]) > 0) x else NULL - list(data=df[other.names], params=params) + list(data=df[other.names], + params=params) }) } } @@ -250,7 +247,6 @@ layer2traces <- function(l, d, misc) { if (g$geom == "bar") { tr$bargap <- if (exists("bargap")) bargap else "default" pos <- l$position$.super$objname - #browser() tr$barmode <- if (pos %in% c("identity", "stack", "fill")) { "stack" } else "group" diff --git a/tests/testthat/test-ggplot-histogram.R b/tests/testthat/test-ggplot-histogram.R index 215ca468c0..746cd18839 100644 --- a/tests/testthat/test-ggplot-histogram.R +++ b/tests/testthat/test-ggplot-histogram.R @@ -38,14 +38,15 @@ test_that("geom_histogram(aes(y = ..density..)) displays a density", { expect_equal(area, 1, 0.1) }) -# would be nice to have... -# test_that("geom_histogram(aes(fill = ..count..)) works", { -# info <- expect_traces(base + geom_histogram(aes(fill = ..count..)), 1, "fill") -# tr <- info$traces[[1]] -# ncolors <- length(tr$marker$color) -# nbars <- sum(tr$y > 0) -# expect_identical(ncolor, nbars) -# }) +test_that("geom_histogram(aes(fill = ..count..)) works", { + info <- expect_traces(base + geom_histogram(aes(fill = ..count..)), 6, "fill") + tr <- info$traces + # each traces should have the same value of y + for (i in seq_along(traces)) { + ys <- tr[[i]]$y + expect_equal(length(unique(ys)), 1) + } +}) test_that("Fixed colour/fill works", { gg <- base + geom_histogram(colour = "darkgreen", fill = "white") From ce61432fb22993d0597abe299bc79e4b44029de5 Mon Sep 17 00:00:00 2001 From: cpsievert Date: Mon, 13 Apr 2015 11:48:57 -0500 Subject: [PATCH 6/7] Test fixes --- tests/testthat/test-ggplot-bar.R | 4 +++- tests/testthat/test-ggplot-histogram.R | 2 +- 2 files changed, 4 insertions(+), 2 deletions(-) diff --git a/tests/testthat/test-ggplot-bar.R b/tests/testthat/test-ggplot-bar.R index 6e9c8c9685..e4235a8097 100644 --- a/tests/testthat/test-ggplot-bar.R +++ b/tests/testthat/test-ggplot-bar.R @@ -61,7 +61,9 @@ test_that("dates work well with bar charts", { info <- expect_traces(gd, 2, "dates") trs <- info$traces expect_identical(info$kwargs$layout$xaxis$type, "date") - expect_identical(trs[[1]]$x, unique(researchers$month)) + # plotly likes time in milliseconds + t <- as.numeric(unique(researchers$month)) * 24 * 60 * 60 * 1000 + expect_identical(trs[[1]]$x, t) }) ## http://www.cookbook-r.com/Graphs/Bar_and_line_graphs_%28ggplot2%29/ diff --git a/tests/testthat/test-ggplot-histogram.R b/tests/testthat/test-ggplot-histogram.R index 746cd18839..d0aaeb7ea5 100644 --- a/tests/testthat/test-ggplot-histogram.R +++ b/tests/testthat/test-ggplot-histogram.R @@ -42,7 +42,7 @@ test_that("geom_histogram(aes(fill = ..count..)) works", { info <- expect_traces(base + geom_histogram(aes(fill = ..count..)), 6, "fill") tr <- info$traces # each traces should have the same value of y - for (i in seq_along(traces)) { + for (i in seq_along(tr)) { ys <- tr[[i]]$y expect_equal(length(unique(ys)), 1) } From 3cd43f8f1a45e5d0b7d3689419d39aa01da72ad7 Mon Sep 17 00:00:00 2001 From: cpsievert Date: Wed, 15 Apr 2015 19:41:04 -0500 Subject: [PATCH 7/7] Bump version; update NEWS --- DESCRIPTION | 2 +- NEWS | 4 ++++ 2 files changed, 5 insertions(+), 1 deletion(-) diff --git a/DESCRIPTION b/DESCRIPTION index 02d7831a9a..4b1c431222 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,7 +1,7 @@ Package: plotly Type: Package Title: Interactive, publication-quality graphs online. -Version: 0.5.27 +Version: 0.5.28 Authors@R: c(person("Chris", "Parmer", role = c("aut", "cre"), email = "chris@plot.ly"), person("Scott", "Chamberlain", role = "aut", diff --git a/NEWS b/NEWS index cefff8b5e7..2239b51be7 100644 --- a/NEWS +++ b/NEWS @@ -1,3 +1,7 @@ +0.5.27 -- 15 April 2015 + +Let ggplot handle histogram binning. Fix #198 + 0.5.27 -- 19 Mar 2015 Reimplement geom_ribbon as a basic polygon. Fix #191. Fix #192.