Skip to content

RFC: fix traces subdivision by multiple properties #471

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Closed
wants to merge 3 commits into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion DESCRIPTION
Original file line number Diff line number Diff line change
Expand Up @@ -31,9 +31,9 @@ Imports:
base64enc,
htmlwidgets,
tidyr,
dplyr,
hexbin
Suggests:
dplyr,
maps,
ggthemes,
GGally,
Expand Down
225 changes: 163 additions & 62 deletions R/plotly.R
Original file line number Diff line number Diff line change
Expand Up @@ -292,8 +292,7 @@ plotly_build.plotly_hash <- function(l = last_plot()) {
# this is ugly, but I think it is necessary, since we don't know how many
# traces we have until we evaluate args and call traceify() (or similar)
x <- list()
for (i in seq_along(l$data)) {
d <- l$data[[i]]
x$data <- unlist(lapply(l$data, function(d) {
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@alyst is the unlist() necessary here?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@cpsievert Yes, the function inside lapply() returns a list of traces for each l$data element. unlist() just combines these lists into one.

if (should_eval(d)) {
dat <- do_eval(d)
# start processing specially named arguments
Expand All @@ -315,25 +314,109 @@ plotly_build.plotly_hash <- function(l = last_plot()) {
txt <- paste0(as.list(d$args)[["size"]], " (size): ", s)
dat[["text"]] <- if (is.null(dat[["text"]])) txt else paste0(dat[["text"]], "<br>", txt)
}
has_color <- !is.null(dat[["color"]]) ||
isTRUE(!is.null(dat[["z"]]) && !dat[["type"]] %in% "scatter3d")
has_symbol <- !is.null(dat[["symbol"]])
has_group <- !is.null(dat[["group"]])
if (has_color) {

# helper functions to process dat
tracify_by_data <- function(points_df, data, data_name, force_numeric=FALSE) {
if (!is.null(data) && (!is.numeric(data) || force_numeric)) {
points_df[[paste0(data_name,"_index")]] <- as.factor(data)
}
return(points_df)
}
tracify_by_column <- function(points_df, dat, col_name, force_numeric=FALSE) {
tracify_by_data(points_df, dat[[col_name]], col_name, force_numeric=force_numeric)
}
tracify_by_color <- function(points_df, dat) {
if (!is.null(dat[["color"]])) {
cols <- dat[["color"]]
} else if (isTRUE(!is.null(dat[["z"]]) && !dat[["type"]] %in% "scatter3d")) {
cols <- dat[["z"]]
} else {
cols <- NULL
}
tracify_by_data(points_df, cols, "color")
}

# define the dat traces
points_df <- data.frame(dat_index = seq_along(dat[["x"]] %||% dat[["y"]] %||% dat[["z"]])) %>% # indices of the original data elements used in the trace FIXME properly define data length
tracify_by_color(dat) %>%
tracify_by_column(dat, "symbol", force_numeric=TRUE) %>%
tracify_by_column(dat, "group", force_numeric=TRUE) %>%
tracify_by_column(dat, "legenditem", force_numeric=TRUE)
subtrace_key_cols <- setdiff(colnames(points_df), "dat_index")
trace_key_cols <- setdiff(subtrace_key_cols, "group_index")
points_df <- dplyr::arrange_(points_df, .dots = c(subtrace_key_cols, "dat_index")) %>%
dplyr::group_by_(.dots = subtrace_key_cols)
points_df$subtrace_index <- dplyr::group_indices(points_df)
points_df <- dplyr::group_by_(points_df, .dots = trace_key_cols)
points_df$trace_index <- dplyr::group_indices(points_df)
points_df <- dplyr::ungroup(points_df)
points_df$point_order <- seq_len(nrow(points_df))

# polylines should be further disrupted at 'group' boundaries by inserting NAs
if (grepl("lines", dat[["mode"]] %||% "markers+lines") && "group_index" %in% subtrace_key_cols) {
subtrace_bound <- points_df$trace_index[-1] == points_df$trace_index[-nrow(points_df)] &
points_df$subtrace_index[-1] != points_df$subtrace_index[-nrow(points_df)]
if (any(subtrace_bound)) {
points_df <- rbind(points_df, points_df[subtrace_bound,]) %>% dplyr::arrange(point_order)
points_df$dat_index[c(FALSE, points_df$point_order[-1] == points_df$point_order[-nrow(points_df)])] <- NA
points_df$point_order <- seq_len(nrow(points_df)) # order wrt added points
}
}

trace_point_indices <- attr(dplyr::group_by(points_df, trace_index), "indices")
if (length(trace_point_indices) > 0) {
trace_point_indices <- lapply(trace_point_indices, function(ixs) ixs+1L)
trace_dat_indices <- lapply(trace_point_indices, function(ixs) points_df$dat_index[ixs])
} else { # single/empty trace
trace_point_indices <- list(as.integer())
trace_dat_indices <- list(points_df$dat_index)
}

# assign trace names
names(trace_point_indices) <- sapply(trace_point_indices, function(point_indices){
if (length(point_indices)>0) {
paste0(lapply(points_df[point_indices[[1]], trace_key_cols], as.character), collapse="/")
} else {
NA
}
})

# list of the functions to apply for each created trace
trace_brushes <- list()

# assigns name to the trace
trace_brushes <- append(trace_brushes, function(trace, trace_ix, dat, dat_point_indices) {
trace$name <- names(trace_point_indices)[[trace_ix]]
trace
})

if (!is.null(dat[["color"]]) ||
isTRUE(!is.null(dat[["z"]]) && !dat[["type"]] %in% "scatter3d")) {
title <- as.list(d$args)[["color"]] %||% as.list(d$args)[["z"]] %||% ""
x$data <- c(x$data, colorize(dat, title))
trace_brushes <- append(trace_brushes, color_brush(dat, title))
}
# TODO: add a legend title (is this only possible via annotations?!?)
if (has_symbol) x$data <- c(x$data, symbolize(dat))
if (has_group) x$data <- c(x$data, traceify(dat, "group"))
if (!has_color && !has_symbol && !has_group) x$data <- c(x$data, list(dat))
if (!is.null(dat[["symbol"]])) {
trace_brushes <- append(trace_brushes, symbol_brush(dat))
}
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@alyst I'm not quite sure what this if statement is doing. Should this say legendgroup instead of legenditem?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@cpsievert It tries to solve the problem of having an item in the legend for each trace (which is not what you want if e.g. you are doing something similar to geom_segment()). The new property legenditem specifies the name of the single element in the legend that all the traces in the same legenditem-group would be referring to (unless they are further subdivided by something else).
To implement this, only the first trace of the group is shown in the legend, but all the other traces still share the same legendgroup.
Although it utilizes legendgroup mechanism, I thought it's better to call the property legenditem, because from the user POV it doesn't do any grouping of legend elements, it just creates one item for the group of traces.

Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I should have mentioned this a long time ago in response to your comment:

Instead of creating a trace for each group and managing legend entries via legendgroup, I'd prefer to take the same approach as ggplotly() where group doesn't create multiple traces, but groups are separated by NAs. For a simple example, plot_ly(x = c(1:10), y = c(1:10), group = rep(1:2, each = 5)) should translate to plot_ly(x = c(1:5, NA, 6:10), y = c(1:5, NA, 6:10)).

We use plotly:::group2NA() to handle this ggplotly(). Note that this line checks to see if we are drawing a polygon (in plotlyjs, a polygon is a scatter trace with mode of lines). In that case, we need to "retrace" the first point in each group to effectively close each polygon. There isn't any nice, high-level way to say "draw a polygon" using plot_ly(), so for now, I think we could just slip in NAs without worrying about retracing points. Does that make sense @alyst?

if ("legenditem_index" %in% trace_key_cols) {
legenditems <- unique(dat[["legenditem"]])
first_traces <- dplyr::group_by(points_df, legenditem_index) %>% dplyr::filter(row_number() == 1) %>% .$trace_index
trace_brushes <- append(trace_brushes, function(trace, trace_ix, dat, dat_indices) {
trace$legendgroup <- trace[["legenditem"]][[1]]
trace$name <- trace[["legenditem"]][[1]] # override the trace name by legenditem
trace$showlegend <- trace_ix %in% first_traces # put each trace group to legend once
trace$legenditem <- NULL
trace
})
}
generate_traces(dat, max(points_df$dat_index, na.rm=TRUE), trace_dat_indices, trace_brushes)
} else {
x$data <- c(x$data, list(d))
list(d)
}
}
}), recursive=FALSE)
# it's possible have nested layouts (e.g., plot_ly() %>% layout() %>% layout())
nms <- names(l$layout)
idx <- nms %in% "layout"
idx <- names(l$layout) %in% "layout"
l$layout <- c(list(l$layout[!idx]), setNames(l$layout[idx], NULL))
for (i in seq_along(l$layout)) {
x$layout[[i]] <- perform_eval(l$layout[[i]])
Expand Down Expand Up @@ -371,7 +454,7 @@ plotly_build.plotly_hash <- function(l = last_plot()) {
}

# returns a _list of traces_.
colorize <- function(dat, title = "") {
color_brush <- function(dat, title = "") {
cols <- dat[["color"]] %||% dat[["z"]]
if (is.numeric(cols)) {
# by default, use viridis::viridis(10) -> http://rud.is/b/2015/07/20/using-the-new-viridis-colormap-in-r-thanks-to-simon-garnier/
Expand All @@ -380,74 +463,92 @@ colorize <- function(dat, title = "") {
rng <- range(cols, na.rm = TRUE)
x <- seq(min(rng), max(rng), length.out = 10)
colz <- scales::col_numeric(colors, rng, na.color = "transparent")(x)
df <- if (length(cols) > 1) data.frame(scales::rescale(x), colz)
else data.frame(c(0, 1), rep(colz, 2))
df <- if (length(cols) > 1) data.frame(scales::rescale(x), colz) else data.frame(c(0, 1), rep(colz, 2))
col_list <- list(
colorbar = list(title = as.character(title)),
colorscale = setNames(df, NULL)
)
# scatter-like traces can have both line and marker objects
if (grepl("scatter", dat[["type"]] %||% "scatter")) {
col_list$color <- cols
dat[["marker"]] <- modifyList(col_list, dat[["marker"]] %||% list())
#mode <- dat[["mode"]] %||% "markers+lines"
# can't have a colorscale for both markers and lines???
#dat[["line"]] <- modifyList(col_list, dat[["line"]] %||% list())
return (function(trace, trace_ix, dat, dat_indices) {
trace[["marker"]] <- modifyList(col_list, trace[["marker"]] %||% list())
trace[["marker"]]$color <- cols[dat_indices]
#mode <- dat[["mode"]] %||% "markers+lines"
# can't have a colorscale for both markers and lines???
#dat[["line"]] <- modifyList(col_list, dat[["line"]] %||% list())
trace$color <- NULL; trace$colors <- NULL
trace
})
} else {
dat <- c(dat, col_list)
return (function(trace, trace_ix, dat, dat_indices) {
trace$color <- NULL; trace$colors <- NULL
c(trace, col_list)
})
}
dat <- list(dat)
} else { # discrete color scale
lvls <- unique(cols)
N <- length(lvls)
default <- if (is.ordered(cols)) viridis::viridis(N)
else RColorBrewer::brewer.pal(N, "Set2")
colors <- dat[["colors"]] %||% default
colz <- scales::col_factor(colors, levels = lvls, na.color = "transparent")(lvls)
dat <- traceify(dat, "color")
dat <- Map(function(x, y) { x[["marker"]] <- c(x[["marker"]], list(color = y)); x },
dat, colz)
names(colz) <- lvls
return (function(trace, trace_ix, dat, dat_indices) {
trace$marker <- c(trace$marker, list(color = colz[[trace[["color"]][[1]]]]))
trace$color <- NULL
trace$colors <- NULL
trace
})
}
dat <- lapply(dat, function(x) { x$color <- NULL; x$colors <- NULL; x })
dat
}

symbolize <- function(dat) {
# symbols really only make sense when markers are in the mode, right?
dat$mode <- dat$mode %||% "markers"
dat <- traceify(dat, "symbol")
dat <- lapply(dat, function(x) { x$symbol <- NULL; x })
N <- length(dat)
if (N > 8) warning("Plotly supports 8 different symbols, but you have ", N, " levels!")
symbol_brush <- function(dat) {
lvls <- unique(dat[["symbol"]])
N <- length(lvls)
if (N > 8) warning("Plotly supports 8 different symbols, but you have ", N, " levels!") # FIXME: actually, plotly supports more
symbols <- c('dot', 'cross', 'diamond', 'square', 'triangle-down', 'triangle-left', 'triangle-right', 'triangle-up')
sym <- dat[[1]][["symbols"]][seq_len(N)] %||% symbols[seq_len(N)]
dat <- Map(function(x, y) { x$marker$symbol <- y; x }, dat, sym)
dat
sym <- (dat[["symbols"]] %||% symbols)[seq_len(N)]
names(sym) <- lvls
# return brush function
function(trace, trace_ix, dat, dat_indices) {
trace$marker <- c(trace$marker, list(symbol=sym[[trace[["symbol"]][[1]]]]))
trace$symbol <- NULL
# symbols really only make sense when markers are in the mode, right?
trace$mode <- dat$mode %||% "markers"
trace
}
}

# break up a single trace into multiple traces according to values stored
# a particular key name
traceify <- function(dat, nm = "group") {
x <- dat[[nm]]
if (is.null(x)) {
return(list(dat))
} else {
# the order of lvls determines the order in which traces are drawn
# for ordered factors at least, it makes sense to draw the highest level first
# since that _should_ be the darkest color in a sequential pallette
lvls <- if (is.factor(x)) rev(levels(x)) else unique(x)
n <- length(x)
# recursively search for a non-list of appropriate length (if it is, subset it)
recurse <- function(z, n, idx) {
if (is.list(z)) lapply(z, recurse, n, idx) else if (length(z) == n) z[idx] else z
}
new_dat <- list()
for (j in seq_along(lvls)) {
new_dat[[j]] <- lapply(dat, function(y) recurse(y, n, x %in% lvls[j]))
new_dat[[j]]$name <- lvls[j]
}
return(new_dat)
# Split dat into traces given indices of dat elements for each trace,
# then apply brushes to each resulting trace.
# A brush is a function function(trace, trace_ix, dat, dat_indices)
# that is supposed to return the modified version of the trace
generate_traces <- function(dat, dat_len, traces_dat_indices, trace_brushes){
# create trace by subseting dat columns and copying non-vector elements as-is
subset_dat <- function(dat, dat_indices) {
lapply(dat, function(dat_el) {
# FIXME better check for subsettable property
if ((is.vector(dat_el) || is.factor(dat_el)) && (length(dat_el) == dat_len)) {
dat_el[dat_indices]
} else if (is.list(dat_el)) {
# recursion
lapply(dat_el, subset_dat, dat_indices)
} else {
dat_el # as-is
}
})
}
# create traces by subsetting dat and applying brushes
# start with the traces of highest index so that they are drawn first
# since that _should_ be the darkest color in a sequential pallette
lapply(rev(seq_along(traces_dat_indices)), function(trace_ix) {
dat_indices <- traces_dat_indices[[trace_ix]]
trace <- subset_dat(dat, dat_indices)
for (brush in trace_brushes) {
trace <- brush(trace, trace_ix, dat, dat_indices)
}
trace
})
}

axis_titles <- function(x, l) {
Expand Down
6 changes: 3 additions & 3 deletions tests/testthat/test-plotly-subplot.R
Original file line number Diff line number Diff line change
Expand Up @@ -29,11 +29,11 @@ test_that("nrows argument works", {
expect_true(doms$yaxis2[2] > doms$yaxis2[1])
})

test_that("group + [x/y]axis works", {
test_that("color + [x/y]axis works", {
iris$id <- as.integer(iris$Species)
p <- plot_ly(iris, x = Petal.Length, y = Petal.Width, group = Species,
p <- plot_ly(iris, x = Petal.Length, y = Petal.Width, color = Species,
xaxis = paste0("x", id), mode = "markers")
s <- expect_traces(subplot(p, margin = 0.05), 3, "group")
s <- expect_traces(subplot(p, margin = 0.05), 3, "color")
ax <- s$layout[grepl("^[x-y]axis", names(s$layout))]
doms <- lapply(ax, "[[", "domain")
# make sure y domain is [0, 1] on every axis
Expand Down
23 changes: 19 additions & 4 deletions tests/testthat/test-plotly.R
Original file line number Diff line number Diff line change
Expand Up @@ -17,19 +17,34 @@ test_that("plot_ly() handles a simple scatterplot", {
expect_identical(l$layout$yaxis$title, "Petal.Length")
})

test_that("Using group argument creates multiple traces", {
p <- plot_ly(data = iris, x = Sepal.Length, y = Petal.Length, group = Species)
l <- expect_traces(p, 3, "scatterplot-group")
test_that("Using group argument doesn't transform the data in markers mode", {
p <- plot_ly(data = iris, x = Sepal.Length, y = Petal.Length, group = Species, mode = "markers")
l <- expect_traces(p, 1, "scatterplot-group-markers")
expect_identical(l$layout$xaxis$title, "Sepal.Length")
expect_identical(l$layout$yaxis$title, "Petal.Length")
expect_identical(l$data[[1]]$group, iris$Species)
})

test_that("Using group argument for scatter plot in lines mode introduces breaks (NA values separating different groups)", {
p <- plot_ly(data = iris, x = Sepal.Length, y = Petal.Length, group = Species, mode = "lines")
l <- expect_traces(p, 1, "scatterplot-group-lines")
expect_identical(l$layout$xaxis$title, "Sepal.Length")
expect_identical(l$layout$yaxis$title, "Petal.Length")
n_groups <- length(unique(iris$Species))
n_pts <- length(l$data[[1]]$x)
expect_identical(n_pts, nrow(iris)+n_groups-1L)
expect_identical(sum(is.na(l$data[[1]]$x)), n_groups-1L)
expect_true(all(is.na(l$data[[1]]$group[-1]) |
is.na(l$data[[1]]$group[-n_pts]) |
(l$data[[1]]$group[-1] == l$data[[1]]$group[-n_pts])))
})

test_that("Mapping a variable to symbol works", {
p <- plot_ly(data = iris, x = Sepal.Length, y = Petal.Length, symbol = Species)
l <- expect_traces(p, 3, "scatterplot-symbol")
markers <- lapply(l$data, "[[", "marker")
syms <- unlist(lapply(markers, "[[", "symbol"))
expect_identical(syms, c("dot", "cross", "diamond"))
expect_identical(syms, rev(c("dot", "cross", "diamond"))) # rev() because traces are drawn in reverse order (higher to lower)
})

test_that("Mapping a factor variable to color works", {
Expand Down