Skip to content

On-par autorange for scattergl #2404

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 9 commits into from
Feb 28, 2018
Merged
14 changes: 8 additions & 6 deletions src/plots/cartesian/axes.js
Original file line number Diff line number Diff line change
Expand Up @@ -423,6 +423,13 @@ axes.saveShowSpikeInitial = function(gd, overwrite) {
return hasOneAxisChanged;
};

/**
 * Tell whether an axis takes part in autoranging.
 *
 * @param {object} ax : axis object; its `autorange` property and nested
 *   `rangeslider.autorange` property are read
 * @return {*} `ax.autorange` itself when it is truthy, otherwise the
 *   boolean coercion of `rangeslider.autorange` (looked up through
 *   Lib.nestedProperty)
 */
axes.doesAxisNeedAutoRange = function(ax) {
    // short-circuit: a truthy axis-level autorange is returned as-is
    if(ax.autorange) return ax.autorange;

    var sliderAutorange = Lib.nestedProperty(ax, 'rangeslider.autorange').get();
    return !!sliderAutorange;
};

// axes.expand: if autoranging, include new data in the outer limits
// for this axis
// data is an array of numbers (ie already run through ax.d2c)
Expand All @@ -436,12 +443,7 @@ axes.saveShowSpikeInitial = function(gd, overwrite) {
// tozero: (boolean) make sure to include zero if axis is linear,
// and make it a tight bound if possible
axes.expand = function(ax, data, options) {
var needsAutorange = (
ax.autorange ||
!!Lib.nestedProperty(ax, 'rangeslider.autorange').get()
);

if(!needsAutorange || !data) return;
if(!axes.doesAxisNeedAutoRange(ax) || !data) return;

if(!ax._min) ax._min = [];
if(!ax._max) ax._max = [];
Expand Down
51 changes: 30 additions & 21 deletions src/traces/scatter/calc.js
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,31 @@ function calc(gd, trace) {
var x = xa.makeCalcdata(trace, 'x');
var y = ya.makeCalcdata(trace, 'y');
var serieslen = trace._length;
var cd = new Array(serieslen);

var ppad = calcMarkerSize(trace, serieslen);
calcAxisExpansion(gd, trace, xa, ya, x, y, ppad);

for(var i = 0; i < serieslen; i++) {
cd[i] = (isNumeric(x[i]) && isNumeric(y[i])) ?
{x: x[i], y: y[i]} :
{x: BADNUM, y: BADNUM};

if(trace.ids) {
cd[i].id = String(trace.ids[i]);
}
}

arraysToCalcdata(cd, trace);
calcColorscale(trace);
calcSelection(cd, trace);

gd.firstscatter = false;
return cd;
}

function calcAxisExpansion(gd, trace, xa, ya, x, y, ppad) {
var serieslen = trace._length;

// cancel minimum tick spacings (only applies to bars and boxes)
xa._minDtick = 0;
Expand All @@ -35,8 +60,9 @@ function calc(gd, trace) {
var xOptions = {padded: true};
var yOptions = {padded: true};

var ppad = calcMarkerSize(trace, serieslen);
if(ppad) xOptions.ppad = yOptions.ppad = ppad;
if(ppad) {
xOptions.ppad = yOptions.ppad = ppad;
}

// TODO: text size

Expand Down Expand Up @@ -72,24 +98,6 @@ function calc(gd, trace) {

Axes.expand(xa, x, xOptions);
Axes.expand(ya, y, yOptions);

// create the "calculated data" to plot
var cd = new Array(serieslen);
for(var i = 0; i < serieslen; i++) {
cd[i] = (isNumeric(x[i]) && isNumeric(y[i])) ?
{x: x[i], y: y[i]} : {x: BADNUM, y: BADNUM};

if(trace.ids) {
cd[i].id = String(trace.ids[i]);
}
}

arraysToCalcdata(cd, trace);
calcColorscale(trace);
calcSelection(cd, trace);

gd.firstscatter = false;
return cd;
}

function calcMarkerSize(trace, serieslen) {
Expand Down Expand Up @@ -131,5 +139,6 @@ function calcMarkerSize(trace, serieslen) {

module.exports = {
calc: calc,
calcMarkerSize: calcMarkerSize
calcMarkerSize: calcMarkerSize,
calcAxisExpansion: calcAxisExpansion
};
192 changes: 86 additions & 106 deletions src/traces/scattergl/index.js
Original file line number Diff line number Diff line change
Expand Up @@ -25,14 +25,15 @@ var formatColor = require('../../lib/gl_format_color');

var subTypes = require('../scatter/subtypes');
var calcMarkerSize = require('../scatter/calc').calcMarkerSize;
var calcAxisExpansion = require('../scatter/calc').calcAxisExpansion;
var calcColorscales = require('../scatter/colorscale_calc');
var makeBubbleSizeFn = require('../scatter/make_bubble_size_func');
var linkTraces = require('../scatter/link_traces');
var getTraceColor = require('../scatter/get_trace_color');
var fillHoverText = require('../scatter/fill_hover_text');
var isNumeric = require('fast-isnumeric');

var DASHES = require('../../constants/gl2d_dashes');
var BADNUM = require('../../constants/numerical').BADNUM;
var SYMBOL_SDF_SIZE = 200;
var SYMBOL_SIZE = 20;
var SYMBOL_STROKE = SYMBOL_SIZE / 20;
Expand All @@ -47,116 +48,73 @@ function calc(gd, trace) {
var xa = Axes.getFromId(gd, trace.xaxis);
var ya = Axes.getFromId(gd, trace.yaxis);
var subplot = fullLayout._plots[trace.xaxis + trace.yaxis];
var count = trace._length;
var count2 = count * 2;
var stash = {};
var i, xx, yy;

var x = xa.makeCalcdata(trace, 'x');
var y = ya.makeCalcdata(trace, 'y');

var x = xaxis.type === 'linear' ? trace.x : xaxis.makeCalcdata(trace, 'x');
var y = yaxis.type === 'linear' ? trace.y : yaxis.makeCalcdata(trace, 'y');
Copy link
Contributor Author

@etpinard etpinard Feb 26, 2018

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

IMPORTANT: with typed array support in mind (-> #2388), I made all traces pass through makeCalcdata, unlike previously, where x/y arrays on linear axes bypassed it. Please note that makeCalcdata creates a new array (i.e. x isn't the same as trace.x), unless trace.x is a typed array. So memory-conscious users should switch to using typed arrays.

Note that at 1e6 on my setup, one makeCalcdata call clocks in at roughly 25ms. So using typed arrays can save about 50ms

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Even with the extra arrays, memory consumption appears fine. I wouldn't mind having @dfcreative double-check though.

Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I guess there are situations we accept now that would have broken previously (but only on linear axes) - the "junk" characters stripped by cleanNumber - worth a test case?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yes. Good call!

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Added mock in c15722f.

On master, the new mock would have looked like:

image


var count = trace._length, i, xx, yy;
// we need hi-precision for scatter2d,
// regl-scatter2d uses NaNs for bad/missing values
//
// TODO should this be a Float32Array ??
var positions = new Array(count2);
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@dfcreative could we set up positions with a Float32Array here?

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Float32 does not provide enough precision for some datetime ranges and high-precision linear (etc.) ranges.

for(i = 0; i < count; i++) {
xx = x[i];
yy = y[i];
// TODO does d2c output any other bad value as BADNUM ever?
positions[i * 2] = xx === BADNUM ? NaN : xx;
positions[i * 2 + 1] = yy === BADNUM ? NaN : yy;
}

if(!x) {
x = Array(count);
for(i = 0; i < count; i++) {
x[i] = i;
if(xa.type === 'log') {
for(i = 0; i < count2; i += 2) {
positions[i] = xa.d2l(positions[i]);
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

🐎 we've already been through makeCalcData so we should be able to do c2l here

Copy link
Contributor Author

@etpinard etpinard Feb 28, 2018

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Thanks for pointing this out! Done in -> 98d2407

}
}
if(!y) {
y = Array(count);
for(i = 0; i < count; i++) {
y[i] = i;
if(ya.type === 'log') {
for(i = 1; i < count2; i += 2) {
positions[i] = ya.d2l(positions[i]);
}
}

// get log converted positions
var rawx = (xaxis.type === 'log' || x.length > count) ? x.slice(0, count) : x;
var rawy = (yaxis.type === 'log' || y.length > count) ? y.slice(0, count) : y;

var convertX = (xaxis.type === 'log') ? xaxis.d2l : parseFloat;
var convertY = (yaxis.type === 'log') ? yaxis.d2l : parseFloat;

// we need hi-precision for scatter2d
positions = new Array(count * 2);

for(i = 0; i < count; i++) {
x[i] = convertX(x[i]);
y[i] = convertY(y[i]);

// if no x defined, we are creating simple int sequence (API)
// we use parseFloat because it gives NaN (we need that for empty values to avoid drawing lines) and it is incredibly fast
xx = isNumeric(x[i]) ? +x[i] : NaN;
yy = isNumeric(y[i]) ? +y[i] : NaN;

positions[i * 2] = xx;
positions[i * 2 + 1] = yy;
}

// we don't build a tree for log axes since it takes long to convert log2px
// and it is also
if(xaxis.type !== 'log' && yaxis.type !== 'log') {
if(xa.type !== 'log' && ya.type !== 'log') {
// FIXME: delegate this to webworker
stash.tree = kdtree(positions, 512);
}
else {
var ids = stash.ids = Array(count);
} else {
var ids = stash.ids = new Array(count);
for(i = 0; i < count; i++) {
ids[i] = i;
}
}

// create scene options and scene
calcColorscales(trace);

var options = sceneOptions(container, subplot, trace, positions);

// expanding axes is separate from options
if(!options.markers) {
Axes.expand(xaxis, rawx, { padded: true });
Axes.expand(yaxis, rawy, { padded: true });
}
else if(Lib.isArrayOrTypedArray(options.markers.sizes)) {
var sizes = options.markers.sizes;
Axes.expand(xaxis, rawx, { padded: true, ppad: sizes });
Axes.expand(yaxis, rawy, { padded: true, ppad: sizes });
}
else {
var xbounds = [Infinity, -Infinity], ybounds = [Infinity, -Infinity];
var size = options.markers.size;

// axes bounds
for(i = 0; i < count; i++) {
xx = x[i], yy = y[i];
if(xbounds[0] > xx) xbounds[0] = xx;
if(xbounds[1] < xx) xbounds[1] = xx;
if(ybounds[0] > yy) ybounds[0] = yy;
if(ybounds[1] < yy) ybounds[1] = yy;
}

// FIXME: is there a better way to separate expansion?
if(count < TOO_MANY_POINTS) {
Axes.expand(xaxis, rawx, { padded: true, ppad: size });
Axes.expand(yaxis, rawy, { padded: true, ppad: size });
}
// update axes fast for big number of points
else {
if(xaxis._min) {
xaxis._min.push({ val: xbounds[0], pad: size });
}
if(xaxis._max) {
xaxis._max.push({ val: xbounds[1], pad: size });
}

if(yaxis._min) {
yaxis._min.push({ val: ybounds[0], pad: size });
}
if(yaxis._max) {
yaxis._max.push({ val: ybounds[1], pad: size });
}
var options = sceneOptions(gd, subplot, trace, positions);
var markerOptions = options.marker;
var scene = sceneUpdate(gd, subplot);
var ppad;

// Re-use SVG scatter axis expansion routine except
// for graph with very large number of points where it
// performs poorly.
// In big data case, fake Axes.expand outputs with data bounds,
// and an average size for array marker.size inputs.
if(count < TOO_MANY_POINTS) {
ppad = calcMarkerSize(trace, count);
calcAxisExpansion(gd, trace, xa, ya, x, y, ppad);
} else {
if(markerOptions) {
ppad = 2 * (markerOptions.sizeAvg || Math.max(markerOptions.size, 3));
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

How did you find sizeAvg to work in practice? A little hard to say I guess, since we don't have a lot of real data > 1e5 points to play with... You're right that we don't want to just use sizeMax, it's worth accepting a bit of clipping in order to generally have less wasted space, and much of the time the largest point won't be on any edge... just wondering how that balance plays out in practice, whether we would be better off with something like halfway between the average and max.

Anyway, perhaps we don't have a good way to answer that question right now. I'll just mention that in case we do want to try and do better later, we could find some heuristics that only add a little bit of computation, like binning data points into top/middle/bottom thirds, and only using the top third for the top padding... maybe even with a smooth weighting of the size based on how far it is from the edge. That would still be far faster than the full calculation but could do a better job reducing wasted space without too much clipping.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

-> #2417

}
fastAxisExpand(xa, x, ppad);
fastAxisExpand(ya, y, ppad);
}

// create scene
var scene = sceneUpdate(container, subplot);

// set flags to create scene renderers
if(options.fill && !scene.fill2d) scene.fill2d = true;
if(options.marker && !scene.scatter2d) scene.scatter2d = true;
Expand All @@ -178,14 +136,33 @@ function calc(gd, trace) {
stash.index = scene.count - 1;
stash.x = x;
stash.y = y;
stash.rawx = rawx;
stash.rawy = rawy;
stash.positions = positions;
stash.count = count;

gd.firstscatter = false;
return [{x: false, y: false, t: stash, trace: trace}];
}

/**
 * Approximate Axes.expand results with speed.
 *
 * Scans `vals` once for its smallest and largest comparable value and
 * appends one entry per bound to `ax._min` / `ax._max`, each carrying the
 * same padding `ppad`.
 *
 * Does nothing when the axis does not need autoranging (as reported by
 * Axes.doesAxisNeedAutoRange) or when `vals` is falsy.
 *
 * @param {object} ax : axis object; `ax._min` and `ax._max` are created if
 *   missing and appended to
 * @param {Array} vals : flat array of coordinates, one entry per point
 * @param {number} ppad : padding stored as `pad` on both pushed entries
 */
function fastAxisExpand(ax, vals, ppad) {
    if(!Axes.doesAxisNeedAutoRange(ax) || !vals) return;

    var lo = Infinity;
    var hi = -Infinity;

    // `vals` holds one coordinate per point (it is not an interleaved
    // x/y positions array), so every entry has to be visited
    for(var i = 0; i < vals.length; i++) {
        var v = vals[i];
        // comparisons against NaN / undefined are always false,
        // so bad values are skipped implicitly
        if(v < lo) lo = v;
        if(v > hi) hi = v;
    }

    // create the accumulators only when missing, so entries pushed
    // earlier (e.g. by other traces on the same axis) are preserved
    if(!ax._min) ax._min = [];
    if(!ax._max) ax._max = [];

    // no comparable value found: do not push +/-Infinity bounds
    if(lo > hi) return;

    ax._min.push({val: lo, pad: ppad});
    ax._max.push({val: hi, pad: ppad});
}

// create scene options
function sceneOptions(gd, subplot, trace, positions) {
var fullLayout = gd._fullLayout;
Expand Down Expand Up @@ -481,11 +458,15 @@ function sceneOptions(gd, subplot, trace, positions) {
if(multiSize || multiLineWidth) {
var sizes = markerOptions.sizes = new Array(count);
var borderSizes = markerOptions.borderSizes = new Array(count);
var sizeTotal = 0;
var sizeAvg;

if(multiSize) {
for(i = 0; i < count; i++) {
sizes[i] = markerSizeFunc(markerOpts.size[i]);
sizeTotal += sizes[i];
}
sizeAvg = sizeTotal / count;
} else {
s = markerSizeFunc(markerOpts.size);
for(i = 0; i < count; i++) {
Expand All @@ -504,6 +485,8 @@ function sceneOptions(gd, subplot, trace, positions) {
borderSizes[i] = s;
}
}

markerOptions.sizeAvg = sizeAvg;
} else {
markerOptions.size = markerSizeFunc(markerOpts && markerOpts.size || 10);
markerOptions.borderSizes = markerSizeFunc(markerOpts.line.width);
Expand Down Expand Up @@ -887,8 +870,8 @@ function plot(gd, subplot, cdata) {
var trace = cd.trace;
var stash = cd.t;
var id = stash.index;
var x = stash.rawx,
y = stash.rawy;
var x = stash.x;
var y = stash.y;

var xaxis = subplot.xaxis || Axes.getFromId(gd, trace.xaxis || 'x');
var yaxis = subplot.yaxis || Axes.getFromId(gd, trace.yaxis || 'y');
Expand Down Expand Up @@ -998,8 +981,8 @@ function hoverPoints(pointData, xval, yval, hovermode) {
var trace = cd[0].trace;
var xa = pointData.xa;
var ya = pointData.ya;
var x = stash.rawx;
var y = stash.rawy;
var x = stash.x;
var y = stash.y;
var xpx = xa.c2p(xval);
var ypx = ya.c2p(yval);
var maxDistance = pointData.distance;
Expand Down Expand Up @@ -1155,15 +1138,12 @@ function hoverPoints(pointData, xval, yval, hovermode) {
}

function selectPoints(searchInfo, polygon) {
var cd = searchInfo.cd,
selection = [],
trace = cd[0].trace,
stash = cd[0].t,
x = stash.x,
y = stash.y,
rawx = stash.rawx,
rawy = stash.rawy;

var cd = searchInfo.cd;
var selection = [];
var trace = cd[0].trace;
var stash = cd[0].t;
var x = stash.x;
var y = stash.y;
var scene = stash.scene;

if(!scene) return selection;
Expand All @@ -1183,8 +1163,8 @@ function selectPoints(searchInfo, polygon) {
els.push(i);
selection.push({
pointNumber: i,
x: rawx ? rawx[i] : x[i],
y: rawy ? rawy[i] : y[i]
x: x[i],
y: y[i]
});
}
else {
Expand Down