Skip to content

Box points hover & select #2094

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 19 commits into from
Oct 19, 2017
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
238 changes: 136 additions & 102 deletions src/traces/box/calc.js
Original file line number Diff line number Diff line change
Expand Up @@ -13,17 +13,17 @@ var isNumeric = require('fast-isnumeric');
var Lib = require('../../lib');
var Axes = require('../../plots/cartesian/axes');


// outlier definition based on http://www.physics.csbsju.edu/stats/box2.html
module.exports = function calc(gd, trace) {
var xa = Axes.getFromId(gd, trace.xaxis || 'x'),
ya = Axes.getFromId(gd, trace.yaxis || 'y'),
orientation = trace.orientation,
cd = [],
valAxis, valLetter, val, valBinned,
posAxis, posLetter, pos, posDistinct, dPos;

// Set value (val) and position (pos) keys via orientation
var xa = Axes.getFromId(gd, trace.xaxis || 'x');
var ya = Axes.getFromId(gd, trace.yaxis || 'y');
var orientation = trace.orientation;
var cd = [];

var i;
var valAxis, valLetter;
var posAxis, posLetter;

if(orientation === 'h') {
valAxis = xa;
valLetter = 'x';
Expand All @@ -36,112 +36,146 @@ module.exports = function calc(gd, trace) {
posLetter = 'x';
}

val = valAxis.makeCalcdata(trace, valLetter); // get val

// size autorange based on all source points
// position happens afterward when we know all the pos
Axes.expand(valAxis, val, {padded: true});

// In vertical (horizontal) box plots:
// if no x (y) data, use x0 (y0), or name
// so if you want one box
// per trace, set x0 (y0) to the x (y) value or category for this trace
// (or set x (y) to a constant array matching y (x))
function getPos(gd, trace, posLetter, posAxis, val) {
var pos0;
if(posLetter in trace) pos = posAxis.makeCalcdata(trace, posLetter);
else {
if(posLetter + '0' in trace) pos0 = trace[posLetter + '0'];
else if('name' in trace && (
posAxis.type === 'category' ||
(isNumeric(trace.name) &&
['linear', 'log'].indexOf(posAxis.type) !== -1) ||
(Lib.isDateTime(trace.name) &&
posAxis.type === 'date')
)) {
pos0 = trace.name;
}
else pos0 = gd.numboxes;
pos0 = posAxis.d2c(pos0, 0, trace[posLetter + 'calendar']);
pos = val.map(function() { return pos0; });
}
return pos;
}

pos = getPos(gd, trace, posLetter, posAxis, val);
var val = valAxis.makeCalcdata(trace, valLetter);
var pos = getPos(trace, posLetter, posAxis, val, gd.numboxes);

// get distinct positions and min difference
var dv = Lib.distinctVals(pos);
posDistinct = dv.vals;
dPos = dv.minDiff / 2;

function binVal(cd, val, pos, posDistinct, dPos) {
var posDistinctLength = posDistinct.length,
valLength = val.length,
valBinned = [],
bins = [],
i, p, n, v;

// store distinct pos in cd, find bins, init. valBinned
for(i = 0; i < posDistinctLength; ++i) {
p = posDistinct[i];
cd[i] = {pos: p};
bins[i] = p - dPos;
valBinned[i] = [];
}
bins.push(posDistinct[posDistinctLength - 1] + dPos);

// bin the values
for(i = 0; i < valLength; ++i) {
v = val[i];
if(!isNumeric(v)) continue;
n = Lib.findBin(pos[i], bins);
if(n >= 0 && n < valLength) valBinned[n].push(v);
var posDistinct = dv.vals;
var dPos = dv.minDiff / 2;
var posBins = makeBins(posDistinct, dPos);

var vLen = val.length;
var pLen = posDistinct.length;
var ptsPerBin = initNestedArray(pLen);

// bin pts info per position bins
for(i = 0; i < vLen; i++) {
var v = val[i];
if(!isNumeric(v)) continue;

var n = Lib.findBin(pos[i], posBins);
if(n >= 0 && n < pLen) {
var pt = {v: v, i: i};
Copy link
Contributor Author

@etpinard etpinard Oct 17, 2017

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This was the key to making box point hover and selection reasonably clean.

Box calcdata traces have one item per box to be displayed. The data-esque array of box point objects used to be created during Box.plot; now the structure is set up here. Note that it is important to track the original sample point index (the `i` above) in order to convert hovered or selected calcdata items to event data `pointNumber` values corresponding to indices in the input x/y sample arrays.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Definitely nice and clean - I was curious whether this degraded performance for large data sets, but it looks like very little if at all

function r(n) { var v = 0; for(var i = 0; i < n; i++) v += Math.random(); return v; }
function a(n, m) { var out = new Array(n); for(var i = 0; i < n; i++) out[i] = r(m); return out; }
var y = a(1000000, 10)
// I had to patch timeit to support n = 1
timeit(function() { Plotly.newPlot(gd,[{type: 'box', y: y, jitter: 0.5, hoveron: 'points'}]) }, 1)

On my computer 1 million points (of which ~5k are outliers) takes ~3 sec, and difference between this branch and master is within the noise (~10% or less)

Copy link
Contributor Author

@etpinard etpinard Oct 18, 2017

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

(~10% or less)

Thanks for taking a look at this 🐎

ptsPerBin[n].push(pt);
}

return valBinned;
}

valBinned = binVal(cd, val, pos, posDistinct, dPos);

// sort the bins and calculate the stats
function calculateStats(cd, valBinned) {
var v, l, cdi, i;

for(i = 0; i < valBinned.length; ++i) {
v = valBinned[i].sort(Lib.sorterAsc);
l = v.length;
cdi = cd[i];

cdi.val = v; // put all values into calcdata
cdi.min = v[0];
cdi.max = v[l - 1];
cdi.mean = Lib.mean(v, l);
cdi.sd = Lib.stdev(v, l, cdi.mean);
cdi.q1 = Lib.interp(v, 0.25); // first quartile
cdi.med = Lib.interp(v, 0.5); // median
cdi.q3 = Lib.interp(v, 0.75); // third quartile
// build calcdata trace items, one item per distinct position
for(i = 0; i < pLen; i++) {
if(ptsPerBin[i].length > 0) {
var pts = ptsPerBin[i].sort(sortByVal);
var boxVals = pts.map(extractVal);
var bvLen = boxVals.length;

var cdi = {
pos: posDistinct[i],
pts: pts
};

cdi.min = boxVals[0];
cdi.max = boxVals[bvLen - 1];
cdi.mean = Lib.mean(boxVals, bvLen);
cdi.sd = Lib.stdev(boxVals, bvLen, cdi.mean);

// first quartile
cdi.q1 = Lib.interp(boxVals, 0.25);
// median
cdi.med = Lib.interp(boxVals, 0.5);
// third quartile
cdi.q3 = Lib.interp(boxVals, 0.75);

// lower and upper fences - last point inside
// 1.5 interquartile ranges from quartiles
cdi.lf = Math.min(cdi.q1, v[
Math.min(Lib.findBin(2.5 * cdi.q1 - 1.5 * cdi.q3, v, true) + 1, l - 1)]);
cdi.uf = Math.max(cdi.q3, v[
Math.max(Lib.findBin(2.5 * cdi.q3 - 1.5 * cdi.q1, v), 0)]);
cdi.lf = Math.min(
cdi.q1,
boxVals[Math.min(
Lib.findBin(2.5 * cdi.q1 - 1.5 * cdi.q3, boxVals, true) + 1,
bvLen - 1
)]
);
cdi.uf = Math.max(
cdi.q3,
boxVals[Math.max(
Lib.findBin(2.5 * cdi.q3 - 1.5 * cdi.q1, boxVals),
0
)]
);

// lower and upper outliers - 3 IQR out (don't clip to max/min,
// this is only for discriminating suspected & far outliers)
cdi.lo = 4 * cdi.q1 - 3 * cdi.q3;
cdi.uo = 4 * cdi.q3 - 3 * cdi.q1;

cd.push(cdi);
}
}

calculateStats(cd, valBinned);

// remove empty bins
cd = cd.filter(function(cdi) { return cdi.val && cdi.val.length; });
if(!cd.length) return [{t: {emptybox: true}}];
Axes.expand(valAxis, val, {padded: true});

// add numboxes and dPos to cd
cd[0].t = {boxnum: gd.numboxes, dPos: dPos};
gd.numboxes++;
return cd;
if(cd.length > 0) {
cd[0].t = {
boxnum: gd.numboxes,
dPos: dPos
};
gd.numboxes++;
return cd;
} else {
return [{t: {emptybox: true}}];
}
};

// In vertical (horizontal) box plots:
// if no x (y) data, use x0 (y0), or name
// so if you want one box
// per trace, set x0 (y0) to the x (y) value or category for this trace
// (or set x (y) to a constant array matching y (x))
/**
 * Build the array of position coordinates for a box trace.
 *
 * @param {object} trace - trace object; read for `posLetter`, `posLetter + '0'`,
 *   `name` and `posLetter + 'calendar'`
 * @param {string} posLetter - letter of the position axis
 * @param {object} posAxis - position axis; must provide `makeCalcdata`, `d2c`
 *   and `type`
 * @param {Array} val - value array; only its length matters here, the result
 *   has one entry per entry of `val` when no position data is present
 * @param {number} numboxes - fallback position when neither `posLetter + '0'`
 *   nor a usable `name` is found
 * @return {Array} position coordinates
 */
function getPos(trace, posLetter, posAxis, val, numboxes) {
    // tells whether the trace name can stand in as a position on this axis type
    function nameFitsAxis(name, axType) {
        if(axType === 'category') return true;
        if(isNumeric(name) && (axType === 'linear' || axType === 'log')) return true;
        return Lib.isDateTime(name) && axType === 'date';
    }

    // explicit position data takes precedence over everything else
    if(posLetter in trace) return posAxis.makeCalcdata(trace, posLetter);

    var startKey = posLetter + '0';
    var rawPos = numboxes;

    if(startKey in trace) {
        rawPos = trace[startKey];
    } else if('name' in trace && nameFitsAxis(trace.name, posAxis.type)) {
        rawPos = trace.name;
    }

    // every sample of the trace shares the same converted position
    var sharedPos = posAxis.d2c(rawPos, 0, trace[posLetter + 'calendar']);
    return val.map(function() { return sharedPos; });
}

/**
 * Compute bin edges around a list of positions.
 *
 * @param {Array<number>} x - positions
 * @param {number} dx - half-width placed on each side of a position
 * @return {Array<number>} `x.length + 1` edges: `x[k] - dx` for every
 *   position, followed by `x[last] + dx` as the closing edge
 */
function makeBins(x, dx) {
    var count = x.length;
    var edges = [];

    // lower edge of every bin
    for(var k = 0; k < count; k++) {
        edges.push(x[k] - dx);
    }

    // upper edge of the last bin
    edges.push(x[count - 1] + dx);

    return edges;
}

/**
 * Create an array holding `len` distinct empty arrays.
 *
 * @param {number} len - number of inner arrays to create
 * @return {Array<Array>} outer array whose entries are separate empty arrays
 */
function initNestedArray(len) {
    var nested = new Array(len);
    var k = len;

    // fill from the back; each slot gets its own array instance
    while(k-- > 0) {
        nested[k] = [];
    }

    return nested;
}

/**
 * Comparator ordering point objects by ascending `v`.
 *
 * @param {{v: number}} a
 * @param {{v: number}} b
 * @return {number} negative, zero or positive following `a.v - b.v`
 */
function sortByVal(a, b) {
    var diff = a.v - b.v;
    return diff;
}

/**
 * Pull the `v` property out of a point object.
 *
 * @param {{v: *}} o - point object
 * @return {*} the value stored under `v`
 */
function extractVal(o) {
    var value = o.v;
    return value;
}
41 changes: 22 additions & 19 deletions src/traces/box/plot.js
Original file line number Diff line number Diff line change
Expand Up @@ -132,8 +132,6 @@ module.exports = function plot(gd, plotinfo, cdbox) {
.attr('class', 'points')
.selectAll('path')
.data(function(d) {
var pts = (trace.boxpoints === 'all') ? d.val :
d.val.filter(function(v) { return (v < d.lf || v > d.uf); }),
// normally use IQR, but if this is 0 or too small, use max-min
typicalSpread = Math.max((d.max - d.min) / 10, d.q3 - d.q1),
minSpread = typicalSpread * 1e-9,
Expand All @@ -147,6 +145,10 @@ module.exports = function plot(gd, plotinfo, cdbox) {
jitterFactor,
newJitter;

var pts = trace.boxpoints === 'all' ?
d.pts :
d.pts.filter(function(pt) { return (pt.v < d.lf || pt.v > d.uf); });

// dynamic jitter
if(trace.jitter) {
if(typicalSpread === 0) {
Expand Down Expand Up @@ -179,31 +181,32 @@ module.exports = function plot(gd, plotinfo, cdbox) {
newJitter = trace.jitter * 2 / maxJitterFactor;
}

return pts.map(function(v, i) {
var posOffset = trace.pointpos,
p;
if(trace.jitter) {
posOffset += newJitter * jitterFactors[i] * (rand() - 0.5);
}
// fills in 'x' and 'y' in calcdata 'pts' item
for(i = 0; i < pts.length; i++) {
var pt = pts[i];
var v = pt.v;

var jitterOffset = trace.jitter ?
bdPos * (newJitter * jitterFactors[i] * (rand() - 0.5)) :
0;

var posPx = d.pos + bPos + bdPos * trace.pointpos + jitterOffset;

if(trace.orientation === 'h') {
p = {
y: d.pos + posOffset * bdPos + bPos,
x: v
};
pt.y = posPx;
pt.x = v;
} else {
p = {
x: d.pos + posOffset * bdPos + bPos,
y: v
};
pt.x = posPx;
pt.y = v;
}

// tag suspected outliers
if(trace.boxpoints === 'suspectedoutliers' && v < d.uo && v > d.lo) {
p.so = true;
pt.so = true;
}
return p;
});
}

return pts;
})
.enter().append('path')
.classed('point', true)
Expand Down