Skip to content

Commit e5a535c

Browse files
committed
histogram autobins match each other when possible
1 parent 4825f8e commit e5a535c

File tree

2 files changed

+261
-68
lines changed

2 files changed

+261
-68
lines changed

src/traces/histogram/calc.js

+212-64
Original file line numberDiff line numberDiff line change
@@ -19,6 +19,7 @@ var binFunctions = require('./bin_functions');
1919
var normFunctions = require('./norm_functions');
2020
var doAvg = require('./average');
2121
var cleanBins = require('./clean_bins');
22+
var oneMonth = require('../../constants/numerical').ONEAVGMONTH;
2223

2324

2425
module.exports = function calc(gd, trace) {
@@ -27,60 +28,35 @@ module.exports = function calc(gd, trace) {
2728

2829
// depending on orientation, set position and size axes and data ranges
2930
// note: this logic for choosing orientation is duplicated in graph_obj->setstyles
30-
var pos = [],
31-
size = [],
32-
i,
33-
pa = Axes.getFromId(gd,
34-
trace.orientation === 'h' ? (trace.yaxis || 'y') : (trace.xaxis || 'x')),
35-
maindata = trace.orientation === 'h' ? 'y' : 'x',
36-
counterdata = {x: 'y', y: 'x'}[maindata],
37-
calendar = trace[maindata + 'calendar'],
38-
cumulativeSpec = trace.cumulative;
31+
var pos = [];
32+
var size = [];
33+
var pa = Axes.getFromId(gd, trace.orientation === 'h' ?
34+
(trace.yaxis || 'y') : (trace.xaxis || 'x'));
35+
var maindata = trace.orientation === 'h' ? 'y' : 'x';
36+
var counterdata = {x: 'y', y: 'x'}[maindata];
37+
var calendar = trace[maindata + 'calendar'];
38+
var cumulativeSpec = trace.cumulative;
39+
var i;
3940

4041
cleanBins(trace, pa, maindata);
4142

42-
// prepare the raw data
43-
var pos0 = pa.makeCalcdata(trace, maindata);
43+
var binspec = calcAllAutoBins(gd, trace, pa, maindata);
4444

45-
// calculate the bins
46-
var binAttr = maindata + 'bins';
47-
var autoBinAttr = 'autobin' + maindata;
48-
var binspec = trace[binAttr];
49-
if((trace[autoBinAttr] !== false) || !binspec ||
50-
binspec.start === null || binspec.end === null) {
51-
binspec = Axes.autoBin(pos0, pa, trace['nbins' + maindata], false, calendar);
52-
53-
// adjust for CDF edge cases
54-
if(cumulativeSpec.enabled && (cumulativeSpec.currentbin !== 'include')) {
55-
if(cumulativeSpec.direction === 'decreasing') {
56-
binspec.start = pa.c2r(pa.r2c(binspec.start) - binspec.size);
57-
}
58-
else {
59-
binspec.end = pa.c2r(pa.r2c(binspec.end) + binspec.size);
60-
}
61-
}
45+
// the raw data was prepared in calcAllAutoBins (during the first trace in
46+
// this group) and stashed. Pull it out and drop the stash
47+
var pos0 = trace._pos0;
48+
delete trace._pos0;
6249

63-
// copy bin info back to the source and full data.
64-
trace._input[binAttr] = trace[binAttr] = binspec;
65-
// note that it's possible to get here with an explicit autobin: false
66-
// if the bins were not specified.
67-
// in that case this will remain in the trace, so that future updates
68-
// which would change the autobinning will not do so.
69-
trace._input[autoBinAttr] = trace[autoBinAttr];
70-
}
71-
72-
var nonuniformBins = typeof binspec.size === 'string',
73-
bins = nonuniformBins ? [] : binspec,
74-
// make the empty bin array
75-
i2,
76-
binend,
77-
n,
78-
inc = [],
79-
counts = [],
80-
total = 0,
81-
norm = trace.histnorm,
82-
func = trace.histfunc,
83-
densitynorm = norm.indexOf('density') !== -1;
50+
var nonuniformBins = typeof binspec.size === 'string';
51+
var bins = nonuniformBins ? [] : binspec;
52+
// make the empty bin array
53+
var inc = [];
54+
var counts = [];
55+
var total = 0;
56+
var norm = trace.histnorm;
57+
var func = trace.histfunc;
58+
var densitynorm = norm.indexOf('density') !== -1;
59+
var i2, binend, n;
8460

8561
if(cumulativeSpec.enabled && densitynorm) {
8662
// we treat "cumulative" like it means "integral" if you use a density norm,
@@ -89,13 +65,13 @@ module.exports = function calc(gd, trace) {
8965
densitynorm = false;
9066
}
9167

92-
var extremefunc = func === 'max' || func === 'min',
93-
sizeinit = extremefunc ? null : 0,
94-
binfunc = binFunctions.count,
95-
normfunc = normFunctions[norm],
96-
doavg = false,
97-
pr2c = function(v) { return pa.r2c(v, 0, calendar); },
98-
rawCounterData;
68+
var extremefunc = func === 'max' || func === 'min';
69+
var sizeinit = extremefunc ? null : 0;
70+
var binfunc = binFunctions.count;
71+
var normfunc = normFunctions[norm];
72+
var doavg = false;
73+
var pr2c = function(v) { return pa.r2c(v, 0, calendar); };
74+
var rawCounterData;
9975

10076
if(Array.isArray(trace[counterdata]) && func !== 'count') {
10177
rawCounterData = trace[counterdata];
@@ -104,7 +80,7 @@ module.exports = function calc(gd, trace) {
10480
}
10581

10682
// create the bins (and any extra arrays needed)
107-
// assume more than 5000 bins is an error, so we don't crash the browser
83+
// assume more than 1e6 bins is an error, so we don't crash the browser
10884
i = pr2c(binspec.start);
10985

11086
// decrease end a little in case of rounding errors
@@ -150,10 +126,11 @@ module.exports = function calc(gd, trace) {
150126
if(cumulativeSpec.enabled) cdf(size, cumulativeSpec.direction, cumulativeSpec.currentbin);
151127

152128

153-
var serieslen = Math.min(pos.length, size.length),
154-
cd = [],
155-
firstNonzero = 0,
156-
lastNonzero = serieslen - 1;
129+
var serieslen = Math.min(pos.length, size.length);
130+
var cd = [];
131+
var firstNonzero = 0;
132+
var lastNonzero = serieslen - 1;
133+
157134
// look for empty bins at the ends to remove, so autoscale omits them
158135
for(i = 0; i < serieslen; i++) {
159136
if(size[i]) {
@@ -180,10 +157,181 @@ module.exports = function calc(gd, trace) {
180157
return cd;
181158
};
182159

160+
/*
 * calcAllAutoBins: we want all histograms on the same axes to share bin specs
 * if they're grouped or stacked. If the user has explicitly specified differing
 * bin specs, there's nothing we can do, but if possible we will try to use the
 * smallest bins of any of the auto values for all histograms grouped/stacked
 * together.
 *
 * Side effects on the traces of the group:
 *  - the converted position data of every trace is stashed as `_pos0`
 *  - every trace after the first is flagged with `_autoBinFinished`, so that
 *    its own call returns immediately (and clears the flag again)
 *  - each autobinned trace gets the harmonized bin spec written to both the
 *    full trace and its `_input`
 *
 * @param {object} gd: graph div, passed through to getConnectedHistograms
 * @param {object} trace: the full trace currently being calculated
 * @param {object} pa: position axis (its makeCalcdata, r2c and c2r are used)
 * @param {string} maindata: data letter of the position axis, used to build
 *     the `<letter>bins`, `autobin<letter>`, `nbins<letter>` and
 *     `<letter>calendar` attribute names
 *
 * @return {object} the bin spec to use for `trace` (trace[maindata + 'bins'])
 */
function calcAllAutoBins(gd, trace, pa, maindata) {
    var binAttr = maindata + 'bins';

    // all but the first trace in this group have already been marked finished.
    // clear this flag, so next time we run calc we will run autobin again
    if(trace._autoBinFinished) {
        delete trace._autoBinFinished;

        return trace[binAttr];
    }

    // must be the first trace in the group - do the autobinning on them all
    var traceGroup = getConnectedHistograms(gd, trace);
    var autoBinnedTraces = [];

    var minSize = Infinity;
    var minStart = Infinity;
    var maxEnd = -Infinity;

    var autoBinAttr = 'autobin' + maindata;
    var i, tracei, calendar, firstManual;


    for(i = 0; i < traceGroup.length; i++) {
        tracei = traceGroup[i];

        // look the calendar up for every trace, not only the autobinned ones,
        // so manually binned traces are converted with their own calendar
        // instead of whatever an earlier trace left behind (or undefined)
        calendar = tracei[maindata + 'calendar'];

        // stash pos0 on the trace so we don't need to duplicate this
        // in the main body of calc
        var pos0 = tracei._pos0 = pa.makeCalcdata(tracei, maindata);
        var binspec = tracei[binAttr];

        if((tracei[autoBinAttr]) || !binspec ||
                binspec.start === null || binspec.end === null) {
            var cumulativeSpec = tracei.cumulative;

            binspec = Axes.autoBin(pos0, pa, tracei['nbins' + maindata], false, calendar);

            // adjust for CDF edge cases: widen the range by one bin on the
            // side where the cumulative sum starts
            // NOTE(review): binspec.size can be a string code such as 'M6'
            // (see getMinSize), in which case this arithmetic gives NaN -
            // confirm whether date axes need separate handling here
            if(cumulativeSpec.enabled && (cumulativeSpec.currentbin !== 'include')) {
                if(cumulativeSpec.direction === 'decreasing') {
                    minStart = Math.min(minStart, pa.r2c(binspec.start, 0, calendar) - binspec.size);
                }
                else {
                    maxEnd = Math.max(maxEnd, pa.r2c(binspec.end, 0, calendar) + binspec.size);
                }
            }

            // note that it's possible to get here with an explicit autobin: false
            // if the bins were not specified. mark this trace for followup
            autoBinnedTraces.push(tracei);
        }
        else if(!firstManual) {
            // Remember the first manually set binspec. We'll try to be extra
            // accommodating of this one, so other bins line up with these
            // if there's more than one manual bin set and they're mutually inconsistent,
            // then there's not much we can do...
            firstManual = {
                size: binspec.size,
                start: pa.r2c(binspec.start, 0, calendar),
                end: pa.r2c(binspec.end, 0, calendar)
            };
        }

        // Even non-autobinned traces get included here, so we get the greatest extent
        // and minimum bin size of them all.
        // But manually binned traces won't be adjusted, even if the auto values
        // are inconsistent with the manual ones (or the manual ones are inconsistent
        // with each other).
        //
        // TODO: there's probably a weird case here where a larger bin pushes the
        // start/end out, then it gets shrunk and doesn't make sense with the smaller bin.
        // Need to look for cases like this and see if the results are acceptable
        // or we need to think harder about it.
        minSize = getMinSize(minSize, binspec.size);
        minStart = Math.min(minStart, pa.r2c(binspec.start, 0, calendar));
        maxEnd = Math.max(maxEnd, pa.r2c(binspec.end, 0, calendar));

        // add the flag that lets us abort autobin on later traces.
        // It has to go on tracei (each later trace), not on the first trace
        // that is driving this loop.
        if(i) tracei._autoBinFinished = 1;
    }

    // do what we can to match the auto bins to the first manual bins
    // but only if sizes are all numeric
    if(firstManual && isNumeric(firstManual.size) && isNumeric(minSize)) {
        // first need to ensure the bin size is the same as or an integer fraction
        // of the first manual bin
        // allow the bin size to increase just under the autobin step size to match,
        // (which is a factor of 2 or 2.5) otherwise shrink it
        if(minSize > firstManual.size / 1.9) minSize = firstManual.size;
        else minSize = firstManual.size / Math.ceil(firstManual.size / minSize);

        // now decrease minStart if needed to make the bin centers line up
        var adjustedFirstStart = firstManual.start + (firstManual.size - minSize) / 2;
        minStart = adjustedFirstStart - minSize * Math.ceil((adjustedFirstStart - minStart) / minSize);
    }

    // now go back to the autobinned traces and update their bin specs with the final values
    for(i = 0; i < autoBinnedTraces.length; i++) {
        tracei = autoBinnedTraces[i];
        calendar = tracei[maindata + 'calendar'];

        tracei._input[binAttr] = tracei[binAttr] = {
            start: pa.c2r(minStart, 0, calendar),
            end: pa.c2r(maxEnd, 0, calendar),
            size: minSize
        };

        // note that it's possible to get here with an explicit autobin: false
        // if the bins were not specified.
        // in that case this will remain in the trace, so that future updates
        // which would change the autobinning will not do so.
        tracei._input[autoBinAttr] = tracei[autoBinAttr];
    }

    return trace[binAttr];
}
284+
285+
/*
 * Collect the traces that are stacked or grouped together with `trace`:
 * every histogram in gd._fullData with the same orientation, x axis and
 * y axis. In 'overlay' barmode the trace is returned on its own.
 *
 * TODO: only considers histograms. Should we also harmonize with bars?
 * in principle people can mix and match these, but bars always
 * specify their positions explicitly...
 */
function getConnectedHistograms(gd, trace) {
    if(gd._fullLayout.barmode === 'overlay') return [trace];

    var wantedOrientation = trace.orientation;
    var wantedX = trace.xaxis;
    var wantedY = trace.yaxis;

    return gd._fullData.filter(function(candidate) {
        return candidate.type === 'histogram' &&
            candidate.orientation === wantedOrientation &&
            candidate.xaxis === wantedX &&
            candidate.yaxis === wantedY;
    });
}
312+
313+
314+
/*
 * getMinSize: pick the smaller of two bin sizes, where either one may be a
 * string code, ie 'M6' for 6 months ('L' wouldn't make sense to compare with
 * numeric sizes). The sizes are compared through numericSize, but the value
 * handed back is always one of the two inputs, untouched.
 *
 * size1 === Infinity is treated as "nothing seen yet": size2 wins outright.
 * On a tie, size1 is kept.
 */
function getMinSize(size1, size2) {
    if(size1 === Infinity) return size2;

    var current = numericSize(size1);
    var challenger = numericSize(size2);

    if(challenger < current) return size2;
    return size1;
}
324+
325+
/*
 * numericSize: turn a bin size into something getMinSize can compare.
 * Numeric sizes are returned unchanged, 'M<n>' codes become n times oneMonth,
 * and anything else counts as Infinity.
 */
function numericSize(size) {
    if(isNumeric(size)) return size;

    var isMonthCode = typeof size === 'string' && size.charAt(0) === 'M';
    if(!isMonthCode) return Infinity;

    var months = Number(size.slice(1));
    return oneMonth * months;
}
332+
183333
function cdf(size, direction, currentbin) {
184-
var i,
185-
vi,
186-
prevSum;
334+
var i, vi, prevSum;
187335

188336
function firstHalfPoint(i) {
189337
prevSum = size[i];

test/jasmine/tests/histogram_test.js

+49-4
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,7 @@ var calc = require('@src/traces/histogram/calc');
77

88
var createGraphDiv = require('../assets/create_graph_div');
99
var destroyGraphDiv = require('../assets/destroy_graph_div');
10+
var customMatchers = require('../assets/custom_matchers');
1011

1112

1213
describe('Test histogram', function() {
@@ -162,10 +163,20 @@ describe('Test histogram', function() {
162163

163164

164165
describe('calc', function() {
165-
function _calc(opts) {
166-
var base = { type: 'histogram' },
167-
trace = Lib.extendFlat({}, base, opts),
168-
gd = { data: [trace] };
166+
beforeAll(function() {
167+
jasmine.addMatchers(customMatchers);
168+
});
169+
170+
function _calc(opts, extraTraces) {
171+
var base = { type: 'histogram' };
172+
var trace = Lib.extendFlat({}, base, opts);
173+
var gd = { data: [trace] };
174+
175+
if(Array.isArray(extraTraces)) {
176+
extraTraces.forEach(function(extraTrace) {
177+
gd.data.push(Lib.extendFlat({}, base, extraTrace));
178+
});
179+
}
169180

170181
Plots.supplyDefaults(gd);
171182
var fullTrace = gd._fullData[0];
@@ -263,6 +274,40 @@ describe('Test histogram', function() {
263274
expect(out.length).toEqual(9001);
264275
});
265276

277+
// run _calc and keep only the position (`p`) of each returned point
function calcPositions(opts, extraTraces) {
    var calcdata = _calc(opts, extraTraces);

    return calcdata.map(function pickPosition(pt) {
        return pt.p;
    });
}
280+
281+
// Each trace is first binned on its own, then together with the other one as
// an extra trace. calcPositions only reports the first trace's positions, so
// both orders are checked.
it('harmonizes autobins when all traces are autobinned', function() {
    var trace1 = {x: [1, 2, 3, 4]};
    var trace2 = {x: [5, 5.5, 6, 6.5]};

    expect(calcPositions(trace1)).toEqual([0.5, 2.5, 4.5]);

    // NOTE(review): this used to read `toEqual[5, 6, 7]` - a property lookup
    // on the matcher rather than a call - so the expectation never ran.
    // The expected value is carried over unverified; confirm it against an
    // actual run of the suite and adjust it if trace2 bins differently alone.
    expect(calcPositions(trace2)).toEqual([5, 6, 7]);

    expect(calcPositions(trace1, [trace2])).toEqual([1, 2, 3, 4]);
    expect(calcPositions(trace2, [trace1])).toEqual([5, 6, 7]);
});
292+
293+
// the extra trace carries explicit xbins with size 0.4; the autobinned first
// trace is expected to come out on a 0.4 grid as well
it('harmonizes autobins with smaller manual bins', function() {
    var autoTrace = {x: [1, 2, 3, 4]};
    var manualTrace = {
        x: [5, 6, 7, 8],
        xbins: {start: 4.3, end: 7.1, size: 0.4}
    };
    var expectedPositions = [0.9, 1.3, 1.7, 2.1, 2.5, 2.9, 3.3, 3.7, 4.1];

    var positions = calcPositions(autoTrace, [manualTrace]);

    expect(positions).toBeCloseToArray(expectedPositions, 5);
});
301+
302+
// the extra trace carries explicit xbins with size 7; the autobinned first
// trace is expected to use bins 1.75 wide (a quarter of 7)
it('harmonizes autobins with larger manual bins', function() {
    var autoTrace = {x: [1, 2, 3, 4]};
    var manualTrace = {
        x: [5, 6, 7, 8],
        xbins: {start: 4.3, end: 15, size: 7}
    };
    var expectedPositions = [0.8, 2.55, 4.3];

    var positions = calcPositions(autoTrace, [manualTrace]);

    expect(positions).toBeCloseToArray(expectedPositions, 5);
});
310+
266311
describe('cumulative distribution functions', function() {
267312
var base = {
268313
x: [0, 5, 10, 15, 5, 10, 15, 10, 15, 15],

0 commit comments

Comments
 (0)