-
-
Notifications
You must be signed in to change notification settings - Fork 1.9k
Match hist bins #1944
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Match hist bins #1944
Changes from 2 commits
e5a535c
11821c2
ec12eab
3f173a4
89093ad
2071acb
e81fcb6
241d9a8
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -19,6 +19,7 @@ var binFunctions = require('./bin_functions'); | |
var normFunctions = require('./norm_functions'); | ||
var doAvg = require('./average'); | ||
var cleanBins = require('./clean_bins'); | ||
var oneMonth = require('../../constants/numerical').ONEAVGMONTH; | ||
|
||
|
||
module.exports = function calc(gd, trace) { | ||
|
@@ -27,60 +28,35 @@ module.exports = function calc(gd, trace) { | |
|
||
// depending on orientation, set position and size axes and data ranges | ||
// note: this logic for choosing orientation is duplicated in graph_obj->setstyles | ||
var pos = [], | ||
size = [], | ||
i, | ||
pa = Axes.getFromId(gd, | ||
trace.orientation === 'h' ? (trace.yaxis || 'y') : (trace.xaxis || 'x')), | ||
maindata = trace.orientation === 'h' ? 'y' : 'x', | ||
counterdata = {x: 'y', y: 'x'}[maindata], | ||
calendar = trace[maindata + 'calendar'], | ||
cumulativeSpec = trace.cumulative; | ||
var pos = []; | ||
var size = []; | ||
var pa = Axes.getFromId(gd, trace.orientation === 'h' ? | ||
(trace.yaxis || 'y') : (trace.xaxis || 'x')); | ||
var maindata = trace.orientation === 'h' ? 'y' : 'x'; | ||
var counterdata = {x: 'y', y: 'x'}[maindata]; | ||
var calendar = trace[maindata + 'calendar']; | ||
var cumulativeSpec = trace.cumulative; | ||
var i; | ||
|
||
cleanBins(trace, pa, maindata); | ||
|
||
// prepare the raw data | ||
var pos0 = pa.makeCalcdata(trace, maindata); | ||
var binspec = calcAllAutoBins(gd, trace, pa, maindata); | ||
|
||
// calculate the bins | ||
var binAttr = maindata + 'bins'; | ||
var autoBinAttr = 'autobin' + maindata; | ||
var binspec = trace[binAttr]; | ||
if((trace[autoBinAttr] !== false) || !binspec || | ||
binspec.start === null || binspec.end === null) { | ||
binspec = Axes.autoBin(pos0, pa, trace['nbins' + maindata], false, calendar); | ||
|
||
// adjust for CDF edge cases | ||
if(cumulativeSpec.enabled && (cumulativeSpec.currentbin !== 'include')) { | ||
if(cumulativeSpec.direction === 'decreasing') { | ||
binspec.start = pa.c2r(pa.r2c(binspec.start) - binspec.size); | ||
} | ||
else { | ||
binspec.end = pa.c2r(pa.r2c(binspec.end) + binspec.size); | ||
} | ||
} | ||
// the raw data was prepared in calcAllAutoBins (during the first trace in | ||
// this group) and stashed. Pull it out and drop the stash | ||
var pos0 = trace._pos0; | ||
delete trace._pos0; | ||
|
||
// copy bin info back to the source and full data. | ||
trace._input[binAttr] = trace[binAttr] = binspec; | ||
// note that it's possible to get here with an explicit autobin: false | ||
// if the bins were not specified. | ||
// in that case this will remain in the trace, so that future updates | ||
// which would change the autobinning will not do so. | ||
trace._input[autoBinAttr] = trace[autoBinAttr]; | ||
} | ||
|
||
var nonuniformBins = typeof binspec.size === 'string', | ||
bins = nonuniformBins ? [] : binspec, | ||
// make the empty bin array | ||
i2, | ||
binend, | ||
n, | ||
inc = [], | ||
counts = [], | ||
total = 0, | ||
norm = trace.histnorm, | ||
func = trace.histfunc, | ||
densitynorm = norm.indexOf('density') !== -1; | ||
var nonuniformBins = typeof binspec.size === 'string'; | ||
var bins = nonuniformBins ? [] : binspec; | ||
// make the empty bin array | ||
var inc = []; | ||
var counts = []; | ||
var total = 0; | ||
var norm = trace.histnorm; | ||
var func = trace.histfunc; | ||
var densitynorm = norm.indexOf('density') !== -1; | ||
var i2, binend, n; | ||
|
||
if(cumulativeSpec.enabled && densitynorm) { | ||
// we treat "cumulative" like it means "integral" if you use a density norm, | ||
|
@@ -89,13 +65,13 @@ module.exports = function calc(gd, trace) { | |
densitynorm = false; | ||
} | ||
|
||
var extremefunc = func === 'max' || func === 'min', | ||
sizeinit = extremefunc ? null : 0, | ||
binfunc = binFunctions.count, | ||
normfunc = normFunctions[norm], | ||
doavg = false, | ||
pr2c = function(v) { return pa.r2c(v, 0, calendar); }, | ||
rawCounterData; | ||
var extremefunc = func === 'max' || func === 'min'; | ||
var sizeinit = extremefunc ? null : 0; | ||
var binfunc = binFunctions.count; | ||
var normfunc = normFunctions[norm]; | ||
var doavg = false; | ||
var pr2c = function(v) { return pa.r2c(v, 0, calendar); }; | ||
var rawCounterData; | ||
|
||
if(Array.isArray(trace[counterdata]) && func !== 'count') { | ||
rawCounterData = trace[counterdata]; | ||
|
@@ -104,7 +80,7 @@ module.exports = function calc(gd, trace) { | |
} | ||
|
||
// create the bins (and any extra arrays needed) | ||
// assume more than 5000 bins is an error, so we don't crash the browser | ||
// assume more than 1e6 bins is an error, so we don't crash the browser | ||
i = pr2c(binspec.start); | ||
|
||
// decrease end a little in case of rounding errors | ||
|
@@ -150,10 +126,11 @@ module.exports = function calc(gd, trace) { | |
if(cumulativeSpec.enabled) cdf(size, cumulativeSpec.direction, cumulativeSpec.currentbin); | ||
|
||
|
||
var serieslen = Math.min(pos.length, size.length), | ||
cd = [], | ||
firstNonzero = 0, | ||
lastNonzero = serieslen - 1; | ||
var serieslen = Math.min(pos.length, size.length); | ||
var cd = []; | ||
var firstNonzero = 0; | ||
var lastNonzero = serieslen - 1; | ||
|
||
// look for empty bins at the ends to remove, so autoscale omits them | ||
for(i = 0; i < serieslen; i++) { | ||
if(size[i]) { | ||
|
@@ -180,10 +157,181 @@ module.exports = function calc(gd, trace) { | |
return cd; | ||
}; | ||
|
||
/* | ||
* calcAllAutoBins: we want all histograms on the same axes to share bin specs | ||
* if they're grouped or stacked. If the user has explicitly specified differing | ||
* bin specs, there's nothing we can do, but if possible we will try to use the | ||
* smallest bins of any of the auto values for all histograms grouped/stacked | ||
* together. | ||
*/ | ||
function calcAllAutoBins(gd, trace, pa, maindata) { | ||
var binAttr = maindata + 'bins'; | ||
|
||
// all but the first trace in this group has already been marked finished | ||
// clear this flag, so next time we run calc we will run autobin again | ||
if(trace._autoBinFinished) { | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Maybe we should move auto-bins computations to There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. ... maybe There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
Pro: that would avoid coupling between traces in a part of the code ( Con: we'd need to be saving these sanitized So I feel like in the end it would be more confusing that way. Thoughts? Incidentally, it looks like we're probably calling There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
Good point. I don't have a strong opinion. This whole There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. OK cool - I'm going to leave it then, but this discussion will be useful to keep in mind for the future, I suspect at some point we will want to reimagine the whole pipeline to be a bit more flexible - particularly in regards to minimizing redraw work. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
Yes we were. Fixed in e81fcb6 - I couldn't think of an easy way to test that we're not doing extra work, but we do have a test that this didn't break anything, in |
||
delete trace._autoBinFinished; | ||
|
||
return trace[binAttr]; | ||
} | ||
|
||
// must be the first trace in the group - do the autobinning on them all | ||
var traceGroup = getConnectedHistograms(gd, trace); | ||
var autoBinnedTraces = []; | ||
|
||
var minSize = Infinity; | ||
var minStart = Infinity; | ||
var maxEnd = -Infinity; | ||
|
||
var autoBinAttr = 'autobin' + maindata; | ||
var i, tracei, calendar, firstManual; | ||
|
||
|
||
for(i = 0; i < traceGroup.length; i++) { | ||
tracei = traceGroup[i]; | ||
|
||
// stash pos0 on the trace so we don't need to duplicate this | ||
// in the main body of calc | ||
var pos0 = tracei._pos0 = pa.makeCalcdata(tracei, maindata); | ||
var binspec = tracei[binAttr]; | ||
|
||
if((tracei[autoBinAttr]) || !binspec || | ||
binspec.start === null || binspec.end === null) { | ||
calendar = tracei[maindata + 'calendar']; | ||
var cumulativeSpec = tracei.cumulative; | ||
|
||
binspec = Axes.autoBin(pos0, pa, tracei['nbins' + maindata], false, calendar); | ||
|
||
// adjust for CDF edge cases | ||
if(cumulativeSpec.enabled && (cumulativeSpec.currentbin !== 'include')) { | ||
if(cumulativeSpec.direction === 'decreasing') { | ||
minStart = Math.min(minStart, pa.r2c(binspec.start, 0, calendar) - binspec.size); | ||
} | ||
else { | ||
maxEnd = Math.max(maxEnd, pa.r2c(binspec.end, 0, calendar) + binspec.size); | ||
} | ||
} | ||
|
||
// note that it's possible to get here with an explicit autobin: false | ||
// if the bins were not specified. mark this trace for followup | ||
autoBinnedTraces.push(tracei); | ||
} | ||
else if(!firstManual) { | ||
// Remember the first manually set binspec. We'll try to be extra | ||
// accommodating of this one, so other bins line up with these | ||
// if there's more than one manual bin set and they're mutually inconsistent, | ||
// then there's not much we can do... | ||
firstManual = { | ||
size: binspec.size, | ||
start: pa.r2c(binspec.start, 0, calendar), | ||
end: pa.r2c(binspec.end, 0, calendar) | ||
}; | ||
} | ||
|
||
// Even non-autobinned traces get included here, so we get the greatest extent | ||
// and minimum bin size of them all. | ||
// But manually binned traces won't be adjusted, even if the auto values | ||
// are inconsistent with the manual ones (or the manual ones are inconsistent | ||
// with each other). | ||
// | ||
// TODO: there's probably a weird case here where a larger bin pushes the | ||
// start/end out, then it gets shrunk and doesn't make sense with the smaller bin. | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. In a small set of cases, by fixing the major problem (bin sizes getting drawn wrong) we introduce a smaller problem (bin edges get shifted so an undesirably large fraction of data is ambiguously right at bin edges). This is possible because we set There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I don't mind not fixing this either. It would be nice to add a test case to document this behavior though. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. tested in ec12eab |
||
// Need to look for cases like this and see if the results are acceptable | ||
// or we need to think harder about it. | ||
minSize = getMinSize(minSize, binspec.size); | ||
minStart = Math.min(minStart, pa.r2c(binspec.start, 0, calendar)); | ||
maxEnd = Math.max(maxEnd, pa.r2c(binspec.end, 0, calendar)); | ||
|
||
// add the flag that lets us abort autobin on later traces | ||
if(i) trace._autoBinFinished = 1; | ||
} | ||
|
||
// do what we can to match the auto bins to the first manual bins | ||
// but only if sizes are all numeric | ||
if(firstManual && isNumeric(firstManual.size) && isNumeric(minSize)) { | ||
// first need to ensure the bin size is the same as or an integer fraction | ||
// of the first manual bin | ||
// allow the bin size to increase just under the autobin step size to match, | ||
// (which is a factor of 2 or 2.5) otherwise shrink it | ||
if(minSize > firstManual.size / 1.9) minSize = firstManual.size; | ||
else minSize = firstManual.size / Math.ceil(firstManual.size / minSize); | ||
|
||
// now decrease minStart if needed to make the bin centers line up | ||
var adjustedFirstStart = firstManual.start + (firstManual.size - minSize) / 2; | ||
minStart = adjustedFirstStart - minSize * Math.ceil((adjustedFirstStart - minStart) / minSize); | ||
} | ||
|
||
// now go back to the autobinned traces and update their bin specs with the final values | ||
for(i = 0; i < autoBinnedTraces.length; i++) { | ||
tracei = autoBinnedTraces[i]; | ||
calendar = tracei[maindata + 'calendar']; | ||
|
||
tracei._input[binAttr] = tracei[binAttr] = { | ||
start: pa.c2r(minStart, 0, calendar), | ||
end: pa.c2r(maxEnd, 0, calendar), | ||
size: minSize | ||
}; | ||
|
||
// note that it's possible to get here with an explicit autobin: false | ||
// if the bins were not specified. | ||
// in that case this will remain in the trace, so that future updates | ||
// which would change the autobinning will not do so. | ||
tracei._input[autoBinAttr] = tracei[autoBinAttr]; | ||
} | ||
|
||
return trace[binAttr]; | ||
} | ||
|
||
/* | ||
* return an array of traces that are all stacked or grouped together | ||
* TODO: only considers histograms. Should we also harmonize with bars? | ||
* in principle people can mix and match these, but bars always | ||
* specify their positions explicitly... | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. We could try to treat bars the same as we treat manually-binned histograms in adjusting the autobins. That has more edge cases to consider though, nonuniform bar spacing being perhaps the hardest to manage, so I'm inclined not to worry about this one either. If someone is grouping/stacking bars with histograms I'd like to think it would be clear to them that they need to set the histogram binning to match the bar positions... There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
No worries for me either. |
||
*/ | ||
function getConnectedHistograms(gd, trace) { | ||
if(gd._fullLayout.barmode === 'overlay') return [trace]; | ||
|
||
var xid = trace.xaxis; | ||
var yid = trace.yaxis; | ||
var orientation = trace.orientation; | ||
|
||
var out = []; | ||
var fullData = gd._fullData; | ||
for(var i = 0; i < fullData.length; i++) { | ||
var tracei = fullData[i]; | ||
if(tracei.type === 'histogram' && | ||
tracei.orientation === orientation && | ||
tracei.xaxis === xid && tracei.yaxis === yid | ||
) { | ||
out.push(tracei); | ||
} | ||
} | ||
|
||
return out; | ||
} | ||
|
||
|
||
/* | ||
* getMinSize: find the smallest given that size can be a string code | ||
* ie 'M6' for 6 months. ('L' wouldn't make sense to compare with numeric sizes) | ||
*/ | ||
function getMinSize(size1, size2) { | ||
if(size1 === Infinity) return size2; | ||
var sizeNumeric1 = numericSize(size1); | ||
var sizeNumeric2 = numericSize(size2); | ||
return sizeNumeric2 < sizeNumeric1 ? size2 : size1; | ||
} | ||
|
||
function numericSize(size) { | ||
if(isNumeric(size)) return size; | ||
if(typeof size === 'string' && size.charAt(0) === 'M') { | ||
return oneMonth * +(size.substr(1)); | ||
} | ||
return Infinity; | ||
} | ||
|
||
function cdf(size, direction, currentbin) { | ||
var i, | ||
vi, | ||
prevSum; | ||
var i, vi, prevSum; | ||
|
||
function firstHalfPoint(i) { | ||
prevSum = size[i]; | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Maybe
calcAllAutoBins
should return a tuple e.g.[binspec, pos0]
?There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
perhaps... we'd still need to stash
_pos0
because it could be used in a totally separate call tocalc
, but at least that could all be dealt with insidecalcAllAutoBins
, so yeah that would be easier to understand. 👍 (unless we end up moving tosetPositions
...)There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
reorg so
_pos0
is only incalcAllAutoBins
in 89093ad