Skip to content

Commit f6b902c

Browse files
committed
fixes #1151 - auto-shift bins for date histograms similar to numeric
to avoid all data lying ambiguously on bin edges
1 parent b9a379b commit f6b902c

File tree

4 files changed

+151
-76
lines changed

4 files changed

+151
-76
lines changed

src/plots/cartesian/axes.js

+106-9
Original file line numberDiff line numberDiff line change
@@ -487,7 +487,11 @@ axes.expand = function(ax, data, options) {
487487

488488
axes.autoBin = function(data, ax, nbins, is2d) {
489489
var datamin = Lib.aggNums(Math.min, null, data),
490-
datamax = Lib.aggNums(Math.max, null, data);
490+
datamax = Lib.aggNums(Math.max, null, data),
491+
blankcount = 0,
492+
datacount,
493+
i;
494+
491495
if(ax.type === 'category') {
492496
return {
493497
start: datamin - 0.5,
@@ -548,16 +552,16 @@ axes.autoBin = function(data, ax, nbins, is2d) {
548552
if(typeof dummyax.dtick === 'number') {
549553
var edgecount = 0,
550554
midcount = 0,
551-
intcount = 0,
552-
blankcount = 0;
553-
for(var i = 0; i < data.length; i++) {
555+
intcount = 0;
556+
557+
for(i = 0; i < data.length; i++) {
554558
if(data[i] % 1 === 0) intcount++;
555559
else if(!isNumeric(data[i])) blankcount++;
556560

557561
if(nearEdge(data[i])) edgecount++;
558562
if(nearEdge(data[i] + dummyax.dtick / 2)) midcount++;
559563
}
560-
var datacount = data.length - blankcount;
564+
datacount = data.length - blankcount;
561565

562566
if(intcount === datacount && ax.type !== 'date') {
563567
// all integers: if bin size is <1, it's because
@@ -586,6 +590,12 @@ axes.autoBin = function(data, ax, nbins, is2d) {
586590
binend = binstart + bincount * dummyax.dtick;
587591
}
588592
else {
593+
// month ticks - should be the only nonlinear kind we have
594+
// at this point.
595+
if(dummyax.dtick.charAt(0) === 'M') {
596+
binstart = autoShiftMonthBins(binstart, data, dummyax.dtick, datamin);
597+
}
598+
589599
// calculate the endpoint for nonlinear ticks - you have to
590600
// just increment until you're done
591601
binend = binstart;
@@ -602,6 +612,79 @@ axes.autoBin = function(data, ax, nbins, is2d) {
602612
};
603613

604614

615+
/**
 * Shift the start of month-sized histogram bins so that data sitting on
 * "round" dates (exact years, exact months, or exact days, all in UTC) ends
 * up inside a bin instead of lying ambiguously on a bin edge.
 *
 * The shift is only applied when more than 80% of the non-blank data points
 * fall on exact UTC days; otherwise `binStart` is returned unchanged.
 *
 * @param {number} binStart - proposed start of the first bin, as epoch
 *     milliseconds (the data values are passed to `new Date` / compared
 *     against `Date.UTC`, so everything here is in that unit)
 * @param {Array} data - the values being binned; entries for which
 *     `isNumeric` is false are treated as blanks and ignored
 * @param {string} dtick - month-based bin size of the form 'M<n>'
 *     (the caller only invokes this when `dtick.charAt(0) === 'M'`)
 * @param {number} dataMin - smallest data value
 * @returns {number} the (possibly shifted) start of the first bin
 */
function autoShiftMonthBins(binStart, data, dtick, dataMin) {
    var exactYears = 0,
        exactMonths = 0,
        exactDays = 0,
        blankCount = 0,
        dataCount,
        di,
        d,
        year,
        month;

    // Classify each point by the coarsest "round" UTC date it sits on.
    // Each point is counted in at most one bucket here; the buckets are
    // made cumulative below.
    for(var i = 0; i < data.length; i++) {
        di = data[i];
        if(!isNumeric(di)) {
            blankCount++;
            continue;
        }
        d = new Date(di);
        year = d.getUTCFullYear();
        if(di === Date.UTC(year, 0, 1)) {
            exactYears++;
        }
        else {
            month = d.getUTCMonth();
            if(di === Date.UTC(year, month, 1)) {
                exactMonths++;
            }
            else if(di === Date.UTC(year, month, d.getUTCDate())) {
                exactDays++;
            }
        }
    }

    dataCount = data.length - blankCount;

    // include bigger exact dates in the smaller ones
    // (an exact year is also an exact month, which is also an exact day)
    exactMonths += exactYears;
    exactDays += exactMonths;

    // number of data points that needs to be an exact value
    // to shift that increment to (near) the bin center
    var threshold = 0.8 * dataCount;

    if(exactDays > threshold) {
        var numMonths = Number(dtick.slice(1));

        if((exactYears > threshold) && (numMonths % 12 === 0)) {
            // The exact middle of a non-leap-year is 1.5 days into July
            // so if we start the bins here, all but leap years will
            // get hover-labeled as exact years.
            binStart = axes.tickIncrement(binStart, 'M6', 'reverse') + ONEDAY * 1.5;
        }
        else if(exactMonths > threshold) {
            // Months are not as clean, but if we shift half the *longest*
            // month (31/2 days) then 31-day months will get labeled exactly
            // and shorter months will get labeled with the correct month
            // but shifted 12-36 hours into it.
            binStart = axes.tickIncrement(binStart, 'M1', 'reverse') + ONEDAY * 15.5;
        }
        else {
            // Shifting half a day is exact, but since these are month bins it
            // will always give a somewhat odd-looking label, until we do something
            // smarter like showing the bin boundaries (or the bounds of the actual
            // data in each bin)
            binStart -= ONEDAY / 2;
        }

        // The shifts above all move binStart backward. If that leaves the
        // whole first bin at or before the smallest data point, drop it and
        // start from the following bin instead.
        var nextBinStart = axes.tickIncrement(binStart, dtick);

        if(nextBinStart <= dataMin) return nextBinStart;
    }
    return binStart;
}
687+
605688
// ----------------------------------------------------
606689
// Ticks and grids
607690
// ----------------------------------------------------
@@ -919,6 +1002,7 @@ function autoTickRound(ax) {
9191002
// for pure powers of 10
9201003
// numeric ticks always have constant differences, other datetime ticks
9211004
// can all be calculated as constant number of milliseconds
1005+
var THREEDAYS = 3 * ONEDAY;
9221006
axes.tickIncrement = function(x, dtick, axrev) {
9231007
var axSign = axrev ? -1 : 1;
9241008

@@ -930,10 +1014,23 @@ axes.tickIncrement = function(x, dtick, axrev) {
9301014

9311015
// Dates: months (or years)
9321016
if(tType === 'M') {
933-
var y = new Date(x);
934-
// is this browser consistent? setUTCMonth edits a date but
935-
// returns that date's milliseconds
936-
return y.setUTCMonth(y.getUTCMonth() + dtSigned);
1017+
/*
1018+
* set(UTC)Month does not (and CANNOT) always preserve day, since
1019+
* months have different lengths. The worst example of this is:
1020+
* d = new Date(1970,0,31); d.setMonth(1) -> Feb 31 turns into Mar 3
1021+
*
1022+
* But we want to be able to iterate over the last day of each month,
1023+
* regardless of what its number is.
1024+
* So shift 3 days forward, THEN set the new month, then unshift:
1025+
* 1/31 -> 2/28 (or 29) -> 3/31 -> 4/30 -> ...
1026+
*
1027+
* Note that odd behavior still exists if you start from the 26th-28th:
1028+
* 1/28 -> 2/28 -> 3/31
1029+
* but at least you can't shift any dates into the wrong month,
1030+
* and ticks on these days incrementing by month would be very unusual
1031+
*/
1032+
var y = new Date(x + THREEDAYS);
1033+
return y.setUTCMonth(y.getUTCMonth() + dtSigned) - THREEDAYS;
9371034
}
9381035

9391036
// Log scales: Linear, Digits

test/image/mocks/date_histogram.json

+3-46
Original file line numberDiff line numberDiff line change
@@ -9,66 +9,23 @@
99
"2012-03-01 00:00:00",
1010
"2012-02-01 00:00:00"
1111
],
12-
"name": "trace 0",
13-
"autobinx": false,
12+
"autobinx": true,
1413
"nbinsx": 3,
15-
"xbins": {
16-
"start": "2011-12-16",
17-
"end": "2012-03-16",
18-
"size": "M1"
19-
},
20-
"autobiny": true,
21-
"xaxis": "x",
22-
"yaxis": "y",
2314
"showlegend": false,
2415
"type": "histogram"
2516
}
2617
],
2718
"layout": {
28-
"title": "Click to enter Plot title",
29-
"font": {
30-
"family": "\"Open sans\", verdana, arial, sans-serif",
31-
"size": 12,
32-
"color": "#444"
33-
},
34-
"showlegend": true,
3519
"width": 600,
3620
"height": 400,
3721
"xaxis": {
38-
"title": "month",
39-
"showgrid": false,
40-
"zeroline": false,
41-
"showline": false,
42-
"ticks": "",
43-
"showticklabels": true,
44-
"tickcolor": "rgb(127,127,127)",
45-
"gridcolor": "rgb(255,255,255)"
22+
"title": "month"
4623
},
4724
"yaxis": {
4825
"title": "count",
49-
"showgrid": true,
50-
"zeroline": true,
51-
"showline": false,
52-
"ticks": "",
53-
"tickcolor": "rgb(127,127,127)",
5426
"gridcolor": "rgb(255,255,255)"
5527
},
56-
"legend": {
57-
"x": 100,
58-
"y": 0.5,
59-
"traceorder": "reversed",
60-
"font": {
61-
"family": "",
62-
"size": 0,
63-
"color": ""
64-
},
65-
"bgcolor": "#fff",
66-
"bordercolor": "transparent",
67-
"borderwidth": 0
68-
},
6928
"plot_bgcolor": "rgb(229,229,229)",
70-
"barmode": "stack",
71-
"bargap": 0.2,
72-
"bargroupgap": 0
29+
"bargap": 0.2
7330
}
7431
}

test/jasmine/tests/histogram2d_test.js

+3-6
Original file line numberDiff line numberDiff line change
@@ -92,14 +92,11 @@ describe('Test histogram2d', function() {
9292
// TODO: even though the binning is done on non-uniform bins,
9393
// the display makes them linear (using only y0 and dy)
9494
// when we sort out https://github.com/plotly/plotly.js/issues/1151
95-
// lets also make it display the bins with nonuniform size,
96-
// and ensure we don't generate an extra bin on the end (see
97-
// first row of z below)
98-
expect(out.y0).toBe('1969-07-02 14:24');
99-
expect(out.dy).toBe(365.2 * oneDay);
95+
// lets also make it display the bins with nonuniform size
96+
expect(out.y0).toBe('1970-01-01 03:00');
97+
expect(out.dy).toBe(365.25 * oneDay);
10098

10199
expect(out.z).toEqual([
102-
[0, 0, 0, 0],
103100
[2, 0, 0, 0],
104101
[0, 1, 0, 0],
105102
[0, 0, 0, 0],

test/jasmine/tests/histogram_test.js

+39-15
Original file line numberDiff line numberDiff line change
@@ -149,32 +149,56 @@ describe('Test histogram', function() {
149149
var oneDay = 24 * 3600000;
150150

151151
it('should handle auto dates with nonuniform (month) bins', function() {
152+
// All data on exact years: shift so bin center is an
153+
// exact year, except on leap years
152154
var out = _calc({
153155
x: ['1970-01-01', '1970-01-01', '1971-01-01', '1973-01-01'],
154156
nbinsx: 4
155157
});
156158

157-
// TODO: https://github.com/plotly/plotly.js/issues/1151
158-
// these bins should shift when we implement that
159-
160-
// note that x1-x0 = 365 days, but the others are 365.5 days
161-
162-
// ALSO: this gives half-day gaps between all but the first two
159+
// Note: this gives half-day gaps between all but the first two
163160
// bars. Now that we have explicit per-bar positioning, perhaps
164161
// we should fill the space, rather than insisting on equal-width
165162
// bars?
166-
var x0 = 15768000000,
167-
x1 = x0 + oneDay * 365,
168-
x2 = x1 + oneDay * 365.5,
169-
x3 = x2 + oneDay * 365.5;
170-
171163
expect(out).toEqual([
172164
// full calcdata has x and y too (and t in the first one),
173165
// but those come later from setPositions.
174-
{b: 0, p: x0, s: 2},
175-
{b: 0, p: x1, s: 1},
176-
{b: 0, p: x2, s: 0},
177-
{b: 0, p: x3, s: 1}
166+
{b: 0, p: Date.UTC(1970, 0, 1), s: 2},
167+
{b: 0, p: Date.UTC(1971, 0, 1), s: 1},
168+
{b: 0, p: Date.UTC(1972, 0, 1, 12), s: 0},
169+
{b: 0, p: Date.UTC(1973, 0, 1), s: 1}
170+
]);
171+
172+
// All data on exact months: shift so bin center is on (31-day months)
173+
// or in (shorter months) that month
174+
out = _calc({
175+
x: ['1970-01-01', '1970-01-01', '1970-02-01', '1970-04-01'],
176+
nbinsx: 4
177+
});
178+
179+
expect(out).toEqual([
180+
{b: 0, p: Date.UTC(1970, 0, 1), s: 2},
181+
{b: 0, p: Date.UTC(1970, 1, 1), s: 1},
182+
{b: 0, p: Date.UTC(1970, 2, 2.5), s: 0},
183+
{b: 0, p: Date.UTC(1970, 3, 1), s: 1}
184+
]);
185+
186+
// data on exact days: shift so each bin goes from noon to noon
187+
// even though this gives kind of odd bin centers since the bins
188+
// are months... but the important thing is it's unambiguous which
189+
// bin any given day is in.
190+
out = _calc({
191+
x: ['1970-01-02', '1970-01-31', '1970-02-13', '1970-04-19'],
192+
nbinsx: 4
193+
});
194+
195+
expect(out).toEqual([
196+
// dec 31 12:00 -> jan 31 12:00, middle is jan 16
197+
{b: 0, p: Date.UTC(1970, 0, 16), s: 2},
198+
// jan 31 12:00 -> feb 28 12:00, middle is feb 14 12:00
199+
{b: 0, p: Date.UTC(1970, 1, 14, 12), s: 1},
200+
{b: 0, p: Date.UTC(1970, 2, 16), s: 0},
201+
{b: 0, p: Date.UTC(1970, 3, 15, 12), s: 1}
178202
]);
179203
});
180204

0 commit comments

Comments
 (0)