Skip to content

Commit c87ccb3

Browse files
committed
Lib.sort
faster sort of already-sorted arrays with minimal penalty for unsorted arrays
1 parent 19f3800 commit c87ccb3

File tree

3 files changed

+200
-0
lines changed

3 files changed

+200
-0
lines changed

src/lib/index.js

+1
Original file line numberDiff line numberDiff line change
@@ -62,6 +62,7 @@ lib.sorterAsc = searchModule.sorterAsc;
6262
lib.sorterDes = searchModule.sorterDes;
6363
lib.distinctVals = searchModule.distinctVals;
6464
lib.roundUp = searchModule.roundUp;
65+
lib.sort = searchModule.sort;
6566

6667
var statsModule = require('./stats');
6768
lib.aggNums = statsModule.aggNums;

src/lib/search.js

+41
Original file line numberDiff line numberDiff line change
@@ -113,3 +113,44 @@ exports.roundUp = function(val, arrayIn, reverse) {
113113
}
114114
return arrayIn[low];
115115
};
116+
117+
/**
118+
* Tweak to Array.sort(sortFn) that improves performance for pre-sorted arrays
119+
*
120+
* Motivation: sometimes we need to sort arrays but the input is likely to
121+
* already be sorted. Browsers don't seem to pick up on pre-sorted arrays,
122+
* and in fact Chrome is actually *slower* sorting pre-sorted arrays than purely
123+
* random arrays. FF is at least faster if the array is pre-sorted, but still
124+
* not as fast as it could be.
125+
* Here's how this plays out sorting a length-1e6 array:
126+
*
127+
* Calls to Sort FN | Chrome bare | FF bare | Chrome tweak | FF tweak
128+
* ------------------+---------------+-----------+----------------+------------
129+
* ordered | 30.4e6 | 10.1e6 | 1e6 | 1e6
130+
* reversed | 29.4e6 | 9.9e6 | 1e6 + reverse | 1e6 + reverse
131+
* random | ~21e6 | ~18.7e6 | ~21e6 | ~18.7e6
132+
*
133+
* So this is a substantial win for pre-sorted (ordered or exactly reversed)
134+
* arrays. Including this wrapper on an unsorted array adds a penalty that will
135+
* in general be only a few calls to the sort function. The only case this
136+
* penalty will be significant is if the array is mostly sorted but there are
137+
* a few unsorted items near the end, but the penalty is still at most N calls
138+
* out of (for N=1e6) ~20N total calls
139+
*
140+
* @param {Array} array: the array, to be sorted in place
141+
* @param {function} sortFn: As in Array.sort, function(a, b) that puts
142+
* item a before item b if the return is negative, a after b if positive,
143+
* and no change if zero.
144+
* @return {Array}: the original array, sorted in place.
145+
*/
146+
exports.sort = function(array, sortFn) {
147+
var notOrdered = 0;
148+
var notReversed = 0;
149+
for(var i = 1; i < array.length; i++) {
150+
var pairOrder = sortFn(array[i], array[i - 1]);
151+
if(pairOrder < 0) notOrdered = 1;
152+
else if(pairOrder > 0) notReversed = 1;
153+
if(notOrdered && notReversed) return array.sort(sortFn);
154+
}
155+
return notReversed ? array : array.reverse();
156+
};

test/jasmine/tests/lib_test.js

+158
Original file line numberDiff line numberDiff line change
@@ -2189,6 +2189,164 @@ describe('Test lib.js:', function() {
21892189
});
21902190
});
21912191

2192+
describe('sort', function() {
2193+
var callCount;
2194+
beforeEach(function() {
2195+
callCount = 0;
2196+
});
2197+
2198+
function sortCounter(a, b) {
2199+
callCount++;
2200+
return a - b;
2201+
}
2202+
2203+
function sortCounterReversed(a, b) {
2204+
callCount++;
2205+
return b - a;
2206+
}
2207+
2208+
function ascending(n) {
2209+
var out = new Array(n);
2210+
for(var i = 0; i < n; i++) {
2211+
out[i] = i;
2212+
}
2213+
assertAscending(out);
2214+
return out;
2215+
}
2216+
2217+
function descending(n) {
2218+
var out = new Array(n);
2219+
for(var i = 0; i < n; i++) {
2220+
out[i] = n - 1 - i;
2221+
}
2222+
assertDescending(out);
2223+
return out;
2224+
}
2225+
2226+
function rand(n) {
2227+
Lib.seedPseudoRandom();
2228+
var out = new Array(n);
2229+
for(var i = 0; i < n; i++) {
2230+
out[i] = Lib.pseudoRandom();
2231+
}
2232+
return out;
2233+
}
2234+
2235+
function assertAscending(array) {
2236+
for(var i = 1; i < array.length; i++) {
2237+
if(array[i] < array[i - 1]) {
2238+
// we already know this expect will fail,
2239+
// just want to format the message nicely and then
2240+
// quit so we don't get a million messages
2241+
expect(array[i]).not.toBeLessThan(array[i - 1]);
2242+
break;
2243+
}
2244+
}
2245+
}
2246+
2247+
/**
 * Fail the current spec if `array` is not in descending (non-increasing)
 * order. Only the first offending pair is reported.
 *
 * @param {Array} array: the values to check
 */
function assertDescending(array) {
    for(var i = 1; i < array.length; i++) {
        // an item larger than its predecessor breaks descending order
        if(array[i] > array[i - 1]) {
            // we already know this expect will fail,
            // just want to format the message nicely and then
            // quit so we don't get a million messages
            expect(array[i]).not.toBeGreaterThan(array[i - 1]);
            break;
        }
    }
}
2255+
2256+
function _sort(array, sortFn) {
2257+
var arrayOut = Lib.sort(array, sortFn);
2258+
expect(arrayOut).toBe(array);
2259+
return array;
2260+
}
2261+
2262+
it('sorts ascending arrays ascending in N-1 calls', function() {
2263+
var arrayIn = _sort(ascending(100000), sortCounter);
2264+
expect(callCount).toBe(99999);
2265+
assertAscending(arrayIn);
2266+
});
2267+
2268+
it('sorts descending arrays ascending in N-1 calls', function() {
2269+
var arrayIn = _sort(descending(100000), sortCounter);
2270+
expect(callCount).toBe(99999);
2271+
assertAscending(arrayIn);
2272+
});
2273+
2274+
it('sorts ascending arrays descending in N-1 calls', function() {
2275+
var arrayIn = _sort(ascending(100000), sortCounterReversed);
2276+
expect(callCount).toBe(99999);
2277+
assertDescending(arrayIn);
2278+
});
2279+
2280+
it('sorts descending arrays descending in N-1 calls', function() {
2281+
var arrayIn = _sort(descending(100000), sortCounterReversed);
2282+
expect(callCount).toBe(99999);
2283+
assertDescending(arrayIn);
2284+
});
2285+
2286+
it('sorts random arrays ascending in a few more calls than bare sort', function() {
2287+
var arrayIn = _sort(rand(100000), sortCounter);
2288+
assertAscending(arrayIn);
2289+
2290+
var ourCallCount = callCount;
2291+
callCount = 0;
2292+
rand(100000).sort(sortCounter);
2293+
// in general this will be ~N*log_2(N)
2294+
expect(callCount).toBeGreaterThan(1e6);
2295+
// This number (2) is only repeatable because we used Lib.pseudoRandom
2296+
// should always be at least 2 and less than N - 1, and if
2297+
// the input array is really not sorted it will be close to 2. It will
2298+
// only be large if the array is sorted until near the end.
2299+
expect(ourCallCount - callCount).toBe(2);
2300+
});
2301+
2302+
it('sorts random arrays descending in a few more calls than bare sort', function() {
2303+
var arrayIn = _sort(rand(100000), sortCounterReversed);
2304+
assertDescending(arrayIn);
2305+
2306+
var ourCallCount = callCount;
2307+
callCount = 0;
2308+
rand(100000).sort(sortCounterReversed);
2309+
expect(callCount).toBeGreaterThan(1e6);
2310+
expect(ourCallCount - callCount).toBe(2);
2311+
});
2312+
2313+
it('supports short arrays', function() {
2314+
expect(_sort([], sortCounter)).toEqual([]);
2315+
expect(_sort([1], sortCounter)).toEqual([1]);
2316+
expect(callCount).toBe(0);
2317+
2318+
expect(_sort([1, 2], sortCounter)).toEqual([1, 2]);
2319+
expect(_sort([2, 3], sortCounterReversed)).toEqual([3, 2]);
2320+
expect(callCount).toBe(2);
2321+
});
2322+
2323+
function dupes() {
2324+
return [0, 1, 1, 2, 2, 2, 3, 3, 3, 3, 4, 4, 4, 5, 5, 6, 7, 8, 9];
2325+
}
2326+
2327+
it('still short-circuits in order with duplicates', function() {
2328+
expect(_sort(dupes(), sortCounter))
2329+
.toEqual(dupes());
2330+
2331+
expect(callCount).toEqual(18);
2332+
2333+
callCount = 0;
2334+
dupes().sort(sortCounter);
2335+
expect(callCount).toBeGreaterThan(18);
2336+
});
2337+
2338+
it('still short-circuits reversed with duplicates', function() {
2339+
expect(_sort(dupes(), sortCounterReversed))
2340+
.toEqual(dupes().reverse());
2341+
2342+
expect(callCount).toEqual(18);
2343+
2344+
callCount = 0;
2345+
dupes().sort(sortCounterReversed);
2346+
expect(callCount).toBeGreaterThan(18);
2347+
});
2348+
});
2349+
21922350
describe('relinkPrivateKeys', function() {
21932351
it('ignores customdata and ids', function() {
21942352
var fromContainer = {

0 commit comments

Comments
 (0)