Skip to content

Sort transform #1609

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 8 commits into from
May 10, 2017
Merged
Show file tree
Hide file tree
Changes from 7 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 2 additions & 1 deletion lib/index.js
Original file line number Diff line number Diff line change
Expand Up @@ -56,7 +56,8 @@ Plotly.register([
//
Plotly.register([
require('./filter'),
require('./groupby')
require('./groupby'),
require('./sort')
]);

// components
Expand Down
11 changes: 11 additions & 0 deletions lib/sort.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
/**
* Copyright 2012-2017, Plotly, Inc.
* All rights reserved.
*
* This source code is licensed under the MIT license found in the
* LICENSE file in the root directory of this source tree.
*/

'use strict';

module.exports = require('../src/transforms/sort');
23 changes: 23 additions & 0 deletions src/lib/index.js
Original file line number Diff line number Diff line change
Expand Up @@ -344,6 +344,29 @@ lib.mergeArray = function(traceAttr, cd, cdAttr) {
}
};

/** Returns target as set by 'target' transform attribute
*
* @param {object} trace : full trace object
* @param {object} transformOpts : transform option object
* - target (string} :
* either an attribute string referencing an array in the trace object, or
* a set array.
*
* @return {array or false} : the target array (NOT a copy!!) or false if invalid
*/
lib.getTargetArray = function(trace, transformOpts) {
var target = transformOpts.target;

if(typeof target === 'string' && target) {
var array = lib.nestedProperty(trace, target).get();
return Array.isArray(array) ? array : false;
} else if(Array.isArray(target)) {
return target;
}

return false;
};

/**
* modified version of jQuery's extend to strip out private objs and functions,
* and cut arrays down to first <arraylen> or 1 elements
Expand Down
45 changes: 43 additions & 2 deletions src/plots/cartesian/axes.js
Original file line number Diff line number Diff line change
Expand Up @@ -29,13 +29,13 @@ var ONEMIN = constants.ONEMIN;
var ONESEC = constants.ONESEC;
var BADNUM = constants.BADNUM;


var axes = module.exports = {};

axes.layoutAttributes = require('./layout_attributes');
axes.supplyLayoutDefaults = require('./layout_defaults');

axes.setConvert = require('./set_convert');
var autoType = require('./axis_autotype');

var axisIds = require('./axis_ids');
axes.id2name = axisIds.id2name;
Expand All @@ -45,7 +45,6 @@ axes.listIds = axisIds.listIds;
axes.getFromId = axisIds.getFromId;
axes.getFromTrace = axisIds.getFromTrace;


/*
* find the list of possible axes to reference with an xref or yref attribute
* and coerce it to that list
Expand Down Expand Up @@ -130,6 +129,48 @@ axes.coercePosition = function(containerOut, gd, coerce, axRef, attr, dflt) {
containerOut[attr] = isNumeric(pos) ? Number(pos) : dflt;
};

axes.getDataToCoordFunc = function(gd, trace, target, targetArray) {
var ax;

// If target points to an axis, use the type we already have for that
// axis to find the data type. Otherwise use the values to autotype.
var d2cTarget = (target === 'x' || target === 'y' || target === 'z') ?
target :
targetArray;

// In the case of an array target, make a mock data array
// and call supplyDefaults to the data type and
// setup the data-to-calc method.
if(Array.isArray(d2cTarget)) {
ax = {
type: autoType(targetArray),
_categories: []
};
axes.setConvert(ax);

// build up ax._categories (usually done during ax.makeCalcdata()
if(ax.type === 'category') {
for(var i = 0; i < targetArray.length; i++) {
ax.d2c(targetArray[i]);
}
}
} else {
ax = axes.getFromTrace(gd, trace, d2cTarget);
}

// if 'target' has corresponding axis
// -> use setConvert method
if(ax) return ax.d2c;

// special case for 'ids'
// -> cast to String
if(d2cTarget === 'ids') return function(v) { return String(v); };

// otherwise (e.g. numeric-array of 'marker.color' or 'marker.size')
// -> cast to Number
return function(v) { return +v; };
};

// empty out types for all axes containing these traces
// so we auto-set them again
axes.clearTypes = function(gd, traces) {
Expand Down
73 changes: 8 additions & 65 deletions src/transforms/filter.js
Original file line number Diff line number Diff line change
Expand Up @@ -11,9 +11,7 @@
var Lib = require('../lib');
var Registry = require('../registry');
var PlotSchema = require('../plot_api/plot_schema');
var axisIds = require('../plots/cartesian/axis_ids');
var autoType = require('../plots/cartesian/axis_autotype');
var setConvert = require('../plots/cartesian/set_convert');
var Axes = require('../plots/cartesian/axes');

var COMPARISON_OPS = ['=', '!=', '<', '>=', '>', '<='];
var INTERVAL_OPS = ['[]', '()', '[)', '(]', '][', ')(', '](', ')['];
Expand Down Expand Up @@ -144,12 +142,11 @@ exports.supplyDefaults = function(transformIn) {
exports.calcTransform = function(gd, trace, opts) {
if(!opts.enabled) return;

var target = opts.target,
filterArray = getFilterArray(trace, target),
len = filterArray.length;

if(!len) return;
var targetArray = Lib.getTargetArray(trace, opts);
if(!targetArray) return;

var target = opts.target;
var len = targetArray.length;
var targetCalendar = opts.targetcalendar;

// even if you provide targetcalendar, if target is a string and there
Expand All @@ -159,13 +156,8 @@ exports.calcTransform = function(gd, trace, opts) {
if(attrTargetCalendar) targetCalendar = attrTargetCalendar;
}

// if target points to an axis, use the type we already have for that
// axis to find the data type. Otherwise use the values to autotype.
var d2cTarget = (target === 'x' || target === 'y' || target === 'z') ?
target : filterArray;

var dataToCoord = getDataToCoordFunc(gd, trace, d2cTarget);
var filterFunc = getFilterFunc(opts, dataToCoord, targetCalendar);
var d2c = Axes.getDataToCoordFunc(gd, trace, target, targetArray);
var filterFunc = getFilterFunc(opts, d2c, targetCalendar);
var arrayAttrs = PlotSchema.findArrayAttributes(trace);
var originalArrays = {};

Expand Down Expand Up @@ -203,60 +195,11 @@ exports.calcTransform = function(gd, trace, opts) {

// loop through filter array, fill trace arrays if passed
for(var i = 0; i < len; i++) {
var passed = filterFunc(filterArray[i]);
var passed = filterFunc(targetArray[i]);
if(passed) forAllAttrs(fillFn, i);
}
};

function getFilterArray(trace, target) {
if(typeof target === 'string' && target) {
var array = Lib.nestedProperty(trace, target).get();

return Array.isArray(array) ? array : [];
}
else if(Array.isArray(target)) return target.slice();

return false;
}

function getDataToCoordFunc(gd, trace, target) {
var ax;

// In the case of an array target, make a mock data array
// and call supplyDefaults to the data type and
// setup the data-to-calc method.
if(Array.isArray(target)) {
ax = {
type: autoType(target),
_categories: []
};

setConvert(ax);

if(ax.type === 'category') {
// build up ax._categories (usually done during ax.makeCalcdata()
for(var i = 0; i < target.length; i++) {
ax.d2c(target[i]);
}
}
}
else {
ax = axisIds.getFromTrace(gd, trace, target);
}

// if 'target' has corresponding axis
// -> use setConvert method
if(ax) return ax.d2c;

// special case for 'ids'
// -> cast to String
if(target === 'ids') return function(v) { return String(v); };

// otherwise (e.g. numeric-array of 'marker.color' or 'marker.size')
// -> cast to Number
return function(v) { return +v; };
}

function getFilterFunc(opts, d2c, targetCalendar) {
var operation = opts.operation,
value = opts.value,
Expand Down
133 changes: 133 additions & 0 deletions src/transforms/sort.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,133 @@
/**
* Copyright 2012-2017, Plotly, Inc.
* All rights reserved.
*
* This source code is licensed under the MIT license found in the
* LICENSE file in the root directory of this source tree.
*/

'use strict';

var Lib = require('../lib');
var PlotSchema = require('../plot_api/plot_schema');
var Axes = require('../plots/cartesian/axes');

exports.moduleType = 'transform';

exports.name = 'sort';

exports.attributes = {
enabled: {
valType: 'boolean',
dflt: true,
description: [
'Determines whether this sort transform is enabled or disabled.'
].join(' ')
},
target: {
valType: 'string',
strict: true,
noBlank: true,
arrayOk: true,
dflt: 'x',
description: [
'Sets the target by which the sort transform is applied.',

'If a string, *target* is assumed to be a reference to a data array',
'in the parent trace object.',
'To sort about nested variables, use *.* to access them.',
'For example, set `target` to *marker.size* to sort',
'about the marker size array.',

'If an array, *target* is then the data array by which',
'the sort transform is applied.'
].join(' ')
},
order: {
valType: 'enumerated',
values: ['ascending', 'descending'],
Copy link
Contributor Author

@etpinard etpinard Apr 19, 2017

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I chose ascending and descending to be familiar with axis categoryorder.

dflt: 'ascending',
description: [
'Sets the sort transform order.'
].join(' ')
}
};

exports.supplyDefaults = function(transformIn) {
var transformOut = {};

function coerce(attr, dflt) {
return Lib.coerce(transformIn, transformOut, exports.attributes, attr, dflt);
}

var enabled = coerce('enabled');

if(enabled) {
coerce('target');
coerce('order');
}

return transformOut;
};

exports.calcTransform = function(gd, trace, opts) {
if(!opts.enabled) return;

var targetArray = Lib.getTargetArray(trace, opts);
if(!targetArray) return;

var target = opts.target;
var len = targetArray.length;
var arrayAttrs = PlotSchema.findArrayAttributes(trace);
var d2c = Axes.getDataToCoordFunc(gd, trace, target, targetArray);
var indices = getIndices(opts, targetArray, d2c);

for(var i = 0; i < arrayAttrs.length; i++) {
var np = Lib.nestedProperty(trace, arrayAttrs[i]);
var arrayOld = np.get();
var arrayNew = new Array(len);

for(var j = 0; j < len; j++) {
arrayNew[j] = arrayOld[indices[j]];
}

np.set(arrayNew);
}
};

function getIndices(opts, targetArray, d2c) {
Copy link
Contributor Author

@etpinard etpinard Apr 19, 2017

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Challenge to all plotly.js contributors: find a faster sort algorithm that this O(n^2) attempt. Note that binary search is nice, but doesn't work so great for arrays with duplicate items.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Not sure I quite understand. For the overall sort? Can you use something like stable? 1. assemble [{value: 2, index: 0}, {value: 5, index: 1}, ...]. 2. Apply stable sort. 3. collect sorted indices and reorder other data arrays by that.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Stable sorting may be useful, e.g. the user presorted based on some criteria, we'd keep that, even if this new facility will support sorting based on multiple fields. Stable sorting is simple with the built-in Array.prototype.sort as in this recently merged thing. The built-in sort is rather fast.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The function d2c seems to always be getDataToCoordFunc which looks a bit expensive. Since it's a lot of calls to this function - 2 calls on each comparison - there's probably benefit to first map (e.g. in a loop) to get the d2c values for all items, and then sort based on that. One way is sorting value/index pairs as above, or more simply, just the indices in a numeric array. Another is to add the d2c value as a property to the objects to be sorted, and then use return a.d2cValue - b.d2cValue || a.originalIndex - b.originalIndex similar to the above link in which case the objects themselves are easily sorted (using the accessor is very fast).

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Thanks @monfera , that .map(d2c) pass pre-.sort sounds like a good idea 🐎

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

^^ put in a bit more code for copy/paste into console in isolation and fixed a typo

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

^^ another fix and using actual objects to verify stability of the sorting

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@monfera how does ^^ handle duplicate items?

Copy link
Contributor Author

@etpinard etpinard Apr 25, 2017

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Some interesting results in https://gist.github.com/etpinard/86949a01309d5505fab8ccca2b0cd875

Looks like my dumb attempt isn't that bad after all (barring any typos ...)

var len = targetArray.length;
var indices = new Array(len);

var sortedArray = targetArray
.slice()
.sort(getSortFunc(opts, d2c));

for(var i = 0; i < len; i++) {
var vTarget = targetArray[i];

for(var j = 0; j < len; j++) {
var vSorted = sortedArray[j];

if(vTarget === vSorted) {
indices[j] = i;

// clear sortedArray item to get correct
// index of duplicate items (if any)
sortedArray[j] = null;
break;
}
}
}

return indices;
}

function getSortFunc(opts, d2c) {
switch(opts.order) {
case 'ascending':
return function(a, b) { return d2c(a) - d2c(b); };
case 'descending':
return function(a, b) { return d2c(b) - d2c(a); };
}
}
Loading