Skip to content

Commit 6a41d68

Browse files
author
Brian Hulette
committed
clean up table benchmarks
1 parent 2744c63 commit 6a41d68

File tree

5 files changed

+63
-173
lines changed

5 files changed

+63
-173
lines changed

js/package.json

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -12,6 +12,7 @@
1212
"clean": "gulp clean",
1313
"debug": "gulp debug",
1414
"perf": "node ./perf/index.js",
15+
"create:perfdata": "python ./test/data/tables/generate.py ./test/data/tables/tracks.arrow",
1516
"release": "./npm-release.sh",
1617
"clean:all": "run-p clean clean:testdata",
1718
"clean:testdata": "gulp clean:testdata",

js/perf/index.js

Lines changed: 32 additions & 167 deletions
Original file line numberDiff line numberDiff line change
@@ -16,54 +16,42 @@
1616
// under the License.
1717

1818
// Use the ES5 UMD target as perf baseline
19-
// const { lit, col, DataFrame, Table, readVectors } = require('../targets/es5/umd');
20-
// const { lit, col, DataFrame, Table, readVectors } = require('../targets/es5/cjs');
21-
// const { lit, col, DataFrame, Table, readVectors } = require('../targets/es2015/umd');
22-
const { lit, col, DataFrame, Table, readVectors } = require('../targets/es2015/cjs');
19+
const { col, DataFrame, Table, readVectors } = require('../targets/es5/umd');
20+
// const { col, DataFrame, Table, readVectors } = require('../targets/es5/cjs');
21+
// const { col, DataFrame, Table, readVectors } = require('../targets/es2015/umd');
22+
// const { col, DataFrame, Table, readVectors } = require('../targets/es2015/cjs');
2323

2424
const config = require('./config');
2525
const Benchmark = require('benchmark');
2626

2727
const suites = [];
2828

29-
//for (let { name, buffers} of config) {
30-
// const parseSuite = new Benchmark.Suite(`Parse "${name}"`, { async: true });
31-
// const sliceSuite = new Benchmark.Suite(`Slice "${name}" vectors`, { async: true });
32-
// const iterateSuite = new Benchmark.Suite(`Iterate "${name}" vectors`, { async: true });
33-
// const getByIndexSuite = new Benchmark.Suite(`Get "${name}" values by index`, { async: true });
34-
// parseSuite.add(createFromTableTest(name, buffers));
35-
// parseSuite.add(createReadVectorsTest(name, buffers));
36-
// for (const vector of Table.from(buffers).columns) {
37-
// sliceSuite.add(createSliceTest(vector));
38-
// iterateSuite.add(createIterateTest(vector));
39-
// getByIndexSuite.add(createGetByIndexTest(vector));
40-
// }
41-
// suites.push(getByIndexSuite, iterateSuite, sliceSuite, parseSuite);
42-
//}
29+
for (let { name, buffers} of config) {
30+
const parseSuite = new Benchmark.Suite(`Parse "${name}"`, { async: true });
31+
const sliceSuite = new Benchmark.Suite(`Slice "${name}" vectors`, { async: true });
32+
const iterateSuite = new Benchmark.Suite(`Iterate "${name}" vectors`, { async: true });
33+
const getByIndexSuite = new Benchmark.Suite(`Get "${name}" values by index`, { async: true });
34+
parseSuite.add(createFromTableTest(name, buffers));
35+
parseSuite.add(createReadVectorsTest(name, buffers));
36+
for (const vector of Table.from(buffers).columns) {
37+
sliceSuite.add(createSliceTest(vector));
38+
iterateSuite.add(createIterateTest(vector));
39+
getByIndexSuite.add(createGetByIndexTest(vector));
40+
}
41+
suites.push(getByIndexSuite, iterateSuite, sliceSuite, parseSuite);
42+
}
4343

4444
for (let {name, buffers, tests} of require('./table_config')) {
45-
const tableIteratorSuite = new Benchmark.Suite(`Table Iterator "${name}"`, { async: true });
46-
const tableCountSuite = new Benchmark.Suite(`Table Count "${name}"`, { async: true });
47-
const dfIteratorSuite = new Benchmark.Suite(`DataFrame Iterator "${name}"`, { async: true });
48-
const dfIteratorCountSuite = new Benchmark.Suite(`DataFrame Iterator Count "${name}"`, { async: true });
45+
const dfFilterCountSuite = new Benchmark.Suite(`DataFrame Filter-Scan Count "${name}"`, { async: true });
4946
const dfDirectCountSuite = new Benchmark.Suite(`DataFrame Direct Count "${name}"`, { async: true });
50-
const dfScanCountSuite = new Benchmark.Suite(`DataFrame Scan Count "${name}"`, { async: true });
51-
const dfFilterCountSuite = new Benchmark.Suite(`DataFrame Filter Scan Count "${name}"`, { async: true });
52-
const vectorCountSuite = new Benchmark.Suite(`Vector Count "${name}"`, { async: true });
5347
const table = Table.from(buffers);
5448

55-
tableIteratorSuite.add(createTableIteratorTest(table));
56-
dfIteratorSuite.add(createDataFrameIteratorTest(table));
5749
for (test of tests) {
58-
tableCountSuite.add(createTableCountTest(table, test.col, test.test, test.value))
59-
dfIteratorCountSuite.add(createDataFrameIteratorCountTest(table, test.col, test.test, test.value))
60-
dfDirectCountSuite.add(createDataFrameDirectCountTest(table, test.col, test.test, test.value))
61-
dfScanCountSuite.add(createDataFrameScanCountTest(table, test.col, test.test, test.value))
6250
dfFilterCountSuite.add(createDataFrameFilterCountTest(table, test.col, test.test, test.value))
63-
vectorCountSuite.add(createVectorCountTest(table.columns[test.col], test.test, test.value))
51+
dfDirectCountSuite.add(createDataFrameDirectCountTest(table, test.col, test.test, test.value))
6452
}
6553

66-
suites.push(tableIteratorSuite, tableCountSuite, dfIteratorSuite, dfIteratorCountSuite, dfDirectCountSuite, dfScanCountSuite, dfFilterCountSuite, vectorCountSuite)
54+
suites.push(dfFilterCountSuite, dfDirectCountSuite)
6755
}
6856

6957
console.log('Running apache-arrow performance tests...\n');
@@ -135,81 +123,9 @@ function createGetByIndexTest(vector) {
135123
};
136124
}
137125

138-
function createVectorCountTest(vector, test, value) {
139-
let op;
140-
if (test == 'gteq') {
141-
op = function () {
142-
sum = 0;
143-
for (cell of vector) {
144-
sum += (cell >= value)
145-
}
146-
}
147-
} else if (test == 'eq') {
148-
op = function () {
149-
sum = 0;
150-
for (cell of vector) {
151-
sum += (cell == value)
152-
}
153-
}
154-
} else {
155-
throw new Error(`Unrecognized test "$test"`);
156-
}
157-
158-
return {
159-
async: true,
160-
name: `name: '${vector.name}', length: ${vector.length}, type: ${vector.type}, test: ${test}, value: ${value}`,
161-
fn: op
162-
};
163-
}
164-
165-
function createTableIteratorTest(table) {
166-
let row;
167-
return {
168-
async: true,
169-
name: `length: ${table.length}`,
170-
fn() { for (row of table) {} }
171-
};
172-
}
173-
174-
function createTableCountTest(table, column, test, value) {
175-
let op;
176-
if (test == 'gteq') {
177-
op = function () {
178-
sum = 0;
179-
for (row of table) {
180-
sum += (row.get(column) >= value)
181-
}
182-
}
183-
} else if (test == 'eq') {
184-
op = function() {
185-
sum = 0;
186-
for (row of table) {
187-
sum += (row.get(column) == value)
188-
}
189-
}
190-
} else {
191-
throw new Error(`Unrecognized test "${test}"`);
192-
}
193-
194-
return {
195-
async: true,
196-
name: `name: '${table.columns[column].name}', length: ${table.length}, type: ${table.columns[column].type}, test: ${test}, value: ${value}`,
197-
fn: op
198-
};
199-
}
200-
201-
function createDataFrameIteratorTest(table) {
202-
let df = DataFrame.from(table);
203-
let idx;
204-
return {
205-
async: true,
206-
name: `length: ${table.length}`,
207-
fn() { for (idx of table) {} }
208-
};
209-
}
210-
211126
function createDataFrameDirectCountTest(table, column, test, value) {
212127
let df = DataFrame.from(table);
128+
let colidx = table.columns.findIndex((c)=>c.name === column);
213129

214130
if (test == 'gteq') {
215131
op = function () {
@@ -218,11 +134,11 @@ function createDataFrameDirectCountTest(table, column, test, value) {
218134
const length = df.lengths[batch];
219135

220136
// load batches
221-
const columns = df.getBatch(batch);
137+
const columns = df.batches[batch];
222138

223139
// yield all indices
224140
for (let idx = -1; ++idx < length;) {
225-
sum += (columns[column].get(idx) >= value);
141+
sum += (columns[colidx].get(idx) >= value);
226142
}
227143
}
228144
}
@@ -233,11 +149,11 @@ function createDataFrameDirectCountTest(table, column, test, value) {
233149
const length = df.lengths[batch];
234150

235151
// load batches
236-
const columns = df.getBatch(batch);
152+
const columns = df.batches[batch]
237153

238154
// yield all indices
239155
for (let idx = -1; ++idx < length;) {
240-
sum += (columns[column].get(idx) == value);
156+
sum += (columns[colidx].get(idx) == value);
241157
}
242158
}
243159
}
@@ -247,79 +163,28 @@ function createDataFrameDirectCountTest(table, column, test, value) {
247163

248164
return {
249165
async: true,
250-
name: `name: '${table.columns[column].name}', length: ${table.length}, type: ${table.columns[column].type}, test: ${test}, value: ${value}`,
251-
fn: op
252-
};
253-
}
254-
255-
function createDataFrameScanCountTest(table, column, test, value) {
256-
let df = DataFrame.from(table);
257-
258-
if (test == 'gteq') {
259-
op = function () {
260-
sum = 0;
261-
df.scan((idx, cols)=>{sum += cols[column].get(idx) >= value});
262-
}
263-
} else if (test == 'eq') {
264-
op = function() {
265-
sum = 0;
266-
df.scan((idx, cols)=>{sum += cols[column].get(idx) == value});
267-
console.log(sum);
268-
}
269-
} else {
270-
throw new Error(`Unrecognized test "${test}"`);
271-
}
272-
273-
return {
274-
async: true,
275-
name: `name: '${table.columns[column].name}', length: ${table.length}, type: ${table.columns[column].type}, test: ${test}, value: ${value}`,
166+
name: `name: '${column}', length: ${table.length}, type: ${table.columns[colidx].type}, test: ${test}, value: ${value}`,
276167
fn: op
277168
};
278169
}
279170

280171
function createDataFrameFilterCountTest(table, column, test, value) {
281172
let df = DataFrame.from(table);
173+
let colidx = table.columns.findIndex((c)=>c.name === column);
174+
282175
if (test == 'gteq') {
283-
df = df.filter(col(table.columns[column].name).gteq(value));
176+
df = df.filter(col(column).gteq(value));
284177
} else if (test == 'eq') {
285-
df = df.filter(col(table.columns[column].name).eq(value));
178+
df = df.filter(col(column).eq(value));
286179
} else {
287180
throw new Error(`Unrecognized test "${test}"`);
288181
}
289182

290183
return {
291184
async: true,
292-
name: `name: '${table.columns[column].name}', length: ${table.length}, type: ${table.columns[column].type}, test: ${test}, value: ${value}`,
185+
name: `name: '${column}', length: ${table.length}, type: ${table.columns[colidx].type}, test: ${test}, value: ${value}`,
293186
fn() {
294187
df.count();
295188
}
296189
};
297190
}
298-
299-
function createDataFrameIteratorCountTest(table, column, test, value) {
300-
let df = DataFrame.from(table);
301-
302-
if (test == 'gteq') {
303-
op = function () {
304-
sum = 0;
305-
for (idx of df) {
306-
sum += (df.columns[column].get(idx) >= value);
307-
}
308-
}
309-
} else if (test == 'eq') {
310-
op = function() {
311-
sum = 0;
312-
for (idx of df) {
313-
sum += (df.columns[column].get(idx) == value);
314-
}
315-
}
316-
} else {
317-
throw new Error(`Unrecognized test "${test}"`);
318-
}
319-
320-
return {
321-
async: true,
322-
name: `name: '${table.columns[column].name}', length: ${table.length}, type: ${table.columns[column].type}, test: ${test}, value: ${value}`,
323-
fn: op
324-
};
325-
}

js/perf/table_config.js

Lines changed: 14 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -22,15 +22,23 @@ const glob = require('glob');
2222
const config = [];
2323
const filenames = glob.sync(path.resolve(__dirname, `../test/data/tables/`, `*.arrow`));
2424

25-
tests = [
26-
{col: 0, test: 'gteq', value: 0 },
27-
{col: 1, test: 'gteq', value: 0 },
28-
{col: 2, test: 'eq', value: 'Seattle'},
29-
]
25+
tests = {
26+
"tracks": [
27+
{col: 'lat', test: 'gteq', value: 0 },
28+
{col: 'lng', test: 'gteq', value: 0 },
29+
{col: 'origin', test: 'eq', value: 'Seattle'},
30+
]
31+
}
3032

3133
for (const filename of filenames) {
3234
const { name } = path.parse(filename);
33-
config.push({ name, buffers: [fs.readFileSync(filename)], tests });
35+
if (name in tests) {
36+
config.push({
37+
name,
38+
buffers: [fs.readFileSync(filename)],
39+
tests: tests[name]
40+
});
41+
}
3442
}
3543

3644
module.exports = config;

js/src/Arrow.externs.ts

Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -82,3 +82,19 @@ let DictionaryVector = function() {};
8282
DictionaryVector.prototype.getKey;
8383
/** @type {?} */
8484
DictionaryVector.prototype.getValue;
85+
86+
let DataFrame = function () {};
87+
/** @type {?} */
88+
DataFrame.prototype.lengths;
89+
/** @type {?} */
90+
DataFrame.prototype.columns;
91+
/** @type {?} */
92+
DataFrame.prototype.batches;
93+
94+
let Col = function() {};
95+
/** @type {?} */
96+
Col.prototype.gteq;
97+
/** @type {?} */
98+
Col.prototype.lteq;
99+
/** @type {?} */
100+
Col.prototype.eq;
File renamed without changes.

0 commit comments

Comments
 (0)