Skip to content

Commit a1edac2

Browse files
author
Brian Hulette
committed
Add perf tests for table scans
1 parent 99e58da commit a1edac2

File tree

3 files changed

+150
-0
lines changed

3 files changed

+150
-0
lines changed

js/generate.py

Lines changed: 36 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,36 @@
1+
import pyarrow as pa
2+
import random
3+
import numpy as np
4+
import pandas as pd
5+
6+
7+
# Pool of city names that the 'origin' and 'destination' columns are drawn
# from; it is also passed as the fixed category list for those columns.
cities = [
    u'Charlottesville',
    u'New York',
    u'San Francisco',
    u'Seattle',
    u'Terre Haute',
    u'Washington, DC',
]
8+
9+
def generate_batch(batch_len):
    """Build one record batch containing ``batch_len`` randomly generated rows.

    Columns, in order:
      * ``lat``         - float32 values sampled with ``np.random.uniform(-90, 90)``
      * ``lng``         - float32 values sampled with ``np.random.uniform(-180, 180)``
      * ``origin``      - categorical, picked at random from ``cities``
      * ``destination`` - categorical, picked at random from ``cities``

    Returns:
        The result of ``pa.RecordBatch.from_arrays`` for those four columns.
    """
    def uniform_float32(low, high):
        # The Series is created with dtype float32 before conversion to Arrow.
        samples = np.random.uniform(low, high, batch_len)
        return pa.Array.from_pandas(pd.Series(samples, dtype="float32"))

    def random_cities():
        # ``cities`` is both the pool to pick from and the category list.
        picks = (random.choice(cities) for _ in range(batch_len))
        return pa.Array.from_pandas(pd.Categorical(picks, cities))

    # Columns are generated in the order they are named so the sequence of
    # draws from each random source stays the same.
    lat = uniform_float32(-90, 90)
    lng = uniform_float32(-180, 180)
    origin = random_cities()
    destination = random_cities()

    return pa.RecordBatch.from_arrays(
        [lat, lng, origin, destination],
        ['lat', 'lng', 'origin', 'destination'])
16+
17+
def write_record_batches(fd, batch_len, num_batches):
    """Write ``num_batches`` randomly generated record batches to ``fd``.

    Args:
        fd: Writable sink handed to ``pa.ipc.RecordBatchStreamWriter``.
        batch_len: Number of rows in each generated batch.
        num_batches: How many batches to write.
    """
    # A throwaway one-row batch is generated only to obtain the schema.
    writer = pa.ipc.RecordBatchStreamWriter(fd, generate_batch(1).schema)
    try:
        for _ in range(num_batches):
            writer.write_batch(generate_batch(batch_len))
    finally:
        # Close the writer even if generating or writing a batch fails.
        writer.close()
23+
24+
# Command-line entry point: write a file of randomly generated record batches.
if __name__ == "__main__":
    import argparse

    parser = argparse.ArgumentParser()
    parser.add_argument('filename', help='path of the output file to write')
    parser.add_argument('-n', '--num-batches', help='number of batches', type=int, default=10)
    parser.add_argument('-b', '--batch-size', help='size of each batch', type=int, default=100000)

    args = parser.parse_args()

    # A single-argument print(...) call behaves the same on Python 2 and 3.
    print("Writing {} {}-element batches to '{}'".format(args.num_batches, args.batch_size, args.filename))
    # The Arrow stream is binary data, so the file is opened in binary mode.
    with open(args.filename, 'wb') as fd:
        write_record_batches(fd, args.batch_size, args.num_batches)

js/perf/index.js

Lines changed: 78 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -41,6 +41,21 @@ for (let { name, buffers} of config) {
4141
suites.push(getByIndexSuite, iterateSuite, sliceSuite, parseSuite);
4242
}
4343

44+
// Table-level benchmarks: for every dataset listed in ./table_config, build a
// Table from its buffers and register three suites - a full table iteration,
// a row-wise count over the table, and the same count over the bare column.
for (const { name, buffers, tests } of require('./table_config')) {
    const tableIterateSuite = new Benchmark.Suite(`Table Iterate ${name}`, { async: true });
    const tableCountBySuite = new Benchmark.Suite(`Table Count By ${name}`, { async: true });
    const vectorCountBySuite = new Benchmark.Suite(`Vector Count By ${name}`, { async: true });
    const table = Table.from(buffers);

    tableIterateSuite.add(createTableIterateTest(table));
    // Declared with `const` so the loop variable does not leak as an implicit global.
    for (const test of tests) {
        tableCountBySuite.add(createTableCountByTest(table, test.col, test.test, test.value));
        vectorCountBySuite.add(createVectorCountByTest(table.columns[test.col], test.test, test.value));
    }

    suites.push(tableIterateSuite, tableCountBySuite, vectorCountBySuite);
}
58+
4459
console.log('Running apache-arrow performance tests...\n');
4560

4661
run();
@@ -109,3 +124,66 @@ function createGetByIndexTest(vector) {
109124
}
110125
};
111126
}
127+
128+
/**
 * Builds a benchmark that counts the cells of `vector` satisfying a
 * comparison against `value`.
 *
 * @param {Iterable} vector - Column to scan; its `name`, `length` and `type`
 *   properties are used to label the benchmark.
 * @param {string} test - Comparison to apply: 'gteq' (cell >= value) or
 *   'eq' (cell == value).
 * @param {*} value - Right-hand operand of the comparison.
 * @returns {{async: boolean, name: string, fn: Function}} Test definition;
 *   `fn` performs one full scan and returns the number of matching cells.
 * @throws {Error} If `test` is not one of the recognized comparisons.
 */
function createVectorCountByTest(vector, test, value) {
    let op;
    if (test === 'gteq') {
        op = function () {
            // Locals instead of implicit globals: safe under strict mode.
            let sum = 0;
            for (const cell of vector) {
                // The boolean result is coerced to 0 or 1 when added.
                sum += (cell >= value);
            }
            return sum;
        };
    } else if (test === 'eq') {
        op = function () {
            let sum = 0;
            for (const cell of vector) {
                // Loose equality is kept deliberately so the matching rules of
                // the benchmark are unchanged.
                sum += (cell == value);
            }
            return sum;
        };
    } else {
        throw new Error(`Unrecognized test "${test}"`);
    }

    return {
        async: true,
        name: `name: '${vector.name}', length: ${vector.length}, type: ${vector.type}, test: ${test}, value: ${value}`,
        fn: op
    };
}
154+
155+
/**
 * Builds a benchmark that walks every row of `table` without doing any
 * per-row work.
 *
 * @param {Iterable} table - Table to iterate; its `length` labels the test.
 * @returns {{async: boolean, name: string, fn: Function}} Test definition.
 */
function createTableIterateTest(table) {
    const label = `length: ${table.length}`;
    // The loop variable lives in the closure, outside the timed function.
    let current;
    return {
        async: true,
        name: label,
        fn() {
            for (current of table) {
                // Intentionally empty: only the iteration itself is exercised.
            }
        }
    };
}
163+
164+
/**
 * Builds a benchmark that iterates the rows of `table` and counts those whose
 * value in `column` satisfies a comparison against `value`.
 *
 * @param {Iterable} table - Table to scan; each row is read with
 *   `row.get(column)`. `table.length` and `table.columns[column]` label the
 *   benchmark.
 * @param {number} column - Index of the column to compare.
 * @param {string} test - Comparison to apply: 'gteq' (cell >= value) or
 *   'eq' (cell == value).
 * @param {*} value - Right-hand operand of the comparison.
 * @returns {{async: boolean, name: string, fn: Function}} Test definition;
 *   `fn` performs one full scan and returns the number of matching rows.
 * @throws {Error} If `test` is not one of the recognized comparisons.
 */
function createTableCountByTest(table, column, test, value) {
    let op;
    if (test === 'gteq') {
        op = function () {
            // Locals instead of implicit globals: safe under strict mode.
            let sum = 0;
            for (const row of table) {
                // The boolean result is coerced to 0 or 1 when added.
                sum += (row.get(column) >= value);
            }
            return sum;
        };
    } else if (test === 'eq') {
        op = function () {
            let sum = 0;
            for (const row of table) {
                // Loose equality is kept deliberately so the matching rules of
                // the benchmark are unchanged.
                sum += (row.get(column) == value);
            }
            return sum;
        };
    } else {
        throw new Error(`Unrecognized test "${test}"`);
    }

    return {
        async: true,
        name: `name: '${table.columns[column].name}', length: ${table.length}, type: ${table.columns[column].type}, test: ${test}, value: ${value}`,
        fn: op
    };
}

js/perf/table_config.js

Lines changed: 36 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,36 @@
1+
// Licensed to the Apache Software Foundation (ASF) under one
2+
// or more contributor license agreements. See the NOTICE file
3+
// distributed with this work for additional information
4+
// regarding copyright ownership. The ASF licenses this file
5+
// to you under the Apache License, Version 2.0 (the
6+
// "License"); you may not use this file except in compliance
7+
// with the License. You may obtain a copy of the License at
8+
//
9+
// http://www.apache.org/licenses/LICENSE-2.0
10+
//
11+
// Unless required by applicable law or agreed to in writing,
12+
// software distributed under the License is distributed on an
13+
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14+
// KIND, either express or implied. See the License for the
15+
// specific language governing permissions and limitations
16+
// under the License.
17+
18+
const fs = require('fs');
19+
const path = require('path');
20+
const glob = require('glob');
21+
22+
const config = [];
// Every .arrow file under ../test/data/tables becomes one benchmark dataset.
const filenames = glob.sync(path.resolve(__dirname, `../test/data/tables/`, `*.arrow`));

// Count-by scans attached to every dataset: `col` is the column index,
// `test` the comparison name and `value` its right-hand operand.
// Declared with `const` so the list does not leak as an implicit global.
const tests = [
    { col: 0, test: 'gteq', value: 0 },
    { col: 1, test: 'gteq', value: 0 },
    { col: 2, test: 'eq', value: 'Seattle' },
];

for (const filename of filenames) {
    // The dataset name is the file name without directory or extension.
    const { name } = path.parse(filename);
    config.push({ name, buffers: [fs.readFileSync(filename)], tests });
}

module.exports = config;

0 commit comments

Comments
 (0)