16
16
// under the License.
17
17
18
18
// Use the ES5 UMD target as perf baseline
19
- // const { lit, col, DataFrame, Table, readVectors } = require('../targets/es5/umd');
20
- // const { lit, col, DataFrame, Table, readVectors } = require('../targets/es5/cjs');
21
- // const { lit, col, DataFrame, Table, readVectors } = require('../targets/es2015/umd');
22
- const { lit , col, DataFrame, Table, readVectors } = require ( '../targets/es2015/cjs' ) ;
19
+ const { col, DataFrame, Table, readVectors } = require ( '../targets/es5/umd' ) ;
20
+ // const { col, DataFrame, Table, readVectors } = require('../targets/es5/cjs');
21
+ // const { col, DataFrame, Table, readVectors } = require('../targets/es2015/umd');
22
+ // const { col, DataFrame, Table, readVectors } = require('../targets/es2015/cjs');
23
23
24
24
const config = require ( './config' ) ;
25
25
const Benchmark = require ( 'benchmark' ) ;
26
26
27
27
const suites = [ ] ;
28
28
29
- // for (let { name, buffers} of config) {
30
- // const parseSuite = new Benchmark.Suite(`Parse "${name}"`, { async: true });
31
- // const sliceSuite = new Benchmark.Suite(`Slice "${name}" vectors`, { async: true });
32
- // const iterateSuite = new Benchmark.Suite(`Iterate "${name}" vectors`, { async: true });
33
- // const getByIndexSuite = new Benchmark.Suite(`Get "${name}" values by index`, { async: true });
34
- // parseSuite.add(createFromTableTest(name, buffers));
35
- // parseSuite.add(createReadVectorsTest(name, buffers));
36
- // for (const vector of Table.from(buffers).columns) {
37
- // sliceSuite.add(createSliceTest(vector));
38
- // iterateSuite.add(createIterateTest(vector));
39
- // getByIndexSuite.add(createGetByIndexTest(vector));
40
- // }
41
- // suites.push(getByIndexSuite, iterateSuite, sliceSuite, parseSuite);
42
- // }
29
+ for ( let { name, buffers} of config ) {
30
+ const parseSuite = new Benchmark . Suite ( `Parse "${ name } "` , { async : true } ) ;
31
+ const sliceSuite = new Benchmark . Suite ( `Slice "${ name } " vectors` , { async : true } ) ;
32
+ const iterateSuite = new Benchmark . Suite ( `Iterate "${ name } " vectors` , { async : true } ) ;
33
+ const getByIndexSuite = new Benchmark . Suite ( `Get "${ name } " values by index` , { async : true } ) ;
34
+ parseSuite . add ( createFromTableTest ( name , buffers ) ) ;
35
+ parseSuite . add ( createReadVectorsTest ( name , buffers ) ) ;
36
+ for ( const vector of Table . from ( buffers ) . columns ) {
37
+ sliceSuite . add ( createSliceTest ( vector ) ) ;
38
+ iterateSuite . add ( createIterateTest ( vector ) ) ;
39
+ getByIndexSuite . add ( createGetByIndexTest ( vector ) ) ;
40
+ }
41
+ suites . push ( getByIndexSuite , iterateSuite , sliceSuite , parseSuite ) ;
42
+ }
43
43
44
44
for ( let { name, buffers, tests} of require ( './table_config' ) ) {
45
- const tableIteratorSuite = new Benchmark . Suite ( `Table Iterator "${ name } "` , { async : true } ) ;
46
- const tableCountSuite = new Benchmark . Suite ( `Table Count "${ name } "` , { async : true } ) ;
47
- const dfIteratorSuite = new Benchmark . Suite ( `DataFrame Iterator "${ name } "` , { async : true } ) ;
48
- const dfIteratorCountSuite = new Benchmark . Suite ( `DataFrame Iterator Count "${ name } "` , { async : true } ) ;
45
+ const dfFilterCountSuite = new Benchmark . Suite ( `DataFrame Filter-Scan Count "${ name } "` , { async : true } ) ;
49
46
const dfDirectCountSuite = new Benchmark . Suite ( `DataFrame Direct Count "${ name } "` , { async : true } ) ;
50
- const dfScanCountSuite = new Benchmark . Suite ( `DataFrame Scan Count "${ name } "` , { async : true } ) ;
51
- const dfFilterCountSuite = new Benchmark . Suite ( `DataFrame Filter Scan Count "${ name } "` , { async : true } ) ;
52
- const vectorCountSuite = new Benchmark . Suite ( `Vector Count "${ name } "` , { async : true } ) ;
53
47
const table = Table . from ( buffers ) ;
54
48
55
- tableIteratorSuite . add ( createTableIteratorTest ( table ) ) ;
56
- dfIteratorSuite . add ( createDataFrameIteratorTest ( table ) ) ;
57
49
for ( test of tests ) {
58
- tableCountSuite . add ( createTableCountTest ( table , test . col , test . test , test . value ) )
59
- dfIteratorCountSuite . add ( createDataFrameIteratorCountTest ( table , test . col , test . test , test . value ) )
60
- dfDirectCountSuite . add ( createDataFrameDirectCountTest ( table , test . col , test . test , test . value ) )
61
- dfScanCountSuite . add ( createDataFrameScanCountTest ( table , test . col , test . test , test . value ) )
62
50
dfFilterCountSuite . add ( createDataFrameFilterCountTest ( table , test . col , test . test , test . value ) )
63
- vectorCountSuite . add ( createVectorCountTest ( table . columns [ test . col ] , test . test , test . value ) )
51
+ dfDirectCountSuite . add ( createDataFrameDirectCountTest ( table , test . col , test . test , test . value ) )
64
52
}
65
53
66
- suites . push ( tableIteratorSuite , tableCountSuite , dfIteratorSuite , dfIteratorCountSuite , dfDirectCountSuite , dfScanCountSuite , dfFilterCountSuite , vectorCountSuite )
54
+ suites . push ( dfFilterCountSuite , dfDirectCountSuite )
67
55
}
68
56
69
57
console . log ( 'Running apache-arrow performance tests...\n' ) ;
@@ -135,81 +123,9 @@ function createGetByIndexTest(vector) {
135
123
} ;
136
124
}
137
125
138
- function createVectorCountTest ( vector , test , value ) {
139
- let op ;
140
- if ( test == 'gteq' ) {
141
- op = function ( ) {
142
- sum = 0 ;
143
- for ( cell of vector ) {
144
- sum += ( cell >= value )
145
- }
146
- }
147
- } else if ( test == 'eq' ) {
148
- op = function ( ) {
149
- sum = 0 ;
150
- for ( cell of vector ) {
151
- sum += ( cell == value )
152
- }
153
- }
154
- } else {
155
- throw new Error ( `Unrecognized test "$test"` ) ;
156
- }
157
-
158
- return {
159
- async : true ,
160
- name : `name: '${ vector . name } ', length: ${ vector . length } , type: ${ vector . type } , test: ${ test } , value: ${ value } ` ,
161
- fn : op
162
- } ;
163
- }
164
-
165
- function createTableIteratorTest ( table ) {
166
- let row ;
167
- return {
168
- async : true ,
169
- name : `length: ${ table . length } ` ,
170
- fn ( ) { for ( row of table ) { } }
171
- } ;
172
- }
173
-
174
- function createTableCountTest ( table , column , test , value ) {
175
- let op ;
176
- if ( test == 'gteq' ) {
177
- op = function ( ) {
178
- sum = 0 ;
179
- for ( row of table ) {
180
- sum += ( row . get ( column ) >= value )
181
- }
182
- }
183
- } else if ( test == 'eq' ) {
184
- op = function ( ) {
185
- sum = 0 ;
186
- for ( row of table ) {
187
- sum += ( row . get ( column ) == value )
188
- }
189
- }
190
- } else {
191
- throw new Error ( `Unrecognized test "${ test } "` ) ;
192
- }
193
-
194
- return {
195
- async : true ,
196
- name : `name: '${ table . columns [ column ] . name } ', length: ${ table . length } , type: ${ table . columns [ column ] . type } , test: ${ test } , value: ${ value } ` ,
197
- fn : op
198
- } ;
199
- }
200
-
201
- function createDataFrameIteratorTest ( table ) {
202
- let df = DataFrame . from ( table ) ;
203
- let idx ;
204
- return {
205
- async : true ,
206
- name : `length: ${ table . length } ` ,
207
- fn ( ) { for ( idx of table ) { } }
208
- } ;
209
- }
210
-
211
126
function createDataFrameDirectCountTest ( table , column , test , value ) {
212
127
let df = DataFrame . from ( table ) ;
128
+ let colidx = table . columns . findIndex ( ( c ) => c . name === column ) ;
213
129
214
130
if ( test == 'gteq' ) {
215
131
op = function ( ) {
@@ -218,11 +134,11 @@ function createDataFrameDirectCountTest(table, column, test, value) {
218
134
const length = df . lengths [ batch ] ;
219
135
220
136
// load batches
221
- const columns = df . getBatch ( batch ) ;
137
+ const columns = df . batches [ batch ] ;
222
138
223
139
// yield all indices
224
140
for ( let idx = - 1 ; ++ idx < length ; ) {
225
- sum += ( columns [ column ] . get ( idx ) >= value ) ;
141
+ sum += ( columns [ colidx ] . get ( idx ) >= value ) ;
226
142
}
227
143
}
228
144
}
@@ -233,11 +149,11 @@ function createDataFrameDirectCountTest(table, column, test, value) {
233
149
const length = df . lengths [ batch ] ;
234
150
235
151
// load batches
236
- const columns = df . getBatch ( batch ) ;
152
+ const columns = df . batches [ batch ]
237
153
238
154
// yield all indices
239
155
for ( let idx = - 1 ; ++ idx < length ; ) {
240
- sum += ( columns [ column ] . get ( idx ) == value ) ;
156
+ sum += ( columns [ colidx ] . get ( idx ) == value ) ;
241
157
}
242
158
}
243
159
}
@@ -247,79 +163,28 @@ function createDataFrameDirectCountTest(table, column, test, value) {
247
163
248
164
return {
249
165
async : true ,
250
- name : `name: '${ table . columns [ column ] . name } ', length: ${ table . length } , type: ${ table . columns [ column ] . type } , test: ${ test } , value: ${ value } ` ,
251
- fn : op
252
- } ;
253
- }
254
-
255
- function createDataFrameScanCountTest ( table , column , test , value ) {
256
- let df = DataFrame . from ( table ) ;
257
-
258
- if ( test == 'gteq' ) {
259
- op = function ( ) {
260
- sum = 0 ;
261
- df . scan ( ( idx , cols ) => { sum += cols [ column ] . get ( idx ) >= value } ) ;
262
- }
263
- } else if ( test == 'eq' ) {
264
- op = function ( ) {
265
- sum = 0 ;
266
- df . scan ( ( idx , cols ) => { sum += cols [ column ] . get ( idx ) == value } ) ;
267
- console . log ( sum ) ;
268
- }
269
- } else {
270
- throw new Error ( `Unrecognized test "${ test } "` ) ;
271
- }
272
-
273
- return {
274
- async : true ,
275
- name : `name: '${ table . columns [ column ] . name } ', length: ${ table . length } , type: ${ table . columns [ column ] . type } , test: ${ test } , value: ${ value } ` ,
166
+ name : `name: '${ column } ', length: ${ table . length } , type: ${ table . columns [ colidx ] . type } , test: ${ test } , value: ${ value } ` ,
276
167
fn : op
277
168
} ;
278
169
}
279
170
280
171
function createDataFrameFilterCountTest ( table , column , test , value ) {
281
172
let df = DataFrame . from ( table ) ;
173
+ let colidx = table . columns . findIndex ( ( c ) => c . name === column ) ;
174
+
282
175
if ( test == 'gteq' ) {
283
- df = df . filter ( col ( table . columns [ column ] . name ) . gteq ( value ) ) ;
176
+ df = df . filter ( col ( column ) . gteq ( value ) ) ;
284
177
} else if ( test == 'eq' ) {
285
- df = df . filter ( col ( table . columns [ column ] . name ) . eq ( value ) ) ;
178
+ df = df . filter ( col ( column ) . eq ( value ) ) ;
286
179
} else {
287
180
throw new Error ( `Unrecognized test "${ test } "` ) ;
288
181
}
289
182
290
183
return {
291
184
async : true ,
292
- name : `name: '${ table . columns [ column ] . name } ', length: ${ table . length } , type: ${ table . columns [ column ] . type } , test: ${ test } , value: ${ value } ` ,
185
+ name : `name: '${ column } ', length: ${ table . length } , type: ${ table . columns [ colidx ] . type } , test: ${ test } , value: ${ value } ` ,
293
186
fn ( ) {
294
187
df . count ( ) ;
295
188
}
296
189
} ;
297
190
}
298
-
299
- function createDataFrameIteratorCountTest ( table , column , test , value ) {
300
- let df = DataFrame . from ( table ) ;
301
-
302
- if ( test == 'gteq' ) {
303
- op = function ( ) {
304
- sum = 0 ;
305
- for ( idx of df ) {
306
- sum += ( df . columns [ column ] . get ( idx ) >= value ) ;
307
- }
308
- }
309
- } else if ( test == 'eq' ) {
310
- op = function ( ) {
311
- sum = 0 ;
312
- for ( idx of df ) {
313
- sum += ( df . columns [ column ] . get ( idx ) == value ) ;
314
- }
315
- }
316
- } else {
317
- throw new Error ( `Unrecognized test "${ test } "` ) ;
318
- }
319
-
320
- return {
321
- async : true ,
322
- name : `name: '${ table . columns [ column ] . name } ', length: ${ table . length } , type: ${ table . columns [ column ] . type } , test: ${ test } , value: ${ value } ` ,
323
- fn : op
324
- } ;
325
- }
0 commit comments