Skip to content

Commit 9da168b

Browse files
buffer: optimize Buffer.byteLength
Buffer.byteLength is important for speed because it is called whenever a new Buffer is created from a string. This commit optimizes Buffer.byteLength execution by: - moving the base64 length calculation into JS-land, which is now much faster - removing redundant code and streamlining the UTF8 length calculation. It also adds a benchmark and better tests. PR-URL: #1713 Reviewed-By: Trevor Norris <[email protected]> Reviewed-By: Ben Noordhuis <[email protected]>
1 parent 2a71f02 commit 9da168b

File tree

5 files changed

+155
-41
lines changed

5 files changed

+155
-41
lines changed
+55
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,55 @@
1+
var common = require('../common');
2+
3+
var bench = common.createBenchmark(main, {
4+
encoding: ['utf8', 'base64'],
5+
len: [1, 2, 4, 16, 64, 256], // x16
6+
n: [5e6]
7+
});
8+
9+
// 16 chars each
10+
var chars = [
11+
'hello brendan!!!', // 1 byte
12+
'ΰαβγδεζηθικλμνξο', // 2 bytes
13+
'挰挱挲挳挴挵挶挷挸挹挺挻挼挽挾挿', // 3 bytes
14+
'𠜎𠜱𠝹𠱓𠱸𠲖𠳏𠳕𠴕𠵼𠵿𠸎𠸏𠹷𠺝𠺢' // 4 bytes
15+
];
16+
17+
/**
 * Benchmark body: times `conf.n` calls to `Buffer.byteLength`, cycling over
 * one pre-built string per entry in `chars`.
 *
 * @param {Object} conf Benchmark configuration.
 * @param {number} conf.n Number of timed iterations.
 * @param {number} conf.len How many times each sample string is repeated.
 * @param {string} conf.encoding Either 'utf8' or 'base64'.
 * @throws {Error} If a timed call returns a length that differs from the
 *     value computed before timing started.
 */
function main(conf) {
  var n = conf.n | 0;
  var len = conf.len | 0;
  var encoding = conf.encoding;

  var strings = [];
  for (var string of chars) {
    // Strings must be built differently, depending on encoding
    var data = buildString(string, len);
    if (encoding === 'utf8') {
      strings.push(data);
    } else if (encoding === 'base64') {
      // Base64 strings will be much longer than their UTF8 counterparts
      strings.push(new Buffer(data, 'utf8').toString('base64'));
    }
  }

  // Compute the expected lengths up front, outside the timed region, so the
  // loop below can check each result and its calls cannot be discarded.
  var results = strings.map(function(val) {
    return Buffer.byteLength(val, encoding);
  });

  bench.start();
  for (var i = 0; i < n; i++) {
    // Rotate through the samples using the loop counter. Indexing with `n`
    // would pick the same string on every iteration.
    var index = i % strings.length;
    // Go!
    var r = Buffer.byteLength(strings[index], encoding);

    if (r !== results[index])
      throw new Error('incorrect return value');
  }
  bench.end(n);
}
50+
51+
/**
 * Returns `str` concatenated with itself `times` times.
 *
 * Built with a bounded loop rather than recursion, so a count below 1
 * yields an empty string instead of recursing until the stack overflows.
 *
 * @param {string} str Chunk to repeat.
 * @param {number} times Number of copies to concatenate.
 * @returns {string} The repeated string ('' when `times` is less than 1).
 */
function buildString(str, times) {
  var out = '';
  for (var i = 0; i < times; i++)
    out += str;
  return out;
}

lib/buffer.js

+49-17
Original file line numberDiff line numberDiff line change
@@ -272,30 +272,62 @@ Buffer.concat = function(list, length) {
272272
};
273273

274274

275+
/**
 * Computes the decoded size of a base64 string from its character count.
 *
 * @param {string} str The base64 text.
 * @param {number} bytes Number of characters in `str`.
 * @returns {number} Decoded byte count: up to two trailing '=' characters are
 *     discounted, then the 3/4 base64 ratio is applied (rounded down).
 */
function base64ByteLength(str, bytes) {
  var PAD = 0x3D;  // char code of '='
  var unpadded = bytes;

  // A second '=' can only exist directly before a first one, so the second
  // check is nested inside the first.
  if (str.charCodeAt(unpadded - 1) === PAD) {
    unpadded -= 1;
    if (unpadded > 1 && str.charCodeAt(unpadded - 1) === PAD)
      unpadded -= 1;
  }

  // Every 4 base64 characters encode 3 bytes.
  return (unpadded * 3) >>> 2;
}
285+
286+
275287
/**
 * Returns the number of bytes `string` occupies when written in `encoding`.
 *
 * As of the added (`+`) lines of this hunk:
 *  - a non-string `string` is coerced with `'' + string`;
 *  - an empty string returns 0 for every encoding;
 *  - 'ascii', 'binary', 'raw' and 'raws' return the string length;
 *  - 'utf8' / 'utf-8' delegate to `binding.byteLengthUtf8`;
 *  - 'ucs2', 'ucs-2', 'utf16le' and 'utf-16le' return twice the length;
 *  - 'hex' returns half the length, rounded down;
 *  - 'base64' delegates to `base64ByteLength`;
 *  - any other value is lower-cased once and retried; if it still does not
 *    match, the UTF-8 length is returned.
 *
 * @param {*} string Value to measure.
 * @param {string} [encoding] Encoding name.
 * @returns {number} Byte length.
 */
function byteLength(string, encoding) {
276-
if (typeof(string) !== 'string')
277-
string = String(string);
288+
if (typeof string !== 'string')
289+
string = '' + string;
278290

279-
if (string.length === 0)
291+
var len = string.length;
292+
if (len === 0)
280293
return 0;
281294

282-
switch (encoding) {
283-
case 'ascii':
284-
case 'binary':
285-
case 'raw':
286-
return string.length;
295+
// Use a for loop to avoid recursion
296+
var loweredCase = false;
297+
for (;;) {
298+
switch (encoding) {
299+
case 'ascii':
300+
case 'binary':
301+
// Deprecated
302+
case 'raw':
303+
case 'raws':
304+
return len;
287305

288-
case 'ucs2':
289-
case 'ucs-2':
290-
case 'utf16le':
291-
case 'utf-16le':
292-
return string.length * 2;
306+
case 'utf8':
307+
case 'utf-8':
308+
return binding.byteLengthUtf8(string);
293309

294-
case 'hex':
295-
return string.length >>> 1;
296-
}
310+
case 'ucs2':
311+
case 'ucs-2':
312+
case 'utf16le':
313+
case 'utf-16le':
314+
return len * 2;
315+
316+
case 'hex':
317+
return len >>> 1;
297318

298-
return binding.byteLength(string, encoding);
319+
case 'base64':
320+
return base64ByteLength(string, len);
321+
322+
default:
323+
// The C++ binding defaulted to UTF8, we should too.
324+
if (loweredCase)
325+
return binding.byteLengthUtf8(string);
326+
327+
// First miss: normalise the encoding name and go around the loop once more.
encoding = ('' + encoding).toLowerCase();
328+
loweredCase = true;
329+
}
330+
}
299331
}
300332

301333
Buffer.byteLength = byteLength;

src/node_buffer.cc

+5-11
Original file line numberDiff line numberDiff line change
@@ -541,17 +541,11 @@ void WriteDoubleBE(const FunctionCallbackInfo<Value>& args) {
541541
}
542542

543543

544-
void ByteLength(const FunctionCallbackInfo<Value> &args) {
545-
Environment* env = Environment::GetCurrent(args);
546-
547-
if (!args[0]->IsString())
548-
return env->ThrowTypeError("Argument must be a string");
549-
550-
Local<String> s = args[0]->ToString(env->isolate());
551-
enum encoding e = ParseEncoding(env->isolate(), args[1], UTF8);
544+
// Binding entry point registered as "byteLengthUtf8" in Initialize().
// Sets the return value to the UTF-8 length of args[0] as reported by
// String::Utf8Length(). A non-string args[0] fails the CHECK instead of
// throwing a TypeError as the removed ByteLength did.
void ByteLengthUtf8(const FunctionCallbackInfo<Value> &args) {
545+
CHECK(args[0]->IsString());
552546

553-
uint32_t size = StringBytes::Size(env->isolate(), s, e);
554-
args.GetReturnValue().Set(size);
547+
// Fast case: avoid StringBytes on UTF8 string. Jump to v8.
548+
args.GetReturnValue().Set(args[0].As<String>()->Utf8Length());
555549
}
556550

557551

@@ -745,7 +739,7 @@ void Initialize(Handle<Object> target,
745739

746740
env->SetMethod(target, "setupBufferJS", SetupBufferJS);
747741

748-
env->SetMethod(target, "byteLength", ByteLength);
742+
env->SetMethod(target, "byteLengthUtf8", ByteLengthUtf8);
749743
env->SetMethod(target, "compare", Compare);
750744
env->SetMethod(target, "fill", Fill);
751745
env->SetMethod(target, "indexOfBuffer", IndexOfBuffer);
+46
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,46 @@
1+
'use strict';
2+
3+
var common = require('../common');
4+
var assert = require('assert');
5+
var Buffer = require('buffer').Buffer;
6+
7+
// coerce values to string
8+
assert.equal(Buffer.byteLength(32, 'raw'), 2);
9+
assert.equal(Buffer.byteLength(NaN, 'utf8'), 3);
10+
assert.equal(Buffer.byteLength({}, 'raws'), 15);
11+
assert.equal(Buffer.byteLength(), 9);
12+
13+
// special case: zero length string
14+
assert.equal(Buffer.byteLength('', 'ascii'), 0);
15+
assert.equal(Buffer.byteLength('', 'HeX'), 0);
16+
17+
// utf8
18+
assert.equal(Buffer.byteLength('∑éllö wørl∂!', 'utf-8'), 19);
19+
assert.equal(Buffer.byteLength('κλμνξο', 'utf8'), 12);
20+
assert.equal(Buffer.byteLength('挵挶挷挸挹', 'utf-8'), 15);
21+
assert.equal(Buffer.byteLength('𠝹𠱓𠱸', 'UTF8'), 12);
22+
// without an encoding, utf8 should be assumed
23+
assert.equal(Buffer.byteLength('hey there'), 9);
24+
assert.equal(Buffer.byteLength('𠱸挶νξ#xx :)'), 17);
25+
assert.equal(Buffer.byteLength('hello world', ''), 11);
26+
// it should also be assumed with unrecognized encoding
27+
assert.equal(Buffer.byteLength('hello world', 'abc'), 11);
28+
assert.equal(Buffer.byteLength('ßœ∑≈', 'unkn0wn enc0ding'), 10);
29+
30+
// base64
31+
assert.equal(Buffer.byteLength('aGVsbG8gd29ybGQ=', 'base64'), 11);
32+
assert.equal(Buffer.byteLength('bm9kZS5qcyByb2NrcyE=', 'base64'), 14);
33+
assert.equal(Buffer.byteLength('aGkk', 'base64'), 3);
34+
assert.equal(Buffer.byteLength('bHNrZGZsa3NqZmtsc2xrZmFqc2RsZmtqcw==',
35+
'base64'), 25);
36+
// special padding
37+
assert.equal(Buffer.byteLength('aaa=', 'base64'), 2);
38+
assert.equal(Buffer.byteLength('aaaa==', 'base64'), 3);
39+
40+
assert.equal(Buffer.byteLength('Il était tué'), 14);
41+
assert.equal(Buffer.byteLength('Il était tué', 'utf8'), 14);
42+
assert.equal(Buffer.byteLength('Il était tué', 'ascii'), 12);
43+
assert.equal(Buffer.byteLength('Il était tué', 'binary'), 12);
44+
['ucs2', 'ucs-2', 'utf16le', 'utf-16le'].forEach(function(encoding) {
45+
assert.equal(24, Buffer.byteLength('Il était tué', encoding));
46+
});

test/parallel/test-buffer.js

-13
Original file line numberDiff line numberDiff line change
@@ -561,15 +561,6 @@ assert.equal(sb, s);
561561
b = new Buffer('abcde');
562562
assert.equal('bcde', b.slice(1).toString());
563563

564-
// byte length
565-
assert.equal(14, Buffer.byteLength('Il était tué'));
566-
assert.equal(14, Buffer.byteLength('Il était tué', 'utf8'));
567-
['ucs2', 'ucs-2', 'utf16le', 'utf-16le'].forEach(function(encoding) {
568-
assert.equal(24, Buffer.byteLength('Il était tué', encoding));
569-
});
570-
assert.equal(12, Buffer.byteLength('Il était tué', 'ascii'));
571-
assert.equal(12, Buffer.byteLength('Il était tué', 'binary'));
572-
573564
// slice(0,0).length === 0
574565
assert.equal(0, Buffer('hello').slice(0, 0).length);
575566

@@ -1074,10 +1065,6 @@ assert.equal(buf.readInt8(0), -1);
10741065
assert.ok(typeof Buffer(5).slice(0, 5).parent === 'object');
10751066
})();
10761067

1077-
// Make sure byteLength properly checks for base64 padding
1078-
assert.equal(Buffer.byteLength('aaa=', 'base64'), 2);
1079-
assert.equal(Buffer.byteLength('aaaa==', 'base64'), 3);
1080-
10811068
// Regression test for #5482: should throw but not assert in C++ land.
10821069
assert.throws(function() {
10831070
Buffer('', 'buffer');

0 commit comments

Comments
 (0)