Skip to content

Commit 53421b1

Browse files
mscdex authored and italoacasas committed
querystring: improve parse() performance
PR-URL: #10874
Reviewed-By: James M Snell <[email protected]>
1 parent d13aba8 commit 53421b1

File tree

2 files changed

+74
-63
lines changed

2 files changed

+74
-63
lines changed

benchmark/querystring/querystring-parse.js

+12-21
Original file line number | Diff line number | Diff line change
@@ -3,35 +3,26 @@ var common = require('../common.js');
33
var querystring = require('querystring');
44
var v8 = require('v8');
55

6-
var types = [
7-
'noencode',
8-
'multicharsep',
9-
'encodemany',
10-
'encodelast',
11-
'multivalue',
12-
'multivaluemany',
13-
'manypairs'
14-
];
6+
var inputs = {
7+
noencode: 'foo=bar&baz=quux&xyzzy=thud',
8+
multicharsep: 'foo=bar&&&&&&&&&&baz=quux&&&&&&&&&&xyzzy=thud',
9+
encodefake: 'foo=%©ar&baz=%A©uux&xyzzy=%©ud',
10+
encodemany: '%66%6F%6F=bar&%62%61%7A=quux&xyzzy=%74h%75d',
11+
encodelast: 'foo=bar&baz=quux&xyzzy=thu%64',
12+
multivalue: 'foo=bar&foo=baz&foo=quux&quuy=quuz',
13+
multivaluemany: 'foo=bar&foo=baz&foo=quux&quuy=quuz&foo=abc&foo=def&' +
14+
'foo=ghi&foo=jkl&foo=mno&foo=pqr&foo=stu&foo=vwxyz',
15+
manypairs: 'a&b&c&d&e&f&g&h&i&j&k&l&m&n&o&p&q&r&s&t&u&v&w&x&y&z'
16+
};
1517

1618
var bench = common.createBenchmark(main, {
17-
type: types,
19+
type: Object.keys(inputs),
1820
n: [1e6],
1921
});
2022

2123
function main(conf) {
2224
var type = conf.type;
2325
var n = conf.n | 0;
24-
25-
var inputs = {
26-
noencode: 'foo=bar&baz=quux&xyzzy=thud',
27-
multicharsep: 'foo=bar&&&&&&&&&&baz=quux&&&&&&&&&&xyzzy=thud',
28-
encodemany: '%66%6F%6F=bar&%62%61%7A=quux&xyzzy=%74h%75d',
29-
encodelast: 'foo=bar&baz=quux&xyzzy=thu%64',
30-
multivalue: 'foo=bar&foo=baz&foo=quux&quuy=quuz',
31-
multivaluemany: 'foo=bar&foo=baz&foo=quux&quuy=quuz&foo=abc&foo=def&' +
32-
'foo=ghi&foo=jkl&foo=mno&foo=pqr&foo=stu&foo=vwxyz',
33-
manypairs: 'a&b&c&d&e&f&g&h&i&j&k&l&m&n&o&p&q&r&s&t&u&v&w&x&y&z'
34-
};
3526
var input = inputs[type];
3627

3728
// Force-optimize querystring.parse() so that the benchmark doesn't get

lib/querystring.js

+62-42
Original file line number | Diff line number | Diff line change
@@ -1,8 +1,19 @@
1-
// Query String Utilities
2-
31
'use strict';
42

5-
const QueryString = exports;
3+
const QueryString = module.exports = {
4+
unescapeBuffer,
5+
// `unescape()` is a JS global, so we need to use a different local name
6+
unescape: qsUnescape,
7+
8+
// `escape()` is a JS global, so we need to use a different local name
9+
escape: qsEscape,
10+
11+
stringify,
12+
encode: stringify,
13+
14+
parse,
15+
decode: parse
16+
};
617
const Buffer = require('buffer').Buffer;
718

819
// This constructor is used to store parsed query string values. Instantiating
@@ -13,7 +24,7 @@ ParsedQueryString.prototype = Object.create(null);
1324

1425

1526
// a safe fast alternative to decodeURIComponent
16-
QueryString.unescapeBuffer = function(s, decodeSpaces) {
27+
function unescapeBuffer(s, decodeSpaces) {
1728
var out = Buffer.allocUnsafe(s.length);
1829
var state = 0;
1930
var n, m, hexchar;
@@ -77,7 +88,7 @@ QueryString.unescapeBuffer = function(s, decodeSpaces) {
7788
// TODO support returning arbitrary buffers.
7889

7990
return out.slice(0, outIndex - 1);
80-
};
91+
}
8192

8293

8394
function qsUnescape(s, decodeSpaces) {
@@ -87,13 +98,12 @@ function qsUnescape(s, decodeSpaces) {
8798
return QueryString.unescapeBuffer(s, decodeSpaces).toString();
8899
}
89100
}
90-
QueryString.unescape = qsUnescape;
91101

92102

93103
var hexTable = new Array(256);
94104
for (var i = 0; i < 256; ++i)
95105
hexTable[i] = '%' + ((i < 16 ? '0' : '') + i.toString(16)).toUpperCase();
96-
QueryString.escape = function(str) {
106+
function qsEscape(str) {
97107
// replaces encodeURIComponent
98108
// http://www.ecma-international.org/ecma-262/5.1/#sec-15.1.3.4
99109
if (typeof str !== 'string') {
@@ -164,20 +174,20 @@ QueryString.escape = function(str) {
164174
if (lastPos < str.length)
165175
return out + str.slice(lastPos);
166176
return out;
167-
};
177+
}
168178

169-
var stringifyPrimitive = function(v) {
179+
function stringifyPrimitive(v) {
170180
if (typeof v === 'string')
171181
return v;
172182
if (typeof v === 'number' && isFinite(v))
173183
return '' + v;
174184
if (typeof v === 'boolean')
175185
return v ? 'true' : 'false';
176186
return '';
177-
};
187+
}
178188

179189

180-
QueryString.stringify = QueryString.encode = function(obj, sep, eq, options) {
190+
function stringify(obj, sep, eq, options) {
181191
sep = sep || '&';
182192
eq = eq || '=';
183193

@@ -215,34 +225,43 @@ QueryString.stringify = QueryString.encode = function(obj, sep, eq, options) {
215225
return fields;
216226
}
217227
return '';
218-
};
228+
}
219229

220-
// Parse a key/val string.
221-
QueryString.parse = QueryString.decode = function(qs, sep, eq, options) {
222-
sep = sep || '&';
223-
eq = eq || '=';
230+
function charCodes(str) {
231+
if (str.length === 0) return [];
232+
if (str.length === 1) return [str.charCodeAt(0)];
233+
const ret = [];
234+
for (var i = 0; i < str.length; ++i)
235+
ret[ret.length] = str.charCodeAt(i);
236+
return ret;
237+
}
238+
const defSepCodes = [38]; // &
239+
const defEqCodes = [61]; // =
224240

241+
// Parse a key/val string.
242+
function parse(qs, sep, eq, options) {
225243
const obj = new ParsedQueryString();
226244

227245
if (typeof qs !== 'string' || qs.length === 0) {
228246
return obj;
229247
}
230248

231-
if (typeof sep !== 'string')
232-
sep += '';
233-
234-
const eqLen = eq.length;
235-
const sepLen = sep.length;
249+
var sepCodes = (!sep ? defSepCodes : charCodes(sep + ''));
250+
var eqCodes = (!eq ? defEqCodes : charCodes(eq + ''));
251+
const sepLen = sepCodes.length;
252+
const eqLen = eqCodes.length;
236253

237-
var maxKeys = 1000;
254+
var pairs = 1000;
238255
if (options && typeof options.maxKeys === 'number') {
239-
maxKeys = options.maxKeys;
256+
// -1 is used in place of a value like Infinity for meaning
257+
// "unlimited pairs" because of additional checks V8 (at least as of v5.4)
258+
// has to do when using variables that contain values like Infinity. Since
259+
// `pairs` is always decremented and checked explicitly for 0, -1 works
260+
// effectively the same as Infinity, while providing a significant
261+
// performance boost.
262+
pairs = (options.maxKeys > 0 ? options.maxKeys : -1);
240263
}
241264

242-
var pairs = Infinity;
243-
if (maxKeys > 0)
244-
pairs = maxKeys;
245-
246265
var decode = QueryString.unescape;
247266
if (options && typeof options.decodeURIComponent === 'function') {
248267
decode = options.decodeURIComponent;
@@ -262,7 +281,7 @@ QueryString.parse = QueryString.decode = function(qs, sep, eq, options) {
262281
const code = qs.charCodeAt(i);
263282

264283
// Try matching key/value pair separator (e.g. '&')
265-
if (code === sep.charCodeAt(sepIdx)) {
284+
if (code === sepCodes[sepIdx]) {
266285
if (++sepIdx === sepLen) {
267286
// Key/value pair separator match!
268287
const end = i - sepIdx + 1;
@@ -284,10 +303,10 @@ QueryString.parse = QueryString.decode = function(qs, sep, eq, options) {
284303
keys[keys.length] = key;
285304
} else {
286305
const curValue = obj[key];
287-
// `instanceof Array` is used instead of Array.isArray() because it
288-
// is ~15-20% faster with v8 4.7 and is safe to use because we are
289-
// using it with values being created within this function
290-
if (curValue instanceof Array)
306+
// A simple Array-specific property check is enough here to
307+
// distinguish from a string value and is faster and still safe since
308+
// we are generating all of the values being assigned.
309+
if (curValue.pop)
291310
curValue[curValue.length] = value;
292311
else
293312
obj[key] = [curValue, value];
@@ -322,7 +341,7 @@ QueryString.parse = QueryString.decode = function(qs, sep, eq, options) {
322341

323342
// Try matching key/value separator (e.g. '=') if we haven't already
324343
if (eqIdx < eqLen) {
325-
if (code === eq.charCodeAt(eqIdx)) {
344+
if (code === eqCodes[eqIdx]) {
326345
if (++eqIdx === eqLen) {
327346
// Key/value separator match!
328347
const end = i - eqIdx + 1;
@@ -354,12 +373,12 @@ QueryString.parse = QueryString.decode = function(qs, sep, eq, options) {
354373

355374
if (code === 43/*+*/) {
356375
if (eqIdx < eqLen) {
357-
if (i - lastPos > 0)
376+
if (lastPos < i)
358377
key += qs.slice(lastPos, i);
359378
key += '%20';
360379
keyEncoded = true;
361380
} else {
362-
if (i - lastPos > 0)
381+
if (lastPos < i)
363382
value += qs.slice(lastPos, i);
364383
value += '%20';
365384
valEncoded = true;
@@ -369,7 +388,7 @@ QueryString.parse = QueryString.decode = function(qs, sep, eq, options) {
369388
}
370389

371390
// Check if we have leftover key or value data
372-
if (pairs > 0 && (lastPos < qs.length || eqIdx > 0)) {
391+
if (pairs !== 0 && (lastPos < qs.length || eqIdx > 0)) {
373392
if (lastPos < qs.length) {
374393
if (eqIdx < eqLen)
375394
key += qs.slice(lastPos);
@@ -387,22 +406,23 @@ QueryString.parse = QueryString.decode = function(qs, sep, eq, options) {
387406
keys[keys.length] = key;
388407
} else {
389408
const curValue = obj[key];
390-
// `instanceof Array` is used instead of Array.isArray() because it
391-
// is ~15-20% faster with v8 4.7 and is safe to use because we are
392-
// using it with values being created within this function
393-
if (curValue instanceof Array)
409+
// A simple Array-specific property check is enough here to
410+
// distinguish from a string value and is faster and still safe since
411+
// we are generating all of the values being assigned.
412+
if (curValue.pop)
394413
curValue[curValue.length] = value;
395414
else
396415
obj[key] = [curValue, value];
397416
}
398417
}
399418

400419
return obj;
401-
};
420+
}
402421

403422

404423
// v8 does not optimize functions with try-catch blocks, so we isolate them here
405-
// to minimize the damage
424+
// to minimize the damage (Note: no longer true as of V8 5.4 -- but still will
425+
// not be inlined).
406426
function decodeStr(s, decoder) {
407427
try {
408428
return decoder(s);

0 commit comments

Comments
 (0)