Skip to content

Commit f1c3592

Browse files
authored
Follow regexp validation as per tc39/ecma262#1869
1 parent 078e2cc commit f1c3592

File tree

4 files changed

+70
-35
lines changed

4 files changed

+70
-35
lines changed

acorn/src/regexp.js

+38-35
Original file line numberDiff line numberDiff line change
@@ -40,49 +40,49 @@ export class RegExpValidationState {
4040

4141
// If the u flag is given (or `forceU` is true), this returns the code point at the index (it combines a surrogate pair).
4242
// Otherwise, this returns the code unit at the index (which can be one half of a surrogate pair).
43-
at(i) {
43+
at(i, forceU = false) {
4444
const s = this.source
4545
const l = s.length
4646
if (i >= l) {
4747
return -1
4848
}
4949
const c = s.charCodeAt(i)
50-
if (!this.switchU || c <= 0xD7FF || c >= 0xE000 || i + 1 >= l) {
50+
if (!(forceU || this.switchU) || c <= 0xD7FF || c >= 0xE000 || i + 1 >= l) {
5151
return c
5252
}
5353
const next = s.charCodeAt(i + 1)
5454
return next >= 0xDC00 && next <= 0xDFFF ? (c << 10) + next - 0x35FDC00 : c
5555
}
5656

57-
nextIndex(i) {
57+
nextIndex(i, forceU = false) {
5858
const s = this.source
5959
const l = s.length
6060
if (i >= l) {
6161
return l
6262
}
6363
let c = s.charCodeAt(i), next
64-
if (!this.switchU || c <= 0xD7FF || c >= 0xE000 || i + 1 >= l ||
64+
if (!(forceU || this.switchU) || c <= 0xD7FF || c >= 0xE000 || i + 1 >= l ||
6565
(next = s.charCodeAt(i + 1)) < 0xDC00 || next > 0xDFFF) {
6666
return i + 1
6767
}
6868
return i + 2
6969
}
7070

71-
current() {
72-
return this.at(this.pos)
71+
current(forceU = false) {
72+
return this.at(this.pos, forceU)
7373
}
7474

75-
lookahead() {
76-
return this.at(this.nextIndex(this.pos))
75+
lookahead(forceU = false) {
76+
return this.at(this.nextIndex(this.pos, forceU), forceU)
7777
}
7878

79-
advance() {
80-
this.pos = this.nextIndex(this.pos)
79+
advance(forceU = false) {
80+
this.pos = this.nextIndex(this.pos, forceU)
8181
}
8282

83-
eat(ch) {
84-
if (this.current() === ch) {
85-
this.advance()
83+
eat(ch, forceU = false) {
84+
if (this.current(forceU) === ch) {
85+
this.advance(forceU)
8686
return true
8787
}
8888
return false
@@ -418,9 +418,9 @@ pp.regexp_eatExtendedPatternCharacter = function(state) {
418418
return false
419419
}
420420

421-
// GroupSpecifier[U] ::
421+
// GroupSpecifier ::
422422
// [empty]
423-
// `?` GroupName[?U]
423+
// `?` GroupName
424424
pp.regexp_groupSpecifier = function(state) {
425425
if (state.eat(0x3F /* ? */)) {
426426
if (this.regexp_eatGroupName(state)) {
@@ -434,8 +434,8 @@ pp.regexp_groupSpecifier = function(state) {
434434
}
435435
}
436436

437-
// GroupName[U] ::
438-
// `<` RegExpIdentifierName[?U] `>`
437+
// GroupName ::
438+
// `<` RegExpIdentifierName `>`
439439
// Note: this updates `state.lastStringValue` property with the eaten name.
440440
pp.regexp_eatGroupName = function(state) {
441441
state.lastStringValue = ""
@@ -448,9 +448,9 @@ pp.regexp_eatGroupName = function(state) {
448448
return false
449449
}
450450

451-
// RegExpIdentifierName[U] ::
452-
// RegExpIdentifierStart[?U]
453-
// RegExpIdentifierName[?U] RegExpIdentifierPart[?U]
451+
// RegExpIdentifierName ::
452+
// RegExpIdentifierStart
453+
// RegExpIdentifierName RegExpIdentifierPart
454454
// Note: this updates `state.lastStringValue` property with the eaten name.
455455
pp.regexp_eatRegExpIdentifierName = function(state) {
456456
state.lastStringValue = ""
@@ -464,17 +464,18 @@ pp.regexp_eatRegExpIdentifierName = function(state) {
464464
return false
465465
}
466466

467-
// RegExpIdentifierStart[U] ::
467+
// RegExpIdentifierStart ::
468468
// UnicodeIDStart
469469
// `$`
470470
// `_`
471-
// `\` RegExpUnicodeEscapeSequence[?U]
471+
// `\` RegExpUnicodeEscapeSequence[+U]
472472
pp.regexp_eatRegExpIdentifierStart = function(state) {
473473
const start = state.pos
474-
let ch = state.current()
475-
state.advance()
474+
const forceU = this.options.ecmaVersion >= 11
475+
let ch = state.current(forceU)
476+
state.advance(forceU)
476477

477-
if (ch === 0x5C /* \ */ && this.regexp_eatRegExpUnicodeEscapeSequence(state)) {
478+
if (ch === 0x5C /* \ */ && this.regexp_eatRegExpUnicodeEscapeSequence(state, forceU)) {
478479
ch = state.lastIntValue
479480
}
480481
if (isRegExpIdentifierStart(ch)) {
@@ -489,19 +490,20 @@ function isRegExpIdentifierStart(ch) {
489490
return isIdentifierStart(ch, true) || ch === 0x24 /* $ */ || ch === 0x5F /* _ */
490491
}
491492

492-
// RegExpIdentifierPart[U] ::
493+
// RegExpIdentifierPart ::
493494
// UnicodeIDContinue
494495
// `$`
495496
// `_`
496-
// `\` RegExpUnicodeEscapeSequence[?U]
497+
// `\` RegExpUnicodeEscapeSequence[+U]
497498
// <ZWNJ>
498499
// <ZWJ>
499500
pp.regexp_eatRegExpIdentifierPart = function(state) {
500501
const start = state.pos
501-
let ch = state.current()
502-
state.advance()
502+
const forceU = this.options.ecmaVersion >= 11
503+
let ch = state.current(forceU)
504+
state.advance(forceU)
503505

504-
if (ch === 0x5C /* \ */ && this.regexp_eatRegExpUnicodeEscapeSequence(state)) {
506+
if (ch === 0x5C /* \ */ && this.regexp_eatRegExpUnicodeEscapeSequence(state, forceU)) {
505507
ch = state.lastIntValue
506508
}
507509
if (isRegExpIdentifierPart(ch)) {
@@ -571,7 +573,7 @@ pp.regexp_eatCharacterEscape = function(state) {
571573
this.regexp_eatCControlLetter(state) ||
572574
this.regexp_eatZero(state) ||
573575
this.regexp_eatHexEscapeSequence(state) ||
574-
this.regexp_eatRegExpUnicodeEscapeSequence(state) ||
576+
this.regexp_eatRegExpUnicodeEscapeSequence(state, false) ||
575577
(!state.switchU && this.regexp_eatLegacyOctalEscapeSequence(state)) ||
576578
this.regexp_eatIdentityEscape(state)
577579
)
@@ -644,13 +646,14 @@ function isControlLetter(ch) {
644646
}
645647

646648
// https://www.ecma-international.org/ecma-262/8.0/#prod-RegExpUnicodeEscapeSequence
647-
pp.regexp_eatRegExpUnicodeEscapeSequence = function(state) {
649+
pp.regexp_eatRegExpUnicodeEscapeSequence = function(state, forceU = false) {
648650
const start = state.pos
651+
const switchU = forceU || state.switchU
649652

650653
if (state.eat(0x75 /* u */)) {
651654
if (this.regexp_eatFixedHexDigits(state, 4)) {
652655
const lead = state.lastIntValue
653-
if (state.switchU && lead >= 0xD800 && lead <= 0xDBFF) {
656+
if (switchU && lead >= 0xD800 && lead <= 0xDBFF) {
654657
const leadSurrogateEnd = state.pos
655658
if (state.eat(0x5C /* \ */) && state.eat(0x75 /* u */) && this.regexp_eatFixedHexDigits(state, 4)) {
656659
const trail = state.lastIntValue
@@ -665,15 +668,15 @@ pp.regexp_eatRegExpUnicodeEscapeSequence = function(state) {
665668
return true
666669
}
667670
if (
668-
state.switchU &&
671+
switchU &&
669672
state.eat(0x7B /* { */) &&
670673
this.regexp_eatHexDigits(state) &&
671674
state.eat(0x7D /* } */) &&
672675
isValidUnicode(state.lastIntValue)
673676
) {
674677
return true
675678
}
676-
if (state.switchU) {
679+
if (switchU) {
677680
state.raise("Invalid unicode escape")
678681
}
679682
state.pos = start

bin/test262.whitelist

+10
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,10 @@
1+
language/literals/regexp/named-groups/invalid-non-id-continue-groupspecifier.js (default)
2+
language/literals/regexp/named-groups/invalid-non-id-continue-groupspecifier.js (strict mode)
3+
language/literals/regexp/named-groups/invalid-non-id-start-groupspecifier-3.js (default)
4+
language/literals/regexp/named-groups/invalid-non-id-start-groupspecifier-3.js (strict mode)
5+
language/literals/regexp/named-groups/invalid-non-id-start-groupspecifier-6.js (default)
6+
language/literals/regexp/named-groups/invalid-non-id-start-groupspecifier-6.js (strict mode)
7+
language/literals/regexp/named-groups/invalid-u-escape-in-groupspecifier.js (default)
8+
language/literals/regexp/named-groups/invalid-u-escape-in-groupspecifier.js (strict mode)
9+
language/literals/regexp/named-groups/invalid-u-escape-in-groupspecifier-2.js (default)
10+
language/literals/regexp/named-groups/invalid-u-escape-in-groupspecifier-2.js (strict mode)

test/run.js

+1
Original file line numberDiff line numberDiff line change
@@ -12,6 +12,7 @@
1212
require("./tests-async-iteration.js");
1313
require("./tests-regexp.js");
1414
require("./tests-regexp-2018.js");
15+
require("./tests-regexp-2020.js");
1516
require("./tests-json-superset.js");
1617
require("./tests-optional-catch-binding.js");
1718
require("./tests-bigint.js");

test/tests-regexp-2020.js

+21
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,21 @@
1+
if (typeof exports != "undefined") {
2+
var test = require("./driver.js").test
3+
var testFail = require("./driver.js").testFail
4+
}
5+
6+
// https://github.com/tc39/ecma262/pull/1869
7+
testFail("/(?<\\ud835\\udc9c>.)/", "Invalid regular expression: /(?<\\ud835\\udc9c>.)/: Invalid capture group name (1:1)", { ecmaVersion: 2019 })
8+
test("/(?<\\ud835\\udc9c>.)/", {}, { ecmaVersion: 2020 })
9+
test("/(?<\\ud835\\udc9c>.)/u", {}, { ecmaVersion: 2019 })
10+
test("/(?<\\ud835\\udc9c>.)/u", {}, { ecmaVersion: 2020 })
11+
12+
testFail("/(?<\\u{1d49c}>.)/", "Invalid regular expression: /(?<\\u{1d49c}>.)/: Invalid capture group name (1:1)", { ecmaVersion: 2019 })
13+
test("/(?<\\u{1d49c}>.)/", {}, { ecmaVersion: 2020 })
14+
test("/(?<\\u{1d49c}>.)/u", {}, { ecmaVersion: 2019 })
15+
test("/(?<\\u{1d49c}>.)/u", {}, { ecmaVersion: 2020 })
16+
17+
testFail("/(?<𝒜>.)/", "Invalid regular expression: /(?<𝒜>.)/: Invalid capture group name (1:1)", { ecmaVersion: 2019 })
18+
test("/(?<𝒜>.)/", {}, { ecmaVersion: 2020 })
19+
test("/(?<𝒜>.)/u", {}, { ecmaVersion: 2019 })
20+
test("/(?<𝒜>.)/u", {}, { ecmaVersion: 2020 })
21+

0 commit comments

Comments
 (0)