Skip to content

Commit f6f8b27

Browse files
committed
js: improve lexer identifier parsing performance
1 parent decf208 commit f6f8b27

File tree

2 files changed

+36
-8
lines changed

2 files changed

+36
-8
lines changed

internal/js_lexer/js_lexer.go

Lines changed: 34 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -1757,23 +1757,49 @@ func (lexer *Lexer) Next() {
17571757
lexer.addRangeError(lexer.Range(), "JSON strings must use double quotes")
17581758
}
17591759

1760+
// Note: This case is hot in profiles
17601761
case '_', '$',
17611762
'a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j', 'k', 'l', 'm',
17621763
'n', 'o', 'p', 'q', 'r', 's', 't', 'u', 'v', 'w', 'x', 'y', 'z',
17631764
'A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', 'I', 'J', 'K', 'L', 'M',
17641765
'N', 'O', 'P', 'Q', 'R', 'S', 'T', 'U', 'V', 'W', 'X', 'Y', 'Z':
1766+
// This is a fast path for long ASCII identifiers. Doing this in a loop
1767+
// first instead of doing "step()" and "IsIdentifierContinue()" like we
1768+
// do after this is noticeably faster in the common case of ASCII-only
1769+
// text. For example, doing this sped up end-to-end consuming of a large
1770+
// TypeScript type declaration file from 97ms to 79ms (around 20% faster).
1771+
contents := lexer.source.Contents
1772+
n := len(contents)
1773+
i := lexer.current
1774+
for i < n {
1775+
c := contents[i]
1776+
if (c < 'a' || c > 'z') && (c < 'A' || c > 'Z') && (c < '0' || c > '9') && c != '_' && c != '$' {
1777+
break
1778+
}
1779+
i++
1780+
}
1781+
lexer.current = i
1782+
1783+
// Now do the slow path for any remaining non-ASCII identifier characters
17651784
lexer.step()
1766-
for IsIdentifierContinue(lexer.codePoint) {
1767-
lexer.step()
1785+
if lexer.codePoint >= 0x80 {
1786+
for IsIdentifierContinue(lexer.codePoint) {
1787+
lexer.step()
1788+
}
17681789
}
1790+
1791+
// If there's a backslash, then we're in the extra-slow (and extra-rare) case
1792+
// where the identifier has embedded escapes
17691793
if lexer.codePoint == '\\' {
17701794
lexer.Identifier, lexer.Token = lexer.scanIdentifierWithEscapes(normalIdentifier)
1771-
} else {
1772-
lexer.Identifier = lexer.rawIdentifier()
1773-
lexer.Token = Keywords[lexer.Raw()]
1774-
if lexer.Token == 0 {
1775-
lexer.Token = TIdentifier
1776-
}
1795+
break
1796+
}
1797+
1798+
// Otherwise (if there was no escape) we can slice the code verbatim
1799+
lexer.Identifier = lexer.rawIdentifier()
1800+
lexer.Token = Keywords[lexer.Raw()]
1801+
if lexer.Token == 0 {
1802+
lexer.Token = TIdentifier
17771803
}
17781804

17791805
case '\\':

internal/js_lexer/js_lexer_test.go

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -133,6 +133,8 @@ func TestIdentifier(t *testing.T) {
133133

134134
expectIdentifier(t, "a\u200C", "a\u200C")
135135
expectIdentifier(t, "a\u200D", "a\u200D")
136+
expectIdentifier(t, "a\u200Cb", "a\u200Cb")
137+
expectIdentifier(t, "a\u200Db", "a\u200Db")
136138
}
137139

138140
func expectNumber(t *testing.T, contents string, expected float64) {

0 commit comments

Comments
 (0)