@@ -1757,23 +1757,49 @@ func (lexer *Lexer) Next() {
 			lexer.addRangeError(lexer.Range(), "JSON strings must use double quotes")
 		}

+	// Note: This case is hot in profiles
 	case '_', '$',
 		'a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j', 'k', 'l', 'm',
 		'n', 'o', 'p', 'q', 'r', 's', 't', 'u', 'v', 'w', 'x', 'y', 'z',
 		'A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', 'I', 'J', 'K', 'L', 'M',
 		'N', 'O', 'P', 'Q', 'R', 'S', 'T', 'U', 'V', 'W', 'X', 'Y', 'Z':
+		// This is a fast path for long ASCII identifiers. Doing this in a loop
+		// first instead of doing "step()" and "IsIdentifierContinue()" like we
+		// do after this is noticeably faster in the common case of ASCII-only
+		// text. For example, doing this sped up end-to-end consuming of a large
+		// TypeScript type declaration file from 97ms to 79ms (around 20% faster).
+		contents := lexer.source.Contents
+		n := len(contents)
+		i := lexer.current
+		for i < n {
+			c := contents[i]
+			if (c < 'a' || c > 'z') && (c < 'A' || c > 'Z') && (c < '0' || c > '9') && c != '_' && c != '$' {
+				break
+			}
+			i++
+		}
+		lexer.current = i
+
+		// Now do the slow path for any remaining non-ASCII identifier characters
 		lexer.step()
-		for IsIdentifierContinue(lexer.codePoint) {
-			lexer.step()
+		if lexer.codePoint >= 0x80 {
+			for IsIdentifierContinue(lexer.codePoint) {
+				lexer.step()
+			}
 		}
+
+		// If there's a slash, then we're in the extra-slow (and extra-rare) case
+		// where the identifier has embedded escapes
 		if lexer.codePoint == '\\' {
 			lexer.Identifier, lexer.Token = lexer.scanIdentifierWithEscapes(normalIdentifier)
-		} else {
-			lexer.Identifier = lexer.rawIdentifier()
-			lexer.Token = Keywords[lexer.Raw()]
-			if lexer.Token == 0 {
-				lexer.Token = TIdentifier
-			}
+			break
+		}
+
+		// Otherwise (if there was no escape) we can slice the code verbatim
+		lexer.Identifier = lexer.rawIdentifier()
+		lexer.Token = Keywords[lexer.Raw()]
+		if lexer.Token == 0 {
+			lexer.Token = TIdentifier
 		}

 	case '\\':
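For reference, here is a minimal standalone sketch of the two-phase scan this diff implements: a tight byte loop while the input stays ASCII, then rune decoding only once a byte >= 0x80 appears. The names scanIdentifier, isASCIIIdentByte, and isIdentContinue are hypothetical stand-ins for illustration, and unicode.IsLetter/unicode.IsDigit only approximate esbuild's IsIdentifierContinue, which implements the full Unicode identifier rules.

package main

import (
	"fmt"
	"unicode"
	"unicode/utf8"
)

// isASCIIIdentByte mirrors the byte test in the diff's fast-path loop.
func isASCIIIdentByte(c byte) bool {
	return (c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z') ||
		(c >= '0' && c <= '9') || c == '_' || c == '$'
}

// isIdentContinue is a rough stand-in for esbuild's IsIdentifierContinue.
func isIdentContinue(r rune) bool {
	return r == '_' || r == '$' || unicode.IsLetter(r) || unicode.IsDigit(r)
}

// scanIdentifier returns the identifier starting at source[start:]. Phase 1
// advances with plain byte comparisons; phase 2 decodes runes, but only if
// the byte that stopped phase 1 begins a multi-byte UTF-8 sequence (>= 0x80),
// mirroring the "if lexer.codePoint >= 0x80" guard in the diff.
func scanIdentifier(source string, start int) string {
	i := start
	for i < len(source) && isASCIIIdentByte(source[i]) {
		i++ // fast path: no UTF-8 decoding, no per-rune classification
	}
	if i < len(source) && source[i] >= 0x80 {
		for i < len(source) {
			r, size := utf8.DecodeRuneInString(source[i:])
			if !isIdentContinue(r) {
				break
			}
			i += size // slow path: full rune decoding from here on
		}
	}
	return source[start:i]
}

func main() {
	fmt.Println(scanIdentifier("fooBar123 = 1", 0)) // fooBar123
	fmt.Println(scanIdentifier("naïve_π + 2", 0))   // naïve_π
}

The 0x80 guard is what keeps IsIdentifierContinue entirely off the hot path: for pure-ASCII input the slow loop never runs at all.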
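A rough way to sanity-check the kind of speedup the comment claims, in isolation, is to benchmark the byte-comparison loop against a decode-and-classify loop over the same ASCII-only input. This is a hedged sketch (the package name, input, and benchmark names are invented, and it is not esbuild's actual benchmark); put it in a file ending in _test.go.

package lexbench

import (
	"strings"
	"testing"
	"unicode"
	"unicode/utf8"
)

// ASCII-only input: many identifiers separated by single spaces, loosely
// imitating an identifier-heavy declaration file.
var input = strings.Repeat("someReasonablyLongIdentifier ", 1000)

// sink keeps the compiler from discarding the loop bodies.
var sink int

// BenchmarkByteLoop walks the input the way the diff's fast path does:
// one chain of byte comparisons per character.
func BenchmarkByteLoop(b *testing.B) {
	for n := 0; n < b.N; n++ {
		count := 0
		for i := 0; i < len(input); i++ {
			c := input[i]
			if (c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z') ||
				(c >= '0' && c <= '9') || c == '_' || c == '$' {
				count++
			}
		}
		sink = count
	}
}

// BenchmarkRuneLoop decodes and classifies every code point, approximating
// the pre-diff "step() + IsIdentifierContinue()" structure.
func BenchmarkRuneLoop(b *testing.B) {
	for n := 0; n < b.N; n++ {
		count := 0
		for i := 0; i < len(input); {
			r, size := utf8.DecodeRuneInString(input[i:])
			if r == '_' || r == '$' || unicode.IsLetter(r) || unicode.IsDigit(r) {
				count++
			}
			i += size
		}
		sink = count
	}
}

Run with "go test -bench .". On ASCII input the byte loop is typically several times cheaper per character; the end-to-end win quoted in the comment (97ms to 79ms) is smaller because identifier scanning is only one part of lexing.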