swiftlang
diff --git a/Sources/SwiftParser/Expressions.swift b/Sources/SwiftParser/Expressions.swift
Lines changed: 5 additions & 0 deletions
diff --git a/Sources/SwiftParser/Lexer/Cursor.swift b/Sources/SwiftParser/Lexer/Cursor.swift
Lines changed: 65 additions & 20 deletions
diff --git a/Sources/SwiftParser/Lexer/RegexLiteralLexer.swift b/Sources/SwiftParser/Lexer/RegexLiteralLexer.swift
Lines changed: 29 additions & 9 deletions
@@ -1442,6 +1442,11 @@ extension Parser {
     // Parse the opening slash.
     let (unexpectedBeforeSlash, openSlash) = self.expect(.regexSlash)
 
+    // If we had opening pounds, there should be no trivia for the slash.
+    if let openPounds = openPounds {
+      precondition(openPounds.trailingTriviaByteLength == 0 && openSlash.leadingTriviaByteLength == 0)
+    }
+
     // Parse the pattern and closing slash, avoiding recovery or leading trivia
     // as the lexer should provide the tokens exactly in order without trivia,
     // otherwise they should be treated as missing.
 
@@ -57,9 +57,10 @@ extension Lexer.Cursor {
 
     /// A narrow mode that's used for 'try?' and 'try!' to ensure we prefer to
     /// lex a regex literal rather than a binary operator. This is needed as the
-    /// last token will be a postfix operator, which would normally indicate a
-    /// binary operator is expected next, but in this case we know it must be an
-    /// expression. See the comment in `tryScanOperatorAsRegexLiteral` for more info.
+    /// `previousTokenKind` will be `.postfixOperator`, which would normally
+    /// indicate a binary operator is expected next, but in this case we know it
+    /// must be an expression. See the comment in
+    /// `tryScanOperatorAsRegexLiteral` for more info.
     /// NOTE: This is a complete hack, do not add new uses of this.
     case preferRegexOverBinaryOperator
 
@@ -93,7 +94,8 @@ extension Lexer.Cursor {
     case inStringInterpolation(stringLiteralKind: StringLiteralKind, parenCount: Int)
 
     /// We have encountered a regex literal, and have its tokens to work
-    /// through.
+    /// through. `lexemes` is a pointer to the lexemes allocated in the state
+    /// stack bump pointer allocator.
     case inRegexLiteral(index: UInt8, lexemes: UnsafePointer<RegexLiteralLexemes>)
 
     /// The mode in which leading trivia should be lexed for this state or `nil`
@@ -212,6 +214,7 @@ extension Lexer.Cursor {
       self.kind = kind
       self.position = position
     }
+
     init(_ kind: TokenDiagnostic.Kind, position: Lexer.Cursor) {
       self.init(kind, position: position.position)
     }
@@ -239,6 +242,9 @@ extension Lexer {
 
     /// If we have already lexed a token, the kind of the previously lexed token
     var previousTokenKind: RawTokenKind?
+
+    /// If the `previousTokenKind` is `.keyword`, the keyword kind. Otherwise
+    /// `nil`.
     var previousKeyword: Keyword?
 
     private var stateStack: StateStack = StateStack()
@@ -309,18 +315,56 @@ extension Lexer {
     /// for this lexeme.
     let trailingTriviaLexingMode: Lexer.Cursor.TriviaLexingMode?
 
-    init(
+    /// If `tokenKind` is `.keyword`, the kind of keyword produced, otherwise
+    /// `nil`.
+    let keywordKind: Keyword?
+
+    private init(
       _ tokenKind: RawTokenKind,
-      flags: Lexer.Lexeme.Flags = [],
-      error: Cursor.LexingDiagnostic? = nil,
-      stateTransition: StateTransition? = nil,
-      trailingTriviaLexingMode: Lexer.Cursor.TriviaLexingMode? = nil
+      flags: Lexer.Lexeme.Flags,
+      error: Cursor.LexingDiagnostic?,
+      stateTransition: StateTransition?,
+      trailingTriviaLexingMode: Lexer.Cursor.TriviaLexingMode?,
+      keywordKind: Keyword?
     ) {
       self.tokenKind = tokenKind
       self.flags = flags
       self.error = error
       self.stateTransition = stateTransition
       self.trailingTriviaLexingMode = trailingTriviaLexingMode
+      self.keywordKind = keywordKind
+    }
+
+    /// Create a lexer result for a non-keyword token, leaving `keywordKind` as
+    /// `nil`. Traps if `tokenKind` is `.keyword`; use `Result.keyword` instead.
+    init(
+      _ tokenKind: RawTokenKind,
+      flags: Lexer.Lexeme.Flags = [],
+      error: Cursor.LexingDiagnostic? = nil,
+      stateTransition: StateTransition? = nil,
+      trailingTriviaLexingMode: Lexer.Cursor.TriviaLexingMode? = nil
+    ) {
+      precondition(tokenKind != .keyword, "Use Result.keyword instead")
+      self.init(
+        tokenKind,
+        flags: flags,
+        error: error,
+        stateTransition: stateTransition,
+        trailingTriviaLexingMode: trailingTriviaLexingMode,
+        keywordKind: nil
+      )
+    }
+
+    /// Produce a `.keyword` lexer result that records `kind` as its `keywordKind`.
+    static func keyword(_ kind: Keyword) -> Self {
+      Self(
+        .keyword,
+        flags: [],
+        error: nil,
+        stateTransition: nil,
+        trailingTriviaLexingMode: nil,
+        keywordKind: kind
+      )
     }
   }
 }
@@ -366,7 +410,7 @@ extension Lexer.Cursor {
     let result: Lexer.Result
     switch currentState {
     case .normal:
-      result = lexNormal(sourceBufferStart: sourceBufferStart)
+      result = lexNormal(sourceBufferStart: sourceBufferStart, preferRegexOverBinaryOperator: false)
     case .preferRegexOverBinaryOperator:
       // In this state we lex a single token with the flag set, and then pop the state.
       result = lexNormal(sourceBufferStart: sourceBufferStart, preferRegexOverBinaryOperator: true)
@@ -420,7 +464,7 @@ extension Lexer.Cursor {
       cursor: cursor
     )
     self.previousTokenKind = result.tokenKind
-    self.previousKeyword = result.tokenKind == .keyword ? Keyword(lexeme.tokenText)! : nil
+    self.previousKeyword = result.keywordKind
 
     return lexeme
   }
@@ -554,6 +598,7 @@ extension Lexer.Cursor.Position {
     self.input = UnsafeBufferPointer(rebasing: input)
     return c
   }
+
   /// Advance the cursor position by `n` bytes. The offset must be valid.
   func advanced(by n: Int) -> Self {
     precondition(n > 0)
@@ -824,7 +869,7 @@ extension Lexer.Cursor {
 extension Lexer.Cursor {
   private mutating func lexNormal(
     sourceBufferStart: Lexer.Cursor,
-    preferRegexOverBinaryOperator: Bool = false
+    preferRegexOverBinaryOperator: Bool
   ) -> Lexer.Result {
     switch self.peek() {
     case UInt8(ascii: "@"): _ = self.advance(); return Lexer.Result(.atSign)
@@ -1010,7 +1055,7 @@ extension Lexer.Cursor {
       return Lexer.Result(.stringSegment, stateTransition: .pop)
     default:
       // If we haven't reached the end of the string interpolation, lex as if we were in a normal expression.
-      return self.lexNormal(sourceBufferStart: sourceBufferStart)
+      return self.lexNormal(sourceBufferStart: sourceBufferStart, preferRegexOverBinaryOperator: false)
     }
   }
 }
@@ -1916,7 +1961,7 @@ extension Lexer.Cursor {
 
     let text = tokStart.text(upTo: self)
     if let keyword = Keyword(text), keyword.isLexerClassified {
-      return Lexer.Result(.keyword)
+      return Lexer.Result.keyword(keyword)
     } else if text == "_" {
       return Lexer.Result(.wildcard)
     } else {
@@ -1977,7 +2022,7 @@ extension Lexer.Cursor {
       case UInt8(ascii: "?"):
         return .postfixQuestionMark
       default:
-        fatalError("Must be at '!' or '?'")
+        preconditionFailure("Must be at '!' or '?'")
       }
     }()
     _ = self.advance()
@@ -2006,17 +2051,17 @@ extension Lexer.Cursor {
 
     // Check to see if we have a regex literal starting in the operator.
     do {
-      var ptr = tokStart
-      while ptr.input.baseAddress! < self.input.baseAddress! {
+      var regexScan = tokStart
+      while regexScan.input.baseAddress! < self.input.baseAddress! {
         // Scan for the first '/' in the operator to see if it starts a regex
         // literal.
-        guard ptr.is(at: "/") else {
-          _ = ptr.advance()
+        guard regexScan.is(at: "/") else {
+          _ = regexScan.advance()
           continue
         }
         guard
           let result = self.tryLexOperatorAsRegexLiteral(
-            at: ptr,
+            at: regexScan,
             operatorStart: tokStart,
             operatorEnd: self,
             sourceBufferStart: sourceBufferStart,
 
@@ -15,9 +15,17 @@
 /// A separate lexer specifically for regex literals.
 fileprivate struct RegexLiteralLexer {
   enum LexResult {
+    /// Continue the lex, this is returned from `lexPatternCharacter` when
+    /// it successfully lexed a character.
     case `continue`
+
+    /// The lexing has finished successfully.
     case done
+
+    /// This is not, in fact, a regex.
     case notARegex
+
+    /// We have an unterminated regex.
     case unterminated
   }
 
@@ -29,10 +37,14 @@ fileprivate struct RegexLiteralLexer {
   private var firstNewline: Lexer.Cursor?
   private var isMultiline: Bool { firstNewline != nil }
 
-  /// Tracks the current group depth, used to enforce the heuristic that a bare
-  /// slash regex literal with an unbalanced ')' should be treated as an
+  /// Tracks the current group '(' depth, used to enforce the heuristic that a
+  /// bare slash regex literal with an unbalanced ')' should be treated as an
   /// operator instead.
   private var groupDepth = 0
+
+  /// Tracks the current '[' custom character class depth, used to ensure we
+  /// don't treat '(' and ')' characters in a custom character class as group
+  /// characters.
   private var customCharacterClassDepth = 0
 
   /// Tracks the last unescaped space or tab character, used to enforce that a
@@ -58,7 +70,7 @@ fileprivate struct RegexLiteralLexer {
   }
 
   /// Attempt to lex a character of the regex pattern.
-  mutating func lexPatternCharacter(escaped: Bool = false) -> LexResult {
+  private mutating func lexPatternCharacter(escaped: Bool) -> LexResult {
     if cursor.isAtEndOfFile {
       // We've hit the end of the buffer. In multi-line mode, we don't want to
       // skip over what is likely otherwise valid Swift code, so resume from the
@@ -136,7 +148,7 @@ fileprivate struct RegexLiteralLexer {
   }
 
   /// Attempt to eat a the closing delimiter.
-  mutating func tryEatEnding() -> LexResult? {
+  private mutating func tryEatEnding() -> LexResult? {
     let openPoundCount = builder.numOpenPounds
     let slashBegin = cursor
     var newCursor = cursor
@@ -209,7 +221,7 @@ fileprivate struct RegexLiteralLexer {
     return .done
   }
 
-  mutating func lexImpl() -> LexResult {
+  private mutating func lexImpl() -> LexResult {
     // We can consume any number of pound signs.
     var poundCount = 0
     while cursor.advance(matching: "#") {
@@ -276,7 +288,7 @@ fileprivate struct RegexLiteralLexer {
       if let result = tryEatEnding() {
         return result
       }
-      switch lexPatternCharacter() {
+      switch lexPatternCharacter(escaped: false) {
       case .continue:
         continue
       case let result:
@@ -288,7 +300,7 @@ fileprivate struct RegexLiteralLexer {
   mutating func lex() -> RegexLiteralLexemes? {
     switch lexImpl() {
     case .continue:
-      fatalError("Not a valid result")
+      preconditionFailure("Not a valid result")
     case .notARegex:
       return nil
     case .unterminated where !mustBeRegex:
@@ -337,6 +349,7 @@ extension RegexLiteralLexemes.Element {
     case closingSlash
     case closingPounds
   }
+
   /// Retrieve the actual token kind.
   var tokenKind: RawTokenKind {
     switch kind {
@@ -370,7 +383,8 @@ extension RegexLiteralLexemes {
   /// A builder type for the regex literal lexer.
   ///
   /// NOTE: This is stored for the regex literal lexer state, so should be kept
-  /// as small as possible.
+  /// as small as possible. Additionally, it is allocated using a bump pointer
+  /// allocator, so must remain a POD type (i.e. no classes).
   fileprivate struct Builder {
     private(set) var numOpenPounds: Int = 0
     private(set) var patternByteLength: Int = 0
@@ -415,6 +429,7 @@ extension RegexLiteralLexemes {
         _patternErrorOffset = start.distance(to: newValue.position)
       }
     }
+
     var hasPounds: Bool { numOpenPounds > 0 }
   }
 }
@@ -487,7 +502,12 @@ extension RegexLiteralLexemes.Builder {
     at cursor: Lexer.Cursor
   ) {
     precondition(lastLexemeKind == .openingSlash)
-    patternError = .init(kind, position: cursor)
+
+    // Only record if we don't already have a pattern error, we want to prefer
+    // the first error we encounter.
+    if patternError == nil {
+      patternError = .init(kind, position: cursor)
+    }
   }
 
   /// Finish regex literal lexing.