Skip to content

Commit 2f19e8c

Browse files
committed
Address review feedback
1 parent 675f870 commit 2f19e8c

12 files changed

+447
-199
lines changed

Sources/SwiftParser/Expressions.swift

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1442,6 +1442,11 @@ extension Parser {
14421442
// Parse the opening slash.
14431443
let (unexpectedBeforeSlash, openSlash) = self.expect(.regexSlash)
14441444

1445+
// If we had opening pounds, there should be no trivia for the slash.
1446+
if let openPounds = openPounds {
1447+
precondition(openPounds.trailingTriviaByteLength == 0 && openSlash.leadingTriviaByteLength == 0)
1448+
}
1449+
14451450
// Parse the pattern and closing slash, avoiding recovery or leading trivia
14461451
// as the lexer should provide the tokens exactly in order without trivia,
14471452
// otherwise they should be treated as missing.

Sources/SwiftParser/Lexer/Cursor.swift

Lines changed: 65 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -57,9 +57,10 @@ extension Lexer.Cursor {
5757

5858
/// A narrow mode that's used for 'try?' and 'try!' to ensure we prefer to
5959
/// lex a regex literal rather than a binary operator. This is needed as the
60-
/// last token will be a postfix operator, which would normally indicate a
61-
/// binary operator is expected next, but in this case we know it must be an
62-
/// expression. See the comment in `tryScanOperatorAsRegexLiteral` for more info.
60+
/// `previousTokenKind` will be `.postfixOperator`, which would normally
61+
/// indicate a binary operator is expected next, but in this case we know it
62+
/// must be an expression. See the comment in
63+
/// `tryScanOperatorAsRegexLiteral` for more info.
6364
/// NOTE: This is a complete hack, do not add new uses of this.
6465
case preferRegexOverBinaryOperator
6566

@@ -93,7 +94,8 @@ extension Lexer.Cursor {
9394
case inStringInterpolation(stringLiteralKind: StringLiteralKind, parenCount: Int)
9495

9596
/// We have encountered a regex literal, and have its tokens to work
96-
/// through.
97+
/// through. `lexemes` is a pointer to the lexemes allocated in the state
98+
/// stack bump pointer allocator.
9799
case inRegexLiteral(index: UInt8, lexemes: UnsafePointer<RegexLiteralLexemes>)
98100

99101
/// The mode in which leading trivia should be lexed for this state or `nil`
@@ -212,6 +214,7 @@ extension Lexer.Cursor {
212214
self.kind = kind
213215
self.position = position
214216
}
217+
215218
init(_ kind: TokenDiagnostic.Kind, position: Lexer.Cursor) {
216219
self.init(kind, position: position.position)
217220
}
@@ -239,6 +242,9 @@ extension Lexer {
239242

240243
/// If we have already lexed a token, the kind of the previously lexed token
241244
var previousTokenKind: RawTokenKind?
245+
246+
/// If the `previousTokenKind` is `.keyword`, the keyword kind. Otherwise
247+
/// `nil`.
242248
var previousKeyword: Keyword?
243249

244250
private var stateStack: StateStack = StateStack()
@@ -309,18 +315,56 @@ extension Lexer {
309315
/// for this lexeme.
310316
let trailingTriviaLexingMode: Lexer.Cursor.TriviaLexingMode?
311317

312-
init(
318+
/// If `tokenKind` is `.keyword`, the kind of keyword produced, otherwise
319+
/// `nil`.
320+
let keywordKind: Keyword?
321+
322+
private init(
313323
_ tokenKind: RawTokenKind,
314-
flags: Lexer.Lexeme.Flags = [],
315-
error: Cursor.LexingDiagnostic? = nil,
316-
stateTransition: StateTransition? = nil,
317-
trailingTriviaLexingMode: Lexer.Cursor.TriviaLexingMode? = nil
324+
flags: Lexer.Lexeme.Flags,
325+
error: Cursor.LexingDiagnostic?,
326+
stateTransition: StateTransition?,
327+
trailingTriviaLexingMode: Lexer.Cursor.TriviaLexingMode?,
328+
keywordKind: Keyword?
318329
) {
319330
self.tokenKind = tokenKind
320331
self.flags = flags
321332
self.error = error
322333
self.stateTransition = stateTransition
323334
self.trailingTriviaLexingMode = trailingTriviaLexingMode
335+
self.keywordKind = keywordKind
336+
}
337+
338+
/// Create a lexer result. Note that keywords should use `Result.keyword`
339+
/// instead.
340+
init(
341+
_ tokenKind: RawTokenKind,
342+
flags: Lexer.Lexeme.Flags = [],
343+
error: Cursor.LexingDiagnostic? = nil,
344+
stateTransition: StateTransition? = nil,
345+
trailingTriviaLexingMode: Lexer.Cursor.TriviaLexingMode? = nil
346+
) {
347+
precondition(tokenKind != .keyword, "Use Result.keyword instead")
348+
self.init(
349+
tokenKind,
350+
flags: flags,
351+
error: error,
352+
stateTransition: stateTransition,
353+
trailingTriviaLexingMode: trailingTriviaLexingMode,
354+
keywordKind: nil
355+
)
356+
}
357+
358+
/// Produce a lexer result for a given keyword.
359+
static func keyword(_ kind: Keyword) -> Self {
360+
Self(
361+
.keyword,
362+
flags: [],
363+
error: nil,
364+
stateTransition: nil,
365+
trailingTriviaLexingMode: nil,
366+
keywordKind: kind
367+
)
324368
}
325369
}
326370
}
@@ -366,7 +410,7 @@ extension Lexer.Cursor {
366410
let result: Lexer.Result
367411
switch currentState {
368412
case .normal:
369-
result = lexNormal(sourceBufferStart: sourceBufferStart)
413+
result = lexNormal(sourceBufferStart: sourceBufferStart, preferRegexOverBinaryOperator: false)
370414
case .preferRegexOverBinaryOperator:
371415
// In this state we lex a single token with the flag set, and then pop the state.
372416
result = lexNormal(sourceBufferStart: sourceBufferStart, preferRegexOverBinaryOperator: true)
@@ -420,7 +464,7 @@ extension Lexer.Cursor {
420464
cursor: cursor
421465
)
422466
self.previousTokenKind = result.tokenKind
423-
self.previousKeyword = result.tokenKind == .keyword ? Keyword(lexeme.tokenText)! : nil
467+
self.previousKeyword = result.keywordKind
424468

425469
return lexeme
426470
}
@@ -554,6 +598,7 @@ extension Lexer.Cursor.Position {
554598
self.input = UnsafeBufferPointer(rebasing: input)
555599
return c
556600
}
601+
557602
/// Advance the cursor position by `n` bytes. The offset must be valid.
558603
func advanced(by n: Int) -> Self {
559604
precondition(n > 0)
@@ -824,7 +869,7 @@ extension Lexer.Cursor {
824869
extension Lexer.Cursor {
825870
private mutating func lexNormal(
826871
sourceBufferStart: Lexer.Cursor,
827-
preferRegexOverBinaryOperator: Bool = false
872+
preferRegexOverBinaryOperator: Bool
828873
) -> Lexer.Result {
829874
switch self.peek() {
830875
case UInt8(ascii: "@"): _ = self.advance(); return Lexer.Result(.atSign)
@@ -1010,7 +1055,7 @@ extension Lexer.Cursor {
10101055
return Lexer.Result(.stringSegment, stateTransition: .pop)
10111056
default:
10121057
// If we haven't reached the end of the string interpolation, lex as if we were in a normal expression.
1013-
return self.lexNormal(sourceBufferStart: sourceBufferStart)
1058+
return self.lexNormal(sourceBufferStart: sourceBufferStart, preferRegexOverBinaryOperator: false)
10141059
}
10151060
}
10161061
}
@@ -1916,7 +1961,7 @@ extension Lexer.Cursor {
19161961

19171962
let text = tokStart.text(upTo: self)
19181963
if let keyword = Keyword(text), keyword.isLexerClassified {
1919-
return Lexer.Result(.keyword)
1964+
return Lexer.Result.keyword(keyword)
19201965
} else if text == "_" {
19211966
return Lexer.Result(.wildcard)
19221967
} else {
@@ -1977,7 +2022,7 @@ extension Lexer.Cursor {
19772022
case UInt8(ascii: "?"):
19782023
return .postfixQuestionMark
19792024
default:
1980-
fatalError("Must be at '!' or '?'")
2025+
preconditionFailure("Must be at '!' or '?'")
19812026
}
19822027
}()
19832028
_ = self.advance()
@@ -2006,17 +2051,17 @@ extension Lexer.Cursor {
20062051

20072052
// Check to see if we have a regex literal starting in the operator.
20082053
do {
2009-
var ptr = tokStart
2010-
while ptr.input.baseAddress! < self.input.baseAddress! {
2054+
var regexScan = tokStart
2055+
while regexScan.input.baseAddress! < self.input.baseAddress! {
20112056
// Scan for the first '/' in the operator to see if it starts a regex
20122057
// literal.
2013-
guard ptr.is(at: "/") else {
2014-
_ = ptr.advance()
2058+
guard regexScan.is(at: "/") else {
2059+
_ = regexScan.advance()
20152060
continue
20162061
}
20172062
guard
20182063
let result = self.tryLexOperatorAsRegexLiteral(
2019-
at: ptr,
2064+
at: regexScan,
20202065
operatorStart: tokStart,
20212066
operatorEnd: self,
20222067
sourceBufferStart: sourceBufferStart,

Sources/SwiftParser/Lexer/RegexLiteralLexer.swift

Lines changed: 29 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -15,9 +15,17 @@
1515
/// A separate lexer specifically for regex literals.
1616
fileprivate struct RegexLiteralLexer {
1717
enum LexResult {
18+
/// Continue the lex; this is returned from `lexPatternCharacter` when
19+
/// it successfully lexed a character.
1820
case `continue`
21+
22+
/// The lexing has finished successfully.
1923
case done
24+
25+
/// This is not, in fact, a regex.
2026
case notARegex
27+
28+
/// We have an unterminated regex.
2129
case unterminated
2230
}
2331

@@ -29,10 +37,14 @@ fileprivate struct RegexLiteralLexer {
2937
private var firstNewline: Lexer.Cursor?
3038
private var isMultiline: Bool { firstNewline != nil }
3139

32-
/// Tracks the current group depth, used to enforce the heuristic that a bare
33-
/// slash regex literal with an unbalanced ')' should be treated as an
40+
/// Tracks the current group '(' depth, used to enforce the heuristic that a
41+
/// bare slash regex literal with an unbalanced ')' should be treated as an
3442
/// operator instead.
3543
private var groupDepth = 0
44+
45+
/// Tracks the current '[' custom character class depth, used to ensure we
46+
/// don't count '(' and ')' characters in a custom character class as group
47+
/// characters.
3648
private var customCharacterClassDepth = 0
3749

3850
/// Tracks the last unescaped space or tab character, used to enforce that a
@@ -58,7 +70,7 @@ fileprivate struct RegexLiteralLexer {
5870
}
5971

6072
/// Attempt to lex a character of the regex pattern.
61-
mutating func lexPatternCharacter(escaped: Bool = false) -> LexResult {
73+
private mutating func lexPatternCharacter(escaped: Bool) -> LexResult {
6274
if cursor.isAtEndOfFile {
6375
// We've hit the end of the buffer. In multi-line mode, we don't want to
6476
// skip over what is likely otherwise valid Swift code, so resume from the
@@ -136,7 +148,7 @@ fileprivate struct RegexLiteralLexer {
136148
}
137149

138150
/// Attempt to eat the closing delimiter.
139-
mutating func tryEatEnding() -> LexResult? {
151+
private mutating func tryEatEnding() -> LexResult? {
140152
let openPoundCount = builder.numOpenPounds
141153
let slashBegin = cursor
142154
var newCursor = cursor
@@ -209,7 +221,7 @@ fileprivate struct RegexLiteralLexer {
209221
return .done
210222
}
211223

212-
mutating func lexImpl() -> LexResult {
224+
private mutating func lexImpl() -> LexResult {
213225
// We can consume any number of pound signs.
214226
var poundCount = 0
215227
while cursor.advance(matching: "#") {
@@ -276,7 +288,7 @@ fileprivate struct RegexLiteralLexer {
276288
if let result = tryEatEnding() {
277289
return result
278290
}
279-
switch lexPatternCharacter() {
291+
switch lexPatternCharacter(escaped: false) {
280292
case .continue:
281293
continue
282294
case let result:
@@ -288,7 +300,7 @@ fileprivate struct RegexLiteralLexer {
288300
mutating func lex() -> RegexLiteralLexemes? {
289301
switch lexImpl() {
290302
case .continue:
291-
fatalError("Not a valid result")
303+
preconditionFailure("Not a valid result")
292304
case .notARegex:
293305
return nil
294306
case .unterminated where !mustBeRegex:
@@ -337,6 +349,7 @@ extension RegexLiteralLexemes.Element {
337349
case closingSlash
338350
case closingPounds
339351
}
352+
340353
/// Retrieve the actual token kind.
341354
var tokenKind: RawTokenKind {
342355
switch kind {
@@ -370,7 +383,8 @@ extension RegexLiteralLexemes {
370383
/// A builder type for the regex literal lexer.
371384
///
372385
/// NOTE: This is stored for the regex literal lexer state, so should be kept
373-
/// as small as possible.
386+
/// as small as possible. Additionally, it is allocated using a bump pointer
387+
/// allocator, so must remain a POD type (i.e. no classes).
374388
fileprivate struct Builder {
375389
private(set) var numOpenPounds: Int = 0
376390
private(set) var patternByteLength: Int = 0
@@ -415,6 +429,7 @@ extension RegexLiteralLexemes {
415429
_patternErrorOffset = start.distance(to: newValue.position)
416430
}
417431
}
432+
418433
var hasPounds: Bool { numOpenPounds > 0 }
419434
}
420435
}
@@ -487,7 +502,12 @@ extension RegexLiteralLexemes.Builder {
487502
at cursor: Lexer.Cursor
488503
) {
489504
precondition(lastLexemeKind == .openingSlash)
490-
patternError = .init(kind, position: cursor)
505+
506+
// Only record if we don't already have a pattern error; we want to prefer
507+
// the first error we encounter.
508+
if patternError == nil {
509+
patternError = .init(kind, position: cursor)
510+
}
491511
}
492512

493513
/// Finish regex literal lexing.

0 commit comments

Comments
 (0)