From afbe222e8d5e4ed9de679d941f4cf3c22e34cfe6 Mon Sep 17 00:00:00 2001 From: Alex Hoppen Date: Mon, 18 Nov 2024 20:23:44 -0800 Subject: [PATCH 1/2] Fix an infinite loop if a conflict marker is found but it's not at the start of a new line rdar://137746823 --- Sources/SwiftParser/Lexer/Cursor.swift | 1 + Tests/SwiftParserTest/LexerTests.swift | 15 +++++++++++++++ 2 files changed, 16 insertions(+) diff --git a/Sources/SwiftParser/Lexer/Cursor.swift b/Sources/SwiftParser/Lexer/Cursor.swift index 4c2372fb886..d6df971eba9 100644 --- a/Sources/SwiftParser/Lexer/Cursor.swift +++ b/Sources/SwiftParser/Lexer/Cursor.swift @@ -2448,6 +2448,7 @@ extension Lexer.Cursor { // Must occur at start of line. guard restOfBuffer.previous == "\n" || restOfBuffer.previous == "\r" else { + _ = restOfBuffer.advance() continue } diff --git a/Tests/SwiftParserTest/LexerTests.swift b/Tests/SwiftParserTest/LexerTests.swift index 362ea00d389..dd9e61bcf12 100644 --- a/Tests/SwiftParserTest/LexerTests.swift +++ b/Tests/SwiftParserTest/LexerTests.swift @@ -1706,4 +1706,19 @@ class LexerTests: ParserTestCase { ] ) } + + func testConflictMarkerNotAtStartOfLine() { + assertLexemes( + #""" + <<<<<<< a + >>>>>>> a + """#, + lexemes: [ + LexemeSpec(.binaryOperator, text: "<<<<<<<", trailing: " "), + LexemeSpec(.identifier, text: "a"), + LexemeSpec(.binaryOperator, leading: "\n ", text: ">>>>>>>", trailing: " ", flags: [.isAtStartOfLine]), + LexemeSpec(.identifier, text: "a"), + ] + ) + } } From b8450eacee5f97da376bc9476e965a56f1011d3e Mon Sep 17 00:00:00 2001 From: Alex Hoppen Date: Mon, 18 Nov 2024 20:25:32 -0800 Subject: [PATCH 2/2] Miscellaneous (performance) improvements to conflict marker lexing --- Sources/SwiftParser/Lexer/Cursor.swift | 35 +++++++++++++++----------- 1 file changed, 20 insertions(+), 15 deletions(-) diff --git a/Sources/SwiftParser/Lexer/Cursor.swift b/Sources/SwiftParser/Lexer/Cursor.swift index d6df971eba9..16618a40d19 100644 --- a/Sources/SwiftParser/Lexer/Cursor.swift +++ b/Sources/SwiftParser/Lexer/Cursor.swift @@ -292,7 +292,7 @@ extension Lexer { self.stateStack.perform(stateTransition: stateTransition, stateAllocator: stateAllocator) } - func starts(with possiblePrefix: some Sequence) -> Bool { + func starts(with possiblePrefix: SyntaxText) -> Bool { return self.input.starts(with: possiblePrefix) } @@ -2036,7 +2036,7 @@ extension Lexer.Cursor { } // Special case; allow '`$`'. - if quote.starts(with: "`$`".utf8) { + if quote.starts(with: "`$`") { self = quote let firstBacktickConsumed = self.advance(matching: "`") let dollarConsumed = self.advance(matching: "$") @@ -2383,7 +2383,7 @@ extension Lexer.Cursor { case normal case perforce - var introducer: String { + var introducer: SyntaxText { switch self { case .perforce: return ">>>> " @@ -2392,7 +2392,7 @@ extension Lexer.Cursor { } } - var terminator: String { + var terminator: SyntaxText { switch self { case .perforce: return "<<<<\n" @@ -2408,11 +2408,15 @@ extension Lexer.Cursor { } // Check to see if we have <<<<<<< or >>>>. - guard start.starts(with: "<<<<<<< ".utf8) || start.starts(with: ">>>> ".utf8) else { + let kind: ConflictMarker + if start.starts(with: ConflictMarker.normal.introducer) { + kind = .normal + } else if start.starts(with: ConflictMarker.perforce.introducer) { + kind = .perforce + } else { return false } - let kind = start.is(at: "<") ? ConflictMarker.normal : .perforce guard let end = Self.findConflictEnd(start, markerKind: kind) else { // No end of conflict marker found. return false @@ -2432,16 +2436,17 @@ extension Lexer.Cursor { static func findConflictEnd(_ curPtr: Lexer.Cursor, markerKind: ConflictMarker) -> Lexer.Cursor? { // Get a reference to the rest of the buffer minus the length of the start // of the conflict marker. - let advanced = curPtr.input.baseAddress?.advanced(by: markerKind.introducer.utf8.count) + let advanced = curPtr.input.baseAddress?.advanced(by: markerKind.introducer.count) var restOfBuffer = Lexer.Cursor( - input: .init(start: advanced, count: curPtr.input.count - markerKind.introducer.utf8.count), - previous: curPtr.input[markerKind.introducer.utf8.count - 1] + input: .init(start: advanced, count: curPtr.input.count - markerKind.introducer.count), + previous: curPtr.input[markerKind.introducer.count - 1] ) + let terminator = markerKind.terminator + let terminatorStart = terminator.first! while !restOfBuffer.isAtEndOfFile { - let terminatorStart = markerKind.terminator.unicodeScalars.first! - restOfBuffer.advance(while: { byte in byte != terminatorStart }) + restOfBuffer.advance(while: { $0.value != terminatorStart }) - guard restOfBuffer.starts(with: markerKind.terminator.utf8) else { + guard restOfBuffer.starts(with: terminator) else { _ = restOfBuffer.advance() continue } @@ -2452,10 +2457,10 @@ extension Lexer.Cursor { continue } - let advanced = restOfBuffer.input.baseAddress?.advanced(by: markerKind.terminator.utf8.count) + let advanced = restOfBuffer.input.baseAddress?.advanced(by: terminator.count) return Lexer.Cursor( - input: .init(start: advanced, count: restOfBuffer.input.count - markerKind.terminator.utf8.count), - previous: restOfBuffer.input[markerKind.terminator.utf8.count - 1] + input: .init(start: advanced, count: restOfBuffer.input.count - terminator.count), + previous: restOfBuffer.input[terminator.count - 1] ) } return nil