Skip to content

Commit c2b7ad2

Browse files
committed
Correctly classify operators split from a regex literal
Previously we always produced `.prefixOperator` for the operator split off in front of a regex literal, which is wrong for cases such as prefix `&`, `=`, and `?`, each of which should produce a different token kind. Factor out the operator classification code, and use it to produce the correct token kind for the split-off operator.
1 parent 2f19e8c commit c2b7ad2

File tree

4 files changed

+162
-69
lines changed

4 files changed

+162
-69
lines changed

Sources/SwiftParser/Lexer/Cursor.swift

Lines changed: 80 additions & 67 deletions
Original file line numberDiff line numberDiff line change
@@ -2029,6 +2029,80 @@ extension Lexer.Cursor {
20292029
return Lexer.Result(kind, stateTransition: transition)
20302030
}
20312031

2032+
/// Classify an operator token given its start and ending cursor.
2033+
static func classifyOperatorToken(
2034+
operStart: Lexer.Cursor,
2035+
operEnd: Lexer.Cursor,
2036+
sourceBufferStart: Lexer.Cursor
2037+
) -> (RawTokenKind, error: LexingDiagnostic?) {
2038+
// Decide between the binary, prefix, and postfix cases.
2039+
// It's binary if either both sides are bound or both sides are not bound.
2040+
// Otherwise, it's postfix if left-bound and prefix if right-bound.
2041+
let leftBound = operStart.isLeftBound(sourceBufferStart: sourceBufferStart)
2042+
let rightBound = operEnd.isRightBound(isLeftBound: leftBound)
2043+
2044+
// Match various reserved words.
2045+
if operEnd.input.baseAddress! - operStart.input.baseAddress! == 1 {
2046+
switch operStart.peek() {
2047+
case UInt8(ascii: "="):
2048+
if leftBound != rightBound {
2049+
var errorPos = operStart
2050+
2051+
if rightBound {
2052+
_ = errorPos.advance()
2053+
}
2054+
2055+
return (
2056+
.equal,
2057+
error: LexingDiagnostic(
2058+
.equalMustHaveConsistentWhitespaceOnBothSides,
2059+
position: errorPos
2060+
)
2061+
)
2062+
} else {
2063+
return (.equal, error: nil)
2064+
}
2065+
case UInt8(ascii: "&"):
2066+
if leftBound == rightBound || leftBound {
2067+
break
2068+
}
2069+
return (.prefixAmpersand, error: nil)
2070+
case UInt8(ascii: "."):
2071+
return (.period, error: nil)
2072+
case UInt8(ascii: "?"):
2073+
if (leftBound) {
2074+
return (.postfixQuestionMark, error: nil)
2075+
}
2076+
return (.infixQuestionMark, error: nil)
2077+
default:
2078+
break
2079+
}
2080+
} else if (operEnd.input.baseAddress! - operStart.input.baseAddress! == 2) {
2081+
switch (operStart.peek(), operStart.peek(at: 1)) {
2082+
case (UInt8(ascii: "-"), UInt8(ascii: ">")): // ->
2083+
return (.arrow, error: nil)
2084+
case (UInt8(ascii: "*"), UInt8(ascii: "/")): // */
2085+
return (.unknown, error: LexingDiagnostic(.unexpectedBlockCommentEnd, position: operStart))
2086+
default:
2087+
break
2088+
}
2089+
} else {
2090+
// Verify there is no "*/" in the middle of the identifier token, we reject
2091+
// it as potentially ending a block comment.
2092+
if operStart.text(upTo: operEnd).contains("*/") {
2093+
return (.unknown, error: LexingDiagnostic(.unexpectedBlockCommentEnd, position: operStart))
2094+
}
2095+
}
2096+
2097+
if leftBound == rightBound {
2098+
return (.binaryOperator, error: nil)
2099+
} else if leftBound {
2100+
return (.postfixOperator, error: nil)
2101+
} else {
2102+
return (.prefixOperator, error: nil)
2103+
}
2104+
}
2105+
20322106
mutating func lexOperatorIdentifier(
20332107
sourceBufferStart: Lexer.Cursor,
20342108
preferRegexOverBinaryOperator: Bool
@@ -2087,73 +2161,12 @@ extension Lexer.Cursor {
20872161
_ = ptr.advance()
20882162
}
20892163
}
2090-
2091-
// Decide between the binary, prefix, and postfix cases.
2092-
// It's binary if either both sides are bound or both sides are not bound.
2093-
// Otherwise, it's postfix if left-bound and prefix if right-bound.
2094-
let leftBound = tokStart.isLeftBound(sourceBufferStart: sourceBufferStart)
2095-
let rightBound = self.isRightBound(isLeftBound: leftBound)
2096-
2097-
// Match various reserved words.
2098-
if self.input.baseAddress! - tokStart.input.baseAddress! == 1 {
2099-
switch tokStart.peek() {
2100-
case UInt8(ascii: "="):
2101-
if leftBound != rightBound {
2102-
var errorPos = tokStart
2103-
2104-
if rightBound {
2105-
_ = errorPos.advance()
2106-
}
2107-
2108-
return Lexer.Result(
2109-
.equal,
2110-
error: LexingDiagnostic(
2111-
.equalMustHaveConsistentWhitespaceOnBothSides,
2112-
position: errorPos
2113-
)
2114-
)
2115-
} else {
2116-
return Lexer.Result(.equal)
2117-
}
2118-
case UInt8(ascii: "&"):
2119-
if leftBound == rightBound || leftBound {
2120-
break
2121-
}
2122-
return Lexer.Result(.prefixAmpersand)
2123-
case UInt8(ascii: "."):
2124-
return Lexer.Result(.period)
2125-
case UInt8(ascii: "?"):
2126-
if (leftBound) {
2127-
return Lexer.Result(.postfixQuestionMark)
2128-
}
2129-
return Lexer.Result(.infixQuestionMark)
2130-
default:
2131-
break
2132-
}
2133-
} else if (self.input.baseAddress! - tokStart.input.baseAddress! == 2) {
2134-
switch (tokStart.peek(), tokStart.peek(at: 1)) {
2135-
case (UInt8(ascii: "-"), UInt8(ascii: ">")): // ->
2136-
return Lexer.Result(.arrow)
2137-
case (UInt8(ascii: "*"), UInt8(ascii: "/")): // */
2138-
return Lexer.Result(.unknown, error: LexingDiagnostic(.unexpectedBlockCommentEnd, position: tokStart))
2139-
default:
2140-
break
2141-
}
2142-
} else {
2143-
// Verify there is no "*/" in the middle of the identifier token, we reject
2144-
// it as potentially ending a block comment.
2145-
if tokStart.text(upTo: self).contains("*/") {
2146-
return Lexer.Result(.unknown, error: LexingDiagnostic(.unexpectedBlockCommentEnd, position: tokStart))
2147-
}
2148-
}
2149-
2150-
if leftBound == rightBound {
2151-
return Lexer.Result(.binaryOperator)
2152-
} else if leftBound {
2153-
return Lexer.Result(.postfixOperator)
2154-
} else {
2155-
return Lexer.Result(.prefixOperator)
2156-
}
2164+
let (kind, error) = Self.classifyOperatorToken(
2165+
operStart: tokStart,
2166+
operEnd: self,
2167+
sourceBufferStart: sourceBufferStart
2168+
)
2169+
return Lexer.Result(kind, error: error)
21572170
}
21582171

21592172
mutating func lexDollarIdentifier() -> Lexer.Result {

Sources/SwiftParser/Lexer/RegexLiteralLexer.swift

Lines changed: 7 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -766,8 +766,14 @@ extension Lexer.Cursor {
766766
// If we started lexing in the middle of an operator, split off the prefix
767767
// operator, and move the cursor to where the regex literal starts.
768768
self.position = regexStart.position
769+
let (kind, error) = Self.classifyOperatorToken(
770+
operStart: operatorStart,
771+
operEnd: regexStart,
772+
sourceBufferStart: sourceBufferStart
773+
)
769774
return Lexer.Result(
770-
.prefixOperator,
775+
kind,
776+
error: error,
771777
stateTransition: .pushRegexLexemes(index: 0, lexemes: lexemes)
772778
)
773779
} else {

Tests/SwiftParserTest/RegexLiteralTests.swift

Lines changed: 67 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -911,6 +911,73 @@ final class RegexLiteralTests: XCTestCase {
911911
)
912912
}
913913

914+
func testPrefixOpSplitting1() {
915+
assertParse(
916+
"""
917+
let x =1️⃣/abc/
918+
""",
919+
diagnostics: [
920+
DiagnosticSpec(message: "'=' must have consistent whitespace on both sides")
921+
]
922+
)
923+
}
924+
925+
func testPrefixOpSplitting2() {
926+
assertParse(
927+
"""
928+
let x1️⃣ .2️⃣/abc/
929+
""",
930+
diagnostics: [
931+
DiagnosticSpec(locationMarker: "1️⃣", message: "consecutive statements on a line must be separated by ';'"),
932+
DiagnosticSpec(locationMarker: "2️⃣", message: "expected name in member access"),
933+
]
934+
)
935+
}
936+
937+
func testPrefixOpSplitting3() {
938+
assertParse(
939+
"""
940+
let x = true?/abc/1️⃣:/def/
941+
""",
942+
substructure: Syntax(BinaryOperatorExprSyntax(operatorToken: .binaryOperator("/"))),
943+
diagnostics: [
944+
DiagnosticSpec(message: "extraneous code ':/def/' at top level")
945+
]
946+
)
947+
}
948+
949+
func testPrefixOpSplitting4() {
950+
assertParse(
951+
"""
952+
let x = true ?/abc/ : /def/
953+
""",
954+
substructure: Syntax(
955+
SequenceExprSyntax(
956+
elements: .init([
957+
BooleanLiteralExprSyntax(booleanLiteral: true),
958+
UnresolvedTernaryExprSyntax(firstChoice: RegexLiteralExprSyntax(regexPattern: .regexLiteralPattern("abc"))),
959+
RegexLiteralExprSyntax(regexPattern: .regexLiteralPattern("def")),
960+
])
961+
)
962+
)
963+
)
964+
}
965+
966+
func testPrefixOpSplitting5() {
967+
assertParse(
968+
"""
969+
let x = &/abc/
970+
""",
971+
substructure: Syntax(
972+
InOutExprSyntax(
973+
expression: RegexLiteralExprSyntax(
974+
regexPattern: .regexLiteralPattern("abc")
975+
)
976+
)
977+
)
978+
)
979+
}
980+
914981
func testNulCharacter() {
915982
assertParse(
916983
"/1️⃣\0/",

Tests/SwiftParserTest/translated/ForwardSlashRegexTests.swift

Lines changed: 8 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -449,7 +449,14 @@ final class ForwardSlashRegexTests: XCTestCase {
449449
assertParse(
450450
"""
451451
bar(&/x/)
452-
"""
452+
""",
453+
substructure: Syntax(
454+
InOutExprSyntax(
455+
expression: RegexLiteralExprSyntax(
456+
regexPattern: .regexLiteralPattern("x")
457+
)
458+
)
459+
)
453460
)
454461
}
455462

0 commit comments

Comments
 (0)