Skip to content

Complete regex literal lexing #1463

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 12 commits into from
Mar 31, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -58,6 +58,7 @@ public let SYNTAX_CLASSIFICATIONS: [SyntaxClassification] = [
SyntaxClassification(name: "ObjectLiteral", description: "An image, color, etc. literal."),
SyntaxClassification(name: "OperatorIdentifier", description: "An identifier referring to an operator."),
SyntaxClassification(name: "PoundDirectiveKeyword", description: "A `#` keyword like `#warning`."),
SyntaxClassification(name: "RegexLiteral", description: "A regex literal, including multiline regex literals."),
SyntaxClassification(name: "StringInterpolationAnchor", description: "The opening and closing parenthesis of string interpolation."),
SyntaxClassification(name: "StringLiteral", description: "A string literal including multiline string literals."),
SyntaxClassification(name: "TypeIdentifier", description: "An identifier referring to a type."),
Expand Down
24 changes: 21 additions & 3 deletions CodeGeneration/Sources/SyntaxSupport/ExprNodes.swift
Original file line number Diff line number Diff line change
Expand Up @@ -1199,9 +1199,27 @@ public let EXPR_NODES: [Node] = [
kind: "Expr",
children: [
Child(
name: "Regex",
kind: .token(choices: [.token(tokenKind: "RegexLiteralToken")])
)
name: "OpeningPounds",
kind: .token(choices: [.token(tokenKind: "ExtendedRegexDelimiterToken")]),
isOptional: true
),
Child(
name: "OpenSlash",
kind: .token(choices: [.token(tokenKind: "RegexSlashToken")])
),
Child(
name: "RegexPattern",
kind: .token(choices: [.token(tokenKind: "RegexLiteralPatternToken")])
),
Child(
name: "CloseSlash",
kind: .token(choices: [.token(tokenKind: "RegexSlashToken")])
),
Child(
name: "ClosingPounds",
kind: .token(choices: [.token(tokenKind: "ExtendedRegexDelimiterToken")]),
isOptional: true
),
]
),

Expand Down
4 changes: 3 additions & 1 deletion CodeGeneration/Sources/SyntaxSupport/TokenSpec.swift
Original file line number Diff line number Diff line change
Expand Up @@ -178,6 +178,7 @@ public let SYNTAX_TOKENS: [TokenSpec] = [
PunctuatorSpec(name: "Ellipsis", kind: "ellipsis", text: "..."),
PunctuatorSpec(name: "Equal", kind: "equal", text: "=", requiresLeadingSpace: true, requiresTrailingSpace: true),
PunctuatorSpec(name: "ExclamationMark", kind: "exclaim_postfix", text: "!"),
MiscSpec(name: "ExtendedRegexDelimiter", kind: "extended_regex_delimiter", nameForDiagnostics: "extended delimiter", classification: "RegexLiteral"),
LiteralSpec(name: "FloatingLiteral", kind: "floating_literal", nameForDiagnostics: "floating literal", classification: "FloatingLiteral"),
MiscSpec(name: "Identifier", kind: "identifier", nameForDiagnostics: "identifier", classification: "Identifier"),
PunctuatorSpec(name: "InfixQuestionMark", kind: "question_infix", text: "?"),
Expand All @@ -202,7 +203,8 @@ public let SYNTAX_TOKENS: [TokenSpec] = [
PunctuatorSpec(name: "PrefixAmpersand", kind: "amp_prefix", text: "&"),
MiscSpec(name: "PrefixOperator", kind: "oper_prefix", nameForDiagnostics: "prefix operator", classification: "OperatorIdentifier"),
MiscSpec(name: "RawStringDelimiter", kind: "raw_string_delimiter", nameForDiagnostics: "raw string delimiter"),
LiteralSpec(name: "RegexLiteral", kind: "regex_literal", nameForDiagnostics: "regex literal"),
MiscSpec(name: "RegexLiteralPattern", kind: "regex_literal_pattern", nameForDiagnostics: "regex pattern", classification: "RegexLiteral"),
PunctuatorSpec(name: "RegexSlash", kind: "regex_slash", text: "/", classification: "RegexLiteral"),
PunctuatorSpec(name: "RightAngle", kind: "r_angle", text: ">"),
PunctuatorSpec(name: "RightBrace", kind: "r_brace", text: "}"),
PunctuatorSpec(name: "RightParen", kind: "r_paren", text: ")"),
Expand Down
10 changes: 8 additions & 2 deletions Sources/IDEUtils/generated/SyntaxClassification.swift
Original file line number Diff line number Diff line change
Expand Up @@ -47,6 +47,8 @@ public enum SyntaxClassification {
case operatorIdentifier
/// A `#` keyword like `#warning`.
case poundDirectiveKeyword
/// A regex literal, including multiline regex literals.
case regexLiteral
/// The opening and closing parenthesis of string interpolation.
case stringInterpolationAnchor
/// A string literal including multiline string literals.
Expand Down Expand Up @@ -138,6 +140,8 @@ extension RawTokenKind {
return .none
case .exclamationMark:
return .none
case .extendedRegexDelimiter:
return .regexLiteral
case .floatingLiteral:
return .floatingLiteral
case .identifier:
Expand Down Expand Up @@ -186,8 +190,10 @@ extension RawTokenKind {
return .operatorIdentifier
case .rawStringDelimiter:
return .none
case .regexLiteral:
return .none
case .regexLiteralPattern:
return .regexLiteral
case .regexSlash:
return .regexLiteral
case .rightAngle:
return .none
case .rightBrace:
Expand Down
1 change: 1 addition & 0 deletions Sources/SwiftParser/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -42,6 +42,7 @@ add_swift_host_library(SwiftParser
Lexer/Lexeme.swift
Lexer/LexemeSequence.swift
Lexer/Lexer.swift
Lexer/RegexLiteralLexer.swift
Lexer/UnicodeScalarExtensions.swift
)

Expand Down
24 changes: 1 addition & 23 deletions Sources/SwiftParser/Declarations.swift
Original file line number Diff line number Diff line change
Expand Up @@ -1315,28 +1315,6 @@ extension Parser {
}

extension Parser {
/// Are we at a regular expression literal that could act as an operator?
///
/// - Returns: `true` only if the current token is a `.regexLiteral` whose text
///   starts with `/` and, when re-lexed as an operator identifier, yields a
///   token kind other than `.unknown` while leaving the cursor's `input` empty.
private mutating func atRegexLiteralThatCouldBeAnOperator() -> Bool {
  guard self.at(.regexLiteral) else {
    return false
  }

  // Try to re-lex the regex literal as an operator. If it succeeds and
  // consumes the entire regex literal, we're done.
  return self.currentToken.tokenText.withBuffer {
    (buffer: UnsafeBufferPointer<UInt8>) -> Bool in
    // `first` is `nil` for an empty buffer, so this check can never index out
    // of bounds the way `buffer[0]` would. It also runs before the cursor is
    // built, so non-matching text bails out without any lexer work.
    guard buffer.first == UInt8(ascii: "/") else { return false }

    var cursor = Lexer.Cursor(input: buffer, previous: 0)
    switch cursor.lexOperatorIdentifier(sourceBufferStart: cursor).tokenKind {
    case .unknown:
      return false

    default:
      // Only a full match counts: nothing may remain after the operator.
      return cursor.input.isEmpty
    }
  }
}

@_spi(RawSyntax)
public mutating func parseFuncDeclaration(
_ attrs: DeclAttributes,
Expand All @@ -1345,7 +1323,7 @@ extension Parser {
let (unexpectedBeforeFuncKeyword, funcKeyword) = self.eat(handle)
let unexpectedBeforeIdentifier: RawUnexpectedNodesSyntax?
let identifier: RawTokenSyntax
if self.at(anyIn: Operator.self) != nil || self.at(.exclamationMark, .prefixAmpersand) || self.atRegexLiteralThatCouldBeAnOperator() {
if self.at(anyIn: Operator.self) != nil || self.at(.exclamationMark, .prefixAmpersand) {
var name = self.currentToken.tokenText
if name.count > 1 && name.hasSuffix("<") && self.peek().rawTokenKind == .identifier {
name = SyntaxText(rebasing: name.dropLast())
Expand Down
34 changes: 29 additions & 5 deletions Sources/SwiftParser/Expressions.swift
Original file line number Diff line number Diff line change
Expand Up @@ -1170,7 +1170,7 @@ extension Parser {
)
case (.rawStringDelimiter, _)?, (.stringQuote, _)?, (.multilineStringQuote, _)?, (.singleQuote, _)?:
return RawExprSyntax(self.parseStringLiteral())
case (.regexLiteral, _)?:
case (.extendedRegexDelimiter, _)?, (.regexSlash, _)?:
return RawExprSyntax(self.parseRegexLiteral())
case (.nilKeyword, let handle)?:
let nilKeyword = self.eat(handle)
Expand Down Expand Up @@ -1433,13 +1433,37 @@ extension Parser {
/// Grammar
/// =======
///
/// regular-expression-literal → '\' `Any valid regular expression characters` '\'
/// regular-expression-literal → '#'* '/' `Any valid regular expression characters` '/' '#'*
///
/// - Returns: The `RawRegexLiteralExprSyntax` node assembled from the tokens
///   consumed by this method.
@_spi(RawSyntax)
public mutating func parseRegexLiteral() -> RawRegexLiteralExprSyntax {
let (unexpectedBeforeLiteral, literal) = self.expect(.regexLiteral)
// See if we have an opening set of pounds.
let openPounds = self.consume(if: .extendedRegexDelimiter)

// Parse the opening slash.
let (unexpectedBeforeSlash, openSlash) = self.expect(.regexSlash)

// If we had opening pounds, there should be no trivia for the slash.
// This is enforced with `precondition`, so a violation traps at runtime
// rather than being reported as a parse diagnostic.
if let openPounds = openPounds {
precondition(openPounds.trailingTriviaByteLength == 0 && openSlash.leadingTriviaByteLength == 0)
}

// Parse the pattern and closing slash, avoiding recovery or leading trivia
// as the lexer should provide the tokens exactly in order without trivia,
// otherwise they should be treated as missing.
let pattern = self.expectWithoutRecoveryOrLeadingTrivia(.regexLiteralPattern)
let closeSlash = self.expectWithoutRecoveryOrLeadingTrivia(.regexSlash)

// Finally, parse a closing set of pounds.
// The opening pounds (possibly `nil`) are passed as `matching:` to the helper.
let (unexpectedBeforeClosePounds, closePounds) = parsePoundDelimiter(.extendedRegexDelimiter, matching: openPounds)

return RawRegexLiteralExprSyntax(
unexpectedBeforeLiteral,
regex: literal,
openingPounds: openPounds,
unexpectedBeforeSlash,
openSlash: openSlash,
regexPattern: pattern,
closeSlash: closeSlash,
unexpectedBeforeClosePounds,
closingPounds: closePounds,
arena: self.arena
)
}
Expand Down
Loading