diff --git a/CodeGeneration/Sources/SyntaxSupport/Classification.swift b/CodeGeneration/Sources/SyntaxSupport/Classification.swift index 860218fb2fe..0b1f5337734 100644 --- a/CodeGeneration/Sources/SyntaxSupport/Classification.swift +++ b/CodeGeneration/Sources/SyntaxSupport/Classification.swift @@ -58,6 +58,7 @@ public let SYNTAX_CLASSIFICATIONS: [SyntaxClassification] = [ SyntaxClassification(name: "ObjectLiteral", description: "An image, color, etc. literal."), SyntaxClassification(name: "OperatorIdentifier", description: "An identifier referring to an operator."), SyntaxClassification(name: "PoundDirectiveKeyword", description: "A `#` keyword like `#warning`."), + SyntaxClassification(name: "RegexLiteral", description: "A regex literal, including multiline regex literals."), SyntaxClassification(name: "StringInterpolationAnchor", description: "The opening and closing parenthesis of string interpolation."), SyntaxClassification(name: "StringLiteral", description: "A string literal including multiline string literals."), SyntaxClassification(name: "TypeIdentifier", description: "An identifier referring to a type."), diff --git a/CodeGeneration/Sources/SyntaxSupport/ExprNodes.swift b/CodeGeneration/Sources/SyntaxSupport/ExprNodes.swift index 9cb05403b41..d6bb093ccfe 100644 --- a/CodeGeneration/Sources/SyntaxSupport/ExprNodes.swift +++ b/CodeGeneration/Sources/SyntaxSupport/ExprNodes.swift @@ -1199,9 +1199,27 @@ public let EXPR_NODES: [Node] = [ kind: "Expr", children: [ Child( - name: "Regex", - kind: .token(choices: [.token(tokenKind: "RegexLiteralToken")]) - ) + name: "OpeningPounds", + kind: .token(choices: [.token(tokenKind: "ExtendedRegexDelimiterToken")]), + isOptional: true + ), + Child( + name: "OpenSlash", + kind: .token(choices: [.token(tokenKind: "RegexSlashToken")]) + ), + Child( + name: "RegexPattern", + kind: .token(choices: [.token(tokenKind: "RegexLiteralPatternToken")]) + ), + Child( + name: "CloseSlash", + kind: .token(choices: [.token(tokenKind: 
"RegexSlashToken")]) + ), + Child( + name: "ClosingPounds", + kind: .token(choices: [.token(tokenKind: "ExtendedRegexDelimiterToken")]), + isOptional: true + ), ] ), diff --git a/CodeGeneration/Sources/SyntaxSupport/TokenSpec.swift b/CodeGeneration/Sources/SyntaxSupport/TokenSpec.swift index c484a59fc65..2d9fc349d54 100644 --- a/CodeGeneration/Sources/SyntaxSupport/TokenSpec.swift +++ b/CodeGeneration/Sources/SyntaxSupport/TokenSpec.swift @@ -178,6 +178,7 @@ public let SYNTAX_TOKENS: [TokenSpec] = [ PunctuatorSpec(name: "Ellipsis", kind: "ellipsis", text: "..."), PunctuatorSpec(name: "Equal", kind: "equal", text: "=", requiresLeadingSpace: true, requiresTrailingSpace: true), PunctuatorSpec(name: "ExclamationMark", kind: "exclaim_postfix", text: "!"), + MiscSpec(name: "ExtendedRegexDelimiter", kind: "extended_regex_delimiter", nameForDiagnostics: "extended delimiter", classification: "RegexLiteral"), LiteralSpec(name: "FloatingLiteral", kind: "floating_literal", nameForDiagnostics: "floating literal", classification: "FloatingLiteral"), MiscSpec(name: "Identifier", kind: "identifier", nameForDiagnostics: "identifier", classification: "Identifier"), PunctuatorSpec(name: "InfixQuestionMark", kind: "question_infix", text: "?"), @@ -202,7 +203,8 @@ public let SYNTAX_TOKENS: [TokenSpec] = [ PunctuatorSpec(name: "PrefixAmpersand", kind: "amp_prefix", text: "&"), MiscSpec(name: "PrefixOperator", kind: "oper_prefix", nameForDiagnostics: "prefix operator", classification: "OperatorIdentifier"), MiscSpec(name: "RawStringDelimiter", kind: "raw_string_delimiter", nameForDiagnostics: "raw string delimiter"), - LiteralSpec(name: "RegexLiteral", kind: "regex_literal", nameForDiagnostics: "regex literal"), + MiscSpec(name: "RegexLiteralPattern", kind: "regex_literal_pattern", nameForDiagnostics: "regex pattern", classification: "RegexLiteral"), + PunctuatorSpec(name: "RegexSlash", kind: "regex_slash", text: "/", classification: "RegexLiteral"), PunctuatorSpec(name: "RightAngle", 
kind: "r_angle", text: ">"), PunctuatorSpec(name: "RightBrace", kind: "r_brace", text: "}"), PunctuatorSpec(name: "RightParen", kind: "r_paren", text: ")"), diff --git a/Sources/IDEUtils/generated/SyntaxClassification.swift b/Sources/IDEUtils/generated/SyntaxClassification.swift index 697d5344a52..b4a9f336ec7 100644 --- a/Sources/IDEUtils/generated/SyntaxClassification.swift +++ b/Sources/IDEUtils/generated/SyntaxClassification.swift @@ -47,6 +47,8 @@ public enum SyntaxClassification { case operatorIdentifier /// A `#` keyword like `#warning`. case poundDirectiveKeyword + /// A regex literal, including multiline regex literals. + case regexLiteral /// The opening and closing parenthesis of string interpolation. case stringInterpolationAnchor /// A string literal including multiline string literals. @@ -138,6 +140,8 @@ extension RawTokenKind { return .none case .exclamationMark: return .none + case .extendedRegexDelimiter: + return .regexLiteral case .floatingLiteral: return .floatingLiteral case .identifier: @@ -186,8 +190,10 @@ extension RawTokenKind { return .operatorIdentifier case .rawStringDelimiter: return .none - case .regexLiteral: - return .none + case .regexLiteralPattern: + return .regexLiteral + case .regexSlash: + return .regexLiteral case .rightAngle: return .none case .rightBrace: diff --git a/Sources/SwiftParser/CMakeLists.txt b/Sources/SwiftParser/CMakeLists.txt index 04cd4f94900..6aae28cb5a0 100644 --- a/Sources/SwiftParser/CMakeLists.txt +++ b/Sources/SwiftParser/CMakeLists.txt @@ -42,6 +42,7 @@ add_swift_host_library(SwiftParser Lexer/Lexeme.swift Lexer/LexemeSequence.swift Lexer/Lexer.swift + Lexer/RegexLiteralLexer.swift Lexer/UnicodeScalarExtensions.swift ) diff --git a/Sources/SwiftParser/Declarations.swift b/Sources/SwiftParser/Declarations.swift index 36319c37477..4e1e8c5d8b4 100644 --- a/Sources/SwiftParser/Declarations.swift +++ b/Sources/SwiftParser/Declarations.swift @@ -1315,28 +1315,6 @@ extension Parser { } extension Parser { - /// 
Are we at a regular expression literal that could act as an operator? - private mutating func atRegexLiteralThatCouldBeAnOperator() -> Bool { - guard self.at(.regexLiteral) else { - return false - } - - /// Try to re-lex at regex literal as an operator. If it succeeds and - /// consumes the entire regex literal, we're done. - return self.currentToken.tokenText.withBuffer { - (buffer: UnsafeBufferPointer) -> Bool in - var cursor = Lexer.Cursor(input: buffer, previous: 0) - guard buffer[0] == UInt8(ascii: "/") else { return false } - switch cursor.lexOperatorIdentifier(sourceBufferStart: cursor).tokenKind { - case .unknown: - return false - - default: - return cursor.input.isEmpty - } - } - } - @_spi(RawSyntax) public mutating func parseFuncDeclaration( _ attrs: DeclAttributes, @@ -1345,7 +1323,7 @@ extension Parser { let (unexpectedBeforeFuncKeyword, funcKeyword) = self.eat(handle) let unexpectedBeforeIdentifier: RawUnexpectedNodesSyntax? let identifier: RawTokenSyntax - if self.at(anyIn: Operator.self) != nil || self.at(.exclamationMark, .prefixAmpersand) || self.atRegexLiteralThatCouldBeAnOperator() { + if self.at(anyIn: Operator.self) != nil || self.at(.exclamationMark, .prefixAmpersand) { var name = self.currentToken.tokenText if name.count > 1 && name.hasSuffix("<") && self.peek().rawTokenKind == .identifier { name = SyntaxText(rebasing: name.dropLast()) diff --git a/Sources/SwiftParser/Expressions.swift b/Sources/SwiftParser/Expressions.swift index 77afcb5f691..6346f216ec8 100644 --- a/Sources/SwiftParser/Expressions.swift +++ b/Sources/SwiftParser/Expressions.swift @@ -1170,7 +1170,7 @@ extension Parser { ) case (.rawStringDelimiter, _)?, (.stringQuote, _)?, (.multilineStringQuote, _)?, (.singleQuote, _)?: return RawExprSyntax(self.parseStringLiteral()) - case (.regexLiteral, _)?: + case (.extendedRegexDelimiter, _)?, (.regexSlash, _)?: return RawExprSyntax(self.parseRegexLiteral()) case (.nilKeyword, let handle)?: let nilKeyword = self.eat(handle) @@ 
-1433,13 +1433,37 @@ extension Parser { /// Grammar /// ======= /// - /// regular-expression-literal → '\' `Any valid regular expression characters` '\' + /// regular-expression-literal → '#'* '/' `Any valid regular expression characters` '/' '#'* @_spi(RawSyntax) public mutating func parseRegexLiteral() -> RawRegexLiteralExprSyntax { - let (unexpectedBeforeLiteral, literal) = self.expect(.regexLiteral) + // See if we have an opening set of pounds. + let openPounds = self.consume(if: .extendedRegexDelimiter) + + // Parse the opening slash. + let (unexpectedBeforeSlash, openSlash) = self.expect(.regexSlash) + + // If we had opening pounds, there should be no trivia for the slash. + if let openPounds = openPounds { + precondition(openPounds.trailingTriviaByteLength == 0 && openSlash.leadingTriviaByteLength == 0) + } + + // Parse the pattern and closing slash, avoiding recovery or leading trivia + // as the lexer should provide the tokens exactly in order without trivia, + // otherwise they should be treated as missing. + let pattern = self.expectWithoutRecoveryOrLeadingTrivia(.regexLiteralPattern) + let closeSlash = self.expectWithoutRecoveryOrLeadingTrivia(.regexSlash) + + // Finally, parse a closing set of pounds. 
+ let (unexpectedBeforeClosePounds, closePounds) = parsePoundDelimiter(.extendedRegexDelimiter, matching: openPounds) + return RawRegexLiteralExprSyntax( - unexpectedBeforeLiteral, - regex: literal, + openingPounds: openPounds, + unexpectedBeforeSlash, + openSlash: openSlash, + regexPattern: pattern, + closeSlash: closeSlash, + unexpectedBeforeClosePounds, + closingPounds: closePounds, arena: self.arena ) } diff --git a/Sources/SwiftParser/Lexer/Cursor.swift b/Sources/SwiftParser/Lexer/Cursor.swift index cc59d28249b..6497ac19456 100644 --- a/Sources/SwiftParser/Lexer/Cursor.swift +++ b/Sources/SwiftParser/Lexer/Cursor.swift @@ -49,10 +49,21 @@ extension Lexer.Cursor { /// New entries in the state stack are added when: /// - A string literal is entered /// - A string interpolation inside is entered + /// - A regex literal is being lexed + /// - A narrow case for 'try?' and 'try!' to ensure correct regex lexing enum State { /// Normal top-level lexing mode case normal + /// A narrow mode that's used for 'try?' and 'try!' to ensure we prefer to + /// lex a regex literal rather than a binary operator. This is needed as the + /// `previousTokenKind` will be `.postfixOperator`, which would normally + /// indicate a binary operator is expected next, but in this case we know it + /// must be an expression. See the comment in + /// `tryScanOperatorAsRegexLiteral` for more info. + /// NOTE: This is a complete hack, do not add new uses of this. + case preferRegexOverBinaryOperator + /// The lexer has lexed `delimiterLength` raw string delimiters '##' but not /// the string quote itself. case afterRawStringDelimiter(delimiterLength: Int) @@ -82,11 +93,16 @@ extension Lexer.Cursor { /// `stringInterpolationStart` points to the first character inside the interpolation. case inStringInterpolation(stringLiteralKind: StringLiteralKind, parenCount: Int) + /// We have encountered a regex literal, and have its tokens to work + /// through. 
`lexemes` is a pointer to the lexemes allocated in the state + /// stack bump pointer allocator. + case inRegexLiteral(index: UInt8, lexemes: UnsafePointer) + /// The mode in which leading trivia should be lexed for this state or `nil` /// if no trivia should be lexed. func leadingTriviaLexingMode(cursor: Lexer.Cursor) -> TriviaLexingMode? { switch self { - case .normal: return .normal + case .normal, .preferRegexOverBinaryOperator: return .normal case .afterRawStringDelimiter: return nil case .inStringLiteral: return nil case .afterStringLiteral: return nil @@ -99,6 +115,7 @@ extension Lexer.Cursor { case .singleLine, .singleQuote: return .noNewlines case .multiLine: return .normal } + case .inRegexLiteral: return nil } } @@ -106,13 +123,14 @@ extension Lexer.Cursor { /// if no trivia should be lexed. func trailingTriviaLexingMode(cursor: Lexer.Cursor) -> TriviaLexingMode? { switch self { - case .normal: return .noNewlines + case .normal, .preferRegexOverBinaryOperator: return .noNewlines case .afterRawStringDelimiter: return nil case .inStringLiteral: return nil case .afterStringLiteral: return nil case .afterClosingStringQuote: return nil case .inStringInterpolationStart: return nil case .inStringInterpolation: return .noNewlines + case .inRegexLiteral: return nil } } @@ -122,13 +140,14 @@ extension Lexer.Cursor { /// hitting a newline. 
var shouldPopStateWhenReachingNewlineInTrailingTrivia: Bool { switch self { - case .normal: return false + case .normal, .preferRegexOverBinaryOperator: return false case .afterRawStringDelimiter: return false case .inStringLiteral(kind: let stringLiteralKind, delimiterLength: _): return stringLiteralKind != .multiLine case .afterStringLiteral: return false case .afterClosingStringQuote: return false case .inStringInterpolationStart: return false case .inStringInterpolation: return false + case .inRegexLiteral: return false } } } @@ -166,6 +185,8 @@ extension Lexer.Cursor { } } topState = newState + case .pushRegexLexemes(let index, let lexemes): + perform(stateTransition: .push(newState: .inRegexLiteral(index: index, lexemes: lexemes.allocate(in: stateAllocator))), stateAllocator: stateAllocator) case .replace(newState: let newState): topState = newState case .pop: @@ -189,9 +210,13 @@ extension Lexer.Cursor { /// The position in the token at which the diagnostic is. let position: Lexer.Cursor.Position - init(_ kind: TokenDiagnostic.Kind, position: Lexer.Cursor) { + init(_ kind: TokenDiagnostic.Kind, position: Lexer.Cursor.Position) { self.kind = kind - self.position = position.position + self.position = position + } + + init(_ kind: TokenDiagnostic.Kind, position: Lexer.Cursor) { + self.init(kind, position: position.position) } func tokenDiagnostic(tokenStart: Lexer.Cursor) -> TokenDiagnostic { @@ -217,6 +242,11 @@ extension Lexer { /// If we have already lexed a token, the kind of the previously lexed token var previousTokenKind: RawTokenKind? + + /// If the `previousTokenKind` is `.keyword`, the keyword kind. Otherwise + /// `nil`. + var previousKeyword: Keyword? 
+ private var stateStack: StateStack = StateStack() init(input: UnsafeBufferPointer, previous: UInt8) { @@ -263,6 +293,11 @@ extension Lexer { enum StateTransition { /// Push a new state onto the state stack case push(newState: Cursor.State) + + /// Push a set of regex literal lexemes onto the state stack. This avoids + /// needing to plumb the state allocator through the lexer. + case pushRegexLexemes(index: UInt8, lexemes: RegexLiteralLexemes) + /// Replace the current state on the state stack by `newState` case replace(newState: Cursor.State) /// Pop a single state from the state stack. @@ -280,18 +315,56 @@ extension Lexer { /// for this lexeme. let trailingTriviaLexingMode: Lexer.Cursor.TriviaLexingMode? - init( + /// If `tokenKind` is `.keyword`, the kind of keyword produced, otherwise + /// `nil`. + let keywordKind: Keyword? + + private init( _ tokenKind: RawTokenKind, - flags: Lexer.Lexeme.Flags = [], - error: Cursor.LexingDiagnostic? = nil, - stateTransition: StateTransition? = nil, - trailingTriviaLexingMode: Lexer.Cursor.TriviaLexingMode? = nil + flags: Lexer.Lexeme.Flags, + error: Cursor.LexingDiagnostic?, + stateTransition: StateTransition?, + trailingTriviaLexingMode: Lexer.Cursor.TriviaLexingMode?, + keywordKind: Keyword? ) { self.tokenKind = tokenKind self.flags = flags self.error = error self.stateTransition = stateTransition self.trailingTriviaLexingMode = trailingTriviaLexingMode + self.keywordKind = keywordKind + } + + /// Create a lexer result. Note that keywords should use `Result.keyword` + /// instead. + init( + _ tokenKind: RawTokenKind, + flags: Lexer.Lexeme.Flags = [], + error: Cursor.LexingDiagnostic? = nil, + stateTransition: StateTransition? = nil, + trailingTriviaLexingMode: Lexer.Cursor.TriviaLexingMode? 
= nil + ) { + precondition(tokenKind != .keyword, "Use Result.keyword instead") + self.init( + tokenKind, + flags: flags, + error: error, + stateTransition: stateTransition, + trailingTriviaLexingMode: trailingTriviaLexingMode, + keywordKind: nil + ) + } + + /// Produce a lexer result for a given keyword. + static func keyword(_ kind: Keyword) -> Self { + Self( + .keyword, + flags: [], + error: nil, + stateTransition: nil, + trailingTriviaLexingMode: nil, + keywordKind: kind + ) } } } @@ -337,7 +410,11 @@ extension Lexer.Cursor { let result: Lexer.Result switch currentState { case .normal: - result = lexNormal(sourceBufferStart: sourceBufferStart) + result = lexNormal(sourceBufferStart: sourceBufferStart, preferRegexOverBinaryOperator: false) + case .preferRegexOverBinaryOperator: + // In this state we lex a single token with the flag set, and then pop the state. + result = lexNormal(sourceBufferStart: sourceBufferStart, preferRegexOverBinaryOperator: true) + self.stateStack.perform(stateTransition: .pop, stateAllocator: stateAllocator) case .afterRawStringDelimiter(delimiterLength: let delimiterLength): result = lexAfterRawStringDelimiter(delimiterLength: delimiterLength) case .inStringLiteral(kind: let stringLiteralKind, delimiterLength: let delimiterLength): @@ -350,6 +427,8 @@ extension Lexer.Cursor { result = lexInStringInterpolationStart(stringLiteralKind: stringLiteralKind) case .inStringInterpolation(stringLiteralKind: let stringLiteralKind, parenCount: let parenCount): result = lexInStringInterpolation(stringLiteralKind: stringLiteralKind, parenCount: parenCount, sourceBufferStart: sourceBufferStart) + case .inRegexLiteral(let index, let lexemes): + result = lexInRegexLiteral(lexemes.pointee[index...], existingPtr: lexemes) } if let stateTransition = result.stateTransition { @@ -372,10 +451,9 @@ extension Lexer.Cursor { flags.insert(.isAtStartOfLine) } - self.previousTokenKind = result.tokenKind diagnostic = TokenDiagnostic(combining: diagnostic, 
result.error?.tokenDiagnostic(tokenStart: cursor)) - return .init( + let lexeme = Lexer.Lexeme( tokenKind: result.tokenKind, flags: flags, diagnostic: diagnostic, @@ -385,6 +463,10 @@ extension Lexer.Cursor { trailingTriviaLength: trailingTriviaStart.distance(to: self), cursor: cursor ) + self.previousTokenKind = result.tokenKind + self.previousKeyword = result.keywordKind + + return lexeme } } @@ -516,6 +598,15 @@ extension Lexer.Cursor.Position { self.input = UnsafeBufferPointer(rebasing: input) return c } + + /// Advance the cursor position by `n` bytes. The offset must be valid. + func advanced(by n: Int) -> Self { + precondition(n > 0) + precondition(n <= self.input.count) + var input = self.input.dropFirst(n - 1) + let c = input.removeFirst() + return .init(input: UnsafeBufferPointer(rebasing: input), previous: c) + } } extension Lexer.Cursor { @@ -776,7 +867,10 @@ extension Lexer.Cursor { // MARK: - Main entry point extension Lexer.Cursor { - private mutating func lexNormal(sourceBufferStart: Lexer.Cursor) -> Lexer.Result { + private mutating func lexNormal( + sourceBufferStart: Lexer.Cursor, + preferRegexOverBinaryOperator: Bool + ) -> Lexer.Result { switch self.peek() { case UInt8(ascii: "@"): _ = self.advance(); return Lexer.Result(.atSign) case UInt8(ascii: "{"): _ = self.advance(); return Lexer.Result(.leftBrace) @@ -798,46 +892,32 @@ extension Lexer.Cursor { } // Try lex a regex literal. - if let token = self.tryLexRegexLiteral(sourceBufferStart: sourceBufferStart) { - return Lexer.Result(token) + if let result = self.lexRegexLiteral() { + return result } // Otherwise try lex a magic pound literal. return self.lexMagicPoundLiteral() - case UInt8(ascii: "/"): - // Try lex a regex literal. - if let token = self.tryLexRegexLiteral(sourceBufferStart: sourceBufferStart) { - return Lexer.Result(token) - } - // Otherwise try lex a magic pound literal. 
- return self.lexOperatorIdentifier(sourceBufferStart: sourceBufferStart) - case UInt8(ascii: "!"): - if self.isLeftBound(sourceBufferStart: sourceBufferStart) { - _ = self.advance() - return Lexer.Result(.exclamationMark) + case UInt8(ascii: "!"), UInt8(ascii: "?"): + if let result = lexPostfixOptionalChain(sourceBufferStart: sourceBufferStart) { + return result } - return self.lexOperatorIdentifier(sourceBufferStart: sourceBufferStart) + return self.lexOperatorIdentifier( + sourceBufferStart: sourceBufferStart, + preferRegexOverBinaryOperator: preferRegexOverBinaryOperator + ) - case UInt8(ascii: "?"): - if self.isLeftBound(sourceBufferStart: sourceBufferStart) { - _ = self.advance() - return Lexer.Result(.postfixQuestionMark) + case UInt8(ascii: "<"): + if self.is(offset: 1, at: "#"), + let result = self.tryLexEditorPlaceholder(sourceBufferStart: sourceBufferStart) + { + return result } - return self.lexOperatorIdentifier(sourceBufferStart: sourceBufferStart) + return self.lexOperatorIdentifier( + sourceBufferStart: sourceBufferStart, + preferRegexOverBinaryOperator: preferRegexOverBinaryOperator + ) - case UInt8(ascii: "<"): - if self.is(offset: 1, at: "#") { - return self.tryLexEditorPlaceholder(sourceBufferStart: sourceBufferStart) - } - return self.lexOperatorIdentifier(sourceBufferStart: sourceBufferStart) - case UInt8(ascii: ">"): - return self.lexOperatorIdentifier(sourceBufferStart: sourceBufferStart) - - case UInt8(ascii: "="), UInt8(ascii: "-"), UInt8(ascii: "+"), - UInt8(ascii: "*"), UInt8(ascii: "%"), UInt8(ascii: "&"), - UInt8(ascii: "|"), UInt8(ascii: "^"), UInt8(ascii: "~"), - UInt8(ascii: "."): - return self.lexOperatorIdentifier(sourceBufferStart: sourceBufferStart) case UInt8(ascii: "A"), UInt8(ascii: "B"), UInt8(ascii: "C"), UInt8(ascii: "D"), UInt8(ascii: "E"), UInt8(ascii: "F"), UInt8(ascii: "G"), UInt8(ascii: "H"), UInt8(ascii: "I"), @@ -881,7 +961,10 @@ extension Lexer.Cursor { } if tmp.advance(if: { 
Unicode.Scalar($0).isOperatorStartCodePoint }) { - return self.lexOperatorIdentifier(sourceBufferStart: sourceBufferStart) + return self.lexOperatorIdentifier( + sourceBufferStart: sourceBufferStart, + preferRegexOverBinaryOperator: preferRegexOverBinaryOperator + ) } switch self.lexUnknown() { @@ -972,7 +1055,7 @@ extension Lexer.Cursor { return Lexer.Result(.stringSegment, stateTransition: .pop) default: // If we haven't reached the end of the string interpolation, lex as if we were in a normal expression. - return self.lexNormal(sourceBufferStart: sourceBufferStart) + return self.lexNormal(sourceBufferStart: sourceBufferStart, preferRegexOverBinaryOperator: false) } } } @@ -1680,9 +1763,9 @@ extension Lexer.Cursor { return .pop case .afterRawStringDelimiter(delimiterLength: let delimiterLength): return .replace(newState: .inStringLiteral(kind: kind, delimiterLength: delimiterLength)) - case .normal, .inStringInterpolation: + case .normal, .preferRegexOverBinaryOperator, .inStringInterpolation: return .push(newState: .inStringLiteral(kind: kind, delimiterLength: 0)) - default: + case .inRegexLiteral, .inStringLiteral, .afterClosingStringQuote, .inStringInterpolationStart: preconditionFailure("Unexpected currentState '\(currentState)' for 'stateTransitionAfterLexingStringQuote'") } } @@ -1878,7 +1961,7 @@ extension Lexer.Cursor { let text = tokStart.text(upTo: self) if let keyword = Keyword(text), keyword.isLexerClassified { - return Lexer.Result(.keyword) + return Lexer.Result.keyword(keyword) } else if text == "_" { return Lexer.Result(.wildcard) } else { @@ -1919,64 +2002,57 @@ extension Lexer.Cursor { return Lexer.Result(.backtick) } - mutating func lexOperatorIdentifier(sourceBufferStart: Lexer.Cursor) -> Lexer.Result { - let tokStart = self - let didStart = self.advance(if: { $0.isOperatorStartCodePoint }) - precondition(didStart, "unexpected operator start") + /// Attempt to lex a postfix '!' or '?'. 
+ mutating func lexPostfixOptionalChain(sourceBufferStart: Lexer.Cursor) -> Lexer.Result? { + // Must be left bound, otherwise this isn't postfix. + guard self.isLeftBound(sourceBufferStart: sourceBufferStart) else { return nil } - repeat { - // '.' cannot appear in the middle of an operator unless the operator - // started with a '.'. - if self.is(at: ".") && tokStart.is(notAt: ".") { - break - } - let text = SyntaxText(baseAddress: self.input.baseAddress, count: self.input.count) - if text.hasPrefix("<#") && text.containsPlaceholderEnd() { - break - } - - // // If we are lexing a `/.../` regex literal, we don't consider `/` to be an - // // operator character. - // if ForwardSlashRegexMode != LexerForwardSlashRegexMode::None && - // CurPtr.peek() == UInt8(ascii: "/") { - // break - // } - } while self.advance(if: { $0.isOperatorContinuationCodePoint }) - - if self.input.baseAddress! - tokStart.input.baseAddress! > 2 { - // If there is a "//" or "/*" in the middle of an identifier token, - // it starts a comment. - var ptr = tokStart - // Skip over the first character. A `//` or /*` at the beginning would have - // been consumed as trivia. - _ = ptr.advance() - while ptr.input.baseAddress! < self.input.baseAddress! { - if ptr.is(at: "/") && ptr.is(offset: 1, at: "/", "*") { - self = ptr - break - } - _ = ptr.advance() - } + var transition: Lexer.StateTransition? + if previousKeyword == .try { + // If we have 'try' as the previous keyword kind, we have `try?` or `try!` + // and need to transition into the state where we prefer lexing a regex + // literal over a binary operator. See the comment in + // `tryScanOperatorAsRegexLiteral` for more info. + transition = .push(newState: .preferRegexOverBinaryOperator) } + let kind: RawTokenKind = { + switch self.peek() { + case UInt8(ascii: "!"): + return .exclamationMark + case UInt8(ascii: "?"): + return .postfixQuestionMark + default: + preconditionFailure("Must be at '!' 
or '?'") + } + }() + _ = self.advance() + return Lexer.Result(kind, stateTransition: transition) + } + /// Classify an operator token given its start and ending cursor. + static func classifyOperatorToken( + operStart: Lexer.Cursor, + operEnd: Lexer.Cursor, + sourceBufferStart: Lexer.Cursor + ) -> (RawTokenKind, error: LexingDiagnostic?) { // Decide between the binary, prefix, and postfix cases. // It's binary if either both sides are bound or both sides are not bound. // Otherwise, it's postfix if left-bound and prefix if right-bound. - let leftBound = tokStart.isLeftBound(sourceBufferStart: sourceBufferStart) - let rightBound = self.isRightBound(isLeftBound: leftBound) + let leftBound = operStart.isLeftBound(sourceBufferStart: sourceBufferStart) + let rightBound = operEnd.isRightBound(isLeftBound: leftBound) // Match various reserved words. - if self.input.baseAddress! - tokStart.input.baseAddress! == 1 { - switch tokStart.peek() { + if operEnd.input.baseAddress! - operStart.input.baseAddress! == 1 { + switch operStart.peek() { case UInt8(ascii: "="): if leftBound != rightBound { - var errorPos = tokStart + var errorPos = operStart if rightBound { _ = errorPos.advance() } - return Lexer.Result( + return ( .equal, error: LexingDiagnostic( .equalMustHaveConsistentWhitespaceOnBothSides, @@ -1984,47 +2060,113 @@ extension Lexer.Cursor { ) ) } else { - return Lexer.Result(.equal) + return (.equal, error: nil) } case UInt8(ascii: "&"): if leftBound == rightBound || leftBound { break } - return Lexer.Result(.prefixAmpersand) + return (.prefixAmpersand, error: nil) case UInt8(ascii: "."): - return Lexer.Result(.period) + return (.period, error: nil) case UInt8(ascii: "?"): if (leftBound) { - return Lexer.Result(.postfixQuestionMark) + return (.postfixQuestionMark, error: nil) } - return Lexer.Result(.infixQuestionMark) + return (.infixQuestionMark, error: nil) default: break } - } else if (self.input.baseAddress! - tokStart.input.baseAddress! 
== 2) { - switch (tokStart.peek(), tokStart.peek(at: 1)) { + } else if (operEnd.input.baseAddress! - operStart.input.baseAddress! == 2) { + switch (operStart.peek(), operStart.peek(at: 1)) { case (UInt8(ascii: "-"), UInt8(ascii: ">")): // -> - return Lexer.Result(.arrow) + return (.arrow, error: nil) case (UInt8(ascii: "*"), UInt8(ascii: "/")): // */ - return Lexer.Result(.unknown, error: LexingDiagnostic(.unexpectedBlockCommentEnd, position: tokStart)) + return (.unknown, error: LexingDiagnostic(.unexpectedBlockCommentEnd, position: operStart)) default: break } } else { // Verify there is no "*/" in the middle of the identifier token, we reject // it as potentially ending a block comment. - if tokStart.text(upTo: self).contains("*/") { - return Lexer.Result(.unknown, error: LexingDiagnostic(.unexpectedBlockCommentEnd, position: tokStart)) + if operStart.text(upTo: operEnd).contains("*/") { + return (.unknown, error: LexingDiagnostic(.unexpectedBlockCommentEnd, position: operStart)) } } if leftBound == rightBound { - return Lexer.Result(.binaryOperator) + return (.binaryOperator, error: nil) } else if leftBound { - return Lexer.Result(.postfixOperator) + return (.postfixOperator, error: nil) } else { - return Lexer.Result(.prefixOperator) + return (.prefixOperator, error: nil) + } + } + + mutating func lexOperatorIdentifier( + sourceBufferStart: Lexer.Cursor, + preferRegexOverBinaryOperator: Bool + ) -> Lexer.Result { + let tokStart = self + let didStart = self.advance(if: { $0.isOperatorStartCodePoint }) + precondition(didStart, "unexpected operator start") + + repeat { + // '.' cannot appear in the middle of an operator unless the operator + // started with a '.'. 
+ if self.is(at: ".") && tokStart.is(notAt: ".") { + break + } + let text = SyntaxText(baseAddress: self.input.baseAddress, count: self.input.count) + if text.hasPrefix("<#") && text.containsPlaceholderEnd() { + break + } + } while self.advance(if: { $0.isOperatorContinuationCodePoint }) + + // Check to see if we have a regex literal starting in the operator. + do { + var regexScan = tokStart + while regexScan.input.baseAddress! < self.input.baseAddress! { + // Scan for the first '/' in the operator to see if it starts a regex + // literal. + guard regexScan.is(at: "/") else { + _ = regexScan.advance() + continue + } + guard + let result = self.tryLexOperatorAsRegexLiteral( + at: regexScan, + operatorStart: tokStart, + operatorEnd: self, + sourceBufferStart: sourceBufferStart, + preferRegexOverBinaryOperator: preferRegexOverBinaryOperator + ) + else { break } + return result + } + } + + if self.input.baseAddress! - tokStart.input.baseAddress! > 2 { + // If there is a "//" or "/*" in the middle of an identifier token, + // it starts a comment. + var ptr = tokStart + // Skip over the first character. A `//` or /*` at the beginning would have + // been consumed as trivia. + _ = ptr.advance() + while ptr.input.baseAddress! < self.input.baseAddress! { + if ptr.is(at: "/") && ptr.is(offset: 1, at: "/", "*") { + self = ptr + break + } + _ = ptr.advance() + } } + let (kind, error) = Self.classifyOperatorToken( + operStart: tokStart, + operEnd: self, + sourceBufferStart: sourceBufferStart + ) + return Lexer.Result(kind, error: error) } mutating func lexDollarIdentifier() -> Lexer.Result { @@ -2060,7 +2202,7 @@ extension Lexer.Cursor { // MARK: - Editor Placeholders extension Lexer.Cursor { - mutating func tryLexEditorPlaceholder(sourceBufferStart: Lexer.Cursor) -> Lexer.Result { + mutating func tryLexEditorPlaceholder(sourceBufferStart: Lexer.Cursor) -> Lexer.Result? 
{ precondition(self.is(at: "<") && self.is(offset: 1, at: "#")) let start = self var ptr = self @@ -2087,7 +2229,7 @@ extension Lexer.Cursor { } // Not a well-formed placeholder. - return self.lexOperatorIdentifier(sourceBufferStart: sourceBufferStart) + return nil } } @@ -2267,123 +2409,3 @@ extension Lexer.Cursor { return nil } } - -extension Lexer.Cursor { - mutating func tryLexRegexLiteral(sourceBufferStart: Lexer.Cursor) -> RawTokenKind? { - guard !self.isLeftBound(sourceBufferStart: sourceBufferStart) else { - return nil - } - - var tmp = self - var poundCount = 0 - var parenCount = 0 - - while tmp.advance(matching: "#") { - poundCount += 1 - } - - guard tmp.advance(matching: "/") else { - return nil - } - - // For `/.../` regex literals, we need to ban space and tab at the start of - // a regex to avoid ambiguity with operator chains, e.g: - // - // Builder { - // 0 - // / 1 / - // 2 - // } - // - if poundCount == 0 && tmp.is(at: " ", "\n", "\t") { - return nil - } - - var isMultiline = false - LOOP: while true { - switch tmp.peek() { - case UInt8(ascii: " "), UInt8(ascii: "\t"): - _ = tmp.advance() - case UInt8(ascii: "\n"), UInt8(ascii: "\r"): - isMultiline = true - break LOOP - default: - break LOOP - } - } - - var escaped = false - DELIMITLOOP: while true { - defer { escaped = false } - - let previousByte = tmp.previous - switch tmp.advance() { - case nil: - return nil - case UInt8(ascii: "/"): - // If we're at the end of the literal, peek ahead to see if the closing - // slash is actually the start of a comment. - if tmp.is(at: "/", "*") { - return nil - } - - var endLex = tmp - for _ in 0.. LexResult { + if cursor.isAtEndOfFile { + // We've hit the end of the buffer. In multi-line mode, we don't want to + // skip over what is likely otherwise valid Swift code, so resume from the + // first newline. 
+ if let firstNewline = firstNewline { + cursor = firstNewline + } + return .unterminated + } + let charCursor = cursor + guard let char = cursor.advanceValidatingUTF8Character() else { + builder.recordPatternError(.invalidUtf8, at: cursor) + return .continue + } + switch char { + case "\n", "\r": + guard isMultiline else { + // Bump back the cursor to the newline to ensure it doesn't + // become part of the pattern token. + cursor = charCursor + return .unterminated + } + lastNewlineEnd = cursor + break + + case "\\" where !escaped: + // Advance again for an escape sequence. + return lexPatternCharacter(escaped: true) + + case "(" where !escaped && customCharacterClassDepth == 0: + groupDepth += 1 + + case ")" where !escaped && customCharacterClassDepth == 0: + guard groupDepth > 0 else { + // If we have an unbalanced ')', and this may not be a regex, don't + // lex as a regex. + if !mustBeRegex { + return .notARegex + } + break + } + groupDepth -= 1 + + case "[" where !escaped: + customCharacterClassDepth += 1 + + case "]" where !escaped: + if customCharacterClassDepth > 0 { + customCharacterClassDepth -= 1 + } + + case "\0": + builder.recordPatternError(.nulCharacter, at: charCursor) + break + + case let char + where char.isASCII && !char.isPrintableASCII && !(isMultiline && char == "\t"): + // Diagnose unprintable ASCII. + // Note that tabs are allowed in multi-line literals. + // TODO: This matches the string literal behavior, but should we allow + // tabs for single-line regex literals too? + builder.recordPatternError(.unprintableAsciiCharacter, at: charCursor) + break + + case " ", "\t": + if !escaped { + lastUnespacedSpaceOrTab = charCursor + } + break + + default: + break + } + return .continue + } + + /// Attempt to eat a the closing delimiter. + private mutating func tryEatEnding() -> LexResult? 
{ + let openPoundCount = builder.numOpenPounds + let slashBegin = cursor + var newCursor = cursor + + guard newCursor.advance(matching: "/") else { return nil } + let slashEnd = newCursor + candidateCloseSlashEnd = slashEnd.position + + // Try to eat closing pounds. Note we don't do this if we don't have any + // opening pounds (for recovery), as the builder currently bases the maximum + // token count off the presence of opening pounds, and it's not clear if + // recovery in that case is useful anyway. + var closePoundCount = 0 + if openPoundCount > 0 { + while newCursor.advance(matching: "#") { + closePoundCount += 1 + } + } + + // Make sure we have sufficient closing pounds. Note we can consume extra + // for better recovery. + guard closePoundCount >= openPoundCount else { return nil } + + // If we have a multi-line literal, make sure the closing delimiter + // appears alone on a newline. + if let lastNewlineEnd = lastNewlineEnd { + var delimScan = lastNewlineEnd + while delimScan.pointer < slashBegin.pointer { + if !delimScan.advance(matching: " ", "\t") { + builder.recordPatternError(.multilineRegexClosingNotOnNewline, at: slashBegin) + break + } + } + } + + if closePoundCount == 0 { + if let end = newCursor.peek() { + // If we're lexing `/.../`, treat it as unterminated if we ended on the + // opening of a comment. We prefer to lex the comment as it's more likely + // than not that is what the user is expecting. + switch UnicodeScalar(end) { + case "*", "/": + return .unterminated + default: + break + } + } + + // We also ban unespaced space and tab at the end of a regex literal if + // this might not be a regex. + if let lastUnespacedSpaceOrTab = lastUnespacedSpaceOrTab, + lastUnespacedSpaceOrTab.position.advanced(by: 1).pointer == slashBegin.position.pointer + { + if mustBeRegex { + // TODO: We ought to have a fix-it that suggests #/.../#. We could + // suggest escaping, but that would be wrong if the user has written (?x). 
+ // TODO: Should we suggest #/.../# for space-as-first character too? + builder.recordPatternError(.spaceAtEndOfRegexLiteral, at: lastUnespacedSpaceOrTab) + } else { + return .notARegex + } + } + } + + builder.recordCloseSlash(endingAt: slashEnd.position) + if closePoundCount > 0 { + builder.recordClosePounds(count: closePoundCount) + } + cursor = newCursor + return .done + } + + private mutating func lexImpl() -> LexResult { + // We can consume any number of pound signs. + var poundCount = 0 + while cursor.advance(matching: "#") { + poundCount += 1 + } + if poundCount > 0 { + builder.recordOpenPounds(count: poundCount) + } + + // Try to lex the opening delimiter. + let openSlash = cursor + guard cursor.advance(matching: "/") else { + return .notARegex + } + builder.recordOpenSlash() + + if !builder.hasPounds { + if let next = cursor.peek() { + switch UnicodeScalar(next) { + case " ", "\t": + // For `/.../` regex literals, we need to ban space and tab at the start + // of a regex to avoid ambiguity with operator chains, e.g: + // + // Builder { + // 0 + // / 1 / + // 2 + // } + // + if mustBeRegex { + // TODO: We ought to have a fix-it that inserts a backslash to escape. + builder.recordPatternError(.spaceAtStartOfRegexLiteral, at: cursor) + } else { + return .notARegex + } + case "*", "/": + // Start of a comment, not a regex. + return .notARegex + default: + break + } + } + if openSlash.previous == UInt8(ascii: "*") { + // End of block comment, not a regex. + return .notARegex + } + } + + // If the delimiter allows multi-line, try skipping over any whitespace to a + // newline character. If we can do that, we enter multi-line mode. 
+ if builder.hasPounds { + var newlineScan = cursor + while let next = newlineScan.peek() { + switch UnicodeScalar(next) { + case " ", "\t": + _ = newlineScan.advance() + continue + case "\n", "\r": + firstNewline = newlineScan + cursor = newlineScan + default: + break + } + break + } + } + + while true { + if let result = tryEatEnding() { + return result + } + switch lexPatternCharacter(escaped: false) { + case .continue: + continue + case let result: + return result + } + } + } + + mutating func lex() -> RegexLiteralLexemes? { + switch lexImpl() { + case .continue: + preconditionFailure("Not a valid result") + case .notARegex: + return nil + case .unterminated where !mustBeRegex: + // If this may not be a regex, bail. + return nil + case .done, .unterminated: + // In both cases we can just return the lexemes. We'll diagnose when + // parsing. + return builder.finish( + at: cursor.position, + candidateCloseSlashEnd: candidateCloseSlashEnd + ) + } + } +} + +struct RegexLiteralLexemes { + private let builder: Builder + + fileprivate init(from builder: Builder) { + self.builder = builder + } + + struct Element { + var kind: Kind + var end: Lexer.Cursor.Position + var error: Lexer.Cursor.LexingDiagnostic? + } +} + +extension RegexLiteralLexemes { + /// Allocate the lexemes on a given bump pointer allocator. + func allocate(in allocator: BumpPtrAllocator) -> UnsafePointer { + let ptr = allocator.allocate(Self.self, count: 1).baseAddress! + ptr.initialize(to: self) + return UnsafePointer(ptr) + } +} + +extension RegexLiteralLexemes.Element { + /// The regex literal token kind, the raw value of which indicates its index. + enum Kind: UInt8 { + case openingPounds + case openingSlash + case pattern + case closingSlash + case closingPounds + } + + /// Retrieve the actual token kind. 
+ var tokenKind: RawTokenKind { + switch kind { + case .openingPounds, .closingPounds: + return .extendedRegexDelimiter + case .openingSlash, .closingSlash: + return .regexSlash + case .pattern: + return .regexLiteralPattern + } + } +} + +extension RegexLiteralLexemes.Element.Kind { + /// Construct the regex literal token kind from a given index, taking pounds + /// into account. + fileprivate init(at index: UInt8, hasPounds: Bool) { + if hasPounds { + // If we have pounds, we have 5 tokens maximum. + precondition(index < 5) + self.init(rawValue: index)! + } else { + // Otherwise, we have 3 tokens maximum, and start at the slash. + precondition(index < 3) + self.init(rawValue: index + 1)! + } + } +} + +extension RegexLiteralLexemes { + /// A builder type for the regex literal lexer. + /// + /// NOTE: This is stored for the regex literal lexer state, so should be kept + /// as small as possible. Additionally, it is allocated using a bump pointer + /// allocator, so must remain a POD type (i.e no classes). + fileprivate struct Builder { + private(set) var numOpenPounds: Int = 0 + private(set) var patternByteLength: Int = 0 + private(set) var numClosePounds: Int = 0 + + // The start position is split into its component input buffer and + // previous char to allow for a more optimized layout. + private let _startInput: UnsafeBufferPointer + + // The pattern diagnostic is split for a more optimized layout. + private var _patternErrorOffset: Int? + private var _patternErrorKind: TokenDiagnostic.Kind? + + private let _startPrevious: UInt8 + + /// The number of lexemes. This is a UInt8 as there can only be a maximum + /// of 5 lexemes for a regex literal (open pounds, open slash, pattern, + /// closing slash, closing pounds). 
+ private(set) var lexemeCount: UInt8 = 0 + + init(startingAt start: Lexer.Cursor.Position) { + self._startInput = start.input + self._startPrevious = start.previous + } + var start: Lexer.Cursor.Position { + .init(input: _startInput, previous: _startPrevious) + } + + private(set) var patternError: Lexer.Cursor.LexingDiagnostic? { + get { + guard let _patternErrorKind = _patternErrorKind else { return nil } + let pos = start.advanced(by: _patternErrorOffset!) + return .init(_patternErrorKind, position: pos) + } + set { + guard let newValue = newValue else { + _patternErrorKind = nil + _patternErrorOffset = nil + return + } + _patternErrorKind = newValue.kind + _patternErrorOffset = start.distance(to: newValue.position) + } + } + + var hasPounds: Bool { numOpenPounds > 0 } + } +} + +extension RegexLiteralLexemes.Builder { + typealias Element = RegexLiteralLexemes.Element + + /// The lexeme kind for the last lexed token, or `nil` if no token has been + /// lexed yet. + var lastLexemeKind: Element.Kind? { + if lexemeCount == 0 { return nil } + return .init(at: lexemeCount - 1, hasPounds: hasPounds) + } + + /// The end byte offset for a given regex token kind. + func endByteOffset(for kind: Element.Kind) -> Int { + switch kind { + case .openingPounds: + return numOpenPounds + case .openingSlash: + return numOpenPounds + 1 + case .pattern: + return numOpenPounds + 1 + patternByteLength + case .closingSlash: + return numOpenPounds + 1 + patternByteLength + 1 + case .closingPounds: + return numOpenPounds + 1 + patternByteLength + 1 + numClosePounds + } + } + + /// Retrieve the end cursor position for a given regex token kind. 
+ func endCursorPosition(for kind: Element.Kind) -> Lexer.Cursor.Position { + start.advanced(by: endByteOffset(for: kind)) + } + + mutating func recordOpenPounds(count: Int) { + precondition(lastLexemeKind == nil) + numOpenPounds = count + lexemeCount += 1 + } + + mutating func recordOpenSlash() { + precondition(lastLexemeKind == nil || lastLexemeKind == .openingPounds) + lexemeCount += 1 + } + + private mutating func recordRegexPattern(byteLength: Int) { + precondition(lastLexemeKind == .openingSlash) + patternByteLength = byteLength + lexemeCount += 1 + } + + mutating func recordCloseSlash(endingAt closeSlashEnd: Lexer.Cursor.Position) { + precondition(lastLexemeKind == .openingSlash) + + // We use the close slash to compute the pattern length. + let patternStart = endCursorPosition(for: .openingSlash) + recordRegexPattern(byteLength: patternStart.distance(to: closeSlashEnd) - 1) + lexemeCount += 1 + } + + mutating func recordClosePounds(count: Int) { + precondition(lastLexemeKind == .closingSlash) + numClosePounds = count + lexemeCount += 1 + } + + mutating func recordPatternError( + _ kind: TokenDiagnostic.Kind, + at cursor: Lexer.Cursor + ) { + precondition(lastLexemeKind == .openingSlash) + + // Only record if we don't already have a pattern error, we want to prefer + // the first error we encounter. + if patternError == nil { + patternError = .init(kind, position: cursor) + } + } + + /// Finish regex literal lexing. + mutating func finish( + at end: Lexer.Cursor.Position, + candidateCloseSlashEnd: Lexer.Cursor.Position? + ) -> RegexLiteralLexemes { + // If we ended up in the middle of a pattern, we have an unterminated + // literal. Make sure to record the pattern, and do some recovery for + // better diagnostics. 
+ if lastLexemeKind == .openingSlash { + let patternStart = endCursorPosition(for: .openingSlash) + let byteLength = patternStart.distance(to: end) + + // If have an extended literal, we can do some recovery for unterminated + // cases by seeing if we have something that looks like the ending + // delimiter at the end of the pattern, and treating it as such. The + // parser will diagnose if it doesn't end up matching. + func inferClosingDelimiter() -> (slashEnd: Lexer.Cursor.Position, numPounds: Int)? { + guard hasPounds && byteLength > 0 else { return nil } + + // If the last characeter is '/', we can use that. + if UnicodeScalar(end.previous) == "/" { + return (end, numPounds: 0) + } + // If the last character is '#', scan from the candidate last slash to + // see if we only have '#' characters until the end. In such a case, + // we can claim those characters as part of the delimiter. + if UnicodeScalar(end.previous) == "#", + let candidateCloseSlashEnd = candidateCloseSlashEnd + { + var poundScan = candidateCloseSlashEnd + var numClosingPounds = 0 + while poundScan.pointer < end.pointer, + UnicodeScalar(poundScan.advance()!) == "#" + { + numClosingPounds += 1 + } + precondition(numClosingPounds < numOpenPounds, "Should have lexed this?") + + // Should be at the end now, otherwise this is something bogus in + // the middle of the pattern. + if poundScan.pointer == end.pointer { + return (candidateCloseSlashEnd, numClosingPounds) + } + } + return nil + } + if let (closeSlashEnd, numClosingPounds) = inferClosingDelimiter() { + recordCloseSlash(endingAt: closeSlashEnd) + if numClosingPounds > 0 { + recordClosePounds(count: numClosingPounds) + } + } else { + recordRegexPattern(byteLength: byteLength) + } + } + return .init(from: self) + } +} + +extension RegexLiteralLexemes: RandomAccessCollection { + typealias Index = UInt8 + + var startIndex: UInt8 { 0 } + var endIndex: UInt8 { builder.lexemeCount } + + /// Retrieve the token at the given index. 
+ subscript(index: UInt8) -> Element { + let kind = Element.Kind(at: index, hasPounds: builder.hasPounds) + return .init( + kind: kind, + end: builder.endCursorPosition(for: kind), + error: kind == .pattern ? builder.patternError : nil + ) + } +} + +extension Lexer.Cursor { + /// A heuristic that determines whether the cursor is currently in a regex + /// literal position by looking at the previous token to determine if we're + /// expecting an expression, or a binary operator. + fileprivate func isInRegexLiteralPosition() -> Bool { + switch previousTokenKind { + // Can lex a regex literal at the start of the buffer. + case nil: + return true + + // Cannot lex at the end of the buffer. + case .eof: + return false + + // Prefix grammar that appears before an expression. + case .leftAngle, .leftBrace, .leftParen, .leftSquareBracket, .prefixOperator, .prefixAmpersand: + return true + + // Binary operators sequence expressions. + case .binaryOperator, .equal: + return true + + // Infix punctuation that generally separates expressions. + case .semicolon, .comma, .colon, .infixQuestionMark: + return true + + // Postfix grammar would expect an binary operator next. + case .postfixOperator, .exclamationMark, .postfixQuestionMark, .rightAngle, .rightBrace, .rightParen, .rightSquareBracket: + return false + + // Punctuation that does not sequence expressions. + case .arrow, .ellipsis, .period, .atSign, .pound, .backtick, .backslash: + return false + + case .keyword: + // There are a handful of keywords that are expressions, handle them. + // Otherwise, a regex literal can generally be parsed after a keyword. + switch previousKeyword! { + case .true, .false, .Any, .nil, .`self`, .`Self`, .super: + return false + default: + return true + } + + // Identifiers do not sequence expressions. + case .identifier, .dollarIdentifier, .wildcard: + return false + + // Literals are themselves expressions and therefore don't sequence expressions. 
+ case .floatingLiteral, .integerLiteral: + return false + + // Pound keywords that do not generally sequence expressions. + case .poundAvailableKeyword, .poundSourceLocationKeyword, .poundUnavailableKeyword: + return false + + // Pound keywords that generally do sequence expressions. + case .poundIfKeyword, .poundElseKeyword, .poundElseifKeyword, .poundEndifKeyword: + return true + + // Bits of string/regex grammar, we can't start lexing a regex literal here. + case .extendedRegexDelimiter, .regexSlash, .regexLiteralPattern, .rawStringDelimiter, .stringQuote, .stringSegment, .multilineStringQuote, .singleQuote: + return false + + // Allow unknown for better recovery. + case .unknown: + return true + } + } +} + +extension Lexer.Cursor { + /// Scan for a regex literal, without advancing the cursor. Returns the regex + /// literal tokens scanned, or `nil` if there is no regex literal. + fileprivate func scanRegexLiteral(mustBeRegex: Bool) -> RegexLiteralLexemes? { + var lexer = RegexLiteralLexer(self, mustBeRegex: mustBeRegex) + return lexer.lex() + } + + /// Attempt to scan for a regex literal starting from within an operator we've + /// lexed. + fileprivate func tryScanOperatorAsRegexLiteral( + operatorStart: Lexer.Cursor, + operatorEnd: Lexer.Cursor, + sourceBufferStart: Lexer.Cursor, + preferRegexOverBinaryOperator: Bool + ) -> RegexLiteralLexemes? { + precondition(self.pointer >= operatorStart.pointer, "lexing before the operator?") + + let isLeftBound = operatorStart.isLeftBound(sourceBufferStart: sourceBufferStart) + let isRightBound = operatorEnd.isRightBound(isLeftBound: isLeftBound) + + // Must not be left bound, we should lex a postfix '/' instead. + guard !isLeftBound else { return nil } + + // If the previous token was 'func' or 'operator', the next token must + // be an operator, not a regex. 
This is needed to correctly handle cases + // like: + // + // operator /^/ + // postfix func /(lhs: Int) { 5/ } + // + // Re-lexing isn't a viable strategy as there could be unbalanced curly + // braces in the regex, which interferes with the lexical structure (e.g + // anything relying on the lexed tokens to correctly balance curly braces). + switch self.previousKeyword { + case .func, .operator: + return nil + default: + break + } + + // Handle cases where the '/' is part of what looks like a binary operator. + var mustBeRegex = false + if isLeftBound == isRightBound { + // The `preferRegexOverBinaryOperator` flag is set if we previously had a + // 'try?' or 'try!'. In that case, the previous token is a postfix + // operator, which would usually indicate that we're not in regex literal + // position (as we would typically expect a binary operator to follow a + // postfix operator, not an expression). However 'try' is special because + // it cannot appear on the LHS of a binary operator, so we know we must + // have a regex. + // + // This is needed to handle cases like `try? /^ x/`, which should be lexed + // as a regex. This can occur for cases like `try? /^ x/.wholeMatch(...)`. + if preferRegexOverBinaryOperator { + mustBeRegex = true + } + // If we are not in a regex literal position, and are not certain, then + // prefer lexing as a binary operator instead. + if !mustBeRegex && !operatorStart.isInRegexLiteralPosition() { + return nil + } + // For better recovery, we can confidently lex a regex literal if we're in + // regex literal position, and the '/' is part of what looks like a binary + // operator. This would otherwise be illegal code, as binary operators + // cannot appear in expression position. The only exception to this is if + // the previous token indicates we're in an argument list, in which case + // an unapplied operator is legal, and we should prefer to lex as that + // instead. 
+ switch previousTokenKind { + case .leftParen, .leftSquareBracket, .comma, .colon: + break + default: + mustBeRegex = true + } + } + return scanRegexLiteral(mustBeRegex: mustBeRegex) + } +} + +extension Lexer.Cursor { + /// Attempt to lex a regex literal. Note this lexes confidently, without + /// applying various heuristics to avoid lexing a regex literal in ambiguous + /// cases. + mutating func lexRegexLiteral() -> Lexer.Result? { + guard let lexemes = scanRegexLiteral(mustBeRegex: true) else { return nil } + return lexInRegexLiteral(lexemes[...], existingPtr: nil) + } + + /// Attempt to lex an operator as a regex literal, returning the result, + /// or `nil` if a regex literal could not be lexed. + mutating func tryLexOperatorAsRegexLiteral( + at regexStart: Lexer.Cursor, + operatorStart: Lexer.Cursor, + operatorEnd: Lexer.Cursor, + sourceBufferStart: Lexer.Cursor, + preferRegexOverBinaryOperator: Bool + ) -> Lexer.Result? { + guard + let lexemes = regexStart.tryScanOperatorAsRegexLiteral( + operatorStart: operatorStart, + operatorEnd: operatorEnd, + sourceBufferStart: sourceBufferStart, + preferRegexOverBinaryOperator: preferRegexOverBinaryOperator + ) + else { return nil } + + if regexStart.pointer > operatorStart.pointer { + // If we started lexing in the middle of an operator, split off the prefix + // operator, and move the cursor to where the regex literal starts. + self.position = regexStart.position + let (kind, error) = Self.classifyOperatorToken( + operStart: operatorStart, + operEnd: regexStart, + sourceBufferStart: sourceBufferStart + ) + return Lexer.Result( + kind, + error: error, + stateTransition: .pushRegexLexemes(index: 0, lexemes: lexemes) + ) + } else { + // Otherwise we just have a regex literal. We can call into + // `lexInRegexLiteral` to pop the first token and push the state. + return lexInRegexLiteral(lexemes[...], existingPtr: nil) + } + } + + /// Lex an already-lexed regex literal. 
If `existingPtr` is non-nil, this is + /// for an existing regex literal state on the lexer state stack. + mutating func lexInRegexLiteral( + _ lexemes: Slice, + existingPtr: UnsafePointer? + ) -> Lexer.Result { + // Given we have already lexed the regex literal, this is as simple as + // popping off the next token and moving the lexer up to its end position. + var lexemes = lexemes + let lexeme = lexemes.removeFirst() + + self.position = lexeme.end + + // The new index is now given by the slice start index (as we've removed + // the first element). + let index = lexemes.startIndex + + // Compute the new transition. + let transition: Lexer.StateTransition? + if let existingPtr = existingPtr { + transition = lexemes.isEmpty ? .pop : .replace(newState: .inRegexLiteral(index: index, lexemes: existingPtr)) + } else { + transition = lexemes.isEmpty ? nil : .pushRegexLexemes(index: index, lexemes: lexemes.base) + } + return .init(lexeme.tokenKind, error: lexeme.error, stateTransition: transition) + } +} diff --git a/Sources/SwiftParser/StringLiterals.swift b/Sources/SwiftParser/StringLiterals.swift index 8de1386832d..ffd756e6b49 100644 --- a/Sources/SwiftParser/StringLiterals.swift +++ b/Sources/SwiftParser/StringLiterals.swift @@ -426,8 +426,12 @@ extension Parser { // MARK: - Parse string literals extension Parser { - /// Consumes a raw string delimiter that has the same number of `#` as `openDelimiter`. - private mutating func parseStringDelimiter(openDelimiter: RawTokenSyntax?) -> (unexpectedBeforeCheckedDelimiter: RawUnexpectedNodesSyntax?, checkedDelimiter: RawTokenSyntax?) { + /// Consumes a raw string or extended regex delimiter that has the same + /// number of `#` as `openDelimiter`. + mutating func parsePoundDelimiter( + _ kind: RawTokenKind, + matching openDelimiter: RawTokenSyntax? + ) -> (unexpectedBeforeCheckedDelimiter: RawUnexpectedNodesSyntax?, checkedDelimiter: RawTokenSyntax?) 
{ // Check for leadingTriviaText == "" so we don't consume the leading raw // string delimiter of an upcoming string literal, e.g. in // ``` @@ -435,7 +439,7 @@ extension Parser { // #"raw literal"# // ``` let delimiter: RawTokenSyntax? - if self.at(.rawStringDelimiter) && self.currentToken.leadingTriviaText == "" { + if self.at(TokenSpec(kind)) && self.currentToken.leadingTriviaText == "" { delimiter = self.consumeAnyToken() } else { delimiter = nil @@ -445,14 +449,14 @@ extension Parser { case (nil, nil): return (nil, nil) case (let open?, nil): - return (nil, missingToken(.rawStringDelimiter, text: open.tokenText)) + return (nil, missingToken(kind, text: open.tokenText)) case (nil, .some): return (RawUnexpectedNodesSyntax([delimiter], arena: self.arena), nil) case (let open?, let close?): if open.tokenText == close.tokenText { return (nil, close) } else { - return (RawUnexpectedNodesSyntax([delimiter], arena: self.arena), missingToken(.rawStringDelimiter, text: open.tokenText)) + return (RawUnexpectedNodesSyntax([delimiter], arena: self.arena), missingToken(kind, text: open.tokenText)) } } } @@ -482,7 +486,7 @@ extension Parser { if let stringSegment = self.consume(if: .stringSegment) { segments.append(.stringSegment(RawStringSegmentSyntax(content: stringSegment, arena: self.arena))) } else if let backslash = self.consume(if: .backslash) { - let (unexpectedBeforeDelimiter, delimiter) = self.parseStringDelimiter(openDelimiter: openDelimiter) + let (unexpectedBeforeDelimiter, delimiter) = self.parsePoundDelimiter(.rawStringDelimiter, matching: openDelimiter) let leftParen = self.expectWithoutRecoveryOrLeadingTrivia(.leftParen) let expressions = RawTupleExprElementListSyntax(elements: self.parseArgumentListElements(pattern: .none), arena: self.arena) @@ -546,7 +550,7 @@ extension Parser { closeQuote = self.expectWithoutRecoveryOrLeadingTrivia(TokenSpec(openQuote.tokenKind)) } - let (unexpectedBeforeCloseDelimiter, closeDelimiter) = 
self.parseStringDelimiter(openDelimiter: openDelimiter) + let (unexpectedBeforeCloseDelimiter, closeDelimiter) = self.parsePoundDelimiter(.rawStringDelimiter, matching: openDelimiter) if openQuote.tokenKind == .multilineStringQuote, !openQuote.isMissing, !closeQuote.isMissing { let postProcessed = postProcessMultilineStringLiteral(rawStringDelimitersToken: openDelimiter, openQuote: openQuote, segments: segments, closeQuote: closeQuote) diff --git a/Sources/SwiftParser/TokenPrecedence.swift b/Sources/SwiftParser/TokenPrecedence.swift index 55fc529cc70..93049d329a3 100644 --- a/Sources/SwiftParser/TokenPrecedence.swift +++ b/Sources/SwiftParser/TokenPrecedence.swift @@ -115,15 +115,15 @@ public enum TokenPrecedence: Comparable { self = .unknownToken // MARK: Identifier like case // Literals - .floatingLiteral, .integerLiteral, .regexLiteral, + .floatingLiteral, .integerLiteral, // Pound literals .poundAvailableKeyword, .poundSourceLocationKeyword, .poundUnavailableKeyword, // Identifiers .dollarIdentifier, .identifier, // '_' can occur in types to replace a type identifier .wildcard, - // String segment, string interpolation anchor and pound don't really fit anywhere else - .pound, .stringSegment: + // String segment, string interpolation anchor, pound, and regex pattern don't really fit anywhere else + .pound, .stringSegment, .regexLiteralPattern: self = .identifierLike // MARK: Expr keyword @@ -139,7 +139,8 @@ public enum TokenPrecedence: Comparable { self = .weakBracketed(closingDelimiter: .rightSquareBracket) case .leftAngle: self = .weakBracketed(closingDelimiter: .rightAngle) - case .multilineStringQuote, .rawStringDelimiter, .singleQuote, .stringQuote: + case .multilineStringQuote, .rawStringDelimiter, .singleQuote, .stringQuote, + .regexSlash, .extendedRegexDelimiter: self = .weakBracketed(closingDelimiter: tokenKind) case // Chaining punctuators .infixQuestionMark, .period, .postfixQuestionMark, .exclamationMark, diff --git 
a/Sources/SwiftParser/TokenSpecSet.swift b/Sources/SwiftParser/TokenSpecSet.swift index 9da678e61fd..96abcc6d484 100644 --- a/Sources/SwiftParser/TokenSpecSet.swift +++ b/Sources/SwiftParser/TokenSpecSet.swift @@ -329,7 +329,6 @@ enum OperatorLike: TokenSpecSet { case postfixQuestionMark case equal case arrow - case regexLiteral // regex literals can look like operators, e.g. '/^/' init?(lexeme: Lexer.Lexeme) { if let op = Operator(lexeme: lexeme) { @@ -342,7 +341,6 @@ enum OperatorLike: TokenSpecSet { case .postfixQuestionMark: self = .postfixQuestionMark case .equal: self = .equal case .arrow: self = .arrow - case .regexLiteral: self = .regexLiteral default: return nil } } @@ -354,7 +352,6 @@ enum OperatorLike: TokenSpecSet { .postfixQuestionMark, .equal, .arrow, - .regexLiteral, ] } @@ -366,7 +363,6 @@ enum OperatorLike: TokenSpecSet { case .postfixQuestionMark: return TokenSpec(.postfixQuestionMark, remapping: .postfixOperator) case .equal: return TokenSpec(.equal, remapping: .binaryOperator) case .arrow: return TokenSpec(.arrow, remapping: .binaryOperator) - case .regexLiteral: return TokenSpec(.regexLiteral, remapping: .binaryOperator, recoveryPrecedence: TokenPrecedence(nonKeyword: .binaryOperator)) } } } @@ -588,7 +584,8 @@ enum PrimaryExpressionStart: TokenSpecSet { case pound case poundAvailableKeyword // For recovery case poundUnavailableKeyword // For recovery - case regexLiteral + case regexSlash + case extendedRegexDelimiter case selfKeyword case superKeyword case trueKeyword @@ -616,7 +613,8 @@ enum PrimaryExpressionStart: TokenSpecSet { case TokenSpec(.pound): self = .pound case TokenSpec(.poundAvailableKeyword): self = .poundAvailableKeyword case TokenSpec(.poundUnavailableKeyword): self = .poundUnavailableKeyword - case TokenSpec(.regexLiteral): self = .regexLiteral + case TokenSpec(.regexSlash): self = .regexSlash + case TokenSpec(.extendedRegexDelimiter): self = .extendedRegexDelimiter case TokenSpec(.self): self = .selfKeyword case 
TokenSpec(.super): self = .superKeyword case TokenSpec(.true): self = .trueKeyword @@ -647,7 +645,8 @@ enum PrimaryExpressionStart: TokenSpecSet { case .pound: return .pound case .poundAvailableKeyword: return .poundAvailableKeyword case .poundUnavailableKeyword: return .poundUnavailableKeyword - case .regexLiteral: return .regexLiteral + case .regexSlash: return .regexSlash + case .extendedRegexDelimiter: return .extendedRegexDelimiter case .selfKeyword: return .keyword(.self) case .superKeyword: return .keyword(.super) case .trueKeyword: return .keyword(.true) diff --git a/Sources/SwiftParser/generated/TokenSpecStaticMembers.swift b/Sources/SwiftParser/generated/TokenSpecStaticMembers.swift index 438d0d4b8e5..dd90a837195 100644 --- a/Sources/SwiftParser/generated/TokenSpecStaticMembers.swift +++ b/Sources/SwiftParser/generated/TokenSpecStaticMembers.swift @@ -63,6 +63,10 @@ extension TokenSpec { return TokenSpec(.exclamationMark) } + static var extendedRegexDelimiter: TokenSpec { + return TokenSpec(.extendedRegexDelimiter) + } + static var floatingLiteral: TokenSpec { return TokenSpec(.floatingLiteral) } @@ -155,8 +159,12 @@ extension TokenSpec { return TokenSpec(.rawStringDelimiter) } - static var regexLiteral: TokenSpec { - return TokenSpec(.regexLiteral) + static var regexLiteralPattern: TokenSpec { + return TokenSpec(.regexLiteralPattern) + } + + static var regexSlash: TokenSpec { + return TokenSpec(.regexSlash) } static var rightAngle: TokenSpec { diff --git a/Sources/SwiftParserDiagnostics/LexerDiagnosticMessages.swift b/Sources/SwiftParserDiagnostics/LexerDiagnosticMessages.swift index 09307f8ed0e..65478cad737 100644 --- a/Sources/SwiftParserDiagnostics/LexerDiagnosticMessages.swift +++ b/Sources/SwiftParserDiagnostics/LexerDiagnosticMessages.swift @@ -54,6 +54,9 @@ public enum StaticTokenError: String, DiagnosticMessage { case sourceConflictMarker = "source control conflict marker in source file" case unexpectedBlockCommentEnd = "unexpected end of block 
comment" case unicodeCurlyQuote = #"unicode curly quote found; use '"' instead"# + case spaceAtStartOfRegexLiteral = "bare slash regex literal may not start with space" + case spaceAtEndOfRegexLiteral = "bare slash regex literal may not end with space" + case multilineRegexClosingNotOnNewline = "multi-line regex closing delimiter must appear on new line" case unprintableAsciiCharacter = "unprintable ASCII character found in source file" public var message: String { self.rawValue } @@ -163,6 +166,9 @@ public extension SwiftSyntax.TokenDiagnostic { case .sourceConflictMarker: return StaticTokenError.sourceConflictMarker case .unexpectedBlockCommentEnd: return StaticTokenError.unexpectedBlockCommentEnd case .unicodeCurlyQuote: return StaticTokenError.unicodeCurlyQuote + case .spaceAtStartOfRegexLiteral: return StaticTokenError.spaceAtStartOfRegexLiteral + case .spaceAtEndOfRegexLiteral: return StaticTokenError.spaceAtEndOfRegexLiteral + case .multilineRegexClosingNotOnNewline: return StaticTokenError.multilineRegexClosingNotOnNewline case .unprintableAsciiCharacter: return StaticTokenError.unprintableAsciiCharacter } } diff --git a/Sources/SwiftParserDiagnostics/MissingNodesError.swift b/Sources/SwiftParserDiagnostics/MissingNodesError.swift index 2c9cde23505..705a6e67414 100644 --- a/Sources/SwiftParserDiagnostics/MissingNodesError.swift +++ b/Sources/SwiftParserDiagnostics/MissingNodesError.swift @@ -166,6 +166,8 @@ fileprivate extension TokenKind { return .leftSquareBracket case .stringQuote, .multilineStringQuote, .rawStringDelimiter: return self + case .regexSlash, .extendedRegexDelimiter: + return self default: return nil } diff --git a/Sources/SwiftParserDiagnostics/MissingTokenError.swift b/Sources/SwiftParserDiagnostics/MissingTokenError.swift index fadcc2352af..c18699209a7 100644 --- a/Sources/SwiftParserDiagnostics/MissingTokenError.swift +++ b/Sources/SwiftParserDiagnostics/MissingTokenError.swift @@ -34,8 +34,8 @@ extension ParseDiagnosticsGenerator { 
handled = handleInvalidMultilineStringQuote(invalidToken: invalidToken, missingToken: missingToken, invalidTokenContainer: invalidTokenContainer) case (.period, .period): handled = handleInvalidPeriod(invalidToken: invalidToken, missingToken: missingToken, invalidTokenContainer: invalidTokenContainer) - case (.rawStringDelimiter, .rawStringDelimiter): - handled = handleInvalidRawStringDelimiter(invalidToken: invalidToken, missingToken: missingToken, invalidTokenContainer: invalidTokenContainer) + case (.rawStringDelimiter, .rawStringDelimiter), (.extendedRegexDelimiter, .extendedRegexDelimiter): + handled = handleInvalidPoundDelimiter(invalidToken: invalidToken, missingToken: missingToken, invalidTokenContainer: invalidTokenContainer) default: handled = false } @@ -122,19 +122,25 @@ extension ParseDiagnosticsGenerator { return true } - private func handleInvalidRawStringDelimiter(invalidToken: TokenSyntax, missingToken: TokenSyntax, invalidTokenContainer: UnexpectedNodesSyntax) -> Bool { + private func handleInvalidPoundDelimiter( + invalidToken: TokenSyntax, + missingToken: TokenSyntax, + invalidTokenContainer: UnexpectedNodesSyntax + ) -> Bool { + let isTooMany = invalidToken.contentLength > missingToken.contentLength let message: DiagnosticMessage if missingToken.parent?.is(ExpressionSegmentSyntax.self) == true { message = .tooManyRawStringDelimitersToStartInterpolation } else { + let parent = missingToken.parent! precondition( - missingToken.parent?.is(StringLiteralExprSyntax.self) == true, - "Raw string delimiters should only occur in string interpolation and at the end of a string literal" + parent.is(StringLiteralExprSyntax.self) || parent.is(RegexLiteralExprSyntax.self), + "Raw string delimiters should only occur in string interpolation and at the end of a string or regex literal" ) - message = .tooManyClosingRawStringDelimiters + message = isTooMany ? 
StaticParserError.tooManyClosingPoundDelimiters : .tooFewClosingPoundDelimiters } let fixIt = FixIt( - message: .removeExtraneousDelimiters, + message: isTooMany ? .removeExtraneousDelimiters : .insertExtraClosingPounds, changes: [ .makeMissing(invalidToken), .makePresentBeforeTrivia(missingToken), diff --git a/Sources/SwiftParserDiagnostics/ParserDiagnosticMessages.swift b/Sources/SwiftParserDiagnostics/ParserDiagnosticMessages.swift index db70ffecdac..3cd88f30a2a 100644 --- a/Sources/SwiftParserDiagnostics/ParserDiagnosticMessages.swift +++ b/Sources/SwiftParserDiagnostics/ParserDiagnosticMessages.swift @@ -197,9 +197,12 @@ extension DiagnosticMessage where Self == StaticParserError { public static var subscriptsCannotHaveNames: Self { .init("subscripts cannot have a name") } - public static var tooManyClosingRawStringDelimiters: Self { + public static var tooManyClosingPoundDelimiters: Self { .init("too many '#' characters in closing delimiter") } + public static var tooFewClosingPoundDelimiters: Self { + .init("expected additional '#' characters in closing delimiter") + } public static var tooManyRawStringDelimitersToStartInterpolation: Self { .init("too many '#' characters to start string interpolation") } @@ -519,6 +522,9 @@ extension FixItMessage where Self == StaticParserFixIt { public static var removeExtraneousDelimiters: Self { .init("remove extraneous delimiters") } + public static var insertExtraClosingPounds: Self { + .init("insert additional closing '#' delimiters") + } public static var removeExtraneousWhitespace: Self { .init("remove whitespace") } diff --git a/Sources/SwiftSyntax/TokenDiagnostic.swift b/Sources/SwiftSyntax/TokenDiagnostic.swift index 5b2be370e5a..64ab846ebff 100644 --- a/Sources/SwiftSyntax/TokenDiagnostic.swift +++ b/Sources/SwiftSyntax/TokenDiagnostic.swift @@ -49,6 +49,9 @@ public struct TokenDiagnostic: Hashable { case unexpectedBlockCommentEnd case unicodeCurlyQuote case unprintableAsciiCharacter + case 
spaceAtStartOfRegexLiteral + case spaceAtEndOfRegexLiteral + case multilineRegexClosingNotOnNewline } public let kind: Kind @@ -122,6 +125,9 @@ public struct TokenDiagnostic: Hashable { case .unexpectedBlockCommentEnd: return .error case .unicodeCurlyQuote: return .error case .unprintableAsciiCharacter: return .error + case .spaceAtStartOfRegexLiteral: return .error + case .spaceAtEndOfRegexLiteral: return .error + case .multilineRegexClosingNotOnNewline: return .error } } } diff --git a/Sources/SwiftSyntax/generated/TokenKind.swift b/Sources/SwiftSyntax/generated/TokenKind.swift index 739bd58864b..227f514f270 100644 --- a/Sources/SwiftSyntax/generated/TokenKind.swift +++ b/Sources/SwiftSyntax/generated/TokenKind.swift @@ -27,6 +27,7 @@ public enum TokenKind: Hashable { case ellipsis case equal case exclamationMark + case extendedRegexDelimiter(String) case floatingLiteral(String) case identifier(String) case infixQuestionMark @@ -51,7 +52,8 @@ public enum TokenKind: Hashable { case prefixAmpersand case prefixOperator(String) case rawStringDelimiter(String) - case regexLiteral(String) + case regexLiteralPattern(String) + case regexSlash case rightAngle case rightBrace case rightParen @@ -89,6 +91,8 @@ public enum TokenKind: Hashable { return #"="# case .exclamationMark: return #"!"# + case .extendedRegexDelimiter(let text): + return text case .floatingLiteral(let text): return text case .identifier(let text): @@ -137,8 +141,10 @@ public enum TokenKind: Hashable { return text case .rawStringDelimiter(let text): return text - case .regexLiteral(let text): + case .regexLiteralPattern(let text): return text + case .regexSlash: + return #"/"# case .rightAngle: return #">"# case .rightBrace: @@ -222,6 +228,8 @@ public enum TokenKind: Hashable { return #"#unavailable"# case .prefixAmpersand: return #"&"# + case .regexSlash: + return #"/"# case .rightAngle: return #">"# case .rightBrace: @@ -271,6 +279,8 @@ public enum TokenKind: Hashable { return #"="# case 
.exclamationMark: return #"!"# + case .extendedRegexDelimiter: + return #"extended delimiter"# case .floatingLiteral: return #"floating literal"# case .identifier: @@ -317,8 +327,10 @@ public enum TokenKind: Hashable { return #"prefix operator"# case .rawStringDelimiter: return #"raw string delimiter"# - case .regexLiteral: - return #"regex literal"# + case .regexLiteralPattern: + return #"regex pattern"# + case .regexSlash: + return #"/"# case .rightAngle: return #">"# case .rightBrace: @@ -375,6 +387,8 @@ public enum TokenKind: Hashable { return false case .exclamationMark: return false + case .extendedRegexDelimiter: + return false case .floatingLiteral: return false case .identifier: @@ -421,7 +435,9 @@ public enum TokenKind: Hashable { return false case .rawStringDelimiter: return false - case .regexLiteral: + case .regexLiteralPattern: + return false + case .regexSlash: return false case .rightAngle: return false @@ -479,6 +495,8 @@ public enum TokenKind: Hashable { return true case .exclamationMark: return true + case .extendedRegexDelimiter: + return false case .floatingLiteral: return false case .identifier: @@ -527,8 +545,10 @@ public enum TokenKind: Hashable { return false case .rawStringDelimiter: return false - case .regexLiteral: + case .regexLiteralPattern: return false + case .regexSlash: + return true case .rightAngle: return true case .rightBrace: @@ -580,6 +600,8 @@ extension TokenKind: Equatable { return true case (.exclamationMark, .exclamationMark): return true + case (.extendedRegexDelimiter(let lhsText), .extendedRegexDelimiter(let rhsText)): + return lhsText == rhsText case (.floatingLiteral(let lhsText), .floatingLiteral(let rhsText)): return lhsText == rhsText case (.identifier(let lhsText), .identifier(let rhsText)): @@ -628,8 +650,10 @@ extension TokenKind: Equatable { return lhsText == rhsText case (.rawStringDelimiter(let lhsText), .rawStringDelimiter(let rhsText)): return lhsText == rhsText - case (.regexLiteral(let lhsText), 
.regexLiteral(let rhsText)): + case (.regexLiteralPattern(let lhsText), .regexLiteralPattern(let rhsText)): return lhsText == rhsText + case (.regexSlash, .regexSlash): + return true case (.rightAngle, .rightAngle): return true case (.rightBrace, .rightBrace): @@ -674,6 +698,7 @@ public enum RawTokenKind: UInt8, Equatable, Hashable { case ellipsis case equal case exclamationMark + case extendedRegexDelimiter case floatingLiteral case identifier case infixQuestionMark @@ -698,7 +723,8 @@ public enum RawTokenKind: UInt8, Equatable, Hashable { case prefixAmpersand case prefixOperator case rawStringDelimiter - case regexLiteral + case regexLiteralPattern + case regexSlash case rightAngle case rightBrace case rightParen @@ -767,6 +793,8 @@ public enum RawTokenKind: UInt8, Equatable, Hashable { return #"#unavailable"# case .prefixAmpersand: return #"&"# + case .regexSlash: + return #"/"# case .rightAngle: return #">"# case .rightBrace: @@ -819,6 +847,8 @@ public enum RawTokenKind: UInt8, Equatable, Hashable { return true case .exclamationMark: return true + case .extendedRegexDelimiter: + return false case .floatingLiteral: return false case .identifier: @@ -867,8 +897,10 @@ public enum RawTokenKind: UInt8, Equatable, Hashable { return false case .rawStringDelimiter: return false - case .regexLiteral: + case .regexLiteralPattern: return false + case .regexSlash: + return true case .rightAngle: return true case .rightBrace: @@ -931,6 +963,8 @@ extension TokenKind { case .exclamationMark: precondition(text.isEmpty || rawKind.defaultText.map(String.init) == text) return .exclamationMark + case .extendedRegexDelimiter: + return .extendedRegexDelimiter(text) case .floatingLiteral: return .floatingLiteral(text) case .identifier: @@ -999,8 +1033,11 @@ extension TokenKind { return .prefixOperator(text) case .rawStringDelimiter: return .rawStringDelimiter(text) - case .regexLiteral: - return .regexLiteral(text) + case .regexLiteralPattern: + return .regexLiteralPattern(text) + 
case .regexSlash: + precondition(text.isEmpty || rawKind.defaultText.map(String.init) == text) + return .regexSlash case .rightAngle: precondition(text.isEmpty || rawKind.defaultText.map(String.init) == text) return .rightAngle @@ -1061,6 +1098,8 @@ extension TokenKind { return (.equal, nil) case .exclamationMark: return (.exclamationMark, nil) + case .extendedRegexDelimiter(let str): + return (.extendedRegexDelimiter, str) case .floatingLiteral(let str): return (.floatingLiteral, str) case .identifier(let str): @@ -1109,8 +1148,10 @@ extension TokenKind { return (.prefixOperator, str) case .rawStringDelimiter(let str): return (.rawStringDelimiter, str) - case .regexLiteral(let str): - return (.regexLiteral, str) + case .regexLiteralPattern(let str): + return (.regexLiteralPattern, str) + case .regexSlash: + return (.regexSlash, nil) case .rightAngle: return (.rightAngle, nil) case .rightBrace: diff --git a/Sources/SwiftSyntax/generated/Tokens.swift b/Sources/SwiftSyntax/generated/Tokens.swift index 5ff628cec23..3ad25b165e9 100644 --- a/Sources/SwiftSyntax/generated/Tokens.swift +++ b/Sources/SwiftSyntax/generated/Tokens.swift @@ -171,6 +171,22 @@ extension TokenSyntax { ) } + public static func extendedRegexDelimiter( + _ text: String, + leadingTrivia: Trivia = [], + trailingTrivia: Trivia = [], + presence: SourcePresence = .present + + ) -> TokenSyntax { + return TokenSyntax( + .extendedRegexDelimiter(text), + leadingTrivia: leadingTrivia, + trailingTrivia: trailingTrivia, + presence: presence + + ) + } + public static func floatingLiteral( _ text: String, leadingTrivia: Trivia = [], @@ -521,7 +537,7 @@ extension TokenSyntax { ) } - public static func regexLiteral( + public static func regexLiteralPattern( _ text: String, leadingTrivia: Trivia = [], trailingTrivia: Trivia = [], @@ -529,7 +545,21 @@ extension TokenSyntax { ) -> TokenSyntax { return TokenSyntax( - .regexLiteral(text), + .regexLiteralPattern(text), + leadingTrivia: leadingTrivia, + trailingTrivia: 
trailingTrivia, + presence: presence + + ) + } + + public static func regexSlashToken( + leadingTrivia: Trivia = [], + trailingTrivia: Trivia = [], + presence: SourcePresence = .present + ) -> TokenSyntax { + return TokenSyntax( + .regexSlash, leadingTrivia: leadingTrivia, trailingTrivia: trailingTrivia, presence: presence diff --git a/Sources/SwiftSyntax/generated/raw/RawSyntaxNodes.swift b/Sources/SwiftSyntax/generated/raw/RawSyntaxNodes.swift index 39c96776cae..76fca89a259 100644 --- a/Sources/SwiftSyntax/generated/raw/RawSyntaxNodes.swift +++ b/Sources/SwiftSyntax/generated/raw/RawSyntaxNodes.swift @@ -16805,32 +16805,80 @@ public struct RawRegexLiteralExprSyntax: RawExprSyntaxNodeProtocol { } public init( - _ unexpectedBeforeRegex: RawUnexpectedNodesSyntax? = nil, - regex: RawTokenSyntax, - _ unexpectedAfterRegex: RawUnexpectedNodesSyntax? = nil, + _ unexpectedBeforeOpeningPounds: RawUnexpectedNodesSyntax? = nil, + openingPounds: RawTokenSyntax?, + _ unexpectedBetweenOpeningPoundsAndOpenSlash: RawUnexpectedNodesSyntax? = nil, + openSlash: RawTokenSyntax, + _ unexpectedBetweenOpenSlashAndRegexPattern: RawUnexpectedNodesSyntax? = nil, + regexPattern: RawTokenSyntax, + _ unexpectedBetweenRegexPatternAndCloseSlash: RawUnexpectedNodesSyntax? = nil, + closeSlash: RawTokenSyntax, + _ unexpectedBetweenCloseSlashAndClosingPounds: RawUnexpectedNodesSyntax? = nil, + closingPounds: RawTokenSyntax?, + _ unexpectedAfterClosingPounds: RawUnexpectedNodesSyntax? 
= nil, arena: __shared SyntaxArena ) { let raw = RawSyntax.makeLayout( - kind: .regexLiteralExpr, uninitializedCount: 3, arena: arena) { layout in + kind: .regexLiteralExpr, uninitializedCount: 11, arena: arena) { layout in layout.initialize(repeating: nil) - layout[0] = unexpectedBeforeRegex?.raw - layout[1] = regex.raw - layout[2] = unexpectedAfterRegex?.raw + layout[0] = unexpectedBeforeOpeningPounds?.raw + layout[1] = openingPounds?.raw + layout[2] = unexpectedBetweenOpeningPoundsAndOpenSlash?.raw + layout[3] = openSlash.raw + layout[4] = unexpectedBetweenOpenSlashAndRegexPattern?.raw + layout[5] = regexPattern.raw + layout[6] = unexpectedBetweenRegexPatternAndCloseSlash?.raw + layout[7] = closeSlash.raw + layout[8] = unexpectedBetweenCloseSlashAndClosingPounds?.raw + layout[9] = closingPounds?.raw + layout[10] = unexpectedAfterClosingPounds?.raw } self.init(unchecked: raw) } - public var unexpectedBeforeRegex: RawUnexpectedNodesSyntax? { + public var unexpectedBeforeOpeningPounds: RawUnexpectedNodesSyntax? { layoutView.children[0].map(RawUnexpectedNodesSyntax.init(raw:)) } - public var regex: RawTokenSyntax { - layoutView.children[1].map(RawTokenSyntax.init(raw:))! + public var openingPounds: RawTokenSyntax? { + layoutView.children[1].map(RawTokenSyntax.init(raw:)) } - public var unexpectedAfterRegex: RawUnexpectedNodesSyntax? { + public var unexpectedBetweenOpeningPoundsAndOpenSlash: RawUnexpectedNodesSyntax? { layoutView.children[2].map(RawUnexpectedNodesSyntax.init(raw:)) } + + public var openSlash: RawTokenSyntax { + layoutView.children[3].map(RawTokenSyntax.init(raw:))! + } + + public var unexpectedBetweenOpenSlashAndRegexPattern: RawUnexpectedNodesSyntax? { + layoutView.children[4].map(RawUnexpectedNodesSyntax.init(raw:)) + } + + public var regexPattern: RawTokenSyntax { + layoutView.children[5].map(RawTokenSyntax.init(raw:))! + } + + public var unexpectedBetweenRegexPatternAndCloseSlash: RawUnexpectedNodesSyntax? 
{ + layoutView.children[6].map(RawUnexpectedNodesSyntax.init(raw:)) + } + + public var closeSlash: RawTokenSyntax { + layoutView.children[7].map(RawTokenSyntax.init(raw:))! + } + + public var unexpectedBetweenCloseSlashAndClosingPounds: RawUnexpectedNodesSyntax? { + layoutView.children[8].map(RawUnexpectedNodesSyntax.init(raw:)) + } + + public var closingPounds: RawTokenSyntax? { + layoutView.children[9].map(RawTokenSyntax.init(raw:)) + } + + public var unexpectedAfterClosingPounds: RawUnexpectedNodesSyntax? { + layoutView.children[10].map(RawUnexpectedNodesSyntax.init(raw:)) + } } @_spi(RawSyntax) diff --git a/Sources/SwiftSyntax/generated/raw/RawSyntaxValidation.swift b/Sources/SwiftSyntax/generated/raw/RawSyntaxValidation.swift index c6acf9c5120..5c0b317c1a8 100644 --- a/Sources/SwiftSyntax/generated/raw/RawSyntaxValidation.swift +++ b/Sources/SwiftSyntax/generated/raw/RawSyntaxValidation.swift @@ -2075,10 +2075,18 @@ func validateLayout(layout: RawSyntaxBuffer, as kind: SyntaxKind) { assertNoError(kind, 7, verify(layout[7], as: RawDeclNameArgumentsSyntax?.self)) assertNoError(kind, 8, verify(layout[8], as: RawUnexpectedNodesSyntax?.self)) case .regexLiteralExpr: - assert(layout.count == 3) + assert(layout.count == 11) assertNoError(kind, 0, verify(layout[0], as: RawUnexpectedNodesSyntax?.self)) - assertNoError(kind, 1, verify(layout[1], as: RawTokenSyntax.self, tokenChoices: [.tokenKind(.regexLiteral)])) + assertNoError(kind, 1, verify(layout[1], as: RawTokenSyntax?.self, tokenChoices: [.tokenKind(.extendedRegexDelimiter)])) assertNoError(kind, 2, verify(layout[2], as: RawUnexpectedNodesSyntax?.self)) + assertNoError(kind, 3, verify(layout[3], as: RawTokenSyntax.self, tokenChoices: [.tokenKind(.regexSlash)])) + assertNoError(kind, 4, verify(layout[4], as: RawUnexpectedNodesSyntax?.self)) + assertNoError(kind, 5, verify(layout[5], as: RawTokenSyntax.self, tokenChoices: [.tokenKind(.regexLiteralPattern)])) + assertNoError(kind, 6, verify(layout[6], as: 
RawUnexpectedNodesSyntax?.self)) + assertNoError(kind, 7, verify(layout[7], as: RawTokenSyntax.self, tokenChoices: [.tokenKind(.regexSlash)])) + assertNoError(kind, 8, verify(layout[8], as: RawUnexpectedNodesSyntax?.self)) + assertNoError(kind, 9, verify(layout[9], as: RawTokenSyntax?.self, tokenChoices: [.tokenKind(.extendedRegexDelimiter)])) + assertNoError(kind, 10, verify(layout[10], as: RawUnexpectedNodesSyntax?.self)) case .repeatWhileStmt: assert(layout.count == 9) assertNoError(kind, 0, verify(layout[0], as: RawUnexpectedNodesSyntax?.self)) diff --git a/Sources/SwiftSyntax/generated/syntaxNodes/SyntaxExprNodes.swift b/Sources/SwiftSyntax/generated/syntaxNodes/SyntaxExprNodes.swift index 780857eea65..ba423b3bf91 100644 --- a/Sources/SwiftSyntax/generated/syntaxNodes/SyntaxExprNodes.swift +++ b/Sources/SwiftSyntax/generated/syntaxNodes/SyntaxExprNodes.swift @@ -5465,16 +5465,48 @@ public struct RegexLiteralExprSyntax: ExprSyntaxProtocol, SyntaxHashable { public init( leadingTrivia: Trivia? = nil, - _ unexpectedBeforeRegex: UnexpectedNodesSyntax? = nil, - regex: TokenSyntax = .regexLiteral("RegexLiteralToken"), - _ unexpectedAfterRegex: UnexpectedNodesSyntax? = nil, + _ unexpectedBeforeOpeningPounds: UnexpectedNodesSyntax? = nil, + openingPounds: TokenSyntax? = nil, + _ unexpectedBetweenOpeningPoundsAndOpenSlash: UnexpectedNodesSyntax? = nil, + openSlash: TokenSyntax = .regexSlashToken(), + _ unexpectedBetweenOpenSlashAndRegexPattern: UnexpectedNodesSyntax? = nil, + regexPattern: TokenSyntax = .regexLiteralPattern("RegexLiteralPatternToken"), + _ unexpectedBetweenRegexPatternAndCloseSlash: UnexpectedNodesSyntax? = nil, + closeSlash: TokenSyntax = .regexSlashToken(), + _ unexpectedBetweenCloseSlashAndClosingPounds: UnexpectedNodesSyntax? = nil, + closingPounds: TokenSyntax? = nil, + _ unexpectedAfterClosingPounds: UnexpectedNodesSyntax? = nil, trailingTrivia: Trivia? 
= nil ) { // Extend the lifetime of all parameters so their arenas don't get destroyed // before they can be added as children of the new arena. - let data: SyntaxData = withExtendedLifetime((SyntaxArena(), (unexpectedBeforeRegex, regex, unexpectedAfterRegex))) {(arena, _) in - let layout: [RawSyntax?] = [unexpectedBeforeRegex?.raw, regex.raw, unexpectedAfterRegex?.raw] + let data: SyntaxData = withExtendedLifetime((SyntaxArena(), ( + unexpectedBeforeOpeningPounds, + openingPounds, + unexpectedBetweenOpeningPoundsAndOpenSlash, + openSlash, + unexpectedBetweenOpenSlashAndRegexPattern, + regexPattern, + unexpectedBetweenRegexPatternAndCloseSlash, + closeSlash, + unexpectedBetweenCloseSlashAndClosingPounds, + closingPounds, + unexpectedAfterClosingPounds + ))) {(arena, _) in + let layout: [RawSyntax?] = [ + unexpectedBeforeOpeningPounds?.raw, + openingPounds?.raw, + unexpectedBetweenOpeningPoundsAndOpenSlash?.raw, + openSlash.raw, + unexpectedBetweenOpenSlashAndRegexPattern?.raw, + regexPattern.raw, + unexpectedBetweenRegexPatternAndCloseSlash?.raw, + closeSlash.raw, + unexpectedBetweenCloseSlashAndClosingPounds?.raw, + closingPounds?.raw, + unexpectedAfterClosingPounds?.raw + ] let raw = RawSyntax.makeLayout( kind: SyntaxKind.regexLiteralExpr, from: layout, @@ -5487,7 +5519,7 @@ public struct RegexLiteralExprSyntax: ExprSyntaxProtocol, SyntaxHashable { self.init(data) } - public var unexpectedBeforeRegex: UnexpectedNodesSyntax? { + public var unexpectedBeforeOpeningPounds: UnexpectedNodesSyntax? { get { return data.child(at: 0, parent: Syntax(self)).map(UnexpectedNodesSyntax.init) } @@ -5496,16 +5528,16 @@ public struct RegexLiteralExprSyntax: ExprSyntaxProtocol, SyntaxHashable { } } - public var regex: TokenSyntax { + public var openingPounds: TokenSyntax? { get { - return TokenSyntax(data.child(at: 1, parent: Syntax(self))!) 
+ return data.child(at: 1, parent: Syntax(self)).map(TokenSyntax.init) } set(value) { - self = RegexLiteralExprSyntax(data.replacingChild(at: 1, with: value.raw, arena: SyntaxArena())) + self = RegexLiteralExprSyntax(data.replacingChild(at: 1, with: value?.raw, arena: SyntaxArena())) } } - public var unexpectedAfterRegex: UnexpectedNodesSyntax? { + public var unexpectedBetweenOpeningPoundsAndOpenSlash: UnexpectedNodesSyntax? { get { return data.child(at: 2, parent: Syntax(self)).map(UnexpectedNodesSyntax.init) } @@ -5514,8 +5546,92 @@ public struct RegexLiteralExprSyntax: ExprSyntaxProtocol, SyntaxHashable { } } + public var openSlash: TokenSyntax { + get { + return TokenSyntax(data.child(at: 3, parent: Syntax(self))!) + } + set(value) { + self = RegexLiteralExprSyntax(data.replacingChild(at: 3, with: value.raw, arena: SyntaxArena())) + } + } + + public var unexpectedBetweenOpenSlashAndRegexPattern: UnexpectedNodesSyntax? { + get { + return data.child(at: 4, parent: Syntax(self)).map(UnexpectedNodesSyntax.init) + } + set(value) { + self = RegexLiteralExprSyntax(data.replacingChild(at: 4, with: value?.raw, arena: SyntaxArena())) + } + } + + public var regexPattern: TokenSyntax { + get { + return TokenSyntax(data.child(at: 5, parent: Syntax(self))!) + } + set(value) { + self = RegexLiteralExprSyntax(data.replacingChild(at: 5, with: value.raw, arena: SyntaxArena())) + } + } + + public var unexpectedBetweenRegexPatternAndCloseSlash: UnexpectedNodesSyntax? { + get { + return data.child(at: 6, parent: Syntax(self)).map(UnexpectedNodesSyntax.init) + } + set(value) { + self = RegexLiteralExprSyntax(data.replacingChild(at: 6, with: value?.raw, arena: SyntaxArena())) + } + } + + public var closeSlash: TokenSyntax { + get { + return TokenSyntax(data.child(at: 7, parent: Syntax(self))!) 
+ } + set(value) { + self = RegexLiteralExprSyntax(data.replacingChild(at: 7, with: value.raw, arena: SyntaxArena())) + } + } + + public var unexpectedBetweenCloseSlashAndClosingPounds: UnexpectedNodesSyntax? { + get { + return data.child(at: 8, parent: Syntax(self)).map(UnexpectedNodesSyntax.init) + } + set(value) { + self = RegexLiteralExprSyntax(data.replacingChild(at: 8, with: value?.raw, arena: SyntaxArena())) + } + } + + public var closingPounds: TokenSyntax? { + get { + return data.child(at: 9, parent: Syntax(self)).map(TokenSyntax.init) + } + set(value) { + self = RegexLiteralExprSyntax(data.replacingChild(at: 9, with: value?.raw, arena: SyntaxArena())) + } + } + + public var unexpectedAfterClosingPounds: UnexpectedNodesSyntax? { + get { + return data.child(at: 10, parent: Syntax(self)).map(UnexpectedNodesSyntax.init) + } + set(value) { + self = RegexLiteralExprSyntax(data.replacingChild(at: 10, with: value?.raw, arena: SyntaxArena())) + } + } + public static var structure: SyntaxNodeStructure { - return .layout([\Self.unexpectedBeforeRegex, \Self.regex, \Self.unexpectedAfterRegex]) + return .layout([ + \Self.unexpectedBeforeOpeningPounds, + \Self.openingPounds, + \Self.unexpectedBetweenOpeningPoundsAndOpenSlash, + \Self.openSlash, + \Self.unexpectedBetweenOpenSlashAndRegexPattern, + \Self.regexPattern, + \Self.unexpectedBetweenRegexPatternAndCloseSlash, + \Self.closeSlash, + \Self.unexpectedBetweenCloseSlashAndClosingPounds, + \Self.closingPounds, + \Self.unexpectedAfterClosingPounds + ]) } public func childNameForDiagnostics(_ index: SyntaxChildrenIndex) -> String? 
{ @@ -5526,6 +5642,22 @@ public struct RegexLiteralExprSyntax: ExprSyntaxProtocol, SyntaxHashable { return nil case 2: return nil + case 3: + return nil + case 4: + return nil + case 5: + return nil + case 6: + return nil + case 7: + return nil + case 8: + return nil + case 9: + return nil + case 10: + return nil default: fatalError("Invalid index") } @@ -5535,9 +5667,18 @@ public struct RegexLiteralExprSyntax: ExprSyntaxProtocol, SyntaxHashable { extension RegexLiteralExprSyntax: CustomReflectable { public var customMirror: Mirror { return Mirror(self, children: [ - "unexpectedBeforeRegex": unexpectedBeforeRegex.map(Syntax.init)?.asProtocol(SyntaxProtocol.self) as Any , - "regex": Syntax(regex).asProtocol(SyntaxProtocol.self), - "unexpectedAfterRegex": unexpectedAfterRegex.map(Syntax.init)?.asProtocol(SyntaxProtocol.self) as Any ]) + "unexpectedBeforeOpeningPounds": unexpectedBeforeOpeningPounds.map(Syntax.init)?.asProtocol(SyntaxProtocol.self) as Any , + "openingPounds": openingPounds.map(Syntax.init)?.asProtocol(SyntaxProtocol.self) as Any , + "unexpectedBetweenOpeningPoundsAndOpenSlash": unexpectedBetweenOpeningPoundsAndOpenSlash.map(Syntax.init)?.asProtocol(SyntaxProtocol.self) as Any , + "openSlash": Syntax(openSlash).asProtocol(SyntaxProtocol.self), + "unexpectedBetweenOpenSlashAndRegexPattern": unexpectedBetweenOpenSlashAndRegexPattern.map(Syntax.init)?.asProtocol(SyntaxProtocol.self) as Any , + "regexPattern": Syntax(regexPattern).asProtocol(SyntaxProtocol.self), + "unexpectedBetweenRegexPatternAndCloseSlash": unexpectedBetweenRegexPatternAndCloseSlash.map(Syntax.init)?.asProtocol(SyntaxProtocol.self) as Any , + "closeSlash": Syntax(closeSlash).asProtocol(SyntaxProtocol.self), + "unexpectedBetweenCloseSlashAndClosingPounds": unexpectedBetweenCloseSlashAndClosingPounds.map(Syntax.init)?.asProtocol(SyntaxProtocol.self) as Any , + "closingPounds": closingPounds.map(Syntax.init)?.asProtocol(SyntaxProtocol.self) as Any , + "unexpectedAfterClosingPounds": 
unexpectedAfterClosingPounds.map(Syntax.init)?.asProtocol(SyntaxProtocol.self) as Any + ]) } } diff --git a/Sources/lit-test-helper/ClassifiedSyntaxTreePrinter.swift b/Sources/lit-test-helper/ClassifiedSyntaxTreePrinter.swift index 8a1b0bb2a73..c6739d09b78 100644 --- a/Sources/lit-test-helper/ClassifiedSyntaxTreePrinter.swift +++ b/Sources/lit-test-helper/ClassifiedSyntaxTreePrinter.swift @@ -26,6 +26,7 @@ extension SyntaxClassification { case .floatingLiteral: return "float" case .stringLiteral: return "str" case .stringInterpolationAnchor: return "anchor" + case .regexLiteral: return "regex" case .poundDirectiveKeyword: return "#kw" case .buildConfigId: return "#id" case .attribute: return "attr-builtin" diff --git a/Tests/SwiftParserTest/DeclarationTests.swift b/Tests/SwiftParserTest/DeclarationTests.swift index 66c544fc884..d49f15d23fc 100644 --- a/Tests/SwiftParserTest/DeclarationTests.swift +++ b/Tests/SwiftParserTest/DeclarationTests.swift @@ -66,10 +66,9 @@ final class DeclarationTests: XCTestCase { assertParse("func /^/ (lhs: Int, rhs: Int) -> Int { 1 / 2 }") assertParse( - "func 1️⃣/^notoperator^/ (lhs: Int, rhs: Int) -> Int { 1 / 2 }", + "func /^1️⃣notoperator^/ (lhs: Int, rhs: Int) -> Int { 1 / 2 }", diagnostics: [ - DiagnosticSpec(message: "expected identifier in function"), - DiagnosticSpec(message: "unexpected code '/^notoperator^/' before parameter clause"), + DiagnosticSpec(message: "unexpected code 'notoperator^/' before parameter clause") ] ) @@ -742,16 +741,15 @@ final class DeclarationTests: XCTestCase { func testExpressionMember() { assertParse( """ - struct S { - 1️⃣/ 2️⃣#3️⃣#4️⃣#line 5️⃣25 "line-directive.swift" - } + struct S {1️⃣ + /2️⃣ ###line 25 "line-directive.swift"3️⃣ + 4️⃣} """, diagnostics: [ - DiagnosticSpec(locationMarker: "1️⃣", message: "expected 'func' in function"), - DiagnosticSpec(locationMarker: "2️⃣", message: "expected parameter clause in function signature"), - DiagnosticSpec(locationMarker: "3️⃣", message: "expected 
identifier in macro expansion"), - DiagnosticSpec(locationMarker: "4️⃣", message: "expected identifier in macro expansion"), - DiagnosticSpec(locationMarker: "5️⃣", message: #"unexpected code '25 "line-directive.swift"' in struct"#), + DiagnosticSpec(locationMarker: "1️⃣", message: "expected '}' to end struct"), + DiagnosticSpec(locationMarker: "2️⃣", message: "bare slash regex literal may not start with space"), + DiagnosticSpec(locationMarker: "3️⃣", message: "expected '/' to end regex literal"), + DiagnosticSpec(locationMarker: "4️⃣", message: "extraneous brace at top level"), ] ) } diff --git a/Tests/SwiftParserTest/ExpressionTests.swift b/Tests/SwiftParserTest/ExpressionTests.swift index 6b822a57649..2654209a715 100644 --- a/Tests/SwiftParserTest/ExpressionTests.swift +++ b/Tests/SwiftParserTest/ExpressionTests.swift @@ -304,14 +304,6 @@ final class ExpressionTests: XCTestCase { ) } - func testRegexLiteral() { - assertParse( - #""" - /(?[[:alpha:]]\w*) = (?[0-9A-F]+)/ - """# - ) - } - func testInitializerExpression() { assertParse("Lexer.Cursor(input: input, previous: 0)") } diff --git a/Tests/SwiftParserTest/LexerTests.swift b/Tests/SwiftParserTest/LexerTests.swift index c386b840344..047b7f9fb9e 100644 --- a/Tests/SwiftParserTest/LexerTests.swift +++ b/Tests/SwiftParserTest/LexerTests.swift @@ -337,19 +337,29 @@ public class LexerTests: XCTestCase { assertLexemes( "/abc/", lexemes: [ - LexemeSpec(.regexLiteral, text: "/abc/") + LexemeSpec(.regexSlash, text: "/"), + LexemeSpec(.regexLiteralPattern, text: "abc"), + LexemeSpec(.regexSlash, text: "/"), ] ) assertLexemes( "#/abc/#", lexemes: [ - LexemeSpec(.regexLiteral, text: "#/abc/#") + LexemeSpec(.extendedRegexDelimiter, text: "#"), + LexemeSpec(.regexSlash, text: "/"), + LexemeSpec(.regexLiteralPattern, text: "abc"), + LexemeSpec(.regexSlash, text: "/"), + LexemeSpec(.extendedRegexDelimiter, text: "#"), ] ) assertLexemes( "###/abc/###", lexemes: [ - LexemeSpec(.regexLiteral, text: "###/abc/###") + 
LexemeSpec(.extendedRegexDelimiter, text: "###"), + LexemeSpec(.regexSlash, text: "/"), + LexemeSpec(.regexLiteralPattern, text: "abc"), + LexemeSpec(.regexSlash, text: "/"), + LexemeSpec(.extendedRegexDelimiter, text: "###"), ] ) assertLexemes( @@ -360,45 +370,69 @@ public class LexerTests: XCTestCase { /# """, lexemes: [ - LexemeSpec(.regexLiteral, text: "#/\na\nb\n/#") + LexemeSpec(.extendedRegexDelimiter, text: "#"), + LexemeSpec(.regexSlash, text: "/"), + LexemeSpec(.regexLiteralPattern, text: "\na\nb\n"), + LexemeSpec(.regexSlash, text: "/"), + LexemeSpec(.extendedRegexDelimiter, text: "#"), ] ) assertLexemes( "#/ \na\nb\n /#", lexemes: [ - LexemeSpec(.regexLiteral, text: "#/ \na\nb\n /#") + LexemeSpec(.extendedRegexDelimiter, text: "#"), + LexemeSpec(.regexSlash, text: "/"), + LexemeSpec(.regexLiteralPattern, text: " \na\nb\n "), + LexemeSpec(.regexSlash, text: "/"), + LexemeSpec(.extendedRegexDelimiter, text: "#"), ] ) assertLexemes( "##/ \na\nb\n /##", lexemes: [ - LexemeSpec(.regexLiteral, text: "##/ \na\nb\n /##") + LexemeSpec(.extendedRegexDelimiter, text: "##"), + LexemeSpec(.regexSlash, text: "/"), + LexemeSpec(.regexLiteralPattern, text: " \na\nb\n "), + LexemeSpec(.regexSlash, text: "/"), + LexemeSpec(.extendedRegexDelimiter, text: "##"), ] ) assertLexemes( "#/abc/def/#", lexemes: [ - LexemeSpec(.regexLiteral, text: "#/abc/def/#") + LexemeSpec(.extendedRegexDelimiter, text: "#"), + LexemeSpec(.regexSlash, text: "/"), + LexemeSpec(.regexLiteralPattern, text: "abc/def"), + LexemeSpec(.regexSlash, text: "/"), + LexemeSpec(.extendedRegexDelimiter, text: "#"), ] ) assertLexemes( "#/abc\\/#def/#", lexemes: [ - LexemeSpec(.regexLiteral, text: "#/abc\\/#def/#") + LexemeSpec(.extendedRegexDelimiter, text: "#"), + LexemeSpec(.regexSlash, text: "/"), + LexemeSpec(.regexLiteralPattern, text: "abc\\/#def"), + LexemeSpec(.regexSlash, text: "/"), + LexemeSpec(.extendedRegexDelimiter, text: "#"), ] ) assertLexemes( "#/abc|#def/#", lexemes: [ - 
LexemeSpec(.regexLiteral, text: "#/abc|#def/#") + LexemeSpec(.extendedRegexDelimiter, text: "#"), + LexemeSpec(.regexSlash, text: "/"), + LexemeSpec(.regexLiteralPattern, text: "abc|#def"), + LexemeSpec(.regexSlash, text: "/"), + LexemeSpec(.extendedRegexDelimiter, text: "#"), ] ) assertLexemes( "#/abc\n/#", lexemes: [ - LexemeSpec(.pound, text: "#"), - LexemeSpec(.binaryOperator, text: "/"), - LexemeSpec(.identifier, text: "abc"), + LexemeSpec(.extendedRegexDelimiter, text: "#"), + LexemeSpec(.regexSlash, text: "/"), + LexemeSpec(.regexLiteralPattern, text: "abc"), LexemeSpec(.prefixOperator, leading: "\n", text: "/", flags: [.isAtStartOfLine]), LexemeSpec(.pound, text: "#"), ] @@ -406,9 +440,9 @@ public class LexerTests: XCTestCase { assertLexemes( "#/abc\r/#", lexemes: [ - LexemeSpec(.pound, text: "#"), - LexemeSpec(.binaryOperator, text: "/"), - LexemeSpec(.identifier, text: "abc"), + LexemeSpec(.extendedRegexDelimiter, text: "#"), + LexemeSpec(.regexSlash, text: "/"), + LexemeSpec(.regexLiteralPattern, text: "abc"), LexemeSpec(.prefixOperator, leading: "\r", text: "/", flags: [.isAtStartOfLine]), LexemeSpec(.pound, text: "#"), ] @@ -422,6 +456,117 @@ public class LexerTests: XCTestCase { LexemeSpec(.postfixOperator, text: "/"), ] ) + // Make sure if we have an unterminated extended literal, we stop at the + // first newline + assertLexemes( + """ + #/ + abc + """, + lexemes: [ + LexemeSpec(.extendedRegexDelimiter, text: "#"), + LexemeSpec(.regexSlash, text: "/"), + LexemeSpec(.regexLiteralPattern, text: ""), + LexemeSpec(.identifier, leading: "\n", text: "abc", flags: .isAtStartOfLine), + ] + ) + // Check that we can split operators okay. 
+ assertLexemes( + "!/abc/", + lexemes: [ + LexemeSpec(.prefixOperator, text: "!"), + LexemeSpec(.regexSlash, text: "/"), + LexemeSpec(.regexLiteralPattern, text: "abc"), + LexemeSpec(.regexSlash, text: "/"), + ] + ) + assertLexemes( + "!^/abc/", + lexemes: [ + LexemeSpec(.prefixOperator, text: "!^"), + LexemeSpec(.regexSlash, text: "/"), + LexemeSpec(.regexLiteralPattern, text: "abc"), + LexemeSpec(.regexSlash, text: "/"), + ] + ) + assertLexemes( + "!#/abc/#", + lexemes: [ + LexemeSpec(.prefixOperator, text: "!"), + LexemeSpec(.extendedRegexDelimiter, text: "#"), + LexemeSpec(.regexSlash, text: "/"), + LexemeSpec(.regexLiteralPattern, text: "abc"), + LexemeSpec(.regexSlash, text: "/"), + LexemeSpec(.extendedRegexDelimiter, text: "#"), + ] + ) + // Make sure we don't lex this as a regex. + assertLexemes( + """ + func /^ () { y/ } + """, + lexemes: [ + LexemeSpec(.keyword, text: "func", trailing: " "), + LexemeSpec(.binaryOperator, text: "/^", trailing: " "), + LexemeSpec(.leftParen, text: "("), + LexemeSpec(.rightParen, text: ")", trailing: " "), + LexemeSpec(.leftBrace, text: "{", trailing: " "), + LexemeSpec(.identifier, text: "y"), + LexemeSpec(.postfixOperator, text: "/", trailing: " "), + LexemeSpec(.rightBrace, text: "}"), + ] + ) + assertLexemes( + "^^/!*/", + lexemes: [ + LexemeSpec(.prefixOperator, text: "^^"), + LexemeSpec(.regexSlash, text: "/"), + LexemeSpec(.regexLiteralPattern, text: "!*"), + LexemeSpec(.regexSlash, text: "/"), + ] + ) + assertLexemes( + "/!*/", + lexemes: [ + LexemeSpec(.regexSlash, text: "/"), + LexemeSpec(.regexLiteralPattern, text: "!*"), + LexemeSpec(.regexSlash, text: "/"), + ] + ) + + // Regex literal, even though it's in operator position. 
+ assertLexemes( + "x /y/", + lexemes: [ + LexemeSpec(.identifier, text: "x", trailing: " "), + LexemeSpec(.regexSlash, text: "/"), + LexemeSpec(.regexLiteralPattern, text: "y"), + LexemeSpec(.regexSlash, text: "/"), + ] + ) + assertLexemes( + "x /.^ y/", + lexemes: [ + LexemeSpec(.identifier, text: "x", trailing: " "), + LexemeSpec(.regexSlash, text: "/"), + LexemeSpec(.regexLiteralPattern, text: ".^ y"), + LexemeSpec(.regexSlash, text: "/"), + ] + ) + + // Comments, not regex literals + assertLexemes( + "^//", + lexemes: [ + LexemeSpec(.binaryOperator, text: "^", trailing: "//") + ] + ) + assertLexemes( + "^/*/", + lexemes: [ + LexemeSpec(.binaryOperator, text: "^", trailing: "/*/") + ] + ) } func testUnexpectedLexing() { @@ -613,13 +758,29 @@ public class LexerTests: XCTestCase { assertLexemes( "#/abc|#def/", lexemes: [ - LexemeSpec(.pound, text: "#"), - LexemeSpec(.binaryOperator, text: "/"), - LexemeSpec(.identifier, text: "abc"), - LexemeSpec(.binaryOperator, text: "|"), - LexemeSpec(.pound, text: "#"), - LexemeSpec(.identifier, text: "def"), - LexemeSpec(.postfixOperator, text: "/"), + LexemeSpec(.extendedRegexDelimiter, text: "#"), + LexemeSpec(.regexSlash, text: "/"), + LexemeSpec(.regexLiteralPattern, text: "abc|#def"), + LexemeSpec(.regexSlash, text: "/"), + ] + ) + + assertLexemes( + "#/abc|#def//", + lexemes: [ + LexemeSpec(.extendedRegexDelimiter, text: "#"), + LexemeSpec(.regexSlash, text: "/"), + LexemeSpec(.regexLiteralPattern, text: "abc|#def/"), + LexemeSpec(.regexSlash, text: "/"), + ] + ) + // This is an operator, not a regex. 
+ assertLexemes( + "lhs /==/ rhs", + lexemes: [ + LexemeSpec(.identifier, text: "lhs", trailing: " "), + LexemeSpec(.binaryOperator, text: "/==/", trailing: " "), + LexemeSpec(.identifier, text: "rhs"), ] ) } @@ -903,6 +1064,24 @@ public class LexerTests: XCTestCase { } } + func testInvalidUTF8RegexLiteral() { + let slashByte = UInt8(UnicodeScalar("/").value) + let sourceBytes: [UInt8] = [slashByte, 0xfd, slashByte] + + lex(sourceBytes) { lexemes in + guard lexemes.count == 4 else { + return XCTFail("Expected 4 lexemes, got \(lexemes.count)") + } + assertRawBytesLexeme( + lexemes[1], + kind: .regexLiteralPattern, + leadingTrivia: [], + text: [0xfd], + error: TokenDiagnostic(.invalidUtf8, byteOffset: 1) + ) + } + } + func testInterpolatedString() { assertLexemes( #""" diff --git a/Tests/SwiftParserTest/RegexLiteralTests.swift b/Tests/SwiftParserTest/RegexLiteralTests.swift new file mode 100644 index 00000000000..acc3b690812 --- /dev/null +++ b/Tests/SwiftParserTest/RegexLiteralTests.swift @@ -0,0 +1,1208 @@ +//===----------------------------------------------------------------------===// +// +// This source file is part of the Swift.org open source project +// +// Copyright (c) 2023 Apple Inc. 
and the Swift project authors +// Licensed under Apache License v2.0 with Runtime Library Exception +// +// See https://swift.org/LICENSE.txt for license information +// See https://swift.org/CONTRIBUTORS.txt for the list of Swift project authors +// +//===----------------------------------------------------------------------===// + +@_spi(RawSyntax) import SwiftSyntax +@_spi(RawSyntax) import SwiftParser +import XCTest + +final class RegexLiteralTests: XCTestCase { + func testForwardSlash1() { + assertParse( + #""" + /(?[[:alpha:]]\w*) = (?[0-9A-F]+)/ + """# + ) + } + + func testForwardSlash2() { + assertParse( + """ + postfix func /(lhs: Int) -> Int {1/} + """ + ) + } + + func testEmpty() { + assertParse("#//#") + } + + func testExtraneous1() { + assertParse( + #""" + #//#1️⃣# + """#, + diagnostics: [ + DiagnosticSpec(locationMarker: "1️⃣", message: "too many '#' characters in closing delimiter") + ], + fixedSource: "#//#" + ) + } + func testExtraneous2() { + assertParse( + """ + #/abc/#1️⃣# + """, + diagnostics: [ + DiagnosticSpec(locationMarker: "1️⃣", message: "too many '#' characters in closing delimiter") + ], + fixedSource: "#/abc/#" + ) + } + + func testUnterminated1() { + assertParse( + #""" + #1️⃣ + """#, + diagnostics: [ + DiagnosticSpec(message: "expected identifier in macro expansion") + ] + ) + } + + func testUnterminated2() { + assertParse( + #""" + /1️⃣ + """#, + diagnostics: [ + DiagnosticSpec(message: "expected '/' to end regex literal") + ] + ) + } + + func testUnterminated3() { + assertParse( + #""" + /#1️⃣ + """#, + diagnostics: [ + DiagnosticSpec(message: "expected identifier in macro expansion") + ] + ) + } + + func testUnterminated4() { + assertParse( + #""" + #/1️⃣ + """#, + diagnostics: [ + DiagnosticSpec(message: "expected '/#' to end regex literal") + ] + ) + } + + func testUnterminated5() { + assertParse( + #""" + #//1️⃣ + """#, + diagnostics: [ + DiagnosticSpec(message: "expected '#' to end regex literal") + ] + ) + } + + func 
testUnterminated6() { + assertParse( + #""" + #///1️⃣ + """#, + diagnostics: [ + DiagnosticSpec(message: "expected '#' to end regex literal") + ] + ) + } + + func testUnterminated7() { + assertParse( + #""" + #/#1️⃣ + """#, + diagnostics: [ + DiagnosticSpec(message: "expected '/#' to end regex literal") + ] + ) + } + + func testUnterminated8() { + assertParse( + #""" + #/##1️⃣ + """#, + diagnostics: [ + DiagnosticSpec(message: "expected '/#' to end regex literal") + ] + ) + } + + func testUnterminated9() { + assertParse( + #""" + #/##/1️⃣ + """#, + diagnostics: [ + DiagnosticSpec(message: "expected '#' to end regex literal") + ] + ) + } + + func testUnterminated10() { + assertParse( + #""" + ##/##/#1️⃣ + """#, + diagnostics: [ + DiagnosticSpec(message: "expected additional '#' characters in closing delimiter") + ], + fixedSource: "##/##/##" + ) + } + + func testUnterminated11() { + assertParse( + #""" + ##/###1️⃣ + """#, + diagnostics: [ + DiagnosticSpec(message: "expected '/##' to end regex literal") + ], + fixedSource: "##/###/##" + ) + } + + func testUnterminated12() { + assertParse( + #""" + #/\/#1️⃣ + """#, + diagnostics: [ + DiagnosticSpec(message: "expected '/#' to end regex literal") + ], + fixedSource: #"#/\/#/#"# + ) + } + + func testUnterminated13() { + assertParse( + #""" + ##/abc/#def1️⃣ + """#, + diagnostics: [ + DiagnosticSpec(message: "expected '/##' to end regex literal") + ], + fixedSource: "##/abc/#def/##" + ) + } + + func testUnterminated14() { + assertParse( + #""" + ##/abc/def#1️⃣ + """#, + diagnostics: [ + DiagnosticSpec(message: "expected '/##' to end regex literal") + ], + fixedSource: "##/abc/def#/##" + ) + } + + func testTerminated1() { + assertParse( + #""" + #//# + """# + ) + } + + func testTerminated2() { + assertParse( + #""" + #///# + """# + ) + } + + func testTerminated3() { + assertParse( + #""" + #/#//# + """# + ) + } + + func testTerminated4() { + assertParse( + #""" + ##/##/## + """# + ) + } + + func testTerminated5() { + 
assertParse( + #""" + #/\/#/# + """# + ) + } + + func testTerminated6() { + assertParse( + #""" + #/\//# + """# + ) + } + + func testTerminated7() { + assertParse( + #""" + #/\\/# + """# + ) + } + + func testUnprintable1() { + assertParse( + """ + /1️⃣\u{7F}/ + """, + diagnostics: [ + DiagnosticSpec(message: "unprintable ASCII character found in source file") + ] + ) + } + + func testUnprintable2() { + assertParse( + """ + #/1️⃣\u{7F}/# + """, + diagnostics: [ + DiagnosticSpec(message: "unprintable ASCII character found in source file") + ] + ) + } + + func testMultiline1() { + assertParse( + """ + #/ + abc1️⃣/# + """, + diagnostics: [ + DiagnosticSpec(message: "multi-line regex closing delimiter must appear on new line") + ] + ) + } + + func testMultiline2() { + assertParse( + """ + #/abc1️⃣ + /#2️⃣ + """, + diagnostics: [ + DiagnosticSpec(locationMarker: "1️⃣", message: "expected '/#' to end regex literal"), + DiagnosticSpec(locationMarker: "2️⃣", message: "expected identifier in macro expansion"), + ] + ) + } + + func testMultiline3() { + assertParse( + """ + #/abc1️⃣ + \t \t /#2️⃣ + """, + diagnostics: [ + DiagnosticSpec(locationMarker: "1️⃣", message: "expected '/#' to end regex literal"), + DiagnosticSpec(locationMarker: "2️⃣", message: "expected identifier in macro expansion"), + ] + ) + } + + func testMultiline4() { + assertParse( + """ + #/ + abc + \t \t /# + """ + ) + } + + func testMultiline5() { + assertParse( + """ + #/ + #1️⃣/# + """, + diagnostics: [ + DiagnosticSpec(message: "multi-line regex closing delimiter must appear on new line") + ] + ) + } + + func testOpeningSpace1() { + assertParse( + """ + /1️⃣ a/ + """, + diagnostics: [ + DiagnosticSpec(message: "bare slash regex literal may not start with space") + ] + ) + } + + func testOpeningSpace2() { + assertParse( + """ + let x = /1️⃣ a/ + """, + diagnostics: [ + DiagnosticSpec(message: "bare slash regex literal may not start with space") + ] + ) + } + + func testOpeningSpace3() { + assertParse( + 
""" + #/ a/# + """ + ) + } + + func testClosingSpace1() { + assertParse( + """ + /a /1️⃣ + """, + diagnostics: [ + DiagnosticSpec(message: "expected expression after operator") + ] + ) + } + + func testClosingSpace2() { + assertParse( + """ + let x = /a /1️⃣ + """, + diagnostics: [ + DiagnosticSpec(message: "expected expression after operator") + ] + ) + } + + func testClosingSpace3() { + assertParse( + """ + #/a /# + """ + ) + } + + func testOpeningAndClosingSpace1() { + assertParse( + """ + /1️⃣ / + """, + diagnostics: [ + DiagnosticSpec(message: "bare slash regex literal may not start with space") + ] + ) + } + + func testOpeningAndClosingSpace2() { + assertParse( + """ + x += /1️⃣ / + """, + diagnostics: [ + DiagnosticSpec(message: "bare slash regex literal may not start with space") + ] + ) + } + + func testOpeningAndClosingSpace3() { + assertParse( + """ + #/ /# + """ + ) + } + + func testOpeningAndClosingSpace4() { + assertParse( + """ + /1️⃣ / + """, + diagnostics: [ + DiagnosticSpec(message: "bare slash regex literal may not start with space") + ] + ) + } + + func testOpeningAndClosingSpace5() { + assertParse( + """ + let x = /1️⃣ / + """, + diagnostics: [ + DiagnosticSpec(message: "bare slash regex literal may not start with space") + ] + ) + } + + func testOpeningAndClosingSpace6() { + assertParse( + """ + #/ /# + """ + ) + } + + func testSingleLineTabChar() { + // We currently only keep track of one lexer error, so only diagnose the first. 
+ assertParse( + """ + #/1️⃣\t\t/# + """, + diagnostics: [ + DiagnosticSpec(message: "unprintable ASCII character found in source file") + ] + ) + } + + func testBinOpDisambiguation1() { + assertParse( + """ + x /^ y ^/ z + """ + ) + } + + func testBinOpDisambiguation2() { + assertParse( + """ + x /^ y/ + """ + ) + } + + func testBinOpDisambiguation3() { + assertParse( + """ + x !/^ y/ + """ + ) + } + + func testBinOpDisambiguation4() { + assertParse( + """ + x !/^ y !/ z + """ + ) + } + + func testBinOpDisambiguation5() { + assertParse( + """ + try? /^ x/ + """ + ) + } + + func testBinOpDisambiguation6() { + assertParse( + """ + try? /^ x ^/ + """ + ) + } + + func testBinOpDisambiguation7() { + assertParse( + """ + try! /^ x/ + """ + ) + } + + func testBinOpDisambiguation8() { + assertParse( + """ + try? /^ x ^/ + """ + ) + } + + func testBinOpDisambiguation9() { + assertParse( + """ + x < /^ }}x/ + """ + ) + } + + func testBinOpDisambiguation10() { + assertParse( + """ + { /^ }}x/ } + """ + ) + } + + func testBinOpDisambiguation11() { + assertParse( + """ + ( /^ }}x/ ) + """ + ) + } + + func testBinOpDisambiguation12() { + assertParse( + """ + [ /^ }}x/ ] + """ + ) + } + + func testBinOpDisambiguation13() { + assertParse( + """ + foo(&/^ }}x/) + """ + ) + } + + func testBinOpDisambiguation14() { + assertParse( + """ + x; /^ }}x/ + """ + ) + } + + func testBinOpDisambiguation15() { + assertParse( + """ + [0 : /^ }}x/] + """ + ) + } + + func testBinOpDisambiguation16() { + assertParse( + """ + (0, /^ }}x/) + """ + ) + } + + func testBinOpDisambiguation17() { + assertParse( + """ + x^ /^ x/ + """ + ) + } + + func testBinOpDisambiguation18() { + assertParse( + """ + x! /^ x/ + """ + ) + } + + func testBinOpDisambiguation19() { + assertParse( + """ + x? 
/^ x/ + """ + ) + } + + func testBinOpDisambiguation20() { + assertParse( + """ + x > /^ }}x/ + """ + ) + } + + func testBinOpDisambiguation21() { + assertParse( + """ + {} /^ x/ + """ + ) + } + + func testBinOpDisambiguation22() { + assertParse( + """ + () /^ x/ + """ + ) + } + + func testBinOpDisambiguation23() { + assertParse( + """ + [] /^ x/ + """ + ) + } + + func testBinOpDisambiguation24() { + assertParse( + """ + x... /^ x/ + """ + ) + } + + func testBinOpDisambiguation25() { + assertParse( + """ + x.1️⃣ /^ x/ + """, + diagnostics: [ + DiagnosticSpec(message: "expected name in member access") + ] + ) + } + + func testBinOpDisambiguation26() { + // FIXME: The diagnostic should be one character back + assertParse( + """ + # 1️⃣/^ x/ + """, + diagnostics: [ + DiagnosticSpec(message: "expected identifier in macro expansion") + ] + ) + } + + func testBinOpDisambiguation27() { + assertParse( + """ + `x` /^ x/ + """ + ) + } + + func testBinOpDisambiguation28() { + // FIXME: The diagnostic should be one character back + assertParse( + #""" + \ 1️⃣/^ x/ + """#, + diagnostics: [ + DiagnosticSpec(message: "expected root in key path") + ] + ) + } + + func testBinOpDisambiguation29() { + assertParse( + """ + x /^ x/ + """ + ) + } + + func testBinOpDisambiguation30() { + assertParse( + """ + true /^ x/ + """ + ) + } + + func testBinOpDisambiguation31() { + assertParse( + """ + false /^ x/ + """ + ) + } + + func testBinOpDisambiguation32() { + assertParse( + """ + try /^ }}x/ + """ + ) + } + + func testBinOpDisambiguation33() { + assertParse( + """ + x as Any /^ x/ + """ + ) + } + + func testBinOpDisambiguation34() { + assertParse( + """ + nil /^ x/ + """ + ) + } + + func testBinOpDisambiguation35() { + assertParse( + """ + .none /^ x/ + """ + ) + } + + func testBinOpDisambiguation36() { + assertParse( + """ + .objc /^ x/ + """ + ) + } + + func testBinOpDisambiguation37() { + assertParse( + """ + P.Protocol /^ x/ + """ + ) + } + + func testBinOpDisambiguation38() { + 
assertParse( + """ + self /^ x/ + """ + ) + } + + func testBinOpDisambiguation39() { + assertParse( + """ + Self /^ x/ + """ + ) + } + + func testBinOpDisambiguation40() { + assertParse( + """ + super /^ x/ + """ + ) + } + + func testBinOpDisambiguation41() { + // await is a contextual keyword, so we can't assume it must be a regex. + assertParse( + """ + await 1️⃣/^ x/ + """, + diagnostics: [ + DiagnosticSpec(message: "expected expression in 'await' expression") + ] + ) + } + + func testBinOpDisambiguation42() { + // await is a contextual keyword, so we can't assume it must be a regex. + assertParse( + """ + ^await /^ x/ + """ + ) + } + + func testBinOpDisambiguation43() { + assertParse( + """ + x ? /^ }}x/ : /x/ + """ + ) + } + + func testBinOpDisambiguation44() { + assertParse( + """ + x ? /x/ : /^ }}x/ + """ + ) + } + + func testBinOpDisambiguation45() { + assertParse( + """ + 0 /^ x/ + """ + ) + } + + func testBinOpDisambiguation46() { + assertParse( + """ + 0.1 /^ x/ + """ + ) + } + + func testBinOpDisambiguation47() { + assertParse( + """ + #if /^ }}x/ + #endif + """ + ) + } + + func testBinOpDisambiguation48() { + assertParse( + """ + #if true + #else + /^ }}x/ + #endif + """ + ) + } + + func testBinOpDisambiguation49() { + assertParse( + """ + #if true + #elseif /^ }}x/ + #endif + """ + ) + } + + func testBinOpDisambiguation50() { + assertParse( + """ + #if true + #endif + /^ }}x/ + """ + ) + } + + func testBinOpDisambiguation51() { + // Unapplied operators, not regex. + assertParse( + """ + foo(a: /, /) + """, + substructure: Syntax( + TupleExprElementListSyntax([ + .init( + label: "a", + colon: .colonToken(), + expression: IdentifierExprSyntax(identifier: .binaryOperator("/")), + trailingComma: .commaToken() + ), + .init( + expression: IdentifierExprSyntax(identifier: .binaryOperator("/")) + ), + ]) + ) + ) + } + + func testBinOpDisambiguation52() { + // Unapplied operators, not regex. 
+ assertParse( + """ + foo(a, /, /) + """, + substructure: Syntax( + TupleExprElementListSyntax([ + .init( + expression: IdentifierExprSyntax(identifier: "a"), + trailingComma: .commaToken() + ), + .init( + expression: IdentifierExprSyntax(identifier: .binaryOperator("/")), + trailingComma: .commaToken() + ), + .init( + expression: IdentifierExprSyntax(identifier: .binaryOperator("/")) + ), + ]) + ) + ) + } + + func testBinOpDisambiguation53() { + // Unapplied operators, not regex. + assertParse( + """ + foo(a, ^/, /) + """, + substructure: Syntax( + TupleExprElementListSyntax([ + .init( + expression: IdentifierExprSyntax(identifier: "a"), + trailingComma: .commaToken() + ), + .init( + expression: IdentifierExprSyntax(identifier: .binaryOperator("^/")), + trailingComma: .commaToken() + ), + .init( + expression: IdentifierExprSyntax(identifier: .binaryOperator("/")) + ), + ]) + ) + ) + } + + func testBinOpDisambiguation54() { + // Unapplied operators, not regex. + assertParse( + """ + foo(a: ^/, /) + """, + substructure: Syntax( + TupleExprElementListSyntax([ + .init( + label: "a", + colon: .colonToken(), + expression: IdentifierExprSyntax(identifier: .binaryOperator("^/")), + trailingComma: .commaToken() + ), + .init( + expression: IdentifierExprSyntax(identifier: .binaryOperator("/")) + ), + ]) + ) + ) + } + + func testBinOpDisambiguation55() { + // Unapplied operators, not regex. + assertParse( + """ + foo(^/, /) + """, + substructure: Syntax( + TupleExprElementListSyntax([ + .init( + expression: IdentifierExprSyntax(identifier: .binaryOperator("^/")), + trailingComma: .commaToken() + ), + .init( + expression: IdentifierExprSyntax(identifier: .binaryOperator("/")) + ), + ]) + ) + ) + } + + func testBinOpDisambiguation56() { + // Unapplied operators, not regex. 
+ assertParse( + """ + (^/, /) + """, + substructure: Syntax( + TupleExprElementListSyntax([ + .init( + expression: IdentifierExprSyntax(identifier: .binaryOperator("^/")), + trailingComma: .commaToken() + ), + .init( + expression: IdentifierExprSyntax(identifier: .binaryOperator("/")) + ), + ]) + ) + ) + } + + func testBinOpDisambiguation57() { + // Unapplied operators, not regex. + assertParse( + """ + (/, /) + """, + substructure: Syntax( + TupleExprElementListSyntax([ + .init( + expression: IdentifierExprSyntax(identifier: .binaryOperator("/")), + trailingComma: .commaToken() + ), + .init( + expression: IdentifierExprSyntax(identifier: .binaryOperator("/")) + ), + ]) + ) + ) + } + + func testBinOpDisambiguation58() { + // Unapplied operators, not regex. + assertParse( + """ + x[/, /] + """, + substructure: Syntax( + TupleExprElementListSyntax([ + .init( + expression: IdentifierExprSyntax(identifier: .binaryOperator("/")), + trailingComma: .commaToken() + ), + .init( + expression: IdentifierExprSyntax(identifier: .binaryOperator("/")) + ), + ]) + ) + ) + } + + func testBinOpDisambiguation59() { + // Unapplied operators, not regex. + assertParse( + """ + x[^/, /] + """, + substructure: Syntax( + TupleExprElementListSyntax([ + .init( + expression: IdentifierExprSyntax(identifier: .binaryOperator("^/")), + trailingComma: .commaToken() + ), + .init( + expression: IdentifierExprSyntax(identifier: .binaryOperator("/")) + ), + ]) + ) + ) + } + + func testBinOpDisambiguation60() { + // Invalid. We can't confidently lex as a regex (as the lexer thinks it + // could be a subscript), so we get a parser error. + assertParse( + """ + [1️⃣/, /] + """, + diagnostics: [ + DiagnosticSpec(message: "unexpected code '/, /' in array") + ] + ) + } + + func testBinOpDisambiguation61() { + // Fine if there's no trailing space though. 
+ assertParse( + """ + [/,/] + """, + substructure: Syntax(RegexLiteralExprSyntax(regexPattern: .regexLiteralPattern(","))) + ) + } + + func testPrefixOpSplitting1() { + assertParse( + """ + let x =1️⃣/abc/ + """, + diagnostics: [ + DiagnosticSpec(message: "'=' must have consistent whitespace on both sides") + ] + ) + } + + func testPrefixOpSplitting2() { + assertParse( + """ + let x1️⃣ .2️⃣/abc/ + """, + diagnostics: [ + DiagnosticSpec(locationMarker: "1️⃣", message: "consecutive statements on a line must be separated by ';'"), + DiagnosticSpec(locationMarker: "2️⃣", message: "expected name in member access"), + ] + ) + } + + func testPrefixOpSplitting3() { + assertParse( + """ + let x = true?/abc/1️⃣:/def/ + """, + substructure: Syntax(BinaryOperatorExprSyntax(operatorToken: .binaryOperator("/"))), + diagnostics: [ + DiagnosticSpec(message: "extraneous code ':/def/' at top level") + ] + ) + } + + func testPrefixOpSplitting4() { + assertParse( + """ + let x = true ?/abc/ : /def/ + """, + substructure: Syntax( + SequenceExprSyntax( + elements: .init([ + BooleanLiteralExprSyntax(booleanLiteral: true), + UnresolvedTernaryExprSyntax(firstChoice: RegexLiteralExprSyntax(regexPattern: .regexLiteralPattern("abc"))), + RegexLiteralExprSyntax(regexPattern: .regexLiteralPattern("def")), + ]) + ) + ) + ) + } + + func testPrefixOpSplitting5() { + assertParse( + """ + let x = &/abc/ + """, + substructure: Syntax( + InOutExprSyntax( + expression: RegexLiteralExprSyntax( + regexPattern: .regexLiteralPattern("abc") + ) + ) + ) + ) + } + + func testNulCharacter() { + assertParse( + "/1️⃣\0/", + diagnostics: [ + DiagnosticSpec(message: "nul character embedded in middle of file", severity: .warning) + ] + ) + } + + func testEmoji() { + assertParse("/👍/") + } +} diff --git a/Tests/SwiftParserTest/translated/ForwardSlashRegexSkippingAllowedTests.swift b/Tests/SwiftParserTest/translated/ForwardSlashRegexSkippingAllowedTests.swift new file mode 100644 index 00000000000..550e60e3a3f --- 
/dev/null +++ b/Tests/SwiftParserTest/translated/ForwardSlashRegexSkippingAllowedTests.swift @@ -0,0 +1,138 @@ +//===----------------------------------------------------------------------===// +// +// This source file is part of the Swift.org open source project +// +// Copyright (c) 2023 Apple Inc. and the Swift project authors +// Licensed under Apache License v2.0 with Runtime Library Exception +// +// See https://swift.org/LICENSE.txt for license information +// See https://swift.org/CONTRIBUTORS.txt for the list of Swift project authors +// +//===----------------------------------------------------------------------===// + +// This test file has been translated from swift/test/StringProcessing/Parse/forward-slash-regex-skipping-allowed.swift + +import XCTest + +final class ForwardSlashRegexSkippingAllowedTests: XCTestCase { + func testForwardSlashRegexSkippingAllowed3() { + // Ensures there is a parse error + assertParse( + """ + var 1️⃣: Int + """, + diagnostics: [ + DiagnosticSpec(message: "expected pattern in variable") + ] + ) + } + + func testForwardSlashRegexSkippingAllowed4() { + // Balanced `{}`, so okay. + assertParse( + """ + func a() { /1️⃣ {}/ } + """, + diagnostics: [ + DiagnosticSpec(message: "bare slash regex literal may not start with space") + ] + ) + } + + func testForwardSlashRegexSkippingAllowed5() { + assertParse( + #""" + func b() { /1️⃣ \{}/ } + """#, + diagnostics: [ + DiagnosticSpec(message: "bare slash regex literal may not start with space") + ] + ) + } + + func testForwardSlashRegexSkippingAllowed6() { + assertParse( + #""" + func c() { /1️⃣ {"{"}/ } + """#, + diagnostics: [ + DiagnosticSpec(message: "bare slash regex literal may not start with space") + ] + ) + } + + func testForwardSlashRegexSkippingAllowed7() { + // Some cases of infix '/' that we should continue to skip. 
+ assertParse( + """ + func d() { + _ = 1 / 2 + 3 * 4 + _ = 1 / 2 / 3 / 4 + } + """ + ) + } + + func testForwardSlashRegexSkippingAllowed8() { + assertParse( + #""" + func e() { + let arr = [1, 2, 3] + _ = arr.reduce(0, /) / 2 + func foo(_ i: Int, _ fn: () -> Void) {} + foo(1 / 2 / 3, { print("}}}{{{") }) + } + """# + ) + } + + func testForwardSlashRegexSkippingAllowed9() { + // Some cases of prefix '/' that we should continue to skip. + assertParse( + """ + prefix operator / + prefix func / (_ x: T) -> T { x } + """ + ) + } + + func testForwardSlashRegexSkippingAllowed11() { + assertParse( + """ + func f() { + _ = /E.e + (/E.e).foo(/0) + func foo(_ x: T, _ y: U) {} + foo(/E.e, /E.e) + foo((/E.e), /E.e) + foo((/)(E.e), /E.e) + func bar(_ x: T) -> Int { 0 } + _ = bar(/E.e) / 2 + } + """ + ) + } + + func testForwardSlashRegexSkippingAllowed12() { + assertParse( + """ + postfix operator / + prefix func / (_ x: T) -> T { x } + """ + ) + } + + func testForwardSlashRegexSkippingAllowed13() { + // Some cases of postfix '/' that we should continue to skip. + assertParse( + """ + func g() { + _ = 0/ + _ = 0/ / 1/ + _ = 1/ + 1/ + _ = 1 + 2/ + } + """ + ) + } +} diff --git a/Tests/SwiftParserTest/translated/ForwardSlashRegexSkippingInvalidTests.swift b/Tests/SwiftParserTest/translated/ForwardSlashRegexSkippingInvalidTests.swift new file mode 100644 index 00000000000..58c28998a3b --- /dev/null +++ b/Tests/SwiftParserTest/translated/ForwardSlashRegexSkippingInvalidTests.swift @@ -0,0 +1,278 @@ +//===----------------------------------------------------------------------===// +// +// This source file is part of the Swift.org open source project +// +// Copyright (c) 2023 Apple Inc. 
and the Swift project authors +// Licensed under Apache License v2.0 with Runtime Library Exception +// +// See https://swift.org/LICENSE.txt for license information +// See https://swift.org/CONTRIBUTORS.txt for the list of Swift project authors +// +//===----------------------------------------------------------------------===// + +// This test file has been translated from swift/test/StringProcessing/Parse/forward-slash-regex-skipping-invalid.swift + +import XCTest + +final class ForwardSlashRegexSkippingInvalidTests: XCTestCase { + func testForwardSlashRegexSkippingInvalid1() { + // We don't consider this a regex literal when skipping as it has an initial + // space. + assertParse( + """ + func a() { _ = /1️⃣ x*/ } + """, + diagnostics: [ + DiagnosticSpec(message: "bare slash regex literal may not start with space") + ] + ) + } + + func testForwardSlashRegexSkippingInvalid2() { + // Same because of unbalanced ')' + assertParse( + """ + func b() { _ = /x1️⃣)*/ } + """, + diagnostics: [ + DiagnosticSpec(message: "unexpected code ')*/' in function") + ] + ) + } + + func testForwardSlashRegexSkippingInvalid3() { + // These also fail the heuristic, but have unbalanced `{` `}`, so we don't skip. + assertParse( + """ + func c() { _ = /1️⃣ x}*/ } + func d() { _ = /2️⃣ x{*/ } + """, + diagnostics: [ + DiagnosticSpec(locationMarker: "1️⃣", message: "bare slash regex literal may not start with space"), + DiagnosticSpec(locationMarker: "2️⃣", message: "bare slash regex literal may not start with space"), + ] + ) + } + + func testForwardSlashRegexSkippingInvalid4() { + // Unterminated, and unbalanced `{}`. 
+ assertParse( + """ + func e() { + _ = /1️⃣ }2️⃣ + } + """, + diagnostics: [ + DiagnosticSpec(locationMarker: "1️⃣", message: "bare slash regex literal may not start with space"), + DiagnosticSpec(locationMarker: "2️⃣", message: "expected '/' to end regex literal"), + ] + ) + } + + func testForwardSlashRegexSkippingInvalid5() { + assertParse( + """ + func f() { + _ = /1️⃣ {2️⃣ + } + """, + diagnostics: [ + DiagnosticSpec(locationMarker: "1️⃣", message: "bare slash regex literal may not start with space"), + DiagnosticSpec(locationMarker: "2️⃣", message: "expected '/' to end regex literal"), + ] + ) + } + + func testForwardSlashRegexSkippingInvalid6() { + assertParse( + """ + func g() { + _ = /x } + 1️⃣} + """, + diagnostics: [ + DiagnosticSpec(message: "extraneous brace at top level") + ] + ) + } + + func testForwardSlashRegexSkippingInvalid7() { + assertParse( + """ + func h() { + _ = /x { + } + } + """ + ) + } + + func testForwardSlashRegexSkippingInvalid8() { + assertParse( + #""" + func i() { + _ = /x1️⃣ "[abc] {2️⃣ + } + """#, + diagnostics: [ + DiagnosticSpec(locationMarker: "1️⃣", message: "consecutive statements on a line must be separated by ';'"), + DiagnosticSpec(locationMarker: "2️⃣", message: #"expected '"' to end string literal"#), + ] + ) + } + + func testForwardSlashRegexSkippingInvalid9() { + assertParse( + """ + func j() { + _ = /^ [abc] {1️⃣ + } + """, + diagnostics: [ + DiagnosticSpec(message: "expected '/' to end regex literal") + ] + ) + } + + func testForwardSlashRegexSkippingInvalid10() { + assertParse( + #""" + func k() { + _ = /^ "[abc] {1️⃣ + } + """#, + diagnostics: [ + DiagnosticSpec(message: "expected '/' to end regex literal") + ] + ) + } + + func testForwardSlashRegexSkippingInvalid11() { + assertParse( + """ + func l() { + _ = /^ } abc {1️⃣ + } + """, + diagnostics: [ + DiagnosticSpec(message: "expected '/' to end regex literal") + ] + ) + } + + func testForwardSlashRegexSkippingInvalid12() { + assertParse( + #""" + func m() { + _ 
= /1️⃣ "2️⃣ + } + 3️⃣} + """#, + diagnostics: [ + DiagnosticSpec(locationMarker: "1️⃣", message: "bare slash regex literal may not start with space"), + DiagnosticSpec(locationMarker: "2️⃣", message: "expected '/' to end regex literal"), + DiagnosticSpec(locationMarker: "3️⃣", message: "extraneous brace at top level"), + + ] + ) + } + + func testForwardSlashRegexSkippingInvalid13() { + // Unbalanced `}`, make sure we don't consider the string literal `{`. + assertParse( + #""" + func n() { /1️⃣ "{"}/ } + """#, + diagnostics: [ + DiagnosticSpec(message: "bare slash regex literal may not start with space") + ] + ) + } + + func testForwardSlashRegexSkippingInvalid14() { + assertParse( + """ + func o() { + _ = { + 0 + /x}}1️⃣} / + 2 + } + } + """, + diagnostics: [ + DiagnosticSpec(message: "extraneous code at top level") + ] + ) + } + + func testForwardSlashRegexSkippingInvalid15() { + assertParse( + """ + func p() { + _ = 2 + /x} 1️⃣/ + .bitWidth + } + """, + diagnostics: [ + DiagnosticSpec(message: "extraneous code at top level") + ] + ) + } + + func testForwardSlashRegexSkippingInvalid17() { + assertParse( + """ + func err1() { _ = /1️⃣ 0xG}/ } + """, + diagnostics: [ + DiagnosticSpec(message: "bare slash regex literal may not start with space") + ] + ) + } + + func testForwardSlashRegexSkippingInvalid18() { + assertParse( + """ + func err2() { _ = /1️⃣ 0oG}/ } + """, + diagnostics: [ + DiagnosticSpec(message: "bare slash regex literal may not start with space") + ] + ) + } + + func testForwardSlashRegexSkippingInvalid19() { + assertParse( + """ + func err3() { _ = /1️⃣ {"/ } + """, + diagnostics: [ + DiagnosticSpec(message: "bare slash regex literal may not start with space") + ] + ) + } + + func testForwardSlashRegexSkippingInvalid20() { + assertParse( + """ + func err4() { _ = /1️⃣ {'/ } + """, + diagnostics: [ + DiagnosticSpec(message: "bare slash regex literal may not start with space") + ] + ) + } + + func testForwardSlashRegexSkippingInvalid21() { + 
assertParse( + """ + func err5() { _ = /1️⃣ {<#placeholder#>/ } + """, + diagnostics: [ + DiagnosticSpec(message: "bare slash regex literal may not start with space") + ] + ) + } +} diff --git a/Tests/SwiftParserTest/translated/ForwardSlashRegexSkippingTests.swift b/Tests/SwiftParserTest/translated/ForwardSlashRegexSkippingTests.swift new file mode 100644 index 00000000000..a6f22540eb8 --- /dev/null +++ b/Tests/SwiftParserTest/translated/ForwardSlashRegexSkippingTests.swift @@ -0,0 +1,474 @@ +//===----------------------------------------------------------------------===// +// +// This source file is part of the Swift.org open source project +// +// Copyright (c) 2023 Apple Inc. and the Swift project authors +// Licensed under Apache License v2.0 with Runtime Library Exception +// +// See https://swift.org/LICENSE.txt for license information +// See https://swift.org/CONTRIBUTORS.txt for the list of Swift project authors +// +//===----------------------------------------------------------------------===// + +// This test file has been translated from swift/test/StringProcessing/Parse/forward-slash-regex-skipping.swift + +import XCTest + +final class ForwardSlashRegexSkippingTests: XCTestCase { + func testForwardSlashRegexSkipping3() { + assertParse( + #""" + struct A { + static let r = /test":"(.*?)"/ + } + """# + ) + } + + func testForwardSlashRegexSkipping4() { + assertParse( + """ + struct B { + static let r = /x*/ + } + """ + ) + } + + func testForwardSlashRegexSkipping5() { + assertParse( + """ + struct C { + func foo() { + let r = /x*/ + } + } + """ + ) + } + + func testForwardSlashRegexSkipping6() { + assertParse( + """ + struct D { + func foo() { + func bar() { + let r = /x}}*/ + } + } + } + """ + ) + } + + func testForwardSlashRegexSkipping7() { + assertParse( + """ + func a() { _ = /abc}}*/ } + """ + ) + } + + func testForwardSlashRegexSkipping8() { + assertParse( + #""" + func b() { _ = /\// } + """# + ) + } + + func testForwardSlashRegexSkipping9() { + 
assertParse( + #""" + func c() { _ = /\\/ } + """# + ) + } + + func testForwardSlashRegexSkipping10() { + assertParse( + """ + func d() { _ = ^^/x}1️⃣}*/ } + """ + ) + } + + func testForwardSlashRegexSkipping11() { + assertParse( + """ + func e() { _ = (^^/x1️⃣}2️⃣}*/) } + """ + ) + } + + func testForwardSlashRegexSkipping12() { + assertParse( + """ + func f() { _ = ^^/^x}1️⃣}*/ } + """ + ) + } + + func testForwardSlashRegexSkipping13() { + assertParse( + #""" + func g() { _ = "\(/x}}*/)" } + """# + ) + } + + func testForwardSlashRegexSkipping14() { + assertParse( + #""" + func h() { _ = "\(^^/x1️⃣}}*/)" } + """# + ) + } + + func testForwardSlashRegexSkipping15() { + assertParse( + #""" + func i() { + func foo(_ x: T, y: T) {} + foo(/}}*/, y: /"/) + } + """# + ) + } + + func testForwardSlashRegexSkipping16() { + assertParse( + """ + func j() { + _ = { + 0 + /x}}}/ + 2 + } + } + """ + ) + } + + func testForwardSlashRegexSkipping17() { + assertParse( + """ + func k() { + _ = 2 + / 1 / .bitWidth + } + """ + ) + } + + func testForwardSlashRegexSkipping18() { + assertParse( + """ + func l() { + _ = 2 + /x}*/ .self + } + """ + ) + } + + func testForwardSlashRegexSkipping20() { + assertParse( + """ + func m() { + _ = 2 + / 1 / + .bitWidth + } + """ + ) + } + + func testForwardSlashRegexSkipping21() { + assertParse( + """ + func n() { + _ = 2 + /x}/ + .bitWidth + } + """ + ) + } + + func testForwardSlashRegexSkipping23() { + assertParse( + """ + func o() { + _ = /x// comment + } + """ + ) + } + + func testForwardSlashRegexSkipping24() { + assertParse( + """ + func p() { + _ = /x // comment + } + """ + ) + } + + func testForwardSlashRegexSkipping25() { + assertParse( + """ + func q() { + _ = /x/*comment*/ + } + """ + ) + } + + func testForwardSlashRegexSkipping26() { + assertParse( + """ + func r() { _ = /[(0)]/ } + """ + ) + } + + func testForwardSlashRegexSkipping27() { + assertParse( + """ + func s() { _ = /(x)/ } + """ + ) + } + + func testForwardSlashRegexSkipping28() 
{ + assertParse( + """ + func t() { _ = /[)]/ } + """ + ) + } + + func testForwardSlashRegexSkipping29() { + assertParse( + #""" + func u() { _ = /[a\])]/ } + """# + ) + } + + func testForwardSlashRegexSkipping30() { + assertParse( + """ + func v() { _ = /([)])/ } + """ + ) + } + + func testForwardSlashRegexSkipping31() { + assertParse( + """ + func w() { _ = /]]][)]/ } + """ + ) + } + + func testForwardSlashRegexSkipping32() { + assertParse( + """ + func x() { _ = /,/ } + """ + ) + } + + func testForwardSlashRegexSkipping33() { + assertParse( + """ + func y() { _ = /}/ } + """ + ) + } + + func testForwardSlashRegexSkipping34() { + assertParse( + """ + func z() { _ = /]/ } + """ + ) + } + + func testForwardSlashRegexSkipping35() { + assertParse( + """ + func a1() { _ = /:/ } + """ + ) + } + + func testForwardSlashRegexSkipping36() { + assertParse( + """ + func a2() { _ = /;/ } + """ + ) + } + + func testForwardSlashRegexSkipping37() { + assertParse( + """ + func a3() { _ = /)/ } + """ + ) + } + + func testForwardSlashRegexSkipping38() { + assertParse( + """ + func a4() { _ = /1️⃣ / } + """, + diagnostics: [ + // TODO: Old parser had a fix-it to add backslash to escape + DiagnosticSpec(message: "bare slash regex literal may not start with space") + ] + ) + } + + func testForwardSlashRegexSkipping39() { + assertParse( + #""" + func a5() { _ = /\ / } + """# + ) + } + + func testForwardSlashRegexSkipping42() { + assertParse( + #""" + func a7() { _ = /\/}/ } + """# + ) + } + + func testForwardSlashRegexSkipping43() { + // Make sure we don't emit errors for these. 
+ assertParse( + """ + func err1() { _ = /0xG/ } + """ + ) + } + + func testForwardSlashRegexSkipping44() { + assertParse( + """ + func err2() { _ = /0oG/ } + """ + ) + } + + func testForwardSlashRegexSkipping45() { + assertParse( + #""" + func err3() { _ = /"/ } + """# + ) + } + + func testForwardSlashRegexSkipping46() { + assertParse( + """ + func err4() { _ = /'/ } + """ + ) + } + + func testForwardSlashRegexSkipping47() { + assertParse( + """ + func err5() { _ = /<#placeholder#>/ } + """ + ) + } + + func testForwardSlashRegexSkipping48() { + assertParse( + """ + func err6() { _ = ^^/1️⃣0xG/ } + """ + ) + } + + func testForwardSlashRegexSkipping49() { + assertParse( + """ + func err7() { _ = ^^/1️⃣0oG/ } + """ + ) + } + + func testForwardSlashRegexSkipping50() { + assertParse( + #""" + func err8() { _ = ^^/"/ }1️⃣ + """# + ) + } + + func testForwardSlashRegexSkipping51() { + assertParse( + """ + func err9() { _ = ^^/'/ }1️⃣ + """ + ) + } + + func testForwardSlashRegexSkipping52() { + assertParse( + """ + func err10() { _ = ^^/<#placeholder#>/ } + """ + ) + } + + func testForwardSlashRegexSkipping53() { + assertParse( + """ + func err11() { _ = (^^/1️⃣0xG/) } + """ + ) + } + + func testForwardSlashRegexSkipping54() { + assertParse( + """ + func err12() { _ = (^^/1️⃣0oG/) } + """ + ) + } + + func testForwardSlashRegexSkipping55() { + assertParse( + #""" + func err13() { _ = (^^/"/) }1️⃣ + """# + ) + } + + func testForwardSlashRegexSkipping56() { + assertParse( + """ + func err14() { _ = (^^/'/) }1️⃣ + """ + ) + } + + func testForwardSlashRegexSkipping57() { + assertParse( + """ + func err15() { _ = (^^/<#placeholder#>/) } + """ + ) + } + +} diff --git a/Tests/SwiftParserTest/translated/ForwardSlashRegexTests.swift b/Tests/SwiftParserTest/translated/ForwardSlashRegexTests.swift new file mode 100644 index 00000000000..1d381c7b2a2 --- /dev/null +++ b/Tests/SwiftParserTest/translated/ForwardSlashRegexTests.swift @@ -0,0 +1,1646 @@ 
+//===----------------------------------------------------------------------===// +// +// This source file is part of the Swift.org open source project +// +// Copyright (c) 2023 Apple Inc. and the Swift project authors +// Licensed under Apache License v2.0 with Runtime Library Exception +// +// See https://swift.org/LICENSE.txt for license information +// See https://swift.org/CONTRIBUTORS.txt for the list of Swift project authors +// +//===----------------------------------------------------------------------===// + +// This test file has been translated from swift/test/StringProcessing/Parse/forward-slash-regex.swift + +@_spi(RawSyntax) import SwiftSyntax +@_spi(RawSyntax) import SwiftParser +import XCTest + +final class ForwardSlashRegexTests: XCTestCase { + func testForwardSlashRegex1() { + assertParse( + """ + prefix operator / + prefix operator ^/ + prefix operator /^/ + """ + ) + } + + func testForwardSlashRegex2() { + assertParse( + """ + prefix func ^/ (_ x: T) -> T { x } + """ + ) + } + + func testForwardSlashRegex8() { + // The divisions in the body of the below operators make sure we don't try and + // consider them to be ending delimiters of a regex. 
+ assertParse( + """ + infix operator /^/ : P + func /^/ (lhs: Int, rhs: Int) -> Int { 1 / 2 } + """ + ) + } + + func testForwardSlashRegex9() { + assertParse( + """ + infix operator /^ : P + func /^ (lhs: Int, rhs: Int) -> Int { 1 / 2 } + """ + ) + } + + func testForwardSlashRegex10() { + assertParse( + """ + infix operator ^^/ : P + func ^^/ (lhs: Int, rhs: Int) -> Int { 1 / 2 } + """ + ) + } + + func testForwardSlashRegex11() { + assertParse( + """ + let i = 0 /^/ 1/^/3 + """ + ) + } + + func testForwardSlashRegex12() { + assertParse( + """ + let x = /abc/ + """ + ) + } + + func testForwardSlashRegex13() { + assertParse( + """ + _ = /abc/ + """ + ) + } + + func testForwardSlashRegex14() { + assertParse( + """ + _ = /x/.self + """ + ) + } + + func testForwardSlashRegex15() { + assertParse( + #""" + _ = /\// + """# + ) + } + + func testForwardSlashRegex16() { + assertParse( + #""" + _ = /\\/ + """# + ) + } + + func testForwardSlashRegex18() { + // These unfortunately become prefix `=` and infix `=/` respectively. We could + // likely improve the diagnostic though. 
+ assertParse( + """ + do { + let z1️⃣=/0/ + } + """, + diagnostics: [ + DiagnosticSpec(message: "unexpected code '=/0/' in 'do' statement") + ] + ) + } + + func testForwardSlashRegex19() { + assertParse( + """ + do { + _=/0/ + } + """ + ) + } + + func testForwardSlashRegex21() { + assertParse( + """ + _ = /x + """ + ) + } + + func testForwardSlashRegex22() { + assertParse( + """ + _ = !/x/ + """ + ) + } + + func testForwardSlashRegex23() { + assertParse( + """ + _ = (!/x/) + """ + ) + } + + func testForwardSlashRegex24() { + assertParse( + """ + _ = !/1️⃣ / + """, + diagnostics: [ + DiagnosticSpec(message: "bare slash regex literal may not start with space") + ] + ) + } + + func testForwardSlashRegex25() { + assertParse( + """ + _ = !!/1️⃣ / + """, + diagnostics: [ + DiagnosticSpec(message: "bare slash regex literal may not start with space") + ] + ) + } + + func testForwardSlashRegex26() { + assertParse( + """ + _ = !!/x/ + """ + ) + } + + func testForwardSlashRegex27() { + assertParse( + """ + _ = (!!/x/) + """ + ) + } + + func testForwardSlashRegex28() { + assertParse( + """ + _ = /^)1️⃣ + """, + diagnostics: [ + DiagnosticSpec(message: "expected '/' to end regex literal") + ] + ) + } + + func testForwardSlashRegex29() { + assertParse( + """ + _ = /x/! + """ + ) + } + + func testForwardSlashRegex30() { + assertParse( + """ + _ = /x/ + /y/ + """ + ) + } + + func testForwardSlashRegex31() { + assertParse( + """ + _ = /x/+/y/ + """ + ) + } + + func testForwardSlashRegex32() { + assertParse( + """ + _ = /x/?.blah + """ + ) + } + + func testForwardSlashRegex33() { + assertParse( + """ + _ = /x/!.blah + """ + ) + } + + func testForwardSlashRegex34() { + // Regex literals cannot end with space, so this is infix '/?' + assertParse( + """ + do { + _ = /x /? + .blah + } + """, + substructure: Syntax(BinaryOperatorExprSyntax(operatorToken: .binaryOperator("/?"))) + ) + } + + func testForwardSlashRegex35() { + assertParse( + """ + _ = /x/? 
+ .blah + """ + ) + } + + func testForwardSlashRegex36() { + assertParse( + """ + _ = 0; /x/ + """ + ) + } + + func testForwardSlashRegex37() { + assertParse( + """ + do { + _ = 0; /x /1️⃣ + } + """, + diagnostics: [ + DiagnosticSpec(message: "expected expression after operator") + ] + ) + } + + func testForwardSlashRegex38() { + assertParse( + """ + _ = /x/ ? 0 : 1 + do { + _ = /x / 1️⃣? 0 : 1 + } + """, + diagnostics: [ + DiagnosticSpec(message: "expected expression after operator"), + DiagnosticSpec(message: "unexpected code '? 0 : 1' in 'do' statement"), + ] + ) + } + + func testForwardSlashRegex39() { + assertParse( + """ + _ = .random() ? /x/ : .blah + """ + ) + } + + func testForwardSlashRegex40() { + assertParse( + """ + _ = /x/ ?? /x/ + do { + _ = /x / 1️⃣?? /x / + } + """, + diagnostics: [ + DiagnosticSpec(message: "expected expression after operator"), + DiagnosticSpec(message: "unexpected code '?? /x /' in 'do' statement"), + ] + ) + } + + func testForwardSlashRegex41() { + // This parses as /x/?? / x/ + assertParse( + """ + _ = /x/??/x/ + """, + substructure: Syntax( + SequenceExprSyntax( + elements: .init([ + DiscardAssignmentExprSyntax(), + AssignmentExprSyntax(), + OptionalChainingExprSyntax( + expression: OptionalChainingExprSyntax( + expression: RegexLiteralExprSyntax(regexPattern: .regexLiteralPattern("x")) + ) + ), + BinaryOperatorExprSyntax(operatorToken: .binaryOperator("/")), + PostfixUnaryExprSyntax(expression: IdentifierExprSyntax(identifier: "x"), operatorToken: .postfixOperator("/")), + ]) + ) + ) + ) + } + + func testForwardSlashRegex42() { + assertParse( + """ + _ = /x/ ... /y/ + """ + ) + } + + func testForwardSlashRegex43() { + // This parses as /x/ .../ y/ + assertParse( + """ + _ = /x/.../y/ + """, + substructure: Syntax(BinaryOperatorExprSyntax(operatorToken: .binaryOperator(".../"))) + ) + } + + func testForwardSlashRegex44() { + assertParse( + """ + _ = /x/... 
+ """ + ) + } + + func testForwardSlashRegex45() { + // This is parsed as /x /... + assertParse( + """ + do { + _ = /x1️⃣ /2️⃣... + } + """, + diagnostics: [ + DiagnosticSpec(locationMarker: "1️⃣", message: "consecutive statements on a line must be separated by ';'"), + DiagnosticSpec(locationMarker: "2️⃣", message: "expected expression in operator"), + DiagnosticSpec(locationMarker: "2️⃣", message: "unexpected code '...' in 'do' statement"), + ] + ) + } + + func testForwardSlashRegex46() { + assertParse( + """ + do { + _ = true / false /1️⃣; + } + """, + diagnostics: [ + DiagnosticSpec(message: "expected expression after operator") + ] + ) + } + + func testForwardSlashRegex47() { + assertParse( + #""" + _ = "\(/x/)" + """# + ) + } + + func testForwardSlashRegex48() { + assertParse( + """ + func defaulted(x: Regex = /x/) {} + """ + ) + } + + func testForwardSlashRegex50() { + assertParse( + """ + foo(/abc/, y: /abc/) + """ + ) + } + + func testForwardSlashRegex51() { + assertParse( + """ + foo(/abc/, y: /abc /1️⃣) + """, + diagnostics: [ + DiagnosticSpec(message: "expected expression after operator") + ] + ) + } + + func testForwardSlashRegex53() { + assertParse( + """ + bar(&/x/) + """, + substructure: Syntax( + InOutExprSyntax( + expression: RegexLiteralExprSyntax( + regexPattern: .regexLiteralPattern("x") + ) + ) + ) + ) + } + + func testForwardSlashRegex55() { + assertParse( + """ + func testSubscript(_ x: S) { + x[/x/] + x[/x /1️⃣] + _ = x[/] / 2 + } + """, + diagnostics: [ + DiagnosticSpec(message: "expected expression after operator") + ] + ) + } + + func testForwardSlashRegex56() { + assertParse( + """ + func testReturn() -> Regex { + if .random() { + return /x/ + } + return /x /1️⃣ + } + """, + diagnostics: [ + DiagnosticSpec(message: "expected expression after operator") + ] + ) + } + + func testForwardSlashRegex57() { + assertParse( + """ + func testThrow() throws { + throw /x/ + } + """ + ) + } + + func testForwardSlashRegex58() { + assertParse( + """ + 
do { + _ = [/abc/, /abc /1️⃣] + } + """, + diagnostics: [ + DiagnosticSpec(message: "expected expression after operator") + ] + ) + } + + func testForwardSlashRegex59() { + assertParse( + """ + do { + _ = [/abc /1️⃣: /abc /2️⃣] + } + """, + diagnostics: [ + DiagnosticSpec(locationMarker: "1️⃣", message: "expected expression after operator"), + DiagnosticSpec(locationMarker: "2️⃣", message: "expected expression after operator"), + ] + ) + } + + func testForwardSlashRegex60() { + assertParse( + """ + _ = [/abc/:/abc/] + """, + substructure: Syntax( + DictionaryExprSyntax( + content: .elements( + .init([ + .init( + keyExpression: RegexLiteralExprSyntax(regexPattern: .regexLiteralPattern("abc")), + valueExpression: RegexLiteralExprSyntax(regexPattern: .regexLiteralPattern("abc")) + ) + ]) + ) + ) + ) + ) + } + + func testForwardSlashRegex61() { + assertParse( + """ + _ = [/abc/ : /abc/] + """ + ) + } + + func testForwardSlashRegex62() { + assertParse( + """ + _ = [/abc/ :/abc/] + """ + ) + } + + func testForwardSlashRegex63() { + assertParse( + """ + _ = [/abc/: /abc/] + """ + ) + } + + func testForwardSlashRegex64() { + assertParse( + """ + _ = (/abc/, /abc/) + """ + ) + } + + func testForwardSlashRegex65() { + assertParse( + """ + _ = ((/abc/)) + """ + ) + } + + func testForwardSlashRegex66() { + assertParse( + """ + do { + _ = ((/abc /1️⃣)) + } + """, + diagnostics: [ + DiagnosticSpec(message: "expected expression after operator") + ] + ) + } + + func testForwardSlashRegex67() { + assertParse( + """ + _ = { /abc/ } + """ + ) + } + + func testForwardSlashRegex68() { + assertParse( + """ + _ = { + /abc/ + } + """ + ) + } + + func testForwardSlashRegex69() { + assertParse( + """ + let _: () -> Int = { + 0 + / 1 / + 2 + } + """ + ) + } + + func testForwardSlashRegex70() { + // This is parsed as '/1 / 2' + assertParse( + """ + let _: () -> Int = { + 0 + /1 / + 2 + } + """, + substructure: Syntax(BinaryOperatorExprSyntax(operatorToken: .binaryOperator("/"))) + ) + } + + 
func testForwardSlashRegex71() { + assertParse( + """ + _ = { + 0 + /1/ + 2 + } + """ + ) + } + + func testForwardSlashRegex73() { + // Operator chain, as a regex literal may not start with space. + assertParse( + """ + _ = 2 + / 1 / .bitWidth + """, + substructure: Syntax(BinaryOperatorExprSyntax(operatorToken: .binaryOperator("/"))) + ) + } + + func testForwardSlashRegex74() { + // Regex literal + assertParse( + """ + _ = 2 + /1/ .bitWidth + """, + substructure: Syntax(RegexLiteralExprSyntax(regexPattern: .regexLiteralPattern("1"))) + ) + } + + func testForwardSlashRegex75() { + // Operator chain + assertParse( + """ + _ = 2 + / 1 / + .bitWidth + """, + substructure: Syntax(BinaryOperatorExprSyntax(operatorToken: .binaryOperator("/"))) + ) + } + + func testForwardSlashRegex76() { + // This is parsed as '/1 / .bitWidth' + assertParse( + """ + _ = 2 + /1 / + .bitWidth + """, + substructure: Syntax(BinaryOperatorExprSyntax(operatorToken: .binaryOperator("/"))) + ) + } + + func testForwardSlashRegex77() { + // This is parsed as /1/.bitWidth + assertParse( + """ + _ = !!/1/ .bitWidth + """, + substructure: Syntax( + PrefixOperatorExprSyntax( + operatorToken: .prefixOperator("!!"), + postfixExpression: MemberAccessExprSyntax( + base: RegexLiteralExprSyntax(regexPattern: .regexLiteralPattern("1")), + name: "bitWidth" + ) + ) + ) + ) + } + + func testForwardSlashRegex78() { + // This is parsed as '!!/1' + assertParse( + """ + _ = !!/1 / .bitWidth + """, + substructure: Syntax(BinaryOperatorExprSyntax(operatorToken: .binaryOperator("/"))) + ) + } + + func testForwardSlashRegex79() { + assertParse( + """ + let z = + /y/ + """, + substructure: Syntax(RegexLiteralExprSyntax(regexPattern: .regexLiteralPattern("y"))) + ) + } + + func testForwardSlashRegex81() { + // While '.' is technically an operator character, it seems more likely that + // the user hasn't written the member name yet. 
+ assertParse( + """ + _ = 0.1️⃣ / 1 / 2 + """, + diagnostics: [ + DiagnosticSpec(message: "expected name in member access") + ] + ) + } + + func testForwardSlashRegex82() { + assertParse( + """ + _ = 0 . 1️⃣/ 1 / 2 + """, + diagnostics: [ + DiagnosticSpec(message: "expected name in member access") + ] + ) + } + + func testForwardSlashRegex83() { + assertParse( + #""" + switch "" { + case _ where /x/: + break + default: + break + } + """# + ) + } + + func testForwardSlashRegex84() { + assertParse( + """ + do {} catch /x/ {} + """ + ) + } + + func testForwardSlashRegex86() { + assertParse( + """ + switch /x/ { + default: + break + } + """ + ) + } + + func testForwardSlashRegex87() { + assertParse( + """ + if /x/ {} + """ + ) + } + + func testForwardSlashRegex88() { + assertParse( + """ + if /x/.smth {} + """ + ) + } + + func testForwardSlashRegex89() { + assertParse( + """ + func testGuard() { + guard /x/ else { return } + } + """ + ) + } + + func testForwardSlashRegex90() { + assertParse( + """ + for x in [0] where /x/ {} + """ + ) + } + + func testForwardSlashRegex92() { + assertParse( + """ + _ = /x/ as Magic + """ + ) + } + + func testForwardSlashRegex93() { + assertParse( + """ + _ = /x/ as! String + """ + ) + } + + func testForwardSlashRegex94() { + assertParse( + """ + _ = type(of: /x/) + """ + ) + } + + func testForwardSlashRegex95() { + assertParse( + """ + do { + let 1️⃣/x/ + } + """, + diagnostics: [ + DiagnosticSpec(message: "expected pattern in variable") + ] + ) + } + + func testForwardSlashRegex96() { + assertParse( + """ + do { + _ = try /x/; _ = try /x /1️⃣ + } + """, + diagnostics: [ + DiagnosticSpec(message: "expected expression after operator") + ] + ) + } + + func testForwardSlashRegex97() { + assertParse( + """ + do { + _ = try? /x/; _ = try? /x /1️⃣ + } + """, + diagnostics: [ + DiagnosticSpec(message: "expected expression after operator") + ] + ) + } + + func testForwardSlashRegex98() { + assertParse( + """ + do { + _ = try! /x/; _ = try! 
/x /1️⃣ + } + """, + diagnostics: [ + DiagnosticSpec(message: "expected expression after operator") + ] + ) + } + + func testForwardSlashRegex99() { + assertParse( + """ + _ = await /x/ + """ + ) + } + + func testForwardSlashRegex100() { + assertParse( + """ + /x/ = 0 + /x/() + """ + ) + } + + func testForwardSlashRegex102() { + // We treat the following as comments, as it seems more likely the user has + // written a comment and is still in the middle of writing the characters before + // it. + assertParse( + """ + _ = /x// comment + """, + substructure: Syntax(PrefixOperatorExprSyntax(operatorToken: .prefixOperator("/"), postfixExpression: IdentifierExprSyntax(identifier: "x"))) + ) + } + + func testForwardSlashRegex103() { + assertParse( + """ + _ = /x // comment + """, + substructure: Syntax(PrefixOperatorExprSyntax(operatorToken: .prefixOperator("/"), postfixExpression: IdentifierExprSyntax(identifier: "x"))) + ) + } + + func testForwardSlashRegex104() { + assertParse( + """ + _ = /x/*comment*/ + """, + substructure: Syntax(PrefixOperatorExprSyntax(operatorToken: .prefixOperator("/"), postfixExpression: IdentifierExprSyntax(identifier: "x"))) + ) + } + + func testForwardSlashRegex108() { + // These become regex literals, unless last character is space, or are surrounded in parens. 
+ assertParse( + """ + baz(/, /) + """, + substructure: Syntax( + TupleExprElementListSyntax([ + .init(expression: IdentifierExprSyntax(identifier: .binaryOperator("/")), trailingComma: .commaToken()), + .init(expression: IdentifierExprSyntax(identifier: .binaryOperator("/"))), + ]) + ) + ) + assertParse( + """ + baz(/,/) + """, + substructure: Syntax(RegexLiteralExprSyntax(regexPattern: .regexLiteralPattern(","))) + ) + } + + func testForwardSlashRegex109() { + assertParse( + """ + baz((/), /) + """, + substructure: Syntax( + TupleExprElementListSyntax([ + .init( + expression: TupleExprSyntax( + elementList: .init([ + .init(expression: IdentifierExprSyntax(identifier: .binaryOperator("/"))) + ]) + ), + trailingComma: .commaToken() + ), + .init(expression: IdentifierExprSyntax(identifier: .binaryOperator("/"))), + ]) + ) + ) + } + + func testForwardSlashRegex110() { + assertParse( + """ + baz(/^, /) + """, + substructure: Syntax( + TupleExprElementListSyntax([ + .init(expression: IdentifierExprSyntax(identifier: .binaryOperator("/^")), trailingComma: .commaToken()), + .init(expression: IdentifierExprSyntax(identifier: .binaryOperator("/"))), + ]) + ) + ) + assertParse( + """ + baz(/^,/) + """, + substructure: Syntax(RegexLiteralExprSyntax(regexPattern: .regexLiteralPattern("^,"))) + ) + } + + func testForwardSlashRegex111() { + assertParse( + """ + baz((/^), /) + """, + substructure: Syntax( + TupleExprElementListSyntax([ + .init( + expression: TupleExprSyntax( + elementList: .init([ + .init(expression: IdentifierExprSyntax(identifier: .binaryOperator("/^"))) + ]) + ), + trailingComma: .commaToken() + ), + .init(expression: IdentifierExprSyntax(identifier: .binaryOperator("/"))), + ]) + ) + ) + } + + func testForwardSlashRegex112() { + assertParse( + """ + baz(^^/, /) + baz(^^/,/) + baz((^^/), /) + """ + ) + } + + func testForwardSlashRegex114() { + assertParse( + """ + bazbaz(/, 0) + bazbaz(^^/, 0) + """ + ) + } + + func testForwardSlashRegex116() { + assertParse( 
+ #""" + _ = qux(/, 1) / 2 + do { + _ = qux(/, "(") / 2 + _ = qux(/, "(")/1️⃣2 + } + """#, + diagnostics: [ + DiagnosticSpec(message: "expected ')' to end function call") + ] + ) + } + + func testForwardSlashRegex117() { + assertParse( + #""" + _ = qux((/), "(") / 2 + """# + ) + } + + func testForwardSlashRegex118() { + assertParse( + """ + _ = qux(/, 1) // this comment tests to make sure we don't try and end the regex on the starting '/' of '//'. + """ + ) + } + + func testForwardSlashRegex119() { + assertParse( + """ + _ = qux(/, 1) /* same thing with a block comment */ + """ + ) + } + + func testForwardSlashRegex122() { + assertParse( + """ + quxqux(/^/) + quxqux((/^/)) + quxqux({ $0 /^/ $1 }) + """ + ) + } + + func testForwardSlashRegex123() { + assertParse( + """ + quxqux(!/^/) + """ + ) + } + + func testForwardSlashRegex124() { + assertParse( + """ + quxqux(/^) + """ + ) + } + + func testForwardSlashRegex125() { + assertParse( + """ + _ = quxqux(/^) / 1 + """ + ) + } + + func testForwardSlashRegex127() { + assertParse( + """ + _ = arr.reduce(1, /) / 3 + """ + ) + } + + func testForwardSlashRegex128() { + assertParse( + """ + _ = arr.reduce(1, /) + arr.reduce(1, /) + """ + ) + } + + func testForwardSlashRegex130() { + assertParse( + """ + _ = (/x) + """ + ) + } + + func testForwardSlashRegex131() { + assertParse( + """ + _ = (/x)/ + """ + ) + } + + func testForwardSlashRegex132() { + assertParse( + """ + _ = (/[(0)])/ + """ + ) + } + + func testForwardSlashRegex133() { + assertParse( + """ + _ = /[(0)]/ + """ + ) + } + + func testForwardSlashRegex134() { + assertParse( + """ + _ = /(x)/ + """ + ) + } + + func testForwardSlashRegex135() { + assertParse( + """ + _ = /[)]/ + """ + ) + } + + func testForwardSlashRegex136() { + assertParse( + #""" + _ = /[a\])]/ + """# + ) + } + + func testForwardSlashRegex137() { + assertParse( + """ + _ = /([)])/ + """ + ) + } + + func testForwardSlashRegex138() { + assertParse( + """ + _ = /]]][)]/ + """ + ) + } + + func 
testForwardSlashRegex139() { + assertParse( + """ + _ = /1️⃣ + """, + diagnostics: [ + DiagnosticSpec(message: "expected '/' to end regex literal") + ] + ) + } + + func testForwardSlashRegex140() { + assertParse( + """ + _ = /)1️⃣ + """, + diagnostics: [ + DiagnosticSpec(message: "expected '/' to end regex literal") + ] + ) + } + + func testForwardSlashRegex141() { + assertParse( + """ + let fn: (Int, Int) -> Int = (/) + """ + ) + } + + func testForwardSlashRegex142() { + assertParse( + #""" + _ = /\()1️⃣/ + """#, + diagnostics: [ + DiagnosticSpec(message: "extraneous code '/' at top level") + ] + ) + } + + func testForwardSlashRegex143() { + assertParse( + #""" + do { + let _: Regex = ℹ️(/whatever\)/1️⃣ + } + """#, + diagnostics: [ + DiagnosticSpec( + message: "expected ')' to end tuple", + notes: [ + NoteSpec(message: "to match this opening '('") + ] + ) + ] + ) + } + + func testForwardSlashRegex144() { + assertParse( + """ + do { + _ = /(()())1️⃣)/ + } + """, + diagnostics: [ + DiagnosticSpec(message: "unexpected code ')/' in 'do' statement") + ] + ) + } + + func testForwardSlashRegex145() { + assertParse( + """ + do { + _ = /[x]1️⃣)/ + } + """, + diagnostics: [ + DiagnosticSpec(message: "unexpected code ')/' in 'do' statement") + ] + ) + } + + func testForwardSlashRegex146() { + assertParse( + #""" + do { + _ = /[\1️⃣]2️⃣])/ + } + """#, + diagnostics: [ + DiagnosticSpec(locationMarker: "1️⃣", message: "expected root in key path"), + DiagnosticSpec(locationMarker: "2️⃣", message: "unexpected code '])/' in 'do' statement"), + ] + ) + } + + func testForwardSlashRegex147() { + assertParse( + """ + _ = ^/x/ + """ + ) + } + + func testForwardSlashRegex148() { + assertParse( + """ + _ = (^/x)/ + """ + ) + } + + func testForwardSlashRegex149() { + assertParse( + """ + _ = (!!/x/) + """ + ) + } + + func testForwardSlashRegex150() { + assertParse( + #""" + _ = ^/"/1️⃣"2️⃣ + """#, + diagnostics: [ + DiagnosticSpec(locationMarker: "1️⃣", message: "consecutive statements on 
a line must be separated by ';'"), + DiagnosticSpec(locationMarker: "2️⃣", message: #"expected '"' to end string literal"#), + ] + ) + } + + func testForwardSlashRegex151() { + assertParse( + #""" + _ = ^/"[/1️⃣"2️⃣ + """#, + diagnostics: [ + DiagnosticSpec(locationMarker: "1️⃣", message: "consecutive statements on a line must be separated by ';'"), + DiagnosticSpec(locationMarker: "2️⃣", message: #"expected '"' to end string literal"#), + ] + ) + } + + func testForwardSlashRegex152() { + assertParse( + #""" + _ = (^/)("/") + """# + ) + } + + func testForwardSlashRegex155() { + assertParse( + """ + _ = /./ + """ + ) + } + + func testForwardSlashRegex157() { + // Okay, as the space is escaped. + assertParse( + #""" + _ = /\ / + """# + ) + } + + func testForwardSlashRegex158() { + assertParse( + """ + _ = /1️⃣ / + """, + diagnostics: [ + DiagnosticSpec(message: "bare slash regex literal may not start with space") + ] + ) + } + + func testForwardSlashRegex159() { + assertParse( + """ + _ = /1️⃣ / + """, + diagnostics: [ + DiagnosticSpec(message: "bare slash regex literal may not start with space") + ] + ) + } + + func testForwardSlashRegex160() { + assertParse( + """ + _ = #/ /# + """ + ) + } + + func testForwardSlashRegex161() { + assertParse( + #""" + _ = /x\ / + """# + ) + } + + func testForwardSlashRegex162() { + assertParse( + #""" + _ = /\ \ / + """# + ) + } + + func testForwardSlashRegex163() { + assertParse( + """ + + """ + ) + } + + func testForwardSlashRegex164() { + // There are intentionally trailing spaces here + assertParse( + """ + _ = /1️⃣ 2️⃣ + """, + diagnostics: [ + DiagnosticSpec(locationMarker: "1️⃣", message: "bare slash regex literal may not start with space"), + DiagnosticSpec(locationMarker: "2️⃣", message: "expected '/' to end regex literal"), + ] + ) + } + + func testForwardSlashRegex166() { + // There are intentionally trailing spaces here + assertParse( + """ + _ = /^ 1️⃣ + """, + diagnostics: [ + DiagnosticSpec(message: "expected '/' to 
end regex literal") + ] + ) + } + + func testForwardSlashRegex167() { + assertParse( + #""" + _ = /\)/ + """# + ) + } + + func testForwardSlashRegex168() { + assertParse( + """ + _ = /)/ + """ + ) + } + + func testForwardSlashRegex169() { + assertParse( + """ + _ = /,/ + """ + ) + } + + func testForwardSlashRegex170() { + assertParse( + """ + _ = /}/ + """ + ) + } + + func testForwardSlashRegex171() { + assertParse( + """ + _ = /]/ + """ + ) + } + + func testForwardSlashRegex172() { + assertParse( + """ + _ = /:/ + """ + ) + } + + func testForwardSlashRegex173() { + assertParse( + """ + _ = /;/ + """ + ) + } + + func testForwardSlashRegex175() { + assertParse( + """ + _ = /0xG/ + """ + ) + } + + func testForwardSlashRegex176() { + assertParse( + """ + _ = /0oG/ + """ + ) + } + + func testForwardSlashRegex177() { + assertParse( + #""" + _ = /"/ + """# + ) + } + + func testForwardSlashRegex178() { + assertParse( + """ + _ = /'/ + """ + ) + } + + func testForwardSlashRegex179() { + assertParse( + """ + _ = /<#placeholder#>/ + """ + ) + } + + func testForwardSlashRegex180() { + assertParse( + """ + _ = ^^/1️⃣0xG/ + """ + ) + } + + func testForwardSlashRegex181() { + assertParse( + """ + _ = ^^/1️⃣0oG/ + """ + ) + } + + func testForwardSlashRegex182() { + assertParse( + #""" + _ = ^^/"/1️⃣ + """# + ) + } + + func testForwardSlashRegex183() { + assertParse( + """ + _ = ^^/'/1️⃣ + """ + ) + } + + func testForwardSlashRegex184() { + assertParse( + """ + _ = ^^/<#placeholder#>/ + """ + ) + } + + func testForwardSlashRegex185() { + assertParse( + """ + _ = (^^/1️⃣0xG/) + """ + ) + } + + func testForwardSlashRegex186() { + assertParse( + """ + _ = (^^/1️⃣0oG/) + """ + ) + } + + func testForwardSlashRegex187() { + assertParse( + #""" + _ = (^^/"/)1️⃣ + """# + ) + } + + func testForwardSlashRegex188() { + assertParse( + """ + _ = (^^/'/)1️⃣ + """ + ) + } + + func testForwardSlashRegex189() { + assertParse( + """ + _ = (^^/<#placeholder#>/) + """ + ) + } +} diff --git 
a/Tests/SwiftParserTest/translated/PrefixSlashTests.swift b/Tests/SwiftParserTest/translated/PrefixSlashTests.swift new file mode 100644 index 00000000000..c4b5195d67f --- /dev/null +++ b/Tests/SwiftParserTest/translated/PrefixSlashTests.swift @@ -0,0 +1,53 @@ +//===----------------------------------------------------------------------===// +// +// This source file is part of the Swift.org open source project +// +// Copyright (c) 2023 Apple Inc. and the Swift project authors +// Licensed under Apache License v2.0 with Runtime Library Exception +// +// See https://swift.org/LICENSE.txt for license information +// See https://swift.org/CONTRIBUTORS.txt for the list of Swift project authors +// +//===----------------------------------------------------------------------===// + +// This test file has been translated from swift/test/StringProcessing/Parse/prefix-slash.swift + +import XCTest + +final class PrefixSlashTests: XCTestCase { + func testPrefixSlash2() { + assertParse( + """ + prefix operator / + prefix func / <T> (_ x: T) -> T { x } + """ + ) + } + + func testPrefixSlash4() { + assertParse( + """ + _ = /E.e + (/E.e).foo(/0) + """ + ) + } + + func testPrefixSlash6() { + assertParse( + """ + foo(/E.e, /E.e) + foo((/E.e), /E.e) + foo((/)(E.e), /E.e) + """ + ) + } + + func testPrefixSlash8() { + assertParse( + """ + _ = bar(/E.e) / 2 + """ + ) + } +} diff --git a/Tests/SwiftParserTest/translated/RegexParseEndOfBufferTests.swift b/Tests/SwiftParserTest/translated/RegexParseEndOfBufferTests.swift new file mode 100644 index 00000000000..5929db521f2 --- /dev/null +++ b/Tests/SwiftParserTest/translated/RegexParseEndOfBufferTests.swift @@ -0,0 +1,26 @@ +//===----------------------------------------------------------------------===// +// +// This source file is part of the Swift.org open source project +// +// Copyright (c) 2023 Apple Inc. 
and the Swift project authors +// Licensed under Apache License v2.0 with Runtime Library Exception +// +// See https://swift.org/LICENSE.txt for license information +// See https://swift.org/CONTRIBUTORS.txt for the list of Swift project authors +// +//===----------------------------------------------------------------------===// + +// This test file has been translated from swift/test/StringProcessing/Parse/regex_parse_end_of_buffer.swift + +import XCTest + +final class RegexParseEndOfBufferTests: XCTestCase { + func testRegexParseEndOfBuffer1() { + assertParse( + "var unterminated = #/(xy1️⃣", + diagnostics: [ + DiagnosticSpec(message: "expected '/#' to end regex literal") + ] + ) + } +} diff --git a/Tests/SwiftParserTest/translated/RegexParseErrorTests.swift b/Tests/SwiftParserTest/translated/RegexParseErrorTests.swift new file mode 100644 index 00000000000..92c102d8249 --- /dev/null +++ b/Tests/SwiftParserTest/translated/RegexParseErrorTests.swift @@ -0,0 +1,193 @@ +//===----------------------------------------------------------------------===// +// +// This source file is part of the Swift.org open source project +// +// Copyright (c) 2023 Apple Inc. 
and the Swift project authors +// Licensed under Apache License v2.0 with Runtime Library Exception +// +// See https://swift.org/LICENSE.txt for license information +// See https://swift.org/CONTRIBUTORS.txt for the list of Swift project authors +// +//===----------------------------------------------------------------------===// + +// This test file has been translated from swift/test/StringProcessing/Parse/regex_parse_error.swift + +import XCTest + +final class RegexParseErrorTests: XCTestCase { + func testRegexParseError1() { + assertParse( + """ + _ = /(/ + """ + ) + } + + func testRegexParseError2() { + assertParse( + """ + _ = #/(/# + """ + ) + } + + func testRegexParseError4() { + assertParse( + """ + _ = /)/ + """ + ) + } + + func testRegexParseError5() { + assertParse( + """ + _ = #/)/# + """ + ) + } + + func testRegexParseError6() { + assertParse( + #""" + _ = #/\\/''/1️⃣ + """#, + diagnostics: [ + DiagnosticSpec(message: "expected '#' to end regex literal") + ] + ) + } + + func testRegexParseError7() { + assertParse( + #""" + _ = #/\|1️⃣ + """#, + diagnostics: [ + DiagnosticSpec(message: "expected '/#' to end regex literal") + ] + ) + } + + func testRegexParseError8() { + assertParse( + """ + _ = #//1️⃣ + """, + diagnostics: [ + DiagnosticSpec(message: "expected '#' to end regex literal") + ] + ) + } + + func testRegexParseError9() { + assertParse( + """ + _ = #/xy1️⃣ + """, + diagnostics: [ + DiagnosticSpec(message: "expected '/#' to end regex literal") + ] + ) + } + + func testRegexParseError10() { + assertParse( + """ + _ = #/(?/# + """ + ) + } + + func testRegexParseError11() { + assertParse( + """ + _ = #/(?'/# + """ + ) + } + + func testRegexParseError12() { + assertParse( + """ + _ = #/(?'abc/# + """ + ) + } + + func testRegexParseError13() { + assertParse( + """ + _ = #/(?'abc /# + """ + ) + } + + func testRegexParseError14() { + assertParse( + """ + do { + _ = #/(?'a1️⃣ + } + """, + diagnostics: [ + DiagnosticSpec(message: "expected '/#' to end 
regex literal") + ] + ) + } + + func testRegexParseError15() { + assertParse( + #""" + _ = #/\(?'abc/# + """# + ) + } + + func testRegexParseError16() { + assertParse( + #""" + do { + _ = /\1️⃣ + /2️⃣ + } + """#, + diagnostics: [ + DiagnosticSpec(locationMarker: "1️⃣", message: "expected root in key path"), + DiagnosticSpec(locationMarker: "2️⃣", message: "expected expression after operator"), + ] + ) + } + + func testRegexParseError17() { + assertParse( + #""" + do { + _ = #/\1️⃣ + /#2️⃣ + } + """#, + diagnostics: [ + DiagnosticSpec(locationMarker: "1️⃣", message: "expected '/#' to end regex literal"), + DiagnosticSpec(locationMarker: "2️⃣", message: "expected identifier in macro expansion"), + ] + ) + } + + func testRegexParseError19() { + assertParse( + """ + foo(#/(?/#, #/abc/#) + foo(#/(?C/#, #/abc/#) + """ + ) + } + + func testRegexParseError20() { + assertParse( + """ + foo(#/(?'/#, #/abc/#) + """ + ) + } +} diff --git a/Tests/SwiftParserTest/translated/RegexTests.swift b/Tests/SwiftParserTest/translated/RegexTests.swift new file mode 100644 index 00000000000..0c8f2715a82 --- /dev/null +++ b/Tests/SwiftParserTest/translated/RegexTests.swift @@ -0,0 +1,72 @@ +//===----------------------------------------------------------------------===// +// +// This source file is part of the Swift.org open source project +// +// Copyright (c) 2023 Apple Inc. 
and the Swift project authors +// Licensed under Apache License v2.0 with Runtime Library Exception +// +// See https://swift.org/LICENSE.txt for license information +// See https://swift.org/CONTRIBUTORS.txt for the list of Swift project authors +// +//===----------------------------------------------------------------------===// + +// This test file has been translated from swift/test/StringProcessing/Parse/regex.swift + +import XCTest + +final class RegexTests: XCTestCase { + func testRegex1() { + assertParse( + """ + _ = /abc/ + _ = #/abc/# + _ = ##/abc/## + """ + ) + } + + func testRegex3() { + assertParse( + """ + foo(/abc/, #/abc/#, ##/abc/##) + """ + ) + } + + func testRegex4() { + assertParse( + """ + let arr = [/abc/, #/abc/#, ##/abc/##] + """ + ) + } + + func testRegex5() { + assertParse( + #""" + _ = /\w+/.self + _ = #/\w+/#.self + _ = ##/\w+/##.self + """# + ) + } + + func testRegex6() { + assertParse( + ##""" + _ = /#\/\#\\/ + _ = #/#/\/\#\\/# + _ = ##/#|\|\#\\/## + """## + ) + } + + func testRegex7() { + assertParse( + """ + _ = (#/[*/#, #/+]/#, #/.]/#) + """ + ) + } + +}