@@ -493,7 +493,7 @@ struct CharacterByte: ExpressibleByUnicodeScalarLiteral, ExpressibleByIntegerLit
493
493
let value : UInt8
494
494
495
495
init ( unicodeScalarLiteral value: Unicode . Scalar ) {
496
- self . value = UInt8 ( ascii: Unicode . Scalar ( unicodeScalarLiteral : value) )
496
+ self . value = UInt8 ( ascii: value)
497
497
}
498
498
499
499
init ( integerLiteral value: UInt8 ) {
@@ -964,11 +964,11 @@ extension Lexer.Cursor {
964
964
return Lexer . Result ( . endOfFile)
965
965
default :
966
966
var tmp = self
967
- if tmp. advance ( if: { Unicode . Scalar ( $0 ) . isValidIdentifierStartCodePoint } ) {
967
+ if tmp. advance ( if: { $0 . isValidIdentifierStartCodePoint } ) {
968
968
return self . lexIdentifier ( )
969
969
}
970
970
971
- if tmp. advance ( if: { Unicode . Scalar ( $0 ) . isOperatorStartCodePoint } ) {
971
+ if tmp. advance ( if: { $0 . isOperatorStartCodePoint } ) {
972
972
return self . lexOperatorIdentifier (
973
973
sourceBufferStart: sourceBufferStart,
974
974
preferRegexOverBinaryOperator: preferRegexOverBinaryOperator
@@ -1009,7 +1009,7 @@ extension Lexer.Cursor {
1009
1009
private mutating func lexAfterClosingStringQuote( ) -> Lexer . Result {
1010
1010
switch self . peek ( ) {
1011
1011
case " # " :
1012
- self . advance ( while: { $0 == Unicode . Scalar ( " # " ) } )
1012
+ self . advance ( while: { $0 == " # " } )
1013
1013
return Lexer . Result ( . rawStringPoundDelimiter, stateTransition: . pop)
1014
1014
case nil :
1015
1015
return Lexer . Result ( . endOfFile)
@@ -1028,7 +1028,7 @@ extension Lexer.Cursor {
1028
1028
/// number of '#' is correct because otherwise `isAtStringInterpolationAnchor`
1029
1029
/// would have returned false in `lexInStringLiteral` and we wouldn't have
1030
1030
/// transitioned to the `afterBackslashOfStringInterpolation` state.
1031
- self . advance ( while: { $0 == Unicode . Scalar ( " # " ) } )
1031
+ self . advance ( while: { $0 == " # " } )
1032
1032
return Lexer . Result ( . rawStringPoundDelimiter)
1033
1033
case " ( " :
1034
1034
_ = self . advance ( )
@@ -1248,9 +1248,7 @@ extension Lexer.Cursor {
1248
1248
)
1249
1249
}
1250
1250
1251
- self . advance ( while: {
1252
- ( $0 >= Unicode . Scalar ( " 0 " ) && $0 <= Unicode . Scalar ( " 7 " ) ) || $0 == Unicode . Scalar ( " _ " )
1253
- } )
1251
+ self . advance ( while: { ( $0 >= " 0 " && $0 <= " 7 " ) || $0 == " _ " } )
1254
1252
1255
1253
let tmp = self
1256
1254
if self . advance ( if: { $0. isValidIdentifierContinuationCodePoint } ) {
@@ -1279,9 +1277,7 @@ extension Lexer.Cursor {
1279
1277
)
1280
1278
}
1281
1279
1282
- self . advance ( while: {
1283
- $0 == Unicode . Scalar ( " 0 " ) || $0 == Unicode . Scalar ( " 1 " ) || $0 == Unicode . Scalar ( " _ " )
1284
- } )
1280
+ self . advance ( while: { $0 == " 0 " || $0 == " 1 " || $0 == " _ " } )
1285
1281
1286
1282
let tmp = self
1287
1283
if self . advance ( if: { $0. isValidIdentifierContinuationCodePoint } ) {
@@ -1298,7 +1294,7 @@ extension Lexer.Cursor {
1298
1294
1299
1295
// Handle a leading [0-9]+, lexing an integer or falling through if we have a
1300
1296
// floating point value.
1301
- self . advance ( while: { $0. isDigit || $0 == Unicode . Scalar ( " _ " ) } )
1297
+ self . advance ( while: { $0. isDigit || $0 == " _ " } )
1302
1298
1303
1299
// TODO: This can probably be unified with lexHexNumber somehow
1304
1300
@@ -1333,7 +1329,7 @@ extension Lexer.Cursor {
1333
1329
// Lex decimal point.
1334
1330
if self . advance ( matching: " . " ) {
1335
1331
// Lex any digits after the decimal point.
1336
- self . advance ( while: { $0. isDigit || $0 == Unicode . Scalar ( " _ " ) } )
1332
+ self . advance ( while: { $0. isDigit || $0 == " _ " } )
1337
1333
}
1338
1334
1339
1335
// Lex exponent.
@@ -1364,7 +1360,7 @@ extension Lexer.Cursor {
1364
1360
)
1365
1361
}
1366
1362
1367
- self . advance ( while: { $0. isDigit || $0 == Unicode . Scalar ( " _ " ) } )
1363
+ self . advance ( while: { $0. isDigit || $0 == " _ " } )
1368
1364
1369
1365
let tmp = self
1370
1366
if self . advance ( if: { $0. isValidIdentifierContinuationCodePoint } ) {
@@ -1401,7 +1397,7 @@ extension Lexer.Cursor {
1401
1397
}
1402
1398
}
1403
1399
1404
- self . advance ( while: { $0. isHexDigit || $0 == Unicode . Scalar ( " _ " ) } )
1400
+ self . advance ( while: { $0. isHexDigit || $0 == " _ " } )
1405
1401
1406
1402
if self . isAtEndOfFile || self . is ( notAt: " . " , " p " , " P " ) {
1407
1403
let tmp = self
@@ -1429,7 +1425,7 @@ extension Lexer.Cursor {
1429
1425
return Lexer . Result ( . integerLiteral)
1430
1426
}
1431
1427
1432
- self . advance ( while: { $0. isHexDigit || $0 == Unicode . Scalar ( " _ " ) } )
1428
+ self . advance ( while: { $0. isHexDigit || $0 == " _ " } )
1433
1429
1434
1430
if self . isAtEndOfFile || self . is ( notAt: " p " , " P " ) {
1435
1431
if let peeked = self . peek ( at: 1 ) , !Unicode. Scalar ( peeked) . isDigit {
@@ -1486,7 +1482,7 @@ extension Lexer.Cursor {
1486
1482
)
1487
1483
}
1488
1484
1489
- self . advance ( while: { $0. isDigit || $0 == Unicode . Scalar ( " _ " ) } )
1485
+ self . advance ( while: { $0. isDigit || $0 == " _ " } )
1490
1486
1491
1487
let tmp = self
1492
1488
if self . advance ( if: { $0. isValidIdentifierContinuationCodePoint } ) {
@@ -1545,8 +1541,8 @@ extension Lexer.Cursor {
1545
1541
case success( Unicode . Scalar )
1546
1542
1547
1543
/// An escaped character, e.g. `\n` or `\u{1234}`. It has been validated that
1548
- /// this is a valid character
1549
- case validatedEscapeSequence( Character )
1544
+ /// this is a valid unicode scalar.
1545
+ case validatedEscapeSequence( Unicode . Scalar )
1550
1546
1551
1547
/// The end of a string literal has been reached.
1552
1548
case endOfString
@@ -1605,16 +1601,11 @@ extension Lexer.Cursor {
1605
1601
case " \\ " : // Escapes.
1606
1602
_ = self . advance ( )
1607
1603
if !self . advanceIfStringDelimiter ( delimiterLength: delimiterLength) {
1608
- return . success( Unicode . Scalar ( " \\ " ) )
1604
+ return . success( " \\ " )
1609
1605
}
1610
1606
switch self . lexEscapedCharacter ( isMultilineString: stringLiteralKind == . multiLine) {
1611
- case . success( let escapedCharacterCode) :
1612
- // Check to see if the encoding is valid.
1613
- if let validatedScalar = Unicode . Scalar ( escapedCharacterCode) {
1614
- return . validatedEscapeSequence( Character ( validatedScalar) )
1615
- } else {
1616
- return . error( . invalidEscapeSequenceInStringLiteral)
1617
- }
1607
+ case . success( let codePoint) :
1608
+ return . validatedEscapeSequence( codePoint)
1618
1609
case . error( let kind) :
1619
1610
return . error( kind)
1620
1611
}
@@ -1635,7 +1626,7 @@ extension Lexer.Cursor {
1635
1626
enum EscapedCharacterLex {
1636
1627
// Successfully lexed an escape sequence that represents the Unicode character
1637
1628
// at the given codepoint
1638
- case success( UInt32 )
1629
+ case success( Unicode . Scalar )
1639
1630
case error( TokenDiagnostic . Kind )
1640
1631
}
1641
1632
@@ -1649,13 +1640,13 @@ extension Lexer.Cursor {
1649
1640
// Escape processing. We already ate the "\".
1650
1641
switch self . peek ( ) {
1651
1642
// Simple single-character escapes.
1652
- case " 0 " : _ = self . advance ( ) ; return . success( UInt32 ( UInt8 ( ascii : " \0 " ) ) )
1653
- case " n " : _ = self . advance ( ) ; return . success( UInt32 ( UInt8 ( ascii : " \n " ) ) )
1654
- case " r " : _ = self . advance ( ) ; return . success( UInt32 ( UInt8 ( ascii : " \r " ) ) )
1655
- case " t " : _ = self . advance ( ) ; return . success( UInt32 ( UInt8 ( ascii : " \t " ) ) )
1656
- case #"""# : _ = self . advance ( ) ; return . success( UInt32 ( UInt8 ( ascii : #"""# ) ) )
1657
- case " ' " : _ = self . advance ( ) ; return . success( UInt32 ( UInt8 ( ascii : " ' " ) ) )
1658
- case " \\ " : _ = self . advance ( ) ; return . success( UInt32 ( UInt8 ( ascii : " \\ " ) ) )
1643
+ case " 0 " : _ = self . advance ( ) ; return . success( " \0 " )
1644
+ case " n " : _ = self . advance ( ) ; return . success( " \n " )
1645
+ case " r " : _ = self . advance ( ) ; return . success( " \r " )
1646
+ case " t " : _ = self . advance ( ) ; return . success( " \t " )
1647
+ case #"""# : _ = self . advance ( ) ; return . success( #"""# )
1648
+ case " ' " : _ = self . advance ( ) ; return . success( " ' " )
1649
+ case " \\ " : _ = self . advance ( ) ; return . success( " \\ " )
1659
1650
1660
1651
case " u " : // e.g. \u{1234}
1661
1652
_ = self . advance ( )
@@ -1667,7 +1658,7 @@ extension Lexer.Cursor {
1667
1658
return self . lexUnicodeEscape ( )
1668
1659
case " \n " , " \r " :
1669
1660
if isMultilineString && self . maybeConsumeNewlineEscape ( ) {
1670
- return . success( UInt32 ( UInt8 ( ascii : " \n " ) ) )
1661
+ return . success( " \n " )
1671
1662
}
1672
1663
return . error( . invalidEscapeSequenceInStringLiteral)
1673
1664
case nil :
@@ -1692,24 +1683,30 @@ extension Lexer.Cursor {
1692
1683
precondition ( quoteConsumed)
1693
1684
1694
1685
let digitStart = self
1695
- var numDigits = 0
1696
- while self . advance ( if: { $0. isHexDigit } ) {
1697
- numDigits += 1
1698
- }
1686
+ self . advance ( while: { $0. isHexDigit } )
1687
+
1688
+ let digitText = SyntaxText (
1689
+ baseAddress: digitStart. pointer,
1690
+ count: digitStart. distance ( to: self )
1691
+ )
1699
1692
1700
1693
guard self . advance ( matching: " } " ) else {
1701
1694
return . error( . expectedClosingBraceInUnicodeEscape)
1702
1695
}
1703
1696
1704
- if numDigits == 0 || numDigits > 8 {
1697
+ guard 1 <= digitText . count && digitText . count <= 8 else {
1705
1698
return . error( . invalidNumberOfHexDigitsInUnicodeEscape)
1706
1699
}
1707
1700
1708
- if let codePoint = UInt32 ( String ( decoding: digitStart. input [ 0 ..< numDigits] , as: UTF8 . self) , radix: 16 ) {
1709
- return . success( codePoint)
1710
- } else {
1701
+ guard
1702
+ // FIXME: Implement 'UInt32(_: SyntaxText, radix:)'.
1703
+ let codePoint = UInt32 ( String ( syntaxText: digitText) , radix: 16 ) ,
1704
+ let scalar = Unicode . Scalar. init ( codePoint)
1705
+ else {
1711
1706
return . error( . invalidEscapeSequenceInStringLiteral)
1712
1707
}
1708
+
1709
+ return . success( scalar)
1713
1710
}
1714
1711
1715
1712
private mutating func maybeConsumeNewlineEscape( ) -> Bool {
@@ -1719,7 +1716,7 @@ extension Lexer.Cursor {
1719
1716
case " " , " \t " :
1720
1717
continue
1721
1718
case " \r " :
1722
- _ = tmp. advance ( if: { $0 == Unicode . Scalar ( " \n " ) } )
1719
+ _ = tmp. advance ( if: { $0 == " \n " } )
1723
1720
fallthrough
1724
1721
case " \n " :
1725
1722
self = tmp
@@ -1776,7 +1773,7 @@ extension Lexer.Cursor {
1776
1773
// Scan ahead until the end of the line. Every time we see a closing
1777
1774
// quote, check if it is followed by the correct number of closing delimiters.
1778
1775
while isSingleLineString. is ( notAt: " \r " , " \n " ) {
1779
- if isSingleLineString. advance ( if: { $0 == Unicode . Scalar ( ( #"""# ) ) } ) {
1776
+ if isSingleLineString. advance ( if: { $0 == #"""# } ) {
1780
1777
if isSingleLineString. advanceIfStringDelimiter ( delimiterLength: leadingDelimiterLength) {
1781
1778
return Lexer . Result ( . stringQuote, stateTransition: stateTransitionAfterLexingStringQuote ( kind: . singleLine) )
1782
1779
}
@@ -2238,7 +2235,7 @@ extension Lexer.Cursor {
2238
2235
case . error:
2239
2236
// If the character was incorrectly encoded, give up.
2240
2237
return nil
2241
- case . endOfString, . success( Unicode . Scalar ( 0x201D ) ) :
2238
+ case . endOfString, . success( " \u{201D} " ) :
2242
2239
// If we found a closing quote, then we're done. Just return the spot
2243
2240
// to continue.
2244
2241
return body
@@ -2262,10 +2259,10 @@ extension Lexer.Cursor {
2262
2259
precondition ( !( self . peekScalar ( ) ? . isValidIdentifierStartCodePoint ?? false ) && !( self . peekScalar ( ) ? . isOperatorStartCodePoint ?? false ) )
2263
2260
let start = self
2264
2261
var tmp = self
2265
- if tmp. advance ( if: { Unicode . Scalar ( $0 ) . isValidIdentifierContinuationCodePoint } ) {
2262
+ if tmp. advance ( if: { $0 . isValidIdentifierContinuationCodePoint } ) {
2266
2263
// If this is a valid identifier continuation, but not a valid identifier
2267
2264
// start, attempt to recover by eating more continuation characters.
2268
- tmp. advance ( while: { Unicode . Scalar ( $0 ) . isValidIdentifierContinuationCodePoint } )
2265
+ tmp. advance ( while: { $0 . isValidIdentifierContinuationCodePoint } )
2269
2266
self = tmp
2270
2267
return . lexemeContents( Lexer . Result ( . identifier, error: LexingDiagnostic ( . invalidIdentifierStartCharacter, position: start) ) )
2271
2268
}
@@ -2369,10 +2366,8 @@ extension Lexer.Cursor {
2369
2366
previous: curPtr. input [ markerKind. introducer. utf8. count - 1 ]
2370
2367
)
2371
2368
while !restOfBuffer. isAtEndOfFile {
2372
- let terminatorStart = markerKind. terminator. utf8. first!
2373
- restOfBuffer. advance ( while: { byte in
2374
- byte != Unicode . Scalar ( terminatorStart)
2375
- } )
2369
+ let terminatorStart = markerKind. terminator. unicodeScalars. first!
2370
+ restOfBuffer. advance ( while: { byte in byte != terminatorStart } )
2376
2371
2377
2372
guard restOfBuffer. starts ( with: markerKind. terminator. utf8) else {
2378
2373
_ = restOfBuffer. advance ( )
0 commit comments