Skip to content

Commit eee52cb

Browse files
authored
Merge pull request #2438 from pinkjuice66/utf16-surrogates-check
[Parser] Remove redundant UTF-16 surrogates check logic
2 parents 114a6a1 + 073a271 commit eee52cb

File tree

2 files changed

+35
-5
lines changed

2 files changed

+35
-5
lines changed

Sources/SwiftParser/Lexer/UnicodeScalarExtensions.swift

Lines changed: 0 additions & 5 deletions
Original file line number | Diff line number | Diff line change
@@ -199,11 +199,6 @@ extension Unicode.Scalar {
199199
_ = advance()
200200
}
201201

202-
// UTF-16 surrogate pair values are not valid code points.
203-
if (charValue >= 0xD800 && charValue <= 0xDFFF) {
204-
return nil
205-
}
206-
207202
// If we got here, we read the appropriate number of accumulated bytes.
208203
// Verify that the encoding was actually minimal.
209204
// Number of bits in the value, ignoring leading zeros.

Tests/SwiftParserTest/LexerTests.swift

Lines changed: 35 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -1083,6 +1083,41 @@ public class LexerTests: ParserTestCase {
10831083
}
10841084
}
10851085

1086+
/// Checks how the lexer reports a byte sequence that encodes the lowest
/// UTF-16 surrogate code point, U+D800.
///
/// The three input bytes are expected to yield exactly one lexeme: an
/// end-of-file token with no text, the three bytes as leading trivia, and an
/// `.invalidUtf8` diagnostic at byte offset 0.
func testUTF16Surrogates1() {
1087+
// UTF-16 surrogate code points span U+D800...U+DFFF; this test covers the
// lower bound of that range.
1088+
let sourceBytes: [UInt8] = [0xED, 0xA0, 0x80] // Three-byte UTF-8-style encoding of U+D800
1089+
1090+
lex(sourceBytes) { lexemes in
1091+
// Stop early on an unexpected lexeme count so that `lexemes[0]` below
// cannot be reached with an empty result.
guard lexemes.count == 1 else {
1092+
return XCTFail("Expected 1 lexemes, got \(lexemes.count)")
1093+
}
1094+
// The only lexeme must be EOF, with the offending bytes kept as leading
// trivia and the error reported at the first byte.
assertRawBytesLexeme(
1095+
lexemes[0],
1096+
kind: .endOfFile,
1097+
leadingTrivia: [0xED, 0xA0, 0x80],
1098+
text: [],
1099+
error: TokenDiagnostic(.invalidUtf8, byteOffset: 0)
1100+
)
1101+
}
1102+
}
1103+
1104+
/// Checks how the lexer reports a byte sequence that encodes the highest
/// UTF-16 surrogate code point, U+DFFF.
///
/// The three input bytes are expected to yield exactly one lexeme: an
/// end-of-file token with no text, the three bytes as leading trivia, and an
/// `.invalidUtf8` diagnostic at byte offset 0.
func testUTF16Surrogates2() {
1105+
// UTF-16 surrogate code points span U+D800...U+DFFF; this test covers the
// upper bound of that range.
let sourceBytes: [UInt8] = [0xED, 0xBF, 0xBF] // Three-byte UTF-8-style encoding of U+DFFF
1106+
1107+
lex(sourceBytes) { lexemes in
1108+
// Stop early on an unexpected lexeme count so that `lexemes[0]` below
// cannot be reached with an empty result.
guard lexemes.count == 1 else {
1109+
return XCTFail("Expected 1 lexemes, got \(lexemes.count)")
1110+
}
1111+
// The only lexeme must be EOF, with the offending bytes kept as leading
// trivia and the error reported at the first byte.
assertRawBytesLexeme(
1112+
lexemes[0],
1113+
kind: .endOfFile,
1114+
leadingTrivia: [0xED, 0xBF, 0xBF],
1115+
text: [],
1116+
error: TokenDiagnostic(.invalidUtf8, byteOffset: 0)
1117+
)
1118+
}
1119+
}
1120+
10861121
func testInvalidUTF8RegexLiteral() {
10871122
let slashByte = UInt8(UnicodeScalar("/").value)
10881123
let sourceBytes: [UInt8] = [slashByte, 0xfd, slashByte]

0 commit comments

Comments
 (0)