Skip to content

Commit c284a23

Browse files
committed
Use separate error for invalid UTF-8 sequence in makeString
1 parent 9a63ae1 commit c284a23

File tree

2 files changed

+50
-44
lines changed

2 files changed

+50
-44
lines changed

Sources/Foundation/JSONSerialization+Parser.swift

+46 -44
Original file line number | Diff line number | Diff line change
@@ -29,7 +29,7 @@ internal struct JSONParser {
2929
}
3030
}
3131
#endif
32-
32+
3333
// ensure only white space is remaining
3434
var whitespace = 0
3535
while let next = reader.peek(offset: whitespace) {
@@ -41,7 +41,7 @@ internal struct JSONParser {
4141
throw JSONError.unexpectedCharacter(ascii: next, characterIndex: reader.readerIndex + whitespace)
4242
}
4343
}
44-
44+
4545
return value
4646
}
4747

@@ -107,15 +107,15 @@ internal struct JSONParser {
107107
default:
108108
break
109109
}
110-
110+
111111
var array = [JSONValue]()
112112
array.reserveCapacity(10)
113-
113+
114114
// parse values
115115
while true {
116116
let value = try parseValue()
117117
array.append(value)
118-
118+
119119
// consume the whitespace after the value before the comma
120120
let ascii = try reader.consumeWhitespace()
121121
switch ascii {
@@ -161,7 +161,7 @@ internal struct JSONParser {
161161
default:
162162
break
163163
}
164-
164+
165165
var object = [String: JSONValue]()
166166
object.reserveCapacity(20)
167167

@@ -174,7 +174,7 @@ internal struct JSONParser {
174174
reader.moveReaderIndex(forwardBy: 1)
175175
try reader.consumeWhitespace()
176176
object[key] = try self.parseValue()
177-
177+
178178
let commaOrBrace = try reader.consumeWhitespace()
179179
switch commaOrBrace {
180180
case ._closebrace:
@@ -196,26 +196,26 @@ internal struct JSONParser {
196196
}
197197

198198
extension JSONParser {
199-
199+
200200
struct DocumentReader {
201201
let array: [UInt8]
202202

203203
private(set) var readerIndex: Int = 0
204-
204+
205205
private var readableBytes: Int {
206206
self.array.endIndex - self.readerIndex
207207
}
208-
208+
209209
var isEOF: Bool {
210210
self.readerIndex >= self.array.endIndex
211211
}
212-
212+
213213

214214
init(array: [UInt8]) {
215215
self.array = array
216216
}
217217

218-
subscript(bounds: Range<Int>) -> ArraySlice<UInt8> {
218+
subscript<R: RangeExpression<Int>>(bounds: R) -> ArraySlice<UInt8> {
219219
self.array[bounds]
220220
}
221221

@@ -234,14 +234,14 @@ extension JSONParser {
234234
guard self.readerIndex + offset < self.array.endIndex else {
235235
return nil
236236
}
237-
237+
238238
return self.array[self.readerIndex + offset]
239239
}
240-
240+
241241
mutating func moveReaderIndex(forwardBy offset: Int) {
242242
self.readerIndex += offset
243243
}
244-
244+
245245
@discardableResult
246246
mutating func consumeWhitespace() throws -> UInt8 {
247247
var whitespace = 0
@@ -255,18 +255,18 @@ extension JSONParser {
255255
return ascii
256256
}
257257
}
258-
258+
259259
throw JSONError.unexpectedEndOfFile
260260
}
261-
261+
262262
mutating func readString() throws -> String {
263263
try self.readUTF8StringTillNextUnescapedQuote()
264264
}
265-
265+
266266
mutating func readNumber() throws -> String {
267267
try self.parseNumber()
268268
}
269-
269+
270270
mutating func readBool() throws -> Bool {
271271
switch self.read() {
272272
case UInt8(ascii: "t"):
@@ -314,11 +314,11 @@ extension JSONParser {
314314
throw JSONError.unexpectedCharacter(ascii: self.peek(offset: -1)!, characterIndex: self.readerIndex - 1)
315315
}
316316
}
317-
317+
318318
// MARK: - Private Methods -
319319

320320
// MARK: String
321-
321+
322322
enum EscapedSequenceError: Swift.Error {
323323
case expectedLowSurrogateUTF8SequenceAfterHighSurrogate(index: Int)
324324
case unexpectedEscapedCharacter(ascii: UInt8, index: Int)
@@ -339,10 +339,10 @@ extension JSONParser {
339339
self.moveReaderIndex(forwardBy: copy + 1)
340340
guard var result = output else {
341341
// if we don't have an output string we create a new string
342-
return try Self.makeString(self[stringStartIndex ..< stringStartIndex + copy])
342+
return try makeString(at: stringStartIndex ..< stringStartIndex + copy)
343343
}
344344
// if we have an output string we append
345-
result += try Self.makeString(self[stringStartIndex ..< stringStartIndex + copy])
345+
result += try makeString(at: stringStartIndex ..< stringStartIndex + copy)
346346
return result
347347

348348
case 0 ... 31:
@@ -352,17 +352,17 @@ extension JSONParser {
352352
// through U+001F).
353353
var string = output ?? ""
354354
let errorIndex = self.readerIndex + copy
355-
string += try Self.makeString(self.array[stringStartIndex ... errorIndex])
355+
string += try makeString(at: stringStartIndex ... errorIndex)
356356
throw JSONError.unescapedControlCharacterInString(ascii: byte, in: string, index: errorIndex)
357357

358358
case UInt8(ascii: "\\"):
359359
self.moveReaderIndex(forwardBy: copy)
360360
if output != nil {
361-
output! += try Self.makeString(self.array[stringStartIndex ..< stringStartIndex + copy])
361+
output! += try makeString(at: stringStartIndex ..< stringStartIndex + copy)
362362
} else {
363-
output = try Self.makeString(self.array[stringStartIndex ..< stringStartIndex + copy])
363+
output = try makeString(at: stringStartIndex ..< stringStartIndex + copy)
364364
}
365-
365+
366366
let escapedStartIndex = self.readerIndex
367367

368368
do {
@@ -371,13 +371,13 @@ extension JSONParser {
371371
stringStartIndex = self.readerIndex
372372
copy = 0
373373
} catch EscapedSequenceError.unexpectedEscapedCharacter(let ascii, let failureIndex) {
374-
output! += try Self.makeString(array[escapedStartIndex ..< self.readerIndex])
374+
output! += try makeString(at: escapedStartIndex ..< self.readerIndex)
375375
throw JSONError.unexpectedEscapedCharacter(ascii: ascii, in: output!, index: failureIndex)
376376
} catch EscapedSequenceError.expectedLowSurrogateUTF8SequenceAfterHighSurrogate(let failureIndex) {
377-
output! += try Self.makeString(array[escapedStartIndex ..< self.readerIndex])
377+
output! += try makeString(at: escapedStartIndex ..< self.readerIndex)
378378
throw JSONError.expectedLowSurrogateUTF8SequenceAfterHighSurrogate(in: output!, index: failureIndex)
379379
} catch EscapedSequenceError.couldNotCreateUnicodeScalarFromUInt32(let failureIndex, let unicodeScalarValue) {
380-
output! += try Self.makeString(array[escapedStartIndex ..< self.readerIndex])
380+
output! += try makeString(at: escapedStartIndex ..< self.readerIndex)
381381
throw JSONError.couldNotCreateUnicodeScalarFromUInt32(
382382
in: output!, index: failureIndex, unicodeScalarValue: unicodeScalarValue
383383
)
@@ -392,9 +392,10 @@ extension JSONParser {
392392
throw JSONError.unexpectedEndOfFile
393393
}
394394

395-
private static func makeString<Bytes: Collection>(_ bytes: Bytes) throws -> String where Bytes.Element == UInt8 {
396-
guard let str = String(bytes: bytes, encoding: .utf8) else {
397-
throw JSONError.cannotConvertInputDataToUTF8
395+
private func makeString<R: RangeExpression<Int>>(at range: R) throws -> String {
396+
let raw = array[range]
397+
guard let str = String(bytes: raw, encoding: .utf8) else {
398+
throw JSONError.invalidUTF8Sequence(Data(raw), characterIndex: range.relative(to: array).lowerBound)
398399
}
399400
return str
400401
}
@@ -510,9 +511,9 @@ extension JSONParser {
510511
return nil
511512
}
512513
}
513-
514+
514515
// MARK: Numbers
515-
516+
516517
private enum ControlCharacter {
517518
case operand
518519
case decimalPoint
@@ -546,7 +547,7 @@ extension JSONParser {
546547
}
547548

548549
var numberchars = 1
549-
550+
550551
// parse everything else
551552
while let byte = self.peek(offset: numberchars) {
552553
switch byte {
@@ -619,32 +620,32 @@ extension JSONParser {
619620
}
620621

621622
extension UInt8 {
622-
623+
623624
internal static let _space = UInt8(ascii: " ")
624625
internal static let _return = UInt8(ascii: "\r")
625626
internal static let _newline = UInt8(ascii: "\n")
626627
internal static let _tab = UInt8(ascii: "\t")
627-
628+
628629
internal static let _colon = UInt8(ascii: ":")
629630
internal static let _comma = UInt8(ascii: ",")
630-
631+
631632
internal static let _openbrace = UInt8(ascii: "{")
632633
internal static let _closebrace = UInt8(ascii: "}")
633-
634+
634635
internal static let _openbracket = UInt8(ascii: "[")
635636
internal static let _closebracket = UInt8(ascii: "]")
636-
637+
637638
internal static let _quote = UInt8(ascii: "\"")
638639
internal static let _backslash = UInt8(ascii: "\\")
639-
640+
640641
}
641642

642643
extension Array where Element == UInt8 {
643-
644+
644645
internal static let _true = [UInt8(ascii: "t"), UInt8(ascii: "r"), UInt8(ascii: "u"), UInt8(ascii: "e")]
645646
internal static let _false = [UInt8(ascii: "f"), UInt8(ascii: "a"), UInt8(ascii: "l"), UInt8(ascii: "s"), UInt8(ascii: "e")]
646647
internal static let _null = [UInt8(ascii: "n"), UInt8(ascii: "u"), UInt8(ascii: "l"), UInt8(ascii: "l")]
647-
648+
648649
}
649650

650651
enum JSONError: Swift.Error, Equatable {
@@ -660,4 +661,5 @@ enum JSONError: Swift.Error, Equatable {
660661
case numberWithLeadingZero(index: Int)
661662
case numberIsNotRepresentableInSwift(parsed: String)
662663
case singleFragmentFoundButNotAllowed
664+
case invalidUTF8Sequence(Data, characterIndex: Int)
663665
}

Sources/Foundation/JSONSerialization.swift

+4
Original file line number | Diff line number | Diff line change
@@ -260,6 +260,10 @@ open class JSONSerialization : NSObject {
260260
throw NSError(domain: NSCocoaErrorDomain, code: CocoaError.propertyListReadCorrupt.rawValue, userInfo: [
261261
NSDebugDescriptionErrorKey : #"Number \#(parsed) is not representable in Swift."#
262262
])
263+
case .invalidUTF8Sequence(let data, characterIndex: let index):
264+
throw NSError(domain: NSCocoaErrorDomain, code: CocoaError.propertyListReadCorrupt.rawValue, userInfo: [
265+
NSDebugDescriptionErrorKey : #"Invalid UTF-8 sequence \#(data) starting from character \#(index)."#
266+
])
263267
}
264268
} catch {
265269
preconditionFailure("Only `JSONError` expected")

0 commit comments

Comments
 (0)