Skip to content

Commit 9a63ae1

Browse files
committed
JSONDecoder.readString(): throw on invalid UTF-8
String(decoding:as:), which was previously used by JSONDecoder.readString() to make a string from JSON bytes, would silently repair invalid UTF-8 sequences and not throw an error. This commit uses String(bytes:encoding:) instead, which fails on invalid UTF-8 sequences. This matches the behavior of Darwin Foundation. (Some examples of invalid UTF-8: http://www.cl.cam.ac.uk/~mgk25/ucs/examples/UTF-8-test.txt)
1 parent 0787ce6 commit 9a63ae1

File tree

1 file changed

+13
-17
lines changed

1 file changed

+13
-17
lines changed

Sources/Foundation/JSONSerialization+Parser.swift

+13-17
Original file line numberDiff line numberDiff line change
@@ -339,10 +339,10 @@ extension JSONParser {
339339
self.moveReaderIndex(forwardBy: copy + 1)
340340
guard var result = output else {
341341
// if we don't have an output string we create a new string
342-
return String(decoding: self[stringStartIndex ..< stringStartIndex + copy], as: Unicode.UTF8.self)
342+
return try Self.makeString(self[stringStartIndex ..< stringStartIndex + copy])
343343
}
344344
// if we have an output string we append
345-
result += String(decoding: self[stringStartIndex ..< stringStartIndex + copy], as: Unicode.UTF8.self)
345+
result += try Self.makeString(self[stringStartIndex ..< stringStartIndex + copy])
346346
return result
347347

348348
case 0 ... 31:
@@ -352,15 +352,15 @@ extension JSONParser {
352352
// through U+001F).
353353
var string = output ?? ""
354354
let errorIndex = self.readerIndex + copy
355-
string += self.makeStringFast(self.array[stringStartIndex ... errorIndex])
355+
string += try Self.makeString(self.array[stringStartIndex ... errorIndex])
356356
throw JSONError.unescapedControlCharacterInString(ascii: byte, in: string, index: errorIndex)
357357

358358
case UInt8(ascii: "\\"):
359359
self.moveReaderIndex(forwardBy: copy)
360360
if output != nil {
361-
output! += self.makeStringFast(self.array[stringStartIndex ..< stringStartIndex + copy])
361+
output! += try Self.makeString(self.array[stringStartIndex ..< stringStartIndex + copy])
362362
} else {
363-
output = self.makeStringFast(self.array[stringStartIndex ..< stringStartIndex + copy])
363+
output = try Self.makeString(self.array[stringStartIndex ..< stringStartIndex + copy])
364364
}
365365

366366
let escapedStartIndex = self.readerIndex
@@ -371,13 +371,13 @@ extension JSONParser {
371371
stringStartIndex = self.readerIndex
372372
copy = 0
373373
} catch EscapedSequenceError.unexpectedEscapedCharacter(let ascii, let failureIndex) {
374-
output! += makeStringFast(array[escapedStartIndex ..< self.readerIndex])
374+
output! += try Self.makeString(array[escapedStartIndex ..< self.readerIndex])
375375
throw JSONError.unexpectedEscapedCharacter(ascii: ascii, in: output!, index: failureIndex)
376376
} catch EscapedSequenceError.expectedLowSurrogateUTF8SequenceAfterHighSurrogate(let failureIndex) {
377-
output! += makeStringFast(array[escapedStartIndex ..< self.readerIndex])
377+
output! += try Self.makeString(array[escapedStartIndex ..< self.readerIndex])
378378
throw JSONError.expectedLowSurrogateUTF8SequenceAfterHighSurrogate(in: output!, index: failureIndex)
379379
} catch EscapedSequenceError.couldNotCreateUnicodeScalarFromUInt32(let failureIndex, let unicodeScalarValue) {
380-
output! += makeStringFast(array[escapedStartIndex ..< self.readerIndex])
380+
output! += try Self.makeString(array[escapedStartIndex ..< self.readerIndex])
381381
throw JSONError.couldNotCreateUnicodeScalarFromUInt32(
382382
in: output!, index: failureIndex, unicodeScalarValue: unicodeScalarValue
383383
)
@@ -392,15 +392,11 @@ extension JSONParser {
392392
throw JSONError.unexpectedEndOfFile
393393
}
394394

395-
// can be removed as soon https://bugs.swift.org/browse/SR-12126 and
396-
// https://bugs.swift.org/browse/SR-12125 has landed.
397-
// Thanks @weissi for making my code fast!
398-
private func makeStringFast<Bytes: Collection>(_ bytes: Bytes) -> String where Bytes.Element == UInt8 {
399-
if let string = bytes.withContiguousStorageIfAvailable({ String(decoding: $0, as: Unicode.UTF8.self) }) {
400-
return string
401-
} else {
402-
return String(decoding: bytes, as: Unicode.UTF8.self)
395+
private static func makeString<Bytes: Collection>(_ bytes: Bytes) throws -> String where Bytes.Element == UInt8 {
396+
guard let str = String(bytes: bytes, encoding: .utf8) else {
397+
throw JSONError.cannotConvertInputDataToUTF8
403398
}
399+
return str
404400
}
405401

406402
private mutating func parseEscapeSequence() throws -> String {
@@ -606,7 +602,7 @@ extension JSONParser {
606602
let numberStartIndex = self.readerIndex
607603
self.moveReaderIndex(forwardBy: numberchars)
608604

609-
return self.makeStringFast(self[numberStartIndex ..< self.readerIndex])
605+
return String(decoding: self[numberStartIndex ..< self.readerIndex], as: Unicode.UTF8.self)
610606
default:
611607
throw JSONError.unexpectedCharacter(ascii: byte, characterIndex: readerIndex + numberchars)
612608
}

0 commit comments

Comments
 (0)