Skip to content

Commit bafd3d0

Browse files
authored
Merge pull request swiftlang#4675 from ruihe774/json-string
JSONDecoder.readString(): throw on invalid UTF-8
2 parents 15cb7df + c284a23 commit bafd3d0

File tree

2 files changed

+52
-50
lines changed

2 files changed

+52
-50
lines changed

Sources/Foundation/JSONSerialization+Parser.swift

+48 -50
Original file line number | Diff line number | Diff line change
@@ -29,7 +29,7 @@ internal struct JSONParser {
2929
}
3030
}
3131
#endif
32-
32+
3333
// ensure only white space is remaining
3434
var whitespace = 0
3535
while let next = reader.peek(offset: whitespace) {
@@ -41,7 +41,7 @@ internal struct JSONParser {
4141
throw JSONError.unexpectedCharacter(ascii: next, characterIndex: reader.readerIndex + whitespace)
4242
}
4343
}
44-
44+
4545
return value
4646
}
4747

@@ -107,15 +107,15 @@ internal struct JSONParser {
107107
default:
108108
break
109109
}
110-
110+
111111
var array = [JSONValue]()
112112
array.reserveCapacity(10)
113-
113+
114114
// parse values
115115
while true {
116116
let value = try parseValue()
117117
array.append(value)
118-
118+
119119
// consume the whitespace after the value before the comma
120120
let ascii = try reader.consumeWhitespace()
121121
switch ascii {
@@ -161,7 +161,7 @@ internal struct JSONParser {
161161
default:
162162
break
163163
}
164-
164+
165165
var object = [String: JSONValue]()
166166
object.reserveCapacity(20)
167167

@@ -174,7 +174,7 @@ internal struct JSONParser {
174174
reader.moveReaderIndex(forwardBy: 1)
175175
try reader.consumeWhitespace()
176176
object[key] = try self.parseValue()
177-
177+
178178
let commaOrBrace = try reader.consumeWhitespace()
179179
switch commaOrBrace {
180180
case ._closebrace:
@@ -196,26 +196,26 @@ internal struct JSONParser {
196196
}
197197

198198
extension JSONParser {
199-
199+
200200
struct DocumentReader {
201201
let array: [UInt8]
202202

203203
private(set) var readerIndex: Int = 0
204-
204+
205205
private var readableBytes: Int {
206206
self.array.endIndex - self.readerIndex
207207
}
208-
208+
209209
var isEOF: Bool {
210210
self.readerIndex >= self.array.endIndex
211211
}
212-
212+
213213

214214
init(array: [UInt8]) {
215215
self.array = array
216216
}
217217

218-
subscript(bounds: Range<Int>) -> ArraySlice<UInt8> {
218+
subscript<R: RangeExpression<Int>>(bounds: R) -> ArraySlice<UInt8> {
219219
self.array[bounds]
220220
}
221221

@@ -234,14 +234,14 @@ extension JSONParser {
234234
guard self.readerIndex + offset < self.array.endIndex else {
235235
return nil
236236
}
237-
237+
238238
return self.array[self.readerIndex + offset]
239239
}
240-
240+
241241
mutating func moveReaderIndex(forwardBy offset: Int) {
242242
self.readerIndex += offset
243243
}
244-
244+
245245
@discardableResult
246246
mutating func consumeWhitespace() throws -> UInt8 {
247247
var whitespace = 0
@@ -255,18 +255,18 @@ extension JSONParser {
255255
return ascii
256256
}
257257
}
258-
258+
259259
throw JSONError.unexpectedEndOfFile
260260
}
261-
261+
262262
mutating func readString() throws -> String {
263263
try self.readUTF8StringTillNextUnescapedQuote()
264264
}
265-
265+
266266
mutating func readNumber() throws -> String {
267267
try self.parseNumber()
268268
}
269-
269+
270270
mutating func readBool() throws -> Bool {
271271
switch self.read() {
272272
case UInt8(ascii: "t"):
@@ -314,11 +314,11 @@ extension JSONParser {
314314
throw JSONError.unexpectedCharacter(ascii: self.peek(offset: -1)!, characterIndex: self.readerIndex - 1)
315315
}
316316
}
317-
317+
318318
// MARK: - Private Methods -
319319

320320
// MARK: String
321-
321+
322322
enum EscapedSequenceError: Swift.Error {
323323
case expectedLowSurrogateUTF8SequenceAfterHighSurrogate(index: Int)
324324
case unexpectedEscapedCharacter(ascii: UInt8, index: Int)
@@ -339,10 +339,10 @@ extension JSONParser {
339339
self.moveReaderIndex(forwardBy: copy + 1)
340340
guard var result = output else {
341341
// if we don't have an output string we create a new string
342-
return String(decoding: self[stringStartIndex ..< stringStartIndex + copy], as: Unicode.UTF8.self)
342+
return try makeString(at: stringStartIndex ..< stringStartIndex + copy)
343343
}
344344
// if we have an output string we append
345-
result += String(decoding: self[stringStartIndex ..< stringStartIndex + copy], as: Unicode.UTF8.self)
345+
result += try makeString(at: stringStartIndex ..< stringStartIndex + copy)
346346
return result
347347

348348
case 0 ... 31:
@@ -352,17 +352,17 @@ extension JSONParser {
352352
// through U+001F).
353353
var string = output ?? ""
354354
let errorIndex = self.readerIndex + copy
355-
string += self.makeStringFast(self.array[stringStartIndex ... errorIndex])
355+
string += try makeString(at: stringStartIndex ... errorIndex)
356356
throw JSONError.unescapedControlCharacterInString(ascii: byte, in: string, index: errorIndex)
357357

358358
case UInt8(ascii: "\\"):
359359
self.moveReaderIndex(forwardBy: copy)
360360
if output != nil {
361-
output! += self.makeStringFast(self.array[stringStartIndex ..< stringStartIndex + copy])
361+
output! += try makeString(at: stringStartIndex ..< stringStartIndex + copy)
362362
} else {
363-
output = self.makeStringFast(self.array[stringStartIndex ..< stringStartIndex + copy])
363+
output = try makeString(at: stringStartIndex ..< stringStartIndex + copy)
364364
}
365-
365+
366366
let escapedStartIndex = self.readerIndex
367367

368368
do {
@@ -371,13 +371,13 @@ extension JSONParser {
371371
stringStartIndex = self.readerIndex
372372
copy = 0
373373
} catch EscapedSequenceError.unexpectedEscapedCharacter(let ascii, let failureIndex) {
374-
output! += makeStringFast(array[escapedStartIndex ..< self.readerIndex])
374+
output! += try makeString(at: escapedStartIndex ..< self.readerIndex)
375375
throw JSONError.unexpectedEscapedCharacter(ascii: ascii, in: output!, index: failureIndex)
376376
} catch EscapedSequenceError.expectedLowSurrogateUTF8SequenceAfterHighSurrogate(let failureIndex) {
377-
output! += makeStringFast(array[escapedStartIndex ..< self.readerIndex])
377+
output! += try makeString(at: escapedStartIndex ..< self.readerIndex)
378378
throw JSONError.expectedLowSurrogateUTF8SequenceAfterHighSurrogate(in: output!, index: failureIndex)
379379
} catch EscapedSequenceError.couldNotCreateUnicodeScalarFromUInt32(let failureIndex, let unicodeScalarValue) {
380-
output! += makeStringFast(array[escapedStartIndex ..< self.readerIndex])
380+
output! += try makeString(at: escapedStartIndex ..< self.readerIndex)
381381
throw JSONError.couldNotCreateUnicodeScalarFromUInt32(
382382
in: output!, index: failureIndex, unicodeScalarValue: unicodeScalarValue
383383
)
@@ -392,15 +392,12 @@ extension JSONParser {
392392
throw JSONError.unexpectedEndOfFile
393393
}
394394

395-
// can be removed as soon https://bugs.swift.org/browse/SR-12126 and
396-
// https://bugs.swift.org/browse/SR-12125 has landed.
397-
// Thanks @weissi for making my code fast!
398-
private func makeStringFast<Bytes: Collection>(_ bytes: Bytes) -> String where Bytes.Element == UInt8 {
399-
if let string = bytes.withContiguousStorageIfAvailable({ String(decoding: $0, as: Unicode.UTF8.self) }) {
400-
return string
401-
} else {
402-
return String(decoding: bytes, as: Unicode.UTF8.self)
395+
private func makeString<R: RangeExpression<Int>>(at range: R) throws -> String {
396+
let raw = array[range]
397+
guard let str = String(bytes: raw, encoding: .utf8) else {
398+
throw JSONError.invalidUTF8Sequence(Data(raw), characterIndex: range.relative(to: array).lowerBound)
403399
}
400+
return str
404401
}
405402

406403
private mutating func parseEscapeSequence() throws -> String {
@@ -514,9 +511,9 @@ extension JSONParser {
514511
return nil
515512
}
516513
}
517-
514+
518515
// MARK: Numbers
519-
516+
520517
private enum ControlCharacter {
521518
case operand
522519
case decimalPoint
@@ -550,7 +547,7 @@ extension JSONParser {
550547
}
551548

552549
var numberchars = 1
553-
550+
554551
// parse everything else
555552
while let byte = self.peek(offset: numberchars) {
556553
switch byte {
@@ -606,7 +603,7 @@ extension JSONParser {
606603
let numberStartIndex = self.readerIndex
607604
self.moveReaderIndex(forwardBy: numberchars)
608605

609-
return self.makeStringFast(self[numberStartIndex ..< self.readerIndex])
606+
return String(decoding: self[numberStartIndex ..< self.readerIndex], as: Unicode.UTF8.self)
610607
default:
611608
throw JSONError.unexpectedCharacter(ascii: byte, characterIndex: readerIndex + numberchars)
612609
}
@@ -623,32 +620,32 @@ extension JSONParser {
623620
}
624621

625622
extension UInt8 {
626-
623+
627624
internal static let _space = UInt8(ascii: " ")
628625
internal static let _return = UInt8(ascii: "\r")
629626
internal static let _newline = UInt8(ascii: "\n")
630627
internal static let _tab = UInt8(ascii: "\t")
631-
628+
632629
internal static let _colon = UInt8(ascii: ":")
633630
internal static let _comma = UInt8(ascii: ",")
634-
631+
635632
internal static let _openbrace = UInt8(ascii: "{")
636633
internal static let _closebrace = UInt8(ascii: "}")
637-
634+
638635
internal static let _openbracket = UInt8(ascii: "[")
639636
internal static let _closebracket = UInt8(ascii: "]")
640-
637+
641638
internal static let _quote = UInt8(ascii: "\"")
642639
internal static let _backslash = UInt8(ascii: "\\")
643-
640+
644641
}
645642

646643
extension Array where Element == UInt8 {
647-
644+
648645
internal static let _true = [UInt8(ascii: "t"), UInt8(ascii: "r"), UInt8(ascii: "u"), UInt8(ascii: "e")]
649646
internal static let _false = [UInt8(ascii: "f"), UInt8(ascii: "a"), UInt8(ascii: "l"), UInt8(ascii: "s"), UInt8(ascii: "e")]
650647
internal static let _null = [UInt8(ascii: "n"), UInt8(ascii: "u"), UInt8(ascii: "l"), UInt8(ascii: "l")]
651-
648+
652649
}
653650

654651
enum JSONError: Swift.Error, Equatable {
@@ -664,4 +661,5 @@ enum JSONError: Swift.Error, Equatable {
664661
case numberWithLeadingZero(index: Int)
665662
case numberIsNotRepresentableInSwift(parsed: String)
666663
case singleFragmentFoundButNotAllowed
664+
case invalidUTF8Sequence(Data, characterIndex: Int)
667665
}

Sources/Foundation/JSONSerialization.swift

+4
Original file line number | Diff line number | Diff line change
@@ -260,6 +260,10 @@ open class JSONSerialization : NSObject {
260260
throw NSError(domain: NSCocoaErrorDomain, code: CocoaError.propertyListReadCorrupt.rawValue, userInfo: [
261261
NSDebugDescriptionErrorKey : #"Number \#(parsed) is not representable in Swift."#
262262
])
263+
case .invalidUTF8Sequence(let data, characterIndex: let index):
264+
throw NSError(domain: NSCocoaErrorDomain, code: CocoaError.propertyListReadCorrupt.rawValue, userInfo: [
265+
NSDebugDescriptionErrorKey : #"Invalid UTF-8 sequence \#(data) starting from character \#(index)."#
266+
])
263267
}
264268
} catch {
265269
preconditionFailure("Only `JSONError` expected")

0 commit comments

Comments
 (0)