diff --git a/Foundation.xcodeproj/project.pbxproj b/Foundation.xcodeproj/project.pbxproj index e59c2cac20..8a2a24ddc9 100644 --- a/Foundation.xcodeproj/project.pbxproj +++ b/Foundation.xcodeproj/project.pbxproj @@ -86,6 +86,7 @@ 3EA9D6701EF0532D00B362D6 /* TestJSONEncoder.swift in Sources */ = {isa = PBXBuildFile; fileRef = 3EA9D66F1EF0532D00B362D6 /* TestJSONEncoder.swift */; }; 3EDCE50C1EF04D8100C2EC04 /* Codable.swift in Sources */ = {isa = PBXBuildFile; fileRef = 3EDCE5051EF04D8100C2EC04 /* Codable.swift */; }; 3EDCE5101EF04D8100C2EC04 /* JSONEncoder.swift in Sources */ = {isa = PBXBuildFile; fileRef = 3EDCE5091EF04D8100C2EC04 /* JSONEncoder.swift */; }; + 49D55FA125E84FE5007BD3B3 /* JSONSerialization+Parser.swift in Sources */ = {isa = PBXBuildFile; fileRef = 49D55FA025E84FE5007BD3B3 /* JSONSerialization+Parser.swift */; }; 528776141BF2629700CB0090 /* FoundationErrors.swift in Sources */ = {isa = PBXBuildFile; fileRef = 522C253A1BF16E1600804FC6 /* FoundationErrors.swift */; }; 528776191BF27D9500CB0090 /* Test.plist in Resources */ = {isa = PBXBuildFile; fileRef = 528776181BF27D9500CB0090 /* Test.plist */; }; 555683BD1C1250E70041D4C6 /* TestUserDefaults.swift in Sources */ = {isa = PBXBuildFile; fileRef = 555683BC1C1250E70041D4C6 /* TestUserDefaults.swift */; }; @@ -804,6 +805,7 @@ 3EDCE5051EF04D8100C2EC04 /* Codable.swift */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.swift; path = Codable.swift; sourceTree = ""; }; 3EDCE5091EF04D8100C2EC04 /* JSONEncoder.swift */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.swift; path = JSONEncoder.swift; sourceTree = ""; }; 400E22641C1A4E58007C5933 /* TestProcessInfo.swift */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.swift; path = TestProcessInfo.swift; sourceTree = ""; }; + 49D55FA025E84FE5007BD3B3 /* JSONSerialization+Parser.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = 
"JSONSerialization+Parser.swift"; sourceTree = ""; }; 4AE109261C17CCBF007367B5 /* TestIndexPath.swift */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.swift; path = TestIndexPath.swift; sourceTree = ""; }; 4DC1D07F1C12EEEF00B5948A /* TestPipe.swift */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.swift; path = TestPipe.swift; sourceTree = ""; }; 522C253A1BF16E1600804FC6 /* FoundationErrors.swift */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.swift; path = FoundationErrors.swift; sourceTree = ""; }; @@ -2080,6 +2082,7 @@ 63DCE9D11EAA430100E9CB02 /* ISO8601DateFormatter.swift */, 3EDCE5091EF04D8100C2EC04 /* JSONEncoder.swift */, EADE0B641BD15DFF00C49C64 /* JSONSerialization.swift */, + 49D55FA025E84FE5007BD3B3 /* JSONSerialization+Parser.swift */, EADE0B661BD15DFF00C49C64 /* LengthFormatter.swift */, 5BD70FB11D3D4CDC003B9BF8 /* Locale.swift */, EADE0B681BD15DFF00C49C64 /* MassFormatter.swift */, @@ -2868,6 +2871,7 @@ EADE0BB01BD15E0000C49C64 /* Port.swift in Sources */, EADE0BB91BD15E0000C49C64 /* NSTextCheckingResult.swift in Sources */, EA0812691DA71C8A00651B70 /* ProgressFraction.swift in Sources */, + 49D55FA125E84FE5007BD3B3 /* JSONSerialization+Parser.swift in Sources */, 5BC1B9A821F275B000524D8C /* Collections+DataProtocol.swift in Sources */, 5BF7AEBE1BCD51F9008F214A /* NSTimeZone.swift in Sources */, EADE0B951BD15DFF00C49C64 /* DateComponentsFormatter.swift in Sources */, diff --git a/Sources/Foundation/CMakeLists.txt b/Sources/Foundation/CMakeLists.txt index 8855e956dc..e15aa4134d 100644 --- a/Sources/Foundation/CMakeLists.txt +++ b/Sources/Foundation/CMakeLists.txt @@ -37,6 +37,7 @@ add_library(Foundation ISO8601DateFormatter.swift JSONEncoder.swift JSONSerialization.swift + JSONSerialization+Parser.swift LengthFormatter.swift Locale.swift MassFormatter.swift diff --git a/Sources/Foundation/JSONSerialization+Parser.swift 
b/Sources/Foundation/JSONSerialization+Parser.swift new file mode 100644 index 0000000000..523119872b --- /dev/null +++ b/Sources/Foundation/JSONSerialization+Parser.swift @@ -0,0 +1,660 @@ +//===----------------------------------------------------------------------===// +// +// This source file is part of the Swift.org open source project +// +// Copyright (c) 2021 Apple Inc. and the Swift project authors +// Licensed under Apache License v2.0 with Runtime Library Exception +// +// See https://swift.org/LICENSE.txt for license information +// See https://swift.org/CONTRIBUTORS.txt for the list of Swift project authors +// +//===----------------------------------------------------------------------===// + + +internal struct JSONParser { + var reader: DocumentReader + var depth: Int = 0 + + init(bytes: [UInt8]) { + self.reader = DocumentReader(array: bytes) + } + + mutating func parse() throws -> JSONValue { + try reader.consumeWhitespace() + let value = try self.parseValue() + #if DEBUG + defer { + guard self.depth == 0 else { + preconditionFailure("Expected to end parsing with a depth of 0") + } + } + #endif + + // ensure only white space is remaining + var whitespace = 0 + while let next = reader.peek(offset: whitespace) { + switch next { + case ._space, ._tab, ._return, ._newline: + whitespace += 1 + continue + default: + throw JSONError.unexpectedCharacter(ascii: next, characterIndex: reader.readerIndex + whitespace) + } + } + + return value + } + + // MARK: Generic Value Parsing + + mutating func parseValue() throws -> JSONValue { + var whitespace = 0 + while let byte = reader.peek(offset: whitespace) { + switch byte { + case UInt8(ascii: "\""): + reader.moveReaderIndex(forwardBy: whitespace) + return .string(try reader.readString()) + case ._openbrace: + reader.moveReaderIndex(forwardBy: whitespace) + let object = try parseObject() + return .object(object) + case ._openbracket: + reader.moveReaderIndex(forwardBy: whitespace) + let array = try parseArray() + 
return .array(array) + case UInt8(ascii: "f"), UInt8(ascii: "t"): + reader.moveReaderIndex(forwardBy: whitespace) + let bool = try reader.readBool() + return .bool(bool) + case UInt8(ascii: "n"): + reader.moveReaderIndex(forwardBy: whitespace) + try reader.readNull() + return .null + case UInt8(ascii: "-"), UInt8(ascii: "0") ... UInt8(ascii: "9"): + reader.moveReaderIndex(forwardBy: whitespace) + let number = try self.reader.readNumber() + return .number(number) + case ._space, ._return, ._newline, ._tab: + whitespace += 1 + continue + default: + throw JSONError.unexpectedCharacter(ascii: byte, characterIndex: self.reader.readerIndex) + } + } + + throw JSONError.unexpectedEndOfFile + } + + + // MARK: - Parse Array - + + mutating func parseArray() throws -> [JSONValue] { + precondition(self.reader.read() == ._openbracket) + guard self.depth < 512 else { + throw JSONError.tooManyNestedArraysOrDictionaries(characterIndex: self.reader.readerIndex - 1) + } + self.depth += 1 + defer { depth -= 1 } + + // parse first value or end immediatly + switch try reader.consumeWhitespace() { + case ._space, ._return, ._newline, ._tab: + preconditionFailure("Expected that all white space is consumed") + case ._closebracket: + // if the first char after whitespace is a closing bracket, we found an empty array + self.reader.moveReaderIndex(forwardBy: 1) + return [] + default: + break + } + + var array = [JSONValue]() + array.reserveCapacity(10) + + // parse values + while true { + let value = try parseValue() + array.append(value) + + // consume the whitespace after the value before the comma + let ascii = try reader.consumeWhitespace() + switch ascii { + case ._space, ._return, ._newline, ._tab: + preconditionFailure("Expected that all white space is consumed") + case ._closebracket: + reader.moveReaderIndex(forwardBy: 1) + return array + case ._comma: + // consume the comma + reader.moveReaderIndex(forwardBy: 1) + // consume the whitespace before the next value + if try 
reader.consumeWhitespace() == ._closebracket { + // the foundation json implementation does support trailing commas + reader.moveReaderIndex(forwardBy: 1) + return array + } + continue + default: + throw JSONError.unexpectedCharacter(ascii: ascii, characterIndex: reader.readerIndex) + } + } + } + + // MARK: - Object parsing - + + mutating func parseObject() throws -> [String: JSONValue] { + precondition(self.reader.read() == ._openbrace) + guard self.depth < 512 else { + throw JSONError.tooManyNestedArraysOrDictionaries(characterIndex: self.reader.readerIndex - 1) + } + self.depth += 1 + defer { depth -= 1 } + + // parse first value or end immediatly + switch try reader.consumeWhitespace() { + case ._space, ._return, ._newline, ._tab: + preconditionFailure("Expected that all white space is consumed") + case ._closebrace: + // if the first char after whitespace is a closing bracket, we found an empty array + self.reader.moveReaderIndex(forwardBy: 1) + return [:] + default: + break + } + + var object = [String: JSONValue]() + object.reserveCapacity(20) + + while true { + let key = try reader.readString() + let colon = try reader.consumeWhitespace() + guard colon == ._colon else { + throw JSONError.unexpectedCharacter(ascii: colon, characterIndex: reader.readerIndex) + } + reader.moveReaderIndex(forwardBy: 1) + try reader.consumeWhitespace() + object[key] = try self.parseValue() + + let commaOrBrace = try reader.consumeWhitespace() + switch commaOrBrace { + case ._closebrace: + reader.moveReaderIndex(forwardBy: 1) + return object + case ._comma: + reader.moveReaderIndex(forwardBy: 1) + if try reader.consumeWhitespace() == ._closebrace { + // the foundation json implementation does support trailing commas + reader.moveReaderIndex(forwardBy: 1) + return object + } + continue + default: + throw JSONError.unexpectedCharacter(ascii: commaOrBrace, characterIndex: reader.readerIndex) + } + } + } +} + +extension JSONParser { + + struct DocumentReader { + let array: [UInt8] + 
+ private(set) var readerIndex: Int = 0 + + private var readableBytes: Int { + self.array.endIndex - self.readerIndex + } + + var isEOF: Bool { + self.readerIndex >= self.array.endIndex + } + + + init(array: [UInt8]) { + self.array = array + } + + subscript(bounds: Range) -> ArraySlice { + self.array[bounds] + } + + mutating func read() -> UInt8? { + guard self.readerIndex < self.array.endIndex else { + self.readerIndex = self.array.endIndex + return nil + } + + defer { self.readerIndex += 1 } + + return self.array[self.readerIndex] + } + + func peek(offset: Int = 0) -> UInt8? { + guard self.readerIndex + offset < self.array.endIndex else { + return nil + } + + return self.array[self.readerIndex + offset] + } + + mutating func moveReaderIndex(forwardBy offset: Int) { + self.readerIndex += offset + } + + @discardableResult + mutating func consumeWhitespace() throws -> UInt8 { + var whitespace = 0 + while let ascii = self.peek(offset: whitespace) { + switch ascii { + case ._space, ._return, ._newline, ._tab: + whitespace += 1 + continue + default: + self.moveReaderIndex(forwardBy: whitespace) + return ascii + } + } + + throw JSONError.unexpectedEndOfFile + } + + mutating func readString() throws -> String { + try self.readUTF8StringTillNextUnescapedQuote() + } + + mutating func readNumber() throws -> String { + try self.parseNumber() + } + + mutating func readBool() throws -> Bool { + switch self.read() { + case UInt8(ascii: "t"): + guard self.read() == UInt8(ascii: "r"), + self.read() == UInt8(ascii: "u"), + self.read() == UInt8(ascii: "e") + else { + guard !self.isEOF else { + throw JSONError.unexpectedEndOfFile + } + + throw JSONError.unexpectedCharacter(ascii: self.peek(offset: -1)!, characterIndex: self.readerIndex - 1) + } + + return true + case UInt8(ascii: "f"): + guard self.read() == UInt8(ascii: "a"), + self.read() == UInt8(ascii: "l"), + self.read() == UInt8(ascii: "s"), + self.read() == UInt8(ascii: "e") + else { + guard !self.isEOF else { + throw 
JSONError.unexpectedEndOfFile + } + + throw JSONError.unexpectedCharacter(ascii: self.peek(offset: -1)!, characterIndex: self.readerIndex - 1) + } + + return false + default: + preconditionFailure("Expected to have `t` or `f` as first character") + } + } + + mutating func readNull() throws { + guard self.read() == UInt8(ascii: "n"), + self.read() == UInt8(ascii: "u"), + self.read() == UInt8(ascii: "l"), + self.read() == UInt8(ascii: "l") + else { + guard !self.isEOF else { + throw JSONError.unexpectedEndOfFile + } + + throw JSONError.unexpectedCharacter(ascii: self.peek(offset: -1)!, characterIndex: self.readerIndex - 1) + } + } + + // MARK: - Private Methods - + + // MARK: String + + enum EscapedSequenceError: Swift.Error { + case expectedLowSurrogateUTF8SequenceAfterHighSurrogate(index: Int) + case unexpectedEscapedCharacter(ascii: UInt8, index: Int) + case couldNotCreateUnicodeScalarFromUInt32(index: Int, unicodeScalarValue: UInt32) + } + + private mutating func readUTF8StringTillNextUnescapedQuote() throws -> String { + guard self.read() == ._quote else { + throw JSONError.unexpectedCharacter(ascii: self.peek(offset: -1)!, characterIndex: self.readerIndex - 1) + } + var stringStartIndex = self.readerIndex + var copy = 0 + var output: String? + + while let byte = peek(offset: copy) { + switch byte { + case UInt8(ascii: "\""): + self.moveReaderIndex(forwardBy: copy + 1) + guard var result = output else { + // if we don't have an output string we create a new string + return String(decoding: self[stringStartIndex ..< stringStartIndex + copy], as: Unicode.UTF8.self) + } + // if we have an output string we append + result += String(decoding: self[stringStartIndex ..< stringStartIndex + copy], as: Unicode.UTF8.self) + return result + + case 0 ... 31: + // All Unicode characters may be placed within the + // quotation marks, except for the characters that must be escaped: + // quotation mark, reverse solidus, and the control characters (U+0000 + // through U+001F). 
+ var string = output ?? "" + let errorIndex = self.readerIndex + copy + string += self.makeStringFast(self.array[stringStartIndex ... errorIndex]) + throw JSONError.unescapedControlCharacterInString(ascii: byte, in: string, index: errorIndex) + + case UInt8(ascii: "\\"): + self.moveReaderIndex(forwardBy: copy) + if output != nil { + output! += self.makeStringFast(self.array[stringStartIndex ..< stringStartIndex + copy]) + } else { + output = self.makeStringFast(self.array[stringStartIndex ..< stringStartIndex + copy]) + } + + let escapedStartIndex = self.readerIndex + + do { + let escaped = try parseEscapeSequence() + output! += escaped + stringStartIndex = self.readerIndex + copy = 0 + } catch EscapedSequenceError.unexpectedEscapedCharacter(let ascii, let failureIndex) { + output! += makeStringFast(array[escapedStartIndex ..< self.readerIndex]) + throw JSONError.unexpectedEscapedCharacter(ascii: ascii, in: output!, index: failureIndex) + } catch EscapedSequenceError.expectedLowSurrogateUTF8SequenceAfterHighSurrogate(let failureIndex) { + output! += makeStringFast(array[escapedStartIndex ..< self.readerIndex]) + throw JSONError.expectedLowSurrogateUTF8SequenceAfterHighSurrogate(in: output!, index: failureIndex) + } catch EscapedSequenceError.couldNotCreateUnicodeScalarFromUInt32(let failureIndex, let unicodeScalarValue) { + output! += makeStringFast(array[escapedStartIndex ..< self.readerIndex]) + throw JSONError.couldNotCreateUnicodeScalarFromUInt32( + in: output!, index: failureIndex, unicodeScalarValue: unicodeScalarValue + ) + } + + default: + copy += 1 + continue + } + } + + throw JSONError.unexpectedEndOfFile + } + + // can be removed as soon https://bugs.swift.org/browse/SR-12126 and + // https://bugs.swift.org/browse/SR-12125 has landed. + // Thanks @weissi for making my code fast! 
+ private func makeStringFast(_ bytes: Bytes) -> String where Bytes.Element == UInt8 { + if let string = bytes.withContiguousStorageIfAvailable({ String(decoding: $0, as: Unicode.UTF8.self) }) { + return string + } else { + return String(decoding: bytes, as: Unicode.UTF8.self) + } + } + + private mutating func parseEscapeSequence() throws -> String { + precondition(self.read() == ._backslash, "Expected to have an backslash first") + guard let ascii = self.read() else { + throw JSONError.unexpectedEndOfFile + } + + switch ascii { + case 0x22: return "\"" + case 0x5C: return "\\" + case 0x2F: return "/" + case 0x62: return "\u{08}" // \b + case 0x66: return "\u{0C}" // \f + case 0x6E: return "\u{0A}" // \n + case 0x72: return "\u{0D}" // \r + case 0x74: return "\u{09}" // \t + case 0x75: + let character = try parseUnicodeSequence() + return String(character) + default: + throw EscapedSequenceError.unexpectedEscapedCharacter(ascii: ascii, index: self.readerIndex - 1) + } + } + + private mutating func parseUnicodeSequence() throws -> Unicode.Scalar { + // we build this for utf8 only for now. + let bitPattern = try parseUnicodeHexSequence() + + // check if high surrogate + let isFirstByteHighSurrogate = bitPattern & 0xFC00 // nil everything except first six bits + if isFirstByteHighSurrogate == 0xD800 { + // if we have a high surrogate we expect a low surrogate next + let highSurrogateBitPattern = bitPattern + guard let (escapeChar) = self.read(), + let (uChar) = self.read() + else { + throw JSONError.unexpectedEndOfFile + } + + guard escapeChar == UInt8(ascii: #"\"#), uChar == UInt8(ascii: "u") else { + throw EscapedSequenceError.expectedLowSurrogateUTF8SequenceAfterHighSurrogate(index: self.readerIndex - 1) + } + + let lowSurrogateBitBattern = try parseUnicodeHexSequence() + let isSecondByteLowSurrogate = lowSurrogateBitBattern & 0xFC00 // nil everything except first six bits + guard isSecondByteLowSurrogate == 0xDC00 else { + // we are in an escaped sequence. 
for this reason an output string must have + // been initialized + throw EscapedSequenceError.expectedLowSurrogateUTF8SequenceAfterHighSurrogate(index: self.readerIndex - 1) + } + + let highValue = UInt32(highSurrogateBitPattern - 0xD800) * 0x400 + let lowValue = UInt32(lowSurrogateBitBattern - 0xDC00) + let unicodeValue = highValue + lowValue + 0x10000 + guard let unicode = Unicode.Scalar(unicodeValue) else { + throw EscapedSequenceError.couldNotCreateUnicodeScalarFromUInt32( + index: self.readerIndex, unicodeScalarValue: unicodeValue + ) + } + return unicode + } + + guard let unicode = Unicode.Scalar(bitPattern) else { + throw EscapedSequenceError.couldNotCreateUnicodeScalarFromUInt32( + index: self.readerIndex, unicodeScalarValue: UInt32(bitPattern) + ) + } + return unicode + } + + private mutating func parseUnicodeHexSequence() throws -> UInt16 { + // As stated in RFC-8259 an escaped unicode character is 4 HEXDIGITs long + // https://tools.ietf.org/html/rfc8259#section-7 + let startIndex = self.readerIndex + guard let firstHex = self.read(), + let secondHex = self.read(), + let thirdHex = self.read(), + let forthHex = self.read() + else { + throw JSONError.unexpectedEndOfFile + } + + guard let first = DocumentReader.hexAsciiTo4Bits(firstHex), + let second = DocumentReader.hexAsciiTo4Bits(secondHex), + let third = DocumentReader.hexAsciiTo4Bits(thirdHex), + let forth = DocumentReader.hexAsciiTo4Bits(forthHex) + else { + let hexString = String(decoding: [firstHex, secondHex, thirdHex, forthHex], as: Unicode.UTF8.self) + throw JSONError.invalidHexDigitSequence(hexString, index: startIndex) + } + let firstByte = UInt16(first) << 4 | UInt16(second) + let secondByte = UInt16(third) << 4 | UInt16(forth) + + let bitPattern = UInt16(firstByte) << 8 | UInt16(secondByte) + + return bitPattern + } + + private static func hexAsciiTo4Bits(_ ascii: UInt8) -> UInt8? { + switch ascii { + case 48 ... 57: + return ascii - 48 + case 65 ... 
70: + // uppercase letters + return ascii - 55 + case 97 ... 102: + // lowercase letters + return ascii - 87 + default: + return nil + } + } + + // MARK: Numbers + + private enum ControlCharacter { + case operand + case decimalPoint + case exp + case expOperator + } + + private mutating func parseNumber() throws -> String { + var pastControlChar: ControlCharacter = .operand + var numbersSinceControlChar: UInt = 0 + var hasLeadingZero = false + + // parse first character + + guard let ascii = self.peek() else { + preconditionFailure("Why was this function called, if there is no 0...9 or -") + } + switch ascii { + case UInt8(ascii: "0"): + numbersSinceControlChar = 1 + pastControlChar = .operand + hasLeadingZero = true + case UInt8(ascii: "1") ... UInt8(ascii: "9"): + numbersSinceControlChar = 1 + pastControlChar = .operand + case UInt8(ascii: "-"): + numbersSinceControlChar = 0 + pastControlChar = .operand + default: + preconditionFailure("Why was this function called, if there is no 0...9 or -") + } + + var numberchars = 1 + + // parse everything else + while let byte = self.peek(offset: numberchars) { + switch byte { + case UInt8(ascii: "0"): + if hasLeadingZero { + throw JSONError.numberWithLeadingZero(index: readerIndex + numberchars) + } + if numbersSinceControlChar == 0, pastControlChar == .operand { + // the number started with a minus. this is the leading zero. + hasLeadingZero = true + } + numberchars += 1 + numbersSinceControlChar += 1 + case UInt8(ascii: "1") ... 
UInt8(ascii: "9"): + if hasLeadingZero { + throw JSONError.numberWithLeadingZero(index: readerIndex + numberchars) + } + numberchars += 1 + numbersSinceControlChar += 1 + case UInt8(ascii: "."): + guard numbersSinceControlChar > 0, pastControlChar == .operand else { + throw JSONError.unexpectedCharacter(ascii: byte, characterIndex: readerIndex + numberchars) + } + + numberchars += 1 + hasLeadingZero = false + pastControlChar = .decimalPoint + numbersSinceControlChar = 0 + + case UInt8(ascii: "e"), UInt8(ascii: "E"): + guard numbersSinceControlChar > 0, + pastControlChar == .operand || pastControlChar == .decimalPoint + else { + throw JSONError.unexpectedCharacter(ascii: byte, characterIndex: readerIndex + numberchars) + } + + numberchars += 1 + hasLeadingZero = false + pastControlChar = .exp + numbersSinceControlChar = 0 + case UInt8(ascii: "+"), UInt8(ascii: "-"): + guard numbersSinceControlChar == 0, pastControlChar == .exp else { + throw JSONError.unexpectedCharacter(ascii: byte, characterIndex: readerIndex + numberchars) + } + + numberchars += 1 + pastControlChar = .expOperator + numbersSinceControlChar = 0 + case ._space, ._return, ._newline, ._tab, ._comma, ._closebracket, ._closebrace: + guard numbersSinceControlChar > 0 else { + throw JSONError.unexpectedCharacter(ascii: byte, characterIndex: readerIndex + numberchars) + } + let numberStartIndex = self.readerIndex + self.moveReaderIndex(forwardBy: numberchars) + + return self.makeStringFast(self[numberStartIndex ..< self.readerIndex]) + default: + throw JSONError.unexpectedCharacter(ascii: byte, characterIndex: readerIndex + numberchars) + } + } + + guard numbersSinceControlChar > 0 else { + throw JSONError.unexpectedEndOfFile + } + + defer { self.readerIndex = self.array.endIndex } + return String(decoding: self.array.suffix(from: readerIndex), as: Unicode.UTF8.self) + } + } +} + +extension UInt8 { + + internal static let _space = UInt8(ascii: " ") + internal static let _return = UInt8(ascii: "\r") + 
internal static let _newline = UInt8(ascii: "\n") + internal static let _tab = UInt8(ascii: "\t") + + internal static let _colon = UInt8(ascii: ":") + internal static let _comma = UInt8(ascii: ",") + + internal static let _openbrace = UInt8(ascii: "{") + internal static let _closebrace = UInt8(ascii: "}") + + internal static let _openbracket = UInt8(ascii: "[") + internal static let _closebracket = UInt8(ascii: "]") + + internal static let _quote = UInt8(ascii: "\"") + internal static let _backslash = UInt8(ascii: "\\") + +} + + +enum JSONError: Swift.Error, Equatable { + case cannotConvertInputDataToUTF8 + case unexpectedCharacter(ascii: UInt8, characterIndex: Int) + case unexpectedEndOfFile + case tooManyNestedArraysOrDictionaries(characterIndex: Int) + case invalidHexDigitSequence(String, index: Int) + case unexpectedEscapedCharacter(ascii: UInt8, in: String, index: Int) + case unescapedControlCharacterInString(ascii: UInt8, in: String, index: Int) + case expectedLowSurrogateUTF8SequenceAfterHighSurrogate(in: String, index: Int) + case couldNotCreateUnicodeScalarFromUInt32(in: String, index: Int, unicodeScalarValue: UInt32) + case numberWithLeadingZero(index: Int) + case numberIsNotRepresentableInSwift(parsed: String) + case singleFragmentFoundButNotAllowed +} diff --git a/Sources/Foundation/JSONSerialization.swift b/Sources/Foundation/JSONSerialization.swift index 847fd612f2..df41fa24d8 100644 --- a/Sources/Foundation/JSONSerialization.swift +++ b/Sources/Foundation/JSONSerialization.swift @@ -1,11 +1,14 @@ +//===----------------------------------------------------------------------===// +// // This source file is part of the Swift.org open source project // -// Copyright (c) 2014 - 2016 Apple Inc. and the Swift project authors +// Copyright (c) 2014 - 2021 Apple Inc. 
and the Swift project authors // Licensed under Apache License v2.0 with Runtime Library Exception // -// See http://swift.org/LICENSE.txt for license information -// See http://swift.org/CONTRIBUTORS.txt for the list of Swift project authors +// See https://swift.org/LICENSE.txt for license information +// See https://swift.org/CONTRIBUTORS.txt for the list of Swift project authors // +//===----------------------------------------------------------------------===// @_implementationOnly import CoreFoundation @@ -16,6 +19,9 @@ extension JSONSerialization { public static let mutableContainers = ReadingOptions(rawValue: 1 << 0) public static let mutableLeaves = ReadingOptions(rawValue: 1 << 1) + + public static let fragmentsAllowed = ReadingOptions(rawValue: 1 << 2) + @available(swift, deprecated: 100000, renamed: "JSONSerialization.ReadingOptions.fragmentsAllowed") public static let allowFragments = ReadingOptions(rawValue: 1 << 2) } @@ -176,33 +182,87 @@ open class JSONSerialization : NSObject { The data must be in one of the 5 supported encodings listed in the JSON specification: UTF-8, UTF-16LE, UTF-16BE, UTF-32LE, UTF-32BE. The data may or may not have a BOM. The most efficient encoding to use for parsing is UTF-8, so if you have a choice in encoding the data passed to this method, use UTF-8. 
*/ open class func jsonObject(with data: Data, options opt: ReadingOptions = []) throws -> Any { - return try data.withUnsafeBytes { (rawBuffer: UnsafeRawBufferPointer) -> Any in - let encoding: String.Encoding - let bytes = rawBuffer.baseAddress!.assumingMemoryBound(to: UInt8.self) - let buffer: UnsafeBufferPointer - if let detected = parseBOM(bytes, length: data.count) { - encoding = detected.encoding - buffer = UnsafeBufferPointer(start: bytes.advanced(by: detected.skipLength), count: data.count - detected.skipLength) - } - else { - encoding = detectEncoding(bytes, data.count) - buffer = UnsafeBufferPointer(start: bytes, count: data.count) + do { + let jsonValue = try data.withUnsafeBytes { (ptr) -> JSONValue in + let (encoding, advanceBy) = JSONSerialization.detectEncoding(ptr) + + if encoding == .utf8 { + // we got utf8... happy path + var parser = JSONParser(bytes: Array(ptr[advanceBy.., _ length: Int) -> String.Encoding { - - if length >= 4 { + static func detectEncoding(_ bytes: UnsafeRawBufferPointer) -> (String.Encoding, Int) { + // According to RFC8259, the text encoding in JSON must be UTF8 in nonclosed systems + // https://tools.ietf.org/html/rfc8259#section-8.1 + // However, since Darwin Foundation supports utf16 and utf32, so should Swift Foundation. + + // First let's check if we can determine the encoding based on a leading Byte Ordering Mark + // (BOM). + if bytes.count >= 4 { + if bytes.starts(with: Self.utf8BOM) { + return (.utf8, 3) + } + if bytes.starts(with: Self.utf32BigEndianBOM) { + return (.utf32BigEndian, 4) + } + if bytes.starts(with: Self.utf32LittleEndianBOM) { + return (.utf32LittleEndian, 4) + } + if bytes.starts(with: [0xFF, 0xFE]) { + return (.utf16LittleEndian, 2) + } + if bytes.starts(with: [0xFE, 0xFF]) { + return (.utf16BigEndian, 2) + } + } + + // If there is no BOM present, we might be able to determine the encoding based on + // occurences of null bytes. 
+ if bytes.count >= 4 { switch (bytes[0], bytes[1], bytes[2], bytes[3]) { case (0, 0, 0, _): - return .utf32BigEndian + return (.utf32BigEndian, 0) case (_, 0, 0, 0): - return .utf32LittleEndian + return (.utf32LittleEndian, 0) case (0, _, 0, _): - return .utf16BigEndian + return (.utf16BigEndian, 0) case (_, 0, _, 0): - return .utf16LittleEndian + return (.utf16LittleEndian, 0) default: break } } - else if length >= 2 { + else if bytes.count >= 2 { switch (bytes[0], bytes[1]) { case (0, _): - return .utf16BigEndian + return (.utf16BigEndian, 0) case (_, 0): - return .utf16LittleEndian + return (.utf16LittleEndian, 0) default: break } } - return .utf8 + return (.utf8, 0) } - static func parseBOM(_ bytes: UnsafePointer, length: Int) -> (encoding: String.Encoding, skipLength: Int)? { - if length >= 2 { - switch (bytes[0], bytes[1]) { - case (0xEF, 0xBB): - if length >= 3 && bytes[2] == 0xBF { - return (.utf8, 3) - } - case (0x00, 0x00): - if length >= 4 && bytes[2] == 0xFE && bytes[3] == 0xFF { - return (.utf32BigEndian, 4) - } - case (0xFF, 0xFE): - if length >= 4 && bytes[2] == 0 && bytes[3] == 0 { - return (.utf32LittleEndian, 4) - } - return (.utf16LittleEndian, 2) - case (0xFE, 0xFF): - return (.utf16BigEndian, 2) - default: - break - } - } + static func parseBOM(_ bytes: UnsafeRawBufferPointer) -> (encoding: String.Encoding, skipLength: Int)? 
{ + return nil } + + // These static properties don't look very nice, but we need them to + // workaround: https://bugs.swift.org/browse/SR-14102 + private static let utf8BOM: [UInt8] = [0xEF, 0xBB, 0xBF] + private static let utf32BigEndianBOM: [UInt8] = [0x00, 0x00, 0xFE, 0xFF] + private static let utf32LittleEndianBOM: [UInt8] = [0xFF, 0xFE, 0x00, 0x00] + private static let utf16BigEndianBOM: [UInt8] = [0xFF, 0xFE] + private static let utf16LittleEndianBOM: [UInt8] = [0xFE, 0xFF] } //MARK: - JSONSerializer @@ -548,524 +620,120 @@ private struct JSONWriter { } -//MARK: - JSONDeserializer -private struct JSONReader { - - static let whitespaceASCII: [UInt8] = [ - 0x09, // Horizontal tab - 0x0A, // Line feed or New line - 0x0D, // Carriage return - 0x20, // Space - ] - - struct Structure { - static let BeginArray: UInt8 = 0x5B // [ - static let EndArray: UInt8 = 0x5D // ] - static let BeginObject: UInt8 = 0x7B // { - static let EndObject: UInt8 = 0x7D // } - static let NameSeparator: UInt8 = 0x3A // : - static let ValueSeparator: UInt8 = 0x2C // , - static let QuotationMark: UInt8 = 0x22 // " - static let Escape: UInt8 = 0x5C // \ - } - - typealias Index = Int - typealias IndexDistance = Int - - struct UnicodeSource { - let buffer: UnsafeBufferPointer - let encoding: String.Encoding - let step: Int +enum JSONValue { + case string(String) + case number(String) + case bool(Bool) + case null - init(buffer: UnsafeBufferPointer, encoding: String.Encoding) { - self.buffer = buffer - self.encoding = encoding - - self.step = { - switch encoding { - case .utf8: - return 1 - case .utf16BigEndian, .utf16LittleEndian: - return 2 - case .utf32BigEndian, .utf32LittleEndian: - return 4 - default: - return 1 - } - }() - } - - func takeASCII(_ input: Index) -> (UInt8, Index)? 
{ - guard hasNext(input) else { - return nil - } - - let index: Int - switch encoding { - case .utf8: - index = input - case .utf16BigEndian where buffer[input] == 0: - index = input + 1 - case .utf32BigEndian where buffer[input] == 0 && buffer[input+1] == 0 && buffer[input+2] == 0: - index = input + 3 - case .utf16LittleEndian where buffer[input+1] == 0: - index = input - case .utf32LittleEndian where buffer[input+1] == 0 && buffer[input+2] == 0 && buffer[input+3] == 0: - index = input - default: - return nil - } - return (buffer[index] < 0x80) ? (buffer[index], input + step) : nil - } - - func takeString(_ begin: Index, end: Index) throws -> String { - let byteLength = begin.distance(to: end) - - guard let chunk = String(data: Data(bytes: buffer.baseAddress!.advanced(by: begin), count: byteLength), encoding: encoding) else { - throw NSError(domain: NSCocoaErrorDomain, code: CocoaError.propertyListReadCorrupt.rawValue, userInfo: [ - NSDebugDescriptionErrorKey : "Unable to convert data to a string using the detected encoding. The data may be corrupt." - ]) - } - return chunk - } - - func hasNext(_ input: Index) -> Bool { - return input + step <= buffer.endIndex - } - - func distanceFromStart(_ index: Index) -> IndexDistance { - return buffer.startIndex.distance(to: index) / step - } - } - - let source: UnicodeSource - - func consumeWhitespace(_ input: Index) -> Index? { - var index = input - while let (char, nextIndex) = source.takeASCII(index), JSONReader.whitespaceASCII.contains(char) { - index = nextIndex - } - return index - } - - func consumeStructure(_ ascii: UInt8, input: Index) throws -> Index? { - return try consumeWhitespace(input).flatMap(consumeASCII(ascii)).flatMap(consumeWhitespace) - } - - func consumeASCII(_ ascii: UInt8) -> (Index) throws -> Index? { - return { (input: Index) throws -> Index? 
in - switch self.source.takeASCII(input) { - case nil: - throw NSError(domain: NSCocoaErrorDomain, code: CocoaError.propertyListReadCorrupt.rawValue, userInfo: [ - NSDebugDescriptionErrorKey : "Unexpected end of file during JSON parse." - ]) - case let (taken, index)? where taken == ascii: - return index - default: - return nil - } - } - } - - func consumeASCIISequence(_ sequence: String, input: Index) throws -> Index? { - var index = input - for scalar in sequence.unicodeScalars { - guard let nextIndex = try consumeASCII(UInt8(scalar.value))(index) else { - return nil - } - index = nextIndex - } - return index - } - - func takeMatching(_ match: @escaping (UInt8) -> Bool) -> ([Character], Index) -> ([Character], Index)? { - return { input, index in - guard let (byte, index) = self.source.takeASCII(index), match(byte) else { - return nil - } - return (input + [Character(UnicodeScalar(byte))], index) - } - } - - //MARK: - String Parsing - - func parseString(_ input: Index) throws -> (String, Index)? 
{ - guard let beginIndex = try consumeWhitespace(input).flatMap(consumeASCII(Structure.QuotationMark)) else { - return nil - } - var chunkIndex: Int = beginIndex - var currentIndex: Int = chunkIndex - - var output: String = "" - while source.hasNext(currentIndex) { - guard let (ascii, index) = source.takeASCII(currentIndex) else { - currentIndex += source.step - continue - } - switch ascii { - case Structure.QuotationMark: - output += try source.takeString(chunkIndex, end: currentIndex) - return (output, index) - case Structure.Escape: - output += try source.takeString(chunkIndex, end: currentIndex) - if let (escaped, nextIndex) = try parseEscapeSequence(index) { - output += escaped - chunkIndex = nextIndex - currentIndex = nextIndex - continue - } - else { - throw NSError(domain: NSCocoaErrorDomain, code: CocoaError.propertyListReadCorrupt.rawValue, userInfo: [ - NSDebugDescriptionErrorKey : "Invalid escape sequence at position \(source.distanceFromStart(currentIndex))" - ]) - } - default: - currentIndex = index - } - } - throw NSError(domain: NSCocoaErrorDomain, code: CocoaError.propertyListReadCorrupt.rawValue, userInfo: [ - NSDebugDescriptionErrorKey : "Unexpected end of file during string parse." - ]) - } + case array([JSONValue]) + case object([String: JSONValue]) +} - func parseEscapeSequence(_ input: Index) throws -> (String, Index)? 
{ - guard let (byte, index) = source.takeASCII(input) else { - throw NSError(domain: NSCocoaErrorDomain, code: CocoaError.propertyListReadCorrupt.rawValue, userInfo: [ - NSDebugDescriptionErrorKey : "Early end of unicode escape sequence around character" - ]) - } - let output: String - switch byte { - case 0x22: output = "\"" - case 0x5C: output = "\\" - case 0x2F: output = "/" - case 0x62: output = "\u{08}" // \b - case 0x66: output = "\u{0C}" // \f - case 0x6E: output = "\u{0A}" // \n - case 0x72: output = "\u{0D}" // \r - case 0x74: output = "\u{09}" // \t - case 0x75: return try parseUnicodeSequence(index) - default: return nil +extension JSONValue { + var isValue: Bool { + switch self { + case .array, .object: + return false + case .null, .number, .string, .bool: + return true } - return (output, index) } - - func parseUnicodeSequence(_ input: Index) throws -> (String, Index)? { - - guard let (codeUnit, index) = parseCodeUnit(input) else { - return nil - } - - let isLeadSurrogate = UTF16.isLeadSurrogate(codeUnit) - let isTrailSurrogate = UTF16.isTrailSurrogate(codeUnit) - - guard isLeadSurrogate || isTrailSurrogate else { - // The code units that are neither lead surrogates nor trail surrogates - // form valid unicode scalars. - return (String(UnicodeScalar(codeUnit)!), index) - } - - // Surrogates must always come in pairs. 
- - guard isLeadSurrogate else { - // Trail surrogate must come after lead surrogate - throw CocoaError.error(.propertyListReadCorrupt, - userInfo: [ - NSDebugDescriptionErrorKey : """ - Unable to convert unicode escape sequence (no high-surrogate code point) \ - to UTF8-encoded character at position \(source.distanceFromStart(input)) - """ - ]) - } - - guard let (trailCodeUnit, finalIndex) = try consumeASCIISequence("\\u", input: index).flatMap(parseCodeUnit), - UTF16.isTrailSurrogate(trailCodeUnit) else { - throw CocoaError.error(.propertyListReadCorrupt, - userInfo: [ - NSDebugDescriptionErrorKey : """ - Unable to convert unicode escape sequence (no low-surrogate code point) \ - to UTF8-encoded character at position \(source.distanceFromStart(input)) - """ - ]) + + var isContainer: Bool { + switch self { + case .array, .object: + return true + case .null, .number, .string, .bool: + return false } - - return (String(UTF16.decode(UTF16.EncodedScalar([codeUnit, trailCodeUnit]))), finalIndex) - } - - func isHexChr(_ byte: UInt8) -> Bool { - return (byte >= 0x30 && byte <= 0x39) - || (byte >= 0x41 && byte <= 0x46) - || (byte >= 0x61 && byte <= 0x66) } +} - func parseCodeUnit(_ input: Index) -> (UTF16.CodeUnit, Index)? 
{ - let hexParser = takeMatching(isHexChr) - guard let (result, index) = hexParser([], input).flatMap(hexParser).flatMap(hexParser).flatMap(hexParser), - let value = Int(String(result), radix: 16) else { - return nil +extension JSONValue { + var debugDataTypeDescription: String { + switch self { + case .array: + return "an array" + case .bool: + return "bool" + case .number: + return "a number" + case .string: + return "a string" + case .object: + return "a dictionary" + case .null: + return "null" } - return (UTF16.CodeUnit(value), index) } - - //MARK: - Number parsing - private static let ZERO = UInt8(ascii: "0") - private static let ONE = UInt8(ascii: "1") - private static let NINE = UInt8(ascii: "9") - private static let MINUS = UInt8(ascii: "-") - private static let PLUS = UInt8(ascii: "+") - private static let LOWER_EXPONENT = UInt8(ascii: "e") - private static let UPPER_EXPONENT = UInt8(ascii: "E") - private static let DECIMAL_SEPARATOR = UInt8(ascii: ".") - private static let allDigits = (ZERO...NINE) - private static let oneToNine = (ONE...NINE) - - private static let numberCodePoints: [UInt8] = { - var numberCodePoints = Array(ZERO...NINE) - numberCodePoints.append(contentsOf: [DECIMAL_SEPARATOR, MINUS, PLUS, LOWER_EXPONENT, UPPER_EXPONENT]) - return numberCodePoints - }() - - - func parseNumber(_ input: Index, options opt: JSONSerialization.ReadingOptions) throws -> (Any, Index)? { - - var isNegative = false - var string = "" - var isInteger = true - var exponent = 0 - var index = input - var digitCount: Int? - var ascii: UInt8 = 0 // set by nextASCII() - - // Validate the input is a valid JSON number, also gather the following - // about the input: isNegative, isInteger, the exponent and if it is +/-, - // and finally the count of digits including excluding an '.' 
- func checkJSONNumber() throws -> Bool { - // Return true if the next character is any one of the valid JSON number characters - func nextASCII() -> Bool { - guard let (ch, nextIndex) = source.takeASCII(index), - JSONReader.numberCodePoints.contains(ch) else { return false } +} - index = nextIndex - ascii = ch - string.append(Character(UnicodeScalar(ascii))) - return true +private extension JSONValue { + func toObjcRepresentation(options: JSONSerialization.ReadingOptions) throws -> Any { + switch self { + case .array(let values): + let array = try values.map { try $0.toObjcRepresentation(options: options) } + if !options.contains(.mutableContainers) { + return array } - - // Consume as many digits as possible and return with the next non-digit - // or nil if end of string. - func readDigits() -> UInt8? { - while let (ch, nextIndex) = source.takeASCII(index) { - if !JSONReader.allDigits.contains(ch) { - return ch + return NSMutableArray(array: array, copyItems: false) + case .object(let object): + let dictionary = try object.mapValues { try $0.toObjcRepresentation(options: options) } + if !options.contains(.mutableContainers) { + return dictionary + } + return NSMutableDictionary(dictionary: dictionary, copyItems: false) + case .bool(let bool): + return NSNumber(value: bool) + case .number(let string): + let decIndex = string.firstIndex(of: ".") + let expIndex = string.firstIndex(of: "e") + let isInteger = decIndex == nil && expIndex == nil + let isNegative = string.utf8[string.utf8.startIndex] == UInt8(ascii: "-") + let digitCount = string[string.startIndex..<(expIndex ?? 
string.endIndex)].count + + // Try Int64() or UInt64() first + if isInteger { + if isNegative { + if digitCount <= 19, let intValue = Int64(string) { + return NSNumber(value: intValue) + } + } else { + if digitCount <= 20, let uintValue = UInt64(string) { + return NSNumber(value: uintValue) } - string.append(Character(UnicodeScalar(ch))) - index = nextIndex - } - return nil - } - - guard nextASCII() else { return false } - - if ascii == JSONReader.MINUS { - isNegative = true - guard nextASCII() else { return false } - } - - if JSONReader.oneToNine.contains(ascii) { - guard let ch = readDigits() else { return true } - ascii = ch - if [ JSONReader.DECIMAL_SEPARATOR, JSONReader.LOWER_EXPONENT, JSONReader.UPPER_EXPONENT ].contains(ascii) { - guard nextASCII() else { return false } // There should be at least one char as readDigits didn't remove the '.eE' - } - } else if ascii == JSONReader.ZERO { - guard nextASCII() else { return true } - } else { - throw NSError(domain: NSCocoaErrorDomain, code: CocoaError.propertyListReadCorrupt.rawValue, - userInfo: [NSDebugDescriptionErrorKey : "Numbers must start with a 1-9 at character \(input)." ]) - } - - if ascii == JSONReader.DECIMAL_SEPARATOR { - isInteger = false - guard readDigits() != nil else { return true } - guard nextASCII() else { return true } - } else if JSONReader.allDigits.contains(ascii) { - throw NSError(domain: NSCocoaErrorDomain, code: CocoaError.propertyListReadCorrupt.rawValue, - userInfo: [NSDebugDescriptionErrorKey : "Leading zeros not allowed at character \(input)." ]) - } - - digitCount = string.count - (isInteger ? 0 : 1) - (isNegative ? 1 : 0) - guard ascii == JSONReader.LOWER_EXPONENT || ascii == JSONReader.UPPER_EXPONENT else { - // End of valid number characters - return true - } - digitCount = digitCount! 
- 1 - - // Process the exponent - isInteger = false - let positiveExponent: Bool - - guard nextASCII() else { return false } - if ascii == JSONReader.MINUS { - positiveExponent = false - guard nextASCII() else { return false } - } else if ascii == JSONReader.PLUS { - positiveExponent = true - guard nextASCII() else { return false } - } else { - positiveExponent = true - } - guard JSONReader.allDigits.contains(ascii) else { return false } - exponent = Int(ascii - JSONReader.ZERO) - while nextASCII() { - guard JSONReader.allDigits.contains(ascii) else { return false } // Invalid exponent character - exponent = (exponent * 10) + Int(ascii - JSONReader.ZERO) - if exponent > 324 { - // Exponent is too large to store in a Double - return false - } - } - exponent = positiveExponent ? exponent : -exponent - return true - } - - guard try checkJSONNumber() == true else { return nil } - digitCount = digitCount ?? string.count - (isInteger ? 0 : 1) - (isNegative ? 1 : 0) - - // Try Int64() or UInt64() first - if isInteger { - if isNegative { - if digitCount! <= 19, let intValue = Int64(string) { - return (NSNumber(value: intValue), index) - } - } else { - if digitCount! <= 20, let uintValue = UInt64(string) { - return (NSNumber(value: uintValue), index) } } - } - // Decimal holds more digits of precision but a smaller exponent than Double - // so try that if the exponent fits and there are more digits than Double can hold - if digitCount! > 17 && exponent >= -128 && exponent <= 127, - let decimal = Decimal(string: string), decimal.isFinite { - return (NSDecimalNumber(decimal: decimal), index) - } - // Fall back to Double() for everything else - if let doubleValue = Double(string) { - return (NSNumber(value: doubleValue), index) - } - return nil - } - - func parseValue(_ input: Index, options opt: JSONSerialization.ReadingOptions, recursionDepth: Int) throws -> (Any, Index)? 
{ - guard recursionDepth < JSONSerialization.maximumRecursionDepth else { - throw NSError(domain: NSCocoaErrorDomain, code: CocoaError.propertyListReadCorrupt.rawValue, userInfo: [ - NSDebugDescriptionErrorKey: "Recursion depth exceeded during parsing" - ]) - } - - let newDepth = recursionDepth + 1 - - if let (value, parser) = try parseString(input) { - return (value, parser) - } - else if let parser = try consumeASCIISequence("true", input: input) { - return (NSNumber(value: true), parser) - } - else if let parser = try consumeASCIISequence("false", input: input) { - return (NSNumber(value: false), parser) - } - else if let parser = try consumeASCIISequence("null", input: input) { - return (NSNull(), parser) - } - else if let (object, parser) = try parseObject(input, options: opt, recursionDepth: newDepth) { - return (object, parser) - } - else if let (array, parser) = try parseArray(input, options: opt, recursionDepth: newDepth) { - return (array, parser) - } - else if let (number, parser) = try parseNumber(input, options: opt) { - return (number, parser) - } - return nil - } - - //MARK: - Object parsing - func parseObject(_ input: Index, options opt: JSONSerialization.ReadingOptions, recursionDepth: Int) throws -> ([String: Any], Index)? 
{ - guard let beginIndex = try consumeStructure(Structure.BeginObject, input: input) else { - return nil - } - var index = beginIndex - var output: [String: Any] = [:] - while true { - if let finalIndex = try consumeStructure(Structure.EndObject, input: index) { - return (output, finalIndex) - } - - if let (key, value, nextIndex) = try parseObjectMember(index, options: opt, recursionDepth: recursionDepth) { - output[key] = value - - if let finalParser = try consumeStructure(Structure.EndObject, input: nextIndex) { - return (output, finalParser) - } - else if let nextIndex = try consumeStructure(Structure.ValueSeparator, input: nextIndex) { - index = nextIndex - continue - } - else { - return nil + var exp = 0 + + if let expIndex = expIndex { + let expStartIndex = string.index(after: expIndex) + if let parsed = Int(string[expStartIndex...]) { + exp = parsed } } - return nil - } - } - - func parseObjectMember(_ input: Index, options opt: JSONSerialization.ReadingOptions, recursionDepth: Int) throws -> (String, Any, Index)? 
{ - guard let (name, index) = try parseString(input) else { - throw NSError(domain: NSCocoaErrorDomain, code: CocoaError.propertyListReadCorrupt.rawValue, userInfo: [ - NSDebugDescriptionErrorKey : "Missing object key at location \(source.distanceFromStart(input))" - ]) - } - guard let separatorIndex = try consumeStructure(Structure.NameSeparator, input: index) else { - throw NSError(domain: NSCocoaErrorDomain, code: CocoaError.propertyListReadCorrupt.rawValue, userInfo: [ - NSDebugDescriptionErrorKey : "Invalid separator at location \(source.distanceFromStart(index))" - ]) - } - guard let (value, finalIndex) = try parseValue(separatorIndex, options: opt, recursionDepth: recursionDepth) else { - throw NSError(domain: NSCocoaErrorDomain, code: CocoaError.propertyListReadCorrupt.rawValue, userInfo: [ - NSDebugDescriptionErrorKey : "Invalid value at location \(source.distanceFromStart(separatorIndex))" - ]) - } - - return (name, value, finalIndex) - } - - //MARK: - Array parsing - func parseArray(_ input: Index, options opt: JSONSerialization.ReadingOptions, recursionDepth: Int) throws -> ([Any], Index)? 
{ - guard let beginIndex = try consumeStructure(Structure.BeginArray, input: input) else { - return nil - } - var index = beginIndex - var output: [Any] = [] - while true { - if let finalIndex = try consumeStructure(Structure.EndArray, input: index) { - return (output, finalIndex) + + // Decimal holds more digits of precision but a smaller exponent than Double + // so try that if the exponent fits and there are more digits than Double can hold + if digitCount > 17, exp >= -128, exp <= 127, let decimal = Decimal(string: string), decimal.isFinite { + return NSDecimalNumber(decimal: decimal) } - if let (value, nextIndex) = try parseValue(index, options: opt, recursionDepth: recursionDepth) { - output.append(value) - - if let finalIndex = try consumeStructure(Structure.EndArray, input: nextIndex) { - return (output, finalIndex) - } - else if let nextIndex = try consumeStructure(Structure.ValueSeparator, input: nextIndex) { - index = nextIndex - continue - } + // Fall back to Double() for everything else + if let doubleValue = Double(string), doubleValue.isFinite { + return NSNumber(value: doubleValue) } - throw NSError(domain: NSCocoaErrorDomain, code: CocoaError.propertyListReadCorrupt.rawValue, userInfo: [ - NSDebugDescriptionErrorKey : "Badly formed array at location \(source.distanceFromStart(index))" - ]) + + throw JSONError.numberIsNotRepresentableInSwift(parsed: string) + case .null: + return NSNull() + case .string(let string): + if options.contains(.mutableLeaves) { + return NSMutableString(string: string) + } + return string } } } diff --git a/Tests/Foundation/Tests/TestJSONSerialization.swift b/Tests/Foundation/Tests/TestJSONSerialization.swift index cb07a2c498..a78355f458 100644 --- a/Tests/Foundation/Tests/TestJSONSerialization.swift +++ b/Tests/Foundation/Tests/TestJSONSerialization.swift @@ -90,22 +90,27 @@ extension TestJSONSerialization { //Deserialization with Data ("test_deserialize_emptyObject_withData", test_deserialize_emptyObject_withData), 
("test_deserialize_multiStringObject_withData", test_deserialize_multiStringObject_withData), + ("test_deserialize_stringWithSpacesAtStart_withData", test_deserialize_stringWithSpacesAtStart_withData), + ("test_deserialize_highlyNestedArray_withData", test_deserialize_highlyNestedObject_withData), ("test_deserialize_emptyArray_withData", test_deserialize_emptyArray_withData), ("test_deserialize_multiStringArray_withData", test_deserialize_multiStringArray_withData), ("test_deserialize_unicodeString_withData", test_deserialize_unicodeString_withData), - ("test_deserialize_stringWithSpacesAtStart_withData", test_deserialize_stringWithSpacesAtStart_withData), - - + ("test_deserialize_highlyNestedArray_withData", test_deserialize_highlyNestedArray_withData), + ("test_deserialize_values_withData", test_deserialize_values_withData), ("test_deserialize_values_as_reference_types_withData", test_deserialize_values_as_reference_types_withData), ("test_deserialize_numbers_withData", test_deserialize_numbers_withData), + ("test_deserialize_numberWithLeadingZero_withData", test_deserialize_numberWithLeadingZero_withData), + ("test_deserialize_numberThatIsntRepresentableInSwift_withData", test_deserialize_numberThatIsntRepresentableInSwift_withData), ("test_deserialize_numbers_as_reference_types_withData", test_deserialize_numbers_as_reference_types_withData), ("test_deserialize_simpleEscapeSequences_withData", test_deserialize_simpleEscapeSequences_withData), ("test_deserialize_unicodeEscapeSequence_withData", test_deserialize_unicodeEscapeSequence_withData), ("test_deserialize_unicodeSurrogatePairEscapeSequence_withData", test_deserialize_unicodeSurrogatePairEscapeSequence_withData), ("test_deserialize_allowFragments_withData", test_deserialize_allowFragments_withData), + ("test_deserialize_unescapedControlCharactersWithData", test_deserialize_unescapedControlCharactersWithData), + ("test_deserialize_unescapedReversedSolidusWithData", 
test_deserialize_unescapedReversedSolidusWithData), ("test_deserialize_unterminatedObjectString_withData", test_deserialize_unterminatedObjectString_withData), ("test_deserialize_missingObjectKey_withData", test_deserialize_missingObjectKey_withData), @@ -121,22 +126,27 @@ extension TestJSONSerialization { //Deserialization with Stream ("test_deserialize_emptyObject_withStream", test_deserialize_emptyObject_withStream), ("test_deserialize_multiStringObject_withStream", test_deserialize_multiStringObject_withStream), + ("test_deserialize_stringWithSpacesAtStart_withStream", test_deserialize_stringWithSpacesAtStart_withStream), + ("test_deserialize_highlyNestedObject_withStream", test_deserialize_highlyNestedObject_withStream), ("test_deserialize_emptyArray_withStream", test_deserialize_emptyArray_withStream), ("test_deserialize_multiStringArray_withStream", test_deserialize_multiStringArray_withStream), ("test_deserialize_unicodeString_withStream", test_deserialize_unicodeString_withStream), - ("test_deserialize_stringWithSpacesAtStart_withStream", test_deserialize_stringWithSpacesAtStart_withStream), - - + ("test_deserialize_highlyNestedArray_withStream", test_deserialize_highlyNestedArray_withStream), + ("test_deserialize_values_withStream", test_deserialize_values_withStream), ("test_deserialize_values_as_reference_types_withStream", test_deserialize_values_as_reference_types_withStream), ("test_deserialize_numbers_withStream", test_deserialize_numbers_withStream), + ("test_deserialize_numberWithLeadingZero_withStream", test_deserialize_numberWithLeadingZero_withStream), + ("test_deserialize_numberThatIsntRepresentableInSwift_withStream", test_deserialize_numberThatIsntRepresentableInSwift_withStream), ("test_deserialize_numbers_as_reference_types_withStream", test_deserialize_numbers_as_reference_types_withStream), ("test_deserialize_simpleEscapeSequences_withStream", test_deserialize_simpleEscapeSequences_withStream), 
("test_deserialize_unicodeEscapeSequence_withStream", test_deserialize_unicodeEscapeSequence_withStream), ("test_deserialize_unicodeSurrogatePairEscapeSequence_withStream", test_deserialize_unicodeSurrogatePairEscapeSequence_withStream), ("test_deserialize_allowFragments_withStream", test_deserialize_allowFragments_withStream), + ("test_deserialize_unescapedControlCharactersWithStream", test_deserialize_unescapedControlCharactersWithStream), + ("test_deserialize_unescapedReversedSolidusWithStream", test_deserialize_unescapedReversedSolidusWithStream), ("test_deserialize_unterminatedObjectString_withStream", test_deserialize_unterminatedObjectString_withStream), ("test_deserialize_missingObjectKey_withStream", test_deserialize_missingObjectKey_withStream), @@ -163,6 +173,10 @@ extension TestJSONSerialization { func test_deserialize_multiStringObject_withData() { deserialize_multiStringObject(objectType: .data) } + + func test_deserialize_highlyNestedObject_withData() { + deserialize_highlyNestedObject(objectType: .data) + } func test_deserialize_emptyArray_withData() { deserialize_emptyArray(objectType: .data) @@ -176,6 +190,10 @@ extension TestJSONSerialization { func test_deserialize_unicodeString_withData() { deserialize_unicodeString(objectType: .data) } + + func test_deserialize_highlyNestedArray_withData() { + deserialize_highlyNestedArray(objectType: .data) + } func test_deserialize_stringWithSpacesAtStart_withData() { deserialize_stringWithSpacesAtStart(objectType: .data) @@ -192,6 +210,14 @@ extension TestJSONSerialization { func test_deserialize_numbers_withData() { deserialize_numbers(objectType: .data) } + + func test_deserialize_numberWithLeadingZero_withData() { + deserialize_numberWithLeadingZero(objectType: .data) + } + + func test_deserialize_numberThatIsntRepresentableInSwift_withData() { + deserialize_numberThatIsntRepresentableInSwift(objectType: .data) + } func test_deserialize_numbers_as_reference_types_withData() { 
deserialize_numbers_as_reference_types(objectType: .data) @@ -212,6 +238,14 @@ extension TestJSONSerialization { func test_deserialize_allowFragments_withData() { deserialize_allowFragments(objectType: .data) } + + func test_deserialize_unescapedControlCharactersWithData() { + deserialize_unescapedControlCharacters(objectType: .data) + } + + func test_deserialize_unescapedReversedSolidusWithData() { + deserialize_unescapedReversedSolidus(objectType: .data) + } func test_deserialize_unterminatedObjectString_withData() { deserialize_unterminatedObjectString(objectType: .data) @@ -260,6 +294,10 @@ extension TestJSONSerialization { func test_deserialize_multiStringObject_withStream() { deserialize_multiStringObject(objectType: .stream) } + + func test_deserialize_highlyNestedObject_withStream() { + deserialize_highlyNestedObject(objectType: .stream) + } func test_deserialize_emptyArray_withStream() { deserialize_emptyArray(objectType: .stream) @@ -273,6 +311,10 @@ extension TestJSONSerialization { func test_deserialize_unicodeString_withStream() { deserialize_unicodeString(objectType: .stream) } + + func test_deserialize_highlyNestedArray_withStream() { + deserialize_highlyNestedArray(objectType: .stream) + } func test_deserialize_stringWithSpacesAtStart_withStream() { deserialize_stringWithSpacesAtStart(objectType: .stream) @@ -289,6 +331,14 @@ extension TestJSONSerialization { func test_deserialize_numbers_withStream() { deserialize_numbers(objectType: .stream) } + + func test_deserialize_numberWithLeadingZero_withStream() { + deserialize_numberWithLeadingZero(objectType: .stream) + } + + func test_deserialize_numberThatIsntRepresentableInSwift_withStream() { + deserialize_numberThatIsntRepresentableInSwift(objectType: .stream) + } func test_deserialize_numbers_as_reference_types_withStream() { deserialize_numbers_as_reference_types(objectType: .stream) @@ -309,6 +359,14 @@ extension TestJSONSerialization { func test_deserialize_allowFragments_withStream() { 
deserialize_allowFragments(objectType: .stream) } + + func test_deserialize_unescapedControlCharactersWithStream() { + deserialize_unescapedControlCharacters(objectType: .stream) + } + + func test_deserialize_unescapedReversedSolidusWithStream() { + deserialize_unescapedReversedSolidus(objectType: .stream) + } func test_deserialize_unterminatedObjectString_withStream() { deserialize_unterminatedObjectString(objectType: .stream) @@ -354,10 +412,7 @@ extension TestJSONSerialization { func deserialize_emptyObject(objectType: ObjectType) { let subject = "{}" - guard let data = subject.data(using: .utf8) else { - XCTFail("Unable to convert string to data") - return - } + let data = Data(subject.utf8) var result: [String: Any]? XCTAssertNoThrow(result = try getjsonObjectResult(data, objectType) as? [String: Any]) @@ -390,15 +445,32 @@ extension TestJSONSerialization { XCTAssertNoThrow(result = try getjsonObjectResult(data, objectType) as? [String: Any]) XCTAssertEqual(result?["title"] as? String, " hello world!!") } + + func deserialize_highlyNestedObject(objectType: ObjectType) { + // test 512 should succeed + let passingString = String(repeating: #"{"a":"#, count: 512) + "null" + String(repeating: "}", count: 512) + let passingData = Data(passingString.utf8) + XCTAssertNoThrow(_ = try getjsonObjectResult(passingData, objectType) as? [Any]) + + // test 513 should fail + let failingString = String(repeating: #"{"a":"#, count: 513) + let failingData = Data(failingString.utf8) + + XCTAssertThrowsError(try getjsonObjectResult(failingData, objectType)) { error in + guard let nserror = (error as? NSError) else { + return XCTFail("Unexpected error: \(error)") + } + XCTAssertEqual(nserror.domain, NSCocoaErrorDomain) + XCTAssertEqual(CocoaError(_nsError: nserror).code, .propertyListReadCorrupt) + XCTAssertEqual(nserror.userInfo[NSDebugDescriptionErrorKey] as? 
String, "Too many nested arrays or dictionaries around character 2561.") + } + } //MARK: - Array Deserialization func deserialize_emptyArray(objectType: ObjectType) { let subject = "[]" - guard let data = subject.data(using: .utf8) else { - XCTFail("Unable to convert string to data") - return - } + let data = Data(subject.utf8) var result: [Any]? XCTAssertNoThrow(result = try getjsonObjectResult(data, objectType) as? [Any]) XCTAssertEqual(result?.count, 0) @@ -436,6 +508,26 @@ extension TestJSONSerialization { XCTAssertEqual(iterator?.next() as? String, "😢") } } + + func deserialize_highlyNestedArray(objectType: ObjectType) { + // test 512 should succeed + let passingString = String(repeating: "[", count: 512) + String(repeating: "]", count: 512) + let passingData = Data(passingString.utf8) + XCTAssertNoThrow(_ = try getjsonObjectResult(passingData, objectType) as? [Any]) + + // test 513 should fail + let failingString = String(repeating: "[", count: 513) + let failingData = Data(failingString.utf8) + + XCTAssertThrowsError(try getjsonObjectResult(failingData, objectType)) { error in + guard let nserror = (error as? NSError) else { + return XCTFail("Unexpected error: \(error)") + } + XCTAssertEqual(nserror.domain, NSCocoaErrorDomain) + XCTAssertEqual(CocoaError(_nsError: nserror).code, .propertyListReadCorrupt) + XCTAssertEqual(nserror.userInfo[NSDebugDescriptionErrorKey] as? String, "Too many nested arrays or dictionaries around character 513.") + } + } //MARK: - Value parsing func deserialize_values(objectType: ObjectType) { @@ -507,6 +599,38 @@ extension TestJSONSerialization { XCTAssertEqual(iterator?.next() as? 
Double, 43) } } + + func deserialize_numberWithLeadingZero(objectType: ObjectType) { + let subject = "[01]" + + for encoding in supportedEncodings { + guard let data = subject.data(using: encoding) else { + XCTFail("Unable to convert string to data") + return + } + XCTAssertThrowsError(_ = try getjsonObjectResult(data, objectType)) { error in + guard let nserror = (error as? NSError) else { + return XCTFail("Unexpected error: \(error)") + } + XCTAssertEqual(nserror.domain, NSCocoaErrorDomain) + XCTAssertEqual(CocoaError(_nsError: nserror).code, .propertyListReadCorrupt) + XCTAssertEqual(nserror.userInfo[NSDebugDescriptionErrorKey] as? String, "Number with leading zero around character 2.") + } + } + } + + func deserialize_numberThatIsntRepresentableInSwift(objectType: ObjectType) { + let subject = "[1.1e547]" + let data = Data(subject.utf8) + + XCTAssertThrowsError(try getjsonObjectResult(data, objectType)) { error in + guard let nserror = (error as? NSError) else { + return XCTFail("Unexpected error: \(error)") + } + XCTAssertEqual(nserror.domain, NSCocoaErrorDomain) + XCTAssertEqual(CocoaError(_nsError: nserror).code, .propertyListReadCorrupt) + } + } func deserialize_numbers_as_reference_types(objectType: ObjectType) { let subject = "[1, -1, 1.3, -1.3, 1e3, 1E-3, 10, -12.34e56, 12.34e-56, 12.34e+6, 0.002, 0.0043e+4]" @@ -540,10 +664,7 @@ extension TestJSONSerialization { func deserialize_simpleEscapeSequences(objectType: ObjectType) { let subject = "[\"\\\"\", \"\\\\\", \"\\/\", \"\\b\", \"\\f\", \"\\n\", \"\\r\", \"\\t\"]" - guard let data = subject.data(using: .utf8) else { - XCTFail("Unable to convert string to data") - return - } + let data = Data(subject.utf8) var res: [Any]? XCTAssertNoThrow(res = try getjsonObjectResult(data, objectType) as? [Any]) let result = res?.compactMap { $0 as? 
String } @@ -560,10 +681,7 @@ extension TestJSONSerialization { func deserialize_unicodeEscapeSequence(objectType: ObjectType) { let subject = "[\"\\u2728\"]" - guard let data = subject.data(using: .utf8) else { - XCTFail("Unable to convert string to data") - return - } + let data = Data(subject.utf8) var result: [Any]? XCTAssertNoThrow(result = try getjsonObjectResult(data, objectType) as? [Any]) // result?[0] as? String returns an Optional and RHS is promoted @@ -573,10 +691,7 @@ extension TestJSONSerialization { func deserialize_unicodeSurrogatePairEscapeSequence(objectType: ObjectType) { let subject = "[\"\\uD834\\udd1E\"]" - guard let data = subject.data(using: .utf8) else { - XCTFail("Unable to convert string to data") - return - } + let data = Data(subject.utf8) var result: [Any]? XCTAssertNoThrow(result = try getjsonObjectResult(data, objectType) as? [Any]) // result?[0] as? String returns an Optional and RHS is promoted @@ -600,6 +715,7 @@ extension TestJSONSerialization { } XCTAssertEqual(nserror.domain, NSCocoaErrorDomain) XCTAssertEqual(CocoaError(_nsError: nserror).code, .propertyListReadCorrupt) + XCTAssertEqual(nserror.userInfo[NSDebugDescriptionErrorKey] as? String, "JSON text did not start with array or object and option to allow fragments not set.") } var result: Int? @@ -607,15 +723,49 @@ extension TestJSONSerialization { XCTAssertEqual(result, 3) } } + + func deserialize_unescapedControlCharacters(objectType: ObjectType) { + // All Unicode characters may be placed within the + // quotation marks, except for the characters that MUST be escaped: + // quotation mark, reverse solidus, and the control characters (U+0000 + // through U+001F). + // https://tools.ietf.org/html/rfc7159#section-7 + + for index in 0 ... 31 { + var scalars = "[\"".unicodeScalars + let invalidScalar = Unicode.Scalar(index)! 
+ scalars.append(invalidScalar) + scalars.append(contentsOf: "\"]".unicodeScalars) + let json = String(scalars) + let data = Data(json.utf8) + + XCTAssertThrowsError(try getjsonObjectResult(data, objectType)) { error in + guard let nserror = (error as? NSError) else { + return XCTFail("Unexpected error: \(error)") + } + XCTAssertEqual(nserror.domain, NSCocoaErrorDomain) + XCTAssertEqual(CocoaError(_nsError: nserror).code, .propertyListReadCorrupt) + XCTAssertEqual(nserror.userInfo[NSDebugDescriptionErrorKey] as? String, "Unescaped control character around character 2.") + } + } + } + + func deserialize_unescapedReversedSolidus(objectType: ObjectType) { + XCTAssertThrowsError(try getjsonObjectResult(Data(#"" \ ""#.utf8), objectType, options: .allowFragments)) { error in + guard let nserror = (error as? NSError) else { + return XCTFail("Unexpected error: \(error)") + } + XCTAssertEqual(nserror.domain, NSCocoaErrorDomain) + XCTAssertEqual(CocoaError(_nsError: nserror).code, .propertyListReadCorrupt) + XCTAssertEqual(nserror.userInfo[NSDebugDescriptionErrorKey] as? String, "Invalid escape sequence around character 2.") + } + } //MARK: - Parsing Errors func deserialize_unterminatedObjectString(objectType: ObjectType) { let subject = "{\"}" - guard let data = subject.data(using: .utf8) else { - XCTFail("Unable to convert string to data") - return - } + let data = Data(subject.utf8) XCTAssertThrowsError(_ = try getjsonObjectResult(data, objectType)) { error in guard let nserror = (error as? NSError) else { return XCTFail("Unexpected error: \(error)") @@ -628,10 +778,7 @@ extension TestJSONSerialization { func deserialize_missingObjectKey(objectType: ObjectType) { let subject = "{3}" - guard let data = subject.data(using: .utf8) else { - XCTFail("Unable to convert string to data") - return - } + let data = Data(subject.utf8) XCTAssertThrowsError(_ = try getjsonObjectResult(data, objectType)) { error in guard let nserror = (error as? 
NSError) else { return XCTFail("Unexpected error: \(error)") @@ -644,10 +791,7 @@ extension TestJSONSerialization { func deserialize_unexpectedEndOfFile(objectType: ObjectType) { let subject = "{" - guard let data = subject.data(using: .utf8) else { - XCTFail("Unable to convert string to data") - return - } + let data = Data(subject.utf8) XCTAssertThrowsError(_ = try getjsonObjectResult(data, objectType)) { error in guard let nserror = (error as? NSError) else { return XCTFail("Unexpected error: \(error)") @@ -661,26 +805,21 @@ extension TestJSONSerialization { func deserialize_invalidValueInObject(objectType: ObjectType) { let subject = "{\"error\":}" - guard let data = subject.data(using: .utf8) else { - XCTFail("Unable to convert string to data") - return - } + let data = Data(subject.utf8) XCTAssertThrowsError(_ = try getjsonObjectResult(data, objectType)) { error in guard let nserror = (error as? NSError) else { return XCTFail("Unexpected error: \(error)") } XCTAssertEqual(nserror.domain, NSCocoaErrorDomain) XCTAssertEqual(CocoaError(_nsError: nserror).code, .propertyListReadCorrupt) + XCTAssertEqual(nserror.userInfo[NSDebugDescriptionErrorKey] as? String, "Invalid value around character 9.") } } func deserialize_invalidValueIncorrectSeparatorInObject(objectType: ObjectType) { let subject = "{\"missing\";}" - guard let data = subject.data(using: .utf8) else { - XCTFail("Unable to convert string to data") - return - } + let data = Data(subject.utf8) XCTAssertThrowsError(_ = try getjsonObjectResult(data, objectType)) { error in guard let nserror = (error as? 
NSError) else { return XCTFail("Unexpected error: \(error)") @@ -693,26 +832,21 @@ extension TestJSONSerialization { func deserialize_invalidValueInArray(objectType: ObjectType) { let subject = "[," - guard let data = subject.data(using: .utf8) else { - XCTFail("Unable to convert string to data") - return - } + let data = Data(subject.utf8) XCTAssertThrowsError(_ = try getjsonObjectResult(data, objectType)) { error in guard let nserror = (error as? NSError) else { return XCTFail("Unexpected error: \(error)") } XCTAssertEqual(nserror.domain, NSCocoaErrorDomain) XCTAssertEqual(CocoaError(_nsError: nserror).code, .propertyListReadCorrupt) + XCTAssertEqual(nserror.userInfo[NSDebugDescriptionErrorKey] as? String, "Invalid value around character 1.") } } func deserialize_badlyFormedArray(objectType: ObjectType) { let subject = "[2b4]" - guard let data = subject.data(using: .utf8) else { - XCTFail("Unable to convert string to data") - return - } + let data = Data(subject.utf8) XCTAssertThrowsError(_ = try getjsonObjectResult(data, objectType)) { error in guard let nserror = (error as? NSError) else { return XCTFail("Unexpected error: \(error)") @@ -725,46 +859,40 @@ extension TestJSONSerialization { func deserialize_invalidEscapeSequence(objectType: ObjectType) { let subject = "[\"\\e\"]" - guard let data = subject.data(using: .utf8) else { - XCTFail("Unable to convert string to data") - return - } + let data = Data(subject.utf8) XCTAssertThrowsError(_ = try getjsonObjectResult(data, objectType)) { error in guard let nserror = (error as? NSError) else { return XCTFail("Unexpected error: \(error)") } XCTAssertEqual(nserror.domain, NSCocoaErrorDomain) XCTAssertEqual(CocoaError(_nsError: nserror).code, .propertyListReadCorrupt) + XCTAssertEqual(nserror.userInfo[NSDebugDescriptionErrorKey] as? 
String, "Invalid escape sequence around character 2.") } } func deserialize_unicodeMissingLeadingSurrogate(objectType: ObjectType) { let subject = "[\"\\uDFF3\"]" - guard let data = subject.data(using: .utf8) else { - XCTFail("Unable to convert string to data") - return - } + let data = Data(subject.utf8) XCTAssertThrowsError(_ = try getjsonObjectResult(data, objectType)) { error in guard let nserror = (error as? NSError) else { return XCTFail("Unexpected error: \(error)") } XCTAssertEqual(nserror.domain, NSCocoaErrorDomain) XCTAssertEqual(CocoaError(_nsError: nserror).code, .propertyListReadCorrupt) + XCTAssertEqual(nserror.userInfo[NSDebugDescriptionErrorKey] as? String, "Unable to convert hex escape sequence (no high character) to UTF8-encoded character.") } } func deserialize_unicodeMissingTrailingSurrogate(objectType: ObjectType) { let subject = "[\"\\uD834\"]" - guard let data = subject.data(using: .utf8) else { - XCTFail("Unable to convert string to data") - return - } + let data = Data(subject.utf8) XCTAssertThrowsError(_ = try getjsonObjectResult(data, objectType)) { error in guard let nserror = (error as? NSError) else { return XCTFail("Unexpected error: \(error)") } XCTAssertEqual(nserror.domain, NSCocoaErrorDomain) XCTAssertEqual(CocoaError(_nsError: nserror).code, .propertyListReadCorrupt) + XCTAssertEqual(nserror.userInfo[NSDebugDescriptionErrorKey] as? String, "Unexpected end of file during string parse (expected low-surrogate code point but did not find one).") } }