Skip to content

JSONSerialization: Improve parsing of numbers #1657

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 3 commits into from
Nov 7, 2018
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
175 changes: 136 additions & 39 deletions Foundation/JSONSerialization.swift
Original file line number Diff line number Diff line change
Expand Up @@ -816,52 +816,149 @@ private struct JSONReader {
}

//MARK: - Number parsing
static let numberCodePoints: [UInt8] = [
0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, 0x37, 0x38, 0x39, // 0...9
0x2E, 0x2D, 0x2B, 0x45, 0x65, // . - + E e
]
private static let ZERO = UInt8(ascii: "0")
private static let ONE = UInt8(ascii: "1")
private static let NINE = UInt8(ascii: "9")
private static let MINUS = UInt8(ascii: "-")
private static let PLUS = UInt8(ascii: "+")
private static let LOWER_EXPONENT = UInt8(ascii: "e")
private static let UPPER_EXPONENT = UInt8(ascii: "E")
private static let DECIMAL_SEPARATOR = UInt8(ascii: ".")
private static let allDigits = (ZERO...NINE)
private static let oneToNine = (ONE...NINE)

private static let numberCodePoints: [UInt8] = {
var numberCodePoints = Array(ZERO...NINE)
numberCodePoints.append(contentsOf: [DECIMAL_SEPARATOR, MINUS, PLUS, LOWER_EXPONENT, UPPER_EXPONENT])
return numberCodePoints
}()


func parseNumber(_ input: Index, options opt: JSONSerialization.ReadingOptions) throws -> (Any, Index)? {
func parseTypedNumber(_ address: UnsafePointer<UInt8>, count: Int) -> (Any, IndexDistance)? {
let temp_buffer_size = 64
var temp_buffer = [Int8](repeating: 0, count: temp_buffer_size)
return temp_buffer.withUnsafeMutableBufferPointer { (buffer: inout UnsafeMutableBufferPointer<Int8>) -> (Any, IndexDistance)? in
memcpy(buffer.baseAddress!, address, min(count, temp_buffer_size - 1)) // ensure null termination

let startPointer = buffer.baseAddress!
let intEndPointer = UnsafeMutablePointer<UnsafeMutablePointer<Int8>?>.allocate(capacity: 1)
defer { intEndPointer.deallocate() }
let doubleEndPointer = UnsafeMutablePointer<UnsafeMutablePointer<Int8>?>.allocate(capacity: 1)
defer { doubleEndPointer.deallocate() }
let intResult = strtol(startPointer, intEndPointer, 10)
let intDistance = startPointer.distance(to: intEndPointer[0]!)
let doubleResult = strtod(startPointer, doubleEndPointer)
let doubleDistance = startPointer.distance(to: doubleEndPointer[0]!)

guard doubleDistance > 0 else { return nil }
if intDistance == doubleDistance {
return (NSNumber(value: intResult), intDistance)

var isNegative = false
var string = ""
var isInteger = true
var exponent = 0
var positiveExponent = true
var index = input
var digitCount: Int?
var ascii: UInt8 = 0 // set by nextASCII()

// Validate that the input is a valid JSON number, and also gather the following
// about the input: isNegative, isInteger, the exponent and whether it is +/-,
// and finally the count of digits, excluding any '.'
func checkJSONNumber() throws -> Bool {
// Return true if the next character is any one of the valid JSON number characters
func nextASCII() -> Bool {
guard let (ch, nextIndex) = source.takeASCII(index),
JSONReader.numberCodePoints.contains(ch) else { return false }

index = nextIndex
ascii = ch
string.append(Character(UnicodeScalar(ascii)))
return true
}

// Consume as many digits as possible and return with the next non-digit
// or nil if end of string.
func readDigits() -> UInt8? {
while let (ch, nextIndex) = source.takeASCII(index) {
if !JSONReader.allDigits.contains(ch) {
return ch
}
string.append(Character(UnicodeScalar(ch)))
index = nextIndex
}
return nil
}

guard nextASCII() else { return false }

if ascii == JSONReader.MINUS {
isNegative = true
guard nextASCII() else { return false }
}

if JSONReader.oneToNine.contains(ascii) {
guard let ch = readDigits() else { return true }
ascii = ch
if [ JSONReader.DECIMAL_SEPARATOR, JSONReader.LOWER_EXPONENT, JSONReader.UPPER_EXPONENT ].contains(ascii) {
guard nextASCII() else { return false } // There should be at least one char as readDigits didnt remove the '.eE'
}
} else if ascii == JSONReader.ZERO {
guard nextASCII() else { return true }
} else {
throw NSError(domain: NSCocoaErrorDomain, code: CocoaError.propertyListReadCorrupt.rawValue,
userInfo: ["NSDebugDescription" : "Numbers must start with a 1-9 at character \(input)." ])
}

if ascii == JSONReader.DECIMAL_SEPARATOR {
isInteger = false
guard readDigits() != nil else { return true }
guard nextASCII() else { return true }
} else if JSONReader.allDigits.contains(ascii) {
throw NSError(domain: NSCocoaErrorDomain, code: CocoaError.propertyListReadCorrupt.rawValue,
userInfo: ["NSDebugDescription" : "Leading zeros not allowed at character \(input)." ])
}

digitCount = string.count - (isInteger ? 0 : 1) - (isNegative ? 1 : 0)
guard ascii == JSONReader.LOWER_EXPONENT || ascii == JSONReader.UPPER_EXPONENT else {
// End of valid number characters
return true
}
digitCount = digitCount! - 1

// Process the exponent
isInteger = false
guard nextASCII() else { return false }
if ascii == JSONReader.MINUS {
positiveExponent = false
guard nextASCII() else { return false }
} else if ascii == JSONReader.PLUS {
positiveExponent = true
guard nextASCII() else { return false }
}
guard JSONReader.allDigits.contains(ascii) else { return false }
exponent = Int(ascii - JSONReader.ZERO)
while nextASCII() {
guard JSONReader.allDigits.contains(ascii) else { return false } // Invalid exponent character
exponent = (exponent * 10) + Int(ascii - JSONReader.ZERO)
if exponent > 324 {
// Exponent is too large to store in a Double
return false
}
return (NSNumber(value: doubleResult), doubleDistance)
}
return true
}

if source.encoding == .utf8 {
return parseTypedNumber(source.buffer.baseAddress!.advanced(by: input), count: source.buffer.count - input).map { return ($0.0, input + $0.1) }
}
else {
var numberCharacters = [UInt8]()
var index = input
while let (ascii, nextIndex) = source.takeASCII(index), JSONReader.numberCodePoints.contains(ascii) {
numberCharacters.append(ascii)
index = nextIndex

guard try checkJSONNumber() == true else { return nil }
digitCount = digitCount ?? string.count - (isInteger ? 0 : 1) - (isNegative ? 1 : 0)

// Try Int64() or UInt64() first
if isInteger {
if isNegative {
if digitCount! <= 19, let intValue = Int64(string) {
return (NSNumber(value: intValue), index)
}
} else {
if digitCount! <= 20, let uintValue = UInt64(string) {
return (NSNumber(value: uintValue), index)
}
}
numberCharacters.append(0)

return numberCharacters.withUnsafeBufferPointer {
parseTypedNumber($0.baseAddress!, count: $0.count)
}.map { return ($0.0, index) }
}

// Decimal holds more digits of precision but a smaller exponent than Double
// so try that if the exponent fits and there are more digits than Double can hold
if digitCount! > 17 && exponent >= -128 && exponent <= 127,
let decimal = Decimal(string: string), decimal.isFinite {
return (NSDecimalNumber(decimal: decimal), index)
}
// Fall back to Double() for everything else
if let doubleValue = Double(string) {
return (NSNumber(value: doubleValue), index)
}
return nil
}

//MARK: - Value parsing
Expand Down
108 changes: 108 additions & 0 deletions TestFoundation/TestJSONEncoder.swift
Original file line number Diff line number Diff line change
Expand Up @@ -499,6 +499,113 @@ class TestJSONEncoder : XCTestCase {
}
}

/// Checks how `JSONDecoder` handles JSON numbers at and beyond the limits of
/// each numeric type: values listed in `goodValues` must decode without error,
/// values listed in `badValues` must be rejected with a "does not fit" message,
/// and malformed number literals must be rejected as invalid JSON.
func test_numericLimits() {
    // One optional field per numeric type under test; every test document
    // populates exactly one of them, so all fields must be optional.
    struct DataStruct: Codable {
        let int8Value: Int8?
        let uint8Value: UInt8?
        let int16Value: Int16?
        let uint16Value: UInt16?
        let int32Value: Int32?
        let uint32Value: UInt32?
        let int64Value: Int64?
        let intValue: Int?
        let uintValue: UInt?
        let uint64Value: UInt64?
        let floatValue: Float?
        let doubleValue: Double?
        let decimalValue: Decimal?
    }

    /// Decodes the document `{ "<key>": <value> }` into `DataStruct` and discards the result.
    ///
    /// - Parameters:
    ///   - type: Name of the numeric type; the field key is its lowercased
    ///     form followed by "Value" (e.g. "UInt8" -> "uint8Value").
    ///   - value: The raw JSON number literal, inserted verbatim into the document.
    /// - Throws: Whatever `JSONDecoder.decode` throws for the document.
    func decode(_ type: String, _ value: String) throws {
        let key = type.lowercased() + "Value"
        let json = "{ \"\(key)\": \(value) }"
        // Data(_:) over the UTF-8 view cannot fail, so no force unwrap is needed.
        _ = try JSONDecoder().decode(DataStruct.self, from: Data(json.utf8))
    }

    /// Records a test failure if decoding `value` as `type` throws.
    func testGoodValue(_ type: String, _ value: String) {
        do {
            try decode(type, value)
        } catch {
            XCTFail("Unexpected error: \(error) for parsing \(value) to \(type)")
        }
    }

    /// Records a test failure unless decoding `value` as `type` throws an error
    /// whose description equals `errorMessage`.
    ///
    /// For `DecodingError.dataCorrupted` the context's `debugDescription` is
    /// compared; for any other error `String(describing:)` of the error is compared.
    func testErrorThrown(_ type: String, _ value: String, errorMessage: String) {
        do {
            try decode(type, value)
            XCTFail("Decode of \(value) to \(type) should not succeed")
        } catch DecodingError.dataCorrupted(let context) {
            XCTAssertEqual(context.debugDescription, errorMessage)
        } catch {
            XCTAssertEqual(String(describing: error), errorMessage)
        }
    }

    // Values that must decode successfully: zero, +/-1 and the minimum/maximum of
    // each type, plus a few literals such as "-0" and "34.0" for unsigned fields.
    var goodValues = [
        ("Int8", "0"), ("Int8", "1"), ("Int8", "-1"), ("Int8", "-128"), ("Int8", "127"),
        ("UInt8", "0"), ("UInt8", "1"), ("UInt8", "255"), ("UInt8", "-0"),

        ("Int16", "0"), ("Int16", "1"), ("Int16", "-1"), ("Int16", "-32768"), ("Int16", "32767"),
        ("UInt16", "0"), ("UInt16", "1"), ("UInt16", "65535"), ("UInt16", "34.0"),

        ("Int32", "0"), ("Int32", "1"), ("Int32", "-1"), ("Int32", "-2147483648"), ("Int32", "2147483647"),
        ("UInt32", "0"), ("UInt32", "1"), ("UInt32", "4294967295"),

        ("Int64", "0"), ("Int64", "1"), ("Int64", "-1"), ("Int64", "-9223372036854775808"), ("Int64", "9223372036854775807"),
        ("UInt64", "0"), ("UInt64", "1"), ("UInt64", "18446744073709551615"),

        ("Double", "0"), ("Double", "1"), ("Double", "-1"), ("Double", "2.2250738585072014e-308"), ("Double", "1.7976931348623157e+308"),
        ("Double", "5e-324"), ("Double", "3.141592653589793"),

        ("Decimal", "1.2"), ("Decimal", "3.14159265358979323846264338327950288419"),
        ("Decimal", "3402823669209384634633746074317682114550000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000"),
        ("Decimal", "-3402823669209384634633746074317682114550000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000"),
    ]

    // Int and UInt limits depend on the platform word size.
    if Int.max == Int64.max {
        goodValues += [
            ("Int", "0"), ("Int", "1"), ("Int", "-1"), ("Int", "-9223372036854775808"), ("Int", "9223372036854775807"),
            ("UInt", "0"), ("UInt", "1"), ("UInt", "18446744073709551615"),
        ]
    } else {
        goodValues += [
            ("Int", "0"), ("Int", "1"), ("Int", "-1"), ("Int", "-2147483648"), ("Int", "2147483647"),
            ("UInt", "0"), ("UInt", "1"), ("UInt", "4294967295"),
        ]
    }

    // Values that are valid JSON numbers but must be rejected for the target type.
    // NOTE(review): ("Int64", "9223372036854775808") used to appear twice in this
    // list; the duplicate was dropped. It was presumably meant to be the Int64
    // lower-bound case "-9223372036854775809" — confirm the decoder's message for
    // that value before adding it.
    let badValues = [
        ("Int8", "-129"), ("Int8", "128"), ("Int8", "1.2"),
        ("UInt8", "-1"), ("UInt8", "256"),

        ("Int16", "-32769"), ("Int16", "32768"),
        ("UInt16", "-1"), ("UInt16", "65536"),

        ("Int32", "-2147483649"), ("Int32", "2147483648"),
        ("UInt32", "-1"), ("UInt32", "4294967296"),

        ("Int64", "9223372036854775808"), ("Int64", "-100000000000000000000"),
        ("UInt64", "-1"), ("UInt64", "18446744073709600000"), ("Int64", "10000000000000000000000000000000000000"),
    ]

    for (type, value) in goodValues {
        testGoodValue(type, value)
    }

    for (type, value) in badValues {
        testErrorThrown(type, value, errorMessage: "Parsed JSON number <\(value)> does not fit in \(type).")
    }

    // Invalid JSON number formats: leading zeros, a missing integer part, an
    // explicit '+' sign, and a magnitude above the largest Double in goodValues.
    testErrorThrown("Int8", "0000000000000000000000000000001", errorMessage: "The given data was not valid JSON.")
    testErrorThrown("Double", "-.1", errorMessage: "The given data was not valid JSON.")
    testErrorThrown("Int32", "+1", errorMessage: "The given data was not valid JSON.")
    testErrorThrown("Int", ".012", errorMessage: "The given data was not valid JSON.")
    testErrorThrown("Double", "2.7976931348623158e+308", errorMessage: "The given data was not valid JSON.")
}


// MARK: - Helper Functions
private var _jsonEmptyDictionary: Data {
return "{}".data(using: .utf8)!
Expand Down Expand Up @@ -1089,6 +1196,7 @@ extension TestJSONEncoder {
("test_codingOfDouble", test_codingOfDouble),
("test_codingOfString", test_codingOfString),
("test_codingOfURL", test_codingOfURL),
("test_numericLimits", test_numericLimits),
]
}
}
15 changes: 13 additions & 2 deletions TestFoundation/TestJSONSerialization.swift
Original file line number Diff line number Diff line change
Expand Up @@ -487,7 +487,7 @@ extension TestJSONSerialization {

//MARK: - Number parsing
func deserialize_numbers(objectType: ObjectType) {
let subject = "[1, -1, 1.3, -1.3, 1e3, 1E-3, 10]"
let subject = "[1, -1, 1.3, -1.3, 1e3, 1E-3, 10, -12.34e56, 12.34e-56, 12.34e+6, 0.002, 0.0043e+4]"

do {
for encoding in supportedEncodings {
Expand All @@ -504,14 +504,19 @@ extension TestJSONSerialization {
XCTAssertEqual(result?[5] as? Double, 0.001)
XCTAssertEqual(result?[6] as? Int, 10)
XCTAssertEqual(result?[6] as? Double, 10.0)
XCTAssertEqual(result?[7] as? Double, -12.34e56)
XCTAssertEqual(result?[8] as? Double, 12.34e-56)
XCTAssertEqual(result?[9] as? Double, 12.34e6)
XCTAssertEqual(result?[10] as? Double, 2e-3)
XCTAssertEqual(result?[11] as? Double, 43)
}
} catch {
XCTFail("Unexpected error: \(error)")
}
}

func deserialize_numbers_as_reference_types(objectType: ObjectType) {
let subject = "[1, -1, 1.3, -1.3, 1e3, 1E-3, 10]"
let subject = "[1, -1, 1.3, -1.3, 1e3, 1E-3, 10, -12.34e56, 12.34e-56, 12.34e+6, 0.002, 0.0043e+4]"

do {
for encoding in supportedEncodings {
Expand All @@ -528,6 +533,12 @@ extension TestJSONSerialization {
XCTAssertEqual(result?[5] as? NSNumber, 0.001)
XCTAssertEqual(result?[6] as? NSNumber, 10)
XCTAssertEqual(result?[6] as? NSNumber, 10.0)
XCTAssertEqual(result?[7] as? NSNumber, -12.34e56)
XCTAssertEqual(result?[8] as? NSNumber, 12.34e-56)
XCTAssertEqual(result?[9] as? NSNumber, 12.34e6)
XCTAssertEqual(result?[10] as? NSNumber, 2e-3)
XCTAssertEqual(result?[11] as? NSNumber, 43)

}
} catch {
XCTFail("Unexpected error: \(error)")
Expand Down