From 1871002b259fcb3334b33cc26c757428bce245b6 Mon Sep 17 00:00:00 2001 From: Kieran O'Mahony Date: Sat, 20 Jul 2013 08:42:57 +1000 Subject: [PATCH] ENH: ujson better handling of very large and very small numbers, throw ValueError for bad double_precision arg #4042 --- pandas/io/tests/test_json/test_ujson.py | 32 +++++++++++---------- pandas/src/ujson/lib/ultrajsonenc.c | 37 +++++++++++++------------ pandas/src/ujson/python/objToJSON.c | 9 ++++++ 3 files changed, 47 insertions(+), 31 deletions(-) diff --git a/pandas/io/tests/test_json/test_ujson.py b/pandas/io/tests/test_json/test_ujson.py index 19c482d8b3590..fbfac34f5073c 100644 --- a/pandas/io/tests/test_json/test_ujson.py +++ b/pandas/io/tests/test_json/test_ujson.py @@ -41,7 +41,7 @@ class UltraJSONTests(TestCase): def test_encodeDecimal(self): sut = decimal.Decimal("1337.1337") - encoded = ujson.encode(sut, double_precision=100) + encoded = ujson.encode(sut, double_precision=15) decoded = ujson.decode(encoded) self.assertEquals(decoded, 1337.1337) @@ -73,7 +73,7 @@ def test_doubleLongIssue(self): encoded = json.dumps(sut) decoded = json.loads(encoded) self.assertEqual(sut, decoded) - encoded = ujson.encode(sut, double_precision=100) + encoded = ujson.encode(sut, double_precision=15) decoded = ujson.decode(encoded) self.assertEqual(sut, decoded) @@ -82,7 +82,7 @@ def test_doubleLongDecimalIssue(self): encoded = json.dumps(sut) decoded = json.loads(encoded) self.assertEqual(sut, decoded) - encoded = ujson.encode(sut, double_precision=100) + encoded = ujson.encode(sut, double_precision=15) decoded = ujson.decode(encoded) self.assertEqual(sut, decoded) @@ -98,6 +98,16 @@ def test_decimalDecodeTestPrecise(self): decoded = ujson.decode(encoded, precise_float=True) self.assertEqual(sut, decoded) + def test_encodeDoubleTinyExponential(self): + num = 1e-40 + self.assertEqual(num, ujson.decode(ujson.encode(num))) + num = 1e-100 + self.assertEqual(num, ujson.decode(ujson.encode(num))) + num = -1e-45 + self.assertEqual(num, ujson.decode(ujson.encode(num))) + num = -1e-145 + self.assertEqual(num, ujson.decode(ujson.encode(num))) + def test_encodeDictWithUnicodeKeys(self): input = { u"key1": u"value1", u"key1": u"value1", u"key1": u"value1", u"key1": u"value1", u"key1": u"value1", u"key1": u"value1" } output = ujson.encode(input) @@ -158,15 +168,9 @@ def test_doublePrecisionTest(self): def test_invalidDoublePrecision(self): input = 30.12345678901234567890 - output = ujson.encode(input, double_precision = 20) - # should snap to the max, which is 15 - self.assertEquals(round(input, 15), json.loads(output)) - self.assertEquals(round(input, 15), ujson.decode(output)) - output = ujson.encode(input, double_precision = -1) - # also should snap to the max, which is 15 - self.assertEquals(round(input, 15), json.loads(output)) - self.assertEquals(round(input, 15), ujson.decode(output)) + self.assertRaises(ValueError, ujson.encode, input, double_precision = 20) + self.assertRaises(ValueError, ujson.encode, input, double_precision = -1) # will throw typeError self.assertRaises(TypeError, ujson.encode, input, double_precision = '9') @@ -896,13 +900,13 @@ def testFloatArray(self): def testFloatMax(self): num = np.float(np.finfo(np.float).max/10) - assert_approx_equal(np.float(ujson.decode(ujson.encode(num))), num, 15) + assert_approx_equal(np.float(ujson.decode(ujson.encode(num, double_precision=15))), num, 15) num = np.float32(np.finfo(np.float32).max/10) - assert_approx_equal(np.float32(ujson.decode(ujson.encode(num))), num, 15) + assert_approx_equal(np.float32(ujson.decode(ujson.encode(num, double_precision=15))), num, 15) num = np.float64(np.finfo(np.float64).max/10) - assert_approx_equal(np.float64(ujson.decode(ujson.encode(num))), num, 15) + assert_approx_equal(np.float64(ujson.decode(ujson.encode(num, double_precision=15))), num, 15) def testArrays(self): arr = np.arange(100); diff --git a/pandas/src/ujson/lib/ultrajsonenc.c b/pandas/src/ujson/lib/ultrajsonenc.c index 01fc7c10fe755..4106ed6b73fcf 100644 --- a/pandas/src/ujson/lib/ultrajsonenc.c +++ b/pandas/src/ujson/lib/ultrajsonenc.c @@ -507,8 +507,10 @@ void Buffer_AppendLongUnchecked(JSONObjectEncoder *enc, JSINT64 value) int Buffer_AppendDoubleUnchecked(JSOBJ obj, JSONObjectEncoder *enc, double value) { - /* if input is larger than thres_max, revert to exponential */ + /* if input is beyond the thresholds, revert to exponential */ const double thres_max = (double) 1e16 - 1; + const double thres_min = (double) 1e-15; + char precision_str[20]; int count; double diff = 0.0; char* str = enc->offset; @@ -540,6 +542,23 @@ int Buffer_AppendDoubleUnchecked(JSOBJ obj, JSONObjectEncoder *enc, double value value = -value; } + /* + for very large or small numbers switch back to native sprintf for + exponentials. anyone want to write code to replace this? */ + if (value > thres_max || (value != 0.0 && fabs(value) < thres_min)) + { + precision_str[0] = '%'; + precision_str[1] = '.'; +#ifdef _WIN32 + sprintf_s(precision_str+2, sizeof(precision_str)-2, "%ug", enc->doublePrecision); + enc->offset += sprintf_s(str, enc->end - enc->offset, precision_str, neg ? -value : value); +#else + snprintf(precision_str+2, sizeof(precision_str)-2, "%ug", enc->doublePrecision); + enc->offset += snprintf(str, enc->end - enc->offset, precision_str, neg ? -value : value); +#endif + return TRUE; + } + pow10 = g_pow10[enc->doublePrecision]; whole = (unsigned long long) value; @@ -565,22 +584,6 @@ int Buffer_AppendDoubleUnchecked(JSOBJ obj, JSONObjectEncoder *enc, double value ++frac; } - /* for very large numbers switch back to native sprintf for exponentials. - anyone want to write code to replace this? */ - /* - normal printf behavior is to print EVERY whole number digit - which can be 100s of characters overflowing your buffers == bad - */ - if (value > thres_max) - { -#ifdef _WIN32 - enc->offset += sprintf_s(str, enc->end - enc->offset, "%.15e", neg ? -value : value); -#else - enc->offset += snprintf(str, enc->end - enc->offset, "%.15e", neg ? -value : value); -#endif - return TRUE; - } - if (enc->doublePrecision == 0) { diff = value - whole; diff --git a/pandas/src/ujson/python/objToJSON.c b/pandas/src/ujson/python/objToJSON.c index 89d3c203fbb7d..bebaf89de341d 100644 --- a/pandas/src/ujson/python/objToJSON.c +++ b/pandas/src/ujson/python/objToJSON.c @@ -1696,6 +1696,15 @@ PyObject* objToJSON(PyObject* self, PyObject *args, PyObject *kwargs) encoder->encodeHTMLChars = 1; } + if (idoublePrecision > JSON_DOUBLE_MAX_DECIMALS || idoublePrecision < 0) + { + PyErr_Format ( + PyExc_ValueError, + "Invalid value '%d' for option 'double_precision', max is '%u'", + idoublePrecision, + JSON_DOUBLE_MAX_DECIMALS); + return NULL; + } encoder->doublePrecision = idoublePrecision; if (sOrient != NULL)