Skip to content

ENH: ujson better handling of very large and very small numbers, throw ValueError for bad double_precision arg #4042 #4299

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 1 commit into from
Jul 20, 2013
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
32 changes: 18 additions & 14 deletions pandas/io/tests/test_json/test_ujson.py
Original file line number Diff line number Diff line change
Expand Up @@ -41,7 +41,7 @@ class UltraJSONTests(TestCase):

def test_encodeDecimal(self):
sut = decimal.Decimal("1337.1337")
encoded = ujson.encode(sut, double_precision=100)
encoded = ujson.encode(sut, double_precision=15)
decoded = ujson.decode(encoded)
self.assertEquals(decoded, 1337.1337)

Expand Down Expand Up @@ -73,7 +73,7 @@ def test_doubleLongIssue(self):
encoded = json.dumps(sut)
decoded = json.loads(encoded)
self.assertEqual(sut, decoded)
encoded = ujson.encode(sut, double_precision=100)
encoded = ujson.encode(sut, double_precision=15)
decoded = ujson.decode(encoded)
self.assertEqual(sut, decoded)

Expand All @@ -82,7 +82,7 @@ def test_doubleLongDecimalIssue(self):
encoded = json.dumps(sut)
decoded = json.loads(encoded)
self.assertEqual(sut, decoded)
encoded = ujson.encode(sut, double_precision=100)
encoded = ujson.encode(sut, double_precision=15)
decoded = ujson.decode(encoded)
self.assertEqual(sut, decoded)

Expand All @@ -98,6 +98,16 @@ def test_decimalDecodeTestPrecise(self):
decoded = ujson.decode(encoded, precise_float=True)
self.assertEqual(sut, decoded)

def test_encodeDoubleTinyExponential(self):
num = 1e-40
self.assertEqual(num, ujson.decode(ujson.encode(num)))
num = 1e-100
self.assertEqual(num, ujson.decode(ujson.encode(num)))
num = -1e-45
self.assertEqual(num, ujson.decode(ujson.encode(num)))
num = -1e-145
self.assertEqual(num, ujson.decode(ujson.encode(num)))

def test_encodeDictWithUnicodeKeys(self):
input = { u"key1": u"value1", u"key1": u"value1", u"key1": u"value1", u"key1": u"value1", u"key1": u"value1", u"key1": u"value1" }
output = ujson.encode(input)
Expand Down Expand Up @@ -158,15 +168,9 @@ def test_doublePrecisionTest(self):

def test_invalidDoublePrecision(self):
input = 30.12345678901234567890
output = ujson.encode(input, double_precision = 20)
# should snap to the max, which is 15
self.assertEquals(round(input, 15), json.loads(output))
self.assertEquals(round(input, 15), ujson.decode(output))

output = ujson.encode(input, double_precision = -1)
# also should snap to the max, which is 15
self.assertEquals(round(input, 15), json.loads(output))
self.assertEquals(round(input, 15), ujson.decode(output))
self.assertRaises(ValueError, ujson.encode, input, double_precision = 20)
self.assertRaises(ValueError, ujson.encode, input, double_precision = -1)

# will throw TypeError
self.assertRaises(TypeError, ujson.encode, input, double_precision = '9')
Expand Down Expand Up @@ -896,13 +900,13 @@ def testFloatArray(self):

def testFloatMax(self):
num = np.float(np.finfo(np.float).max/10)
assert_approx_equal(np.float(ujson.decode(ujson.encode(num))), num, 15)
assert_approx_equal(np.float(ujson.decode(ujson.encode(num, double_precision=15))), num, 15)

num = np.float32(np.finfo(np.float32).max/10)
assert_approx_equal(np.float32(ujson.decode(ujson.encode(num))), num, 15)
assert_approx_equal(np.float32(ujson.decode(ujson.encode(num, double_precision=15))), num, 15)

num = np.float64(np.finfo(np.float64).max/10)
assert_approx_equal(np.float64(ujson.decode(ujson.encode(num))), num, 15)
assert_approx_equal(np.float64(ujson.decode(ujson.encode(num, double_precision=15))), num, 15)

def testArrays(self):
arr = np.arange(100);
Expand Down
37 changes: 20 additions & 17 deletions pandas/src/ujson/lib/ultrajsonenc.c
Original file line number Diff line number Diff line change
Expand Up @@ -507,8 +507,10 @@ void Buffer_AppendLongUnchecked(JSONObjectEncoder *enc, JSINT64 value)

int Buffer_AppendDoubleUnchecked(JSOBJ obj, JSONObjectEncoder *enc, double value)
{
/* if input is larger than thres_max, revert to exponential */
/* if input is beyond the thresholds, revert to exponential */
const double thres_max = (double) 1e16 - 1;
const double thres_min = (double) 1e-15;
char precision_str[20];
int count;
double diff = 0.0;
char* str = enc->offset;
Expand Down Expand Up @@ -540,6 +542,23 @@ int Buffer_AppendDoubleUnchecked(JSOBJ obj, JSONObjectEncoder *enc, double value
value = -value;
}

/*
for very large or very small numbers, switch back to native sprintf so they
are written in exponential notation. anyone want to write code to replace this? */
if (value > thres_max || (value != 0.0 && fabs(value) < thres_min))
{
precision_str[0] = '%';
precision_str[1] = '.';
#ifdef _WIN32
sprintf_s(precision_str+2, sizeof(precision_str)-2, "%ug", enc->doublePrecision);
enc->offset += sprintf_s(str, enc->end - enc->offset, precision_str, neg ? -value : value);
#else
snprintf(precision_str+2, sizeof(precision_str)-2, "%ug", enc->doublePrecision);
enc->offset += snprintf(str, enc->end - enc->offset, precision_str, neg ? -value : value);
#endif
return TRUE;
}

pow10 = g_pow10[enc->doublePrecision];

whole = (unsigned long long) value;
Expand All @@ -565,22 +584,6 @@ int Buffer_AppendDoubleUnchecked(JSOBJ obj, JSONObjectEncoder *enc, double value
++frac;
}

/* for very large numbers switch back to native sprintf for exponentials.
anyone want to write code to replace this? */
/*
normal printf behavior is to print EVERY whole number digit
which can be 100s of characters overflowing your buffers == bad
*/
if (value > thres_max)
{
#ifdef _WIN32
enc->offset += sprintf_s(str, enc->end - enc->offset, "%.15e", neg ? -value : value);
#else
enc->offset += snprintf(str, enc->end - enc->offset, "%.15e", neg ? -value : value);
#endif
return TRUE;
}

if (enc->doublePrecision == 0)
{
diff = value - whole;
Expand Down
9 changes: 9 additions & 0 deletions pandas/src/ujson/python/objToJSON.c
Original file line number Diff line number Diff line change
Expand Up @@ -1696,6 +1696,15 @@ PyObject* objToJSON(PyObject* self, PyObject *args, PyObject *kwargs)
encoder->encodeHTMLChars = 1;
}

if (idoublePrecision > JSON_DOUBLE_MAX_DECIMALS || idoublePrecision < 0)
{
PyErr_Format (
PyExc_ValueError,
"Invalid value '%d' for option 'double_precision', max is '%u'",
idoublePrecision,
JSON_DOUBLE_MAX_DECIMALS);
return NULL;
}
encoder->doublePrecision = idoublePrecision;

if (sOrient != NULL)
Expand Down