Skip to content

Commit ec8920a

Browse files
committed
Merge pull request #4299 from Komnomnomnom/ujson-small-floats
ENH: ujson better handling of very large and very small numbers, throw ValueError for bad double_precision arg #4042
2 parents 639696e + 1871002 commit ec8920a

File tree

3 files changed

+47
-31
lines changed

3 files changed

+47
-31
lines changed

pandas/io/tests/test_json/test_ujson.py

+18-14
Original file line number | Diff line number | Diff line change
@@ -41,7 +41,7 @@ class UltraJSONTests(TestCase):
4141

4242
def test_encodeDecimal(self):
4343
sut = decimal.Decimal("1337.1337")
44-
encoded = ujson.encode(sut, double_precision=100)
44+
encoded = ujson.encode(sut, double_precision=15)
4545
decoded = ujson.decode(encoded)
4646
self.assertEquals(decoded, 1337.1337)
4747

@@ -73,7 +73,7 @@ def test_doubleLongIssue(self):
7373
encoded = json.dumps(sut)
7474
decoded = json.loads(encoded)
7575
self.assertEqual(sut, decoded)
76-
encoded = ujson.encode(sut, double_precision=100)
76+
encoded = ujson.encode(sut, double_precision=15)
7777
decoded = ujson.decode(encoded)
7878
self.assertEqual(sut, decoded)
7979

@@ -82,7 +82,7 @@ def test_doubleLongDecimalIssue(self):
8282
encoded = json.dumps(sut)
8383
decoded = json.loads(encoded)
8484
self.assertEqual(sut, decoded)
85-
encoded = ujson.encode(sut, double_precision=100)
85+
encoded = ujson.encode(sut, double_precision=15)
8686
decoded = ujson.decode(encoded)
8787
self.assertEqual(sut, decoded)
8888

@@ -98,6 +98,16 @@ def test_decimalDecodeTestPrecise(self):
9898
decoded = ujson.decode(encoded, precise_float=True)
9999
self.assertEqual(sut, decoded)
100100

101+
def test_encodeDoubleTinyExponential(self):
102+
num = 1e-40
103+
self.assertEqual(num, ujson.decode(ujson.encode(num)))
104+
num = 1e-100
105+
self.assertEqual(num, ujson.decode(ujson.encode(num)))
106+
num = -1e-45
107+
self.assertEqual(num, ujson.decode(ujson.encode(num)))
108+
num = -1e-145
109+
self.assertEqual(num, ujson.decode(ujson.encode(num)))
110+
101111
def test_encodeDictWithUnicodeKeys(self):
102112
input = { u"key1": u"value1", u"key1": u"value1", u"key1": u"value1", u"key1": u"value1", u"key1": u"value1", u"key1": u"value1" }
103113
output = ujson.encode(input)
@@ -158,15 +168,9 @@ def test_doublePrecisionTest(self):
158168

159169
def test_invalidDoublePrecision(self):
160170
input = 30.12345678901234567890
161-
output = ujson.encode(input, double_precision = 20)
162-
# should snap to the max, which is 15
163-
self.assertEquals(round(input, 15), json.loads(output))
164-
self.assertEquals(round(input, 15), ujson.decode(output))
165171

166-
output = ujson.encode(input, double_precision = -1)
167-
# also should snap to the max, which is 15
168-
self.assertEquals(round(input, 15), json.loads(output))
169-
self.assertEquals(round(input, 15), ujson.decode(output))
172+
self.assertRaises(ValueError, ujson.encode, input, double_precision = 20)
173+
self.assertRaises(ValueError, ujson.encode, input, double_precision = -1)
170174

171175
# will throw typeError
172176
self.assertRaises(TypeError, ujson.encode, input, double_precision = '9')
@@ -896,13 +900,13 @@ def testFloatArray(self):
896900

897901
def testFloatMax(self):
898902
num = np.float(np.finfo(np.float).max/10)
899-
assert_approx_equal(np.float(ujson.decode(ujson.encode(num))), num, 15)
903+
assert_approx_equal(np.float(ujson.decode(ujson.encode(num, double_precision=15))), num, 15)
900904

901905
num = np.float32(np.finfo(np.float32).max/10)
902-
assert_approx_equal(np.float32(ujson.decode(ujson.encode(num))), num, 15)
906+
assert_approx_equal(np.float32(ujson.decode(ujson.encode(num, double_precision=15))), num, 15)
903907

904908
num = np.float64(np.finfo(np.float64).max/10)
905-
assert_approx_equal(np.float64(ujson.decode(ujson.encode(num))), num, 15)
909+
assert_approx_equal(np.float64(ujson.decode(ujson.encode(num, double_precision=15))), num, 15)
906910

907911
def testArrays(self):
908912
arr = np.arange(100);

pandas/src/ujson/lib/ultrajsonenc.c

+20-17
Original file line number | Diff line number | Diff line change
@@ -507,8 +507,10 @@ void Buffer_AppendLongUnchecked(JSONObjectEncoder *enc, JSINT64 value)
507507

508508
int Buffer_AppendDoubleUnchecked(JSOBJ obj, JSONObjectEncoder *enc, double value)
509509
{
510-
/* if input is larger than thres_max, revert to exponential */
510+
/* if input is beyond the thresholds, revert to exponential */
511511
const double thres_max = (double) 1e16 - 1;
512+
const double thres_min = (double) 1e-15;
513+
char precision_str[20];
512514
int count;
513515
double diff = 0.0;
514516
char* str = enc->offset;
@@ -540,6 +542,23 @@ int Buffer_AppendDoubleUnchecked(JSOBJ obj, JSONObjectEncoder *enc, double value
540542
value = -value;
541543
}
542544

545+
/*
546+
for very large or small numbers switch back to native sprintf for
547+
exponentials. anyone want to write code to replace this? */
548+
if (value > thres_max || (value != 0.0 && fabs(value) < thres_min))
549+
{
550+
precision_str[0] = '%';
551+
precision_str[1] = '.';
552+
#ifdef _WIN32
553+
sprintf_s(precision_str+2, sizeof(precision_str)-2, "%ug", enc->doublePrecision);
554+
enc->offset += sprintf_s(str, enc->end - enc->offset, precision_str, neg ? -value : value);
555+
#else
556+
snprintf(precision_str+2, sizeof(precision_str)-2, "%ug", enc->doublePrecision);
557+
enc->offset += snprintf(str, enc->end - enc->offset, precision_str, neg ? -value : value);
558+
#endif
559+
return TRUE;
560+
}
561+
543562
pow10 = g_pow10[enc->doublePrecision];
544563

545564
whole = (unsigned long long) value;
@@ -565,22 +584,6 @@ int Buffer_AppendDoubleUnchecked(JSOBJ obj, JSONObjectEncoder *enc, double value
565584
++frac;
566585
}
567586

568-
/* for very large numbers switch back to native sprintf for exponentials.
569-
anyone want to write code to replace this? */
570-
/*
571-
normal printf behavior is to print EVERY whole number digit
572-
which can be 100s of characters overflowing your buffers == bad
573-
*/
574-
if (value > thres_max)
575-
{
576-
#ifdef _WIN32
577-
enc->offset += sprintf_s(str, enc->end - enc->offset, "%.15e", neg ? -value : value);
578-
#else
579-
enc->offset += snprintf(str, enc->end - enc->offset, "%.15e", neg ? -value : value);
580-
#endif
581-
return TRUE;
582-
}
583-
584587
if (enc->doublePrecision == 0)
585588
{
586589
diff = value - whole;

pandas/src/ujson/python/objToJSON.c

+9
Original file line number | Diff line number | Diff line change
@@ -1696,6 +1696,15 @@ PyObject* objToJSON(PyObject* self, PyObject *args, PyObject *kwargs)
16961696
encoder->encodeHTMLChars = 1;
16971697
}
16981698

1699+
if (idoublePrecision > JSON_DOUBLE_MAX_DECIMALS || idoublePrecision < 0)
1700+
{
1701+
PyErr_Format (
1702+
PyExc_ValueError,
1703+
"Invalid value '%d' for option 'double_precision', max is '%u'",
1704+
idoublePrecision,
1705+
JSON_DOUBLE_MAX_DECIMALS);
1706+
return NULL;
1707+
}
16991708
encoder->doublePrecision = idoublePrecision;
17001709

17011710
if (sOrient != NULL)

0 commit comments

Comments
 (0)