Merge pull request #4299 from Komnomnomnom/ujson-small-floats

jreback · jreback · commit ec8920a0a604 · 2013-07-19T18:45:17.000-07:00
ENH: ujson better handling of very large and very small numbers, throw ValueError for bad double_precision arg #4042
diff --git a/pandas/io/tests/test_json/test_ujson.py b/pandas/io/tests/test_json/test_ujson.py
@@ -41,7 +41,7 @@ class UltraJSONTests(TestCase):
 
     def test_encodeDecimal(self):
         sut = decimal.Decimal("1337.1337")
-        encoded = ujson.encode(sut, double_precision=100)
+        encoded = ujson.encode(sut, double_precision=15)
         decoded = ujson.decode(encoded)
         self.assertEquals(decoded, 1337.1337)
 
@@ -73,7 +73,7 @@ def test_doubleLongIssue(self):
         encoded = json.dumps(sut)
         decoded = json.loads(encoded)
         self.assertEqual(sut, decoded)
-        encoded = ujson.encode(sut, double_precision=100)
+        encoded = ujson.encode(sut, double_precision=15)
         decoded = ujson.decode(encoded)
         self.assertEqual(sut, decoded)
 
@@ -82,7 +82,7 @@ def test_doubleLongDecimalIssue(self):
         encoded = json.dumps(sut)
         decoded = json.loads(encoded)
         self.assertEqual(sut, decoded)
-        encoded = ujson.encode(sut, double_precision=100)
+        encoded = ujson.encode(sut, double_precision=15)
         decoded = ujson.decode(encoded)
         self.assertEqual(sut, decoded)
 
@@ -98,6 +98,16 @@ def test_decimalDecodeTestPrecise(self):
         decoded = ujson.decode(encoded, precise_float=True)
         self.assertEqual(sut, decoded)
 
+    def test_encodeDoubleTinyExponential(self):
+        num = 1e-40
+        self.assertEqual(num, ujson.decode(ujson.encode(num)))
+        num = 1e-100
+        self.assertEqual(num, ujson.decode(ujson.encode(num)))
+        num = -1e-45
+        self.assertEqual(num, ujson.decode(ujson.encode(num)))
+        num = -1e-145
+        self.assertEqual(num, ujson.decode(ujson.encode(num)))
+
     def test_encodeDictWithUnicodeKeys(self):
         input = { u"key1": u"value1", u"key1": u"value1", u"key1": u"value1", u"key1": u"value1", u"key1": u"value1", u"key1": u"value1" }
         output = ujson.encode(input)
@@ -158,15 +168,9 @@ def test_doublePrecisionTest(self):
 
     def test_invalidDoublePrecision(self):
         input = 30.12345678901234567890
-        output = ujson.encode(input, double_precision = 20)
-        # should snap to the max, which is 15
-        self.assertEquals(round(input, 15), json.loads(output))
-        self.assertEquals(round(input, 15), ujson.decode(output))
 
-        output = ujson.encode(input, double_precision = -1)
-        # also should snap to the max, which is 15
-        self.assertEquals(round(input, 15), json.loads(output))
-        self.assertEquals(round(input, 15), ujson.decode(output))
+        self.assertRaises(ValueError, ujson.encode, input, double_precision = 20)
+        self.assertRaises(ValueError, ujson.encode, input, double_precision = -1)
 
         # will throw typeError
         self.assertRaises(TypeError, ujson.encode, input, double_precision = '9')
@@ -896,13 +900,13 @@ def testFloatArray(self):
 
     def testFloatMax(self):
         num = np.float(np.finfo(np.float).max/10)
-        assert_approx_equal(np.float(ujson.decode(ujson.encode(num))), num, 15)
+        assert_approx_equal(np.float(ujson.decode(ujson.encode(num, double_precision=15))), num, 15)
 
         num = np.float32(np.finfo(np.float32).max/10)
-        assert_approx_equal(np.float32(ujson.decode(ujson.encode(num))), num, 15)
+        assert_approx_equal(np.float32(ujson.decode(ujson.encode(num, double_precision=15))), num, 15)
 
         num = np.float64(np.finfo(np.float64).max/10)
-        assert_approx_equal(np.float64(ujson.decode(ujson.encode(num))), num, 15)
+        assert_approx_equal(np.float64(ujson.decode(ujson.encode(num, double_precision=15))), num, 15)
 
     def testArrays(self):
         arr = np.arange(100);
diff --git a/pandas/src/ujson/lib/ultrajsonenc.c b/pandas/src/ujson/lib/ultrajsonenc.c
@@ -507,8 +507,10 @@ void Buffer_AppendLongUnchecked(JSONObjectEncoder *enc, JSINT64 value)
 
 int Buffer_AppendDoubleUnchecked(JSOBJ obj, JSONObjectEncoder *enc, double value)
 {
-  /* if input is larger than thres_max, revert to exponential */
+  /* if input is beyond the thresholds, revert to exponential */
   const double thres_max = (double) 1e16 - 1;
+  const double thres_min = (double) 1e-15;
+  char precision_str[20];
   int count;
   double diff = 0.0;
   char* str = enc->offset;
@@ -540,6 +542,23 @@ int Buffer_AppendDoubleUnchecked(JSOBJ obj, JSONObjectEncoder *enc, double value
     value = -value;
   }
 
+  /* for very large or very small (non-zero) numbers fall back to native
+  snprintf/sprintf_s with a "%.<precision>g" format (exponential notation).
+  anyone want to write code to replace this? */
+  if (value > thres_max || (value != 0.0 && fabs(value) <  thres_min))
+  {
+    precision_str[0] = '%';
+    precision_str[1] = '.';
+#ifdef _WIN32
+    sprintf_s(precision_str+2, sizeof(precision_str)-2, "%ug", enc->doublePrecision);
+    enc->offset += sprintf_s(str, enc->end - enc->offset, precision_str, neg ? -value : value);
+#else
+    snprintf(precision_str+2, sizeof(precision_str)-2, "%ug", enc->doublePrecision);
+    enc->offset += snprintf(str, enc->end - enc->offset, precision_str, neg ? -value : value);
+#endif
+    return TRUE;
+  }
+
   pow10 = g_pow10[enc->doublePrecision];
 
   whole = (unsigned long long) value;
@@ -565,22 +584,6 @@ int Buffer_AppendDoubleUnchecked(JSOBJ obj, JSONObjectEncoder *enc, double value
     ++frac;
   }
 
-  /* for very large numbers switch back to native sprintf for exponentials.
-  anyone want to write code to replace this? */
-  /*
-  normal printf behavior is to print EVERY whole number digit
-  which can be 100s of characters overflowing your buffers == bad
-  */
-  if (value > thres_max)
-  {
-#ifdef _WIN32
-  enc->offset += sprintf_s(str, enc->end - enc->offset, "%.15e", neg ? -value : value);
-#else
-  enc->offset += snprintf(str, enc->end - enc->offset, "%.15e", neg ? -value : value);
-#endif
-     return TRUE;
-   }
-
   if (enc->doublePrecision == 0)
   {
     diff = value - whole;
diff --git a/pandas/src/ujson/python/objToJSON.c b/pandas/src/ujson/python/objToJSON.c
@@ -1696,6 +1696,15 @@ PyObject* objToJSON(PyObject* self, PyObject *args, PyObject *kwargs)
     encoder->encodeHTMLChars = 1;
   }
 
+  if (idoublePrecision > JSON_DOUBLE_MAX_DECIMALS || idoublePrecision < 0) 
+  {
+      PyErr_Format (
+          PyExc_ValueError, 
+          "Invalid value '%d' for option 'double_precision', max is '%u'", 
+          idoublePrecision,
+          JSON_DOUBLE_MAX_DECIMALS);
+      return NULL;
+  }
   encoder->doublePrecision = idoublePrecision;
 
   if (sOrient != NULL)