From 86a6c9c04008588363b4b67f1b49a99fa3a70147 Mon Sep 17 00:00:00 2001
From: jreback <jeff@reback.net>
Date: Fri, 19 Jul 2013 13:57:42 -0400
Subject: [PATCH] TST: raise an error on json serialization of floats that
 cannot be accurately represented

DOC: doc updates for small_float errors
---
 doc/source/io.rst                        |  5 +++
 doc/source/release.rst                   |  3 ++
 pandas/io/json.py                        | 46 ++++++++++++++++++------
 pandas/io/tests/test_json/test_pandas.py | 12 +++++++
 4 files changed, 55 insertions(+), 11 deletions(-)

diff --git a/doc/source/io.rst b/doc/source/io.rst
index 653ac2cb10b69..a78075548b51d 100644
--- a/doc/source/io.rst
+++ b/doc/source/io.rst
@@ -1020,6 +1020,11 @@ Writing to a file, with a date index and a date column
    dfj2.to_json('test.json')
    open('test.json').read()
 
+.. warning::
+
+   Currently ``ujson`` cannot accurately format nonzero floats whose absolute
+   value is less than 1e-15. A ``ValueError`` will be raised in these cases.
+
 Reading JSON
 ~~~~~~~~~~~~
 
diff --git a/doc/source/release.rst b/doc/source/release.rst
index a64b2a77b376c..3ac77ac14e2fe 100644
--- a/doc/source/release.rst
+++ b/doc/source/release.rst
@@ -37,6 +37,8 @@ pandas 0.12
   - Support for reading Amazon S3 files. (:issue:`3504`)
   - Added module for reading and writing Stata files: pandas.io.stata (:issue:`1512`)
     includes ``to_stata`` DataFrame method, and a ``read_stata`` top-level reader
+  - Added module for reading and writing JSON strings/files: pandas.io.json (:issue:`3876`)
+    includes ``to_json`` DataFrame/Series method, and a ``read_json`` top-level reader
   - Added support for writing in ``to_csv`` and reading in ``read_csv``,
     multi-index columns. The ``header`` option in ``read_csv`` now accepts a
     list of the rows from which to read the index. Added the option,
@@ -345,6 +347,7 @@ pandas 0.12
   - Fixed bug where html5lib wasn't being properly skipped (:issue:`4265`)
   - Fixed bug where get_data_famafrench wasn't using the correct file edges
     (:issue:`4281`)
+  - Raise a ``ValueError`` if trying to format nonzero floats smaller in magnitude than 1e-15 with ``to_json`` (:issue:`4042`)
 
 pandas 0.11.0
 =============
diff --git a/pandas/io/json.py b/pandas/io/json.py
index ce95c3394ce2c..a5d06afff4e95 100644
--- a/pandas/io/json.py
+++ b/pandas/io/json.py
@@ -16,9 +16,9 @@
 ### interface to/from ###
 
 def to_json(path_or_buf, obj, orient=None, date_format='epoch', double_precision=10, force_ascii=True):
-        
+
     if isinstance(obj, Series):
-        s = SeriesWriter(obj, orient=orient, date_format=date_format, double_precision=double_precision, 
+        s = SeriesWriter(obj, orient=orient, date_format=date_format, double_precision=double_precision,
                          ensure_ascii=force_ascii).write()
     elif isinstance(obj, DataFrame):
         s = FrameWriter(obj, orient=orient, date_format=date_format, double_precision=double_precision,
@@ -41,7 +41,7 @@ def __init__(self, obj, orient, date_format, double_precision, ensure_ascii):
 
         if orient is None:
             orient = self._default_orient
-            
+
         self.orient = orient
         self.date_format = date_format
         self.double_precision = double_precision
@@ -64,19 +64,36 @@ def _format_to_date(self, data):
         if self._needs_to_date(data):
             return data.apply(lambda x: x.isoformat())
         return data
-    
+
     def copy_if_needed(self):
         """ copy myself if necessary """
         if not self.is_copy:
             self.obj = self.obj.copy()
             self.is_copy = True
 
+    def _validate(self):
+        """ validate that we can accurately write the data """
+        pass
+
+    def _raise_on_small_floats(self):
+        raise ValueError("ujson currently cannot accurately format float data less\n"
+                         "than 1e-15. A work-around is to multiply the data by\n"
+                         "a large positive factor and divide on deseriliazation\n")
+
     def write(self):
+        self._validate()
         return dumps(self.obj, orient=self.orient, double_precision=self.double_precision, ensure_ascii=self.ensure_ascii)
 
 class SeriesWriter(Writer):
     _default_orient = 'index'
 
+    def _validate(self):
+        if issubclass(self.obj.dtype.type, np.floating):
+            values = self.obj.values
+            values = values[values.nonzero()[0]]
+            if len(values) and (np.abs(values)<1e-15).any():
+                self._raise_on_small_floats()
+
     def _format_axes(self):
         if self._needs_to_date(self.obj.index):
             self.copy_if_needed()
@@ -95,6 +112,13 @@ def _format_bools(self):
 class FrameWriter(Writer):
     _default_orient = 'columns'
 
+    def _validate(self):
+        cols = [ k for k, v in self.obj.dtypes.iteritems() if issubclass(v.type,np.floating) ]
+        values = self.obj.loc[:,cols].values.ravel()
+        values = values[values.nonzero()[0]]
+        if len(values) and (np.abs(values)<1e-15).any():
+            self._raise_on_small_floats()
+
     def _format_axes(self):
         """ try to axes if they are datelike """
         if self.orient == 'columns':
@@ -186,13 +210,13 @@ def read_json(path_or_buf=None, orient=None, typ='frame', dtype=True,
     return obj
 
 class Parser(object):
-    
+
     def __init__(self, json, orient, dtype=True, convert_axes=True, convert_dates=True, keep_default_dates=False, numpy=False):
         self.json = json
 
         if orient is None:
             orient = self._default_orient
-            
+
         self.orient = orient
         self.dtype = dtype
 
@@ -207,7 +231,7 @@ def __init__(self, json, orient, dtype=True, convert_axes=True, convert_dates=Tr
 
     def parse(self):
 
-        # try numpy 
+        # try numpy
         numpy = self.numpy
         if numpy:
             self._parse_numpy()
@@ -269,7 +293,7 @@ def _try_convert_data(self, name, data, use_dtypes=True, convert_dates=True):
                 pass
 
         if data.dtype == 'float':
-            
+
             # coerce floats to 64
             try:
                 data = data.astype('float64')
@@ -291,7 +315,7 @@ def _try_convert_data(self, name, data, use_dtypes=True, convert_dates=True):
 
         # coerce ints to 64
         if data.dtype == 'int':
-            
+
             # coerce floats to 64
             try:
                 data = data.astype('int64')
@@ -322,7 +346,7 @@ def _try_convert_to_date(self, data):
         if issubclass(new_data.dtype.type,np.number):
             if not ((new_data == iNaT) | (new_data > 31536000000000000L)).all():
                 return data, False
-                
+
         try:
             new_data = to_datetime(new_data)
         except:
@@ -342,7 +366,7 @@ class SeriesParser(Parser):
     _default_orient = 'index'
 
     def _parse_no_numpy(self):
-    
+
         json = self.json
         orient = self.orient
         if orient == "split":
diff --git a/pandas/io/tests/test_json/test_pandas.py b/pandas/io/tests/test_json/test_pandas.py
index bc6ba1a45136c..32f951b3f17e1 100644
--- a/pandas/io/tests/test_json/test_pandas.py
+++ b/pandas/io/tests/test_json/test_pandas.py
@@ -409,6 +409,18 @@ def test_misc_example(self):
         expected = DataFrame([[1,2],[1,2]],columns=['a','b'])
         assert_frame_equal(result,expected)
 
+    def test_small_floats(self):
+
+        # raise
+        df = DataFrame([[1e-16,'foo',1e-8]],columns=list('ABC'))
+        self.assertRaises(ValueError, df.to_json)
+        s = Series([1e-16])
+        self.assertRaises(ValueError, s.to_json)
+
+        # ok
+        df = DataFrame([[1e-15,'foo',1e-8]],columns=list('ABC'))
+        df.to_json()
+
     @network
     @slow
     def test_round_trip_exception_(self):