ENH: Add index parameter to to_json (pandas-dev#17394)

reidy-p · reidy-p · commit 11c90be24d06 · 2017-11-29T21:58:42.000Z
diff --git a/json_test.py b/json_test.py
@@ -0,0 +1,27 @@
+"""Manual smoke check for the ``index`` argument of ``to_json``.
+
+Run directly; importing this module has no side effects.
+"""
+import pandas as pd
+
+
+def main():
+    s = pd.Series([1, 2, 3], name='A')
+    df = pd.DataFrame([[1, 2, 3], [4, 5, 6]], columns=['a', 'b', 'c'])
+
+    # 'split' and 'table' are the orients that accept index=False.
+    for orient in ('split', 'table'):
+        for obj in (df, s):
+            for index in (False, True):
+                print(obj.to_json(orient=orient, index=index))
+
+    # Every other orient is expected to raise when index=False.
+    for orient in ('records', 'index', 'columns', 'values'):
+        try:
+            df.to_json(orient=orient, index=False)
+        except ValueError as err:
+            print('{orient}: {err}'.format(orient=orient, err=err))
+
+
+if __name__ == '__main__':
+    main()
diff --git a/pandas/core/generic.py b/pandas/core/generic.py
@@ -1295,7 +1295,8 @@ def _repr_latex_(self):
 
     def to_json(self, path_or_buf=None, orient=None, date_format=None,
                 double_precision=10, force_ascii=True, date_unit='ms',
-                default_handler=None, lines=False, compression=None):
+                default_handler=None, lines=False, compression=None,
+                index=True):
         """
         Convert the object to a JSON string.
 
@@ -1415,7 +1416,8 @@ def to_json(self, path_or_buf=None, orient=None, date_format=None,
                             double_precision=double_precision,
                             force_ascii=force_ascii, date_unit=date_unit,
                             default_handler=default_handler,
-                            lines=lines, compression=compression)
+                            lines=lines, compression=compression,
+                            index=index)
 
     def to_hdf(self, path_or_buf, key, **kwargs):
         """Write the contained data to an HDF5 file using HDFStore.
diff --git a/pandas/io/json/json.py b/pandas/io/json/json.py
@@ -28,7 +28,12 @@
 # interface to/from
 def to_json(path_or_buf, obj, orient=None, date_format='epoch',
             double_precision=10, force_ascii=True, date_unit='ms',
-            default_handler=None, lines=False, compression=None):
+            default_handler=None, lines=False, compression=None,
+            index=True):
+
+    if not index and orient not in ['split', 'table']:
+        raise ValueError("'index=False' is only valid when 'orient' is "
+                         "'split' or 'table'")
 
     path_or_buf = _stringify_path(path_or_buf)
     if lines and orient != 'records':
@@ -49,7 +54,8 @@ def to_json(path_or_buf, obj, orient=None, date_format='epoch',
     s = writer(
         obj, orient=orient, date_format=date_format,
         double_precision=double_precision, ensure_ascii=force_ascii,
-        date_unit=date_unit, default_handler=default_handler).write()
+        date_unit=date_unit, default_handler=default_handler,
+        index=index).write()
 
     if lines:
         s = _convert_to_line_delimits(s)
@@ -69,7 +75,7 @@ def to_json(path_or_buf, obj, orient=None, date_format='epoch',
 class Writer(object):
 
     def __init__(self, obj, orient, date_format, double_precision,
-                 ensure_ascii, date_unit, default_handler=None):
+                 ensure_ascii, date_unit, index, default_handler=None):
         self.obj = obj
 
         if orient is None:
@@ -81,6 +87,7 @@ def __init__(self, obj, orient, date_format, double_precision,
         self.ensure_ascii = ensure_ascii
         self.date_unit = date_unit
         self.default_handler = default_handler
+        self.index = index
 
         self.is_copy = None
         self._format_axes()
@@ -89,6 +96,24 @@ def _format_axes(self):
         raise AbstractMethodError(self)
 
     def write(self):
+        if not self.index and self.orient == 'split':
+            # index=False: dump a plain dict that leaves out the index entry
+            if isinstance(self.obj, DataFrame):
+                obj_dict = self.obj.to_dict(orient='split')
+                del obj_dict["index"]
+            elif isinstance(self.obj, Series):
+                obj_dict = {"name": self.obj.name, "data": self.obj.values}
+            return dumps(
+                obj_dict,
+                orient=self.orient,
+                double_precision=self.double_precision,
+                ensure_ascii=self.ensure_ascii,
+                date_unit=self.date_unit,
+                iso_dates=self.date_format == 'iso',
+                default_handler=self.default_handler)
+        if not self.index and self.orient == 'records':
+            # NOTE(review): 'index' is presumably the reset_index() column
+            self.obj = self.obj.drop('index', axis=1, errors='ignore')
         return dumps(
             self.obj,
             orient=self.orient,
@@ -128,7 +153,7 @@ class JSONTableWriter(FrameWriter):
     _default_orient = 'records'
 
     def __init__(self, obj, orient, date_format, double_precision,
-                 ensure_ascii, date_unit, default_handler=None):
+                 ensure_ascii, date_unit, index, default_handler=None):
         """
         Adds a `schema` attribut with the Table Schema, resets
         the index (can't do in caller, because the schema inference needs
@@ -137,7 +162,7 @@ def __init__(self, obj, orient, date_format, double_precision,
         """
         super(JSONTableWriter, self).__init__(
             obj, orient, date_format, double_precision, ensure_ascii,
-            date_unit, default_handler=default_handler)
+            date_unit, index, default_handler=default_handler)
 
         if date_format != 'iso':
             msg = ("Trying to write with `orient='table'` and "
@@ -146,7 +171,7 @@ def __init__(self, obj, orient, date_format, double_precision,
                    .format(fmt=date_format))
             raise ValueError(msg)
 
-        self.schema = build_table_schema(obj)
+        self.schema = build_table_schema(obj, index=self.index)
 
         # NotImplementd on a column MultiIndex
         if obj.ndim == 2 and isinstance(obj.columns, MultiIndex):
diff --git a/pandas/tests/io/json/test_pandas.py b/pandas/tests/io/json/test_pandas.py
@@ -1147,3 +1147,40 @@ def test_data_frame_size_after_to_json(self):
         size_after = df.memory_usage(index=True, deep=True).sum()
 
         assert size_before == size_after
+
+    def test_index_false_to_json(self):
+        # GH 17394: index=False drops the index for 'split' and 'table'
+        df = pd.DataFrame([[1, 2], [4, 5]], columns=['a', 'b'])
+
+        result = df.to_json(orient='split', index=False)
+        expected = '{"columns":["a","b"],"data":[[1,2],[4,5]]}'
+        assert result == expected
+
+        result = df.to_json(orient='table', index=False)
+        expected = ('{"schema": {"fields":[{"name":"a","type":"integer"},'
+                    '{"name":"b","type":"integer"}],'
+                    '"pandas_version":"0.20.0"}, '
+                    '"data": [{"a":1,"b":2},{"a":4,"b":5}]}')
+        assert result == expected
+
+        s = pd.Series([1, 2, 3], name='A')
+        result = s.to_json(orient='split', index=False)
+        expected = '{"name":"A","data":[1,2,3]}'
+        assert result == expected
+
+        result = s.to_json(orient='table', index=False)
+        expected = ('{"schema": {"fields":[{"name":"A","type":"integer"}],'
+                    '"pandas_version":"0.20.0"}, '
+                    '"data": [{"A":1},{"A":2},{"A":3}]}')
+        assert result == expected
+
+    @pytest.mark.parametrize('orient', [
+        'records', 'index', 'columns', 'values'])
+    def test_index_false_error_to_json(self, orient):
+        # GH 17394: every other orient must reject index=False
+        df = pd.DataFrame([[1, 2], [4, 5]], columns=['a', 'b'])
+
+        with tm.assert_raises_regex(ValueError, "'index=False' is only "
+                                                "valid when 'orient' is "
+                                                "'split' or 'table'"):
+            df.to_json(orient=orient, index=False)