ENH: Add orient argument and split option to DataFrame.to_dict. (GH7840)

rpetchler · jreback · commit 5995dc36a090 · 2014-10-06T15:52:51.000-04:00
Update documentation with deprecation and enhancement notices.

Remove indentation from list in docstring.
diff --git a/doc/source/v0.15.0.txt b/doc/source/v0.15.0.txt
@@ -767,6 +767,7 @@ Prior Version Deprecations/Changes
 Deprecations
 ~~~~~~~~~~~~
 
+- The ``outtype`` argument to ``pd.DataFrame.to_dict`` has been deprecated in favor of ``orient``. (:issue:`7840`)
 - The ``convert_dummies`` method has been deprecated in favor of
   ``get_dummies`` (:issue:`8140`)
 - The ``infer_dst`` argument in ``tz_localize`` will be deprecated in favor of
@@ -849,7 +850,7 @@ Enhancements
     idx
     idx + pd.offsets.MonthEnd(3)
 
-
+- Added ``split`` as an option to the ``orient`` argument in ``pd.DataFrame.to_dict``. (:issue:`7840`)
 
 - The ``get_dummies`` method can now be used on DataFrames. By default only
   catagorical columns are encoded as 0's and 1's, while other columns are
diff --git a/pandas/core/frame.py b/pandas/core/frame.py
@@ -640,19 +640,25 @@ def from_dict(cls, data, orient='columns', dtype=None):
 
         return cls(data, index=index, columns=columns, dtype=dtype)
 
-    def to_dict(self, outtype='dict'):
-        """
-        Convert DataFrame to dictionary.
+    @deprecate_kwarg(old_arg_name='outtype', new_arg_name='orient')
+    def to_dict(self, orient='dict'):
+        """Convert DataFrame to dictionary.
 
         Parameters
         ----------
-        outtype : str {'dict', 'list', 'series', 'records'}
-            Determines the type of the values of the dictionary. The
-            default `dict` is a nested dictionary {column -> {index -> value}}.
-            `list` returns {column -> list(values)}. `series` returns
-            {column -> Series(values)}. `records` returns [{columns -> value}].
-            Abbreviations are allowed.
+        orient : str {'dict', 'list', 'series', 'split', 'records'}
+            Determines the structure of the returned object.
+
+            - dict (default) : dict like {column -> {index -> value}}
+            - list : dict like {column -> [values]}
+            - series : dict like {column -> Series(values)}
+            - split : dict like
+              {index -> [index], columns -> [columns], data -> [values]}
+            - records : list like
+              [{column -> value}, ... , {column -> value}]
 
+            Abbreviations are allowed. `s` indicates `series` and `sp`
+            indicates `split`.
 
         Returns
         -------
@@ -661,13 +667,17 @@ def to_dict(self, outtype='dict'):
         if not self.columns.is_unique:
             warnings.warn("DataFrame columns are not unique, some "
                           "columns will be omitted.", UserWarning)
-        if outtype.lower().startswith('d'):
+        if orient.lower().startswith('d'):
             return dict((k, v.to_dict()) for k, v in compat.iteritems(self))
-        elif outtype.lower().startswith('l'):
+        elif orient.lower().startswith('l'):
             return dict((k, v.tolist()) for k, v in compat.iteritems(self))
-        elif outtype.lower().startswith('s'):
+        elif orient.lower().startswith('sp'):
+            return {'index': self.index.tolist(),
+                    'columns': self.columns.tolist(),
+                    'data': self.values.tolist()}
+        elif orient.lower().startswith('s'):
             return dict((k, v) for k, v in compat.iteritems(self))
-        elif outtype.lower().startswith('r'):
+        elif orient.lower().startswith('r'):
             return [dict((k, v) for k, v in zip(self.columns, row))
                     for row in self.values]
         else:  # pragma: no cover
diff --git a/pandas/tests/test_frame.py b/pandas/tests/test_frame.py
@@ -4037,6 +4037,13 @@ def test_to_dict(self):
             for k2, v2 in compat.iteritems(v):
                 self.assertEqual(v2, recons_data[k][k2])
 
+        recons_data = DataFrame(test_data).to_dict("sp")
+
+        expected_split = {'columns': ['A', 'B'], 'index': ['1', '2', '3'],
+                          'data': [[1.0, '1'], [2.0, '2'], [nan, '3']]}
+
+        tm.assert_almost_equal(recons_data, expected_split)
+
         recons_data = DataFrame(test_data).to_dict("r")
 
         expected_records = [{'A': 1.0, 'B': '1'},