pandas-dev · TomAugspurger · May 16, 2017 · May 5, 2017 · May 5, 2017 · May 6, 2017
diff --git a/doc/source/whatsnew/v0.20.0.txt b/doc/source/whatsnew/v0.20.0.txt
@@ -515,7 +515,6 @@ Other Enhancements
 - Options added to allow one to turn on/off using ``bottleneck`` and ``numexpr``, see :ref:`here <basics.accelerate>` (:issue:`16157`)
 - ``DataFrame.style.bar()`` now accepts two more options to further customize the bar chart. Bar alignment is set with ``align='left'|'mid'|'zero'``, the default is "left", which is backward compatible; You can now pass a list of ``color=[color_negative, color_positive]``. (:issue:`14757`)
 
-
 .. _ISO 8601 duration: https://en.wikipedia.org/wiki/ISO_8601#Durations
 
 

diff --git a/doc/source/whatsnew/v0.21.0.txt b/doc/source/whatsnew/v0.21.0.txt
@@ -26,6 +26,7 @@ New features
 
 Other Enhancements
 ^^^^^^^^^^^^^^^^^^
+- ``Series.to_dict()`` and ``DataFrame.to_dict()`` now support an ``into`` keyword which allows you to specify the ``collections.Mapping`` subclass that you would like returned.  The default is ``dict``, which is backwards compatible. (:issue:`16122`)
 
 
 

diff --git a/pandas/core/common.py b/pandas/core/common.py
@@ -6,6 +6,8 @@
 import warnings
 from datetime import datetime, timedelta
 from functools import partial
+import inspect
+import collections
 
 import numpy as np
 from pandas._libs import lib, tslib
@@ -479,6 +481,43 @@ def _dict_compat(d):
                 for key, value in iteritems(d))
 
 
+def _standardize_mapping(into):
+    """
+    Helper function to standardize the ``into`` argument supplied to
+    ``Series.to_dict()`` and ``DataFrame.to_dict()``.
+
+    Parameters
+    ----------
+    into : instance or subclass of collections.Mapping
+        The argument supplied to ``to_dict``.  Must be a class, an
+        initialized collections.defaultdict, or an empty instance
+        of a collections.Mapping subclass.
+
+    Returns
+    -------
+    mapping : a collections.Mapping subclass or other constructor
+        a callable object that can accept an iterator to create
+        the desired Mapping.
+
+    """
+    if not inspect.isclass(into):
+        if len(into) > 0:
+            raise ValueError(
+                "to_dict() only accepts empty mappings.")
+        elif type(into) == collections.defaultdict:
+            return partial(
+                collections.defaultdict, into.default_factory)
+        else:
+            return _standardize_mapping(type(into))
+    elif not issubclass(into, collections.Mapping):
+        raise TypeError('unsupported type: {}'.format(into))
+    elif into == collections.defaultdict:
+        raise TypeError(
+            'to_dict() only accepts initialized defaultdicts')
+    else:
+        return into
+
+
 def sentinel_factory():
     class Sentinel(object):
         pass

diff --git a/pandas/core/frame.py b/pandas/core/frame.py
@@ -63,7 +63,8 @@
                                 _default_index,
                                 _values_from_object,
                                 _maybe_box_datetimelike,
-                                _dict_compat)
+                                _dict_compat,
+                                _standardize_mapping)
 from pandas.core.generic import NDFrame, _shared_docs
 from pandas.core.index import Index, MultiIndex, _ensure_index
 from pandas.core.indexing import (maybe_droplevels, convert_to_index_sliceable,
@@ -860,7 +861,7 @@ def from_dict(cls, data, orient='columns', dtype=None):
 
         return cls(data, index=index, columns=columns, dtype=dtype)
 
-    def to_dict(self, orient='dict'):
+    def to_dict(self, orient='dict', into=dict):
         """Convert DataFrame to dictionary.
 
         Parameters
@@ -882,32 +883,46 @@ def to_dict(self, orient='dict'):
             Abbreviations are allowed. `s` indicates `series` and `sp`
             indicates `split`.
 
+        into : class, default dict
+            The collections.Mapping subclass used for all Mappings
+            in the return value.  Can be the actual class or an empty
+            instance of the mapping type you want.  If you want a
+            collections.defaultdict, you must pass an initialized
+            instance.
+
+            .. versionadded:: 0.21.0
+
         Returns
         -------
-        result : dict like {column -> {index -> value}}
+        result : collections.Mapping like {column -> {index -> value}}
+            If ``into`` is an initialized collections.defaultdict, the
+            return value keeps the same default_factory.
         """
         if not self.columns.is_unique:
             warnings.warn("DataFrame columns are not unique, some "
                           "columns will be omitted.", UserWarning)
+        # GH16122
+        into_c = _standardize_mapping(into)
         if orient.lower().startswith('d'):
-            return dict((k, v.to_dict()) for k, v in compat.iteritems(self))
+            return into_c(
+                (k, v.to_dict(into)) for k, v in compat.iteritems(self))
         elif orient.lower().startswith('l'):
-            return dict((k, v.tolist()) for k, v in compat.iteritems(self))
+            return into_c((k, v.tolist()) for k, v in compat.iteritems(self))
         elif orient.lower().startswith('sp'):
-            return {'index': self.index.tolist(),
-                    'columns': self.columns.tolist(),
-                    'data': lib.map_infer(self.values.ravel(),
-                                          _maybe_box_datetimelike)
-                    .reshape(self.values.shape).tolist()}
+            return into_c((('index', self.index.tolist()),
+                           ('columns', self.columns.tolist()),
+                           ('data', lib.map_infer(self.values.ravel(),
+                                                  _maybe_box_datetimelike)
+                            .reshape(self.values.shape).tolist())))
         elif orient.lower().startswith('s'):
-            return dict((k, _maybe_box_datetimelike(v))
-                        for k, v in compat.iteritems(self))
+            return into_c((k, _maybe_box_datetimelike(v))
+                          for k, v in compat.iteritems(self))
         elif orient.lower().startswith('r'):
-            return [dict((k, _maybe_box_datetimelike(v))
-                         for k, v in zip(self.columns, row))
+            return [into_c((k, _maybe_box_datetimelike(v))
+                           for k, v in zip(self.columns, row))
                     for row in self.values]
         elif orient.lower().startswith('i'):
-            return dict((k, v.to_dict()) for k, v in self.iterrows())
+            return into_c((k, v.to_dict(into)) for k, v in self.iterrows())
         else:
             raise ValueError("orient '%s' not understood" % orient)
 

diff --git a/pandas/core/series.py b/pandas/core/series.py
@@ -46,7 +46,8 @@
                                 _maybe_match_name,
                                 SettingWithCopyError,
                                 _maybe_box_datetimelike,
-                                _dict_compat)
+                                _dict_compat,
+                                _standardize_mapping)
 from pandas.core.index import (Index, MultiIndex, InvalidIndexError,
                                Float64Index, _ensure_index)
 from pandas.core.indexing import check_bool_indexer, maybe_convert_indices
@@ -1074,15 +1075,29 @@ def tolist(self):
         """ Convert Series to a nested list """
         return list(self.asobject)
 
-    def to_dict(self):
+    def to_dict(self, into=dict):
         """
-        Convert Series to {label -> value} dict
+        Convert Series to {label -> value} dict or dict-like object
+
+        Parameters
+        ----------
+        into : class, default dict
+            The collections.Mapping subclass to use as the return
+            object. Can be the actual class or an empty
+            instance of the mapping type you want.  If you want a
+            collections.defaultdict, you must pass it initialized.
+
+            .. versionadded:: 0.21.0
 
         Returns
         -------
-        value_dict : dict
+        value_dict : collections.Mapping
+            If ``into`` is an initialized collections.defaultdict, the
+            return value keeps the same default_factory.
         """
-        return dict(compat.iteritems(self))
+        # GH16122
+        into_c = _standardize_mapping(into)
+        return into_c(compat.iteritems(self))
 
     def to_frame(self, name=None):
         """

diff --git a/pandas/tests/frame/test_convert_to.py b/pandas/tests/frame/test_convert_to.py
@@ -1,6 +1,7 @@
 # -*- coding: utf-8 -*-
 
 import pytest
+import collections
 import numpy as np
 
 from pandas import compat
@@ -13,50 +14,6 @@
 
 class TestDataFrameConvertTo(TestData):
 
-    def test_to_dict(self):
-        test_data = {
-            'A': {'1': 1, '2': 2},
-            'B': {'1': '1', '2': '2', '3': '3'},
-        }
-        recons_data = DataFrame(test_data).to_dict()
-
-        for k, v in compat.iteritems(test_data):
-            for k2, v2 in compat.iteritems(v):
-                assert v2 == recons_data[k][k2]
-
-        recons_data = DataFrame(test_data).to_dict("l")
-
-        for k, v in compat.iteritems(test_data):
-            for k2, v2 in compat.iteritems(v):
-                assert v2 == recons_data[k][int(k2) - 1]
-
-        recons_data = DataFrame(test_data).to_dict("s")
-
-        for k, v in compat.iteritems(test_data):
-            for k2, v2 in compat.iteritems(v):
-                assert v2 == recons_data[k][k2]
-
-        recons_data = DataFrame(test_data).to_dict("sp")
-        expected_split = {'columns': ['A', 'B'], 'index': ['1', '2', '3'],
-                          'data': [[1.0, '1'], [2.0, '2'], [np.nan, '3']]}
-        tm.assert_dict_equal(recons_data, expected_split)
-
-        recons_data = DataFrame(test_data).to_dict("r")
-        expected_records = [{'A': 1.0, 'B': '1'},
-                            {'A': 2.0, 'B': '2'},
-                            {'A': np.nan, 'B': '3'}]
-        assert isinstance(recons_data, list)
-        assert len(recons_data) == 3
-        for l, r in zip(recons_data, expected_records):
-            tm.assert_dict_equal(l, r)
-
-        # GH10844
-        recons_data = DataFrame(test_data).to_dict("i")
-
-        for k, v in compat.iteritems(test_data):
-            for k2, v2 in compat.iteritems(v):
-                assert v2 == recons_data[k2][k]
-
     def test_to_dict_timestamp(self):
 
         # GH11247
@@ -190,17 +147,65 @@ def test_to_records_with_unicode_column_names(self):
         )
         tm.assert_almost_equal(result, expected)
 
+    @pytest.mark.parametrize('mapping', [
+        dict,
+        collections.defaultdict(list),
+        collections.OrderedDict])
+    def test_to_dict(self, mapping):
+        test_data = {
+            'A': {'1': 1, '2': 2},
+            'B': {'1': '1', '2': '2', '3': '3'},
+        }
+        # GH16122
+        recons_data = DataFrame(test_data).to_dict(into=mapping)
+
+        for k, v in compat.iteritems(test_data):
+            for k2, v2 in compat.iteritems(v):
+                assert (v2 == recons_data[k][k2])
+
+        recons_data = DataFrame(test_data).to_dict("l", mapping)
+
+        for k, v in compat.iteritems(test_data):
+            for k2, v2 in compat.iteritems(v):
+                assert (v2 == recons_data[k][int(k2) - 1])
+
+        recons_data = DataFrame(test_data).to_dict("s", mapping)
+
+        for k, v in compat.iteritems(test_data):
+            for k2, v2 in compat.iteritems(v):
+                assert (v2 == recons_data[k][k2])
+
+        recons_data = DataFrame(test_data).to_dict("sp", mapping)
+        expected_split = {'columns': ['A', 'B'], 'index': ['1', '2', '3'],
+                          'data': [[1.0, '1'], [2.0, '2'], [np.nan, '3']]}
+        tm.assert_dict_equal(recons_data, expected_split)
+
+        recons_data = DataFrame(test_data).to_dict("r", mapping)
+        expected_records = [{'A': 1.0, 'B': '1'},
+                            {'A': 2.0, 'B': '2'},
+                            {'A': np.nan, 'B': '3'}]
+        assert isinstance(recons_data, list)
+        assert (len(recons_data) == 3)
+        for l, r in zip(recons_data, expected_records):
+            tm.assert_dict_equal(l, r)
+
+        # GH10844
+        recons_data = DataFrame(test_data).to_dict("i")
+
+        for k, v in compat.iteritems(test_data):
+            for k2, v2 in compat.iteritems(v):
+                assert (v2 == recons_data[k2][k])
 
-@pytest.mark.parametrize('tz', ['UTC', 'GMT', 'US/Eastern'])
-def test_to_records_datetimeindex_with_tz(tz):
-    # GH13937
-    dr = date_range('2016-01-01', periods=10,
-                    freq='S', tz=tz)
+    @pytest.mark.parametrize('tz', ['UTC', 'GMT', 'US/Eastern'])
+    def test_to_records_datetimeindex_with_tz(self, tz):
+        # GH13937
+        dr = date_range('2016-01-01', periods=10,
+                        freq='S', tz=tz)
 
-    df = DataFrame({'datetime': dr}, index=dr)
+        df = DataFrame({'datetime': dr}, index=dr)
 
-    expected = df.to_records()
-    result = df.tz_convert("UTC").to_records()
+        expected = df.to_records()
+        result = df.tz_convert("UTC").to_records()
 
-    # both converted to UTC, so they are equal
-    tm.assert_numpy_array_equal(result, expected)
+        # both converted to UTC, so they are equal
+        tm.assert_numpy_array_equal(result, expected)
diff --git a/pandas/tests/series/test_io.py b/pandas/tests/series/test_io.py
@@ -2,6 +2,8 @@
 # pylint: disable-msg=E1101,W0612
 
 from datetime import datetime
+import collections
+import pytest
 
 import numpy as np
 import pandas as pd
@@ -126,9 +128,6 @@ def test_to_frame(self):
             dict(testdifferent=self.ts.values), index=self.ts.index)
         assert_frame_equal(rs, xp)
 
-    def test_to_dict(self):
-        tm.assert_series_equal(Series(self.ts.to_dict(), name='ts'), self.ts)
-
     def test_timeseries_periodindex(self):
         # GH2891
         from pandas import period_range
@@ -167,6 +166,19 @@ class SubclassedFrame(DataFrame):
         expected = SubclassedFrame({'X': [1, 2, 3]})
         assert_frame_equal(result, expected)
 
+    @pytest.mark.parametrize('mapping', (
+        dict,
+        collections.defaultdict(list),
+        collections.OrderedDict))
+    def test_to_dict(self, mapping):
+        # GH16122
+        ts = TestData().ts
+        tm.assert_series_equal(
+            Series(ts.to_dict(mapping), name='ts'), ts)
+        from_method = Series(ts.to_dict(collections.Counter))
+        from_constructor = Series(collections.Counter(ts.iteritems()))
+        tm.assert_series_equal(from_method, from_constructor)
+
 
 class TestSeriesToList(TestData):
Original file line number	Diff line number	Diff line change
Expand Up		@@ -515,7 +515,6 @@ Other Enhancements
		- Options added to allow one to turn on/off using ``bottleneck`` and ``numexpr``, see :ref:`here <basics.accelerate>` (:issue:`16157`)
		- ``DataFrame.style.bar()`` now accepts two more options to further customize the bar chart. Bar alignment is set with ``align='left'|'mid'|'zero'``, the default is "left", which is backward compatible; You can now pass a list of ``color=[color_negative, color_positive]``. (:issue:`14757`)


Review comment by jreback (Contributor), May 6, 2017: reset this file
		.. _ISO 8601 duration: https://en.wikipedia.org/wiki/ISO_8601#Durations


Expand Down
Original file line number	Diff line number	Diff line change
Expand Up		@@ -26,6 +26,7 @@ New features

		Other Enhancements
		^^^^^^^^^^^^^^^^^^
		- ``Series.to_dict()`` and ``DataFrame.to_dict()`` now support an ``into`` keyword which allows you to specify the ``collections.Mapping`` subclass that you would like returned. The default is ``dict``, which is backwards compatible. (:issue:`16122`)



Expand Down