Skip to content

Commit 9d5c8d2

Browse files
committed
ENH: Provide dict object for to_dict() pandas-dev#16122
1 parent 02eafaf commit 9d5c8d2

File tree

5 files changed

+102
-60
lines changed

5 files changed

+102
-60
lines changed

doc/source/whatsnew/v0.20.0.txt

+1-2
Original file line numberDiff line numberDiff line change
@@ -526,9 +526,8 @@ Other Enhancements
526526
whether the length of a ``Series`` should be shown in its repr (:issue:`7117`).
527527
- ``parallel_coordinates()`` has gained a ``sort_labels`` keyword arg that sorts class labels and the colours assigned to them (:issue:`15908`)
528528
- Options added to allow one to turn on/off using ``bottleneck`` and ``numexpr``, see :ref:`here <basics.accelerate>` (:issue:`16157`)
529-
530529
- ``DataFrame.style.bar()`` now accepts two more options to further customize the bar chart. Bar alignment is set with ``align='left'|'mid'|'zero'``; the default is "left", which is backward compatible. You can now also pass a list of colors as ``color=[color_negative, color_positive]``. (:issue:`14757`)
531-
530+
- ``Series.to_dict()`` and ``DataFrame.to_dict()`` now support an ``into`` keyword which allows you to specify the ``collections.Mapping`` subclass that you would like returned. The default is ``dict``, which is backwards compatible. (:issue:`16122`)
532531

533532
.. _ISO 8601 duration: https://en.wikipedia.org/wiki/ISO_8601#Durations
534533

pandas/core/frame.py

+27-14
Original file line numberDiff line numberDiff line change
@@ -860,7 +860,7 @@ def from_dict(cls, data, orient='columns', dtype=None):
860860

861861
return cls(data, index=index, columns=columns, dtype=dtype)
862862

863-
def to_dict(self, orient='dict'):
863+
def to_dict(self, orient='dict', into=dict):
864864
"""Convert DataFrame to dictionary.
865865
866866
Parameters
@@ -882,32 +882,45 @@ def to_dict(self, orient='dict'):
882882
Abbreviations are allowed. `s` indicates `series` and `sp`
883883
indicates `split`.
884884
885+
into : class, default dict
886+
The collections.Mapping subclass used for all Mappings
887+
in the return value.
888+
.. versionadded:: 0.20.0
889+
885890
Returns
886891
-------
887-
result : dict like {column -> {index -> value}}
892+
result : collections.Mapping like {column -> {index -> value}}
893+
If ``into`` is collections.defaultdict, the return
894+
value's default_factory will be None.
888895
"""
896+
# GH16122
897+
if not issubclass(into, collections.Mapping):
898+
raise TypeError('unsupported type: {}'.format(type(into)))
889899
if not self.columns.is_unique:
890900
warnings.warn("DataFrame columns are not unique, some "
891901
"columns will be omitted.", UserWarning)
902+
into_c = (functools.partial(into, None)
903+
if into == collections.defaultdict else into)
892904
if orient.lower().startswith('d'):
893-
return dict((k, v.to_dict()) for k, v in compat.iteritems(self))
905+
return into_c(
906+
(k, v.to_dict(into)) for k, v in compat.iteritems(self))
894907
elif orient.lower().startswith('l'):
895-
return dict((k, v.tolist()) for k, v in compat.iteritems(self))
908+
return into_c((k, v.tolist()) for k, v in compat.iteritems(self))
896909
elif orient.lower().startswith('sp'):
897-
return {'index': self.index.tolist(),
898-
'columns': self.columns.tolist(),
899-
'data': lib.map_infer(self.values.ravel(),
900-
_maybe_box_datetimelike)
901-
.reshape(self.values.shape).tolist()}
910+
return into_c((('index', self.index.tolist()),
911+
('columns', self.columns.tolist()),
912+
('data', lib.map_infer(self.values.ravel(),
913+
_maybe_box_datetimelike)
914+
.reshape(self.values.shape).tolist())))
902915
elif orient.lower().startswith('s'):
903-
return dict((k, _maybe_box_datetimelike(v))
904-
for k, v in compat.iteritems(self))
916+
return into_c((k, _maybe_box_datetimelike(v))
917+
for k, v in compat.iteritems(self))
905918
elif orient.lower().startswith('r'):
906-
return [dict((k, _maybe_box_datetimelike(v))
907-
for k, v in zip(self.columns, row))
919+
return [into_c((k, _maybe_box_datetimelike(v))
920+
for k, v in zip(self.columns, row))
908921
for row in self.values]
909922
elif orient.lower().startswith('i'):
910-
return dict((k, v.to_dict()) for k, v in self.iterrows())
923+
return into_c((k, v.to_dict(into)) for k, v in self.iterrows())
911924
else:
912925
raise ValueError("orient '%s' not understood" % orient)
913926

pandas/core/series.py

+21-5
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,7 @@
99
import types
1010
import warnings
1111
from textwrap import dedent
12+
import collections
1213

1314
from numpy import nan, ndarray
1415
import numpy as np
@@ -1074,15 +1075,30 @@ def tolist(self):
10741075
""" Convert Series to a nested list """
10751076
return list(self.asobject)
10761077

1077-
def to_dict(self):
1078+
def to_dict(self, into=dict):
10781079
"""
1079-
Convert Series to {label -> value} dict
1080+
Convert Series to {label -> value} dict or dict-like object.
1081+
Parameters
1082+
----------
1083+
into : class, default dict
1084+
The collections.Mapping subclass to use as the return
1085+
object.
1086+
.. versionadded:: 0.20.0
10801087
10811088
Returns
10821089
-------
1083-
value_dict : dict
1084-
"""
1085-
return dict(compat.iteritems(self))
1090+
value_dict : collections.Mapping
1091+
If ``into`` is collections.defaultdict, the return
1092+
value's default_factory will be None.
1093+
"""
1094+
# GH16122
1095+
if issubclass(into, collections.Mapping):
1096+
if into == collections.defaultdict:
1097+
return into(None, compat.iteritems(self))
1098+
else:
1099+
return into(compat.iteritems(self))
1100+
else:
1101+
raise TypeError('unsupported type: {}'.format(type(into)))
10861102

10871103
def to_frame(self, name=None):
10881104
"""

pandas/tests/frame/test_convert_to.py

+43-38
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,7 @@
11
# -*- coding: utf-8 -*-
22

33
import pytest
4+
import collections
45
import numpy as np
56

67
from pandas import compat
@@ -18,44 +19,48 @@ def test_to_dict(self):
1819
'A': {'1': 1, '2': 2},
1920
'B': {'1': '1', '2': '2', '3': '3'},
2021
}
21-
recons_data = DataFrame(test_data).to_dict()
22-
23-
for k, v in compat.iteritems(test_data):
24-
for k2, v2 in compat.iteritems(v):
25-
assert v2 == recons_data[k][k2]
26-
27-
recons_data = DataFrame(test_data).to_dict("l")
28-
29-
for k, v in compat.iteritems(test_data):
30-
for k2, v2 in compat.iteritems(v):
31-
assert v2 == recons_data[k][int(k2) - 1]
32-
33-
recons_data = DataFrame(test_data).to_dict("s")
34-
35-
for k, v in compat.iteritems(test_data):
36-
for k2, v2 in compat.iteritems(v):
37-
assert v2 == recons_data[k][k2]
38-
39-
recons_data = DataFrame(test_data).to_dict("sp")
40-
expected_split = {'columns': ['A', 'B'], 'index': ['1', '2', '3'],
41-
'data': [[1.0, '1'], [2.0, '2'], [np.nan, '3']]}
42-
tm.assert_dict_equal(recons_data, expected_split)
43-
44-
recons_data = DataFrame(test_data).to_dict("r")
45-
expected_records = [{'A': 1.0, 'B': '1'},
46-
{'A': 2.0, 'B': '2'},
47-
{'A': np.nan, 'B': '3'}]
48-
assert isinstance(recons_data, list)
49-
assert len(recons_data) == 3
50-
for l, r in zip(recons_data, expected_records):
51-
tm.assert_dict_equal(l, r)
52-
53-
# GH10844
54-
recons_data = DataFrame(test_data).to_dict("i")
55-
56-
for k, v in compat.iteritems(test_data):
57-
for k2, v2 in compat.iteritems(v):
58-
assert v2 == recons_data[k2][k]
22+
# GH16122
23+
test_maps = (
24+
dict, collections.defaultdict, collections.OrderedDict)
25+
for mapping in test_maps:
26+
recons_data = DataFrame(test_data).to_dict(into=mapping)
27+
28+
for k, v in compat.iteritems(test_data):
29+
for k2, v2 in compat.iteritems(v):
30+
self.assertEqual(v2, recons_data[k][k2])
31+
32+
recons_data = DataFrame(test_data).to_dict("l", mapping)
33+
34+
for k, v in compat.iteritems(test_data):
35+
for k2, v2 in compat.iteritems(v):
36+
self.assertEqual(v2, recons_data[k][int(k2) - 1])
37+
38+
recons_data = DataFrame(test_data).to_dict("s", mapping)
39+
40+
for k, v in compat.iteritems(test_data):
41+
for k2, v2 in compat.iteritems(v):
42+
self.assertEqual(v2, recons_data[k][k2])
43+
44+
recons_data = DataFrame(test_data).to_dict("sp", mapping)
45+
expected_split = {'columns': ['A', 'B'], 'index': ['1', '2', '3'],
46+
'data': [[1.0, '1'], [2.0, '2'], [np.nan, '3']]}
47+
tm.assert_dict_equal(recons_data, expected_split)
48+
49+
recons_data = DataFrame(test_data).to_dict("r", mapping)
50+
expected_records = [{'A': 1.0, 'B': '1'},
51+
{'A': 2.0, 'B': '2'},
52+
{'A': np.nan, 'B': '3'}]
53+
assert isinstance(recons_data, list)
54+
self.assertEqual(len(recons_data), 3)
55+
for l, r in zip(recons_data, expected_records):
56+
tm.assert_dict_equal(l, r)
57+
58+
# GH10844
59+
recons_data = DataFrame(test_data).to_dict("i")
60+
61+
for k, v in compat.iteritems(test_data):
62+
for k2, v2 in compat.iteritems(v):
63+
self.assertEqual(v2, recons_data[k2][k])
5964

6065
def test_to_dict_timestamp(self):
6166

pandas/tests/series/test_io.py

+10-1
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,7 @@
22
# pylint: disable-msg=E1101,W0612
33

44
from datetime import datetime
5+
import collections
56

67
import numpy as np
78
import pandas as pd
@@ -127,7 +128,15 @@ def test_to_frame(self):
127128
assert_frame_equal(rs, xp)
128129

129130
def test_to_dict(self):
130-
tm.assert_series_equal(Series(self.ts.to_dict(), name='ts'), self.ts)
131+
# GH16122
132+
test_maps = (
133+
dict, collections.defaultdict, collections.OrderedDict)
134+
for mapping in test_maps:
135+
tm.assert_series_equal(
136+
Series(self.ts.to_dict(mapping), name='ts'), self.ts)
137+
from_method = Series(self.ts.to_dict(collections.Counter))
138+
from_constructor = Series(collections.Counter(self.ts.iteritems()))
139+
tm.assert_series_equal(from_method, from_constructor)
131140

132141
def test_timeseries_periodindex(self):
133142
# GH2891

0 commit comments

Comments
 (0)