Skip to content

Commit c8c3dd6

Browse files
committed
ENH: Provide dict object for to_dict() pandas-dev#16122
1 parent 60a926b commit c8c3dd6

File tree

5 files changed

+102
-59
lines changed

5 files changed

+102
-59
lines changed

doc/source/whatsnew/v0.20.0.txt

+1-1
Original file line numberDiff line numberDiff line change
@@ -521,7 +521,7 @@ Other Enhancements
521521
- The ``display.show_dimensions`` option can now also be used to specify
522522
whether the length of a ``Series`` should be shown in its repr (:issue:`7117`).
523523
- ``parallel_coordinates()`` has gained a ``sort_labels`` keyword arg that sorts class labels and the colours assigned to them (:issue:`15908`)
524-
524+
- ``Series.to_dict()`` and ``DataFrame.to_dict()`` now support an ``into`` keyword which allows you to specify the ``collections.Mapping`` subclass that you would like returned. The default is ``dict``, which is backwards compatible. (:issue:`16122`)
525525

526526
.. _ISO 8601 duration: https://en.wikipedia.org/wiki/ISO_8601#Durations
527527

pandas/core/frame.py

+27-14
Original file line numberDiff line numberDiff line change
@@ -858,7 +858,7 @@ def from_dict(cls, data, orient='columns', dtype=None):
858858

859859
return cls(data, index=index, columns=columns, dtype=dtype)
860860

861-
def to_dict(self, orient='dict'):
861+
def to_dict(self, orient='dict', into=dict):
862862
"""Convert DataFrame to dictionary.
863863
864864
Parameters
@@ -880,32 +880,45 @@ def to_dict(self, orient='dict'):
880880
Abbreviations are allowed. `s` indicates `series` and `sp`
881881
indicates `split`.
882882
883+
into : class, default dict
884+
The collections.Mapping subclass used for all Mappings
885+
in the return value.
886+
.. versionadded:: 0.20.0
887+
883888
Returns
884889
-------
885-
result : dict like {column -> {index -> value}}
890+
result : collections.Mapping like {column -> {index -> value}}
891+
If ``into`` is collections.defaultdict, the return
892+
value's default_factory will be None.
886893
"""
894+
# GH16122
895+
if not issubclass(into, collections.Mapping):
896+
raise TypeError('unsupported type: {}'.format(into))
887897
if not self.columns.is_unique:
888898
warnings.warn("DataFrame columns are not unique, some "
889899
"columns will be omitted.", UserWarning)
900+
into_c = (functools.partial(into, None)
901+
if into == collections.defaultdict else into)
890902
if orient.lower().startswith('d'):
891-
return dict((k, v.to_dict()) for k, v in compat.iteritems(self))
903+
return into_c(
904+
(k, v.to_dict(into)) for k, v in compat.iteritems(self))
892905
elif orient.lower().startswith('l'):
893-
return dict((k, v.tolist()) for k, v in compat.iteritems(self))
906+
return into_c((k, v.tolist()) for k, v in compat.iteritems(self))
894907
elif orient.lower().startswith('sp'):
895-
return {'index': self.index.tolist(),
896-
'columns': self.columns.tolist(),
897-
'data': lib.map_infer(self.values.ravel(),
898-
_maybe_box_datetimelike)
899-
.reshape(self.values.shape).tolist()}
908+
return into_c((('index', self.index.tolist()),
909+
('columns', self.columns.tolist()),
910+
('data', lib.map_infer(self.values.ravel(),
911+
_maybe_box_datetimelike)
912+
.reshape(self.values.shape).tolist())))
900913
elif orient.lower().startswith('s'):
901-
return dict((k, _maybe_box_datetimelike(v))
902-
for k, v in compat.iteritems(self))
914+
return into_c((k, _maybe_box_datetimelike(v))
915+
for k, v in compat.iteritems(self))
903916
elif orient.lower().startswith('r'):
904-
return [dict((k, _maybe_box_datetimelike(v))
905-
for k, v in zip(self.columns, row))
917+
return [into_c((k, _maybe_box_datetimelike(v))
918+
for k, v in zip(self.columns, row))
906919
for row in self.values]
907920
elif orient.lower().startswith('i'):
908-
return dict((k, v.to_dict()) for k, v in self.iterrows())
921+
return into_c((k, v.to_dict(into)) for k, v in self.iterrows())
909922
else:
910923
raise ValueError("orient '%s' not understood" % orient)
911924

pandas/core/series.py

+21-5
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,7 @@
88

99
import types
1010
import warnings
11+
import collections
1112

1213
from numpy import nan, ndarray
1314
import numpy as np
@@ -1072,15 +1073,30 @@ def tolist(self):
10721073
""" Convert Series to a nested list """
10731074
return list(self.asobject)
10741075

1075-
def to_dict(self):
1076+
def to_dict(self, into=dict):
10761077
"""
1077-
Convert Series to {label -> value} dict
1078+
Convert Series to {label -> value} dict or dict-like object
1079+
Parameters
1080+
----------
1081+
into : class, default dict
1082+
The collections.Mapping subclass to use as the return
1083+
object.
1084+
.. versionadded:: 0.20.0
10781085
10791086
Returns
10801087
-------
1081-
value_dict : dict
1082-
"""
1083-
return dict(compat.iteritems(self))
1088+
value_dict : collections.Mapping
1089+
If ``into`` is collections.defaultdict, the return
1090+
value's default_factory will be None.
1091+
"""
1092+
# GH16122
1093+
if issubclass(into, collections.Mapping):
1094+
if into == collections.defaultdict:
1095+
return into(None, compat.iteritems(self))
1096+
else:
1097+
return into(compat.iteritems(self))
1098+
else:
1099+
raise TypeError('unsupported type: {}'.format(into))
10841100

10851101
def to_frame(self, name=None):
10861102
"""

pandas/tests/frame/test_convert_to.py

+43-38
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,7 @@
11
# -*- coding: utf-8 -*-
22

33
import pytest
4+
import collections
45
import numpy as np
56

67
from pandas import compat
@@ -18,44 +19,48 @@ def test_to_dict(self):
1819
'A': {'1': 1, '2': 2},
1920
'B': {'1': '1', '2': '2', '3': '3'},
2021
}
21-
recons_data = DataFrame(test_data).to_dict()
22-
23-
for k, v in compat.iteritems(test_data):
24-
for k2, v2 in compat.iteritems(v):
25-
self.assertEqual(v2, recons_data[k][k2])
26-
27-
recons_data = DataFrame(test_data).to_dict("l")
28-
29-
for k, v in compat.iteritems(test_data):
30-
for k2, v2 in compat.iteritems(v):
31-
self.assertEqual(v2, recons_data[k][int(k2) - 1])
32-
33-
recons_data = DataFrame(test_data).to_dict("s")
34-
35-
for k, v in compat.iteritems(test_data):
36-
for k2, v2 in compat.iteritems(v):
37-
self.assertEqual(v2, recons_data[k][k2])
38-
39-
recons_data = DataFrame(test_data).to_dict("sp")
40-
expected_split = {'columns': ['A', 'B'], 'index': ['1', '2', '3'],
41-
'data': [[1.0, '1'], [2.0, '2'], [np.nan, '3']]}
42-
tm.assert_dict_equal(recons_data, expected_split)
43-
44-
recons_data = DataFrame(test_data).to_dict("r")
45-
expected_records = [{'A': 1.0, 'B': '1'},
46-
{'A': 2.0, 'B': '2'},
47-
{'A': np.nan, 'B': '3'}]
48-
assert isinstance(recons_data, list)
49-
self.assertEqual(len(recons_data), 3)
50-
for l, r in zip(recons_data, expected_records):
51-
tm.assert_dict_equal(l, r)
52-
53-
# GH10844
54-
recons_data = DataFrame(test_data).to_dict("i")
55-
56-
for k, v in compat.iteritems(test_data):
57-
for k2, v2 in compat.iteritems(v):
58-
self.assertEqual(v2, recons_data[k2][k])
22+
# GH16122
23+
test_maps = (
24+
dict, collections.defaultdict, collections.OrderedDict)
25+
for mapping in test_maps:
26+
recons_data = DataFrame(test_data).to_dict(into=mapping)
27+
28+
for k, v in compat.iteritems(test_data):
29+
for k2, v2 in compat.iteritems(v):
30+
self.assertEqual(v2, recons_data[k][k2])
31+
32+
recons_data = DataFrame(test_data).to_dict("l", mapping)
33+
34+
for k, v in compat.iteritems(test_data):
35+
for k2, v2 in compat.iteritems(v):
36+
self.assertEqual(v2, recons_data[k][int(k2) - 1])
37+
38+
recons_data = DataFrame(test_data).to_dict("s", mapping)
39+
40+
for k, v in compat.iteritems(test_data):
41+
for k2, v2 in compat.iteritems(v):
42+
self.assertEqual(v2, recons_data[k][k2])
43+
44+
recons_data = DataFrame(test_data).to_dict("sp", mapping)
45+
expected_split = {'columns': ['A', 'B'], 'index': ['1', '2', '3'],
46+
'data': [[1.0, '1'], [2.0, '2'], [np.nan, '3']]}
47+
tm.assert_dict_equal(recons_data, expected_split)
48+
49+
recons_data = DataFrame(test_data).to_dict("r", mapping)
50+
expected_records = [{'A': 1.0, 'B': '1'},
51+
{'A': 2.0, 'B': '2'},
52+
{'A': np.nan, 'B': '3'}]
53+
assert isinstance(recons_data, list)
54+
self.assertEqual(len(recons_data), 3)
55+
for l, r in zip(recons_data, expected_records):
56+
tm.assert_dict_equal(l, r)
57+
58+
# GH10844
59+
recons_data = DataFrame(test_data).to_dict("i")
60+
61+
for k, v in compat.iteritems(test_data):
62+
for k2, v2 in compat.iteritems(v):
63+
self.assertEqual(v2, recons_data[k2][k])
5964

6065
def test_to_dict_timestamp(self):
6166

pandas/tests/series/test_io.py

+10-1
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,7 @@
22
# pylint: disable-msg=E1101,W0612
33

44
from datetime import datetime
5+
import collections
56

67
import numpy as np
78
import pandas as pd
@@ -127,7 +128,15 @@ def test_to_frame(self):
127128
assert_frame_equal(rs, xp)
128129

129130
def test_to_dict(self):
130-
tm.assert_series_equal(Series(self.ts.to_dict(), name='ts'), self.ts)
131+
# GH16122
132+
test_maps = (
133+
dict, collections.defaultdict, collections.OrderedDict)
134+
for mapping in test_maps:
135+
tm.assert_series_equal(
136+
Series(self.ts.to_dict(mapping), name='ts'), self.ts)
137+
from_method = Series(self.ts.to_dict(collections.Counter))
138+
from_constructor = Series(collections.Counter(self.ts.iteritems()))
139+
tm.assert_series_equal(from_method, from_constructor)
131140

132141
def test_timeseries_periodindex(self):
133142
# GH2891

0 commit comments

Comments
 (0)