Skip to content

Commit f040ed2

Browse files
dwkenefickTomAugspurger
authored andcommitted
ENH: Provide dict object for to_dict() #16122 (#16220)
* ENH: Provide dict object for to_dict() #16122 * ENH: Provide dict object for to_dict() #16122 * ENH: Provide dict object for to_dict() #16122 * ENH: Provide dict object for to_dict() #16122 * ENH: Provide dict object for to_dict() #16122 * ENH: Provide dict object for to_dict() #16122 * ENH: Provide dict object for to_dict() #16122
1 parent 42e2a87 commit f040ed2

File tree

7 files changed

+258
-78
lines changed

7 files changed

+258
-78
lines changed

doc/source/whatsnew/v0.21.0.txt

+1
Original file line numberDiff line numberDiff line change
@@ -26,6 +26,7 @@ New features
2626

2727
Other Enhancements
2828
^^^^^^^^^^^^^^^^^^
29+
- ``Series.to_dict()`` and ``DataFrame.to_dict()`` now support an ``into`` keyword which allows you to specify the ``collections.Mapping`` subclass that you would like returned. The default is ``dict``, which is backwards compatible. (:issue:`16122`)
2930
- ``RangeIndex.append`` now returns a ``RangeIndex`` object when possible (:issue:`16212`)
3031

3132

pandas/core/common.py

+38
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,8 @@
66
import warnings
77
from datetime import datetime, timedelta
88
from functools import partial
9+
import inspect
10+
import collections
911

1012
import numpy as np
1113
from pandas._libs import lib, tslib
@@ -479,6 +481,42 @@ def _dict_compat(d):
479481
for key, value in iteritems(d))
480482

481483

484+
def standardize_mapping(into):
    """
    Helper function to standardize a supplied mapping.

    .. versionadded:: 0.21.0

    Parameters
    ----------
    into : instance or subclass of collections.Mapping
        Must be a class, an initialized collections.defaultdict,
        or an instance of a collections.Mapping subclass.

    Returns
    -------
    mapping : a collections.Mapping subclass or other constructor
        a callable object that can accept an iterator to create
        the desired Mapping.

    Raises
    ------
    TypeError
        If ``into`` (or its type, when an instance is given) is not a
        Mapping subclass, or if it is the bare ``collections.defaultdict``
        class rather than an initialized instance.

    See Also
    --------
    DataFrame.to_dict
    Series.to_dict
    """
    # The ``collections.Mapping`` alias was removed in Python 3.10; the ABC
    # lives in ``collections.abc``. Fall back to the old location on Python 2.
    try:
        from collections.abc import Mapping
    except ImportError:  # Python 2
        from collections import Mapping

    if not inspect.isclass(into):
        if isinstance(into, collections.defaultdict):
            # A defaultdict needs its default_factory as first argument, so
            # hand back a constructor with the caller's factory pre-bound.
            return partial(collections.defaultdict, into.default_factory)
        # Any other instance is only used to tell us which class to build.
        into = type(into)
    if not issubclass(into, Mapping):
        raise TypeError('unsupported type: {}'.format(into))
    if into is collections.defaultdict:
        # The bare class would treat the data iterator as default_factory.
        raise TypeError(
            'to_dict() only accepts initialized defaultdicts')
    return into
518+
519+
482520
def sentinel_factory():
483521
class Sentinel(object):
484522
pass

pandas/core/frame.py

+69-15
Original file line numberDiff line numberDiff line change
@@ -63,7 +63,8 @@
6363
_default_index,
6464
_values_from_object,
6565
_maybe_box_datetimelike,
66-
_dict_compat)
66+
_dict_compat,
67+
standardize_mapping)
6768
from pandas.core.generic import NDFrame, _shared_docs
6869
from pandas.core.index import Index, MultiIndex, _ensure_index
6970
from pandas.core.indexing import (maybe_droplevels, convert_to_index_sliceable,
@@ -860,7 +861,7 @@ def from_dict(cls, data, orient='columns', dtype=None):
860861

861862
return cls(data, index=index, columns=columns, dtype=dtype)
862863

863-
def to_dict(self, orient='dict'):
864+
def to_dict(self, orient='dict', into=dict):
864865
"""Convert DataFrame to dictionary.
865866
866867
Parameters
@@ -882,32 +883,85 @@ def to_dict(self, orient='dict'):
882883
Abbreviations are allowed. `s` indicates `series` and `sp`
883884
indicates `split`.
884885
886+
into : class, default dict
887+
The collections.Mapping subclass used for all Mappings
888+
in the return value. Can be the actual class or an empty
889+
instance of the mapping type you want. If you want a
890+
collections.defaultdict, you must pass it initialized.
891+
892+
.. versionadded:: 0.21.0
893+
885894
Returns
886895
-------
887-
result : dict like {column -> {index -> value}}
896+
result : collections.Mapping like {column -> {index -> value}}
897+
898+
Examples
899+
--------
900+
>>> df = pd.DataFrame(
901+
{'col1': [1, 2], 'col2': [0.5, 0.75]}, index=['a', 'b'])
902+
>>> df
903+
col1 col2
904+
a 1 0.50
905+
b 2 0.75
906+
>>> df.to_dict()
907+
{'col1': {'a': 1, 'b': 2}, 'col2': {'a': 0.5, 'b': 0.75}}
908+
909+
You can specify the return orientation.
910+
911+
>>> df.to_dict('series')
912+
{'col1': a 1
913+
b 2
914+
Name: col1, dtype: int64, 'col2': a 0.50
915+
b 0.75
916+
Name: col2, dtype: float64}
917+
>>> df.to_dict('split')
918+
{'columns': ['col1', 'col2'],
919+
'data': [[1.0, 0.5], [2.0, 0.75]],
920+
'index': ['a', 'b']}
921+
>>> df.to_dict('records')
922+
[{'col1': 1.0, 'col2': 0.5}, {'col1': 2.0, 'col2': 0.75}]
923+
>>> df.to_dict('index')
924+
{'a': {'col1': 1.0, 'col2': 0.5}, 'b': {'col1': 2.0, 'col2': 0.75}}
925+
926+
You can also specify the mapping type.
927+
928+
>>> from collections import OrderedDict, defaultdict
929+
>>> df.to_dict(into=OrderedDict)
930+
OrderedDict([('col1', OrderedDict([('a', 1), ('b', 2)])),
931+
('col2', OrderedDict([('a', 0.5), ('b', 0.75)]))])
932+
933+
If you want a `defaultdict`, you need to initialize it:
934+
935+
>>> dd = defaultdict(list)
936+
>>> df.to_dict('records', into=dd)
937+
[defaultdict(<type 'list'>, {'col2': 0.5, 'col1': 1.0}),
938+
defaultdict(<type 'list'>, {'col2': 0.75, 'col1': 2.0})]
888939
"""
889940
if not self.columns.is_unique:
890941
warnings.warn("DataFrame columns are not unique, some "
891942
"columns will be omitted.", UserWarning)
943+
# GH16122
944+
into_c = standardize_mapping(into)
892945
if orient.lower().startswith('d'):
893-
return dict((k, v.to_dict()) for k, v in compat.iteritems(self))
946+
return into_c(
947+
(k, v.to_dict(into)) for k, v in compat.iteritems(self))
894948
elif orient.lower().startswith('l'):
895-
return dict((k, v.tolist()) for k, v in compat.iteritems(self))
949+
return into_c((k, v.tolist()) for k, v in compat.iteritems(self))
896950
elif orient.lower().startswith('sp'):
897-
return {'index': self.index.tolist(),
898-
'columns': self.columns.tolist(),
899-
'data': lib.map_infer(self.values.ravel(),
900-
_maybe_box_datetimelike)
901-
.reshape(self.values.shape).tolist()}
951+
return into_c((('index', self.index.tolist()),
952+
('columns', self.columns.tolist()),
953+
('data', lib.map_infer(self.values.ravel(),
954+
_maybe_box_datetimelike)
955+
.reshape(self.values.shape).tolist())))
902956
elif orient.lower().startswith('s'):
903-
return dict((k, _maybe_box_datetimelike(v))
904-
for k, v in compat.iteritems(self))
957+
return into_c((k, _maybe_box_datetimelike(v))
958+
for k, v in compat.iteritems(self))
905959
elif orient.lower().startswith('r'):
906-
return [dict((k, _maybe_box_datetimelike(v))
907-
for k, v in zip(self.columns, row))
960+
return [into_c((k, _maybe_box_datetimelike(v))
961+
for k, v in zip(self.columns, row))
908962
for row in self.values]
909963
elif orient.lower().startswith('i'):
910-
return dict((k, v.to_dict()) for k, v in self.iterrows())
964+
return into_c((k, v.to_dict(into)) for k, v in self.iterrows())
911965
else:
912966
raise ValueError("orient '%s' not understood" % orient)
913967

pandas/core/series.py

+31-6
Original file line numberDiff line numberDiff line change
@@ -46,7 +46,8 @@
4646
_maybe_match_name,
4747
SettingWithCopyError,
4848
_maybe_box_datetimelike,
49-
_dict_compat)
49+
_dict_compat,
50+
standardize_mapping)
5051
from pandas.core.index import (Index, MultiIndex, InvalidIndexError,
5152
Float64Index, _ensure_index)
5253
from pandas.core.indexing import check_bool_indexer, maybe_convert_indices
@@ -1074,15 +1075,39 @@ def tolist(self):
10741075
""" Convert Series to a nested list """
10751076
return list(self.asobject)
10761077

1077-
def to_dict(self):
1078+
def to_dict(self, into=dict):
10781079
"""
1079-
Convert Series to {label -> value} dict
1080+
Convert Series to {label -> value} dict or dict-like object.
1081+
1082+
Parameters
1083+
----------
1084+
into : class, default dict
1085+
The collections.Mapping subclass to use as the return
1086+
object. Can be the actual class or an empty
1087+
instance of the mapping type you want. If you want a
1088+
collections.defaultdict, you must pass it initialized.
1089+
1090+
.. versionadded:: 0.21.0
10801091
10811092
Returns
10821093
-------
1083-
value_dict : dict
1084-
"""
1085-
return dict(compat.iteritems(self))
1094+
value_dict : collections.Mapping
1095+
1096+
Examples
1097+
--------
1098+
>>> s = pd.Series([1, 2, 3, 4])
1099+
>>> s.to_dict()
1100+
{0: 1, 1: 2, 2: 3, 3: 4}
1101+
>>> from collections import OrderedDict, defaultdict
1102+
>>> s.to_dict(OrderedDict)
1103+
OrderedDict([(0, 1), (1, 2), (2, 3), (3, 4)])
1104+
>>> dd = defaultdict(list)
1105+
>>> s.to_dict(dd)
1106+
defaultdict(<type 'list'>, {0: 1, 1: 2, 2: 3, 3: 4})
1107+
"""
1108+
# GH16122
1109+
into_c = standardize_mapping(into)
1110+
return into_c(compat.iteritems(self))
10861111

10871112
def to_frame(self, name=None):
10881113
"""

pandas/tests/frame/test_convert_to.py

+79-54
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,7 @@
11
# -*- coding: utf-8 -*-
22

33
import pytest
4+
import collections
45
import numpy as np
56

67
from pandas import compat
@@ -13,50 +14,6 @@
1314

1415
class TestDataFrameConvertTo(TestData):
1516

16-
def test_to_dict(self):
17-
test_data = {
18-
'A': {'1': 1, '2': 2},
19-
'B': {'1': '1', '2': '2', '3': '3'},
20-
}
21-
recons_data = DataFrame(test_data).to_dict()
22-
23-
for k, v in compat.iteritems(test_data):
24-
for k2, v2 in compat.iteritems(v):
25-
assert v2 == recons_data[k][k2]
26-
27-
recons_data = DataFrame(test_data).to_dict("l")
28-
29-
for k, v in compat.iteritems(test_data):
30-
for k2, v2 in compat.iteritems(v):
31-
assert v2 == recons_data[k][int(k2) - 1]
32-
33-
recons_data = DataFrame(test_data).to_dict("s")
34-
35-
for k, v in compat.iteritems(test_data):
36-
for k2, v2 in compat.iteritems(v):
37-
assert v2 == recons_data[k][k2]
38-
39-
recons_data = DataFrame(test_data).to_dict("sp")
40-
expected_split = {'columns': ['A', 'B'], 'index': ['1', '2', '3'],
41-
'data': [[1.0, '1'], [2.0, '2'], [np.nan, '3']]}
42-
tm.assert_dict_equal(recons_data, expected_split)
43-
44-
recons_data = DataFrame(test_data).to_dict("r")
45-
expected_records = [{'A': 1.0, 'B': '1'},
46-
{'A': 2.0, 'B': '2'},
47-
{'A': np.nan, 'B': '3'}]
48-
assert isinstance(recons_data, list)
49-
assert len(recons_data) == 3
50-
for l, r in zip(recons_data, expected_records):
51-
tm.assert_dict_equal(l, r)
52-
53-
# GH10844
54-
recons_data = DataFrame(test_data).to_dict("i")
55-
56-
for k, v in compat.iteritems(test_data):
57-
for k2, v2 in compat.iteritems(v):
58-
assert v2 == recons_data[k2][k]
59-
6017
def test_to_dict_timestamp(self):
6118

6219
# GH11247
@@ -190,17 +147,85 @@ def test_to_records_with_unicode_column_names(self):
190147
)
191148
tm.assert_almost_equal(result, expected)
192149

150+
@pytest.mark.parametrize('mapping', [
    dict,
    collections.defaultdict(list),
    collections.OrderedDict])
def test_to_dict(self, mapping):
    """Check ``DataFrame.to_dict`` for every orient and each ``into`` mapping.

    ``mapping`` is either a Mapping class or an initialized defaultdict;
    the same value is passed to every orient, including ``index``.
    """
    test_data = {
        'A': {'1': 1, '2': 2},
        'B': {'1': '1', '2': '2', '3': '3'},
    }
    # `mapping` may be a class or an instance; results must be of this type
    expected_type = mapping if isinstance(mapping, type) else type(mapping)

    # GH16122
    recons_data = DataFrame(test_data).to_dict(into=mapping)
    assert isinstance(recons_data, expected_type)

    for k, v in compat.iteritems(test_data):
        for k2, v2 in compat.iteritems(v):
            assert (v2 == recons_data[k][k2])

    recons_data = DataFrame(test_data).to_dict("l", mapping)
    assert isinstance(recons_data, expected_type)

    for k, v in compat.iteritems(test_data):
        for k2, v2 in compat.iteritems(v):
            # list orient is positional: labels '1'..'3' map to 0..2
            assert (v2 == recons_data[k][int(k2) - 1])

    recons_data = DataFrame(test_data).to_dict("s", mapping)
    assert isinstance(recons_data, expected_type)

    for k, v in compat.iteritems(test_data):
        for k2, v2 in compat.iteritems(v):
            assert (v2 == recons_data[k][k2])

    recons_data = DataFrame(test_data).to_dict("sp", mapping)
    expected_split = {'columns': ['A', 'B'], 'index': ['1', '2', '3'],
                      'data': [[1.0, '1'], [2.0, '2'], [np.nan, '3']]}
    assert isinstance(recons_data, expected_type)
    tm.assert_dict_equal(recons_data, expected_split)

    recons_data = DataFrame(test_data).to_dict("r", mapping)
    expected_records = [{'A': 1.0, 'B': '1'},
                        {'A': 2.0, 'B': '2'},
                        {'A': np.nan, 'B': '3'}]
    assert isinstance(recons_data, list)
    assert (len(recons_data) == 3)
    for left, right in zip(recons_data, expected_records):
        assert isinstance(left, expected_type)
        tm.assert_dict_equal(left, right)

    # GH10844
    recons_data = DataFrame(test_data).to_dict("i", mapping)
    assert isinstance(recons_data, expected_type)

    for k, v in compat.iteritems(test_data):
        for k2, v2 in compat.iteritems(v):
            assert (v2 == recons_data[k2][k])

    # index orient with an extra column that duplicates the first one
    df = DataFrame(test_data)
    df['duped'] = df[df.columns[0]]
    recons_data = df.to_dict("i", mapping)
    comp_data = test_data.copy()
    comp_data['duped'] = comp_data[df.columns[0]]
    for k, v in compat.iteritems(comp_data):
        for k2, v2 in compat.iteritems(v):
            assert (v2 == recons_data[k2][k])
208+
209+
@pytest.mark.parametrize(
    'mapping', [list, collections.defaultdict, []])
def test_to_dict_errors(self, mapping):
    """``to_dict`` must raise TypeError for each parametrized ``into``.

    The cases are the ``list`` class, the bare ``collections.defaultdict``
    class, and an empty list instance.
    """
    # GH16122
    frame = DataFrame(np.random.randn(3, 3))
    with pytest.raises(TypeError):
        frame.to_dict(into=mapping)
193218

194-
@pytest.mark.parametrize('tz', ['UTC', 'GMT', 'US/Eastern'])
195-
def test_to_records_datetimeindex_with_tz(tz):
196-
# GH13937
197-
dr = date_range('2016-01-01', periods=10,
198-
freq='S', tz=tz)
219+
@pytest.mark.parametrize('tz', ['UTC', 'GMT', 'US/Eastern'])
220+
def test_to_records_datetimeindex_with_tz(self, tz):
221+
# GH13937
222+
dr = date_range('2016-01-01', periods=10,
223+
freq='S', tz=tz)
199224

200-
df = DataFrame({'datetime': dr}, index=dr)
225+
df = DataFrame({'datetime': dr}, index=dr)
201226

202-
expected = df.to_records()
203-
result = df.tz_convert("UTC").to_records()
227+
expected = df.to_records()
228+
result = df.tz_convert("UTC").to_records()
204229

205-
# both converted to UTC, so they are equal
206-
tm.assert_numpy_array_equal(result, expected)
230+
# both converted to UTC, so they are equal
231+
tm.assert_numpy_array_equal(result, expected)

0 commit comments

Comments
 (0)