-
-
Notifications
You must be signed in to change notification settings - Fork 18.4k
ENH: Provide dict object for to_dict() #16122 #16220
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from all commits
38fa22b
3070fa3
ccc33dd
73acea0
67c57e8
546816b
086c598
d6c0deb
f297ee8
8469977
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -63,7 +63,8 @@ | |
_default_index, | ||
_values_from_object, | ||
_maybe_box_datetimelike, | ||
_dict_compat) | ||
_dict_compat, | ||
standardize_mapping) | ||
from pandas.core.generic import NDFrame, _shared_docs | ||
from pandas.core.index import Index, MultiIndex, _ensure_index | ||
from pandas.core.indexing import (maybe_droplevels, convert_to_index_sliceable, | ||
|
@@ -860,7 +861,7 @@ def from_dict(cls, data, orient='columns', dtype=None): | |
|
||
return cls(data, index=index, columns=columns, dtype=dtype) | ||
|
||
def to_dict(self, orient='dict'): | ||
def to_dict(self, orient='dict', into=dict): | ||
"""Convert DataFrame to dictionary. | ||
|
||
Parameters | ||
|
@@ -882,32 +883,85 @@ def to_dict(self, orient='dict'): | |
Abbreviations are allowed. `s` indicates `series` and `sp` | ||
indicates `split`. | ||
|
||
into : class, default dict | ||
The collections.Mapping subclass used for all Mappings | ||
in the return value. Can be the actual class or an empty | ||
instance of the mapping type you want. If you want a | ||
collections.defaultdict, you must pass it initialized. | ||
|
||
.. versionadded:: 0.21.0 | ||
|
||
Returns | ||
------- | ||
result : dict like {column -> {index -> value}} | ||
result : collections.Mapping like {column -> {index -> value}} | ||
|
||
Examples | ||
-------- | ||
>>> df = pd.DataFrame( | ||
{'col1': [1, 2], 'col2': [0.5, 0.75]}, index=['a', 'b']) | ||
>>> df | ||
col1 col2 | ||
a 1 0.50 | ||
b 2 0.75 | ||
>>> df.to_dict() | ||
{'col1': {'a': 1, 'b': 2}, 'col2': {'a': 0.5, 'b': 0.75}} | ||
|
||
You can specify the return orientation. | ||
|
||
>>> df.to_dict('series') | ||
{'col1': a 1 | ||
b 2 | ||
Name: col1, dtype: int64, 'col2': a 0.50 | ||
b 0.75 | ||
Name: col2, dtype: float64} | ||
>>> df.to_dict('split') | ||
{'columns': ['col1', 'col2'], | ||
'data': [[1.0, 0.5], [2.0, 0.75]], | ||
'index': ['a', 'b']} | ||
>>> df.to_dict('records') | ||
[{'col1': 1.0, 'col2': 0.5}, {'col1': 2.0, 'col2': 0.75}] | ||
>>> df.to_dict('index') | ||
{'a': {'col1': 1.0, 'col2': 0.5}, 'b': {'col1': 2.0, 'col2': 0.75}} | ||
|
||
You can also specify the mapping type. | ||
|
||
>>> from collections import OrderedDict, defaultdict | ||
>>> df.to_dict(into=OrderedDict) | ||
OrderedDict([('col1', OrderedDict([('a', 1), ('b', 2)])), | ||
('col2', OrderedDict([('a', 0.5), ('b', 0.75)]))]) | ||
|
||
If you want a `defaultdict`, you need to initialize it: | ||
|
||
>>> dd = defaultdict(list) | ||
>>> df.to_dict('records', into=dd) | ||
[defaultdict(<type 'list'>, {'col2': 0.5, 'col1': 1.0}), | ||
defaultdict(<type 'list'>, {'col2': 0.75, 'col1': 2.0})] | ||
""" | ||
if not self.columns.is_unique: | ||
warnings.warn("DataFrame columns are not unique, some " | ||
"columns will be omitted.", UserWarning) | ||
# GH16122 | ||
into_c = standardize_mapping(into) | ||
if orient.lower().startswith('d'): | ||
return dict((k, v.to_dict()) for k, v in compat.iteritems(self)) | ||
return into_c( | ||
(k, v.to_dict(into)) for k, v in compat.iteritems(self)) | ||
elif orient.lower().startswith('l'): | ||
return dict((k, v.tolist()) for k, v in compat.iteritems(self)) | ||
return into_c((k, v.tolist()) for k, v in compat.iteritems(self)) | ||
elif orient.lower().startswith('sp'): | ||
return {'index': self.index.tolist(), | ||
'columns': self.columns.tolist(), | ||
'data': lib.map_infer(self.values.ravel(), | ||
_maybe_box_datetimelike) | ||
.reshape(self.values.shape).tolist()} | ||
return into_c((('index', self.index.tolist()), | ||
('columns', self.columns.tolist()), | ||
('data', lib.map_infer(self.values.ravel(), | ||
_maybe_box_datetimelike) | ||
.reshape(self.values.shape).tolist()))) | ||
elif orient.lower().startswith('s'): | ||
return dict((k, _maybe_box_datetimelike(v)) | ||
for k, v in compat.iteritems(self)) | ||
return into_c((k, _maybe_box_datetimelike(v)) | ||
for k, v in compat.iteritems(self)) | ||
elif orient.lower().startswith('r'): | ||
return [dict((k, _maybe_box_datetimelike(v)) | ||
for k, v in zip(self.columns, row)) | ||
return [into_c((k, _maybe_box_datetimelike(v)) | ||
for k, v in zip(self.columns, row)) | ||
for row in self.values] | ||
elif orient.lower().startswith('i'): | ||
return dict((k, v.to_dict()) for k, v in self.iterrows()) | ||
return into_c((k, v.to_dict(into)) for k, v in self.iterrows()) | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Is it correct to use There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. I think this is correct. There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Gotcha. I forgot that |
||
else: | ||
raise ValueError("orient '%s' not understood" % orient) | ||
|
||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,6 +1,7 @@ | ||
# -*- coding: utf-8 -*- | ||
|
||
import pytest | ||
import collections | ||
import numpy as np | ||
|
||
from pandas import compat | ||
|
@@ -13,50 +14,6 @@ | |
|
||
class TestDataFrameConvertTo(TestData): | ||
|
||
def test_to_dict(self): | ||
test_data = { | ||
'A': {'1': 1, '2': 2}, | ||
'B': {'1': '1', '2': '2', '3': '3'}, | ||
} | ||
recons_data = DataFrame(test_data).to_dict() | ||
|
||
for k, v in compat.iteritems(test_data): | ||
for k2, v2 in compat.iteritems(v): | ||
assert v2 == recons_data[k][k2] | ||
|
||
recons_data = DataFrame(test_data).to_dict("l") | ||
|
||
for k, v in compat.iteritems(test_data): | ||
for k2, v2 in compat.iteritems(v): | ||
assert v2 == recons_data[k][int(k2) - 1] | ||
|
||
recons_data = DataFrame(test_data).to_dict("s") | ||
|
||
for k, v in compat.iteritems(test_data): | ||
for k2, v2 in compat.iteritems(v): | ||
assert v2 == recons_data[k][k2] | ||
|
||
recons_data = DataFrame(test_data).to_dict("sp") | ||
expected_split = {'columns': ['A', 'B'], 'index': ['1', '2', '3'], | ||
'data': [[1.0, '1'], [2.0, '2'], [np.nan, '3']]} | ||
tm.assert_dict_equal(recons_data, expected_split) | ||
|
||
recons_data = DataFrame(test_data).to_dict("r") | ||
expected_records = [{'A': 1.0, 'B': '1'}, | ||
{'A': 2.0, 'B': '2'}, | ||
{'A': np.nan, 'B': '3'}] | ||
assert isinstance(recons_data, list) | ||
assert len(recons_data) == 3 | ||
for l, r in zip(recons_data, expected_records): | ||
tm.assert_dict_equal(l, r) | ||
|
||
# GH10844 | ||
recons_data = DataFrame(test_data).to_dict("i") | ||
|
||
for k, v in compat.iteritems(test_data): | ||
for k2, v2 in compat.iteritems(v): | ||
assert v2 == recons_data[k2][k] | ||
|
||
def test_to_dict_timestamp(self): | ||
|
||
# GH11247 | ||
|
@@ -190,17 +147,85 @@ def test_to_records_with_unicode_column_names(self): | |
) | ||
tm.assert_almost_equal(result, expected) | ||
|
||
@pytest.mark.parametrize('mapping', [ | ||
dict, | ||
collections.defaultdict(list), | ||
collections.OrderedDict]) | ||
def test_to_dict(self, mapping): | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. This may be tested elsewhere, but can you add a test with a dataframe that has duplicate columns? Make sure to catch the warning. There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Done - I added one at the end. Let me know if that is what you were getting at. |
||
test_data = { | ||
'A': {'1': 1, '2': 2}, | ||
'B': {'1': '1', '2': '2', '3': '3'}, | ||
} | ||
|
||
# GH16122 | ||
recons_data = DataFrame(test_data).to_dict(into=mapping) | ||
|
||
for k, v in compat.iteritems(test_data): | ||
for k2, v2 in compat.iteritems(v): | ||
assert (v2 == recons_data[k][k2]) | ||
|
||
recons_data = DataFrame(test_data).to_dict("l", mapping) | ||
|
||
for k, v in compat.iteritems(test_data): | ||
for k2, v2 in compat.iteritems(v): | ||
assert (v2 == recons_data[k][int(k2) - 1]) | ||
|
||
recons_data = DataFrame(test_data).to_dict("s", mapping) | ||
|
||
for k, v in compat.iteritems(test_data): | ||
for k2, v2 in compat.iteritems(v): | ||
assert (v2 == recons_data[k][k2]) | ||
|
||
recons_data = DataFrame(test_data).to_dict("sp", mapping) | ||
expected_split = {'columns': ['A', 'B'], 'index': ['1', '2', '3'], | ||
'data': [[1.0, '1'], [2.0, '2'], [np.nan, '3']]} | ||
tm.assert_dict_equal(recons_data, expected_split) | ||
|
||
recons_data = DataFrame(test_data).to_dict("r", mapping) | ||
expected_records = [{'A': 1.0, 'B': '1'}, | ||
{'A': 2.0, 'B': '2'}, | ||
{'A': np.nan, 'B': '3'}] | ||
assert isinstance(recons_data, list) | ||
assert (len(recons_data) == 3) | ||
for l, r in zip(recons_data, expected_records): | ||
tm.assert_dict_equal(l, r) | ||
|
||
# GH10844 | ||
recons_data = DataFrame(test_data).to_dict("i") | ||
|
||
for k, v in compat.iteritems(test_data): | ||
for k2, v2 in compat.iteritems(v): | ||
assert (v2 == recons_data[k2][k]) | ||
|
||
df = DataFrame(test_data) | ||
df['duped'] = df[df.columns[0]] | ||
recons_data = df.to_dict("i") | ||
comp_data = test_data.copy() | ||
comp_data['duped'] = comp_data[df.columns[0]] | ||
for k, v in compat.iteritems(comp_data): | ||
for k2, v2 in compat.iteritems(v): | ||
assert (v2 == recons_data[k2][k]) | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. I would also add a test that hits some of the errors people might encounter (you do check these in the testing of standardize_mapping), but this is an integration test. You can put a test right after this, maybe. There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Added this test - made sure the |
||
|
||
@pytest.mark.parametrize('mapping', [ | ||
list, | ||
collections.defaultdict, | ||
[]]) | ||
def test_to_dict_errors(self, mapping): | ||
# GH16122 | ||
df = DataFrame(np.random.randn(3, 3)) | ||
with pytest.raises(TypeError): | ||
df.to_dict(into=mapping) | ||
|
||
@pytest.mark.parametrize('tz', ['UTC', 'GMT', 'US/Eastern']) | ||
def test_to_records_datetimeindex_with_tz(tz): | ||
# GH13937 | ||
dr = date_range('2016-01-01', periods=10, | ||
freq='S', tz=tz) | ||
@pytest.mark.parametrize('tz', ['UTC', 'GMT', 'US/Eastern']) | ||
def test_to_records_datetimeindex_with_tz(self, tz): | ||
# GH13937 | ||
dr = date_range('2016-01-01', periods=10, | ||
freq='S', tz=tz) | ||
|
||
df = DataFrame({'datetime': dr}, index=dr) | ||
df = DataFrame({'datetime': dr}, index=dr) | ||
|
||
expected = df.to_records() | ||
result = df.tz_convert("UTC").to_records() | ||
expected = df.to_records() | ||
result = df.tz_convert("UTC").to_records() | ||
|
||
# both converted to UTC, so they are equal | ||
tm.assert_numpy_array_equal(result, expected) | ||
# both converted to UTC, so they are equal | ||
tm.assert_numpy_array_equal(result, expected) |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
The 0.1 and 0.2 are wrong, should be 0.5 and 0.75