Skip to content

Commit c8ce3d0

Browse files
jameswinegar authored and jreback committed
BUG: DataFrame.to_dict when orient=index data loss (pandas-dev#22810)
1 parent a86501f commit c8ce3d0

File tree

3 files changed

+26
-0
lines changed

3 files changed

+26
-0
lines changed

doc/source/whatsnew/v0.24.0.txt

+16
Original file line number | Diff line number | Diff line change
@@ -373,6 +373,22 @@ is the case with :attr:`Period.end_time`, for example
373373

374374
p.end_time
375375

376+
.. _whatsnew_0240.api_breaking.frame_to_dict_index_orient:
377+
378+
Raise ValueError in ``DataFrame.to_dict(orient='index')``
379+
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
380+
381+
:func:`DataFrame.to_dict` now raises ``ValueError`` when used with
382+
``orient='index'`` and a non-unique index instead of losing data (:issue:`22801`)
383+
384+
.. ipython:: python
385+
:okexcept:
386+
387+
df = pd.DataFrame({'a': [1, 2], 'b': [0.5, 0.75]}, index=['A', 'A'])
388+
df
389+
390+
df.to_dict(orient='index')
391+
376392
.. _whatsnew_0240.api.datetimelike.normalize:
377393

378394
Tick DateOffset Normalize Restrictions

pandas/core/frame.py

+4
Original file line number | Diff line number | Diff line change
@@ -1224,6 +1224,10 @@ def to_dict(self, orient='dict', into=dict):
12241224
for k, v in zip(self.columns, np.atleast_1d(row)))
12251225
for row in self.values]
12261226
elif orient.lower().startswith('i'):
1227+
if not self.index.is_unique:
1228+
raise ValueError(
1229+
"DataFrame index must be unique for orient='index'."
1230+
)
12271231
return into_c((t[0], dict(zip(self.columns, t[1:])))
12281232
for t in self.itertuples())
12291233
else:

pandas/tests/frame/test_convert_to.py

+6
Original file line number | Diff line number | Diff line change
@@ -71,6 +71,12 @@ def test_to_dict_timestamp(self):
7171
tm.assert_dict_equal(test_data_mixed.to_dict(orient='split'),
7272
expected_split_mixed)
7373

74+
def test_to_dict_index_not_unique_with_index_orient(self):
75+
# GH22801
76+
# Data loss when indexes are not unique. Raise ValueError.
77+
df = DataFrame({'a': [1, 2], 'b': [0.5, 0.75]}, index=['A', 'A'])
78+
pytest.raises(ValueError, df.to_dict, orient='index')
79+
7480
def test_to_dict_invalid_orient(self):
7581
df = DataFrame({'A': [0, 1]})
7682
pytest.raises(ValueError, df.to_dict, orient='xinvalid')

0 commit comments

Comments
 (0)