Skip to content

Commit 18315c5

Browse files
authored
#50990 Increase to_dict('list') performance (#54824)
* autoformat * autoformat * whatsnew * reformat * rm values * to_numpy() * to_numpy() * rm newline * .items * unit test * comment
1 parent 3fbb030 commit 18315c5

File tree

3 files changed

+20
-5
lines changed

3 files changed

+20
-5
lines changed

doc/source/whatsnew/v2.2.0.rst

+2-2
Original file line numberDiff line numberDiff line change
@@ -158,9 +158,9 @@ Deprecations
158158

159159
Performance improvements
160160
~~~~~~~~~~~~~~~~~~~~~~~~
161-
- Performance improvement in :meth:`DataFrame.sort_index` and :meth:`Series.sort_index` when indexed by a :class:`MultiIndex` (:issue:`54835`, :issue:`54883`)
161+
- Performance improvement in :meth:`DataFrame.to_dict` when converting a DataFrame to a dictionary with ``orient="list"`` (:issue:`50990`)
162+
- Performance improvement in :meth:`DataFrame.sort_index` and :meth:`Series.sort_index` when indexed by a :class:`MultiIndex` (:issue:`54835`)
162163
- Performance improvement when indexing with more than 4 keys (:issue:`54550`)
163-
-
164164

165165
.. ---------------------------------------------------------------------------
166166
.. _whatsnew_220.bug_fixes:

pandas/core/methods/to_dict.py

+3-3
Original file line numberDiff line numberDiff line change
@@ -106,13 +106,13 @@ def to_dict(
106106
return into_c((k, v.to_dict(into)) for k, v in df.items())
107107

108108
elif orient == "list":
109-
object_dtype_indices_as_set = set(box_native_indices)
109+
object_dtype_indices_as_set: set[int] = set(box_native_indices)
110110
return into_c(
111111
(
112112
k,
113-
list(map(maybe_box_native, v.tolist()))
113+
list(map(maybe_box_native, v.to_numpy().tolist()))
114114
if i in object_dtype_indices_as_set
115-
else v.tolist(),
115+
else v.to_numpy().tolist(),
116116
)
117117
for i, (k, v) in enumerate(df.items())
118118
)

pandas/tests/frame/methods/test_to_dict.py

+15
Original file line numberDiff line numberDiff line change
@@ -166,6 +166,21 @@ def test_to_dict_not_unique_warning(self):
166166
with tm.assert_produces_warning(UserWarning):
167167
df.to_dict()
168168

169+
@pytest.mark.filterwarnings("ignore::UserWarning")
170+
@pytest.mark.parametrize(
171+
"orient,expected",
172+
[
173+
("list", {"A": [2, 5], "B": [3, 6]}),
174+
("dict", {"A": {0: 2, 1: 5}, "B": {0: 3, 1: 6}}),
175+
],
176+
)
177+
def test_to_dict_not_unique(self, orient, expected):
178+
# GH#54824: make sure that DataFrames with non-unique columns
179+
# behave consistently across different orients
180+
df = DataFrame([[1, 2, 3], [4, 5, 6]], columns=["A", "A", "B"])
181+
result = df.to_dict(orient)
182+
assert result == expected
183+
169184
# orient - orient argument to to_dict function
170185
# item_getter - function for extracting value from
171186
# the resulting dict using column name and index

0 commit comments

Comments
 (0)