Skip to content

Commit e9f90a2

Browse files
BUG: ensure reindex / getitem to select columns properly copies data for extension dtypes (#51197)
1 parent dd30415 commit e9f90a2

File tree

3 files changed

+17
-1
lines changed

3 files changed

+17
-1
lines changed

doc/source/whatsnew/v2.0.0.rst

+1
Original file line number | Diff line number | Diff line change
@@ -1395,6 +1395,7 @@ ExtensionArray
13951395
- Bug in :meth:`api.types.is_integer_dtype`, :meth:`api.types.is_unsigned_integer_dtype`, :meth:`api.types.is_signed_integer_dtype`, :meth:`api.types.is_float_dtype` where a custom :class:`ExtensionDtype` would not return ``True`` if ``kind`` returned the corresponding NumPy type (:issue:`50667`)
13961396
- Bug in :class:`Series` constructor unnecessarily overflowing for nullable unsigned integer dtypes (:issue:`38798`, :issue:`25880`)
13971397
- Bug in setting non-string value into ``StringArray`` raising ``ValueError`` instead of ``TypeError`` (:issue:`49632`)
1398+
- Bug in :meth:`DataFrame.reindex` not honoring the default ``copy=True`` keyword for columns with an ExtensionDtype; as a result, selecting multiple columns with getitem (``[]``) also did not correctly return a copy (:issue:`51197`)
13981399

13991400
Styler
14001401
^^^^^^

pandas/core/internals/managers.py

+1-1
Original file line number | Diff line number | Diff line change
@@ -827,7 +827,7 @@ def _slice_take_blocks_ax0(
827827
# only one item and each mgr loc is a copy of that single
828828
# item.
829829
for mgr_loc in mgr_locs:
830-
newblk = blk.copy(deep=False)
830+
newblk = blk.copy(deep=not only_slice)
831831
newblk.mgr_locs = BlockPlacement(slice(mgr_loc, mgr_loc + 1))
832832
blocks.append(newblk)
833833

pandas/tests/frame/methods/test_reindex.py

+15
Original file line number | Diff line number | Diff line change
@@ -133,6 +133,21 @@ def test_reindex_copies(self):
133133
result2 = df.reindex(columns=cols, index=df.index, copy=True)
134134
assert not np.shares_memory(result2[0]._values, df[0]._values)
135135

136+
def test_reindex_copies_ea(self):
137+
# https://github.com/pandas-dev/pandas/pull/51197
138+
# also ensure the copy keyword is honored for ExtensionDtypes
139+
N = 10
140+
df = DataFrame(np.random.randn(N * 10, N), dtype="Float64")
141+
cols = np.arange(N)
142+
np.random.shuffle(cols)
143+
144+
result = df.reindex(columns=cols, copy=True)
145+
assert not np.shares_memory(result[0].array._data, df[0].array._data)
146+
147+
# pass both columns and index
148+
result2 = df.reindex(columns=cols, index=df.index, copy=True)
149+
assert not np.shares_memory(result2[0].array._data, df[0].array._data)
150+
136151
@td.skip_array_manager_not_yet_implemented
137152
def test_reindex_date_fill_value(self):
138153
# passing date to dt64 is deprecated; enforced in 2.0 to cast to object

0 commit comments

Comments
 (0)