From 105b96bc294c487d06e483b394b98b359ebd2afa Mon Sep 17 00:00:00 2001 From: Joris Van den Bossche Date: Mon, 6 Feb 2023 16:08:12 +0100 Subject: [PATCH 1/3] BUG: ensure getitem to select columns properly copies data for extension dtypes --- pandas/core/internals/managers.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/core/internals/managers.py b/pandas/core/internals/managers.py index ff80cccaa20d3..4952660f126b5 100644 --- a/pandas/core/internals/managers.py +++ b/pandas/core/internals/managers.py @@ -873,7 +873,7 @@ def _slice_take_blocks_ax0( # only one item and each mgr loc is a copy of that single # item. for mgr_loc in mgr_locs: - newblk = blk.copy(deep=False) + newblk = blk.copy(deep=not only_slice) newblk.mgr_locs = BlockPlacement(slice(mgr_loc, mgr_loc + 1)) blocks.append(newblk) refs.append(weakref.ref(blk)) From d818c61b76d58c626ac443e92bee9c7f03da056e Mon Sep 17 00:00:00 2001 From: Joris Van den Bossche Date: Fri, 10 Feb 2023 19:52:10 +0100 Subject: [PATCH 2/3] add test --- pandas/tests/frame/methods/test_reindex.py | 15 +++++++++++++++ 1 file changed, 15 insertions(+) diff --git a/pandas/tests/frame/methods/test_reindex.py b/pandas/tests/frame/methods/test_reindex.py index f455213bd436b..44cbd9b7b38e8 100644 --- a/pandas/tests/frame/methods/test_reindex.py +++ b/pandas/tests/frame/methods/test_reindex.py @@ -134,6 +134,21 @@ def test_reindex_copies(self): result2 = df.reindex(columns=cols, index=df.index, copy=True) assert not np.shares_memory(result2[0]._values, df[0]._values) + def test_reindex_copies_ea(self): + # https://github.com/pandas-dev/pandas/pull/51197 + # also ensure to honor copy keyword for ExtensionDtypes + N = 10 + df = DataFrame(np.random.randn(N * 10, N), dtype="Float64") + cols = np.arange(N) + np.random.shuffle(cols) + + result = df.reindex(columns=cols, copy=True) + assert not np.shares_memory(result[0].array._data, df[0].array._data) + + # pass both columns and index + result2 = 
df.reindex(columns=cols, index=df.index, copy=True) + assert not np.shares_memory(result2[0].array._data, df[0].array._data) + @td.skip_array_manager_not_yet_implemented def test_reindex_date_fill_value(self): # passing date to dt64 is deprecated; enforced in 2.0 to cast to object From 2ae2d1ec18a2a06df124462cc8cd54a6c2cf9156 Mon Sep 17 00:00:00 2001 From: Joris Van den Bossche Date: Fri, 10 Feb 2023 19:56:06 +0100 Subject: [PATCH 3/3] add whatsnew --- doc/source/whatsnew/v2.0.0.rst | 1 + 1 file changed, 1 insertion(+) diff --git a/doc/source/whatsnew/v2.0.0.rst b/doc/source/whatsnew/v2.0.0.rst index 53581420f920f..aded1e20f1488 100644 --- a/doc/source/whatsnew/v2.0.0.rst +++ b/doc/source/whatsnew/v2.0.0.rst @@ -1370,6 +1370,7 @@ ExtensionArray - Bug in :meth:`api.types.is_integer_dtype`, :meth:`api.types.is_unsigned_integer_dtype`, :meth:`api.types.is_signed_integer_dtype`, :meth:`api.types.is_float_dtype` where a custom :class:`ExtensionDtype` would not return ``True`` if ``kind`` returned the corresponding NumPy type (:issue:`50667`) - Bug in :class:`Series` constructor unnecessarily overflowing for nullable unsigned integer dtypes (:issue:`38798`, :issue:`25880`) - Bug in setting non-string value into ``StringArray`` raising ``ValueError`` instead of ``TypeError`` (:issue:`49632`) +- Bug in :meth:`DataFrame.reindex` not honoring the default ``copy=True`` keyword for columns with an ExtensionDtype (as a result, selecting multiple columns with getitem (``[]``) also did not correctly return a copy) (:issue:`51197`) Styler ^^^^^^