From 401f090ad806bd8c9590766e7f1d8cdb223f2e38 Mon Sep 17 00:00:00 2001 From: Patrick Hoefler Date: Wed, 4 Jan 2023 23:18:56 +0100 Subject: [PATCH 1/4] ENH: Add lazy copy for swapaxes no op --- pandas/core/generic.py | 8 ++++---- pandas/tests/copy_view/test_methods.py | 18 ++++++++++++++++++ 2 files changed, 22 insertions(+), 4 deletions(-) diff --git a/pandas/core/generic.py b/pandas/core/generic.py index 3517f3ee9183d..13739e81ce995 100644 --- a/pandas/core/generic.py +++ b/pandas/core/generic.py @@ -761,7 +761,7 @@ def _set_axis(self, axis: AxisInt, labels: AnyArrayLike | list) -> None: @final def swapaxes( - self: NDFrameT, axis1: Axis, axis2: Axis, copy: bool_t = True + self: NDFrameT, axis1: Axis, axis2: Axis, copy: bool_t | None = None ) -> NDFrameT: """ Interchange axes and swap values axes appropriately. @@ -774,9 +774,9 @@ def swapaxes( j = self._get_axis_number(axis2) if i == j: - if copy: - return self.copy() - return self + if not copy and copy is not None and not _using_copy_on_write(): + return self + return self.copy(deep=copy) mapping = {i: j, j: i} diff --git a/pandas/tests/copy_view/test_methods.py b/pandas/tests/copy_view/test_methods.py index 63b0be281fe53..84143b8fea2b0 100644 --- a/pandas/tests/copy_view/test_methods.py +++ b/pandas/tests/copy_view/test_methods.py @@ -255,6 +255,24 @@ def test_to_frame(using_copy_on_write): tm.assert_frame_equal(df, expected) +@pytest.mark.parametrize("ax", ["index", "columns"]) +def test_swapaxes_noop(using_copy_on_write, ax): + df = DataFrame({"a": [1, 2, 3], "b": [4, 5, 6]}) + df_orig = df.copy() + df2 = df.swapaxes(ax, ax) + + if using_copy_on_write: + assert np.shares_memory(get_array(df2, "a"), get_array(df, "a")) + else: + assert not np.shares_memory(get_array(df2, "a"), get_array(df, "a")) + + # mutating df2 triggers a copy-on-write for that column/block + df2.iloc[0, 0] = 0 + if using_copy_on_write: + assert not np.shares_memory(get_array(df2, "a"), get_array(df, "a")) + tm.assert_frame_equal(df, df_orig) + + @pytest.mark.parametrize( "method, idx", [ From 4dfd5d37a4cd547fd1d52da9d9dae40b5e8c8838 Mon Sep 17 00:00:00 2001 From: Patrick Hoefler Date: Thu, 5 Jan 2023 22:16:52 +0100 Subject: [PATCH 2/4] Move condition --- pandas/core/generic.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/core/generic.py b/pandas/core/generic.py index 13739e81ce995..d08776198bc5b 100644 --- a/pandas/core/generic.py +++ b/pandas/core/generic.py @@ -774,7 +774,7 @@ def swapaxes( j = self._get_axis_number(axis2) if i == j: - if not copy and copy is not None and not _using_copy_on_write(): + if copy is False and not _using_copy_on_write(): return self return self.copy(deep=copy) From 5a20fff6336075c401c1aa597a4418716966b22b Mon Sep 17 00:00:00 2001 From: Patrick Hoefler Date: Sat, 7 Jan 2023 23:27:41 +0100 Subject: [PATCH 3/4] Add single block case --- pandas/core/generic.py | 24 +++++++++++++++++++++--- pandas/tests/copy_view/test_methods.py | 17 +++++++++++++++++ 2 files changed, 38 insertions(+), 3 deletions(-) diff --git a/pandas/core/generic.py b/pandas/core/generic.py index cbdbc0eb8cfd3..e2927e5404bc2 100644 --- a/pandas/core/generic.py +++ b/pandas/core/generic.py @@ -130,6 +130,7 @@ notna, ) +from pandas import get_option from pandas.core import ( algorithms as algos, arraylike, @@ -157,7 +158,10 @@ BlockManager, SingleArrayManager, ) -from pandas.core.internals.construction import mgr_to_mgr +from pandas.core.internals.construction import ( + mgr_to_mgr, + ndarray_to_mgr, +) from pandas.core.internals.managers import using_copy_on_write from pandas.core.methods.describe import describe_ndframe from pandas.core.missing import ( @@ -774,7 +778,7 @@ def swapaxes( j = self._get_axis_number(axis2) if i == j: - if copy is False and not _using_copy_on_write(): + if copy is False and not using_copy_on_write(): return self return self.copy(deep=copy) @@ -782,7 +786,21 @@ def swapaxes( new_axes = (self._get_axis(mapping.get(k, k)) for k in range(self._AXIS_LEN)) new_values = self.values.swapaxes(i, j) - if copy: + if using_copy_on_write() and self._mgr.is_single_block: + # This should only get hit in case of having a single block, otherwise a + # copy is made, we don't have to set up references. + new_mgr = ndarray_to_mgr( + new_values, + *new_axes, + dtype=None, + copy=False, + typ=get_option("mode.data_manager"), + ) + new_mgr.parent = self._mgr + new_mgr.refs = [weakref.ref(self._mgr.blocks[0])] + return self._constructor(new_mgr).__finalize__(self, method="swapaxes") + + elif (copy or copy is None) and self._mgr.is_single_block: new_values = new_values.copy() return self._constructor( diff --git a/pandas/tests/copy_view/test_methods.py b/pandas/tests/copy_view/test_methods.py index 65c4af94aa2b0..fdb8ee011fc28 100644 --- a/pandas/tests/copy_view/test_methods.py +++ b/pandas/tests/copy_view/test_methods.py @@ -341,6 +341,23 @@ def test_swapaxes_noop(using_copy_on_write, ax): tm.assert_frame_equal(df, df_orig) +def test_swapaxes_single_block(using_copy_on_write): + df = DataFrame({"a": [1, 2, 3], "b": [4, 5, 6]}, index=["x", "y", "z"]) + df_orig = df.copy() + df2 = df.swapaxes("index", "columns") + + if using_copy_on_write: + assert np.shares_memory(get_array(df2, "x"), get_array(df, "a")) + else: + assert not np.shares_memory(get_array(df2, "x"), get_array(df, "a")) + + # mutating df2 triggers a copy-on-write for that column/block + df2.iloc[0, 0] = 0 + if using_copy_on_write: + assert not np.shares_memory(get_array(df2, "x"), get_array(df, "a")) + tm.assert_frame_equal(df, df_orig) + + @pytest.mark.parametrize( "method, idx", [ From 26497beef980d00a4f1006f4da6d3e9e00d72dce Mon Sep 17 00:00:00 2001 From: Patrick Hoefler Date: Sun, 8 Jan 2023 11:03:40 +0100 Subject: [PATCH 4/4] Fix typing --- pandas/core/generic.py | 16 +++++++++++----- 1 file changed, 11 insertions(+), 5 deletions(-) diff --git a/pandas/core/generic.py b/pandas/core/generic.py index e2927e5404bc2..8e275bd718f35 100644 --- a/pandas/core/generic.py +++ b/pandas/core/generic.py @@ -130,7 +130,6 @@ notna, ) -from pandas import get_option from pandas.core import ( algorithms as algos, arraylike, @@ -784,18 +783,25 @@ def swapaxes( mapping = {i: j, j: i} - new_axes = (self._get_axis(mapping.get(k, k)) for k in range(self._AXIS_LEN)) + new_axes = [self._get_axis(mapping.get(k, k)) for k in range(self._AXIS_LEN)] new_values = self.values.swapaxes(i, j) - if using_copy_on_write() and self._mgr.is_single_block: + if ( + using_copy_on_write() + and self._mgr.is_single_block + and isinstance(self._mgr, BlockManager) + ): # This should only get hit in case of having a single block, otherwise a # copy is made, we don't have to set up references. new_mgr = ndarray_to_mgr( new_values, - *new_axes, + new_axes[0], + new_axes[1], dtype=None, copy=False, - typ=get_option("mode.data_manager"), + typ="block", ) + assert isinstance(new_mgr, BlockManager) + assert isinstance(self._mgr, BlockManager) new_mgr.parent = self._mgr new_mgr.refs = [weakref.ref(self._mgr.blocks[0])] return self._constructor(new_mgr).__finalize__(self, method="swapaxes")