From 138df124d50d67410e35444e2636c637c5c9cd40 Mon Sep 17 00:00:00 2001 From: Patrick Hoefler Date: Thu, 16 Feb 2023 15:55:41 +0100 Subject: [PATCH 1/5] BUG: transpose not respecting CoW --- doc/source/whatsnew/v2.0.0.rst | 2 ++ pandas/core/frame.py | 9 +++++++- pandas/tests/copy_view/test_methods.py | 29 ++++++++++++++++++++++++++ 3 files changed, 39 insertions(+), 1 deletion(-) diff --git a/doc/source/whatsnew/v2.0.0.rst b/doc/source/whatsnew/v2.0.0.rst index 7b1ebb14d3dc8..02e097b39ad3c 100644 --- a/doc/source/whatsnew/v2.0.0.rst +++ b/doc/source/whatsnew/v2.0.0.rst @@ -261,6 +261,8 @@ Copy-on-Write improvements - :meth:`DataFrame.replace` will now respect the Copy-on-Write mechanism when ``inplace=True``. +- :meth:`DataFrame.transpose` will now respect the Copy-on-Write mechanism. + - Arithmetic operations that can be inplace, e.g. ``ser *= 2`` will now respect the Copy-on-Write mechanism. diff --git a/pandas/core/frame.py b/pandas/core/frame.py index 79f423b17383d..79d1a08c89007 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -3563,7 +3563,14 @@ def transpose(self, *args, copy: bool = False) -> DataFrame: if copy: new_vals = new_vals.copy() - result = self._constructor(new_vals, index=self.columns, columns=self.index) + result = self._constructor( + new_vals, index=self.columns, columns=self.index, copy=False + ) + if using_copy_on_write() and not copy: + result._mgr.blocks[0].refs = self._mgr.blocks[0].refs # type: ignore[union-attr] # noqa + result._mgr.blocks[0].refs.add_reference( # type: ignore[union-attr] + result._mgr.blocks[0] # type: ignore[arg-type, union-attr] + ) elif ( self._is_homogeneous_type and dtypes and is_extension_array_dtype(dtypes[0]) diff --git a/pandas/tests/copy_view/test_methods.py b/pandas/tests/copy_view/test_methods.py index 16033bfa750b3..103da35e85948 100644 --- a/pandas/tests/copy_view/test_methods.py +++ b/pandas/tests/copy_view/test_methods.py @@ -1593,3 +1593,32 @@ def test_inplace_arithmetic_series_with_reference(using_copy_on_write): tm.assert_series_equal(ser_orig, view) else: assert np.shares_memory(get_array(ser), get_array(view)) + + +def test_transpose(using_copy_on_write): + df = DataFrame({"a": [1, 2, 3], "b": 1}) + df_orig = df.copy() + result = df.T + + assert np.shares_memory(get_array(df, "a"), get_array(result, 0)) + result.iloc[0, 0] = 100 + if using_copy_on_write: + tm.assert_frame_equal(df, df_orig) + + +def test_transpose_different_dtypes(using_copy_on_write): + df = DataFrame({"a": [1, 2, 3], "b": 1.5}) + df_orig = df.copy() + result = df.T + + assert not np.shares_memory(get_array(df, "a"), get_array(result, 0)) + result.iloc[0, 0] = 100 + if using_copy_on_write: + tm.assert_frame_equal(df, df_orig) + + +def test_transpose_ea_single_column(using_copy_on_write): + df = DataFrame({"a": [1, 2, 3]}, dtype="Int64") + result = df.T + + assert not np.shares_memory(get_array(df, "a"), get_array(result, 0)) From 29c1bda79145c5732d53ea67b6f312be6bf5b014 Mon Sep 17 00:00:00 2001 From: Patrick Hoefler Date: Thu, 16 Feb 2023 18:04:59 +0100 Subject: [PATCH 2/5] FIx --- pandas/core/frame.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/core/frame.py b/pandas/core/frame.py index 79d1a08c89007..55f72c99eedb4 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -3566,7 +3566,7 @@ def transpose(self, *args, copy: bool = False) -> DataFrame: result = self._constructor( new_vals, index=self.columns, columns=self.index, copy=False ) - if using_copy_on_write() and not copy: + if using_copy_on_write() and not copy and len(self) > 0: result._mgr.blocks[0].refs = self._mgr.blocks[0].refs # type: ignore[union-attr] # noqa result._mgr.blocks[0].refs.add_reference( # type: ignore[union-attr] result._mgr.blocks[0] # type: ignore[arg-type, union-attr] From ffcd4c055a04dbb9e9c2b4a4aa755b50b1ae57cc Mon Sep 17 00:00:00 2001 From: Patrick Hoefler Date: Fri, 17 Feb 2023 19:05:17 +0100 Subject: [PATCH 3/5] Adjust --- pandas/core/frame.py | 9 +++------ pandas/tests/copy_view/test_methods.py | 11 ++++++++--- 2 files changed, 11 insertions(+), 9 deletions(-) diff --git a/pandas/core/frame.py b/pandas/core/frame.py index 3a697ef60b740..bfba14ef91c3b 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -3557,17 +3557,14 @@ def transpose(self, *args, copy: bool = False) -> DataFrame: if self._can_fast_transpose: # Note: tests pass without this, but this improves perf quite a bit. new_vals = self._values.T - if copy: + if copy and not using_copy_on_write(): new_vals = new_vals.copy() result = self._constructor( new_vals, index=self.columns, columns=self.index, copy=False ) - if using_copy_on_write() and not copy and len(self) > 0: - result._mgr.blocks[0].refs = self._mgr.blocks[0].refs # type: ignore[union-attr] # noqa - result._mgr.blocks[0].refs.add_reference( # type: ignore[union-attr] - result._mgr.blocks[0] # type: ignore[arg-type, union-attr] - ) + if using_copy_on_write() and len(self) > 0: + result._mgr.add_references(self._mgr) # type: ignore[arg-type] elif ( self._is_homogeneous_type and dtypes and is_extension_array_dtype(dtypes[0]) diff --git a/pandas/tests/copy_view/test_methods.py b/pandas/tests/copy_view/test_methods.py index 103da35e85948..689fa21017eac 100644 --- a/pandas/tests/copy_view/test_methods.py +++ b/pandas/tests/copy_view/test_methods.py @@ -1595,12 +1595,17 @@ def test_inplace_arithmetic_series_with_reference(using_copy_on_write): assert np.shares_memory(get_array(ser), get_array(view)) -def test_transpose(using_copy_on_write): +@pytest.mark.parametrize("copy", [True, False]) +def test_transpose(using_copy_on_write, copy): df = DataFrame({"a": [1, 2, 3], "b": 1}) df_orig = df.copy() - result = df.T + result = df.transpose(copy=copy) + + if not copy or using_copy_on_write: + assert np.shares_memory(get_array(df, "a"), get_array(result, 0)) + else: + assert not np.shares_memory(get_array(df, "a"), get_array(result, 0)) - assert np.shares_memory(get_array(df, "a"), get_array(result, 0)) result.iloc[0, 0] = 100 if using_copy_on_write: tm.assert_frame_equal(df, df_orig) From 530f00c61e9be0cf9907bf0913591205255a3e23 Mon Sep 17 00:00:00 2001 From: Patrick Hoefler Date: Mon, 20 Feb 2023 10:52:27 +0000 Subject: [PATCH 4/5] Fix ci --- pandas/core/internals/managers.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/pandas/core/internals/managers.py b/pandas/core/internals/managers.py index 664a122015ba5..c7be0cd46097a 100644 --- a/pandas/core/internals/managers.py +++ b/pandas/core/internals/managers.py @@ -252,6 +252,9 @@ def add_references(self, mgr: BaseBlockManager) -> None: Adds the references from one manager to another. We assume that both managers have the same block structure. """ + if len(self.blocks) != len(mgr.blocks): + # If block structure changes, then we made a copy + return for i, blk in enumerate(self.blocks): blk.refs = mgr.blocks[i].refs # Argument 1 to "add_reference" of "BlockValuesRefs" has incompatible type From 742e54c43f6cf020a82336f948f64346a8b24f7e Mon Sep 17 00:00:00 2001 From: Patrick Hoefler Date: Mon, 20 Feb 2023 10:53:32 +0000 Subject: [PATCH 5/5] Fix array manager --- pandas/tests/copy_view/test_methods.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/pandas/tests/copy_view/test_methods.py b/pandas/tests/copy_view/test_methods.py index 689fa21017eac..c88210dec3c09 100644 --- a/pandas/tests/copy_view/test_methods.py +++ b/pandas/tests/copy_view/test_methods.py @@ -1596,12 +1596,12 @@ def test_inplace_arithmetic_series_with_reference(using_copy_on_write): @pytest.mark.parametrize("copy", [True, False]) -def test_transpose(using_copy_on_write, copy): +def test_transpose(using_copy_on_write, copy, using_array_manager): df = DataFrame({"a": [1, 2, 3], "b": 1}) df_orig = df.copy() result = df.transpose(copy=copy) - if not copy or using_copy_on_write: + if not copy and not using_array_manager or using_copy_on_write: assert np.shares_memory(get_array(df, "a"), get_array(result, 0)) else: assert not np.shares_memory(get_array(df, "a"), get_array(result, 0))