diff --git a/doc/source/whatsnew/v2.0.0.rst b/doc/source/whatsnew/v2.0.0.rst index c0082b451c95d..ab9de672e5b8d 100644 --- a/doc/source/whatsnew/v2.0.0.rst +++ b/doc/source/whatsnew/v2.0.0.rst @@ -261,6 +261,8 @@ Copy-on-Write improvements - :meth:`DataFrame.replace` will now respect the Copy-on-Write mechanism when ``inplace=True``. +- :meth:`DataFrame.transpose` will now respect the Copy-on-Write mechanism. + - Arithmetic operations that can be inplace, e.g. ``ser *= 2`` will now respect the Copy-on-Write mechanism. diff --git a/pandas/core/frame.py b/pandas/core/frame.py index 4a6b95f9a2e11..f9ff325b51413 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -3557,10 +3557,14 @@ def transpose(self, *args, copy: bool = False) -> DataFrame: if self._can_fast_transpose: # Note: tests pass without this, but this improves perf quite a bit. new_vals = self._values.T - if copy: + if copy and not using_copy_on_write(): new_vals = new_vals.copy() - result = self._constructor(new_vals, index=self.columns, columns=self.index) + result = self._constructor( + new_vals, index=self.columns, columns=self.index, copy=False + ) + if using_copy_on_write() and len(self) > 0: + result._mgr.add_references(self._mgr) # type: ignore[arg-type] elif ( self._is_homogeneous_type and dtypes and is_extension_array_dtype(dtypes[0]) diff --git a/pandas/core/internals/managers.py b/pandas/core/internals/managers.py index 2ea556e19d0bd..cb32b3bbc6cc7 100644 --- a/pandas/core/internals/managers.py +++ b/pandas/core/internals/managers.py @@ -253,6 +253,9 @@ def add_references(self, mgr: BaseBlockManager) -> None: Adds the references from one manager to another. We assume that both managers have the same block structure. """ + if len(self.blocks) != len(mgr.blocks): + # If block structure changes, then we made a copy + return for i, blk in enumerate(self.blocks): blk.refs = mgr.blocks[i].refs # Argument 1 to "add_reference" of "BlockValuesRefs" has incompatible type diff --git a/pandas/tests/copy_view/test_methods.py b/pandas/tests/copy_view/test_methods.py index 16033bfa750b3..c88210dec3c09 100644 --- a/pandas/tests/copy_view/test_methods.py +++ b/pandas/tests/copy_view/test_methods.py @@ -1593,3 +1593,37 @@ def test_inplace_arithmetic_series_with_reference(using_copy_on_write): tm.assert_series_equal(ser_orig, view) else: assert np.shares_memory(get_array(ser), get_array(view)) + + +@pytest.mark.parametrize("copy", [True, False]) +def test_transpose(using_copy_on_write, copy, using_array_manager): + df = DataFrame({"a": [1, 2, 3], "b": 1}) + df_orig = df.copy() + result = df.transpose(copy=copy) + + if not copy and not using_array_manager or using_copy_on_write: + assert np.shares_memory(get_array(df, "a"), get_array(result, 0)) + else: + assert not np.shares_memory(get_array(df, "a"), get_array(result, 0)) + + result.iloc[0, 0] = 100 + if using_copy_on_write: + tm.assert_frame_equal(df, df_orig) + + +def test_transpose_different_dtypes(using_copy_on_write): + df = DataFrame({"a": [1, 2, 3], "b": 1.5}) + df_orig = df.copy() + result = df.T + + assert not np.shares_memory(get_array(df, "a"), get_array(result, 0)) + result.iloc[0, 0] = 100 + if using_copy_on_write: + tm.assert_frame_equal(df, df_orig) + + +def test_transpose_ea_single_column(using_copy_on_write): + df = DataFrame({"a": [1, 2, 3]}, dtype="Int64") + result = df.T + + assert not np.shares_memory(get_array(df, "a"), get_array(result, 0))