Skip to content

Commit 0a33bd8

Browse files
committed
ENH: Add lazy copy for swapaxes no op (pandas-dev#50573)
1 parent 82e1ec0 commit 0a33bd8

File tree

2 files changed

+66
-7
lines changed

2 files changed

+66
-7
lines changed

pandas/core/generic.py

+31 -7
Original file line number | Diff line number | Diff line change
@@ -160,7 +160,10 @@
160160
BlockManager,
161161
SingleArrayManager,
162162
)
163-
from pandas.core.internals.construction import mgr_to_mgr
163+
from pandas.core.internals.construction import (
164+
mgr_to_mgr,
165+
ndarray_to_mgr,
166+
)
164167
from pandas.core.methods.describe import describe_ndframe
165168
from pandas.core.missing import (
166169
clean_fill_method,
@@ -763,7 +766,7 @@ def _set_axis(self, axis: AxisInt, labels: AnyArrayLike | list) -> None:
763766

764767
@final
765768
def swapaxes(
766-
self: NDFrameT, axis1: Axis, axis2: Axis, copy: bool_t = True
769+
self: NDFrameT, axis1: Axis, axis2: Axis, copy: bool_t | None = None
767770
) -> NDFrameT:
768771
"""
769772
Interchange axes and swap values axes appropriately.
@@ -776,15 +779,36 @@ def swapaxes(
776779
j = self._get_axis_number(axis2)
777780

778781
if i == j:
779-
if copy:
780-
return self.copy()
781-
return self
782+
if copy is False and not using_copy_on_write():
783+
return self
784+
return self.copy(deep=copy)
782785

783786
mapping = {i: j, j: i}
784787

785-
new_axes = (self._get_axis(mapping.get(k, k)) for k in range(self._AXIS_LEN))
788+
new_axes = [self._get_axis(mapping.get(k, k)) for k in range(self._AXIS_LEN)]
786789
new_values = self.values.swapaxes(i, j)
787-
if copy:
790+
if (
791+
using_copy_on_write()
792+
and self._mgr.is_single_block
793+
and isinstance(self._mgr, BlockManager)
794+
):
795+
# This should only get hit in case of having a single block, otherwise a
796+
# copy is made, we don't have to set up references.
797+
new_mgr = ndarray_to_mgr(
798+
new_values,
799+
new_axes[0],
800+
new_axes[1],
801+
dtype=None,
802+
copy=False,
803+
typ="block",
804+
)
805+
assert isinstance(new_mgr, BlockManager)
806+
assert isinstance(self._mgr, BlockManager)
807+
new_mgr.parent = self._mgr
808+
new_mgr.refs = [weakref.ref(self._mgr.blocks[0])]
809+
return self._constructor(new_mgr).__finalize__(self, method="swapaxes")
810+
811+
elif (copy or copy is None) and self._mgr.is_single_block:
788812
new_values = new_values.copy()
789813

790814
return self._constructor(

pandas/tests/copy_view/test_methods.py

+35
Original file line number | Diff line number | Diff line change
@@ -346,6 +346,41 @@ def test_to_frame(using_copy_on_write):
346346
tm.assert_frame_equal(df, expected)
347347

348348

349+
@pytest.mark.parametrize("ax", ["index", "columns"])
350+
def test_swapaxes_noop(using_copy_on_write, ax):
351+
df = DataFrame({"a": [1, 2, 3], "b": [4, 5, 6]})
352+
df_orig = df.copy()
353+
df2 = df.swapaxes(ax, ax)
354+
355+
if using_copy_on_write:
356+
assert np.shares_memory(get_array(df2, "a"), get_array(df, "a"))
357+
else:
358+
assert not np.shares_memory(get_array(df2, "a"), get_array(df, "a"))
359+
360+
# mutating df2 triggers a copy-on-write for that column/block
361+
df2.iloc[0, 0] = 0
362+
if using_copy_on_write:
363+
assert not np.shares_memory(get_array(df2, "a"), get_array(df, "a"))
364+
tm.assert_frame_equal(df, df_orig)
365+
366+
367+
def test_swapaxes_single_block(using_copy_on_write):
368+
df = DataFrame({"a": [1, 2, 3], "b": [4, 5, 6]}, index=["x", "y", "z"])
369+
df_orig = df.copy()
370+
df2 = df.swapaxes("index", "columns")
371+
372+
if using_copy_on_write:
373+
assert np.shares_memory(get_array(df2, "x"), get_array(df, "a"))
374+
else:
375+
assert not np.shares_memory(get_array(df2, "x"), get_array(df, "a"))
376+
377+
# mutating df2 triggers a copy-on-write for that column/block
378+
df2.iloc[0, 0] = 0
379+
if using_copy_on_write:
380+
assert not np.shares_memory(get_array(df2, "x"), get_array(df, "a"))
381+
tm.assert_frame_equal(df, df_orig)
382+
383+
349384
@pytest.mark.parametrize(
350385
"method, idx",
351386
[

0 commit comments

Comments
 (0)