Skip to content

ENH: Add lazy copy for swapaxes no op #50573

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 7 commits into from
Jan 12, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
38 changes: 31 additions & 7 deletions pandas/core/generic.py
Original file line number Diff line number Diff line change
Expand Up @@ -160,7 +160,10 @@
BlockManager,
SingleArrayManager,
)
from pandas.core.internals.construction import mgr_to_mgr
from pandas.core.internals.construction import (
mgr_to_mgr,
ndarray_to_mgr,
)
from pandas.core.methods.describe import describe_ndframe
from pandas.core.missing import (
clean_fill_method,
Expand Down Expand Up @@ -763,7 +766,7 @@ def _set_axis(self, axis: AxisInt, labels: AnyArrayLike | list) -> None:

@final
def swapaxes(
self: NDFrameT, axis1: Axis, axis2: Axis, copy: bool_t = True
self: NDFrameT, axis1: Axis, axis2: Axis, copy: bool_t | None = None
) -> NDFrameT:
"""
Interchange axes and swap values axes appropriately.
Expand All @@ -776,15 +779,36 @@ def swapaxes(
j = self._get_axis_number(axis2)

if i == j:
if copy:
return self.copy()
return self
if copy is False and not using_copy_on_write():
return self
return self.copy(deep=copy)

mapping = {i: j, j: i}

new_axes = (self._get_axis(mapping.get(k, k)) for k in range(self._AXIS_LEN))
new_axes = [self._get_axis(mapping.get(k, k)) for k in range(self._AXIS_LEN)]
new_values = self.values.swapaxes(i, j)
if copy:
if (
using_copy_on_write()
and self._mgr.is_single_block
and isinstance(self._mgr, BlockManager)
):
# This should only be hit when there is a single block; otherwise a
# copy is made and we don't have to set up references.
new_mgr = ndarray_to_mgr(
new_values,
new_axes[0],
new_axes[1],
dtype=None,
copy=False,
typ="block",
)
assert isinstance(new_mgr, BlockManager)
assert isinstance(self._mgr, BlockManager)
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Since this is already covered by the `if` check above, is it necessary to repeat it here?

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Thought so as well, but mypy disagrees

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Ah, I see. When do we use a cast and when do we use an assert to deal with such issues?

Copy link
Member Author

@phofl phofl Jan 11, 2023

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I prefer the assert here, but I'm not sure whether there is a general rule.

Edit: I prefer assert, not cast; that was a mistake in my initial post.

new_mgr.parent = self._mgr
new_mgr.refs = [weakref.ref(self._mgr.blocks[0])]
return self._constructor(new_mgr).__finalize__(self, method="swapaxes")

elif (copy or copy is None) and self._mgr.is_single_block:
new_values = new_values.copy()

return self._constructor(
Expand Down
35 changes: 35 additions & 0 deletions pandas/tests/copy_view/test_methods.py
Original file line number Diff line number Diff line change
Expand Up @@ -346,6 +346,41 @@ def test_to_frame(using_copy_on_write):
tm.assert_frame_equal(df, expected)


@pytest.mark.parametrize("ax", ["index", "columns"])
def test_swapaxes_noop(using_copy_on_write, ax):
    """
    Check memory sharing when ``swapaxes`` is called with the same axis twice.

    With ``using_copy_on_write`` the result is expected to share column "a"
    with the input until the result is mutated; without it the result is
    expected not to share memory. The input frame must be unchanged either way.
    """
    frame = DataFrame({"a": [1, 2, 3], "b": [4, 5, 6]})
    snapshot = frame.copy()
    swapped = frame.swapaxes(ax, ax)

    def shares_column_a():
        # Compare the buffers backing column "a" of both frames.
        return np.shares_memory(get_array(swapped, "a"), get_array(frame, "a"))

    # Memory is shared exactly when the copy-on-write mode is enabled.
    assert shares_column_a() == bool(using_copy_on_write)

    # mutating the result triggers a copy-on-write for that column/block
    swapped.iloc[0, 0] = 0
    if using_copy_on_write:
        assert not shares_column_a()

    # The write to the result must never leak into the input frame.
    tm.assert_frame_equal(frame, snapshot)


def test_swapaxes_single_block(using_copy_on_write):
    """
    Check memory sharing when swapping "index" and "columns" of a frame.

    Row label "x" of the input becomes column "x" of the result. With
    ``using_copy_on_write`` that column is expected to share memory with
    column "a" of the input until the result is mutated; without it, no
    sharing is expected. The input frame must be unchanged either way.
    """
    frame = DataFrame({"a": [1, 2, 3], "b": [4, 5, 6]}, index=["x", "y", "z"])
    snapshot = frame.copy()
    swapped = frame.swapaxes("index", "columns")

    def shares_with_input():
        # Column "x" of the result vs. column "a" of the input.
        return np.shares_memory(get_array(swapped, "x"), get_array(frame, "a"))

    # Memory is shared exactly when the copy-on-write mode is enabled.
    assert shares_with_input() == bool(using_copy_on_write)

    # mutating the result triggers a copy-on-write for that column/block
    swapped.iloc[0, 0] = 0
    if using_copy_on_write:
        assert not shares_with_input()

    # The write to the result must never leak into the input frame.
    tm.assert_frame_equal(frame, snapshot)


@pytest.mark.parametrize(
"method, idx",
[
Expand Down