Skip to content

BUG / CoW: also return new object in case of null slice for both rows and columns (.(i)loc[:, :]) #49469

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
8 changes: 8 additions & 0 deletions pandas/core/indexing.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,8 @@

import numpy as np

from pandas._config import get_option

from pandas._libs.indexing import NDFrameIndexerBase
from pandas._libs.lib import item_from_zerodim
from pandas._typing import (
Expand Down Expand Up @@ -934,6 +936,12 @@ def _getitem_tuple_same_dim(self, tup: tuple):
# be handled by the _getitem_lowerdim call above.
assert retval.ndim == self.ndim

if retval is self.obj and (
get_option("mode.copy_on_write")
and get_option("mode.data_manager") == "block"
):
retval = retval.copy(deep=False)

return retval

@final
Expand Down
62 changes: 62 additions & 0 deletions pandas/tests/copy_view/test_indexing.py
Original file line number Diff line number Diff line change
Expand Up @@ -612,6 +612,68 @@ def test_subset_chained_single_block_row(using_copy_on_write, using_array_manage
assert subset.iloc[0] == 0


@pytest.mark.parametrize(
    "method",
    [
        pytest.param(lambda df: df[:], id="getitem"),
        pytest.param(lambda df: df.loc[:, :], id="loc"),
        pytest.param(lambda df: df.loc[:], id="loc-rows"),
        pytest.param(lambda df: df.iloc[:, :], id="iloc"),
        pytest.param(lambda df: df.iloc[:], id="iloc-rows"),
    ],
)
def test_null_slice(request, method, using_copy_on_write):
    """Check every null-slice (``:``) indexing variant on a DataFrame.

    With Copy-on-Write enabled, each variant must hand back a new object and
    writing into that object must leave the parent untouched. Without
    Copy-on-Write, the two-axis ``.loc[:, :]`` / ``.iloc[:, :]`` variants are
    expected to return the parent itself, the remaining variants a new object,
    and in every case the write is expected to show up in the parent.
    """
    parent = DataFrame({"a": [1, 2, 3], "b": [4, 5, 6], "c": [7, 8, 9]})
    snapshot = parent.copy()

    result = method(parent)

    # Only the non-CoW two-axis (i)loc null slice returns the identical
    # object; everything else (and everything under CoW) is a new object.
    expect_same_object = not using_copy_on_write and request.node.callspec.id in (
        "loc",
        "iloc",
    )
    assert (result is parent) == expect_same_object

    # Mutating the result triggers CoW (parent unchanged) or, without CoW,
    # writes through to the parent.
    result.iloc[0, 0] = 0
    if not using_copy_on_write:
        assert parent.iloc[0, 0] == 0
    else:
        tm.assert_frame_equal(parent, snapshot)


@pytest.mark.parametrize(
    "method",
    [
        pytest.param(lambda s: s[:], id="getitem"),
        pytest.param(lambda s: s.loc[:], id="loc"),
        pytest.param(lambda s: s.iloc[:], id="iloc"),
    ],
)
def test_null_slice_series(request, method, using_copy_on_write):
    """Check every null-slice (``:``) indexing variant on a Series.

    All variants must return a new object regardless of the Copy-on-Write
    setting. Writing into that object must leave the parent untouched when
    Copy-on-Write is enabled, and is expected to show up in the parent
    otherwise.
    """
    parent = Series([1, 2, 3], index=["a", "b", "c"])
    snapshot = parent.copy()

    result = method(parent)

    # A new object is returned both with and without CoW.
    assert result is not parent

    # Mutating the result triggers CoW (parent unchanged) or, without CoW,
    # writes through to the parent.
    result.iloc[0] = 0
    if not using_copy_on_write:
        assert parent.iloc[0] == 0
    else:
        tm.assert_series_equal(parent, snapshot)


# TODO add more tests modifying the parent


Expand Down
5 changes: 4 additions & 1 deletion pandas/tests/indexing/test_loc.py
Original file line number Diff line number Diff line change
Expand Up @@ -1115,7 +1115,10 @@ def test_identity_slice_returns_new_object(
assert (sliced_df["a"] == 4).all()

# These should not return copies
assert original_df is original_df.loc[:, :]
if using_copy_on_write:
assert original_df is not original_df.loc[:, :]
else:
assert original_df is original_df.loc[:, :]
df = DataFrame(np.random.randn(10, 4))
assert df[0] is df.loc[:, 0]

Expand Down