Skip to content

CoW: Warn for inplace replace #56297

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 12 commits into from
Dec 8, 2023
7 changes: 6 additions & 1 deletion pandas/core/internals/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -252,12 +252,16 @@ def replace(self, to_replace, value, inplace: bool) -> Self:
value=value,
inplace=inplace,
using_cow=using_copy_on_write(),
already_warned=_AlreadyWarned(),
)

@final
def replace_regex(self, **kwargs) -> Self:
return self.apply_with_block(
"_replace_regex", **kwargs, using_cow=using_copy_on_write()
"_replace_regex",
**kwargs,
using_cow=using_copy_on_write(),
already_warned=_AlreadyWarned(),
)

@final
Expand All @@ -278,6 +282,7 @@ def replace_list(
inplace=inplace,
regex=regex,
using_cow=using_copy_on_write(),
already_warned=_AlreadyWarned(),
)
bm._consolidate_inplace()
return bm
Expand Down
45 changes: 45 additions & 0 deletions pandas/core/internals/blocks.py
Original file line number Diff line number Diff line change
Expand Up @@ -830,6 +830,7 @@ def replace(
# mask may be pre-computed if we're called from replace_list
mask: npt.NDArray[np.bool_] | None = None,
using_cow: bool = False,
already_warned=None,
) -> list[Block]:
"""
replace the to_replace value with value, possible to create new
Expand Down Expand Up @@ -874,6 +875,20 @@ def replace(
# and rest?
blk = self._maybe_copy(using_cow, inplace)
putmask_inplace(blk.values, mask, value)
if (
inplace
and warn_copy_on_write()
and already_warned is not None
and not already_warned.warned_already
):
if self.refs.has_reference():
warnings.warn(
COW_WARNING_GENERAL_MSG,
FutureWarning,
stacklevel=find_stack_level(),
)
already_warned.warned_already = True

if not (self.is_object and value is None):
# if the user *explicitly* gave None, we keep None, otherwise
# may downcast to NaN
Expand Down Expand Up @@ -934,6 +949,7 @@ def _replace_regex(
inplace: bool = False,
mask=None,
using_cow: bool = False,
already_warned=None,
) -> list[Block]:
"""
Replace elements by the given value.
Expand Down Expand Up @@ -968,6 +984,20 @@ def _replace_regex(

replace_regex(block.values, rx, value, mask)

if (
inplace
and warn_copy_on_write()
and already_warned is not None
and not already_warned.warned_already
):
if self.refs.has_reference():
warnings.warn(
COW_WARNING_GENERAL_MSG,
FutureWarning,
stacklevel=find_stack_level(),
)
already_warned.warned_already = True

nbs = block.convert(copy=False, using_cow=using_cow)
opt = get_option("future.no_silent_downcasting")
if (len(nbs) > 1 or nbs[0].dtype != block.dtype) and not opt:
Expand All @@ -992,6 +1022,7 @@ def replace_list(
inplace: bool = False,
regex: bool = False,
using_cow: bool = False,
already_warned=None,
) -> list[Block]:
"""
See BlockManager.replace_list docstring.
Expand Down Expand Up @@ -1048,6 +1079,20 @@ def replace_list(
else:
rb = [self if inplace else self.copy()]

if (
inplace
and warn_copy_on_write()
and already_warned is not None
and not already_warned.warned_already
):
if self.refs.has_reference():
warnings.warn(
COW_WARNING_GENERAL_MSG,
FutureWarning,
stacklevel=find_stack_level(),
)
already_warned.warned_already = True

opt = get_option("future.no_silent_downcasting")
for i, ((src, dest), mask) in enumerate(zip(pairs, masks)):
convert = i == src_len # only convert once at the end
Expand Down
7 changes: 2 additions & 5 deletions pandas/tests/copy_view/test_chained_assignment_deprecation.py
Original file line number Diff line number Diff line change
Expand Up @@ -45,12 +45,9 @@ def test_methods_iloc_warn(using_copy_on_write):
def test_methods_iloc_getitem_item_cache(
func, args, using_copy_on_write, warn_copy_on_write
):
df = DataFrame({"a": [1.5, 2, 3], "b": 1.5})
df = DataFrame({"a": [1, 2, 3], "b": 1})
ser = df.iloc[:, 0]
# TODO(CoW-warn) should warn about updating a view for all methods
with tm.assert_cow_warning(
warn_copy_on_write and func not in ("replace", "fillna")
):
with tm.assert_cow_warning(warn_copy_on_write and func == "replace"):
getattr(ser, func)(*args, inplace=True)
Comment on lines -48 to 51
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This is a bit fishy. The interpolate PR changed that to floats, this changes it back. But in the end this test is for ensuring there is no chained assignment warning, not about whether there is an "setting value on view" warning, will clean-up in a follow-up PR.


# parent that holds item_cache is dead, so don't increase ref count
Expand Down
20 changes: 12 additions & 8 deletions pandas/tests/copy_view/test_replace.py
Original file line number Diff line number Diff line change
Expand Up @@ -48,12 +48,13 @@ def test_replace(using_copy_on_write, replace_kwargs):
tm.assert_frame_equal(df, df_orig)


def test_replace_regex_inplace_refs(using_copy_on_write):
def test_replace_regex_inplace_refs(using_copy_on_write, warn_copy_on_write):
df = DataFrame({"a": ["aaa", "bbb"]})
df_orig = df.copy()
view = df[:]
arr = get_array(df, "a")
df.replace(to_replace=r"^a.*$", value="new", inplace=True, regex=True)
with tm.assert_cow_warning(warn_copy_on_write):
df.replace(to_replace=r"^a.*$", value="new", inplace=True, regex=True)
if using_copy_on_write:
assert not np.shares_memory(arr, get_array(df, "a"))
assert df._mgr._has_no_reference(0)
Expand Down Expand Up @@ -202,11 +203,12 @@ def test_replace_inplace(using_copy_on_write, to_replace):


@pytest.mark.parametrize("to_replace", [1.5, [1.5]])
def test_replace_inplace_reference(using_copy_on_write, to_replace):
def test_replace_inplace_reference(using_copy_on_write, to_replace, warn_copy_on_write):
df = DataFrame({"a": [1.5, 2, 3]})
arr_a = get_array(df, "a")
view = df[:]
df.replace(to_replace=to_replace, value=15.5, inplace=True)
with tm.assert_cow_warning(warn_copy_on_write):
df.replace(to_replace=to_replace, value=15.5, inplace=True)

if using_copy_on_write:
assert not np.shares_memory(get_array(df, "a"), arr_a)
Expand Down Expand Up @@ -354,12 +356,13 @@ def test_replace_list_none(using_copy_on_write):
assert not np.shares_memory(get_array(df, "a"), get_array(df2, "a"))


def test_replace_list_none_inplace_refs(using_copy_on_write):
def test_replace_list_none_inplace_refs(using_copy_on_write, warn_copy_on_write):
df = DataFrame({"a": ["a", "b", "c"]})
arr = get_array(df, "a")
df_orig = df.copy()
view = df[:]
df.replace(["a"], value=None, inplace=True)
with tm.assert_cow_warning(warn_copy_on_write):
df.replace(["a"], value=None, inplace=True)
if using_copy_on_write:
assert df._mgr._has_no_reference(0)
assert not np.shares_memory(arr, get_array(df, "a"))
Expand Down Expand Up @@ -431,15 +434,16 @@ def test_replace_listlike(using_copy_on_write):
tm.assert_frame_equal(df, df_orig)


def test_replace_listlike_inplace(using_copy_on_write):
def test_replace_listlike_inplace(using_copy_on_write, warn_copy_on_write):
df = DataFrame({"a": [1, 2, 3], "b": [1, 2, 3]})
arr = get_array(df, "a")
df.replace([200, 2], [10, 11], inplace=True)
assert np.shares_memory(get_array(df, "a"), arr)

view = df[:]
df_orig = df.copy()
df.replace([200, 3], [10, 11], inplace=True)
with tm.assert_cow_warning(warn_copy_on_write):
df.replace([200, 3], [10, 11], inplace=True)
if using_copy_on_write:
assert not np.shares_memory(get_array(df, "a"), arr)
tm.assert_frame_equal(view, df_orig)
Expand Down