Skip to content

CoW: Add warning for replace with inplace #56060

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 5 commits into from
Nov 20, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
17 changes: 17 additions & 0 deletions pandas/core/generic.py
Original file line number Diff line number Diff line change
Expand Up @@ -100,6 +100,7 @@
SettingWithCopyError,
SettingWithCopyWarning,
_chained_assignment_method_msg,
_chained_assignment_warning_method_msg,
)
from pandas.util._decorators import (
deprecate_nonkeyword_arguments,
Expand Down Expand Up @@ -7773,6 +7774,22 @@ def replace(
ChainedAssignmentError,
stacklevel=2,
)
elif not PYPY and not using_copy_on_write():
ctr = sys.getrefcount(self)
ref_count = REF_COUNT
if isinstance(self, ABCSeries) and hasattr(self, "_cacher"):
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Suggested change
if isinstance(self, ABCSeries) and hasattr(self, "_cacher"):
# in non-CoW mode, chained Series access will populate the `_item_cache` which results in an increased ref count not below the threshold, while we still need to warn. We detect this case of a Series derived from a DataFrame through the presence of `_cacher`
if isinstance(self, ABCSeries) and hasattr(self, "_cacher"):

(maybe a bit long to put in every place that has this check (after the other PRs), but a comment like this would have explained it to me)

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I am fine with adding this here. Maybe we could add a link to this comment in the other PRs?

# in non-CoW mode, chained Series access populates the `_item_cache`,
# which adds a reference and pushes the ref count above the threshold
# even though we still need to warn. We detect this case of a Series
# derived from a DataFrame through the presence of `_cacher` and raise
# the threshold by one.
ref_count += 1
if ctr <= ref_count:
warnings.warn(
_chained_assignment_warning_method_msg,
FutureWarning,
stacklevel=2,
)

if not is_bool(regex) and to_replace is not None:
raise ValueError("'to_replace' must be 'None' if 'regex' is not a bool")
Expand Down
13 changes: 13 additions & 0 deletions pandas/errors/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -503,6 +503,19 @@ class ChainedAssignmentError(Warning):
)


_chained_assignment_warning_method_msg = (
"A value is trying to be set on a copy of a DataFrame or Series "
"through chained assignment using an inplace method.\n"
"The behavior will change in pandas 3.0. This inplace method will "
"never work because the intermediate object on which we are setting "
"values always behaves as a copy.\n\n"
"For example, when doing 'df[col].method(value, inplace=True)', try "
"using 'df.method({col: value}, inplace=True)' or "
"df[col] = df[col].method(value) instead, to perform "
"the operation inplace on the original object.\n\n"
)


class NumExprClobberingError(NameError):
"""
Exception raised when trying to use a built-in numexpr name as a variable name.
Expand Down
12 changes: 12 additions & 0 deletions pandas/tests/copy_view/test_replace.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@
from pandas import (
Categorical,
DataFrame,
option_context,
)
import pandas._testing as tm
from pandas.tests.copy_view.util import get_array
Expand Down Expand Up @@ -395,6 +396,17 @@ def test_replace_chained_assignment(using_copy_on_write):
with tm.raises_chained_assignment_error():
df[["a"]].replace(1, 100, inplace=True)
tm.assert_frame_equal(df, df_orig)
else:
with tm.assert_produces_warning(FutureWarning, match="inplace method"):
with option_context("mode.chained_assignment", None):
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Does it otherwise also raise a SettingWithCopyWarning?

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

yes, [["a"]] currently copies

df[["a"]].replace(1, 100, inplace=True)

with tm.assert_produces_warning(FutureWarning, match="inplace method"):
with option_context("mode.chained_assignment", None):
df[df.a > 5].replace(1, 100, inplace=True)

with tm.assert_produces_warning(FutureWarning, match="inplace method"):
df["a"].replace(1, 100, inplace=True)


def test_replace_listlike(using_copy_on_write):
Expand Down
1 change: 1 addition & 0 deletions scripts/validate_unwanted_patterns.py
Original file line number Diff line number Diff line change
Expand Up @@ -50,6 +50,7 @@
"_global_config",
"_chained_assignment_msg",
"_chained_assignment_method_msg",
"_chained_assignment_warning_method_msg",
"_version_meson",
# The numba extensions need this to mock the iloc object
"_iLocIndexer",
Expand Down