From dff1fef7bb873641259d76254ee91a7539e65ebf Mon Sep 17 00:00:00 2001 From: Patrick Hoefler <61934744+phofl@users.noreply.github.com> Date: Sun, 19 Nov 2023 15:58:59 +0100 Subject: [PATCH 1/3] CoW: Add warning for replace with inplace --- pandas/core/generic.py | 12 ++++++++++++ pandas/errors/__init__.py | 13 +++++++++++++ pandas/tests/copy_view/test_replace.py | 12 ++++++++++++ scripts/validate_unwanted_patterns.py | 1 + 4 files changed, 38 insertions(+) diff --git a/pandas/core/generic.py b/pandas/core/generic.py index 46bcfe0a32210..6c575a506306b 100644 --- a/pandas/core/generic.py +++ b/pandas/core/generic.py @@ -100,6 +100,7 @@ SettingWithCopyError, SettingWithCopyWarning, _chained_assignment_method_msg, + _chained_assignment_warning_method_msg, ) from pandas.util._decorators import ( deprecate_nonkeyword_arguments, @@ -7773,6 +7774,17 @@ def replace( ChainedAssignmentError, stacklevel=2, ) + elif not PYPY and not using_copy_on_write(): + ctr = sys.getrefcount(self) + ref_count = REF_COUNT + if isinstance(self, ABCSeries) and hasattr(self, "_cacher"): + ref_count += 1 + if ctr <= ref_count: + warnings.warn( + _chained_assignment_warning_method_msg, + FutureWarning, + stacklevel=2, + ) if not is_bool(regex) and to_replace is not None: raise ValueError("'to_replace' must be 'None' if 'regex' is not a bool") diff --git a/pandas/errors/__init__.py b/pandas/errors/__init__.py index 09a612eca0529..e2aa9010dc109 100644 --- a/pandas/errors/__init__.py +++ b/pandas/errors/__init__.py @@ -503,6 +503,19 @@ class ChainedAssignmentError(Warning): ) +_chained_assignment_warning_method_msg = ( + "A value is trying to be set on a copy of a DataFrame or Series " + "through chained assignment using an inplace method.\n" + "The behavior will change in pandas 3.0. This inplace method will " + "never work because the intermediate object on which we are setting " + "values always behaves as a copy.\n\n" + "For example, when doing 'df[col].method(value, inplace=True)', try " + "using 'df.method({col: value}, inplace=True)' or " + "df[col] = df[col].method(value) instead, to perform " + "the operation inplace on the original object.\n\n" +) + + class NumExprClobberingError(NameError): """ Exception raised when trying to use a built-in numexpr name as a variable name. diff --git a/pandas/tests/copy_view/test_replace.py b/pandas/tests/copy_view/test_replace.py index 085f355dc4377..d11a2893becdc 100644 --- a/pandas/tests/copy_view/test_replace.py +++ b/pandas/tests/copy_view/test_replace.py @@ -4,6 +4,7 @@ from pandas import ( Categorical, DataFrame, + option_context, ) import pandas._testing as tm from pandas.tests.copy_view.util import get_array @@ -395,6 +396,17 @@ def test_replace_chained_assignment(using_copy_on_write): with tm.raises_chained_assignment_error(): df[["a"]].replace(1, 100, inplace=True) tm.assert_frame_equal(df, df_orig) + else: + with tm.assert_produces_warning(FutureWarning, match="inplace method"): + with option_context("mode.chained_assignment", None): + df[["a"]].replace(1, 100, inplace=True) + + with tm.assert_produces_warning(FutureWarning, match="inplace method"): + with option_context("mode.chained_assignment", None): + df[df.a > 5].replace(1, 100, inplace=True) + + with tm.assert_produces_warning(FutureWarning, match="inplace method"): + df["a"].replace(1, 100, inplace=True) def test_replace_listlike(using_copy_on_write): diff --git a/scripts/validate_unwanted_patterns.py b/scripts/validate_unwanted_patterns.py index 6e6251425928d..5bde4c21cfab5 100755 --- a/scripts/validate_unwanted_patterns.py +++ b/scripts/validate_unwanted_patterns.py @@ -50,6 +50,7 @@ "_global_config", "_chained_assignment_msg", "_chained_assignment_method_msg", + "_chained_assignment_warning_method_msg", "_version_meson", # The numba extensions need this to mock the iloc object "_iLocIndexer", From 89185cd013358c5f3ed0fa716f5da8ed94563454 Mon Sep 17 00:00:00 2001 From: Patrick Hoefler <61934744+phofl@users.noreply.github.com> Date: Sun, 19 Nov 2023 17:19:04 +0100 Subject: [PATCH 2/3] Create baseline commit for warning message --- pandas/errors/__init__.py | 13 +++++++++++++ scripts/validate_unwanted_patterns.py | 1 + 2 files changed, 14 insertions(+) diff --git a/pandas/errors/__init__.py b/pandas/errors/__init__.py index 09a612eca0529..e2aa9010dc109 100644 --- a/pandas/errors/__init__.py +++ b/pandas/errors/__init__.py @@ -503,6 +503,19 @@ class ChainedAssignmentError(Warning): ) +_chained_assignment_warning_method_msg = ( + "A value is trying to be set on a copy of a DataFrame or Series " + "through chained assignment using an inplace method.\n" + "The behavior will change in pandas 3.0. This inplace method will " + "never work because the intermediate object on which we are setting " + "values always behaves as a copy.\n\n" + "For example, when doing 'df[col].method(value, inplace=True)', try " + "using 'df.method({col: value}, inplace=True)' or " + "df[col] = df[col].method(value) instead, to perform " + "the operation inplace on the original object.\n\n" +) + + class NumExprClobberingError(NameError): """ Exception raised when trying to use a built-in numexpr name as a variable name. diff --git a/scripts/validate_unwanted_patterns.py b/scripts/validate_unwanted_patterns.py index 6e6251425928d..5bde4c21cfab5 100755 --- a/scripts/validate_unwanted_patterns.py +++ b/scripts/validate_unwanted_patterns.py @@ -50,6 +50,7 @@ "_global_config", "_chained_assignment_msg", "_chained_assignment_method_msg", + "_chained_assignment_warning_method_msg", "_version_meson", # The numba extensions need this to mock the iloc object "_iLocIndexer", From 97bf95f5ca99610839f316c932c2fb4ba335a2f4 Mon Sep 17 00:00:00 2001 From: Patrick Hoefler <61934744+phofl@users.noreply.github.com> Date: Mon, 20 Nov 2023 15:30:42 +0100 Subject: [PATCH 3/3] Add comment --- pandas/core/generic.py | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/pandas/core/generic.py b/pandas/core/generic.py index 6c575a506306b..b23092e6f27e1 100644 --- a/pandas/core/generic.py +++ b/pandas/core/generic.py @@ -7778,6 +7778,11 @@ def replace( ctr = sys.getrefcount(self) ref_count = REF_COUNT if isinstance(self, ABCSeries) and hasattr(self, "_cacher"): + # in non-CoW mode, chained Series access will populate the + # `_item_cache` which results in an increased ref count not below + # the threshold, while we still need to warn. We detect this case + # of a Series derived from a DataFrame through the presence of + # `_cacher` ref_count += 1 if ctr <= ref_count: warnings.warn(