From 89185cd013358c5f3ed0fa716f5da8ed94563454 Mon Sep 17 00:00:00 2001 From: Patrick Hoefler <61934744+phofl@users.noreply.github.com> Date: Sun, 19 Nov 2023 17:19:04 +0100 Subject: [PATCH 1/6] Create baseline commit for warning message --- pandas/errors/__init__.py | 13 +++++++++++++ scripts/validate_unwanted_patterns.py | 1 + 2 files changed, 14 insertions(+) diff --git a/pandas/errors/__init__.py b/pandas/errors/__init__.py index 09a612eca0529..e2aa9010dc109 100644 --- a/pandas/errors/__init__.py +++ b/pandas/errors/__init__.py @@ -503,6 +503,19 @@ class ChainedAssignmentError(Warning): ) +_chained_assignment_warning_method_msg = ( + "A value is trying to be set on a copy of a DataFrame or Series " + "through chained assignment using an inplace method.\n" + "The behavior will change in pandas 3.0. This inplace method will " + "never work because the intermediate object on which we are setting " + "values always behaves as a copy.\n\n" + "For example, when doing 'df[col].method(value, inplace=True)', try " + "using 'df.method({col: value}, inplace=True)' or " + "df[col] = df[col].method(value) instead, to perform " + "the operation inplace on the original object.\n\n" +) + + class NumExprClobberingError(NameError): """ Exception raised when trying to use a built-in numexpr name as a variable name. diff --git a/scripts/validate_unwanted_patterns.py b/scripts/validate_unwanted_patterns.py index 6e6251425928d..5bde4c21cfab5 100755 --- a/scripts/validate_unwanted_patterns.py +++ b/scripts/validate_unwanted_patterns.py @@ -50,6 +50,7 @@ "_global_config", "_chained_assignment_msg", "_chained_assignment_method_msg", + "_chained_assignment_warning_method_msg", "_version_meson", # The numba extensions need this to mock the iloc object "_iLocIndexer", From 3bd3ce4d2ba0f4c53d231af9cfa441a183a8733d Mon Sep 17 00:00:00 2001 From: Patrick Hoefler <61934744+phofl@users.noreply.github.com> Date: Sun, 19 Nov 2023 17:35:37 +0100 Subject: [PATCH 2/6] CoW: Add warning for mask, where and clip with inplace --- pandas/core/generic.py | 35 ++++++++++++++++++++++++++ pandas/tests/copy_view/test_clip.py | 16 +++++++++++- pandas/tests/copy_view/test_methods.py | 18 ++++++++++--- 3 files changed, 65 insertions(+), 4 deletions(-) diff --git a/pandas/core/generic.py b/pandas/core/generic.py index 46bcfe0a32210..3ce7f2a2fc041 100644 --- a/pandas/core/generic.py +++ b/pandas/core/generic.py @@ -100,6 +100,7 @@ SettingWithCopyError, SettingWithCopyWarning, _chained_assignment_method_msg, + _chained_assignment_warning_method_msg, ) from pandas.util._decorators import ( deprecate_nonkeyword_arguments, @@ -8824,6 +8825,17 @@ def clip( ChainedAssignmentError, stacklevel=2, ) + elif not PYPY and not using_copy_on_write(): + ctr = sys.getrefcount(self) + ref_count = REF_COUNT + if isinstance(self, ABCSeries) and hasattr(self, "_cacher"): + ref_count += 1 + if ctr <= ref_count: + warnings.warn( + _chained_assignment_warning_method_msg, + FutureWarning, + stacklevel=2, + ) axis = nv.validate_clip_with_axis(axis, (), kwargs) if axis is not None: @@ -10724,6 +10736,18 @@ def where( ChainedAssignmentError, stacklevel=2, ) + elif not PYPY and not using_copy_on_write(): + ctr = sys.getrefcount(self) + ref_count = REF_COUNT + if isinstance(self, ABCSeries) and hasattr(self, "_cacher"): + ref_count += 1 + if ctr <= ref_count: + warnings.warn( + _chained_assignment_warning_method_msg, + FutureWarning, + stacklevel=2, + ) + other = common.apply_if_callable(other, self) return self._where(cond, other, inplace, axis, level) @@ -10790,6 +10814,17 @@ def mask( ChainedAssignmentError, stacklevel=2, ) + elif not PYPY and not using_copy_on_write(): + ctr = sys.getrefcount(self) + ref_count = REF_COUNT + if isinstance(self, ABCSeries) and hasattr(self, "_cacher"): + ref_count += 1 + if ctr <= ref_count: + warnings.warn( + _chained_assignment_warning_method_msg, + FutureWarning, + stacklevel=2, + ) cond = common.apply_if_callable(cond, self) diff --git a/pandas/tests/copy_view/test_clip.py b/pandas/tests/copy_view/test_clip.py index 6a27a0633a4aa..13ddd479c7c67 100644 --- a/pandas/tests/copy_view/test_clip.py +++ b/pandas/tests/copy_view/test_clip.py @@ -1,6 +1,9 @@ import numpy as np -from pandas import DataFrame +from pandas import ( + DataFrame, + option_context, +) import pandas._testing as tm from pandas.tests.copy_view.util import get_array @@ -81,3 +84,14 @@ def test_clip_chained_inplace(using_copy_on_write): with tm.raises_chained_assignment_error(): df[["a"]].clip(1, 2, inplace=True) tm.assert_frame_equal(df, df_orig) + else: + with tm.assert_produces_warning(FutureWarning, match="inplace method"): + df["a"].clip(1, 2, inplace=True) + + with tm.assert_produces_warning(FutureWarning, match="inplace method"): + with option_context("mode.chained_assignment", None): + df[["a"]].clip(1, 2, inplace=True) + + with tm.assert_produces_warning(FutureWarning, match="inplace method"): + with option_context("mode.chained_assignment", None): + df[df["a"] > 1].clip(1, 2, inplace=True) diff --git a/pandas/tests/copy_view/test_methods.py b/pandas/tests/copy_view/test_methods.py index 61569a49863cb..efe92037e4684 100644 --- a/pandas/tests/copy_view/test_methods.py +++ b/pandas/tests/copy_view/test_methods.py @@ -12,6 +12,7 @@ Series, Timestamp, date_range, + option_context, period_range, ) import pandas._testing as tm @@ -1550,6 +1551,17 @@ def test_chained_where_mask(using_copy_on_write, func): with tm.raises_chained_assignment_error(): getattr(df[["a"]], func)(df["a"] > 2, 5, inplace=True) tm.assert_frame_equal(df, df_orig) + else: + with tm.assert_produces_warning(FutureWarning, match="inplace method"): + getattr(df["a"], func)(df["a"] > 2, 5, inplace=True) + + with tm.assert_produces_warning(FutureWarning, match="inplace method"): + with option_context("mode.chained_assignment", None): + getattr(df[["a"]], func)(df["a"] > 2, 5, inplace=True) + + with tm.assert_produces_warning(FutureWarning, match="inplace method"): + with option_context("mode.chained_assignment", None): + getattr(df[df["a"] > 1], func)(df["a"] > 2, 5, inplace=True) def test_asfreq_noop(using_copy_on_write): @@ -1684,7 +1696,7 @@ def test_get(using_copy_on_write, warn_copy_on_write, key): warn = FutureWarning if isinstance(key, str) else None else: warn = SettingWithCopyWarning if isinstance(key, list) else None - with pd.option_context("chained_assignment", "warn"): + with option_context("chained_assignment", "warn"): with tm.assert_produces_warning(warn): result.iloc[0] = 0 @@ -1721,7 +1733,7 @@ def test_xs( with tm.assert_cow_warning(single_block or axis == 1): result.iloc[0] = 0 else: - with pd.option_context("chained_assignment", "warn"): + with option_context("chained_assignment", "warn"): with tm.assert_produces_warning(SettingWithCopyWarning): result.iloc[0] = 0 @@ -1756,7 +1768,7 @@ def test_xs_multiindex( warn = SettingWithCopyWarning else: warn = None - with pd.option_context("chained_assignment", "warn"): + with option_context("chained_assignment", "warn"): with tm.assert_produces_warning(warn): result.iloc[0, 0] = 0 From bf081e5dfaedce21897c1e2598c4de45ba745e57 Mon Sep 17 00:00:00 2001 From: Patrick Hoefler <61934744+phofl@users.noreply.github.com> Date: Mon, 20 Nov 2023 01:53:12 +0100 Subject: [PATCH 3/6] Fix --- pandas/tests/copy_view/test_methods.py | 12 +++++++++--- 1 file changed, 9 insertions(+), 3 deletions(-) diff --git a/pandas/tests/copy_view/test_methods.py b/pandas/tests/copy_view/test_methods.py index efe92037e4684..7ad1ed4961445 100644 --- a/pandas/tests/copy_view/test_methods.py +++ b/pandas/tests/copy_view/test_methods.py @@ -1552,14 +1552,20 @@ def test_chained_where_mask(using_copy_on_write, func): getattr(df[["a"]], func)(df["a"] > 2, 5, inplace=True) tm.assert_frame_equal(df, df_orig) else: - with tm.assert_produces_warning(FutureWarning, match="inplace method"): + with tm.assert_produces_warning( + FutureWarning, match="inplace method", check_stacklevel=False + ): getattr(df["a"], func)(df["a"] > 2, 5, inplace=True) - with tm.assert_produces_warning(FutureWarning, match="inplace method"): + with tm.assert_produces_warning( + FutureWarning, match="inplace method", check_stacklevel=False + ): with option_context("mode.chained_assignment", None): getattr(df[["a"]], func)(df["a"] > 2, 5, inplace=True) - with tm.assert_produces_warning(FutureWarning, match="inplace method"): + with tm.assert_produces_warning( + FutureWarning, match="inplace method", check_stacklevel=False + ): with option_context("mode.chained_assignment", None): getattr(df[df["a"] > 1], func)(df["a"] > 2, 5, inplace=True) From 2b3e71b92b776bcb3a033632a3622c2e8226ec77 Mon Sep 17 00:00:00 2001 From: Patrick Hoefler <61934744+phofl@users.noreply.github.com> Date: Tue, 21 Nov 2023 21:15:02 +0100 Subject: [PATCH 4/6] Update --- pandas/core/generic.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/pandas/core/generic.py b/pandas/core/generic.py index 0cb612798d5a7..c1e5992cf580b 100644 --- a/pandas/core/generic.py +++ b/pandas/core/generic.py @@ -10756,6 +10756,7 @@ def where( ctr = sys.getrefcount(self) ref_count = REF_COUNT if isinstance(self, ABCSeries) and hasattr(self, "_cacher"): + # see https://github.com/pandas-dev/pandas/pull/56060#discussion_r1399245221 ref_count += 1 if ctr <= ref_count: warnings.warn( @@ -10834,6 +10835,7 @@ def mask( ctr = sys.getrefcount(self) ref_count = REF_COUNT if isinstance(self, ABCSeries) and hasattr(self, "_cacher"): + # see https://github.com/pandas-dev/pandas/pull/56060#discussion_r1399245221 ref_count += 1 if ctr <= ref_count: warnings.warn( From f13535cad685737e6dd9bd3a6bb4617467f1968e Mon Sep 17 00:00:00 2001 From: Patrick Hoefler <61934744+phofl@users.noreply.github.com> Date: Tue, 21 Nov 2023 22:32:21 +0100 Subject: [PATCH 5/6] Update pandas/core/generic.py Co-authored-by: Joris Van den Bossche --- pandas/core/generic.py | 1 + 1 file changed, 1 insertion(+) diff --git a/pandas/core/generic.py b/pandas/core/generic.py index c1e5992cf580b..3e228ab695f76 100644 --- a/pandas/core/generic.py +++ b/pandas/core/generic.py @@ -8845,6 +8845,7 @@ def clip( ctr = sys.getrefcount(self) ref_count = REF_COUNT if isinstance(self, ABCSeries) and hasattr(self, "_cacher"): + # see https://github.com/pandas-dev/pandas/pull/56060#discussion_r1399245221 ref_count += 1 if ctr <= ref_count: warnings.warn( From dc3100de0ee0fee8f2077af9e35c5d3c17be3a4c Mon Sep 17 00:00:00 2001 From: Joris Van den Bossche Date: Mon, 27 Nov 2023 13:12:38 +0100 Subject: [PATCH 6/6] fix mask to not raise warning twice --- pandas/core/generic.py | 3 ++- pandas/tests/copy_view/test_methods.py | 12 +++--------- 2 files changed, 5 insertions(+), 10 deletions(-) diff --git a/pandas/core/generic.py b/pandas/core/generic.py index 795459a31d31e..47d5bb9dbb055 100644 --- a/pandas/core/generic.py +++ b/pandas/core/generic.py @@ -10907,12 +10907,13 @@ def mask( ) cond = common.apply_if_callable(cond, self) + other = common.apply_if_callable(other, self) # see gh-21891 if not hasattr(cond, "__invert__"): cond = np.array(cond) - return self.where( + return self._where( ~cond, other=other, inplace=inplace, diff --git a/pandas/tests/copy_view/test_methods.py b/pandas/tests/copy_view/test_methods.py index c1f4b278e644b..806842dcab57a 100644 --- a/pandas/tests/copy_view/test_methods.py +++ b/pandas/tests/copy_view/test_methods.py @@ -1552,20 +1552,14 @@ def test_chained_where_mask(using_copy_on_write, func): getattr(df[["a"]], func)(df["a"] > 2, 5, inplace=True) tm.assert_frame_equal(df, df_orig) else: - with tm.assert_produces_warning( - FutureWarning, match="inplace method", check_stacklevel=False - ): + with tm.assert_produces_warning(FutureWarning, match="inplace method"): getattr(df["a"], func)(df["a"] > 2, 5, inplace=True) - with tm.assert_produces_warning( - FutureWarning, match="inplace method", check_stacklevel=False - ): + with tm.assert_produces_warning(FutureWarning, match="inplace method"): with option_context("mode.chained_assignment", None): getattr(df[["a"]], func)(df["a"] > 2, 5, inplace=True) - with tm.assert_produces_warning( - FutureWarning, match="inplace method", check_stacklevel=False - ): + with tm.assert_produces_warning(FutureWarning, match="inplace method"): with option_context("mode.chained_assignment", None): getattr(df[df["a"] > 1], func)(df["a"] > 2, 5, inplace=True)