From 28448f4c49b45e1aaebbc47ad9905beee9f7a8dc Mon Sep 17 00:00:00 2001 From: Patrick Hoefler Date: Thu, 6 Jul 2023 21:36:59 +0200 Subject: [PATCH 1/3] CoW: Add ChainedAssignmentError for replace with inplace=True --- doc/source/whatsnew/v2.1.0.rst | 1 + pandas/core/generic.py | 10 ++++++++++ pandas/tests/copy_view/test_replace.py | 13 +++++++++++++ 3 files changed, 24 insertions(+) diff --git a/doc/source/whatsnew/v2.1.0.rst b/doc/source/whatsnew/v2.1.0.rst index 1119117c411d3..e08663922da8a 100644 --- a/doc/source/whatsnew/v2.1.0.rst +++ b/doc/source/whatsnew/v2.1.0.rst @@ -34,6 +34,7 @@ Copy-on-Write improvements as a temporary copy. This holds true for: - DataFrame.fillna / Series.fillna + - DataFrame.replace / Series.replace .. _whatsnew_210.enhancements.enhancement2: diff --git a/pandas/core/generic.py b/pandas/core/generic.py index 68e5fbd696ab9..a0e40393f5446 100644 --- a/pandas/core/generic.py +++ b/pandas/core/generic.py @@ -7611,6 +7611,16 @@ def replace( ) inplace = validate_bool_kwarg(inplace, "inplace") + if inplace: + if not PYPY and using_copy_on_write(): + refcount = 2 if PY311 else 3 + if sys.getrefcount(self) <= refcount: + warnings.warn( + _chained_assignment_method_msg, + ChainedAssignmentError, + stacklevel=2, + ) + if not is_bool(regex) and to_replace is not None: raise ValueError("'to_replace' must be 'None' if 'regex' is not a bool") diff --git a/pandas/tests/copy_view/test_replace.py b/pandas/tests/copy_view/test_replace.py index dfb1caa4b2ffd..4e80f8ed6d42a 100644 --- a/pandas/tests/copy_view/test_replace.py +++ b/pandas/tests/copy_view/test_replace.py @@ -373,3 +373,16 @@ def test_replace_columnwise_no_op(using_copy_on_write): assert not np.shares_memory(get_array(df2, "a"), get_array(df, "a")) df2.iloc[0, 0] = 100 tm.assert_frame_equal(df, df_orig) + + +def test_replace_chained_assignment(using_copy_on_write): + df = DataFrame({"a": [1, np.nan, 2], "b": 1}) + df_orig = df.copy() + if using_copy_on_write: + with tm.raises_chained_assignment_error(): + df["a"].replace(1, 100, inplace=True) + tm.assert_frame_equal(df, df_orig) + + with tm.raises_chained_assignment_error(): + df[["a"]].replace(1, 100, inplace=True) + tm.assert_frame_equal(df, df_orig) From 16506ce7560cafb634a18f0e6948ba3472e45698 Mon Sep 17 00:00:00 2001 From: Patrick Hoefler Date: Fri, 7 Jul 2023 00:39:14 +0200 Subject: [PATCH 2/3] Fix --- pandas/tests/arrays/categorical/test_replace.py | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/pandas/tests/arrays/categorical/test_replace.py b/pandas/tests/arrays/categorical/test_replace.py index d38f0b8719de0..6011cf5d859fd 100644 --- a/pandas/tests/arrays/categorical/test_replace.py +++ b/pandas/tests/arrays/categorical/test_replace.py @@ -56,7 +56,9 @@ def test_replace_categorical_series(to_replace, value, expected, flip_categories ("b", None, ["a", None], "Categorical.categories length are different"), ], ) -def test_replace_categorical(to_replace, value, result, expected_error_msg): +def test_replace_categorical( + to_replace, value, result, expected_error_msg, using_copy_on_write +): # GH#26988 cat = Categorical(["a", "b"]) expected = Categorical(result) @@ -68,7 +70,11 @@ def test_replace_categorical(to_replace, value, result, expected_error_msg): # ensure non-inplace call does not affect original tm.assert_categorical_equal(cat, expected) - pd.Series(cat, copy=False).replace(to_replace, value, inplace=True) + if using_copy_on_write: + with tm.raises_chained_assignment_error(): + pd.Series(cat, copy=False).replace(to_replace, value, inplace=True) + else: + pd.Series(cat, copy=False).replace(to_replace, value, inplace=True) tm.assert_categorical_equal(cat, expected) From c494a195dae64e0bb4800accf94f6a2b72fb5d10 Mon Sep 17 00:00:00 2001 From: Patrick Hoefler <61934744+phofl@users.noreply.github.com> Date: Thu, 13 Jul 2023 15:25:10 -0500 Subject: [PATCH 3/3] Update --- pandas/core/generic.py | 3 +-- pandas/tests/arrays/categorical/test_replace.py | 11 +++-------- 2 files changed, 4 insertions(+), 10 deletions(-) diff --git a/pandas/core/generic.py b/pandas/core/generic.py index e0aa5334dbd06..7fe3a1a9c90a4 100644 --- a/pandas/core/generic.py +++ b/pandas/core/generic.py @@ -7625,8 +7625,7 @@ def replace( inplace = validate_bool_kwarg(inplace, "inplace") if inplace: if not PYPY and using_copy_on_write(): - refcount = 2 if PY311 else 3 - if sys.getrefcount(self) <= refcount: + if sys.getrefcount(self) <= REF_COUNT: warnings.warn( _chained_assignment_method_msg, ChainedAssignmentError, diff --git a/pandas/tests/arrays/categorical/test_replace.py b/pandas/tests/arrays/categorical/test_replace.py index 6011cf5d859fd..0611d04d36d10 100644 --- a/pandas/tests/arrays/categorical/test_replace.py +++ b/pandas/tests/arrays/categorical/test_replace.py @@ -56,9 +56,7 @@ def test_replace_categorical_series(to_replace, value, expected, flip_categories ("b", None, ["a", None], "Categorical.categories length are different"), ], ) -def test_replace_categorical( - to_replace, value, result, expected_error_msg, using_copy_on_write -): +def test_replace_categorical(to_replace, value, result, expected_error_msg): # GH#26988 cat = Categorical(["a", "b"]) expected = Categorical(result) @@ -70,11 +68,8 @@ def test_replace_categorical( # ensure non-inplace call does not affect original tm.assert_categorical_equal(cat, expected) - if using_copy_on_write: - with tm.raises_chained_assignment_error(): - pd.Series(cat, copy=False).replace(to_replace, value, inplace=True) - else: - pd.Series(cat, copy=False).replace(to_replace, value, inplace=True) + ser = pd.Series(cat, copy=False) + ser.replace(to_replace, value, inplace=True) tm.assert_categorical_equal(cat, expected)