From 6a11b76e04045415a4fb3d0cd931a91f748c5002 Mon Sep 17 00:00:00 2001 From: Patrick Hoefler <61934744+phofl@users.noreply.github.com> Date: Sat, 25 Nov 2023 20:32:49 +0100 Subject: [PATCH 1/5] CoW: Fix deprecation warning for chained assignment --- pandas/core/generic.py | 13 +++--- pandas/core/series.py | 3 +- pandas/errors/__init__.py | 15 +++++++ .../test_chained_assignment_deprecation.py | 42 +++++++++++++++++++ scripts/validate_unwanted_patterns.py | 1 + 5 files changed, 67 insertions(+), 7 deletions(-) create mode 100644 pandas/tests/copy_view/test_chained_assignment_deprecation.py diff --git a/pandas/core/generic.py b/pandas/core/generic.py index e4e95a973a3c1..64f1341235710 100644 --- a/pandas/core/generic.py +++ b/pandas/core/generic.py @@ -101,6 +101,7 @@ SettingWithCopyWarning, _chained_assignment_method_msg, _chained_assignment_warning_method_msg, + _check_cacher, ) from pandas.util._decorators import ( deprecate_nonkeyword_arguments, @@ -7195,7 +7196,7 @@ def fillna( elif not PYPY and not using_copy_on_write(): ctr = sys.getrefcount(self) ref_count = REF_COUNT - if isinstance(self, ABCSeries) and hasattr(self, "_cacher"): + if isinstance(self, ABCSeries) and _check_cacher(self): # see https://github.com/pandas-dev/pandas/pull/56060#discussion_r1399245221 ref_count += 1 if ctr <= ref_count: @@ -7477,7 +7478,7 @@ def ffill( elif not PYPY and not using_copy_on_write(): ctr = sys.getrefcount(self) ref_count = REF_COUNT - if isinstance(self, ABCSeries) and hasattr(self, "_cacher"): + if isinstance(self, ABCSeries) and _check_cacher(self): # see https://github.com/pandas-dev/pandas/pull/56060#discussion_r1399245221 ref_count += 1 if ctr <= ref_count: @@ -7660,7 +7661,7 @@ def bfill( elif not PYPY and not using_copy_on_write(): ctr = sys.getrefcount(self) ref_count = REF_COUNT - if isinstance(self, ABCSeries) and hasattr(self, "_cacher"): + if isinstance(self, ABCSeries) and _check_cacher(self): # see https://github.com/pandas-dev/pandas/pull/56060#discussion_r1399245221 ref_count += 1 if ctr <= ref_count: @@ -7826,12 +7827,12 @@ def replace( elif not PYPY and not using_copy_on_write(): ctr = sys.getrefcount(self) ref_count = REF_COUNT - if isinstance(self, ABCSeries) and hasattr(self, "_cacher"): + if isinstance(self, ABCSeries) and _check_cacher(self): # in non-CoW mode, chained Series access will populate the # `_item_cache` which results in an increased ref count not below # the threshold, while we still need to warn. We detect this case # of a Series derived from a DataFrame through the presence of - # `_cacher` + # checking the `_cacher` ref_count += 1 if ctr <= ref_count: warnings.warn( @@ -8267,7 +8268,7 @@ def interpolate( elif not PYPY and not using_copy_on_write(): ctr = sys.getrefcount(self) ref_count = REF_COUNT - if isinstance(self, ABCSeries) and hasattr(self, "_cacher"): + if isinstance(self, ABCSeries) and _check_cacher(self): # see https://github.com/pandas-dev/pandas/pull/56060#discussion_r1399245221 ref_count += 1 if ctr <= ref_count: diff --git a/pandas/core/series.py b/pandas/core/series.py index a9679f22f9933..13b3423497b54 100644 --- a/pandas/core/series.py +++ b/pandas/core/series.py @@ -45,6 +45,7 @@ _chained_assignment_method_msg, _chained_assignment_msg, _chained_assignment_warning_method_msg, + _check_cacher, ) from pandas.util._decorators import ( Appender, @@ -3564,7 +3565,7 @@ def update(self, other: Series | Sequence | Mapping) -> None: elif not PYPY and not using_copy_on_write(): ctr = sys.getrefcount(self) ref_count = REF_COUNT - if hasattr(self, "_cacher"): + if _check_cacher(self): # see https://github.com/pandas-dev/pandas/pull/56060#discussion_r1399245221 ref_count += 1 if ctr <= ref_count: diff --git a/pandas/errors/__init__.py b/pandas/errors/__init__.py index e2aa9010dc109..e32479a95d4e9 100644 --- a/pandas/errors/__init__.py +++ b/pandas/errors/__init__.py @@ -516,6 +516,21 @@ class ChainedAssignmentError(Warning): ) +def _check_cacher(obj): + # This is a mess, selection paths that return a view set the _cacher attribute + # on the Series; most of them also set _item_cache which adds 1 to our relevant + # reference count, but iloc does not, so we have to check if we are actually + # in the item cache + if hasattr(obj, "_cacher"): + parent = obj._cacher[1]() + # parent could be dead + if parent is None: + return False + if hasattr(parent, "_item_cache"): + return obj._cacher[0] in parent._item_cache + return False + + class NumExprClobberingError(NameError): """ Exception raised when trying to use a built-in numexpr name as a variable name. diff --git a/pandas/tests/copy_view/test_chained_assignment_deprecation.py b/pandas/tests/copy_view/test_chained_assignment_deprecation.py new file mode 100644 index 0000000000000..c8ca50788b2e8 --- /dev/null +++ b/pandas/tests/copy_view/test_chained_assignment_deprecation.py @@ -0,0 +1,42 @@ +import pytest + +from pandas import DataFrame +import pandas._testing as tm + + +def test_methods_iloc_warn(): + df = DataFrame({"a": [1, 2, 3], "b": 1}) + with tm.assert_cow_warning(match="A value"): + df.iloc[:, 0].replace(1, 5, inplace=True) + + with tm.assert_cow_warning(match="A value"): + df.iloc[:, 0].fillna(1, inplace=True) + + with tm.assert_cow_warning(match="A value"): + df.iloc[:, 0].interpolate(inplace=True) + + with tm.assert_cow_warning(match="A value"): + df.iloc[:, 0].ffill(inplace=True) + + with tm.assert_cow_warning(match="A value"): + df.iloc[:, 0].bfill(inplace=True) + + +@pytest.mark.parametrize( + "func, args", + [ + ("replace", (1, 5)), + ("fillna", (1,)), + ("interpolate", ()), + ("bfill", ()), + ("ffill", ()), + ], +) +def test_methods_iloc_getitem_dont_warn(func, args): + df = DataFrame({"a": [1, 2, 3], "b": 1}) + ser = df.iloc[:, 0] + getattr(ser, func)(*args, inplace=True) + + # parent that holds item_cache is dead, so don't increase ref count + ser = df.copy()["a"] + getattr(ser, func)(*args, inplace=True) diff --git a/scripts/validate_unwanted_patterns.py b/scripts/validate_unwanted_patterns.py index 5bde4c21cfab5..26f1311e950ef 100755 --- a/scripts/validate_unwanted_patterns.py +++ b/scripts/validate_unwanted_patterns.py @@ -51,6 +51,7 @@ "_chained_assignment_msg", "_chained_assignment_method_msg", "_chained_assignment_warning_method_msg", + "_check_cacher", "_version_meson", # The numba extensions need this to mock the iloc object "_iLocIndexer", From 047201705a0adbc06b05501c5a42bb88970aebf4 Mon Sep 17 00:00:00 2001 From: Patrick Hoefler <61934744+phofl@users.noreply.github.com> Date: Sat, 25 Nov 2023 20:43:34 +0100 Subject: [PATCH 2/5] Add more tests --- pandas/errors/__init__.py | 5 ++++- .../test_chained_assignment_deprecation.py | 17 ++++++++++++++++- 2 files changed, 20 insertions(+), 2 deletions(-) diff --git a/pandas/errors/__init__.py b/pandas/errors/__init__.py index e32479a95d4e9..c89e4aa2cac0f 100644 --- a/pandas/errors/__init__.py +++ b/pandas/errors/__init__.py @@ -527,7 +527,10 @@ def _check_cacher(obj): if parent is None: return False if hasattr(parent, "_item_cache"): - return obj._cacher[0] in parent._item_cache + if obj._cacher[0] in parent._item_cache: + # Check if we are actually the item from item_cache, iloc creates a + # new object + return obj is parent._item_cache[obj._cacher[0]] return False diff --git a/pandas/tests/copy_view/test_chained_assignment_deprecation.py b/pandas/tests/copy_view/test_chained_assignment_deprecation.py index c8ca50788b2e8..c89b7aa90251c 100644 --- a/pandas/tests/copy_view/test_chained_assignment_deprecation.py +++ b/pandas/tests/copy_view/test_chained_assignment_deprecation.py @@ -32,7 +32,7 @@ def test_methods_iloc_warn(): ("ffill", ()), ], ) -def test_methods_iloc_getitem_dont_warn(func, args): +def test_methods_iloc_getitem_item_cache(func, args): df = DataFrame({"a": [1, 2, 3], "b": 1}) ser = df.iloc[:, 0] getattr(ser, func)(*args, inplace=True) @@ -40,3 +40,18 @@ def test_methods_iloc_getitem_dont_warn(func, args): # parent that holds item_cache is dead, so don't increase ref count ser = df.copy()["a"] getattr(ser, func)(*args, inplace=True) + + df = df.copy() + + df["a"] # populate the item_cache + ser = df.iloc[:, 0] # iloc creates a new object + ser.fillna(0, inplace=True) + + df["a"] # populate the item_cache + ser = df["a"] + ser.fillna(0, inplace=True) + + df = df.copy() + df["a"] # populate the item_cache + with tm.assert_cow_warning(match="A value"): + df["a"].fillna(0, inplace=True) From c8d093b64273b60d016048ec473a99c31b9e9f91 Mon Sep 17 00:00:00 2001 From: Patrick Hoefler <61934744+phofl@users.noreply.github.com> Date: Sat, 25 Nov 2023 23:37:15 +0100 Subject: [PATCH 3/5] Fix --- .../copy_view/test_chained_assignment_deprecation.py | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/pandas/tests/copy_view/test_chained_assignment_deprecation.py b/pandas/tests/copy_view/test_chained_assignment_deprecation.py index c89b7aa90251c..96fbce028ddce 100644 --- a/pandas/tests/copy_view/test_chained_assignment_deprecation.py +++ b/pandas/tests/copy_view/test_chained_assignment_deprecation.py @@ -32,7 +32,7 @@ def test_methods_iloc_warn(): ("ffill", ()), ], ) -def test_methods_iloc_getitem_item_cache(func, args): +def test_methods_iloc_getitem_item_cache(func, args, using_copy_on_write): df = DataFrame({"a": [1, 2, 3], "b": 1}) ser = df.iloc[:, 0] getattr(ser, func)(*args, inplace=True) @@ -53,5 +53,9 @@ def test_methods_iloc_getitem_item_cache(func, args): df = df.copy() df["a"] # populate the item_cache - with tm.assert_cow_warning(match="A value"): - df["a"].fillna(0, inplace=True) + if using_copy_on_write: + with tm.raises_chained_assignment_error(): + df["a"].fillna(0, inplace=True) + else: + with tm.assert_cow_warning(match="A value"): + df["a"].fillna(0, inplace=True) From 3b7f73ce554b8031c9614fca89609953b90d1b7d Mon Sep 17 00:00:00 2001 From: Patrick Hoefler <61934744+phofl@users.noreply.github.com> Date: Sun, 26 Nov 2023 00:22:54 +0100 Subject: [PATCH 4/5] Update --- .../test_chained_assignment_deprecation.py | 25 ++++++++++--------- 1 file changed, 13 insertions(+), 12 deletions(-) diff --git a/pandas/tests/copy_view/test_chained_assignment_deprecation.py b/pandas/tests/copy_view/test_chained_assignment_deprecation.py index 96fbce028ddce..8aa2d5199b6d9 100644 --- a/pandas/tests/copy_view/test_chained_assignment_deprecation.py +++ b/pandas/tests/copy_view/test_chained_assignment_deprecation.py @@ -4,22 +4,23 @@ import pandas._testing as tm -def test_methods_iloc_warn(): - df = DataFrame({"a": [1, 2, 3], "b": 1}) - with tm.assert_cow_warning(match="A value"): - df.iloc[:, 0].replace(1, 5, inplace=True) +def test_methods_iloc_warn(using_copy_on_write): + if not using_copy_on_write: + df = DataFrame({"a": [1, 2, 3], "b": 1}) + with tm.assert_cow_warning(match="A value"): + df.iloc[:, 0].replace(1, 5, inplace=True) - with tm.assert_cow_warning(match="A value"): - df.iloc[:, 0].fillna(1, inplace=True) + with tm.assert_cow_warning(match="A value"): + df.iloc[:, 0].fillna(1, inplace=True) - with tm.assert_cow_warning(match="A value"): - df.iloc[:, 0].interpolate(inplace=True) + with tm.assert_cow_warning(match="A value"): + df.iloc[:, 0].interpolate(inplace=True) - with tm.assert_cow_warning(match="A value"): - df.iloc[:, 0].ffill(inplace=True) + with tm.assert_cow_warning(match="A value"): + df.iloc[:, 0].ffill(inplace=True) - with tm.assert_cow_warning(match="A value"): - df.iloc[:, 0].bfill(inplace=True) + with tm.assert_cow_warning(match="A value"): + df.iloc[:, 0].bfill(inplace=True) @pytest.mark.parametrize( From 9492962054e82f1022f3432da4529c1956efc7bd Mon Sep 17 00:00:00 2001 From: Joris Van den Bossche Date: Mon, 27 Nov 2023 15:07:06 +0100 Subject: [PATCH 5/5] Update pandas/tests/copy_view/test_chained_assignment_deprecation.py --- pandas/tests/copy_view/test_chained_assignment_deprecation.py | 1 + 1 file changed, 1 insertion(+) diff --git a/pandas/tests/copy_view/test_chained_assignment_deprecation.py b/pandas/tests/copy_view/test_chained_assignment_deprecation.py index 8aa2d5199b6d9..37431f39bdaa0 100644 --- a/pandas/tests/copy_view/test_chained_assignment_deprecation.py +++ b/pandas/tests/copy_view/test_chained_assignment_deprecation.py @@ -36,6 +36,7 @@ def test_methods_iloc_warn(using_copy_on_write): def test_methods_iloc_getitem_item_cache(func, args, using_copy_on_write): df = DataFrame({"a": [1, 2, 3], "b": 1}) ser = df.iloc[:, 0] + TODO(CoW-warn) should warn about updating a view getattr(ser, func)(*args, inplace=True) # parent that holds item_cache is dead, so don't increase ref count