diff --git a/pandas/core/generic.py b/pandas/core/generic.py index e4e95a973a3c1..64f1341235710 100644 --- a/pandas/core/generic.py +++ b/pandas/core/generic.py @@ -101,6 +101,7 @@ SettingWithCopyWarning, _chained_assignment_method_msg, _chained_assignment_warning_method_msg, + _check_cacher, ) from pandas.util._decorators import ( deprecate_nonkeyword_arguments, @@ -7195,7 +7196,7 @@ def fillna( elif not PYPY and not using_copy_on_write(): ctr = sys.getrefcount(self) ref_count = REF_COUNT - if isinstance(self, ABCSeries) and hasattr(self, "_cacher"): + if isinstance(self, ABCSeries) and _check_cacher(self): # see https://github.com/pandas-dev/pandas/pull/56060#discussion_r1399245221 ref_count += 1 if ctr <= ref_count: @@ -7477,7 +7478,7 @@ def ffill( elif not PYPY and not using_copy_on_write(): ctr = sys.getrefcount(self) ref_count = REF_COUNT - if isinstance(self, ABCSeries) and hasattr(self, "_cacher"): + if isinstance(self, ABCSeries) and _check_cacher(self): # see https://github.com/pandas-dev/pandas/pull/56060#discussion_r1399245221 ref_count += 1 if ctr <= ref_count: @@ -7660,7 +7661,7 @@ def bfill( elif not PYPY and not using_copy_on_write(): ctr = sys.getrefcount(self) ref_count = REF_COUNT - if isinstance(self, ABCSeries) and hasattr(self, "_cacher"): + if isinstance(self, ABCSeries) and _check_cacher(self): # see https://github.com/pandas-dev/pandas/pull/56060#discussion_r1399245221 ref_count += 1 if ctr <= ref_count: @@ -7826,12 +7827,12 @@ def replace( elif not PYPY and not using_copy_on_write(): ctr = sys.getrefcount(self) ref_count = REF_COUNT - if isinstance(self, ABCSeries) and hasattr(self, "_cacher"): + if isinstance(self, ABCSeries) and _check_cacher(self): # in non-CoW mode, chained Series access will populate the # `_item_cache` which results in an increased ref count not below # the threshold, while we still need to warn. We detect this case # of a Series derived from a DataFrame through the presence of - # `_cacher` + # checking the `_cacher` ref_count += 1 if ctr <= ref_count: warnings.warn( @@ -8267,7 +8268,7 @@ def interpolate( elif not PYPY and not using_copy_on_write(): ctr = sys.getrefcount(self) ref_count = REF_COUNT - if isinstance(self, ABCSeries) and hasattr(self, "_cacher"): + if isinstance(self, ABCSeries) and _check_cacher(self): # see https://github.com/pandas-dev/pandas/pull/56060#discussion_r1399245221 ref_count += 1 if ctr <= ref_count: diff --git a/pandas/core/series.py b/pandas/core/series.py index a9679f22f9933..13b3423497b54 100644 --- a/pandas/core/series.py +++ b/pandas/core/series.py @@ -45,6 +45,7 @@ _chained_assignment_method_msg, _chained_assignment_msg, _chained_assignment_warning_method_msg, + _check_cacher, ) from pandas.util._decorators import ( Appender, @@ -3564,7 +3565,7 @@ def update(self, other: Series | Sequence | Mapping) -> None: elif not PYPY and not using_copy_on_write(): ctr = sys.getrefcount(self) ref_count = REF_COUNT - if hasattr(self, "_cacher"): + if _check_cacher(self): # see https://github.com/pandas-dev/pandas/pull/56060#discussion_r1399245221 ref_count += 1 if ctr <= ref_count: diff --git a/pandas/errors/__init__.py b/pandas/errors/__init__.py index e2aa9010dc109..c89e4aa2cac0f 100644 --- a/pandas/errors/__init__.py +++ b/pandas/errors/__init__.py @@ -516,6 +516,24 @@ class ChainedAssignmentError(Warning): ) +def _check_cacher(obj): + # This is a mess, selection paths that return a view set the _cacher attribute + # on the Series; most of them also set _item_cache which adds 1 to our relevant + # reference count, but iloc does not, so we have to check if we are actually + # in the item cache + if hasattr(obj, "_cacher"): + parent = obj._cacher[1]() + # parent could be dead + if parent is None: + return False + if hasattr(parent, "_item_cache"): + if obj._cacher[0] in parent._item_cache: + # Check if we are actually the item from item_cache, iloc creates a + # new object + return obj is parent._item_cache[obj._cacher[0]] + return False + + class NumExprClobberingError(NameError): """ Exception raised when trying to use a built-in numexpr name as a variable name. diff --git a/pandas/tests/copy_view/test_chained_assignment_deprecation.py b/pandas/tests/copy_view/test_chained_assignment_deprecation.py new file mode 100644 index 0000000000000..37431f39bdaa0 --- /dev/null +++ b/pandas/tests/copy_view/test_chained_assignment_deprecation.py @@ -0,0 +1,63 @@ +import pytest + +from pandas import DataFrame +import pandas._testing as tm + + +def test_methods_iloc_warn(using_copy_on_write): + if not using_copy_on_write: + df = DataFrame({"a": [1, 2, 3], "b": 1}) + with tm.assert_cow_warning(match="A value"): + df.iloc[:, 0].replace(1, 5, inplace=True) + + with tm.assert_cow_warning(match="A value"): + df.iloc[:, 0].fillna(1, inplace=True) + + with tm.assert_cow_warning(match="A value"): + df.iloc[:, 0].interpolate(inplace=True) + + with tm.assert_cow_warning(match="A value"): + df.iloc[:, 0].ffill(inplace=True) + + with tm.assert_cow_warning(match="A value"): + df.iloc[:, 0].bfill(inplace=True) + + +@pytest.mark.parametrize( + "func, args", + [ + ("replace", (1, 5)), + ("fillna", (1,)), + ("interpolate", ()), + ("bfill", ()), + ("ffill", ()), + ], +) +def test_methods_iloc_getitem_item_cache(func, args, using_copy_on_write): + df = DataFrame({"a": [1, 2, 3], "b": 1}) + ser = df.iloc[:, 0] + TODO(CoW-warn) should warn about updating a view + getattr(ser, func)(*args, inplace=True) + + # parent that holds item_cache is dead, so don't increase ref count + ser = df.copy()["a"] + getattr(ser, func)(*args, inplace=True) + + df = df.copy() + + df["a"] # populate the item_cache + ser = df.iloc[:, 0] # iloc creates a new object + ser.fillna(0, inplace=True) + + df["a"] # populate the item_cache + ser = df["a"] + ser.fillna(0, inplace=True) + + df = df.copy() + df["a"] # populate the item_cache + if using_copy_on_write: + with tm.raises_chained_assignment_error(): + df["a"].fillna(0, inplace=True) + else: + with tm.assert_cow_warning(match="A value"): + df["a"].fillna(0, inplace=True) diff --git a/scripts/validate_unwanted_patterns.py b/scripts/validate_unwanted_patterns.py index 5bde4c21cfab5..26f1311e950ef 100755 --- a/scripts/validate_unwanted_patterns.py +++ b/scripts/validate_unwanted_patterns.py @@ -51,6 +51,7 @@ "_chained_assignment_msg", "_chained_assignment_method_msg", "_chained_assignment_warning_method_msg", + "_check_cacher", "_version_meson", # The numba extensions need this to mock the iloc object "_iLocIndexer",