Skip to content

CoW: Fix deprecation warning for chained assignment #56166

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 6 commits into from
Nov 27, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
13 changes: 7 additions & 6 deletions pandas/core/generic.py
Original file line number Diff line number Diff line change
Expand Up @@ -101,6 +101,7 @@
SettingWithCopyWarning,
_chained_assignment_method_msg,
_chained_assignment_warning_method_msg,
_check_cacher,
)
from pandas.util._decorators import (
deprecate_nonkeyword_arguments,
Expand Down Expand Up @@ -7195,7 +7196,7 @@ def fillna(
elif not PYPY and not using_copy_on_write():
ctr = sys.getrefcount(self)
ref_count = REF_COUNT
if isinstance(self, ABCSeries) and hasattr(self, "_cacher"):
if isinstance(self, ABCSeries) and _check_cacher(self):
# see https://github.com/pandas-dev/pandas/pull/56060#discussion_r1399245221
ref_count += 1
if ctr <= ref_count:
Expand Down Expand Up @@ -7477,7 +7478,7 @@ def ffill(
elif not PYPY and not using_copy_on_write():
ctr = sys.getrefcount(self)
ref_count = REF_COUNT
if isinstance(self, ABCSeries) and hasattr(self, "_cacher"):
if isinstance(self, ABCSeries) and _check_cacher(self):
# see https://github.com/pandas-dev/pandas/pull/56060#discussion_r1399245221
ref_count += 1
if ctr <= ref_count:
Expand Down Expand Up @@ -7660,7 +7661,7 @@ def bfill(
elif not PYPY and not using_copy_on_write():
ctr = sys.getrefcount(self)
ref_count = REF_COUNT
if isinstance(self, ABCSeries) and hasattr(self, "_cacher"):
if isinstance(self, ABCSeries) and _check_cacher(self):
# see https://github.com/pandas-dev/pandas/pull/56060#discussion_r1399245221
ref_count += 1
if ctr <= ref_count:
Expand Down Expand Up @@ -7826,12 +7827,12 @@ def replace(
elif not PYPY and not using_copy_on_write():
ctr = sys.getrefcount(self)
ref_count = REF_COUNT
if isinstance(self, ABCSeries) and hasattr(self, "_cacher"):
if isinstance(self, ABCSeries) and _check_cacher(self):
# in non-CoW mode, chained Series access will populate the
# `_item_cache` which results in an increased ref count not below
# the threshold, while we still need to warn. We detect this case
# of a Series derived from a DataFrame through the presence of
# `_cacher`
# checking the `_cacher`
ref_count += 1
if ctr <= ref_count:
warnings.warn(
Expand Down Expand Up @@ -8267,7 +8268,7 @@ def interpolate(
elif not PYPY and not using_copy_on_write():
ctr = sys.getrefcount(self)
ref_count = REF_COUNT
if isinstance(self, ABCSeries) and hasattr(self, "_cacher"):
if isinstance(self, ABCSeries) and _check_cacher(self):
# see https://github.com/pandas-dev/pandas/pull/56060#discussion_r1399245221
ref_count += 1
if ctr <= ref_count:
Expand Down
3 changes: 2 additions & 1 deletion pandas/core/series.py
Original file line number Diff line number Diff line change
Expand Up @@ -45,6 +45,7 @@
_chained_assignment_method_msg,
_chained_assignment_msg,
_chained_assignment_warning_method_msg,
_check_cacher,
)
from pandas.util._decorators import (
Appender,
Expand Down Expand Up @@ -3564,7 +3565,7 @@ def update(self, other: Series | Sequence | Mapping) -> None:
elif not PYPY and not using_copy_on_write():
ctr = sys.getrefcount(self)
ref_count = REF_COUNT
if hasattr(self, "_cacher"):
if _check_cacher(self):
# see https://github.com/pandas-dev/pandas/pull/56060#discussion_r1399245221
ref_count += 1
if ctr <= ref_count:
Expand Down
18 changes: 18 additions & 0 deletions pandas/errors/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -516,6 +516,24 @@ class ChainedAssignmentError(Warning):
)


def _check_cacher(obj):
# This is a mess, selection paths that return a view set the _cacher attribute
# on the Series; most of them also set _item_cache which adds 1 to our relevant
# reference count, but iloc does not, so we have to check if we are actually
# in the item cache
if hasattr(obj, "_cacher"):
parent = obj._cacher[1]()
# parent could be dead
if parent is None:
return False
if hasattr(parent, "_item_cache"):
if obj._cacher[0] in parent._item_cache:
# Check if we are actually the item from item_cache, iloc creates a
# new object
return obj is parent._item_cache[obj._cacher[0]]
return False


class NumExprClobberingError(NameError):
"""
Exception raised when trying to use a built-in numexpr name as a variable name.
Expand Down
63 changes: 63 additions & 0 deletions pandas/tests/copy_view/test_chained_assignment_deprecation.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,63 @@
import pytest

from pandas import DataFrame
import pandas._testing as tm


def test_methods_iloc_warn(using_copy_on_write):
if not using_copy_on_write:
df = DataFrame({"a": [1, 2, 3], "b": 1})
with tm.assert_cow_warning(match="A value"):
df.iloc[:, 0].replace(1, 5, inplace=True)

with tm.assert_cow_warning(match="A value"):
df.iloc[:, 0].fillna(1, inplace=True)

with tm.assert_cow_warning(match="A value"):
df.iloc[:, 0].interpolate(inplace=True)

with tm.assert_cow_warning(match="A value"):
df.iloc[:, 0].ffill(inplace=True)

with tm.assert_cow_warning(match="A value"):
df.iloc[:, 0].bfill(inplace=True)


@pytest.mark.parametrize(
"func, args",
[
("replace", (1, 5)),
("fillna", (1,)),
("interpolate", ()),
("bfill", ()),
("ffill", ()),
],
)
def test_methods_iloc_getitem_item_cache(func, args, using_copy_on_write):
df = DataFrame({"a": [1, 2, 3], "b": 1})
ser = df.iloc[:, 0]
TODO(CoW-warn) should warn about updating a view
getattr(ser, func)(*args, inplace=True)
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This should raise a warning?

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

(or is that something we haven't implemented yet)

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

That should raise but we haven’t done that yet I think, but it shouldn’t raise a chained assignment warning, which is what happens on main right now

the implementation will look similar to #56168


# parent that holds item_cache is dead, so don't increase ref count
ser = df.copy()["a"]
getattr(ser, func)(*args, inplace=True)

df = df.copy()

df["a"] # populate the item_cache
ser = df.iloc[:, 0] # iloc creates a new object
ser.fillna(0, inplace=True)

df["a"] # populate the item_cache
ser = df["a"]
ser.fillna(0, inplace=True)

df = df.copy()
df["a"] # populate the item_cache
if using_copy_on_write:
with tm.raises_chained_assignment_error():
df["a"].fillna(0, inplace=True)
else:
with tm.assert_cow_warning(match="A value"):
df["a"].fillna(0, inplace=True)
1 change: 1 addition & 0 deletions scripts/validate_unwanted_patterns.py
Original file line number Diff line number Diff line change
Expand Up @@ -51,6 +51,7 @@
"_chained_assignment_msg",
"_chained_assignment_method_msg",
"_chained_assignment_warning_method_msg",
"_check_cacher",
"_version_meson",
# The numba extensions need this to mock the iloc object
"_iLocIndexer",
Expand Down