Skip to content

Commit 7012d6a

Browse files
CoW warning mode: warn in case of chained assignment (#55522)
1 parent ae6d279 commit 7012d6a

23 files changed

+219
-188
lines changed

.pre-commit-config.yaml

+2-1
Original file line numberDiff line numberDiff line change
@@ -240,8 +240,9 @@ repos:
240240
# pytest raises without context
241241
|\s\ pytest.raises
242242
243+
# TODO
243244
# pytest.warns (use tm.assert_produces_warning instead)
244-
|pytest\.warns
245+
# |pytest\.warns
245246
246247
# os.remove
247248
|os\.remove

doc/source/user_guide/basics.rst

+1-1
Original file line numberDiff line numberDiff line change
@@ -269,7 +269,7 @@ using ``fillna`` if you wish).
269269
.. ipython:: python
270270
271271
df2 = df.copy()
272-
df2["three"]["a"] = 1.0
272+
df2.loc["a", "three"] = 1.0
273273
df
274274
df2
275275
df + df2

doc/source/user_guide/copy_on_write.rst

+1
Original file line numberDiff line numberDiff line change
@@ -138,6 +138,7 @@ Chained assignment references a technique where an object is updated through
138138
two subsequent indexing operations, e.g.
139139

140140
.. ipython:: python
141+
:okwarning:
141142
142143
with pd.option_context("mode.copy_on_write", False):
143144
df = pd.DataFrame({"foo": [1, 2, 3], "bar": [4, 5, 6]})

doc/source/whatsnew/v0.13.1.rst

+1-2
Original file line numberDiff line numberDiff line change
@@ -29,11 +29,10 @@ Highlights include:
2929

3030
This would previously segfault:
3131

32-
.. ipython:: python
32+
.. code-block:: python
3333
3434
df = pd.DataFrame({"A": np.array(["foo", "bar", "bah", "foo", "bar"])})
3535
df["A"].iloc[0] = np.nan
36-
df
3736
3837
The recommended way to do this type of assignment is:
3938

pandas/_testing/contexts.py

+21-6
Original file line numberDiff line numberDiff line change
@@ -11,6 +11,8 @@
1111
)
1212
import uuid
1313

14+
from pandas._config import using_copy_on_write
15+
1416
from pandas.compat import PYPY
1517
from pandas.errors import ChainedAssignmentError
1618

@@ -193,9 +195,14 @@ def use_numexpr(use, min_elements=None) -> Generator[None, None, None]:
193195
set_option("compute.use_numexpr", olduse)
194196

195197

196-
def raises_chained_assignment_error(extra_warnings=(), extra_match=()):
198+
def raises_chained_assignment_error(warn=True, extra_warnings=(), extra_match=()):
197199
from pandas._testing import assert_produces_warning
198200

201+
if not warn:
202+
from contextlib import nullcontext
203+
204+
return nullcontext()
205+
199206
if PYPY and not extra_warnings:
200207
from contextlib import nullcontext
201208

@@ -206,12 +213,20 @@ def raises_chained_assignment_error(extra_warnings=(), extra_match=()):
206213
match="|".join(extra_match),
207214
)
208215
else:
209-
match = (
210-
"A value is trying to be set on a copy of a DataFrame or Series "
211-
"through chained assignment"
212-
)
216+
if using_copy_on_write():
217+
warning = ChainedAssignmentError
218+
match = (
219+
"A value is trying to be set on a copy of a DataFrame or Series "
220+
"through chained assignment"
221+
)
222+
else:
223+
warning = FutureWarning # type: ignore[assignment]
224+
# TODO update match
225+
match = "ChainedAssignmentError"
226+
if extra_warnings:
227+
warning = (warning, *extra_warnings) # type: ignore[assignment]
213228
return assert_produces_warning(
214-
(ChainedAssignmentError, *extra_warnings),
229+
warning,
215230
match="|".join((match, *extra_match)),
216231
)
217232

pandas/core/frame.py

+13
Original file line numberDiff line numberDiff line change
@@ -63,6 +63,7 @@
6363
_chained_assignment_method_msg,
6464
_chained_assignment_msg,
6565
_chained_assignment_warning_method_msg,
66+
_chained_assignment_warning_msg,
6667
)
6768
from pandas.util._decorators import (
6869
Appender,
@@ -4205,6 +4206,18 @@ def __setitem__(self, key, value) -> None:
42054206
warnings.warn(
42064207
_chained_assignment_msg, ChainedAssignmentError, stacklevel=2
42074208
)
4209+
# elif not PYPY and not using_copy_on_write():
4210+
elif not PYPY and warn_copy_on_write():
4211+
if sys.getrefcount(self) <= 3: # and (
4212+
# warn_copy_on_write()
4213+
# or (
4214+
# not warn_copy_on_write()
4215+
# and self._mgr.blocks[0].refs.has_reference()
4216+
# )
4217+
# ):
4218+
warnings.warn(
4219+
_chained_assignment_warning_msg, FutureWarning, stacklevel=2
4220+
)
42084221

42094222
key = com.apply_if_callable(key, self)
42104223

pandas/core/indexing.py

+16-1
Original file line numberDiff line numberDiff line change
@@ -13,7 +13,10 @@
1313

1414
import numpy as np
1515

16-
from pandas._config import using_copy_on_write
16+
from pandas._config import (
17+
using_copy_on_write,
18+
warn_copy_on_write,
19+
)
1720

1821
from pandas._libs.indexing import NDFrameIndexerBase
1922
from pandas._libs.lib import item_from_zerodim
@@ -25,6 +28,8 @@
2528
InvalidIndexError,
2629
LossySetitemError,
2730
_chained_assignment_msg,
31+
_chained_assignment_warning_msg,
32+
_check_cacher,
2833
)
2934
from pandas.util._decorators import doc
3035
from pandas.util._exceptions import find_stack_level
@@ -881,6 +886,16 @@ def __setitem__(self, key, value) -> None:
881886
warnings.warn(
882887
_chained_assignment_msg, ChainedAssignmentError, stacklevel=2
883888
)
889+
elif not PYPY and not using_copy_on_write():
890+
ctr = sys.getrefcount(self.obj)
891+
ref_count = 2
892+
if not warn_copy_on_write() and _check_cacher(self.obj):
893+
# see https://github.com/pandas-dev/pandas/pull/56060#discussion_r1399245221
894+
ref_count += 1
895+
if ctr <= ref_count:
896+
warnings.warn(
897+
_chained_assignment_warning_msg, FutureWarning, stacklevel=2
898+
)
884899

885900
check_dict_or_set_indexers(key)
886901
if isinstance(key, tuple):

pandas/core/internals/array_manager.py

+2-2
Original file line numberDiff line numberDiff line change
@@ -309,7 +309,7 @@ def apply_with_block(self, f, align_keys=None, **kwargs) -> Self:
309309

310310
return type(self)(result_arrays, self._axes)
311311

312-
def setitem(self, indexer, value) -> Self:
312+
def setitem(self, indexer, value, warn: bool = True) -> Self:
313313
return self.apply_with_block("setitem", indexer=indexer, value=value)
314314

315315
def diff(self, n: int) -> Self:
@@ -1187,7 +1187,7 @@ def apply(self, func, **kwargs) -> Self: # type: ignore[override]
11871187
new_array = getattr(self.array, func)(**kwargs)
11881188
return type(self)([new_array], self._axes)
11891189

1190-
def setitem(self, indexer, value) -> SingleArrayManager:
1190+
def setitem(self, indexer, value, warn: bool = True) -> SingleArrayManager:
11911191
"""
11921192
Set values with indexer.
11931193

pandas/core/internals/managers.py

+3-3
Original file line numberDiff line numberDiff line change
@@ -387,7 +387,7 @@ def apply(
387387
# Alias so we can share code with ArrayManager
388388
apply_with_block = apply
389389

390-
def setitem(self, indexer, value) -> Self:
390+
def setitem(self, indexer, value, warn: bool = True) -> Self:
391391
"""
392392
Set values with indexer.
393393
@@ -396,7 +396,7 @@ def setitem(self, indexer, value) -> Self:
396396
if isinstance(indexer, np.ndarray) and indexer.ndim > self.ndim:
397397
raise ValueError(f"Cannot set values with ndim > {self.ndim}")
398398

399-
if warn_copy_on_write() and not self._has_no_reference(0):
399+
if warn and warn_copy_on_write() and not self._has_no_reference(0):
400400
warnings.warn(
401401
COW_WARNING_GENERAL_MSG,
402402
FutureWarning,
@@ -2059,7 +2059,7 @@ def setitem_inplace(self, indexer, value, warn: bool = True) -> None:
20592059
if using_cow:
20602060
self.blocks = (self._block.copy(),)
20612061
self._cache.clear()
2062-
elif warn and warn_cow:
2062+
elif warn_cow and warn:
20632063
warnings.warn(
20642064
COW_WARNING_SETITEM_MSG,
20652065
FutureWarning,

pandas/core/series.py

+36-14
Original file line numberDiff line numberDiff line change
@@ -26,7 +26,10 @@
2626

2727
import numpy as np
2828

29-
from pandas._config import using_copy_on_write
29+
from pandas._config import (
30+
using_copy_on_write,
31+
warn_copy_on_write,
32+
)
3033
from pandas._config.config import _get_option
3134

3235
from pandas._libs import (
@@ -45,6 +48,7 @@
4548
_chained_assignment_method_msg,
4649
_chained_assignment_msg,
4750
_chained_assignment_warning_method_msg,
51+
_chained_assignment_warning_msg,
4852
_check_cacher,
4953
)
5054
from pandas.util._decorators import (
@@ -1221,11 +1225,29 @@ def _get_value(self, label, takeable: bool = False):
12211225
return self.iloc[loc]
12221226

12231227
def __setitem__(self, key, value) -> None:
1228+
warn = True
12241229
if not PYPY and using_copy_on_write():
12251230
if sys.getrefcount(self) <= 3:
12261231
warnings.warn(
12271232
_chained_assignment_msg, ChainedAssignmentError, stacklevel=2
12281233
)
1234+
elif not PYPY and not using_copy_on_write():
1235+
ctr = sys.getrefcount(self)
1236+
ref_count = 3
1237+
if not warn_copy_on_write() and _check_cacher(self):
1238+
# see https://github.com/pandas-dev/pandas/pull/56060#discussion_r1399245221
1239+
ref_count += 1
1240+
if ctr <= ref_count and (
1241+
warn_copy_on_write()
1242+
or (
1243+
not warn_copy_on_write()
1244+
and self._mgr.blocks[0].refs.has_reference() # type: ignore[union-attr]
1245+
)
1246+
):
1247+
warn = False
1248+
warnings.warn(
1249+
_chained_assignment_warning_msg, FutureWarning, stacklevel=2
1250+
)
12291251

12301252
check_dict_or_set_indexers(key)
12311253
key = com.apply_if_callable(key, self)
@@ -1236,10 +1258,10 @@ def __setitem__(self, key, value) -> None:
12361258

12371259
if isinstance(key, slice):
12381260
indexer = self.index._convert_slice_indexer(key, kind="getitem")
1239-
return self._set_values(indexer, value)
1261+
return self._set_values(indexer, value, warn=warn)
12401262

12411263
try:
1242-
self._set_with_engine(key, value)
1264+
self._set_with_engine(key, value, warn=warn)
12431265
except KeyError:
12441266
# We have a scalar (or for MultiIndex or object-dtype, scalar-like)
12451267
# key that is not present in self.index.
@@ -1305,18 +1327,18 @@ def __setitem__(self, key, value) -> None:
13051327
return
13061328

13071329
else:
1308-
self._set_with(key, value)
1330+
self._set_with(key, value, warn=warn)
13091331

13101332
if cacher_needs_updating:
13111333
self._maybe_update_cacher(inplace=True)
13121334

1313-
def _set_with_engine(self, key, value) -> None:
1335+
def _set_with_engine(self, key, value, warn: bool = True) -> None:
13141336
loc = self.index.get_loc(key)
13151337

13161338
# this is equivalent to self._values[key] = value
1317-
self._mgr.setitem_inplace(loc, value)
1339+
self._mgr.setitem_inplace(loc, value, warn=warn)
13181340

1319-
def _set_with(self, key, value) -> None:
1341+
def _set_with(self, key, value, warn: bool = True) -> None:
13201342
# We got here via exception-handling off of InvalidIndexError, so
13211343
# key should always be listlike at this point.
13221344
assert not isinstance(key, tuple)
@@ -1327,7 +1349,7 @@ def _set_with(self, key, value) -> None:
13271349

13281350
if not self.index._should_fallback_to_positional:
13291351
# Regardless of the key type, we're treating it as labels
1330-
self._set_labels(key, value)
1352+
self._set_labels(key, value, warn=warn)
13311353

13321354
else:
13331355
# Note: key_type == "boolean" should not occur because that
@@ -1344,23 +1366,23 @@ def _set_with(self, key, value) -> None:
13441366
FutureWarning,
13451367
stacklevel=find_stack_level(),
13461368
)
1347-
self._set_values(key, value)
1369+
self._set_values(key, value, warn=warn)
13481370
else:
1349-
self._set_labels(key, value)
1371+
self._set_labels(key, value, warn=warn)
13501372

1351-
def _set_labels(self, key, value) -> None:
1373+
def _set_labels(self, key, value, warn: bool = True) -> None:
13521374
key = com.asarray_tuplesafe(key)
13531375
indexer: np.ndarray = self.index.get_indexer(key)
13541376
mask = indexer == -1
13551377
if mask.any():
13561378
raise KeyError(f"{key[mask]} not in index")
1357-
self._set_values(indexer, value)
1379+
self._set_values(indexer, value, warn=warn)
13581380

1359-
def _set_values(self, key, value) -> None:
1381+
def _set_values(self, key, value, warn: bool = True) -> None:
13601382
if isinstance(key, (Index, Series)):
13611383
key = key._values
13621384

1363-
self._mgr = self._mgr.setitem(indexer=key, value=value)
1385+
self._mgr = self._mgr.setitem(indexer=key, value=value, warn=warn)
13641386
self._maybe_update_cacher()
13651387

13661388
def _set_value(self, label, value, takeable: bool = False) -> None:

pandas/errors/__init__.py

+18
Original file line numberDiff line numberDiff line change
@@ -503,6 +503,24 @@ class ChainedAssignmentError(Warning):
503503
)
504504

505505

506+
_chained_assignment_warning_msg = (
507+
"ChainedAssignmentError: behaviour will change in pandas 3.0!\n"
508+
"You are setting values through chained assignment. Currently this works "
509+
"in certain cases, but when using Copy-on-Write (which will become the "
510+
"default behaviour in pandas 3.0) this will never work to update the "
511+
"original DataFrame or Series, because the intermediate object on which "
512+
"we are setting values will behave as a copy.\n"
513+
"A typical example is when you are setting values in a column of a "
514+
"DataFrame, like:\n\n"
515+
'df["col"][row_indexer] = value\n\n'
516+
'Use `df.loc[row_indexer, "col"] = values` instead, to perform the '
517+
"assignment in a single step and ensure this keeps updating the original `df`.\n\n"
518+
"See the caveats in the documentation: "
519+
"https://pandas.pydata.org/pandas-docs/stable/user_guide/"
520+
"indexing.html#returning-a-view-versus-a-copy\n"
521+
)
522+
523+
506524
_chained_assignment_warning_method_msg = (
507525
"A value is trying to be set on a copy of a DataFrame or Series "
508526
"through chained assignment using an inplace method.\n"

pandas/tests/copy_view/test_chained_assignment_deprecation.py

+24
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,8 @@
1+
import numpy as np
12
import pytest
23

4+
from pandas.errors import ChainedAssignmentError
5+
36
from pandas import DataFrame
47
import pandas._testing as tm
58

@@ -61,3 +64,24 @@ def test_methods_iloc_getitem_item_cache(func, args, using_copy_on_write):
6164
else:
6265
with tm.assert_cow_warning(match="A value"):
6366
df["a"].fillna(0, inplace=True)
67+
68+
69+
# TODO(CoW-warn) expand the cases
70+
@pytest.mark.parametrize(
71+
"indexer", [0, [0, 1], slice(0, 2), np.array([True, False, True])]
72+
)
73+
def test_series_setitem(indexer, using_copy_on_write):
74+
# ensure we only get a single warning for those typical cases of chained
75+
# assignment
76+
df = DataFrame({"a": [1, 2, 3], "b": 1})
77+
78+
# using custom check instead of tm.assert_produces_warning because that doesn't
79+
# fail if multiple warnings are raised
80+
with pytest.warns() as record:
81+
df["a"][indexer] = 0
82+
assert len(record) == 1
83+
if using_copy_on_write:
84+
assert record[0].category == ChainedAssignmentError
85+
else:
86+
assert record[0].category == FutureWarning
87+
assert "ChainedAssignmentError" in record[0].message.args[0]

0 commit comments

Comments
 (0)