Skip to content

Commit 73840ef

Browse files
API / CoW: detect and raise error for chained assignment under Copy-on-Write (#49467)
1 parent 5752494 commit 73840ef

20 files changed

+220
-49
lines changed

.github/workflows/ubuntu.yml

-1
Original file line numberDiff line numberDiff line change
@@ -73,7 +73,6 @@ jobs:
7373
env_file: actions-pypy-38.yaml
7474
pattern: "not slow and not network and not single_cpu"
7575
test_args: "--max-worker-restart 0"
76-
error_on_warnings: "0"
7776
- name: "Numpy Dev"
7877
env_file: actions-310-numpydev.yaml
7978
pattern: "not slow and not network and not single_cpu"

doc/source/reference/testing.rst

+1
Original file line numberDiff line numberDiff line change
@@ -28,6 +28,7 @@ Exceptions and warnings
2828
errors.AccessorRegistrationWarning
2929
errors.AttributeConflictWarning
3030
errors.CategoricalConversionWarning
31+
errors.ChainedAssignmentError
3132
errors.ClosedFileError
3233
errors.CSSWarning
3334
errors.DatabaseError

doc/source/whatsnew/v2.0.0.rst

+8
Original file line numberDiff line numberDiff line change
@@ -135,6 +135,14 @@ Copy-on-Write improvements
135135
a modification to the data happens) when constructing a Series from an existing
136136
Series with the default of ``copy=False`` (:issue:`50471`)
137137

138+
- Trying to set values using chained assignment (for example, ``df["a"][1:3] = 0``)
139+
will now always raise an exception when Copy-on-Write is enabled. In this mode,
140+
chained assignment can never work because we are always setting into a temporary
141+
object that is the result of an indexing operation (getitem), which under
142+
Copy-on-Write always behaves as a copy. Thus, assigning through a chain
143+
can never update the original Series or DataFrame. Therefore, an informative
144+
error is raised to the user instead of silently doing nothing (:issue:`49467`)
145+
138146
Copy-on-Write can be enabled through
139147

140148
.. code-block:: python

pandas/_testing/__init__.py

+2
Original file line numberDiff line numberDiff line change
@@ -103,6 +103,7 @@
103103
decompress_file,
104104
ensure_clean,
105105
ensure_safe_environment_variables,
106+
raises_chained_assignment_error,
106107
set_timezone,
107108
use_numexpr,
108109
with_csv_dialect,
@@ -1125,6 +1126,7 @@ def shares_memory(left, right) -> bool:
11251126
"rands",
11261127
"reset_display_options",
11271128
"RNGContext",
1129+
"raises_chained_assignment_error",
11281130
"round_trip_localpath",
11291131
"round_trip_pathlib",
11301132
"round_trip_pickle",

pandas/_testing/contexts.py

+21
Original file line numberDiff line numberDiff line change
@@ -14,6 +14,9 @@
1414

1515
import numpy as np
1616

17+
from pandas.compat import PYPY
18+
from pandas.errors import ChainedAssignmentError
19+
1720
from pandas import set_option
1821

1922
from pandas.io.common import get_handle
@@ -227,3 +230,21 @@ def __exit__(
227230
) -> None:
228231

229232
np.random.set_state(self.start_state)
233+
234+
235+
def raises_chained_assignment_error():
236+
237+
if PYPY:
238+
from contextlib import nullcontext
239+
240+
return nullcontext()
241+
else:
242+
import pytest
243+
244+
return pytest.raises(
245+
ChainedAssignmentError,
246+
match=(
247+
"A value is trying to be set on a copy of a DataFrame or Series "
248+
"through chained assignment"
249+
),
250+
)

pandas/core/frame.py

+11-1
Original file line numberDiff line numberDiff line change
@@ -16,6 +16,7 @@
1616
import functools
1717
from io import StringIO
1818
import itertools
19+
import sys
1920
from textwrap import dedent
2021
from typing import (
2122
TYPE_CHECKING,
@@ -91,12 +92,17 @@
9192
WriteBuffer,
9293
npt,
9394
)
95+
from pandas.compat import PYPY
9496
from pandas.compat._optional import import_optional_dependency
9597
from pandas.compat.numpy import (
9698
function as nv,
9799
np_percentile_argname,
98100
)
99-
from pandas.errors import InvalidIndexError
101+
from pandas.errors import (
102+
ChainedAssignmentError,
103+
InvalidIndexError,
104+
_chained_assignment_msg,
105+
)
100106
from pandas.util._decorators import (
101107
Appender,
102108
Substitution,
@@ -3862,6 +3868,10 @@ def isetitem(self, loc, value) -> None:
38623868
self._iset_item_mgr(loc, arraylike, inplace=False)
38633869

38643870
def __setitem__(self, key, value):
3871+
if not PYPY and using_copy_on_write():
3872+
if sys.getrefcount(self) <= 3:
3873+
raise ChainedAssignmentError(_chained_assignment_msg)
3874+
38653875
key = com.apply_if_callable(key, self)
38663876

38673877
# see if we can slice the rows

pandas/core/indexing.py

+10
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,7 @@
11
from __future__ import annotations
22

33
from contextlib import suppress
4+
import sys
45
from typing import (
56
TYPE_CHECKING,
67
Hashable,
@@ -12,17 +13,22 @@
1213

1314
import numpy as np
1415

16+
from pandas._config import using_copy_on_write
17+
1518
from pandas._libs.indexing import NDFrameIndexerBase
1619
from pandas._libs.lib import item_from_zerodim
1720
from pandas._typing import (
1821
Axis,
1922
AxisInt,
2023
)
24+
from pandas.compat import PYPY
2125
from pandas.errors import (
2226
AbstractMethodError,
27+
ChainedAssignmentError,
2328
IndexingError,
2429
InvalidIndexError,
2530
LossySetitemError,
31+
_chained_assignment_msg,
2632
)
2733
from pandas.util._decorators import doc
2834

@@ -830,6 +836,10 @@ def _ensure_listlike_indexer(self, key, axis=None, value=None) -> None:
830836

831837
@final
832838
def __setitem__(self, key, value) -> None:
839+
if not PYPY and using_copy_on_write():
840+
if sys.getrefcount(self.obj) <= 2:
841+
raise ChainedAssignmentError(_chained_assignment_msg)
842+
833843
check_dict_or_set_indexers(key)
834844
if isinstance(key, tuple):
835845
key = tuple(list(x) if is_iterator(x) else x for x in key)

pandas/core/series.py

+11-1
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,7 @@
33
"""
44
from __future__ import annotations
55

6+
import sys
67
from textwrap import dedent
78
from typing import (
89
IO,
@@ -67,8 +68,13 @@
6768
WriteBuffer,
6869
npt,
6970
)
71+
from pandas.compat import PYPY
7072
from pandas.compat.numpy import function as nv
71-
from pandas.errors import InvalidIndexError
73+
from pandas.errors import (
74+
ChainedAssignmentError,
75+
InvalidIndexError,
76+
_chained_assignment_msg,
77+
)
7278
from pandas.util._decorators import (
7379
Appender,
7480
Substitution,
@@ -1074,6 +1080,10 @@ def _get_value(self, label, takeable: bool = False):
10741080
return self.iloc[loc]
10751081

10761082
def __setitem__(self, key, value) -> None:
1083+
if not PYPY and using_copy_on_write():
1084+
if sys.getrefcount(self) <= 3:
1085+
raise ChainedAssignmentError(_chained_assignment_msg)
1086+
10771087
check_dict_or_set_indexers(key)
10781088
key = com.apply_if_callable(key, self)
10791089
cacher_needs_updating = self._check_is_chained_assignment_possible()

pandas/errors/__init__.py

+36
Original file line numberDiff line numberDiff line change
@@ -320,6 +320,42 @@ class SettingWithCopyWarning(Warning):
320320
"""
321321

322322

323+
class ChainedAssignmentError(ValueError):
324+
"""
325+
Exception raised when trying to set using chained assignment.
326+
327+
When the ``mode.copy_on_write`` option is enabled, chained assignment can
328+
never work. In such a situation, we are always setting into a temporary
329+
object that is the result of an indexing operation (getitem), which under
330+
Copy-on-Write always behaves as a copy. Thus, assigning through a chain
331+
can never update the original Series or DataFrame.
332+
333+
For more information on view vs. copy,
334+
see :ref:`the user guide<indexing.view_versus_copy>`.
335+
336+
Examples
337+
--------
338+
>>> pd.options.mode.copy_on_write = True
339+
>>> df = pd.DataFrame({'A': [1, 1, 1, 2, 2]}, columns=['A'])
340+
>>> df["A"][0:3] = 10 # doctest: +SKIP
341+
... # ChainedAssignmentError: ...
342+
"""
343+
344+
345+
_chained_assignment_msg = (
346+
"A value is trying to be set on a copy of a DataFrame or Series "
347+
"through chained assignment.\n"
348+
"When using the Copy-on-Write mode, such chained assignment never works "
349+
"to update the original DataFrame or Series, because the intermediate "
350+
"object on which we are setting values always behaves as a copy.\n\n"
351+
"Try using '.loc[row_indexer, col_indexer] = value' instead, to perform "
352+
"the assignment in a single step.\n\n"
353+
"See the caveats in the documentation: "
354+
"https://pandas.pydata.org/pandas-docs/stable/user_guide/"
355+
"indexing.html#returning-a-view-versus-a-copy"
356+
)
357+
358+
323359
class NumExprClobberingError(NameError):
324360
"""
325361
Exception raised when trying to use a built-in numexpr name as a variable name.

pandas/tests/frame/indexing/test_setitem.py

+6-3
Original file line numberDiff line numberDiff line change
@@ -1245,12 +1245,15 @@ def test_setitem_column_update_inplace(self, using_copy_on_write):
12451245
df = DataFrame({col: np.zeros(len(labels)) for col in labels}, index=labels)
12461246
values = df._mgr.blocks[0].values
12471247

1248-
for label in df.columns:
1249-
df[label][label] = 1
1250-
12511248
if not using_copy_on_write:
1249+
for label in df.columns:
1250+
df[label][label] = 1
1251+
12521252
# diagonal values all updated
12531253
assert np.all(values[np.arange(10), np.arange(10)] == 1)
12541254
else:
1255+
with tm.raises_chained_assignment_error():
1256+
for label in df.columns:
1257+
df[label][label] = 1
12551258
# original dataframe not updated
12561259
assert np.all(values[np.arange(10), np.arange(10)] == 0)

pandas/tests/frame/indexing/test_xs.py

+2-1
Original file line numberDiff line numberDiff line change
@@ -124,7 +124,8 @@ def test_xs_view(self, using_array_manager, using_copy_on_write):
124124
df_orig = dm.copy()
125125

126126
if using_copy_on_write:
127-
dm.xs(2)[:] = 20
127+
with tm.raises_chained_assignment_error():
128+
dm.xs(2)[:] = 20
128129
tm.assert_frame_equal(dm, df_orig)
129130
elif using_array_manager:
130131
# INFO(ArrayManager) with ArrayManager getting a row as a view is

pandas/tests/frame/test_block_internals.py

+5-1
Original file line numberDiff line numberDiff line change
@@ -340,7 +340,11 @@ def test_stale_cached_series_bug_473(self, using_copy_on_write):
340340
)
341341
repr(Y)
342342
Y["e"] = Y["e"].astype("object")
343-
Y["g"]["c"] = np.NaN
343+
if using_copy_on_write:
344+
with tm.raises_chained_assignment_error():
345+
Y["g"]["c"] = np.NaN
346+
else:
347+
Y["g"]["c"] = np.NaN
344348
repr(Y)
345349
result = Y.sum() # noqa
346350
exp = Y["g"].sum() # noqa

pandas/tests/indexing/multiindex/test_chaining_and_caching.py

+5-3
Original file line numberDiff line numberDiff line change
@@ -50,11 +50,13 @@ def test_cache_updating(using_copy_on_write):
5050

5151
# setting via chained assignment
5252
# but actually works, since everything is a view
53-
df.loc[0]["z"].iloc[0] = 1.0
54-
result = df.loc[(0, 0), "z"]
5553
if using_copy_on_write:
56-
assert result == df_original.loc[0, "z"]
54+
with tm.raises_chained_assignment_error():
55+
df.loc[0]["z"].iloc[0] = 1.0
56+
assert df.loc[(0, 0), "z"] == df_original.loc[0, "z"]
5757
else:
58+
df.loc[0]["z"].iloc[0] = 1.0
59+
result = df.loc[(0, 0), "z"]
5860
assert result == 1
5961

6062
# correct setting

pandas/tests/indexing/multiindex/test_partial.py

+10-4
Original file line numberDiff line numberDiff line change
@@ -128,20 +128,26 @@ def test_partial_set(
128128
exp.iloc[65:85] = 0
129129
tm.assert_frame_equal(df, exp)
130130

131-
df["A"].loc[2000, 4] = 1
132-
if not using_copy_on_write:
133-
exp["A"].loc[2000, 4].values[:] = 1
131+
if using_copy_on_write:
132+
with tm.raises_chained_assignment_error():
133+
df["A"].loc[2000, 4] = 1
134+
df.loc[(2000, 4), "A"] = 1
135+
else:
136+
df["A"].loc[2000, 4] = 1
137+
exp.iloc[65:85, 0] = 1
134138
tm.assert_frame_equal(df, exp)
135139

136140
df.loc[2000] = 5
137141
exp.iloc[:100] = 5
138142
tm.assert_frame_equal(df, exp)
139143

140144
# this works...for now
141-
df["A"].iloc[14] = 5
142145
if using_copy_on_write:
146+
with tm.raises_chained_assignment_error():
147+
df["A"].iloc[14] = 5
143148
df["A"].iloc[14] == exp["A"].iloc[14]
144149
else:
150+
df["A"].iloc[14] = 5
145151
assert df["A"].iloc[14] == 5
146152

147153
@pytest.mark.parametrize("dtype", [int, float])

pandas/tests/indexing/multiindex/test_setitem.py

+4-3
Original file line numberDiff line numberDiff line change
@@ -501,8 +501,8 @@ def test_frame_setitem_copy_raises(
501501
# will raise/warn as its chained assignment
502502
df = multiindex_dataframe_random_data.T
503503
if using_copy_on_write:
504-
# TODO(CoW) it would be nice if this could still warn/raise
505-
df["foo"]["one"] = 2
504+
with tm.raises_chained_assignment_error():
505+
df["foo"]["one"] = 2
506506
else:
507507
msg = "A value is trying to be set on a copy of a slice from a DataFrame"
508508
with pytest.raises(SettingWithCopyError, match=msg):
@@ -516,7 +516,8 @@ def test_frame_setitem_copy_no_write(
516516
expected = frame
517517
df = frame.copy()
518518
if using_copy_on_write:
519-
df["foo"]["one"] = 2
519+
with tm.raises_chained_assignment_error():
520+
df["foo"]["one"] = 2
520521
else:
521522
msg = "A value is trying to be set on a copy of a slice from a DataFrame"
522523
with pytest.raises(SettingWithCopyError, match=msg):

0 commit comments

Comments
 (0)