Skip to content

Commit 16e31e9

Browse files
API: detect and raise error for chained assignment under Copy-on-Write
1 parent b1b56d4 commit 16e31e9

14 files changed

+194
-56
lines changed

pandas/core/frame.py

+12-1
Original file line numberDiff line numberDiff line change
@@ -16,6 +16,7 @@
1616
import functools
1717
from io import StringIO
1818
import itertools
19+
import sys
1920
from textwrap import dedent
2021
from typing import (
2122
TYPE_CHECKING,
@@ -95,7 +96,10 @@
9596
function as nv,
9697
np_percentile_argname,
9798
)
98-
from pandas.errors import InvalidIndexError
99+
from pandas.errors import (
100+
ChainedAssignmentError,
101+
InvalidIndexError,
102+
)
99103
from pandas.util._decorators import (
100104
Appender,
101105
Substitution,
@@ -3838,6 +3842,13 @@ def isetitem(self, loc, value) -> None:
38383842
self._iset_item_mgr(loc, arraylike, inplace=False)
38393843

38403844
def __setitem__(self, key, value):
3845+
if (
3846+
get_option("mode.copy_on_write")
3847+
and get_option("mode.data_manager") == "block"
3848+
):
3849+
if sys.getrefcount(self) <= 3:
3850+
raise ChainedAssignmentError("Chained assignment doesn't work!!")
3851+
38413852
key = com.apply_if_callable(key, self)
38423853

38433854
# see if we can slice the rows

pandas/core/indexing.py

+12
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,7 @@
11
from __future__ import annotations
22

33
from contextlib import suppress
4+
import sys
45
from typing import (
56
TYPE_CHECKING,
67
Hashable,
@@ -12,6 +13,8 @@
1213

1314
import numpy as np
1415

16+
from pandas._config import get_option
17+
1518
from pandas._libs.indexing import NDFrameIndexerBase
1619
from pandas._libs.lib import item_from_zerodim
1720
from pandas._typing import (
@@ -20,6 +23,7 @@
2023
)
2124
from pandas.errors import (
2225
AbstractMethodError,
26+
ChainedAssignmentError,
2327
IndexingError,
2428
InvalidIndexError,
2529
LossySetitemError,
@@ -830,6 +834,14 @@ def _ensure_listlike_indexer(self, key, axis=None, value=None) -> None:
830834

831835
@final
832836
def __setitem__(self, key, value) -> None:
837+
if (
838+
get_option("mode.copy_on_write")
839+
and get_option("mode.data_manager") == "block"
840+
):
841+
print("_LocationIndexer.__setitem__ refcount: ", sys.getrefcount(self.obj))
842+
if sys.getrefcount(self.obj) <= 2:
843+
raise ChainedAssignmentError("Chained assignment doesn't work!!")
844+
833845
check_dict_or_set_indexers(key)
834846
if isinstance(key, tuple):
835847
key = tuple(list(x) if is_iterator(x) else x for x in key)

pandas/core/series.py

+13-1
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,7 @@
33
"""
44
from __future__ import annotations
55

6+
import sys
67
from textwrap import dedent
78
from typing import (
89
IO,
@@ -68,7 +69,10 @@
6869
npt,
6970
)
7071
from pandas.compat.numpy import function as nv
71-
from pandas.errors import InvalidIndexError
72+
from pandas.errors import (
73+
ChainedAssignmentError,
74+
InvalidIndexError,
75+
)
7276
from pandas.util._decorators import (
7377
Appender,
7478
Substitution,
@@ -1070,6 +1074,14 @@ def _get_value(self, label, takeable: bool = False):
10701074
return self.iloc[loc]
10711075

10721076
def __setitem__(self, key, value) -> None:
1077+
if (
1078+
get_option("mode.copy_on_write")
1079+
and get_option("mode.data_manager") == "block"
1080+
):
1081+
print("Series.__getitem__ refcount: ", sys.getrefcount(self))
1082+
if sys.getrefcount(self) <= 3:
1083+
raise ChainedAssignmentError("Chained assignment doesn't work!!")
1084+
10731085
check_dict_or_set_indexers(key)
10741086
key = com.apply_if_callable(key, self)
10751087
cacher_needs_updating = self._check_is_chained_assignment_possible()

pandas/errors/__init__.py

+22
Original file line numberDiff line numberDiff line change
@@ -298,6 +298,28 @@ class SettingWithCopyError(ValueError):
298298
"""
299299

300300

301+
class ChainedAssignmentError(ValueError):
302+
"""
303+
Exception raised when trying to set values using chained assignment.
304+
305+
The ``mode.copy_on_write`` option needs to be enabled. This can
306+
happen unintentionally when chained indexing.
307+
308+
For more information on evaluation order,
309+
see :ref:`the user guide<indexing.evaluation_order>`.
310+
311+
For more information on view vs. copy,
312+
see :ref:`the user guide<indexing.view_versus_copy>`.
313+
314+
Examples
315+
--------
316+
>>> pd.options.mode.copy_on_write = True
317+
>>> df = pd.DataFrame({'A': [1, 1, 1, 2, 2]}, columns=['A'])
318+
>>> df.loc[0:3]['A'] = 'a' # doctest: +SKIP
319+
... # ChainedAssignmentError: Chained assignment doesn't work!!
320+
"""
321+
322+
301323
class SettingWithCopyWarning(Warning):
302324
"""
303325
Warning raised when trying to set on a copied slice from a ``DataFrame``.

pandas/tests/frame/indexing/test_setitem.py

+8-4
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,7 @@
33
import numpy as np
44
import pytest
55

6+
from pandas.errors import ChainedAssignmentError
67
import pandas.util._test_decorators as td
78

89
from pandas.core.dtypes.base import _registry as ea_registry
@@ -1126,7 +1127,7 @@ def test_setitem_ea_boolean_mask(self):
11261127

11271128

11281129
class TestDataFrameSetitemCopyViewSemantics:
1129-
def test_setitem_always_copy(self, float_frame):
1130+
def test_setitem_always_copy(self, float_frame, using_copy_on_write):
11301131
assert "E" not in float_frame.columns
11311132
s = float_frame["A"].copy()
11321133
float_frame["E"] = s
@@ -1245,12 +1246,15 @@ def test_setitem_column_update_inplace(self, using_copy_on_write):
12451246
df = DataFrame({col: np.zeros(len(labels)) for col in labels}, index=labels)
12461247
values = df._mgr.blocks[0].values
12471248

1248-
for label in df.columns:
1249-
df[label][label] = 1
1250-
12511249
if not using_copy_on_write:
1250+
for label in df.columns:
1251+
df[label][label] = 1
1252+
12521253
# diagonal values all updated
12531254
assert np.all(values[np.arange(10), np.arange(10)] == 1)
12541255
else:
1256+
with pytest.raises(ChainedAssignmentError):
1257+
for label in df.columns:
1258+
df[label][label] = 1
12551259
# original dataframe not updated
12561260
assert np.all(values[np.arange(10), np.arange(10)] == 0)

pandas/tests/frame/indexing/test_xs.py

+6-2
Original file line numberDiff line numberDiff line change
@@ -3,7 +3,10 @@
33
import numpy as np
44
import pytest
55

6-
from pandas.errors import SettingWithCopyError
6+
from pandas.errors import (
7+
ChainedAssignmentError,
8+
SettingWithCopyError,
9+
)
710

811
from pandas import (
912
DataFrame,
@@ -124,7 +127,8 @@ def test_xs_view(self, using_array_manager, using_copy_on_write):
124127
df_orig = dm.copy()
125128

126129
if using_copy_on_write:
127-
dm.xs(2)[:] = 20
130+
with pytest.raises(ChainedAssignmentError):
131+
dm.xs(2)[:] = 20
128132
tm.assert_frame_equal(dm, df_orig)
129133
elif using_array_manager:
130134
# INFO(ArrayManager) with ArrayManager getting a row as a view is

pandas/tests/frame/methods/test_sort_values.py

+2-2
Original file line numberDiff line numberDiff line change
@@ -345,8 +345,8 @@ def test_sort_values_frame_column_inplace_sort_exception(
345345
with pytest.raises(ValueError, match="This Series is a view"):
346346
s.sort_values(inplace=True)
347347

348-
cp = s.copy()
349-
cp.sort_values() # it works!
348+
cp = s.copy()
349+
cp.sort_values() # it works!
350350

351351
def test_sort_values_nat_values_in_int_column(self):
352352

pandas/tests/frame/test_block_internals.py

+9-2
Original file line numberDiff line numberDiff line change
@@ -7,7 +7,10 @@
77
import numpy as np
88
import pytest
99

10-
from pandas.errors import PerformanceWarning
10+
from pandas.errors import (
11+
ChainedAssignmentError,
12+
PerformanceWarning,
13+
)
1114
import pandas.util._test_decorators as td
1215

1316
import pandas as pd
@@ -340,7 +343,11 @@ def test_stale_cached_series_bug_473(self, using_copy_on_write):
340343
)
341344
repr(Y)
342345
Y["e"] = Y["e"].astype("object")
343-
Y["g"]["c"] = np.NaN
346+
if using_copy_on_write:
347+
with pytest.raises(ChainedAssignmentError):
348+
Y["g"]["c"] = np.NaN
349+
else:
350+
Y["g"]["c"] = np.NaN
344351
repr(Y)
345352
result = Y.sum() # noqa
346353
exp = Y["g"].sum() # noqa

pandas/tests/indexing/multiindex/test_chaining_and_caching.py

+9-4
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,10 @@
11
import numpy as np
22
import pytest
33

4-
from pandas.errors import SettingWithCopyError
4+
from pandas.errors import (
5+
ChainedAssignmentError,
6+
SettingWithCopyError,
7+
)
58
import pandas.util._test_decorators as td
69

710
from pandas import (
@@ -50,11 +53,13 @@ def test_cache_updating(using_copy_on_write):
5053

5154
# setting via chained assignment
5255
# but actually works, since everything is a view
53-
df.loc[0]["z"].iloc[0] = 1.0
54-
result = df.loc[(0, 0), "z"]
5556
if using_copy_on_write:
56-
assert result == df_original.loc[0, "z"]
57+
with pytest.raises(ChainedAssignmentError):
58+
df.loc[0]["z"].iloc[0] = 1.0
59+
assert df.loc[(0, 0), "z"] == df_original.loc[0, "z"]
5760
else:
61+
df.loc[0]["z"].iloc[0] = 1.0
62+
result = df.loc[(0, 0), "z"]
5863
assert result == 1
5964

6065
# correct setting

pandas/tests/indexing/multiindex/test_partial.py

+11-4
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,7 @@
11
import numpy as np
22
import pytest
33

4+
from pandas.errors import ChainedAssignmentError
45
import pandas.util._test_decorators as td
56

67
from pandas import (
@@ -132,20 +133,26 @@ def test_partial_set(
132133
exp.iloc[65:85] = 0
133134
tm.assert_frame_equal(df, exp)
134135

135-
df["A"].loc[2000, 4] = 1
136-
if not using_copy_on_write:
137-
exp["A"].loc[2000, 4].values[:] = 1
136+
if using_copy_on_write:
137+
with pytest.raises(ChainedAssignmentError):
138+
df["A"].loc[2000, 4] = 1
139+
df.loc[(2000, 4), "A"] = 1
140+
else:
141+
df["A"].loc[2000, 4] = 1
142+
exp.iloc[65:85, 0] = 1
138143
tm.assert_frame_equal(df, exp)
139144

140145
df.loc[2000] = 5
141146
exp.iloc[:100] = 5
142147
tm.assert_frame_equal(df, exp)
143148

144149
# this works...for now
145-
df["A"].iloc[14] = 5
146150
if using_copy_on_write:
151+
with pytest.raises(ChainedAssignmentError):
152+
df["A"].iloc[14] = 5
147153
df["A"].iloc[14] == exp["A"].iloc[14]
148154
else:
155+
df["A"].iloc[14] = 5
149156
assert df["A"].iloc[14] == 5
150157

151158
@pytest.mark.parametrize("dtype", [int, float])

pandas/tests/indexing/multiindex/test_setitem.py

+8-4
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,10 @@
11
import numpy as np
22
import pytest
33

4-
from pandas.errors import SettingWithCopyError
4+
from pandas.errors import (
5+
ChainedAssignmentError,
6+
SettingWithCopyError,
7+
)
58
import pandas.util._test_decorators as td
69

710
import pandas as pd
@@ -501,8 +504,8 @@ def test_frame_setitem_copy_raises(
501504
# will raise/warn as its chained assignment
502505
df = multiindex_dataframe_random_data.T
503506
if using_copy_on_write:
504-
# TODO(CoW) it would be nice if this could still warn/raise
505-
df["foo"]["one"] = 2
507+
with pytest.raises(ChainedAssignmentError):
508+
df["foo"]["one"] = 2
506509
else:
507510
msg = "A value is trying to be set on a copy of a slice from a DataFrame"
508511
with pytest.raises(SettingWithCopyError, match=msg):
@@ -516,7 +519,8 @@ def test_frame_setitem_copy_no_write(
516519
expected = frame
517520
df = frame.copy()
518521
if using_copy_on_write:
519-
df["foo"]["one"] = 2
522+
with pytest.raises(ChainedAssignmentError):
523+
df["foo"]["one"] = 2
520524
else:
521525
msg = "A value is trying to be set on a copy of a slice from a DataFrame"
522526
with pytest.raises(SettingWithCopyError, match=msg):

0 commit comments

Comments
 (0)