Skip to content

Commit 09ed69e

Browse files
CoW warning mode: add warning for single block setitem (#55838)
1 parent c684830 commit 09ed69e

40 files changed

+433
-194
lines changed

pandas/core/frame.py

+2-1
Original file line numberDiff line numberDiff line change
@@ -42,6 +42,7 @@
4242
from pandas._config import (
4343
get_option,
4444
using_copy_on_write,
45+
warn_copy_on_write,
4546
)
4647
from pandas._config.config import _get_option
4748

@@ -4538,7 +4539,7 @@ def _clear_item_cache(self) -> None:
45384539

45394540
def _get_item_cache(self, item: Hashable) -> Series:
45404541
"""Return the cached item, item represents a label indexer."""
4541-
if using_copy_on_write():
4542+
if using_copy_on_write() or warn_copy_on_write():
45424543
loc = self.columns.get_loc(item)
45434544
return self._ixs(loc, axis=1)
45444545

pandas/core/generic.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -12392,7 +12392,7 @@ def _inplace_method(self, other, op) -> Self:
1239212392
"""
1239312393
warn = True
1239412394
if not PYPY and warn_copy_on_write():
12395-
if sys.getrefcount(self) <= 5:
12395+
if sys.getrefcount(self) <= 4:
1239612396
# we are probably in an inplace setitem context (e.g. df['a'] += 1)
1239712397
warn = False
1239812398

pandas/core/groupby/grouper.py

+5-2
Original file line numberDiff line numberDiff line change
@@ -12,7 +12,10 @@
1212

1313
import numpy as np
1414

15-
from pandas._config import using_copy_on_write
15+
from pandas._config import (
16+
using_copy_on_write,
17+
warn_copy_on_write,
18+
)
1619

1720
from pandas._libs import lib
1821
from pandas._libs.tslibs import OutOfBoundsDatetime
@@ -966,7 +969,7 @@ def is_in_axis(key) -> bool:
966969
def is_in_obj(gpr) -> bool:
967970
if not hasattr(gpr, "name"):
968971
return False
969-
if using_copy_on_write():
972+
if using_copy_on_write() or warn_copy_on_write():
970973
# For the CoW case, we check the references to determine if the
971974
# series is part of the object
972975
try:

pandas/core/internals/managers.py

+25-3
Original file line numberDiff line numberDiff line change
@@ -100,6 +100,15 @@
100100
from pandas.api.extensions import ExtensionArray
101101

102102

103+
COW_WARNING_GENERAL_MSG = """\
104+
Setting a value on a view: behaviour will change in pandas 3.0.
105+
You are mutating a Series or DataFrame object, and currently this mutation will
106+
also have effect on other Series or DataFrame objects that share data with this
107+
object. In pandas 3.0 (with Copy-on-Write), updating one Series or DataFrame object
108+
will never modify another.
109+
"""
110+
111+
103112
COW_WARNING_SETITEM_MSG = """\
104113
Setting a value on a view: behaviour will change in pandas 3.0.
105114
Currently, the mutation will also have effect on the object that shares data
@@ -387,7 +396,14 @@ def setitem(self, indexer, value) -> Self:
387396
if isinstance(indexer, np.ndarray) and indexer.ndim > self.ndim:
388397
raise ValueError(f"Cannot set values with ndim > {self.ndim}")
389398

390-
if using_copy_on_write() and not self._has_no_reference(0):
399+
if warn_copy_on_write() and not self._has_no_reference(0):
400+
warnings.warn(
401+
COW_WARNING_GENERAL_MSG,
402+
FutureWarning,
403+
stacklevel=find_stack_level(),
404+
)
405+
406+
elif using_copy_on_write() and not self._has_no_reference(0):
391407
# this method is only called if there is a single block -> hardcoded 0
392408
# Split blocks to only copy the columns we want to modify
393409
if self.ndim == 2 and isinstance(indexer, tuple):
@@ -1951,9 +1967,15 @@ def get_rows_with_mask(self, indexer: npt.NDArray[np.bool_]) -> Self:
19511967
return type(self)(blk.copy(deep=False), self.index)
19521968
array = blk.values[indexer]
19531969

1970+
if isinstance(indexer, np.ndarray) and indexer.dtype.kind == "b":
1971+
# boolean indexing always gives a copy with numpy
1972+
refs = None
1973+
else:
1974+
# TODO(CoW) in theory only need to track reference if new_array is a view
1975+
refs = blk.refs
1976+
19541977
bp = BlockPlacement(slice(0, len(array)))
1955-
# TODO(CoW) in theory only need to track reference if new_array is a view
1956-
block = type(blk)(array, placement=bp, ndim=1, refs=blk.refs)
1978+
block = type(blk)(array, placement=bp, ndim=1, refs=refs)
19571979

19581980
new_idx = self.index[indexer]
19591981
return type(self)(block, new_idx)

pandas/tests/apply/test_frame_apply.py

+3-2
Original file line numberDiff line numberDiff line change
@@ -1453,7 +1453,7 @@ def test_apply_dtype(col):
14531453
tm.assert_series_equal(result, expected)
14541454

14551455

1456-
def test_apply_mutating(using_array_manager, using_copy_on_write):
1456+
def test_apply_mutating(using_array_manager, using_copy_on_write, warn_copy_on_write):
14571457
# GH#35462 case where applied func pins a new BlockManager to a row
14581458
df = DataFrame({"a": range(100), "b": range(100, 200)})
14591459
df_orig = df.copy()
@@ -1467,7 +1467,8 @@ def func(row):
14671467
expected = df.copy()
14681468
expected["a"] += 1
14691469

1470-
result = df.apply(func, axis=1)
1470+
with tm.assert_cow_warning(warn_copy_on_write):
1471+
result = df.apply(func, axis=1)
14711472

14721473
tm.assert_frame_equal(result, expected)
14731474
if using_copy_on_write or using_array_manager:

pandas/tests/copy_view/index/test_datetimeindex.py

+4
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,10 @@
88
)
99
import pandas._testing as tm
1010

11+
pytestmark = pytest.mark.filterwarnings(
12+
"ignore:Setting a value on a view:FutureWarning"
13+
)
14+
1115

1216
@pytest.mark.parametrize(
1317
"cons",

pandas/tests/copy_view/index/test_index.py

+18-12
Original file line numberDiff line numberDiff line change
@@ -19,11 +19,12 @@ def index_view(index_data=[1, 2]):
1919
return idx, view
2020

2121

22-
def test_set_index_update_column(using_copy_on_write):
22+
def test_set_index_update_column(using_copy_on_write, warn_copy_on_write):
2323
df = DataFrame({"a": [1, 2], "b": 1})
2424
df = df.set_index("a", drop=False)
2525
expected = df.index.copy(deep=True)
26-
df.iloc[0, 0] = 100
26+
with tm.assert_cow_warning(warn_copy_on_write):
27+
df.iloc[0, 0] = 100
2728
if using_copy_on_write:
2829
tm.assert_index_equal(df.index, expected)
2930
else:
@@ -39,49 +40,53 @@ def test_set_index_drop_update_column(using_copy_on_write):
3940
tm.assert_index_equal(df.index, expected)
4041

4142

42-
def test_set_index_series(using_copy_on_write):
43+
def test_set_index_series(using_copy_on_write, warn_copy_on_write):
4344
df = DataFrame({"a": [1, 2], "b": 1.5})
4445
ser = Series([10, 11])
4546
df = df.set_index(ser)
4647
expected = df.index.copy(deep=True)
47-
ser.iloc[0] = 100
48+
with tm.assert_cow_warning(warn_copy_on_write):
49+
ser.iloc[0] = 100
4850
if using_copy_on_write:
4951
tm.assert_index_equal(df.index, expected)
5052
else:
5153
tm.assert_index_equal(df.index, Index([100, 11]))
5254

5355

54-
def test_assign_index_as_series(using_copy_on_write):
56+
def test_assign_index_as_series(using_copy_on_write, warn_copy_on_write):
5557
df = DataFrame({"a": [1, 2], "b": 1.5})
5658
ser = Series([10, 11])
5759
df.index = ser
5860
expected = df.index.copy(deep=True)
59-
ser.iloc[0] = 100
61+
with tm.assert_cow_warning(warn_copy_on_write):
62+
ser.iloc[0] = 100
6063
if using_copy_on_write:
6164
tm.assert_index_equal(df.index, expected)
6265
else:
6366
tm.assert_index_equal(df.index, Index([100, 11]))
6467

6568

66-
def test_assign_index_as_index(using_copy_on_write):
69+
def test_assign_index_as_index(using_copy_on_write, warn_copy_on_write):
6770
df = DataFrame({"a": [1, 2], "b": 1.5})
6871
ser = Series([10, 11])
6972
rhs_index = Index(ser)
7073
df.index = rhs_index
7174
rhs_index = None # overwrite to clear reference
7275
expected = df.index.copy(deep=True)
73-
ser.iloc[0] = 100
76+
with tm.assert_cow_warning(warn_copy_on_write):
77+
ser.iloc[0] = 100
7478
if using_copy_on_write:
7579
tm.assert_index_equal(df.index, expected)
7680
else:
7781
tm.assert_index_equal(df.index, Index([100, 11]))
7882

7983

80-
def test_index_from_series(using_copy_on_write):
84+
def test_index_from_series(using_copy_on_write, warn_copy_on_write):
8185
ser = Series([1, 2])
8286
idx = Index(ser)
8387
expected = idx.copy(deep=True)
84-
ser.iloc[0] = 100
88+
with tm.assert_cow_warning(warn_copy_on_write):
89+
ser.iloc[0] = 100
8590
if using_copy_on_write:
8691
tm.assert_index_equal(idx, expected)
8792
else:
@@ -96,12 +101,13 @@ def test_index_from_series_copy(using_copy_on_write):
96101
assert np.shares_memory(get_array(ser), arr)
97102

98103

99-
def test_index_from_index(using_copy_on_write):
104+
def test_index_from_index(using_copy_on_write, warn_copy_on_write):
100105
ser = Series([1, 2])
101106
idx = Index(ser)
102107
idx = Index(idx)
103108
expected = idx.copy(deep=True)
104-
ser.iloc[0] = 100
109+
with tm.assert_cow_warning(warn_copy_on_write):
110+
ser.iloc[0] = 100
105111
if using_copy_on_write:
106112
tm.assert_index_equal(idx, expected)
107113
else:

pandas/tests/copy_view/index/test_periodindex.py

+4
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,10 @@
88
)
99
import pandas._testing as tm
1010

11+
pytestmark = pytest.mark.filterwarnings(
12+
"ignore:Setting a value on a view:FutureWarning"
13+
)
14+
1115

1216
@pytest.mark.parametrize(
1317
"cons",

pandas/tests/copy_view/index/test_timedeltaindex.py

+4
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,10 @@
88
)
99
import pandas._testing as tm
1010

11+
pytestmark = pytest.mark.filterwarnings(
12+
"ignore:Setting a value on a view:FutureWarning"
13+
)
14+
1115

1216
@pytest.mark.parametrize(
1317
"cons",

pandas/tests/copy_view/test_constructors.py

+27-13
Original file line numberDiff line numberDiff line change
@@ -21,7 +21,7 @@
2121

2222

2323
@pytest.mark.parametrize("dtype", [None, "int64"])
24-
def test_series_from_series(dtype, using_copy_on_write):
24+
def test_series_from_series(dtype, using_copy_on_write, warn_copy_on_write):
2525
# Case: constructing a Series from another Series object follows CoW rules:
2626
# a new object is returned and thus mutations are not propagated
2727
ser = Series([1, 2, 3], name="name")
@@ -43,7 +43,8 @@ def test_series_from_series(dtype, using_copy_on_write):
4343
assert not np.shares_memory(get_array(ser), get_array(result))
4444
else:
4545
# mutating shallow copy does mutate original
46-
result.iloc[0] = 0
46+
with tm.assert_cow_warning(warn_copy_on_write):
47+
result.iloc[0] = 0
4748
assert ser.iloc[0] == 0
4849
# and still shares memory
4950
assert np.shares_memory(get_array(ser), get_array(result))
@@ -57,11 +58,12 @@ def test_series_from_series(dtype, using_copy_on_write):
5758
assert result.iloc[0] == 1
5859
else:
5960
# mutating original does mutate shallow copy
60-
ser.iloc[0] = 0
61+
with tm.assert_cow_warning(warn_copy_on_write):
62+
ser.iloc[0] = 0
6163
assert result.iloc[0] == 0
6264

6365

64-
def test_series_from_series_with_reindex(using_copy_on_write):
66+
def test_series_from_series_with_reindex(using_copy_on_write, warn_copy_on_write):
6567
# Case: constructing a Series from another Series with specifying an index
6668
# that potentially requires a reindex of the values
6769
ser = Series([1, 2, 3], name="name")
@@ -76,7 +78,8 @@ def test_series_from_series_with_reindex(using_copy_on_write):
7678
]:
7779
result = Series(ser, index=index)
7880
assert np.shares_memory(ser.values, result.values)
79-
result.iloc[0] = 0
81+
with tm.assert_cow_warning(warn_copy_on_write):
82+
result.iloc[0] = 0
8083
if using_copy_on_write:
8184
assert ser.iloc[0] == 1
8285
else:
@@ -153,6 +156,7 @@ def test_series_from_index_different_dtypes(using_copy_on_write):
153156
assert ser._mgr._has_no_reference(0)
154157

155158

159+
@pytest.mark.filterwarnings("ignore:Setting a value on a view:FutureWarning")
156160
@pytest.mark.parametrize("fastpath", [False, True])
157161
@pytest.mark.parametrize("dtype", [None, "int64"])
158162
@pytest.mark.parametrize("idx", [None, pd.RangeIndex(start=0, stop=3, step=1)])
@@ -186,7 +190,9 @@ def test_series_from_block_manager_different_dtype(using_copy_on_write):
186190

187191
@pytest.mark.parametrize("use_mgr", [True, False])
188192
@pytest.mark.parametrize("columns", [None, ["a"]])
189-
def test_dataframe_constructor_mgr_or_df(using_copy_on_write, columns, use_mgr):
193+
def test_dataframe_constructor_mgr_or_df(
194+
using_copy_on_write, warn_copy_on_write, columns, use_mgr
195+
):
190196
df = DataFrame({"a": [1, 2, 3]})
191197
df_orig = df.copy()
192198

@@ -201,7 +207,8 @@ def test_dataframe_constructor_mgr_or_df(using_copy_on_write, columns, use_mgr):
201207
new_df = DataFrame(data)
202208

203209
assert np.shares_memory(get_array(df, "a"), get_array(new_df, "a"))
204-
new_df.iloc[0] = 100
210+
with tm.assert_cow_warning(warn_copy_on_write and not use_mgr):
211+
new_df.iloc[0] = 100
205212

206213
if using_copy_on_write:
207214
assert not np.shares_memory(get_array(df, "a"), get_array(new_df, "a"))
@@ -215,7 +222,7 @@ def test_dataframe_constructor_mgr_or_df(using_copy_on_write, columns, use_mgr):
215222
@pytest.mark.parametrize("index", [None, [0, 1, 2]])
216223
@pytest.mark.parametrize("columns", [None, ["a", "b"], ["a", "b", "c"]])
217224
def test_dataframe_from_dict_of_series(
218-
request, using_copy_on_write, columns, index, dtype
225+
request, using_copy_on_write, warn_copy_on_write, columns, index, dtype
219226
):
220227
# Case: constructing a DataFrame from Series objects with copy=False
221228
# has to do a lazy copy following CoW rules
@@ -235,6 +242,7 @@ def test_dataframe_from_dict_of_series(
235242
assert np.shares_memory(get_array(result, "a"), get_array(s1))
236243

237244
# mutating the new dataframe doesn't mutate original
245+
# TODO(CoW-warn) this should also warn
238246
result.iloc[0, 0] = 10
239247
if using_copy_on_write:
240248
assert not np.shares_memory(get_array(result, "a"), get_array(s1))
@@ -248,7 +256,8 @@ def test_dataframe_from_dict_of_series(
248256
result = DataFrame(
249257
{"a": s1, "b": s2}, index=index, columns=columns, dtype=dtype, copy=False
250258
)
251-
s1.iloc[0] = 10
259+
with tm.assert_cow_warning(warn_copy_on_write):
260+
s1.iloc[0] = 10
252261
if using_copy_on_write:
253262
assert not np.shares_memory(get_array(result, "a"), get_array(s1))
254263
tm.assert_frame_equal(result, expected)
@@ -278,15 +287,19 @@ def test_dataframe_from_dict_of_series_with_reindex(dtype):
278287
@pytest.mark.parametrize(
279288
"data, dtype", [([1, 2], None), ([1, 2], "int64"), (["a", "b"], None)]
280289
)
281-
def test_dataframe_from_series_or_index(using_copy_on_write, data, dtype, cons):
290+
def test_dataframe_from_series_or_index(
291+
using_copy_on_write, warn_copy_on_write, data, dtype, cons
292+
):
282293
obj = cons(data, dtype=dtype)
283294
obj_orig = obj.copy()
284295
df = DataFrame(obj, dtype=dtype)
285296
assert np.shares_memory(get_array(obj), get_array(df, 0))
286297
if using_copy_on_write:
287298
assert not df._mgr._has_no_reference(0)
288299

289-
df.iloc[0, 0] = data[-1]
300+
# TODO(CoW-warn) should not warn for an index?
301+
with tm.assert_cow_warning(warn_copy_on_write):
302+
df.iloc[0, 0] = data[-1]
290303
if using_copy_on_write:
291304
tm.assert_equal(obj, obj_orig)
292305

@@ -341,15 +354,16 @@ def test_frame_from_numpy_array(using_copy_on_write, copy, using_array_manager):
341354
assert np.shares_memory(get_array(df, 0), arr)
342355

343356

344-
def test_dataframe_from_records_with_dataframe(using_copy_on_write):
357+
def test_dataframe_from_records_with_dataframe(using_copy_on_write, warn_copy_on_write):
345358
df = DataFrame({"a": [1, 2, 3]})
346359
df_orig = df.copy()
347360
with tm.assert_produces_warning(FutureWarning):
348361
df2 = DataFrame.from_records(df)
349362
if using_copy_on_write:
350363
assert not df._mgr._has_no_reference(0)
351364
assert np.shares_memory(get_array(df, "a"), get_array(df2, "a"))
352-
df2.iloc[0, 0] = 100
365+
with tm.assert_cow_warning(warn_copy_on_write):
366+
df2.iloc[0, 0] = 100
353367
if using_copy_on_write:
354368
tm.assert_frame_equal(df, df_orig)
355369
else:

0 commit comments

Comments
 (0)