Skip to content

Commit e55b985

Browse files
CoW warning mode: add warning for single block setitem
1 parent 6493d2a commit e55b985

21 files changed

+291
-149
lines changed

pandas/core/frame.py

+2-1
Original file line numberDiff line numberDiff line change
@@ -42,6 +42,7 @@
4242
from pandas._config import (
4343
get_option,
4444
using_copy_on_write,
45+
warn_copy_on_write,
4546
)
4647
from pandas._config.config import _get_option
4748

@@ -4538,7 +4539,7 @@ def _clear_item_cache(self) -> None:
45384539

45394540
def _get_item_cache(self, item: Hashable) -> Series:
45404541
"""Return the cached item, item represents a label indexer."""
4541-
if using_copy_on_write():
4542+
if using_copy_on_write() or warn_copy_on_write():
45424543
loc = self.columns.get_loc(item)
45434544
return self._ixs(loc, axis=1)
45444545

pandas/core/generic.py

+3-1
Original file line numberDiff line numberDiff line change
@@ -6572,6 +6572,8 @@ def astype(
65726572
# GH 18099/22869: columnwise conversion to extension dtype
65736573
# GH 24704: self.items handles duplicate column names
65746574
results = [ser.astype(dtype, copy=copy) for _, ser in self.items()]
6575+
# if warn_copy_on_write():
6576+
# self._clear_item_cache()
65756577

65766578
else:
65776579
# else, only a single dtype is given
@@ -12392,7 +12394,7 @@ def _inplace_method(self, other, op) -> Self:
1239212394
"""
1239312395
warn = True
1239412396
if not PYPY and warn_copy_on_write():
12395-
if sys.getrefcount(self) <= 5:
12397+
if sys.getrefcount(self) <= 4:
1239612398
# we are probably in an inplace setitem context (e.g. df['a'] += 1)
1239712399
warn = False
1239812400

pandas/core/internals/managers.py

+16-3
Original file line numberDiff line numberDiff line change
@@ -387,7 +387,14 @@ def setitem(self, indexer, value) -> Self:
387387
if isinstance(indexer, np.ndarray) and indexer.ndim > self.ndim:
388388
raise ValueError(f"Cannot set values with ndim > {self.ndim}")
389389

390-
if using_copy_on_write() and not self._has_no_reference(0):
390+
if warn_copy_on_write() and not self._has_no_reference(0):
391+
warnings.warn(
392+
"Setting a value on a view",
393+
FutureWarning,
394+
stacklevel=find_stack_level(),
395+
)
396+
397+
elif using_copy_on_write() and not self._has_no_reference(0):
391398
# this method is only called if there is a single block -> hardcoded 0
392399
# Split blocks to only copy the columns we want to modify
393400
if self.ndim == 2 and isinstance(indexer, tuple):
@@ -1951,9 +1958,15 @@ def get_rows_with_mask(self, indexer: npt.NDArray[np.bool_]) -> Self:
19511958
return type(self)(blk.copy(deep=False), self.index)
19521959
array = blk.values[indexer]
19531960

1961+
if isinstance(indexer, np.ndarray) and indexer.dtype.kind == "b":
1962+
# boolean indexing always gives a copy with numpy
1963+
refs = None
1964+
else:
1965+
# TODO(CoW) in theory only need to track reference if new_array is a view
1966+
refs = blk.refs
1967+
19541968
bp = BlockPlacement(slice(0, len(array)))
1955-
# TODO(CoW) in theory only need to track reference if new_array is a view
1956-
block = type(blk)(array, placement=bp, ndim=1, refs=blk.refs)
1969+
block = type(blk)(array, placement=bp, ndim=1, refs=refs)
19571970

19581971
new_idx = self.index[indexer]
19591972
return type(self)(block, new_idx)

pandas/tests/copy_view/index/test_datetimeindex.py

+4
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,10 @@
88
)
99
import pandas._testing as tm
1010

11+
pytestmark = pytest.mark.filterwarnings(
12+
"ignore:Setting a value on a view:FutureWarning"
13+
)
14+
1115

1216
@pytest.mark.parametrize(
1317
"cons",

pandas/tests/copy_view/index/test_index.py

+18-12
Original file line numberDiff line numberDiff line change
@@ -19,11 +19,12 @@ def index_view(index_data=[1, 2]):
1919
return idx, view
2020

2121

22-
def test_set_index_update_column(using_copy_on_write):
22+
def test_set_index_update_column(using_copy_on_write, warn_copy_on_write):
2323
df = DataFrame({"a": [1, 2], "b": 1})
2424
df = df.set_index("a", drop=False)
2525
expected = df.index.copy(deep=True)
26-
df.iloc[0, 0] = 100
26+
with tm.assert_cow_warning(warn_copy_on_write):
27+
df.iloc[0, 0] = 100
2728
if using_copy_on_write:
2829
tm.assert_index_equal(df.index, expected)
2930
else:
@@ -39,49 +40,53 @@ def test_set_index_drop_update_column(using_copy_on_write):
3940
tm.assert_index_equal(df.index, expected)
4041

4142

42-
def test_set_index_series(using_copy_on_write):
43+
def test_set_index_series(using_copy_on_write, warn_copy_on_write):
4344
df = DataFrame({"a": [1, 2], "b": 1.5})
4445
ser = Series([10, 11])
4546
df = df.set_index(ser)
4647
expected = df.index.copy(deep=True)
47-
ser.iloc[0] = 100
48+
with tm.assert_cow_warning(warn_copy_on_write):
49+
ser.iloc[0] = 100
4850
if using_copy_on_write:
4951
tm.assert_index_equal(df.index, expected)
5052
else:
5153
tm.assert_index_equal(df.index, Index([100, 11]))
5254

5355

54-
def test_assign_index_as_series(using_copy_on_write):
56+
def test_assign_index_as_series(using_copy_on_write, warn_copy_on_write):
5557
df = DataFrame({"a": [1, 2], "b": 1.5})
5658
ser = Series([10, 11])
5759
df.index = ser
5860
expected = df.index.copy(deep=True)
59-
ser.iloc[0] = 100
61+
with tm.assert_cow_warning(warn_copy_on_write):
62+
ser.iloc[0] = 100
6063
if using_copy_on_write:
6164
tm.assert_index_equal(df.index, expected)
6265
else:
6366
tm.assert_index_equal(df.index, Index([100, 11]))
6467

6568

66-
def test_assign_index_as_index(using_copy_on_write):
69+
def test_assign_index_as_index(using_copy_on_write, warn_copy_on_write):
6770
df = DataFrame({"a": [1, 2], "b": 1.5})
6871
ser = Series([10, 11])
6972
rhs_index = Index(ser)
7073
df.index = rhs_index
7174
rhs_index = None # overwrite to clear reference
7275
expected = df.index.copy(deep=True)
73-
ser.iloc[0] = 100
76+
with tm.assert_cow_warning(warn_copy_on_write):
77+
ser.iloc[0] = 100
7478
if using_copy_on_write:
7579
tm.assert_index_equal(df.index, expected)
7680
else:
7781
tm.assert_index_equal(df.index, Index([100, 11]))
7882

7983

80-
def test_index_from_series(using_copy_on_write):
84+
def test_index_from_series(using_copy_on_write, warn_copy_on_write):
8185
ser = Series([1, 2])
8286
idx = Index(ser)
8387
expected = idx.copy(deep=True)
84-
ser.iloc[0] = 100
88+
with tm.assert_cow_warning(warn_copy_on_write):
89+
ser.iloc[0] = 100
8590
if using_copy_on_write:
8691
tm.assert_index_equal(idx, expected)
8792
else:
@@ -96,12 +101,13 @@ def test_index_from_series_copy(using_copy_on_write):
96101
assert np.shares_memory(get_array(ser), arr)
97102

98103

99-
def test_index_from_index(using_copy_on_write):
104+
def test_index_from_index(using_copy_on_write, warn_copy_on_write):
100105
ser = Series([1, 2])
101106
idx = Index(ser)
102107
idx = Index(idx)
103108
expected = idx.copy(deep=True)
104-
ser.iloc[0] = 100
109+
with tm.assert_cow_warning(warn_copy_on_write):
110+
ser.iloc[0] = 100
105111
if using_copy_on_write:
106112
tm.assert_index_equal(idx, expected)
107113
else:

pandas/tests/copy_view/index/test_periodindex.py

+4
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,10 @@
88
)
99
import pandas._testing as tm
1010

11+
pytestmark = pytest.mark.filterwarnings(
12+
"ignore:Setting a value on a view:FutureWarning"
13+
)
14+
1115

1216
@pytest.mark.parametrize(
1317
"cons",

pandas/tests/copy_view/index/test_timedeltaindex.py

+4
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,10 @@
88
)
99
import pandas._testing as tm
1010

11+
pytestmark = pytest.mark.filterwarnings(
12+
"ignore:Setting a value on a view:FutureWarning"
13+
)
14+
1115

1216
@pytest.mark.parametrize(
1317
"cons",

pandas/tests/copy_view/test_astype.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -43,7 +43,7 @@ def test_astype_single_dtype(using_copy_on_write):
4343

4444
@pytest.mark.parametrize("dtype", ["int64", "Int64"])
4545
@pytest.mark.parametrize("new_dtype", ["int64", "Int64", "int64[pyarrow]"])
46-
def test_astype_avoids_copy(using_copy_on_write, dtype, new_dtype):
46+
def test_astype_avoids_copy(using_copy_on_write, warn_copy_on_write, dtype, new_dtype):
4747
if new_dtype == "int64[pyarrow]":
4848
pytest.importorskip("pyarrow")
4949
df = DataFrame({"a": [1, 2, 3]}, dtype=dtype)

pandas/tests/copy_view/test_constructors.py

+27-13
Original file line numberDiff line numberDiff line change
@@ -21,7 +21,7 @@
2121

2222

2323
@pytest.mark.parametrize("dtype", [None, "int64"])
24-
def test_series_from_series(dtype, using_copy_on_write):
24+
def test_series_from_series(dtype, using_copy_on_write, warn_copy_on_write):
2525
# Case: constructing a Series from another Series object follows CoW rules:
2626
# a new object is returned and thus mutations are not propagated
2727
ser = Series([1, 2, 3], name="name")
@@ -43,7 +43,8 @@ def test_series_from_series(dtype, using_copy_on_write):
4343
assert not np.shares_memory(get_array(ser), get_array(result))
4444
else:
4545
# mutating shallow copy does mutate original
46-
result.iloc[0] = 0
46+
with tm.assert_cow_warning(warn_copy_on_write):
47+
result.iloc[0] = 0
4748
assert ser.iloc[0] == 0
4849
# and still shares memory
4950
assert np.shares_memory(get_array(ser), get_array(result))
@@ -57,11 +58,12 @@ def test_series_from_series(dtype, using_copy_on_write):
5758
assert result.iloc[0] == 1
5859
else:
5960
# mutating original does mutate shallow copy
60-
ser.iloc[0] = 0
61+
with tm.assert_cow_warning(warn_copy_on_write):
62+
ser.iloc[0] = 0
6163
assert result.iloc[0] == 0
6264

6365

64-
def test_series_from_series_with_reindex(using_copy_on_write):
66+
def test_series_from_series_with_reindex(using_copy_on_write, warn_copy_on_write):
6567
# Case: constructing a Series from another Series with specifying an index
6668
# that potentially requires a reindex of the values
6769
ser = Series([1, 2, 3], name="name")
@@ -76,7 +78,8 @@ def test_series_from_series_with_reindex(using_copy_on_write):
7678
]:
7779
result = Series(ser, index=index)
7880
assert np.shares_memory(ser.values, result.values)
79-
result.iloc[0] = 0
81+
with tm.assert_cow_warning(warn_copy_on_write):
82+
result.iloc[0] = 0
8083
if using_copy_on_write:
8184
assert ser.iloc[0] == 1
8285
else:
@@ -153,6 +156,7 @@ def test_series_from_index_different_dtypes(using_copy_on_write):
153156
assert ser._mgr._has_no_reference(0)
154157

155158

159+
@pytest.mark.filterwarnings("ignore:Setting a value on a view:FutureWarning")
156160
@pytest.mark.parametrize("fastpath", [False, True])
157161
@pytest.mark.parametrize("dtype", [None, "int64"])
158162
@pytest.mark.parametrize("idx", [None, pd.RangeIndex(start=0, stop=3, step=1)])
@@ -186,7 +190,9 @@ def test_series_from_block_manager_different_dtype(using_copy_on_write):
186190

187191
@pytest.mark.parametrize("use_mgr", [True, False])
188192
@pytest.mark.parametrize("columns", [None, ["a"]])
189-
def test_dataframe_constructor_mgr_or_df(using_copy_on_write, columns, use_mgr):
193+
def test_dataframe_constructor_mgr_or_df(
194+
using_copy_on_write, warn_copy_on_write, columns, use_mgr
195+
):
190196
df = DataFrame({"a": [1, 2, 3]})
191197
df_orig = df.copy()
192198

@@ -201,7 +207,8 @@ def test_dataframe_constructor_mgr_or_df(using_copy_on_write, columns, use_mgr):
201207
new_df = DataFrame(data)
202208

203209
assert np.shares_memory(get_array(df, "a"), get_array(new_df, "a"))
204-
new_df.iloc[0] = 100
210+
with tm.assert_cow_warning(warn_copy_on_write and not use_mgr):
211+
new_df.iloc[0] = 100
205212

206213
if using_copy_on_write:
207214
assert not np.shares_memory(get_array(df, "a"), get_array(new_df, "a"))
@@ -215,7 +222,7 @@ def test_dataframe_constructor_mgr_or_df(using_copy_on_write, columns, use_mgr):
215222
@pytest.mark.parametrize("index", [None, [0, 1, 2]])
216223
@pytest.mark.parametrize("columns", [None, ["a", "b"], ["a", "b", "c"]])
217224
def test_dataframe_from_dict_of_series(
218-
request, using_copy_on_write, columns, index, dtype
225+
request, using_copy_on_write, warn_copy_on_write, columns, index, dtype
219226
):
220227
# Case: constructing a DataFrame from Series objects with copy=False
221228
# has to do a lazy following CoW rules
@@ -235,6 +242,7 @@ def test_dataframe_from_dict_of_series(
235242
assert np.shares_memory(get_array(result, "a"), get_array(s1))
236243

237244
# mutating the new dataframe doesn't mutate original
245+
# TODO(CoW-warn) this should also warn
238246
result.iloc[0, 0] = 10
239247
if using_copy_on_write:
240248
assert not np.shares_memory(get_array(result, "a"), get_array(s1))
@@ -248,7 +256,8 @@ def test_dataframe_from_dict_of_series(
248256
result = DataFrame(
249257
{"a": s1, "b": s2}, index=index, columns=columns, dtype=dtype, copy=False
250258
)
251-
s1.iloc[0] = 10
259+
with tm.assert_cow_warning(warn_copy_on_write):
260+
s1.iloc[0] = 10
252261
if using_copy_on_write:
253262
assert not np.shares_memory(get_array(result, "a"), get_array(s1))
254263
tm.assert_frame_equal(result, expected)
@@ -278,15 +287,19 @@ def test_dataframe_from_dict_of_series_with_reindex(dtype):
278287
@pytest.mark.parametrize(
279288
"data, dtype", [([1, 2], None), ([1, 2], "int64"), (["a", "b"], None)]
280289
)
281-
def test_dataframe_from_series_or_index(using_copy_on_write, data, dtype, cons):
290+
def test_dataframe_from_series_or_index(
291+
using_copy_on_write, warn_copy_on_write, data, dtype, cons
292+
):
282293
obj = cons(data, dtype=dtype)
283294
obj_orig = obj.copy()
284295
df = DataFrame(obj, dtype=dtype)
285296
assert np.shares_memory(get_array(obj), get_array(df, 0))
286297
if using_copy_on_write:
287298
assert not df._mgr._has_no_reference(0)
288299

289-
df.iloc[0, 0] = data[-1]
300+
# TODO(CoW-warn) should not warn for an index?
301+
with tm.assert_cow_warning(warn_copy_on_write):
302+
df.iloc[0, 0] = data[-1]
290303
if using_copy_on_write:
291304
tm.assert_equal(obj, obj_orig)
292305

@@ -341,15 +354,16 @@ def test_frame_from_numpy_array(using_copy_on_write, copy, using_array_manager):
341354
assert np.shares_memory(get_array(df, 0), arr)
342355

343356

344-
def test_dataframe_from_records_with_dataframe(using_copy_on_write):
357+
def test_dataframe_from_records_with_dataframe(using_copy_on_write, warn_copy_on_write):
345358
df = DataFrame({"a": [1, 2, 3]})
346359
df_orig = df.copy()
347360
with tm.assert_produces_warning(FutureWarning):
348361
df2 = DataFrame.from_records(df)
349362
if using_copy_on_write:
350363
assert not df._mgr._has_no_reference(0)
351364
assert np.shares_memory(get_array(df, "a"), get_array(df2, "a"))
352-
df2.iloc[0, 0] = 100
365+
with tm.assert_cow_warning(warn_copy_on_write):
366+
df2.iloc[0, 0] = 100
353367
if using_copy_on_write:
354368
tm.assert_frame_equal(df, df_orig)
355369
else:

0 commit comments

Comments
 (0)