Skip to content

Commit 192a4e3

Browse files
CoW: Add warning mode for cases that will change behaviour (#55428)
1 parent a39f783 commit 192a4e3

31 files changed

+328
-93
lines changed

.github/workflows/unit-tests.yml

+4
Original file line numberDiff line numberDiff line change
@@ -69,6 +69,10 @@ jobs:
6969
env_file: actions-311.yaml
7070
pattern: "not slow and not network and not single_cpu"
7171
pandas_copy_on_write: "1"
72+
- name: "Copy-on-Write (warnings)"
73+
env_file: actions-311.yaml
74+
pattern: "not slow and not network and not single_cpu"
75+
pandas_copy_on_write: "warn"
7276
- name: "Pypy"
7377
env_file: actions-pypy-39.yaml
7478
pattern: "not slow and not network and not single_cpu"

pandas/_config/__init__.py

+13-1
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,7 @@
1515
"option_context",
1616
"options",
1717
"using_copy_on_write",
18+
"warn_copy_on_write",
1819
]
1920
from pandas._config import config
2021
from pandas._config import dates # pyright: ignore[reportUnusedImport] # noqa: F401
@@ -32,7 +33,18 @@
3233

3334
def using_copy_on_write() -> bool:
3435
_mode_options = _global_config["mode"]
35-
return _mode_options["copy_on_write"] and _mode_options["data_manager"] == "block"
36+
return (
37+
_mode_options["copy_on_write"] is True
38+
and _mode_options["data_manager"] == "block"
39+
)
40+
41+
42+
def warn_copy_on_write() -> bool:
43+
_mode_options = _global_config["mode"]
44+
return (
45+
_mode_options["copy_on_write"] == "warn"
46+
and _mode_options["data_manager"] == "block"
47+
)
3648

3749

3850
def using_nullable_dtypes() -> bool:

pandas/_testing/__init__.py

+2
Original file line numberDiff line numberDiff line change
@@ -88,6 +88,7 @@
8888
get_obj,
8989
)
9090
from pandas._testing.contexts import (
91+
assert_cow_warning,
9192
decompress_file,
9293
ensure_clean,
9394
raises_chained_assignment_error,
@@ -1097,6 +1098,7 @@ def shares_memory(left, right) -> bool:
10971098
"assert_series_equal",
10981099
"assert_sp_array_equal",
10991100
"assert_timedelta_array_equal",
1101+
"assert_cow_warning",
11001102
"at",
11011103
"BOOL_DTYPES",
11021104
"box_expected",

pandas/_testing/contexts.py

+26
Original file line numberDiff line numberDiff line change
@@ -214,3 +214,29 @@ def raises_chained_assignment_error(extra_warnings=(), extra_match=()):
214214
(ChainedAssignmentError, *extra_warnings),
215215
match="|".join((match, *extra_match)),
216216
)
217+
218+
219+
def assert_cow_warning(warn=True, match=None, **kwargs):
220+
"""
221+
Assert that a warning is raised in the CoW warning mode.
222+
223+
Parameters
224+
----------
225+
warn : bool, default True
226+
By default, check that a warning is raised. Can be turned off by passing False.
227+
match : str, optional
228+
The warning message to match against, if different from the default.
229+
kwargs
230+
Passed through to assert_produces_warning
231+
"""
232+
from pandas._testing import assert_produces_warning
233+
234+
if not warn:
235+
from contextlib import nullcontext
236+
237+
return nullcontext()
238+
239+
if not match:
240+
match = "Setting a value on a view"
241+
242+
return assert_produces_warning(FutureWarning, match=match, **kwargs)

pandas/conftest.py

+12-1
Original file line numberDiff line numberDiff line change
@@ -1995,7 +1995,18 @@ def using_copy_on_write() -> bool:
19951995
Fixture to check if Copy-on-Write is enabled.
19961996
"""
19971997
return (
1998-
pd.options.mode.copy_on_write
1998+
pd.options.mode.copy_on_write is True
1999+
and _get_option("mode.data_manager", silent=True) == "block"
2000+
)
2001+
2002+
2003+
@pytest.fixture
2004+
def warn_copy_on_write() -> bool:
2005+
"""
2006+
Fixture to check if the Copy-on-Write warning mode is enabled.
2007+
"""
2008+
return (
2009+
pd.options.mode.copy_on_write == "warn"
19992010
and _get_option("mode.data_manager", silent=True) == "block"
20002011
)
20012012

pandas/core/config_init.py

+4-2
Original file line numberDiff line numberDiff line change
@@ -476,9 +476,11 @@ def use_inf_as_na_cb(key) -> None:
476476
"copy_on_write",
477477
# Get the default from an environment variable, if set, otherwise defaults
478478
# to False. This environment variable can be set for testing.
479-
os.environ.get("PANDAS_COPY_ON_WRITE", "0") == "1",
479+
"warn"
480+
if os.environ.get("PANDAS_COPY_ON_WRITE", "0") == "warn"
481+
else os.environ.get("PANDAS_COPY_ON_WRITE", "0") == "1",
480482
copy_on_write_doc,
481-
validator=is_bool,
483+
validator=is_one_of_factory([True, False, "warn"]),
482484
)
483485

484486

pandas/core/generic.py

+9-2
Original file line numberDiff line numberDiff line change
@@ -30,6 +30,7 @@
3030
from pandas._config import (
3131
config,
3232
using_copy_on_write,
33+
warn_copy_on_write,
3334
)
3435

3536
from pandas._libs import lib
@@ -4397,7 +4398,7 @@ def _check_setitem_copy(self, t: str = "setting", force: bool_t = False):
43974398
df.iloc[0:5]['group'] = 'a'
43984399
43994400
"""
4400-
if using_copy_on_write():
4401+
if using_copy_on_write() or warn_copy_on_write():
44014402
return
44024403

44034404
# return early if the check is not needed
@@ -12391,14 +12392,20 @@ def _inplace_method(self, other, op) -> Self:
1239112392
"""
1239212393
Wrap arithmetic method to operate inplace.
1239312394
"""
12395+
warn = True
12396+
if not PYPY and warn_copy_on_write():
12397+
if sys.getrefcount(self) <= 5:
12398+
# we are probably in an inplace setitem context (e.g. df['a'] += 1)
12399+
warn = False
12400+
1239412401
result = op(self, other)
1239512402

1239612403
if self.ndim == 1 and result._indexed_same(self) and result.dtype == self.dtype:
1239712404
# GH#36498 this inplace op can _actually_ be inplace.
1239812405
# Item "ArrayManager" of "Union[ArrayManager, SingleArrayManager,
1239912406
# BlockManager, SingleBlockManager]" has no attribute "setitem_inplace"
1240012407
self._mgr.setitem_inplace( # type: ignore[union-attr]
12401-
slice(None), result._values
12408+
slice(None), result._values, warn=warn
1240212409
)
1240312410
return self
1240412411

pandas/core/internals/base.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -307,7 +307,7 @@ def array(self) -> ArrayLike:
307307
# error: "SingleDataManager" has no attribute "arrays"; maybe "array"
308308
return self.arrays[0] # type: ignore[attr-defined]
309309

310-
def setitem_inplace(self, indexer, value) -> None:
310+
def setitem_inplace(self, indexer, value, warn: bool = True) -> None:
311311
"""
312312
Set values with indexer.
313313

pandas/core/internals/managers.py

+31-5
Original file line numberDiff line numberDiff line change
@@ -16,7 +16,10 @@
1616

1717
import numpy as np
1818

19-
from pandas._config import using_copy_on_write
19+
from pandas._config import (
20+
using_copy_on_write,
21+
warn_copy_on_write,
22+
)
2023

2124
from pandas._libs import (
2225
internals as libinternals,
@@ -97,6 +100,20 @@
97100
from pandas.api.extensions import ExtensionArray
98101

99102

103+
COW_WARNING_SETITEM_MSG = """\
104+
Setting a value on a view: behaviour will change in pandas 3.0.
105+
Currently, the mutation will also have effect on the object that shares data
106+
with this object. For example, when setting a value in a Series that was
107+
extracted from a column of a DataFrame, that DataFrame will also be updated:
108+
109+
ser = df["col"]
110+
ser[0] = 0 <--- in pandas 2, this also updates `df`
111+
112+
In pandas 3.0 (with Copy-on-Write), updating one Series/DataFrame will never
113+
modify another, and thus in the example above, `df` will not be changed.
114+
"""
115+
116+
100117
class BaseBlockManager(DataManager):
101118
"""
102119
Core internal data structure to implement DataFrame, Series, etc.
@@ -1988,7 +2005,7 @@ def get_numeric_data(self, copy: bool = False) -> Self:
19882005
def _can_hold_na(self) -> bool:
19892006
return self._block._can_hold_na
19902007

1991-
def setitem_inplace(self, indexer, value) -> None:
2008+
def setitem_inplace(self, indexer, value, warn: bool = True) -> None:
19922009
"""
19932010
Set values with indexer.
19942011
@@ -1998,9 +2015,18 @@ def setitem_inplace(self, indexer, value) -> None:
19982015
in place, not returning a new Manager (and Block), and thus never changing
19992016
the dtype.
20002017
"""
2001-
if using_copy_on_write() and not self._has_no_reference(0):
2002-
self.blocks = (self._block.copy(),)
2003-
self._cache.clear()
2018+
using_cow = using_copy_on_write()
2019+
warn_cow = warn_copy_on_write()
2020+
if (using_cow or warn_cow) and not self._has_no_reference(0):
2021+
if using_cow:
2022+
self.blocks = (self._block.copy(),)
2023+
self._cache.clear()
2024+
elif warn and warn_cow:
2025+
warnings.warn(
2026+
COW_WARNING_SETITEM_MSG,
2027+
FutureWarning,
2028+
stacklevel=find_stack_level(),
2029+
)
20042030

20052031
super().setitem_inplace(indexer, value)
20062032

pandas/tests/apply/test_invalid_arg.py

+2
Original file line numberDiff line numberDiff line change
@@ -147,6 +147,8 @@ def test_transform_axis_1_raises():
147147
Series([1]).transform("sum", axis=1)
148148

149149

150+
# TODO(CoW-warn) should not need to warn
151+
@pytest.mark.filterwarnings("ignore:Setting a value on a view:FutureWarning")
150152
def test_apply_modify_traceback():
151153
data = DataFrame(
152154
{

0 commit comments

Comments
 (0)