pandas-dev · phofl · Nov 10, 2023 · Oct 30, 2023 · Nov 6, 2023 · Nov 6, 2023
diff --git a/pandas/core/frame.py b/pandas/core/frame.py
@@ -42,6 +42,7 @@
 from pandas._config import (
     get_option,
     using_copy_on_write,
+    warn_copy_on_write,
 )
 from pandas._config.config import _get_option
 
@@ -4538,7 +4539,7 @@ def _clear_item_cache(self) -> None:
 
     def _get_item_cache(self, item: Hashable) -> Series:
         """Return the cached item, item represents a label indexer."""
-        if using_copy_on_write():
+        if using_copy_on_write() or warn_copy_on_write():
             loc = self.columns.get_loc(item)
             return self._ixs(loc, axis=1)
 

diff --git a/pandas/core/generic.py b/pandas/core/generic.py
@@ -12392,7 +12392,7 @@ def _inplace_method(self, other, op) -> Self:
         """
         warn = True
         if not PYPY and warn_copy_on_write():
-            if sys.getrefcount(self) <= 5:
+            if sys.getrefcount(self) <= 4:
                 # we are probably in an inplace setitem context (e.g. df['a'] += 1)
                 warn = False
 

diff --git a/pandas/core/groupby/grouper.py b/pandas/core/groupby/grouper.py
@@ -12,7 +12,10 @@
 
 import numpy as np
 
-from pandas._config import using_copy_on_write
+from pandas._config import (
+    using_copy_on_write,
+    warn_copy_on_write,
+)
 
 from pandas._libs import lib
 from pandas._libs.tslibs import OutOfBoundsDatetime
@@ -966,7 +969,7 @@ def is_in_axis(key) -> bool:
     def is_in_obj(gpr) -> bool:
         if not hasattr(gpr, "name"):
             return False
-        if using_copy_on_write():
+        if using_copy_on_write() or warn_copy_on_write():
             # For the CoW case, we check the references to determine if the
             # series is part of the object
             try:

diff --git a/pandas/core/internals/managers.py b/pandas/core/internals/managers.py
@@ -100,6 +100,15 @@
     from pandas.api.extensions import ExtensionArray
 
 
+COW_WARNING_GENERAL_MSG = """\
+Setting a value on a view: behaviour will change in pandas 3.0.
+You are mutating a Series or DataFrame object, and currently this mutation will
+also have effect on other Series or DataFrame objects that share data with this
+object. In pandas 3.0 (with Copy-on-Write), updating one Series or DataFrame object
+will never modify another.
+"""
+
+
 COW_WARNING_SETITEM_MSG = """\
 Setting a value on a view: behaviour will change in pandas 3.0.
 Currently, the mutation will also have effect on the object that shares data
@@ -387,7 +396,14 @@ def setitem(self, indexer, value) -> Self:
         if isinstance(indexer, np.ndarray) and indexer.ndim > self.ndim:
             raise ValueError(f"Cannot set values with ndim > {self.ndim}")
 
-        if using_copy_on_write() and not self._has_no_reference(0):
+        if warn_copy_on_write() and not self._has_no_reference(0):
+            warnings.warn(
+                COW_WARNING_GENERAL_MSG,
+                FutureWarning,
+                stacklevel=find_stack_level(),
+            )
+
+        elif using_copy_on_write() and not self._has_no_reference(0):
             # this method is only called if there is a single block -> hardcoded 0
             # Split blocks to only copy the columns we want to modify
             if self.ndim == 2 and isinstance(indexer, tuple):
@@ -1951,9 +1967,15 @@ def get_rows_with_mask(self, indexer: npt.NDArray[np.bool_]) -> Self:
             return type(self)(blk.copy(deep=False), self.index)
         array = blk.values[indexer]
 
+        if isinstance(indexer, np.ndarray) and indexer.dtype.kind == "b":
+            # boolean indexing always gives a copy with numpy
+            refs = None
+        else:
+            # TODO(CoW) in theory only need to track reference if array is a view
+            refs = blk.refs
+
         bp = BlockPlacement(slice(0, len(array)))
-        # TODO(CoW) in theory only need to track reference if new_array is a view
-        block = type(blk)(array, placement=bp, ndim=1, refs=blk.refs)
+        block = type(blk)(array, placement=bp, ndim=1, refs=refs)
 
         new_idx = self.index[indexer]
         return type(self)(block, new_idx)

diff --git a/pandas/tests/apply/test_frame_apply.py b/pandas/tests/apply/test_frame_apply.py
@@ -1453,7 +1453,7 @@ def test_apply_dtype(col):
     tm.assert_series_equal(result, expected)
 
 
-def test_apply_mutating(using_array_manager, using_copy_on_write):
+def test_apply_mutating(using_array_manager, using_copy_on_write, warn_copy_on_write):
     # GH#35462 case where applied func pins a new BlockManager to a row
     df = DataFrame({"a": range(100), "b": range(100, 200)})
     df_orig = df.copy()
@@ -1467,7 +1467,8 @@ def func(row):
     expected = df.copy()
     expected["a"] += 1
 
-    result = df.apply(func, axis=1)
+    with tm.assert_cow_warning(warn_copy_on_write):
+        result = df.apply(func, axis=1)
 
     tm.assert_frame_equal(result, expected)
     if using_copy_on_write or using_array_manager:

diff --git a/pandas/tests/copy_view/index/test_datetimeindex.py b/pandas/tests/copy_view/index/test_datetimeindex.py
@@ -8,6 +8,10 @@
 )
 import pandas._testing as tm
 
+pytestmark = pytest.mark.filterwarnings(
+    "ignore:Setting a value on a view:FutureWarning"
+)
+
 
 @pytest.mark.parametrize(
     "cons",

diff --git a/pandas/tests/copy_view/index/test_index.py b/pandas/tests/copy_view/index/test_index.py
@@ -19,11 +19,12 @@ def index_view(index_data=[1, 2]):
     return idx, view
 
 
-def test_set_index_update_column(using_copy_on_write):
+def test_set_index_update_column(using_copy_on_write, warn_copy_on_write):
     df = DataFrame({"a": [1, 2], "b": 1})
     df = df.set_index("a", drop=False)
     expected = df.index.copy(deep=True)
-    df.iloc[0, 0] = 100
+    with tm.assert_cow_warning(warn_copy_on_write):
+        df.iloc[0, 0] = 100
     if using_copy_on_write:
         tm.assert_index_equal(df.index, expected)
     else:
@@ -39,49 +40,53 @@ def test_set_index_drop_update_column(using_copy_on_write):
     tm.assert_index_equal(df.index, expected)
 
 
-def test_set_index_series(using_copy_on_write):
+def test_set_index_series(using_copy_on_write, warn_copy_on_write):
     df = DataFrame({"a": [1, 2], "b": 1.5})
     ser = Series([10, 11])
     df = df.set_index(ser)
     expected = df.index.copy(deep=True)
-    ser.iloc[0] = 100
+    with tm.assert_cow_warning(warn_copy_on_write):
+        ser.iloc[0] = 100
     if using_copy_on_write:
         tm.assert_index_equal(df.index, expected)
     else:
         tm.assert_index_equal(df.index, Index([100, 11]))
 
 
-def test_assign_index_as_series(using_copy_on_write):
+def test_assign_index_as_series(using_copy_on_write, warn_copy_on_write):
     df = DataFrame({"a": [1, 2], "b": 1.5})
     ser = Series([10, 11])
     df.index = ser
     expected = df.index.copy(deep=True)
-    ser.iloc[0] = 100
+    with tm.assert_cow_warning(warn_copy_on_write):
+        ser.iloc[0] = 100
     if using_copy_on_write:
         tm.assert_index_equal(df.index, expected)
     else:
         tm.assert_index_equal(df.index, Index([100, 11]))
 
 
-def test_assign_index_as_index(using_copy_on_write):
+def test_assign_index_as_index(using_copy_on_write, warn_copy_on_write):
     df = DataFrame({"a": [1, 2], "b": 1.5})
     ser = Series([10, 11])
     rhs_index = Index(ser)
     df.index = rhs_index
     rhs_index = None  # overwrite to clear reference
     expected = df.index.copy(deep=True)
-    ser.iloc[0] = 100
+    with tm.assert_cow_warning(warn_copy_on_write):
+        ser.iloc[0] = 100
     if using_copy_on_write:
         tm.assert_index_equal(df.index, expected)
     else:
         tm.assert_index_equal(df.index, Index([100, 11]))
 
 
-def test_index_from_series(using_copy_on_write):
+def test_index_from_series(using_copy_on_write, warn_copy_on_write):
     ser = Series([1, 2])
     idx = Index(ser)
     expected = idx.copy(deep=True)
-    ser.iloc[0] = 100
+    with tm.assert_cow_warning(warn_copy_on_write):
+        ser.iloc[0] = 100
     if using_copy_on_write:
         tm.assert_index_equal(idx, expected)
     else:
@@ -96,12 +101,13 @@ def test_index_from_series_copy(using_copy_on_write):
     assert np.shares_memory(get_array(ser), arr)
 
 
-def test_index_from_index(using_copy_on_write):
+def test_index_from_index(using_copy_on_write, warn_copy_on_write):
     ser = Series([1, 2])
     idx = Index(ser)
     idx = Index(idx)
     expected = idx.copy(deep=True)
-    ser.iloc[0] = 100
+    with tm.assert_cow_warning(warn_copy_on_write):
+        ser.iloc[0] = 100
     if using_copy_on_write:
         tm.assert_index_equal(idx, expected)
     else:

diff --git a/pandas/tests/copy_view/index/test_periodindex.py b/pandas/tests/copy_view/index/test_periodindex.py
@@ -8,6 +8,10 @@
 )
 import pandas._testing as tm
 
+pytestmark = pytest.mark.filterwarnings(
+    "ignore:Setting a value on a view:FutureWarning"
+)
+
 
 @pytest.mark.parametrize(
     "cons",

diff --git a/pandas/tests/copy_view/index/test_timedeltaindex.py b/pandas/tests/copy_view/index/test_timedeltaindex.py
@@ -8,6 +8,10 @@
 )
 import pandas._testing as tm
 
+pytestmark = pytest.mark.filterwarnings(
+    "ignore:Setting a value on a view:FutureWarning"
+)
+
 
 @pytest.mark.parametrize(
     "cons",

diff --git a/pandas/tests/copy_view/test_constructors.py b/pandas/tests/copy_view/test_constructors.py
@@ -21,7 +21,7 @@
 
 
 @pytest.mark.parametrize("dtype", [None, "int64"])
-def test_series_from_series(dtype, using_copy_on_write):
+def test_series_from_series(dtype, using_copy_on_write, warn_copy_on_write):
     # Case: constructing a Series from another Series object follows CoW rules:
     # a new object is returned and thus mutations are not propagated
     ser = Series([1, 2, 3], name="name")
@@ -43,7 +43,8 @@ def test_series_from_series(dtype, using_copy_on_write):
         assert not np.shares_memory(get_array(ser), get_array(result))
     else:
         # mutating shallow copy does mutate original
-        result.iloc[0] = 0
+        with tm.assert_cow_warning(warn_copy_on_write):
+            result.iloc[0] = 0
         assert ser.iloc[0] == 0
         # and still shares memory
         assert np.shares_memory(get_array(ser), get_array(result))
@@ -57,11 +58,12 @@ def test_series_from_series(dtype, using_copy_on_write):
         assert result.iloc[0] == 1
     else:
         # mutating original does mutate shallow copy
-        ser.iloc[0] = 0
+        with tm.assert_cow_warning(warn_copy_on_write):
+            ser.iloc[0] = 0
         assert result.iloc[0] == 0
 
 
-def test_series_from_series_with_reindex(using_copy_on_write):
+def test_series_from_series_with_reindex(using_copy_on_write, warn_copy_on_write):
     # Case: constructing a Series from another Series with specifying an index
     # that potentially requires a reindex of the values
     ser = Series([1, 2, 3], name="name")
@@ -76,7 +78,8 @@ def test_series_from_series_with_reindex(using_copy_on_write):
     ]:
         result = Series(ser, index=index)
         assert np.shares_memory(ser.values, result.values)
-        result.iloc[0] = 0
+        with tm.assert_cow_warning(warn_copy_on_write):
+            result.iloc[0] = 0
         if using_copy_on_write:
             assert ser.iloc[0] == 1
         else:
@@ -153,6 +156,7 @@ def test_series_from_index_different_dtypes(using_copy_on_write):
         assert ser._mgr._has_no_reference(0)
 
 
+@pytest.mark.filterwarnings("ignore:Setting a value on a view:FutureWarning")
 @pytest.mark.parametrize("fastpath", [False, True])
 @pytest.mark.parametrize("dtype", [None, "int64"])
 @pytest.mark.parametrize("idx", [None, pd.RangeIndex(start=0, stop=3, step=1)])
@@ -186,7 +190,9 @@ def test_series_from_block_manager_different_dtype(using_copy_on_write):
 
 @pytest.mark.parametrize("use_mgr", [True, False])
 @pytest.mark.parametrize("columns", [None, ["a"]])
-def test_dataframe_constructor_mgr_or_df(using_copy_on_write, columns, use_mgr):
+def test_dataframe_constructor_mgr_or_df(
+    using_copy_on_write, warn_copy_on_write, columns, use_mgr
+):
     df = DataFrame({"a": [1, 2, 3]})
     df_orig = df.copy()
 
@@ -201,7 +207,8 @@ def test_dataframe_constructor_mgr_or_df(using_copy_on_write, columns, use_mgr):
         new_df = DataFrame(data)
 
     assert np.shares_memory(get_array(df, "a"), get_array(new_df, "a"))
-    new_df.iloc[0] = 100
+    with tm.assert_cow_warning(warn_copy_on_write and not use_mgr):
+        new_df.iloc[0] = 100
 
     if using_copy_on_write:
         assert not np.shares_memory(get_array(df, "a"), get_array(new_df, "a"))
@@ -215,7 +222,7 @@ def test_dataframe_constructor_mgr_or_df(using_copy_on_write, columns, use_mgr):
 @pytest.mark.parametrize("index", [None, [0, 1, 2]])
 @pytest.mark.parametrize("columns", [None, ["a", "b"], ["a", "b", "c"]])
 def test_dataframe_from_dict_of_series(
-    request, using_copy_on_write, columns, index, dtype
+    request, using_copy_on_write, warn_copy_on_write, columns, index, dtype
 ):
     # Case: constructing a DataFrame from Series objects with copy=False
     # has to do a lazy following CoW rules
@@ -235,6 +242,7 @@ def test_dataframe_from_dict_of_series(
     assert np.shares_memory(get_array(result, "a"), get_array(s1))
 
     # mutating the new dataframe doesn't mutate original
+    # TODO(CoW-warn) this should also warn
     result.iloc[0, 0] = 10
     if using_copy_on_write:
         assert not np.shares_memory(get_array(result, "a"), get_array(s1))
@@ -248,7 +256,8 @@ def test_dataframe_from_dict_of_series(
     result = DataFrame(
         {"a": s1, "b": s2}, index=index, columns=columns, dtype=dtype, copy=False
     )
-    s1.iloc[0] = 10
+    with tm.assert_cow_warning(warn_copy_on_write):
+        s1.iloc[0] = 10
     if using_copy_on_write:
         assert not np.shares_memory(get_array(result, "a"), get_array(s1))
         tm.assert_frame_equal(result, expected)
@@ -278,15 +287,19 @@ def test_dataframe_from_dict_of_series_with_reindex(dtype):
 @pytest.mark.parametrize(
     "data, dtype", [([1, 2], None), ([1, 2], "int64"), (["a", "b"], None)]
 )
-def test_dataframe_from_series_or_index(using_copy_on_write, data, dtype, cons):
+def test_dataframe_from_series_or_index(
+    using_copy_on_write, warn_copy_on_write, data, dtype, cons
+):
     obj = cons(data, dtype=dtype)
     obj_orig = obj.copy()
     df = DataFrame(obj, dtype=dtype)
     assert np.shares_memory(get_array(obj), get_array(df, 0))
     if using_copy_on_write:
         assert not df._mgr._has_no_reference(0)
 
-    df.iloc[0, 0] = data[-1]
+    # TODO(CoW-warn) should not warn for an index?
+    with tm.assert_cow_warning(warn_copy_on_write):
+        df.iloc[0, 0] = data[-1]
     if using_copy_on_write:
         tm.assert_equal(obj, obj_orig)
 
@@ -341,15 +354,16 @@ def test_frame_from_numpy_array(using_copy_on_write, copy, using_array_manager):
         assert np.shares_memory(get_array(df, 0), arr)
 
 
-def test_dataframe_from_records_with_dataframe(using_copy_on_write):
+def test_dataframe_from_records_with_dataframe(using_copy_on_write, warn_copy_on_write):
     df = DataFrame({"a": [1, 2, 3]})
     df_orig = df.copy()
     with tm.assert_produces_warning(FutureWarning):
         df2 = DataFrame.from_records(df)
     if using_copy_on_write:
         assert not df._mgr._has_no_reference(0)
     assert np.shares_memory(get_array(df, "a"), get_array(df2, "a"))
-    df2.iloc[0, 0] = 100
+    with tm.assert_cow_warning(warn_copy_on_write):
+        df2.iloc[0, 0] = 100
     if using_copy_on_write:
         tm.assert_frame_equal(df, df_orig)
     else: