From 9b30bdd98a7a675fc66000c3c01947e807da982e Mon Sep 17 00:00:00 2001 From: Patrick Hoefler <61934744+phofl@users.noreply.github.com> Date: Sat, 25 Nov 2023 23:32:15 +0100 Subject: [PATCH 1/9] CoW: Warn for cases that go through putmask --- pandas/core/internals/base.py | 15 ++++- pandas/core/internals/blocks.py | 58 ++++++++++++++++++- pandas/core/internals/managers.py | 25 +------- pandas/tests/copy_view/test_clip.py | 8 ++- pandas/tests/copy_view/test_indexing.py | 19 ++---- pandas/tests/copy_view/test_methods.py | 25 +++++--- pandas/tests/copy_view/test_replace.py | 8 ++- pandas/tests/frame/methods/test_replace.py | 8 ++- .../indexing/test_chaining_and_caching.py | 5 +- 9 files changed, 115 insertions(+), 56 deletions(-) diff --git a/pandas/core/internals/base.py b/pandas/core/internals/base.py index fe366c7375c6a..ee45707791fbf 100644 --- a/pandas/core/internals/base.py +++ b/pandas/core/internals/base.py @@ -14,7 +14,10 @@ import numpy as np -from pandas._config import using_copy_on_write +from pandas._config import ( + using_copy_on_write, + warn_copy_on_write, +) from pandas._libs import ( algos as libalgos, @@ -49,6 +52,11 @@ ) +class _AlreadyWarned: + def __init__(self): + self.warned_already = False + + class DataManager(PandasObject): # TODO share more methods/attributes @@ -203,12 +211,17 @@ def putmask(self, mask, new, align: bool = True) -> Self: align_keys = ["mask"] new = extract_array(new, extract_numpy=True) + already_warned = None + if warn_copy_on_write(): + already_warned = _AlreadyWarned() + return self.apply_with_block( "putmask", align_keys=align_keys, mask=mask, new=new, using_cow=using_copy_on_write(), + already_warned=already_warned, ) @final diff --git a/pandas/core/internals/blocks.py b/pandas/core/internals/blocks.py index 535d18f99f0ef..ec58b46371149 100644 --- a/pandas/core/internals/blocks.py +++ b/pandas/core/internals/blocks.py @@ -18,6 +18,7 @@ from pandas._config import ( get_option, using_copy_on_write, + warn_copy_on_write, ) from pandas._libs import ( @@ -136,6 +137,29 @@ _dtype_obj = np.dtype("object") +COW_WARNING_GENERAL_MSG = """\ +Setting a value on a view: behaviour will change in pandas 3.0. +You are mutating a Series or DataFrame object, and currently this mutation will +also have effect on other Series or DataFrame objects that share data with this +object. In pandas 3.0 (with Copy-on-Write), updating one Series or DataFrame object +will never modify another. +""" + + +COW_WARNING_SETITEM_MSG = """\ +Setting a value on a view: behaviour will change in pandas 3.0. +Currently, the mutation will also have effect on the object that shares data +with this object. For example, when setting a value in a Series that was +extracted from a column of a DataFrame, that DataFrame will also be updated: + + ser = df["col"] + ser[0] = 0 <--- in pandas 2, this also updates `df` + +In pandas 3.0 (with Copy-on-Write), updating one Series/DataFrame will never +modify another, and thus in the example above, `df` will not be changed. +""" + + def maybe_split(meth: F) -> F: """ If we have a multi-column block, split and operate block-wise. Otherwise @@ -1355,7 +1379,9 @@ def setitem(self, indexer, value, using_cow: bool = False) -> Block: values[indexer] = casted return self - def putmask(self, mask, new, using_cow: bool = False) -> list[Block]: + def putmask( + self, mask, new, using_cow: bool = False, already_warned=None + ) -> list[Block]: """ putmask the data to the block; it is possible that we may create a new dtype of block @@ -1388,6 +1414,19 @@ def putmask(self, mask, new, using_cow: bool = False) -> list[Block]: return [self.copy(deep=False)] return [self] + if ( + warn_copy_on_write() + and already_warned is not None + and not already_warned.warned_already + ): + if self.refs.has_reference(): + warnings.warn( + COW_WARNING_SETITEM_MSG, + FutureWarning, + stacklevel=find_stack_level(), + ) + already_warned.warned_already = True + try: casted = np_can_hold_element(values.dtype, new) @@ -2020,7 +2059,9 @@ def where( return [nb] @final - def putmask(self, mask, new, using_cow: bool = False) -> list[Block]: + def putmask( + self, mask, new, using_cow: bool = False, already_warned=None + ) -> list[Block]: """ See Block.putmask.__doc__ """ @@ -2038,6 +2079,19 @@ def putmask(self, mask, new, using_cow: bool = False) -> list[Block]: return [self.copy(deep=False)] return [self] + if ( + warn_copy_on_write() + and already_warned is not None + and not already_warned.warned_already + ): + if self.refs.has_reference(): + warnings.warn( + COW_WARNING_SETITEM_MSG, + FutureWarning, + stacklevel=find_stack_level(), + ) + already_warned.warned_already = True + self = self._maybe_copy(using_cow, inplace=True) values = self.values if values.ndim == 2: diff --git a/pandas/core/internals/managers.py b/pandas/core/internals/managers.py index 843441e4865c7..6d6552f38c59c 100644 --- a/pandas/core/internals/managers.py +++ b/pandas/core/internals/managers.py @@ -72,6 +72,8 @@ interleaved_dtype, ) from pandas.core.internals.blocks import ( + COW_WARNING_GENERAL_MSG, + COW_WARNING_SETITEM_MSG, Block, NumpyBlock, ensure_block_shape, @@ -100,29 +102,6 @@ from pandas.api.extensions import ExtensionArray -COW_WARNING_GENERAL_MSG = """\ -Setting a value on a view: behaviour will change in pandas 3.0. -You are mutating a Series or DataFrame object, and currently this mutation will -also have effect on other Series or DataFrame objects that share data with this -object. In pandas 3.0 (with Copy-on-Write), updating one Series or DataFrame object -will never modify another. -""" - - -COW_WARNING_SETITEM_MSG = """\ -Setting a value on a view: behaviour will change in pandas 3.0. -Currently, the mutation will also have effect on the object that shares data -with this object. For example, when setting a value in a Series that was -extracted from a column of a DataFrame, that DataFrame will also be updated: - - ser = df["col"] - ser[0] = 0 <--- in pandas 2, this also updates `df` - -In pandas 3.0 (with Copy-on-Write), updating one Series/DataFrame will never -modify another, and thus in the example above, `df` will not be changed. -""" - - class BaseBlockManager(DataManager): """ Core internal data structure to implement DataFrame, Series, etc. diff --git a/pandas/tests/copy_view/test_clip.py b/pandas/tests/copy_view/test_clip.py index 6a27a0633a4aa..1363500f0d6c0 100644 --- a/pandas/tests/copy_view/test_clip.py +++ b/pandas/tests/copy_view/test_clip.py @@ -5,12 +5,16 @@ from pandas.tests.copy_view.util import get_array -def test_clip_inplace_reference(using_copy_on_write): +def test_clip_inplace_reference(using_copy_on_write, warn_copy_on_write): df = DataFrame({"a": [1.5, 2, 3]}) df_copy = df.copy() arr_a = get_array(df, "a") view = df[:] - df.clip(lower=2, inplace=True) + if warn_copy_on_write: + with tm.assert_cow_warning(): + df.clip(lower=2, inplace=True) + else: + df.clip(lower=2, inplace=True) if using_copy_on_write: assert not np.shares_memory(get_array(df, "a"), arr_a) diff --git a/pandas/tests/copy_view/test_indexing.py b/pandas/tests/copy_view/test_indexing.py index 2e623f885b648..20522218876d6 100644 --- a/pandas/tests/copy_view/test_indexing.py +++ b/pandas/tests/copy_view/test_indexing.py @@ -367,10 +367,11 @@ def test_subset_set_with_mask(backend, using_copy_on_write, warn_copy_on_write): mask = subset > 3 - # TODO(CoW-warn) should warn -> mask is a DataFrame, which ends up going through - # DataFrame._where(..., inplace=True) - if using_copy_on_write or warn_copy_on_write: + if using_copy_on_write: subset[mask] = 0 + elif warn_copy_on_write: + with tm.assert_cow_warning(): + subset[mask] = 0 else: with pd.option_context("chained_assignment", "warn"): with tm.assert_produces_warning(SettingWithCopyWarning): @@ -867,18 +868,8 @@ def test_series_subset_set_with_indexer( and indexer.dtype.kind == "i" ): warn = FutureWarning - is_mask = ( - indexer_si is tm.setitem - and isinstance(indexer, np.ndarray) - and indexer.dtype.kind == "b" - ) if warn_copy_on_write: - # TODO(CoW-warn) should also warn for setting with mask - # -> Series.__setitem__ with boolean mask ends up using Series._set_values - # or Series._where depending on value being set - with tm.assert_cow_warning( - not is_mask, raise_on_extra_warnings=warn is not None - ): + with tm.assert_cow_warning(raise_on_extra_warnings=warn is not None): indexer_si(subset)[indexer] = 0 else: with tm.assert_produces_warning(warn, match=msg): diff --git a/pandas/tests/copy_view/test_methods.py b/pandas/tests/copy_view/test_methods.py index 6416dd50daddc..f5ac2530a9ee2 100644 --- a/pandas/tests/copy_view/test_methods.py +++ b/pandas/tests/copy_view/test_methods.py @@ -1407,11 +1407,12 @@ def test_items(using_copy_on_write, warn_copy_on_write): @pytest.mark.parametrize("dtype", ["int64", "Int64"]) -def test_putmask(using_copy_on_write, dtype): +def test_putmask(using_copy_on_write, dtype, warn_copy_on_write): df = DataFrame({"a": [1, 2], "b": 1, "c": 2}, dtype=dtype) view = df[:] df_orig = df.copy() - df[df == df] = 5 + with tm.assert_cow_warning(warn_copy_on_write): + df[df == df] = 5 if using_copy_on_write: assert not np.shares_memory(get_array(view, "a"), get_array(df, "a")) @@ -1445,15 +1446,21 @@ def test_putmask_aligns_rhs_no_reference(using_copy_on_write, dtype): @pytest.mark.parametrize( "val, exp, warn", [(5.5, True, FutureWarning), (5, False, None)] ) -def test_putmask_dont_copy_some_blocks(using_copy_on_write, val, exp, warn): +def test_putmask_dont_copy_some_blocks( + using_copy_on_write, val, exp, warn, warn_copy_on_write +): df = DataFrame({"a": [1, 2], "b": 1, "c": 1.5}) view = df[:] df_orig = df.copy() indexer = DataFrame( [[True, False, False], [True, False, False]], columns=list("abc") ) - with tm.assert_produces_warning(warn, match="incompatible dtype"): - df[indexer] = val + if warn_copy_on_write: + with tm.assert_cow_warning(): + df[indexer] = val + else: + with tm.assert_produces_warning(warn, match="incompatible dtype"): + df[indexer] = val if using_copy_on_write: assert not np.shares_memory(get_array(view, "a"), get_array(df, "a")) @@ -1785,13 +1792,17 @@ def test_update_frame(using_copy_on_write, warn_copy_on_write): tm.assert_frame_equal(view, expected) -def test_update_series(using_copy_on_write): +def test_update_series(using_copy_on_write, warn_copy_on_write): ser1 = Series([1.0, 2.0, 3.0]) ser2 = Series([100.0], index=[1]) ser1_orig = ser1.copy() view = ser1[:] - ser1.update(ser2) + if warn_copy_on_write: + with tm.assert_cow_warning(): + ser1.update(ser2) + else: + ser1.update(ser2) expected = Series([1.0, 100.0, 3.0]) tm.assert_series_equal(ser1, expected) diff --git a/pandas/tests/copy_view/test_replace.py b/pandas/tests/copy_view/test_replace.py index d11a2893becdc..3d8559a1905fc 100644 --- a/pandas/tests/copy_view/test_replace.py +++ b/pandas/tests/copy_view/test_replace.py @@ -279,14 +279,18 @@ def test_replace_categorical(using_copy_on_write, val): @pytest.mark.parametrize("method", ["where", "mask"]) -def test_masking_inplace(using_copy_on_write, method): +def test_masking_inplace(using_copy_on_write, method, warn_copy_on_write): df = DataFrame({"a": [1.5, 2, 3]}) df_orig = df.copy() arr_a = get_array(df, "a") view = df[:] method = getattr(df, method) - method(df["a"] > 1.6, -1, inplace=True) + if warn_copy_on_write: + with tm.assert_cow_warning(): + method(df["a"] > 1.6, -1, inplace=True) + else: + method(df["a"] > 1.6, -1, inplace=True) if using_copy_on_write: assert not np.shares_memory(get_array(df, "a"), arr_a) diff --git a/pandas/tests/frame/methods/test_replace.py b/pandas/tests/frame/methods/test_replace.py index f07c53060a06b..cbca15ea1ed2d 100644 --- a/pandas/tests/frame/methods/test_replace.py +++ b/pandas/tests/frame/methods/test_replace.py @@ -717,7 +717,7 @@ def test_replace_value_is_none(self, datetime_frame): datetime_frame.iloc[0, 0] = orig_value datetime_frame.iloc[1, 0] = orig2 - def test_replace_for_new_dtypes(self, datetime_frame): + def test_replace_for_new_dtypes(self, datetime_frame, warn_copy_on_write): # dtypes tsframe = datetime_frame.copy().astype(np.float32) tsframe.loc[tsframe.index[:5], "A"] = np.nan @@ -732,7 +732,11 @@ def test_replace_for_new_dtypes(self, datetime_frame): tsframe.loc[tsframe.index[:5], "B"] = -1e8 b = tsframe["B"] - b[b == -1e8] = np.nan + if warn_copy_on_write: + with tm.assert_cow_warning(): + b[b == -1e8] = np.nan + else: + b[b == -1e8] = np.nan tsframe["B"] = b msg = "DataFrame.fillna with 'method' is deprecated" with tm.assert_produces_warning(FutureWarning, match=msg): diff --git a/pandas/tests/indexing/test_chaining_and_caching.py b/pandas/tests/indexing/test_chaining_and_caching.py index 21aab6652a300..b406118246e88 100644 --- a/pandas/tests/indexing/test_chaining_and_caching.py +++ b/pandas/tests/indexing/test_chaining_and_caching.py @@ -164,9 +164,8 @@ def test_setitem_chained_setfault(self, using_copy_on_write, warn_copy_on_write) df.response[mask] = "none" tm.assert_frame_equal(df, DataFrame({"response": data})) else: - # TODO(CoW-warn) should warn - # with tm.assert_cow_warning(warn_copy_on_write): - df.response[mask] = "none" + with tm.assert_cow_warning(warn_copy_on_write): + df.response[mask] = "none" tm.assert_frame_equal(df, DataFrame({"response": mdata})) recarray = np.rec.fromarrays([data], names=["response"]) From 8585bd761a05847705dee803a30c438588f363b5 Mon Sep 17 00:00:00 2001 From: Patrick Hoefler <61934744+phofl@users.noreply.github.com> Date: Sun, 26 Nov 2023 00:29:29 +0100 Subject: [PATCH 2/9] Fix --- pandas/tests/indexing/test_chaining_and_caching.py | 11 ++++------- 1 file changed, 4 insertions(+), 7 deletions(-) diff --git a/pandas/tests/indexing/test_chaining_and_caching.py b/pandas/tests/indexing/test_chaining_and_caching.py index b406118246e88..3243e5577feb4 100644 --- a/pandas/tests/indexing/test_chaining_and_caching.py +++ b/pandas/tests/indexing/test_chaining_and_caching.py @@ -176,9 +176,8 @@ def test_setitem_chained_setfault(self, using_copy_on_write, warn_copy_on_write) df.response[mask] = "none" tm.assert_frame_equal(df, DataFrame({"response": data})) else: - # TODO(CoW-warn) should warn - # with tm.assert_cow_warning(warn_copy_on_write): - df.response[mask] = "none" + with tm.assert_cow_warning(warn_copy_on_write): + df.response[mask] = "none" tm.assert_frame_equal(df, DataFrame({"response": mdata})) df = DataFrame({"response": data, "response1": data}) @@ -189,9 +188,8 @@ def test_setitem_chained_setfault(self, using_copy_on_write, warn_copy_on_write) df.response[mask] = "none" tm.assert_frame_equal(df, df_original) else: - # TODO(CoW-warn) should warn - # with tm.assert_cow_warning(warn_copy_on_write): - df.response[mask] = "none" + with tm.assert_cow_warning(warn_copy_on_write): + df.response[mask] = "none" tm.assert_frame_equal(df, DataFrame({"response": mdata, "response1": data})) # GH 6056 @@ -202,7 +200,6 @@ def test_setitem_chained_setfault(self, using_copy_on_write, warn_copy_on_write) df["A"].iloc[0] = np.nan expected = DataFrame({"A": ["foo", "bar", "bah", "foo", "bar"]}) else: - # TODO(CoW-warn) custom warning message with tm.assert_cow_warning(warn_copy_on_write): df["A"].iloc[0] = np.nan expected = DataFrame({"A": [np.nan, "bar", "bah", "foo", "bar"]}) From cad8aad6e320e25c2d73d346a5243029866e6190 Mon Sep 17 00:00:00 2001 From: Patrick Hoefler <61934744+phofl@users.noreply.github.com> Date: Mon, 27 Nov 2023 22:21:35 +0100 Subject: [PATCH 3/9] Update base.py --- pandas/core/internals/base.py | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/pandas/core/internals/base.py b/pandas/core/internals/base.py index ee45707791fbf..5ac82c25bcde9 100644 --- a/pandas/core/internals/base.py +++ b/pandas/core/internals/base.py @@ -54,6 +54,11 @@ class _AlreadyWarned: def __init__(self): + # This class is used on the manager level to the block level to + # ensure that we warn only once. The block method can update the + # warned_already option without returning a value to keep the + # interface consistent. This is only a temporary solution for + # CoW warnings. self.warned_already = False From 4aa030af74a8d6c99c9225d55d06722eed8842dc Mon Sep 17 00:00:00 2001 From: Patrick Hoefler <61934744+phofl@users.noreply.github.com> Date: Mon, 27 Nov 2023 23:52:58 +0100 Subject: [PATCH 4/9] Update base.py --- pandas/core/internals/base.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/pandas/core/internals/base.py b/pandas/core/internals/base.py index 5ac82c25bcde9..60ab251e437c7 100644 --- a/pandas/core/internals/base.py +++ b/pandas/core/internals/base.py @@ -54,10 +54,10 @@ class _AlreadyWarned: def __init__(self): - # This class is used on the manager level to the block level to + # This class is used on the manager level to the block level to # ensure that we warn only once. The block method can update the - # warned_already option without returning a value to keep the - # interface consistent. This is only a temporary solution for + # warned_already option without returning a value to keep the + # interface consistent. This is only a temporary solution for # CoW warnings. self.warned_already = False From b5992b31ff6a18ee9a1dd35d4a0bbc07b9647dc2 Mon Sep 17 00:00:00 2001 From: Patrick Hoefler <61934744+phofl@users.noreply.github.com> Date: Fri, 1 Dec 2023 21:43:53 +0100 Subject: [PATCH 5/9] Update test --- pandas/tests/indexing/test_chaining_and_caching.py | 8 -------- 1 file changed, 8 deletions(-) diff --git a/pandas/tests/indexing/test_chaining_and_caching.py b/pandas/tests/indexing/test_chaining_and_caching.py index 08e9a94dc6ead..8287e7107ba66 100644 --- a/pandas/tests/indexing/test_chaining_and_caching.py +++ b/pandas/tests/indexing/test_chaining_and_caching.py @@ -155,8 +155,6 @@ def test_setitem_chained_setfault(self, using_copy_on_write, warn_copy_on_write) if using_copy_on_write: tm.assert_frame_equal(df, DataFrame({"response": data})) else: - with tm.assert_cow_warning(warn_copy_on_write): - df.response[mask] = "none" tm.assert_frame_equal(df, DataFrame({"response": mdata})) recarray = np.rec.fromarrays([data], names=["response"]) @@ -167,8 +165,6 @@ def test_setitem_chained_setfault(self, using_copy_on_write, warn_copy_on_write) if using_copy_on_write: tm.assert_frame_equal(df, DataFrame({"response": data})) else: - with tm.assert_cow_warning(warn_copy_on_write): - df.response[mask] = "none" tm.assert_frame_equal(df, DataFrame({"response": mdata})) df = DataFrame({"response": data, "response1": data}) @@ -179,8 +175,6 @@ def test_setitem_chained_setfault(self, using_copy_on_write, warn_copy_on_write) if using_copy_on_write: tm.assert_frame_equal(df, df_original) else: - with tm.assert_cow_warning(warn_copy_on_write): - df.response[mask] = "none" tm.assert_frame_equal(df, DataFrame({"response": mdata, "response1": data})) # GH 6056 @@ -191,8 +185,6 @@ def test_setitem_chained_setfault(self, using_copy_on_write, warn_copy_on_write) if using_copy_on_write: expected = DataFrame({"A": ["foo", "bar", "bah", "foo", "bar"]}) else: - with tm.assert_cow_warning(warn_copy_on_write): - df["A"].iloc[0] = np.nan expected = DataFrame({"A": [np.nan, "bar", "bah", "foo", "bar"]}) result = df.head() tm.assert_frame_equal(result, expected) From 9dee585247789ad613464d0fdbea624b298a6327 Mon Sep 17 00:00:00 2001 From: Patrick Hoefler <61934744+phofl@users.noreply.github.com> Date: Fri, 1 Dec 2023 22:12:40 +0100 Subject: [PATCH 6/9] Fixup --- .../copy_view/test_chained_assignment_deprecation.py | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/pandas/tests/copy_view/test_chained_assignment_deprecation.py b/pandas/tests/copy_view/test_chained_assignment_deprecation.py index 7b08d9b80fc9b..4f53dd8a78858 100644 --- a/pandas/tests/copy_view/test_chained_assignment_deprecation.py +++ b/pandas/tests/copy_view/test_chained_assignment_deprecation.py @@ -70,7 +70,7 @@ def test_methods_iloc_getitem_item_cache(func, args, using_copy_on_write): @pytest.mark.parametrize( "indexer", [0, [0, 1], slice(0, 2), np.array([True, False, True])] ) -def test_series_setitem(indexer, using_copy_on_write): +def test_series_setitem(indexer, using_copy_on_write, warn_copy_on_write): # ensure we only get a single warning for those typical cases of chained # assignment df = DataFrame({"a": [1, 2, 3], "b": 1}) @@ -79,7 +79,11 @@ def test_series_setitem(indexer, using_copy_on_write): # fail if multiple warnings are raised with pytest.warns() as record: df["a"][indexer] = 0 - assert len(record) == 1 + assert ( + len(record) == 2 + if warn_copy_on_write and isinstance(indexer, np.ndarray) + else 1 + ) if using_copy_on_write: assert record[0].category == ChainedAssignmentError else: From f903dd7f38bf6aa0551e4dc23874f04263b67b6c Mon Sep 17 00:00:00 2001 From: Patrick Hoefler <61934744+phofl@users.noreply.github.com> Date: Fri, 1 Dec 2023 22:58:41 +0100 Subject: [PATCH 7/9] Fixup --- pandas/core/internals/base.py | 1 + pandas/core/internals/blocks.py | 21 +++++++++++++++++++- pandas/tests/copy_view/test_interp_fillna.py | 14 +++++++------ pandas/tests/frame/methods/test_fillna.py | 3 +-- 4 files changed, 30 insertions(+), 9 deletions(-) diff --git a/pandas/core/internals/base.py b/pandas/core/internals/base.py index 60ab251e437c7..9618f3d64b1e9 100644 --- a/pandas/core/internals/base.py +++ b/pandas/core/internals/base.py @@ -190,6 +190,7 @@ def fillna(self, value, limit: int | None, inplace: bool, downcast) -> Self: inplace=inplace, downcast=downcast, using_cow=using_copy_on_write(), + already_warned=_AlreadyWarned(), ) @final diff --git a/pandas/core/internals/blocks.py b/pandas/core/internals/blocks.py index ec58b46371149..152313843eeb8 100644 --- a/pandas/core/internals/blocks.py +++ b/pandas/core/internals/blocks.py @@ -1591,6 +1591,7 @@ def fillna( inplace: bool = False, downcast=None, using_cow: bool = False, + already_warned=None, ) -> list[Block]: """ fillna on the block with the value. If we fail, then convert to @@ -1626,7 +1627,9 @@ def fillna( mask[mask.cumsum(self.ndim - 1) > limit] = False if inplace: - nbs = self.putmask(mask.T, value, using_cow=using_cow) + nbs = self.putmask( + mask.T, value, using_cow=using_cow, already_warned=already_warned + ) else: # without _downcast, we would break # test_fillna_dtype_conversion_equiv_replace @@ -2208,6 +2211,7 @@ def fillna( inplace: bool = False, downcast=None, using_cow: bool = False, + already_warned=None, ) -> list[Block]: if isinstance(self.dtype, IntervalDtype): # Block.fillna handles coercion (test_fillna_interval) @@ -2217,6 +2221,7 @@ def fillna( inplace=inplace, downcast=downcast, using_cow=using_cow, + already_warned=already_warned, ) if using_cow and self._can_hold_na and not self.values._hasna: refs = self.refs @@ -2244,6 +2249,20 @@ def fillna( DeprecationWarning, stacklevel=find_stack_level(), ) + else: + if ( + not copy + and warn_copy_on_write() + and already_warned is not None + and not already_warned.warned_already + ): + if self.refs.has_reference(): + warnings.warn( + COW_WARNING_SETITEM_MSG, + FutureWarning, + stacklevel=find_stack_level(), + ) + already_warned.warned_already = True nb = self.make_block_same_class(new_values, refs=refs) return nb._maybe_downcast([nb], downcast, using_cow=using_cow, caller="fillna") diff --git a/pandas/tests/copy_view/test_interp_fillna.py b/pandas/tests/copy_view/test_interp_fillna.py index cdb06de90a568..ecfb7e67bfae4 100644 --- a/pandas/tests/copy_view/test_interp_fillna.py +++ b/pandas/tests/copy_view/test_interp_fillna.py @@ -229,14 +229,15 @@ def test_fillna_inplace(using_copy_on_write, downcast): assert df._mgr._has_no_reference(1) -def test_fillna_inplace_reference(using_copy_on_write): +def test_fillna_inplace_reference(using_copy_on_write, warn_copy_on_write): df = DataFrame({"a": [1.5, np.nan], "b": 1}) df_orig = df.copy() arr_a = get_array(df, "a") arr_b = get_array(df, "b") view = df[:] - df.fillna(5.5, inplace=True) + with tm.assert_cow_warning(warn_copy_on_write): + df.fillna(5.5, inplace=True) if using_copy_on_write: assert not np.shares_memory(get_array(df, "a"), arr_a) assert np.shares_memory(get_array(df, "b"), arr_b) @@ -250,7 +251,7 @@ def test_fillna_inplace_reference(using_copy_on_write): tm.assert_frame_equal(df, expected) -def test_fillna_interval_inplace_reference(using_copy_on_write): +def test_fillna_interval_inplace_reference(using_copy_on_write, warn_copy_on_write): # Set dtype explicitly to avoid implicit cast when setting nan ser = Series( interval_range(start=0, end=5), name="a", dtype="interval[float64, right]" @@ -259,7 +260,8 @@ def test_fillna_interval_inplace_reference(using_copy_on_write): ser_orig = ser.copy() view = ser[:] - ser.fillna(value=Interval(left=0, right=5), inplace=True) + with tm.assert_cow_warning(warn_copy_on_write): + ser.fillna(value=Interval(left=0, right=5), inplace=True) if using_copy_on_write: assert not np.shares_memory( @@ -330,8 +332,8 @@ def test_fillna_inplace_ea_noop_shares_memory( df = DataFrame({"a": [1, NA, 3], "b": 1}, dtype=any_numeric_ea_and_arrow_dtype) df_orig = df.copy() view = df[:] - # TODO(CoW-warn) - df.fillna(100, inplace=True) + with tm.assert_cow_warning(warn_copy_on_write): + df.fillna(100, inplace=True) if isinstance(df["a"].dtype, ArrowDtype) or using_copy_on_write: assert not np.shares_memory(get_array(df, "a"), get_array(view, "a")) diff --git a/pandas/tests/frame/methods/test_fillna.py b/pandas/tests/frame/methods/test_fillna.py index f80ef09b50629..960f05a6457a4 100644 --- a/pandas/tests/frame/methods/test_fillna.py +++ b/pandas/tests/frame/methods/test_fillna.py @@ -745,8 +745,7 @@ def test_inplace_dict_update_view( df = DataFrame({"x": [np.nan, 2], "y": [np.nan, 2]}) df_orig = df.copy() result_view = df[:] - # TODO(CoW-warn) better warning message + should warn in all cases - with tm.assert_cow_warning(warn_copy_on_write and not isinstance(val, int)): + with tm.assert_cow_warning(warn_copy_on_write): df.fillna(val, inplace=True) expected = DataFrame({"x": [-1, 2.0], "y": [-1.0, 2]}) tm.assert_frame_equal(df, expected) From b637bfc6ef1b411815ac7ec7bb1244530fd0ab74 Mon Sep 17 00:00:00 2001 From: Joris Van den Bossche Date: Mon, 4 Dec 2023 20:39:55 +0100 Subject: [PATCH 8/9] clean-up merge --- .../tests/copy_view/test_chained_assignment_deprecation.py | 6 +----- pandas/tests/frame/methods/test_replace.py | 2 +- pandas/tests/indexing/test_chaining_and_caching.py | 2 +- 3 files changed, 3 insertions(+), 7 deletions(-) diff --git a/pandas/tests/copy_view/test_chained_assignment_deprecation.py b/pandas/tests/copy_view/test_chained_assignment_deprecation.py index 16b58bd80a419..b51a5920917d6 100644 --- a/pandas/tests/copy_view/test_chained_assignment_deprecation.py +++ b/pandas/tests/copy_view/test_chained_assignment_deprecation.py @@ -85,11 +85,7 @@ def test_series_setitem(indexer, using_copy_on_write, warn_copy_on_write): # fail if multiple warnings are raised with pytest.warns() as record: df["a"][indexer] = 0 - assert ( - len(record) == 2 - if warn_copy_on_write and isinstance(indexer, np.ndarray) - else 1 - ) + assert len(record) == 1 if using_copy_on_write: assert record[0].category == ChainedAssignmentError else: diff --git a/pandas/tests/frame/methods/test_replace.py b/pandas/tests/frame/methods/test_replace.py index 088bbb5584605..13e2c1a249ac2 100644 --- a/pandas/tests/frame/methods/test_replace.py +++ b/pandas/tests/frame/methods/test_replace.py @@ -717,7 +717,7 @@ def test_replace_value_is_none(self, datetime_frame): datetime_frame.iloc[0, 0] = orig_value datetime_frame.iloc[1, 0] = orig2 - def test_replace_for_new_dtypes(self, datetime_frame, warn_copy_on_write): + def test_replace_for_new_dtypes(self, datetime_frame): # dtypes tsframe = datetime_frame.copy().astype(np.float32) tsframe.loc[tsframe.index[:5], "A"] = np.nan diff --git a/pandas/tests/indexing/test_chaining_and_caching.py b/pandas/tests/indexing/test_chaining_and_caching.py index 2e08b393d0283..b97df376ac47f 100644 --- a/pandas/tests/indexing/test_chaining_and_caching.py +++ b/pandas/tests/indexing/test_chaining_and_caching.py @@ -139,7 +139,7 @@ def test_altering_series_clears_parent_cache( class TestChaining: - def test_setitem_chained_setfault(self, using_copy_on_write, warn_copy_on_write): + def test_setitem_chained_setfault(self, using_copy_on_write): # GH6026 data = ["right", "left", "left", "left", "right", "left", "timeout"] mdata = ["right", "left", "left", "left", "right", "left", "none"] From 9acd8f6e821ce5069cd8fdde23c200cb23067d2d Mon Sep 17 00:00:00 2001 From: Patrick Hoefler <61934744+phofl@users.noreply.github.com> Date: Mon, 4 Dec 2023 22:43:10 +0100 Subject: [PATCH 9/9] Update pandas/core/internals/blocks.py Co-authored-by: Joris Van den Bossche --- pandas/core/internals/blocks.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/core/internals/blocks.py b/pandas/core/internals/blocks.py index 394dedf129cbf..85780bad1e403 100644 --- a/pandas/core/internals/blocks.py +++ b/pandas/core/internals/blocks.py @@ -2258,7 +2258,7 @@ def fillna( ): if self.refs.has_reference(): warnings.warn( - COW_WARNING_SETITEM_MSG, + COW_WARNING_GENERAL_MSG, FutureWarning, stacklevel=find_stack_level(), )