From ea5889bd7eeec46a3e51742c95c0cb7a79016a80 Mon Sep 17 00:00:00 2001 From: Joris Van den Bossche Date: Mon, 13 Mar 2023 11:59:11 +0100 Subject: [PATCH 1/3] CoW: change ChainedAssignmentError exception to a warning --- pandas/_testing/contexts.py | 4 ++-- pandas/core/frame.py | 4 +++- pandas/core/indexing.py | 5 ++++- pandas/core/series.py | 4 +++- pandas/errors/__init__.py | 4 ++-- pandas/tests/io/test_spss.py | 12 ++++++------ 6 files changed, 20 insertions(+), 13 deletions(-) diff --git a/pandas/_testing/contexts.py b/pandas/_testing/contexts.py index eb1e39ebd9c4d..db4ddd45db955 100644 --- a/pandas/_testing/contexts.py +++ b/pandas/_testing/contexts.py @@ -211,9 +211,9 @@ def raises_chained_assignment_error(): return nullcontext() else: - import pytest + from pandas._testing import assert_produces_warning - return pytest.raises( + return assert_produces_warning( ChainedAssignmentError, match=( "A value is trying to be set on a copy of a DataFrame or Series " diff --git a/pandas/core/frame.py b/pandas/core/frame.py index 70019030da182..72db30bd03e0b 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -3946,7 +3946,9 @@ def isetitem(self, loc, value) -> None: def __setitem__(self, key, value): if not PYPY and using_copy_on_write(): if sys.getrefcount(self) <= 3: - raise ChainedAssignmentError(_chained_assignment_msg) + warnings.warn( + _chained_assignment_msg, ChainedAssignmentError, stacklevel=2 + ) key = com.apply_if_callable(key, self) diff --git a/pandas/core/indexing.py b/pandas/core/indexing.py index 4a24ed221c89f..cef9184e9d997 100644 --- a/pandas/core/indexing.py +++ b/pandas/core/indexing.py @@ -10,6 +10,7 @@ cast, final, ) +import warnings import numpy as np @@ -832,7 +833,9 @@ def _ensure_listlike_indexer(self, key, axis=None, value=None) -> None: def __setitem__(self, key, value) -> None: if not PYPY and using_copy_on_write(): if sys.getrefcount(self.obj) <= 2: - raise ChainedAssignmentError(_chained_assignment_msg) + warnings.warn( + _chained_assignment_msg, ChainedAssignmentError, stacklevel=2 + ) check_dict_or_set_indexers(key) if isinstance(key, tuple): diff --git a/pandas/core/series.py b/pandas/core/series.py index 1dac028d7b54a..e1f4e6c351b8f 100644 --- a/pandas/core/series.py +++ b/pandas/core/series.py @@ -1076,7 +1076,9 @@ def _get_value(self, label, takeable: bool = False): def __setitem__(self, key, value) -> None: if not PYPY and using_copy_on_write(): if sys.getrefcount(self) <= 3: - raise ChainedAssignmentError(_chained_assignment_msg) + warnings.warn( + _chained_assignment_msg, ChainedAssignmentError, stacklevel=2 + ) check_dict_or_set_indexers(key) key = com.apply_if_callable(key, self) diff --git a/pandas/errors/__init__.py b/pandas/errors/__init__.py index c4b804de6a110..c26b478338a55 100644 --- a/pandas/errors/__init__.py +++ b/pandas/errors/__init__.py @@ -320,9 +320,9 @@ class SettingWithCopyWarning(Warning): """ -class ChainedAssignmentError(ValueError): +class ChainedAssignmentError(Warning): """ - Exception raised when trying to set using chained assignment. + Warning raised when trying to set using chained assignment. When the ``mode.copy_on_write`` option is enabled, chained assignment can never work. In such a situation, we are always setting into a temporary diff --git a/pandas/tests/io/test_spss.py b/pandas/tests/io/test_spss.py index 7b19d2dafb34e..c02f2bf3c29b9 100644 --- a/pandas/tests/io/test_spss.py +++ b/pandas/tests/io/test_spss.py @@ -3,15 +3,15 @@ import numpy as np import pytest -import pandas.util._test_decorators as td - import pandas as pd import pandas._testing as tm pyreadstat = pytest.importorskip("pyreadstat") -@td.skip_copy_on_write_not_yet_implemented +# TODO(CoW) - detection of chained assignment in cython +# https://github.com/pandas-dev/pandas/issues/51315 +@pytest.mark.filterwarnings("ignore::pandas.errors.ChainedAssignmentError") @pytest.mark.parametrize("path_klass", [lambda p: p, Path]) def test_spss_labelled_num(path_klass, datapath): # test file from the Haven project (https://haven.tidyverse.org/) @@ -27,7 +27,7 @@ def test_spss_labelled_num(path_klass, datapath): tm.assert_frame_equal(df, expected) -@td.skip_copy_on_write_not_yet_implemented +@pytest.mark.filterwarnings("ignore::pandas.errors.ChainedAssignmentError") def test_spss_labelled_num_na(datapath): # test file from the Haven project (https://haven.tidyverse.org/) fname = datapath("io", "data", "spss", "labelled-num-na.sav") @@ -42,7 +42,7 @@ def test_spss_labelled_num_na(datapath): tm.assert_frame_equal(df, expected) -@td.skip_copy_on_write_not_yet_implemented +@pytest.mark.filterwarnings("ignore::pandas.errors.ChainedAssignmentError") def test_spss_labelled_str(datapath): # test file from the Haven project (https://haven.tidyverse.org/) fname = datapath("io", "data", "spss", "labelled-str.sav") @@ -57,7 +57,7 @@ def test_spss_labelled_str(datapath): tm.assert_frame_equal(df, expected) -@td.skip_copy_on_write_not_yet_implemented +@pytest.mark.filterwarnings("ignore::pandas.errors.ChainedAssignmentError") def test_spss_umlauts(datapath): # test file from the Haven project (https://haven.tidyverse.org/) fname = datapath("io", "data", "spss", "umlauts.sav") From 84d48c465fab1b7f2d40b7b58cd53d1623006a27 Mon Sep 17 00:00:00 2001 From: Joris Van den Bossche Date: Mon, 13 Mar 2023 15:04:43 +0100 Subject: [PATCH 2/3] update docs --- doc/source/user_guide/copy_on_write.rst | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/doc/source/user_guide/copy_on_write.rst b/doc/source/user_guide/copy_on_write.rst index 94dde9a6ffd70..b1d9166782128 100644 --- a/doc/source/user_guide/copy_on_write.rst +++ b/doc/source/user_guide/copy_on_write.rst @@ -114,10 +114,11 @@ two subsequent indexing operations, e.g. The column ``foo`` is updated where the column ``bar`` is greater than 5. This violates the CoW principles though, because it would have to modify the view ``df["foo"]`` and ``df`` in one step. Hence, chained assignment will -consistently never work and raise a ``ChainedAssignmentError`` with CoW enabled: +consistently never work and raise a ``ChainedAssignmentError`` warning +with CoW enabled: .. ipython:: python - :okexcept: + :okwarning: df = pd.DataFrame({"foo": [1, 2, 3], "bar": [4, 5, 6]}) df["foo"][df["bar"] > 5] = 100 From 892e961c1d89cdd2df4f12a7405f118efcf2d5f0 Mon Sep 17 00:00:00 2001 From: Joris Van den Bossche Date: Mon, 13 Mar 2023 23:21:13 +0100 Subject: [PATCH 3/3] update whatsnew --- doc/source/whatsnew/v2.0.0.rst | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/doc/source/whatsnew/v2.0.0.rst b/doc/source/whatsnew/v2.0.0.rst index c987588097953..7a5f914215677 100644 --- a/doc/source/whatsnew/v2.0.0.rst +++ b/doc/source/whatsnew/v2.0.0.rst @@ -206,12 +206,12 @@ Copy-on-Write improvements of those Series objects for the columns of the DataFrame (:issue:`50777`) - Trying to set values using chained assignment (for example, ``df["a"][1:3] = 0``) - will now always raise an exception when Copy-on-Write is enabled. In this mode, + will now always raise an warning when Copy-on-Write is enabled. In this mode, chained assignment can never work because we are always setting into a temporary object that is the result of an indexing operation (getitem), which under Copy-on-Write always behaves as a copy. Thus, assigning through a chain can never update the original Series or DataFrame. Therefore, an informative - error is raised to the user instead of silently doing nothing (:issue:`49467`) + warning is raised to the user to avoid silently doing nothing (:issue:`49467`) - :meth:`DataFrame.replace` will now respect the Copy-on-Write mechanism when ``inplace=True``.