From bee9e9709fb66a8181df5e340ddf2299e1dd5d07 Mon Sep 17 00:00:00 2001 From: Joris Van den Bossche Date: Wed, 15 Mar 2023 16:29:07 +0100 Subject: [PATCH] Backport PR #51926: CoW: change ChainedAssignmentError exception to a warning --- doc/source/user_guide/copy_on_write.rst | 5 +++-- doc/source/whatsnew/v2.0.0.rst | 4 ++-- pandas/_testing/contexts.py | 4 ++-- pandas/core/frame.py | 4 +++- pandas/core/indexing.py | 5 ++++- pandas/core/series.py | 4 +++- pandas/errors/__init__.py | 4 ++-- pandas/tests/io/test_spss.py | 12 ++++++------ 8 files changed, 25 insertions(+), 17 deletions(-) diff --git a/doc/source/user_guide/copy_on_write.rst b/doc/source/user_guide/copy_on_write.rst index 94dde9a6ffd70..b1d9166782128 100644 --- a/doc/source/user_guide/copy_on_write.rst +++ b/doc/source/user_guide/copy_on_write.rst @@ -114,10 +114,11 @@ two subsequent indexing operations, e.g. The column ``foo`` is updated where the column ``bar`` is greater than 5. This violates the CoW principles though, because it would have to modify the view ``df["foo"]`` and ``df`` in one step. Hence, chained assignment will -consistently never work and raise a ``ChainedAssignmentError`` with CoW enabled: +consistently never work and raise a ``ChainedAssignmentError`` warning +with CoW enabled: .. ipython:: python - :okexcept: + :okwarning: df = pd.DataFrame({"foo": [1, 2, 3], "bar": [4, 5, 6]}) df["foo"][df["bar"] > 5] = 100 diff --git a/doc/source/whatsnew/v2.0.0.rst b/doc/source/whatsnew/v2.0.0.rst index 6650077355615..ba2f504b4c944 100644 --- a/doc/source/whatsnew/v2.0.0.rst +++ b/doc/source/whatsnew/v2.0.0.rst @@ -191,12 +191,12 @@ Copy-on-Write improvements of those Series objects for the columns of the DataFrame (:issue:`50777`) - Trying to set values using chained assignment (for example, ``df["a"][1:3] = 0``) - will now always raise an exception when Copy-on-Write is enabled. In this mode, + will now always raise an warning when Copy-on-Write is enabled. In this mode, chained assignment can never work because we are always setting into a temporary object that is the result of an indexing operation (getitem), which under Copy-on-Write always behaves as a copy. Thus, assigning through a chain can never update the original Series or DataFrame. Therefore, an informative - error is raised to the user instead of silently doing nothing (:issue:`49467`) + warning is raised to the user to avoid silently doing nothing (:issue:`49467`) - :meth:`DataFrame.replace` will now respect the Copy-on-Write mechanism when ``inplace=True``. diff --git a/pandas/_testing/contexts.py b/pandas/_testing/contexts.py index bf625a086b9ad..4479cfc06f89c 100644 --- a/pandas/_testing/contexts.py +++ b/pandas/_testing/contexts.py @@ -208,9 +208,9 @@ def raises_chained_assignment_error(): return nullcontext() else: - import pytest + from pandas._testing import assert_produces_warning - return pytest.raises( + return assert_produces_warning( ChainedAssignmentError, match=( "A value is trying to be set on a copy of a DataFrame or Series " diff --git a/pandas/core/frame.py b/pandas/core/frame.py index 18f0278705924..786f7c7a11ed0 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -3904,7 +3904,9 @@ def isetitem(self, loc, value) -> None: def __setitem__(self, key, value): if not PYPY and using_copy_on_write(): if sys.getrefcount(self) <= 3: - raise ChainedAssignmentError(_chained_assignment_msg) + warnings.warn( + _chained_assignment_msg, ChainedAssignmentError, stacklevel=2 + ) key = com.apply_if_callable(key, self) diff --git a/pandas/core/indexing.py b/pandas/core/indexing.py index 4c5bc659897d1..afb5ab036b5b5 100644 --- a/pandas/core/indexing.py +++ b/pandas/core/indexing.py @@ -10,6 +10,7 @@ cast, final, ) +import warnings import numpy as np @@ -831,7 +832,9 @@ def _ensure_listlike_indexer(self, key, axis=None, value=None) -> None: def __setitem__(self, key, value) -> None: if not PYPY and using_copy_on_write(): if sys.getrefcount(self.obj) <= 2: - raise ChainedAssignmentError(_chained_assignment_msg) + warnings.warn( + _chained_assignment_msg, ChainedAssignmentError, stacklevel=2 + ) check_dict_or_set_indexers(key) if isinstance(key, tuple): diff --git a/pandas/core/series.py b/pandas/core/series.py index e55e5461befd5..4ba63e05bae17 100644 --- a/pandas/core/series.py +++ b/pandas/core/series.py @@ -1121,7 +1121,9 @@ def _get_value(self, label, takeable: bool = False): def __setitem__(self, key, value) -> None: if not PYPY and using_copy_on_write(): if sys.getrefcount(self) <= 3: - raise ChainedAssignmentError(_chained_assignment_msg) + warnings.warn( + _chained_assignment_msg, ChainedAssignmentError, stacklevel=2 + ) check_dict_or_set_indexers(key) key = com.apply_if_callable(key, self) diff --git a/pandas/errors/__init__.py b/pandas/errors/__init__.py index 3ecee50ffbaa7..e9c0047711f42 100644 --- a/pandas/errors/__init__.py +++ b/pandas/errors/__init__.py @@ -320,9 +320,9 @@ class SettingWithCopyWarning(Warning): """ -class ChainedAssignmentError(ValueError): +class ChainedAssignmentError(Warning): """ - Exception raised when trying to set using chained assignment. + Warning raised when trying to set using chained assignment. When the ``mode.copy_on_write`` option is enabled, chained assignment can never work. In such a situation, we are always setting into a temporary diff --git a/pandas/tests/io/test_spss.py b/pandas/tests/io/test_spss.py index 9e1f6cf7cd8d4..d1d0795234e72 100644 --- a/pandas/tests/io/test_spss.py +++ b/pandas/tests/io/test_spss.py @@ -3,15 +3,15 @@ import numpy as np import pytest -import pandas.util._test_decorators as td - import pandas as pd import pandas._testing as tm pyreadstat = pytest.importorskip("pyreadstat") -@td.skip_copy_on_write_not_yet_implemented +# TODO(CoW) - detection of chained assignment in cython +# https://github.com/pandas-dev/pandas/issues/51315 +@pytest.mark.filterwarnings("ignore::pandas.errors.ChainedAssignmentError") @pytest.mark.parametrize("path_klass", [lambda p: p, Path]) def test_spss_labelled_num(path_klass, datapath): # test file from the Haven project (https://haven.tidyverse.org/) @@ -27,7 +27,7 @@ def test_spss_labelled_num(path_klass, datapath): tm.assert_frame_equal(df, expected) -@td.skip_copy_on_write_not_yet_implemented +@pytest.mark.filterwarnings("ignore::pandas.errors.ChainedAssignmentError") def test_spss_labelled_num_na(datapath): # test file from the Haven project (https://haven.tidyverse.org/) fname = datapath("io", "data", "spss", "labelled-num-na.sav") @@ -42,7 +42,7 @@ def test_spss_labelled_num_na(datapath): tm.assert_frame_equal(df, expected) -@td.skip_copy_on_write_not_yet_implemented +@pytest.mark.filterwarnings("ignore::pandas.errors.ChainedAssignmentError") def test_spss_labelled_str(datapath): # test file from the Haven project (https://haven.tidyverse.org/) fname = datapath("io", "data", "spss", "labelled-str.sav") @@ -57,7 +57,7 @@ def test_spss_labelled_str(datapath): tm.assert_frame_equal(df, expected) -@td.skip_copy_on_write_not_yet_implemented +@pytest.mark.filterwarnings("ignore::pandas.errors.ChainedAssignmentError") def test_spss_umlauts(datapath): # test file from the Haven project (https://haven.tidyverse.org/) fname = datapath("io", "data", "spss", "umlauts.sav")