From 16e31e96ae7fa6f8d6e183e7a124ad75e6d1405c Mon Sep 17 00:00:00 2001 From: Joris Van den Bossche Date: Tue, 1 Nov 2022 15:21:18 +0100 Subject: [PATCH 01/11] API: detect and raise error for chained assignment under Copy-on-Write --- pandas/core/frame.py | 13 ++- pandas/core/indexing.py | 12 ++ pandas/core/series.py | 14 ++- pandas/errors/__init__.py | 22 ++++ pandas/tests/frame/indexing/test_setitem.py | 12 +- pandas/tests/frame/indexing/test_xs.py | 8 +- .../tests/frame/methods/test_sort_values.py | 4 +- pandas/tests/frame/test_block_internals.py | 11 +- .../multiindex/test_chaining_and_caching.py | 13 ++- .../tests/indexing/multiindex/test_partial.py | 15 ++- .../tests/indexing/multiindex/test_setitem.py | 12 +- .../indexing/test_chaining_and_caching.py | 104 +++++++++++++----- .../series/accessors/test_dt_accessor.py | 9 +- .../tests/series/methods/test_sort_values.py | 1 + 14 files changed, 194 insertions(+), 56 deletions(-) diff --git a/pandas/core/frame.py b/pandas/core/frame.py index fbc78da26c4b6..462d221224a2f 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -16,6 +16,7 @@ import functools from io import StringIO import itertools +import sys from textwrap import dedent from typing import ( TYPE_CHECKING, @@ -95,7 +96,10 @@ function as nv, np_percentile_argname, ) -from pandas.errors import InvalidIndexError +from pandas.errors import ( + ChainedAssignmentError, + InvalidIndexError, +) from pandas.util._decorators import ( Appender, Substitution, @@ -3838,6 +3842,13 @@ def isetitem(self, loc, value) -> None: self._iset_item_mgr(loc, arraylike, inplace=False) def __setitem__(self, key, value): + if ( + get_option("mode.copy_on_write") + and get_option("mode.data_manager") == "block" + ): + if sys.getrefcount(self) <= 3: + raise ChainedAssignmentError("Chained assignment doesn't work!!") + key = com.apply_if_callable(key, self) # see if we can slice the rows diff --git a/pandas/core/indexing.py b/pandas/core/indexing.py index 
636e376197ef1..56e5f1c5efc7c 100644 --- a/pandas/core/indexing.py +++ b/pandas/core/indexing.py @@ -1,6 +1,7 @@ from __future__ import annotations from contextlib import suppress +import sys from typing import ( TYPE_CHECKING, Hashable, @@ -12,6 +13,8 @@ import numpy as np +from pandas._config import get_option + from pandas._libs.indexing import NDFrameIndexerBase from pandas._libs.lib import item_from_zerodim from pandas._typing import ( @@ -20,6 +23,7 @@ ) from pandas.errors import ( AbstractMethodError, + ChainedAssignmentError, IndexingError, InvalidIndexError, LossySetitemError, @@ -830,6 +834,14 @@ def _ensure_listlike_indexer(self, key, axis=None, value=None) -> None: @final def __setitem__(self, key, value) -> None: + if ( + get_option("mode.copy_on_write") + and get_option("mode.data_manager") == "block" + ): + print("_LocationIndexer.__setitem__ refcount: ", sys.getrefcount(self.obj)) + if sys.getrefcount(self.obj) <= 2: + raise ChainedAssignmentError("Chained assignment doesn't work!!") + check_dict_or_set_indexers(key) if isinstance(key, tuple): key = tuple(list(x) if is_iterator(x) else x for x in key) diff --git a/pandas/core/series.py b/pandas/core/series.py index 950499b1ae40d..9437343ac9051 100644 --- a/pandas/core/series.py +++ b/pandas/core/series.py @@ -3,6 +3,7 @@ """ from __future__ import annotations +import sys from textwrap import dedent from typing import ( IO, @@ -68,7 +69,10 @@ npt, ) from pandas.compat.numpy import function as nv -from pandas.errors import InvalidIndexError +from pandas.errors import ( + ChainedAssignmentError, + InvalidIndexError, +) from pandas.util._decorators import ( Appender, Substitution, @@ -1070,6 +1074,14 @@ def _get_value(self, label, takeable: bool = False): return self.iloc[loc] def __setitem__(self, key, value) -> None: + if ( + get_option("mode.copy_on_write") + and get_option("mode.data_manager") == "block" + ): + print("Series.__getitem__ refcount: ", sys.getrefcount(self)) + if sys.getrefcount(self) 
<= 3: + raise ChainedAssignmentError("Chained assignment doesn't work!!") + check_dict_or_set_indexers(key) key = com.apply_if_callable(key, self) cacher_needs_updating = self._check_is_chained_assignment_possible() diff --git a/pandas/errors/__init__.py b/pandas/errors/__init__.py index 89ac1c10254cb..fda7782a983d6 100644 --- a/pandas/errors/__init__.py +++ b/pandas/errors/__init__.py @@ -298,6 +298,28 @@ class SettingWithCopyError(ValueError): """ +class ChainedAssignmentError(ValueError): + """ + Exception raised when trying to set on a copied slice from a ``DataFrame``. + + The ``mode.chained_assignment`` needs to be set to set to 'raise.' This can + happen unintentionally when chained indexing. + + For more information on eveluation order, + see :ref:`the user guide`. + + For more information on view vs. copy, + see :ref:`the user guide`. + + Examples + -------- + >>> pd.options.mode.chained_assignment = 'raise' + >>> df = pd.DataFrame({'A': [1, 1, 1, 2, 2]}, columns=['A']) + >>> df.loc[0:3]['A'] = 'a' # doctest: +SKIP + ... # SettingWithCopyError: A value is trying to be set on a copy of a... + """ + + class SettingWithCopyWarning(Warning): """ Warning raised when trying to set on a copied slice from a ``DataFrame``. 
diff --git a/pandas/tests/frame/indexing/test_setitem.py b/pandas/tests/frame/indexing/test_setitem.py index d8d626b3af84a..4b34eac7a5add 100644 --- a/pandas/tests/frame/indexing/test_setitem.py +++ b/pandas/tests/frame/indexing/test_setitem.py @@ -3,6 +3,7 @@ import numpy as np import pytest +from pandas.errors import ChainedAssignmentError import pandas.util._test_decorators as td from pandas.core.dtypes.base import _registry as ea_registry @@ -1126,7 +1127,7 @@ def test_setitem_ea_boolean_mask(self): class TestDataFrameSetitemCopyViewSemantics: - def test_setitem_always_copy(self, float_frame): + def test_setitem_always_copy(self, float_frame, using_copy_on_write): assert "E" not in float_frame.columns s = float_frame["A"].copy() float_frame["E"] = s @@ -1245,12 +1246,15 @@ def test_setitem_column_update_inplace(self, using_copy_on_write): df = DataFrame({col: np.zeros(len(labels)) for col in labels}, index=labels) values = df._mgr.blocks[0].values - for label in df.columns: - df[label][label] = 1 - if not using_copy_on_write: + for label in df.columns: + df[label][label] = 1 + # diagonal values all updated assert np.all(values[np.arange(10), np.arange(10)] == 1) else: + with pytest.raises(ChainedAssignmentError): + for label in df.columns: + df[label][label] = 1 # original dataframe not updated assert np.all(values[np.arange(10), np.arange(10)] == 0) diff --git a/pandas/tests/frame/indexing/test_xs.py b/pandas/tests/frame/indexing/test_xs.py index b7549771c7cc5..cb0308cac6f29 100644 --- a/pandas/tests/frame/indexing/test_xs.py +++ b/pandas/tests/frame/indexing/test_xs.py @@ -3,7 +3,10 @@ import numpy as np import pytest -from pandas.errors import SettingWithCopyError +from pandas.errors import ( + ChainedAssignmentError, + SettingWithCopyError, +) from pandas import ( DataFrame, @@ -124,7 +127,8 @@ def test_xs_view(self, using_array_manager, using_copy_on_write): df_orig = dm.copy() if using_copy_on_write: - dm.xs(2)[:] = 20 + with 
pytest.raises(ChainedAssignmentError): + dm.xs(2)[:] = 20 tm.assert_frame_equal(dm, df_orig) elif using_array_manager: # INFO(ArrayManager) with ArrayManager getting a row as a view is diff --git a/pandas/tests/frame/methods/test_sort_values.py b/pandas/tests/frame/methods/test_sort_values.py index c16c500a11d0e..b1a78696a4cd4 100644 --- a/pandas/tests/frame/methods/test_sort_values.py +++ b/pandas/tests/frame/methods/test_sort_values.py @@ -345,8 +345,8 @@ def test_sort_values_frame_column_inplace_sort_exception( with pytest.raises(ValueError, match="This Series is a view"): s.sort_values(inplace=True) - cp = s.copy() - cp.sort_values() # it works! + cp = s.copy() + cp.sort_values() # it works! def test_sort_values_nat_values_in_int_column(self): diff --git a/pandas/tests/frame/test_block_internals.py b/pandas/tests/frame/test_block_internals.py index f2de6b607d737..1cb427fb186b5 100644 --- a/pandas/tests/frame/test_block_internals.py +++ b/pandas/tests/frame/test_block_internals.py @@ -7,7 +7,10 @@ import numpy as np import pytest -from pandas.errors import PerformanceWarning +from pandas.errors import ( + ChainedAssignmentError, + PerformanceWarning, +) import pandas.util._test_decorators as td import pandas as pd @@ -340,7 +343,11 @@ def test_stale_cached_series_bug_473(self, using_copy_on_write): ) repr(Y) Y["e"] = Y["e"].astype("object") - Y["g"]["c"] = np.NaN + if using_copy_on_write: + with pytest.raises(ChainedAssignmentError): + Y["g"]["c"] = np.NaN + else: + Y["g"]["c"] = np.NaN repr(Y) result = Y.sum() # noqa exp = Y["g"].sum() # noqa diff --git a/pandas/tests/indexing/multiindex/test_chaining_and_caching.py b/pandas/tests/indexing/multiindex/test_chaining_and_caching.py index 2efb288a73f8d..70e5f4ce5d9c3 100644 --- a/pandas/tests/indexing/multiindex/test_chaining_and_caching.py +++ b/pandas/tests/indexing/multiindex/test_chaining_and_caching.py @@ -1,7 +1,10 @@ import numpy as np import pytest -from pandas.errors import SettingWithCopyError +from 
pandas.errors import ( + ChainedAssignmentError, + SettingWithCopyError, +) import pandas.util._test_decorators as td from pandas import ( @@ -50,11 +53,13 @@ def test_cache_updating(using_copy_on_write): # setting via chained assignment # but actually works, since everything is a view - df.loc[0]["z"].iloc[0] = 1.0 - result = df.loc[(0, 0), "z"] if using_copy_on_write: - assert result == df_original.loc[0, "z"] + with pytest.raises(ChainedAssignmentError): + df.loc[0]["z"].iloc[0] = 1.0 + assert df.loc[(0, 0), "z"] == df_original.loc[0, "z"] else: + df.loc[0]["z"].iloc[0] = 1.0 + result = df.loc[(0, 0), "z"] assert result == 1 # correct setting diff --git a/pandas/tests/indexing/multiindex/test_partial.py b/pandas/tests/indexing/multiindex/test_partial.py index c81473cb945bc..3950f9d13a83b 100644 --- a/pandas/tests/indexing/multiindex/test_partial.py +++ b/pandas/tests/indexing/multiindex/test_partial.py @@ -1,6 +1,7 @@ import numpy as np import pytest +from pandas.errors import ChainedAssignmentError import pandas.util._test_decorators as td from pandas import ( @@ -132,9 +133,13 @@ def test_partial_set( exp.iloc[65:85] = 0 tm.assert_frame_equal(df, exp) - df["A"].loc[2000, 4] = 1 - if not using_copy_on_write: - exp["A"].loc[2000, 4].values[:] = 1 + if using_copy_on_write: + with pytest.raises(ChainedAssignmentError): + df["A"].loc[2000, 4] = 1 + df.loc[(2000, 4), "A"] = 1 + else: + df["A"].loc[2000, 4] = 1 + exp.iloc[65:85, 0] = 1 tm.assert_frame_equal(df, exp) df.loc[2000] = 5 @@ -142,10 +147,12 @@ def test_partial_set( tm.assert_frame_equal(df, exp) # this works...for now - df["A"].iloc[14] = 5 if using_copy_on_write: + with pytest.raises(ChainedAssignmentError): + df["A"].iloc[14] = 5 df["A"].iloc[14] == exp["A"].iloc[14] else: + df["A"].iloc[14] = 5 assert df["A"].iloc[14] == 5 @pytest.mark.parametrize("dtype", [int, float]) diff --git a/pandas/tests/indexing/multiindex/test_setitem.py b/pandas/tests/indexing/multiindex/test_setitem.py index 
ac10a6d82dc89..41466a68d2fb1 100644 --- a/pandas/tests/indexing/multiindex/test_setitem.py +++ b/pandas/tests/indexing/multiindex/test_setitem.py @@ -1,7 +1,10 @@ import numpy as np import pytest -from pandas.errors import SettingWithCopyError +from pandas.errors import ( + ChainedAssignmentError, + SettingWithCopyError, +) import pandas.util._test_decorators as td import pandas as pd @@ -501,8 +504,8 @@ def test_frame_setitem_copy_raises( # will raise/warn as its chained assignment df = multiindex_dataframe_random_data.T if using_copy_on_write: - # TODO(CoW) it would be nice if this could still warn/raise - df["foo"]["one"] = 2 + with pytest.raises(ChainedAssignmentError): + df["foo"]["one"] = 2 else: msg = "A value is trying to be set on a copy of a slice from a DataFrame" with pytest.raises(SettingWithCopyError, match=msg): @@ -516,7 +519,8 @@ def test_frame_setitem_copy_no_write( expected = frame df = frame.copy() if using_copy_on_write: - df["foo"]["one"] = 2 + with pytest.raises(ChainedAssignmentError): + df["foo"]["one"] = 2 else: msg = "A value is trying to be set on a copy of a slice from a DataFrame" with pytest.raises(SettingWithCopyError, match=msg): diff --git a/pandas/tests/indexing/test_chaining_and_caching.py b/pandas/tests/indexing/test_chaining_and_caching.py index 2656cc77c2a9d..61425690265a4 100644 --- a/pandas/tests/indexing/test_chaining_and_caching.py +++ b/pandas/tests/indexing/test_chaining_and_caching.py @@ -4,6 +4,7 @@ import pytest from pandas.errors import ( + ChainedAssignmentError, SettingWithCopyError, SettingWithCopyWarning, ) @@ -50,7 +51,11 @@ def test_slice_consolidate_invalidate_item_cache(self, using_copy_on_write): repr(df) # Assignment to wrong series - df["bb"].iloc[0] = 0.17 + if using_copy_on_write: + with pytest.raises(ChainedAssignmentError): + df["bb"].iloc[0] = 0.17 + else: + df["bb"].iloc[0] = 0.17 df._clear_item_cache() if not using_copy_on_write: tm.assert_almost_equal(df["bb"][0], 0.17) @@ -99,7 +104,11 @@ def 
test_setitem_cache_updating_slices(self, using_copy_on_write): out_original = out.copy() for ix, row in df.iterrows(): v = out[row["C"]][six:eix] + row["D"] - out[row["C"]][six:eix] = v + if using_copy_on_write: + with pytest.raises(ChainedAssignmentError): + out[row["C"]][six:eix] = v + else: + out[row["C"]][six:eix] = v if not using_copy_on_write: tm.assert_frame_equal(out, expected) @@ -143,43 +152,55 @@ def test_setitem_chained_setfault(self, using_copy_on_write): df = DataFrame({"response": np.array(data)}) mask = df.response == "timeout" - df.response[mask] = "none" if using_copy_on_write: + with pytest.raises(ChainedAssignmentError): + df.response[mask] = "none" tm.assert_frame_equal(df, DataFrame({"response": data})) else: + df.response[mask] = "none" tm.assert_frame_equal(df, DataFrame({"response": mdata})) recarray = np.rec.fromarrays([data], names=["response"]) df = DataFrame(recarray) mask = df.response == "timeout" - df.response[mask] = "none" if using_copy_on_write: + with pytest.raises(ChainedAssignmentError): + df.response[mask] = "none" tm.assert_frame_equal(df, DataFrame({"response": data})) else: + df.response[mask] = "none" tm.assert_frame_equal(df, DataFrame({"response": mdata})) df = DataFrame({"response": data, "response1": data}) df_original = df.copy() mask = df.response == "timeout" - df.response[mask] = "none" if using_copy_on_write: + with pytest.raises(ChainedAssignmentError): + df.response[mask] = "none" tm.assert_frame_equal(df, df_original) else: + df.response[mask] = "none" tm.assert_frame_equal(df, DataFrame({"response": mdata, "response1": data})) # GH 6056 expected = DataFrame({"A": [np.nan, "bar", "bah", "foo", "bar"]}) df = DataFrame({"A": np.array(["foo", "bar", "bah", "foo", "bar"])}) - df["A"].iloc[0] = np.nan - result = df.head() if using_copy_on_write: + with pytest.raises(ChainedAssignmentError): + df["A"].iloc[0] = np.nan expected = DataFrame({"A": ["foo", "bar", "bah", "foo", "bar"]}) else: + df["A"].iloc[0] = np.nan 
expected = DataFrame({"A": [np.nan, "bar", "bah", "foo", "bar"]}) + result = df.head() tm.assert_frame_equal(result, expected) df = DataFrame({"A": np.array(["foo", "bar", "bah", "foo", "bar"])}) - df.A.iloc[0] = np.nan + if using_copy_on_write: + with pytest.raises(ChainedAssignmentError): + df.A.iloc[0] = np.nan + else: + df.A.iloc[0] = np.nan result = df.head() tm.assert_frame_equal(result, expected) @@ -195,11 +216,15 @@ def test_detect_chained_assignment(self, using_copy_on_write): df_original = df.copy() assert df._is_copy is None - df["A"][0] = -5 - df["A"][1] = -6 if using_copy_on_write: + with pytest.raises(ChainedAssignmentError): + df["A"][0] = -5 + with pytest.raises(ChainedAssignmentError): + df["A"][1] = -6 tm.assert_frame_equal(df, df_original) else: + df["A"][0] = -5 + df["A"][1] = -6 tm.assert_frame_equal(df, expected) @pytest.mark.arm_slow @@ -218,8 +243,10 @@ def test_detect_chained_assignment_raises( assert df._is_copy is None if using_copy_on_write: - df["A"][0] = -5 - df["A"][1] = -6 + with pytest.raises(ChainedAssignmentError): + df["A"][0] = -5 + with pytest.raises(ChainedAssignmentError): + df["A"][1] = -6 tm.assert_frame_equal(df, df_original) elif not using_array_manager: with pytest.raises(SettingWithCopyError, match=msg): @@ -250,8 +277,8 @@ def test_detect_chained_assignment_fails(self, using_copy_on_write): ) if using_copy_on_write: - # TODO(CoW) can we still warn here? - df.loc[0]["A"] = -5 + with pytest.raises(ChainedAssignmentError): + df.loc[0]["A"] = -5 else: with pytest.raises(SettingWithCopyError, match=msg): df.loc[0]["A"] = -5 @@ -269,9 +296,9 @@ def test_detect_chained_assignment_doc_example(self, using_copy_on_write): assert df._is_copy is None if using_copy_on_write: - # TODO(CoW) can we still warn here? 
indexer = df.a.str.startswith("o") - df[indexer]["c"] = 42 + with pytest.raises(ChainedAssignmentError): + df[indexer]["c"] = 42 else: with pytest.raises(SettingWithCopyError, match=msg): indexer = df.a.str.startswith("o") @@ -291,8 +318,8 @@ def test_detect_chained_assignment_object_dtype( df.loc[0]["A"] = 111 if using_copy_on_write: - # TODO(CoW) can we still warn here? - df["A"][0] = 111 + with pytest.raises(ChainedAssignmentError): + df["A"][0] = 111 tm.assert_frame_equal(df, df_original) elif not using_array_manager: with pytest.raises(SettingWithCopyError, match=msg): @@ -420,8 +447,8 @@ def test_detect_chained_assignment_undefined_column(self, using_copy_on_write): df_original = df.copy() if using_copy_on_write: - # TODO(CoW) can we still warn here? - df.iloc[0:5]["group"] = "a" + with pytest.raises(ChainedAssignmentError): + df.iloc[0:5]["group"] = "a" tm.assert_frame_equal(df, df_original) else: with pytest.raises(SettingWithCopyError, match=msg): @@ -444,9 +471,12 @@ def test_detect_chained_assignment_changing_dtype( df_original = df.copy() if using_copy_on_write: - df.loc[2]["D"] = "foo" - df.loc[2]["C"] = "foo" - df["C"][2] = "foo" + with pytest.raises(ChainedAssignmentError): + df.loc[2]["D"] = "foo" + with pytest.raises(ChainedAssignmentError): + df.loc[2]["C"] = "foo" + with pytest.raises(ChainedAssignmentError): + df["C"][2] = "foo" tm.assert_frame_equal(df, df_original) if not using_copy_on_write: @@ -475,7 +505,8 @@ def test_setting_with_copy_bug(self, using_copy_on_write): mask = pd.isna(df.c) if using_copy_on_write: - df[["c"]][mask] = df[["b"]][mask] + with pytest.raises(ChainedAssignmentError): + df[["c"]][mask] = df[["b"]][mask] tm.assert_frame_equal(df, df_original) else: with pytest.raises(SettingWithCopyError, match=msg): @@ -493,7 +524,8 @@ def test_setting_with_copy_bug_no_warning(self): def test_detect_chained_assignment_warnings_errors(self, using_copy_on_write): df = DataFrame({"A": ["aaa", "bbb", "ccc"], "B": [1, 2, 3]}) if 
using_copy_on_write: - df.loc[0]["A"] = 111 + with pytest.raises(ChainedAssignmentError): + df.loc[0]["A"] = 111 return with option_context("chained_assignment", "warn"): @@ -559,6 +591,7 @@ def test_cache_updating2(self): index=range(5), ) df["f"] = 0 + # TODO(CoW) df.f.values[3] = 1 df.f.values[3] = 2 @@ -580,20 +613,33 @@ def test_iloc_setitem_chained_assignment(self, using_copy_on_write): ck = [True] * len(df) - df["bb"].iloc[0] = 0.13 + if using_copy_on_write: + with pytest.raises(ChainedAssignmentError): + df["bb"].iloc[0] = 0.13 + else: + df["bb"].iloc[0] = 0.13 # GH#3970 this lookup used to break the chained setting to 0.15 df.iloc[ck] - df["bb"].iloc[0] = 0.15 + if using_copy_on_write: + with pytest.raises(ChainedAssignmentError): + df["bb"].iloc[0] = 0.15 + else: + df["bb"].iloc[0] = 0.15 + if not using_copy_on_write: assert df["bb"].iloc[0] == 0.15 else: assert df["bb"].iloc[0] == 2.2 - def test_getitem_loc_assignment_slice_state(self): + def test_getitem_loc_assignment_slice_state(self, using_copy_on_write): # GH 13569 df = DataFrame({"a": [10, 20, 30]}) - df["a"].loc[4] = 40 + if using_copy_on_write: + with pytest.raises(ChainedAssignmentError): + df["a"].loc[4] = 40 + else: + df["a"].loc[4] = 40 tm.assert_frame_equal(df, DataFrame({"a": [10, 20, 30]})) tm.assert_series_equal(df["a"], Series([10, 20, 30], name="a")) diff --git a/pandas/tests/series/accessors/test_dt_accessor.py b/pandas/tests/series/accessors/test_dt_accessor.py index 689c8ba845a6c..254ec46eac7cf 100644 --- a/pandas/tests/series/accessors/test_dt_accessor.py +++ b/pandas/tests/series/accessors/test_dt_accessor.py @@ -12,7 +12,10 @@ import pytz from pandas._libs.tslibs.timezones import maybe_get_tz -from pandas.errors import SettingWithCopyError +from pandas.errors import ( + ChainedAssignmentError, + SettingWithCopyError, +) from pandas.core.dtypes.common import ( is_integer_dtype, @@ -287,8 +290,8 @@ def test_dt_accessor_not_writeable(self, using_copy_on_write): msg = "modifications 
to a property of a datetimelike.+not supported" with pd.option_context("chained_assignment", "raise"): if using_copy_on_write: - # TODO(CoW) it would be nice to keep a warning/error for this case - ser.dt.hour[0] = 5 + with pytest.raises(ChainedAssignmentError): + ser.dt.hour[0] = 5 else: with pytest.raises(SettingWithCopyError, match=msg): ser.dt.hour[0] = 5 diff --git a/pandas/tests/series/methods/test_sort_values.py b/pandas/tests/series/methods/test_sort_values.py index 6ca08c32dcfe7..d3d5165c61773 100644 --- a/pandas/tests/series/methods/test_sort_values.py +++ b/pandas/tests/series/methods/test_sort_values.py @@ -80,6 +80,7 @@ def test_sort_values(self, datetime_series, using_copy_on_write): # Series.sort_values operating on a view df = DataFrame(np.random.randn(10, 4)) s = df.iloc[:, 0] + s_orig_sorted = s.copy().sort_values() msg = ( "This Series is a view of some other array, to sort in-place " From 296170137010e52676a045de6823889aa01ff94f Mon Sep 17 00:00:00 2001 From: Joris Van den Bossche Date: Wed, 2 Nov 2022 14:46:50 +0100 Subject: [PATCH 02/11] skip SPSS tests for now (to further investigate) --- pandas/tests/io/test_spss.py | 6 ++++++ pandas/util/_test_decorators.py | 5 +++++ 2 files changed, 11 insertions(+) diff --git a/pandas/tests/io/test_spss.py b/pandas/tests/io/test_spss.py index a4894ff66ab9f..d507ab07b7cd1 100644 --- a/pandas/tests/io/test_spss.py +++ b/pandas/tests/io/test_spss.py @@ -3,12 +3,15 @@ import numpy as np import pytest +import pandas.util._test_decorators as td + import pandas as pd import pandas._testing as tm pyreadstat = pytest.importorskip("pyreadstat") +@td.skip_copy_on_write_not_yet_implemented @pytest.mark.parametrize("path_klass", [lambda p: p, Path]) def test_spss_labelled_num(path_klass, datapath): # test file from the Haven project (https://haven.tidyverse.org/) @@ -24,6 +27,7 @@ def test_spss_labelled_num(path_klass, datapath): tm.assert_frame_equal(df, expected) +@td.skip_copy_on_write_not_yet_implemented def 
test_spss_labelled_num_na(datapath): # test file from the Haven project (https://haven.tidyverse.org/) fname = datapath("io", "data", "spss", "labelled-num-na.sav") @@ -38,6 +42,7 @@ def test_spss_labelled_num_na(datapath): tm.assert_frame_equal(df, expected) +@td.skip_copy_on_write_not_yet_implemented def test_spss_labelled_str(datapath): # test file from the Haven project (https://haven.tidyverse.org/) fname = datapath("io", "data", "spss", "labelled-str.sav") @@ -52,6 +57,7 @@ def test_spss_labelled_str(datapath): tm.assert_frame_equal(df, expected) +@td.skip_copy_on_write_not_yet_implemented def test_spss_umlauts(datapath): # test file from the Haven project (https://haven.tidyverse.org/) fname = datapath("io", "data", "spss", "umlauts.sav") diff --git a/pandas/util/_test_decorators.py b/pandas/util/_test_decorators.py index 5bc55ee789fe6..de100dba8144d 100644 --- a/pandas/util/_test_decorators.py +++ b/pandas/util/_test_decorators.py @@ -294,3 +294,8 @@ def mark_array_manager_not_yet_implemented(request) -> None: get_option("mode.data_manager") == "array", reason="Test that relies on BlockManager internals or specific behaviour", ) + +skip_copy_on_write_not_yet_implemented = pytest.mark.xfail( + get_option("mode.copy_on_write"), + reason="Not yet implemented/adapted for Copy-on-Write mode", +) From 017ed3e29d8c2d04950b5de05d48ecc1896ee146 Mon Sep 17 00:00:00 2001 From: Joris Van den Bossche Date: Fri, 13 Jan 2023 14:48:47 +0100 Subject: [PATCH 03/11] use helper for option --- pandas/core/frame.py | 5 +---- pandas/core/indexing.py | 9 +++------ pandas/core/series.py | 7 ++----- 3 files changed, 6 insertions(+), 15 deletions(-) diff --git a/pandas/core/frame.py b/pandas/core/frame.py index 462d221224a2f..d2db094784a4f 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -3842,10 +3842,7 @@ def isetitem(self, loc, value) -> None: self._iset_item_mgr(loc, arraylike, inplace=False) def __setitem__(self, key, value): - if ( - 
get_option("mode.copy_on_write") - and get_option("mode.data_manager") == "block" - ): + if using_copy_on_write(): if sys.getrefcount(self) <= 3: raise ChainedAssignmentError("Chained assignment doesn't work!!") diff --git a/pandas/core/indexing.py b/pandas/core/indexing.py index 56e5f1c5efc7c..f8ae01bf68dde 100644 --- a/pandas/core/indexing.py +++ b/pandas/core/indexing.py @@ -13,7 +13,7 @@ import numpy as np -from pandas._config import get_option +from pandas._config import using_copy_on_write from pandas._libs.indexing import NDFrameIndexerBase from pandas._libs.lib import item_from_zerodim @@ -834,11 +834,8 @@ def _ensure_listlike_indexer(self, key, axis=None, value=None) -> None: @final def __setitem__(self, key, value) -> None: - if ( - get_option("mode.copy_on_write") - and get_option("mode.data_manager") == "block" - ): - print("_LocationIndexer.__setitem__ refcount: ", sys.getrefcount(self.obj)) + if using_copy_on_write(): + # print("_LocationIndexer.__setitem__ refcount: ",sys.getrefcount(self.obj)) if sys.getrefcount(self.obj) <= 2: raise ChainedAssignmentError("Chained assignment doesn't work!!") diff --git a/pandas/core/series.py b/pandas/core/series.py index 9437343ac9051..bde50d05357a2 100644 --- a/pandas/core/series.py +++ b/pandas/core/series.py @@ -1074,11 +1074,8 @@ def _get_value(self, label, takeable: bool = False): return self.iloc[loc] def __setitem__(self, key, value) -> None: - if ( - get_option("mode.copy_on_write") - and get_option("mode.data_manager") == "block" - ): - print("Series.__getitem__ refcount: ", sys.getrefcount(self)) + if using_copy_on_write(): + # print("Series.__getitem__ refcount: ", sys.getrefcount(self)) if sys.getrefcount(self) <= 3: raise ChainedAssignmentError("Chained assignment doesn't work!!") From e170b6c4ea706e42a4c951f6831050b2ec9245a3 Mon Sep 17 00:00:00 2001 From: Joris Van den Bossche Date: Fri, 13 Jan 2023 15:17:08 +0100 Subject: [PATCH 04/11] update error message --- doc/source/reference/testing.rst | 1 + 
pandas/core/frame.py | 3 +- pandas/core/indexing.py | 3 +- pandas/core/series.py | 3 +- pandas/errors/__init__.py | 50 ++++++++++++------- pandas/tests/frame/indexing/test_setitem.py | 2 +- .../tests/frame/methods/test_sort_values.py | 4 +- .../indexing/test_chaining_and_caching.py | 2 +- .../tests/series/methods/test_sort_values.py | 1 - scripts/validate_unwanted_patterns.py | 4 ++ 10 files changed, 47 insertions(+), 26 deletions(-) diff --git a/doc/source/reference/testing.rst b/doc/source/reference/testing.rst index 07624e87d82e0..edfafee430d1d 100644 --- a/doc/source/reference/testing.rst +++ b/doc/source/reference/testing.rst @@ -28,6 +28,7 @@ Exceptions and warnings errors.AccessorRegistrationWarning errors.AttributeConflictWarning errors.CategoricalConversionWarning + errors.ChainedAssignmentError errors.ClosedFileError errors.CSSWarning errors.DatabaseError diff --git a/pandas/core/frame.py b/pandas/core/frame.py index d2db094784a4f..0b898ac024da0 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -99,6 +99,7 @@ from pandas.errors import ( ChainedAssignmentError, InvalidIndexError, + _chained_assignment_msg, ) from pandas.util._decorators import ( Appender, @@ -3844,7 +3845,7 @@ def isetitem(self, loc, value) -> None: def __setitem__(self, key, value): if using_copy_on_write(): if sys.getrefcount(self) <= 3: - raise ChainedAssignmentError("Chained assignment doesn't work!!") + raise ChainedAssignmentError(_chained_assignment_msg) key = com.apply_if_callable(key, self) diff --git a/pandas/core/indexing.py b/pandas/core/indexing.py index f8ae01bf68dde..45eb5fb2fa072 100644 --- a/pandas/core/indexing.py +++ b/pandas/core/indexing.py @@ -27,6 +27,7 @@ IndexingError, InvalidIndexError, LossySetitemError, + _chained_assignment_msg, ) from pandas.util._decorators import doc @@ -837,7 +838,7 @@ def __setitem__(self, key, value) -> None: if using_copy_on_write(): # print("_LocationIndexer.__setitem__ refcount: ",sys.getrefcount(self.obj)) if 
sys.getrefcount(self.obj) <= 2: - raise ChainedAssignmentError("Chained assignment doesn't work!!") + raise ChainedAssignmentError(_chained_assignment_msg) check_dict_or_set_indexers(key) if isinstance(key, tuple): diff --git a/pandas/core/series.py b/pandas/core/series.py index bde50d05357a2..6a4dbc48065cc 100644 --- a/pandas/core/series.py +++ b/pandas/core/series.py @@ -72,6 +72,7 @@ from pandas.errors import ( ChainedAssignmentError, InvalidIndexError, + _chained_assignment_msg, ) from pandas.util._decorators import ( Appender, @@ -1077,7 +1078,7 @@ def __setitem__(self, key, value) -> None: if using_copy_on_write(): # print("Series.__getitem__ refcount: ", sys.getrefcount(self)) if sys.getrefcount(self) <= 3: - raise ChainedAssignmentError("Chained assignment doesn't work!!") + raise ChainedAssignmentError(_chained_assignment_msg) check_dict_or_set_indexers(key) key = com.apply_if_callable(key, self) diff --git a/pandas/errors/__init__.py b/pandas/errors/__init__.py index fda7782a983d6..3ed09cbff74b8 100644 --- a/pandas/errors/__init__.py +++ b/pandas/errors/__init__.py @@ -298,14 +298,15 @@ class SettingWithCopyError(ValueError): """ -class ChainedAssignmentError(ValueError): +class SettingWithCopyWarning(Warning): """ - Exception raised when trying to set on a copied slice from a ``DataFrame``. + Warning raised when trying to set on a copied slice from a ``DataFrame``. - The ``mode.chained_assignment`` needs to be set to set to 'raise.' This can - happen unintentionally when chained indexing. + The ``mode.chained_assignment`` needs to be set to set to 'warn.' + 'Warn' is the default option. This can happen unintentionally when + chained indexing. - For more information on eveluation order, + For more information on evaluation order, see :ref:`the user guide`. For more information on view vs. 
copy, @@ -313,33 +314,46 @@ class ChainedAssignmentError(ValueError): Examples -------- - >>> pd.options.mode.chained_assignment = 'raise' >>> df = pd.DataFrame({'A': [1, 1, 1, 2, 2]}, columns=['A']) >>> df.loc[0:3]['A'] = 'a' # doctest: +SKIP - ... # SettingWithCopyError: A value is trying to be set on a copy of a... + ... # SettingWithCopyWarning: A value is trying to be set on a copy of a... """ -class SettingWithCopyWarning(Warning): +class ChainedAssignmentError(ValueError): """ - Warning raised when trying to set on a copied slice from a ``DataFrame``. + Exception raised when trying to set using chained assignment. - The ``mode.chained_assignment`` needs to be set to set to 'warn.' - 'Warn' is the default option. This can happen unintentionally when - chained indexing. - - For more information on evaluation order, - see :ref:`the user guide`. + When the ``mode.copy_on_write`` option is enabled, chained assignment can + never work. In such a situation, we are always setting into a temporary + object that is the result of indexing (getitem), which under Copy-on-Write + always behaves as a copy. Thus, assigning through a chain can never + update the original Series or DataFrame. For more information on view vs. copy, see :ref:`the user guide`. Examples -------- + >>> pd.options.mode.copy_on_write = True >>> df = pd.DataFrame({'A': [1, 1, 1, 2, 2]}, columns=['A']) - >>> df.loc[0:3]['A'] = 'a' # doctest: +SKIP - ... # SettingWithCopyWarning: A value is trying to be set on a copy of a... - """ + >>> df["A"][0:3] = 10 # doctest: +SKIP + ... # ChainedAssignmentError: ... 
+ """ + + +_chained_assignment_msg = ( + "A value is trying to be set on a copy of a DataFrame or Series " + "through chained assignment.\n" + "When using the Copy-on-Write mode, such chained assignment never works " + "to update the original DataFrame or Series, because the intermediate " + "object on which we are setting values always behaves as a copy.\n\n" + "Try using '.loc[row_indexer, col_indexer] = value' instead, to perform " + "the assignment in a single step.\n\n" + "See the caveats in the documentation: " + "https://pandas.pydata.org/pandas-docs/stable/user_guide/" + "indexing.html#returning-a-view-versus-a-copy" +) class NumExprClobberingError(NameError): diff --git a/pandas/tests/frame/indexing/test_setitem.py b/pandas/tests/frame/indexing/test_setitem.py index 4b34eac7a5add..b1139c922244f 100644 --- a/pandas/tests/frame/indexing/test_setitem.py +++ b/pandas/tests/frame/indexing/test_setitem.py @@ -1127,7 +1127,7 @@ def test_setitem_ea_boolean_mask(self): class TestDataFrameSetitemCopyViewSemantics: - def test_setitem_always_copy(self, float_frame, using_copy_on_write): + def test_setitem_always_copy(self, float_frame): assert "E" not in float_frame.columns s = float_frame["A"].copy() float_frame["E"] = s diff --git a/pandas/tests/frame/methods/test_sort_values.py b/pandas/tests/frame/methods/test_sort_values.py index b1a78696a4cd4..c16c500a11d0e 100644 --- a/pandas/tests/frame/methods/test_sort_values.py +++ b/pandas/tests/frame/methods/test_sort_values.py @@ -345,8 +345,8 @@ def test_sort_values_frame_column_inplace_sort_exception( with pytest.raises(ValueError, match="This Series is a view"): s.sort_values(inplace=True) - cp = s.copy() - cp.sort_values() # it works! + cp = s.copy() + cp.sort_values() # it works! 
def test_sort_values_nat_values_in_int_column(self): diff --git a/pandas/tests/indexing/test_chaining_and_caching.py b/pandas/tests/indexing/test_chaining_and_caching.py index 61425690265a4..822d89d749618 100644 --- a/pandas/tests/indexing/test_chaining_and_caching.py +++ b/pandas/tests/indexing/test_chaining_and_caching.py @@ -591,7 +591,7 @@ def test_cache_updating2(self): index=range(5), ) df["f"] = 0 - # TODO(CoW) + # TODO(CoW) protect underlying values of being written to? df.f.values[3] = 1 df.f.values[3] = 2 diff --git a/pandas/tests/series/methods/test_sort_values.py b/pandas/tests/series/methods/test_sort_values.py index d3d5165c61773..6ca08c32dcfe7 100644 --- a/pandas/tests/series/methods/test_sort_values.py +++ b/pandas/tests/series/methods/test_sort_values.py @@ -80,7 +80,6 @@ def test_sort_values(self, datetime_series, using_copy_on_write): # Series.sort_values operating on a view df = DataFrame(np.random.randn(10, 4)) s = df.iloc[:, 0] - s_orig_sorted = s.copy().sort_values() msg = ( "This Series is a view of some other array, to sort in-place " diff --git a/scripts/validate_unwanted_patterns.py b/scripts/validate_unwanted_patterns.py index 68a376956429b..d6a7409abf238 100755 --- a/scripts/validate_unwanted_patterns.py +++ b/scripts/validate_unwanted_patterns.py @@ -53,6 +53,7 @@ "__version__", # check np.__version__ in compat.numpy.function "_arrow_dtype_mapping", "_global_config", + "_chained_assignment_msg", } @@ -132,6 +133,9 @@ def bare_pytest_raises(file_obj: IO[str]) -> Iterable[Tuple[int, str]]: except AttributeError: continue + if node.args[0].id == "ChainedAssignmentError": + continue + if not node.keywords: yield ( node.lineno, From 8d50d167ab02a08cf0b4690ad181a5f37841c51f Mon Sep 17 00:00:00 2001 From: Joris Van den Bossche Date: Mon, 16 Jan 2023 16:59:54 +0100 Subject: [PATCH 05/11] address feedback --- pandas/core/indexing.py | 1 - pandas/core/series.py | 1 - pandas/errors/__init__.py | 6 +++--- 3 files changed, 3 insertions(+), 5 
deletions(-) diff --git a/pandas/core/indexing.py b/pandas/core/indexing.py index 45eb5fb2fa072..85f9b33b5028e 100644 --- a/pandas/core/indexing.py +++ b/pandas/core/indexing.py @@ -836,7 +836,6 @@ def _ensure_listlike_indexer(self, key, axis=None, value=None) -> None: @final def __setitem__(self, key, value) -> None: if using_copy_on_write(): - # print("_LocationIndexer.__setitem__ refcount: ",sys.getrefcount(self.obj)) if sys.getrefcount(self.obj) <= 2: raise ChainedAssignmentError(_chained_assignment_msg) diff --git a/pandas/core/series.py b/pandas/core/series.py index 9b2d2cbf4c6f4..3a2243a468ad9 100644 --- a/pandas/core/series.py +++ b/pandas/core/series.py @@ -1080,7 +1080,6 @@ def _get_value(self, label, takeable: bool = False): def __setitem__(self, key, value) -> None: if using_copy_on_write(): - # print("Series.__getitem__ refcount: ", sys.getrefcount(self)) if sys.getrefcount(self) <= 3: raise ChainedAssignmentError(_chained_assignment_msg) diff --git a/pandas/errors/__init__.py b/pandas/errors/__init__.py index 3ed09cbff74b8..5a1915956616c 100644 --- a/pandas/errors/__init__.py +++ b/pandas/errors/__init__.py @@ -326,9 +326,9 @@ class ChainedAssignmentError(ValueError): When the ``mode.copy_on_write`` option is enabled, chained assignment can never work. In such a situation, we are always setting into a temporary - object that is the result of indexing (getitem), which under Copy-on-Write - always behaves as a copy. Thus, assigning through a chain can never - update the original Series or DataFrame. + object that is the result of an indexing operation (getitem), which under + Copy-on-Write always behaves as a copy. Thus, assigning through a chain + can never update the original Series or DataFrame. For more information on view vs. copy, see :ref:`the user guide <indexing.view_versus_copy>`. 
From 4521d87c37b5fa7c51c5b65570d50da1e747e95e Mon Sep 17 00:00:00 2001 From: Joris Van den Bossche Date: Mon, 16 Jan 2023 17:03:45 +0100 Subject: [PATCH 06/11] add whatsnew --- doc/source/whatsnew/v2.0.0.rst | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/doc/source/whatsnew/v2.0.0.rst b/doc/source/whatsnew/v2.0.0.rst index 7f983c97691ca..60d2fbd42641c 100644 --- a/doc/source/whatsnew/v2.0.0.rst +++ b/doc/source/whatsnew/v2.0.0.rst @@ -120,6 +120,14 @@ Copy-on-Write improvements a modification to the data happens) when constructing a Series from an existing Series with the default of ``copy=False`` (:issue:`50471`) +- Trying to set values using chained assignment (for example, ``df["a"][1:3] = 0``) + will now always raise an exception when Copy-on-Write is enabled. In this mode, + chained assignment can never work because we are always setting into a temporary + object that is the result of an indexing operation (getitem), which under + Copy-on-Write always behaves as a copy. Thus, assigning through a chain + can never update the original Series or DataFrame. Therefore, an informative + error is raised to the user instead of silently doing nothing (:issue:`49467`) + Copy-on-Write can be enabled through .. 
code-block:: python From dc712368df63df44fac560b193c9d369ac53a13c Mon Sep 17 00:00:00 2001 From: Joris Van den Bossche Date: Mon, 23 Jan 2023 11:02:44 +0100 Subject: [PATCH 07/11] add test build for CoW using PyPy --- .github/workflows/ubuntu.yml | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/.github/workflows/ubuntu.yml b/.github/workflows/ubuntu.yml index d3ad2710a0efa..38a43da205a00 100644 --- a/.github/workflows/ubuntu.yml +++ b/.github/workflows/ubuntu.yml @@ -73,6 +73,11 @@ jobs: env_file: actions-pypy-38.yaml pattern: "not slow and not network and not single_cpu" test_args: "--max-worker-restart 0" + - name: "Pypy with Copy-on-Write" + env_file: actions-pypy-38.yaml + pattern: "not slow and not network and not single_cpu" + test_args: "--max-worker-restart 0" + pandas_copy_on_write: "1" error_on_warnings: "0" - name: "Numpy Dev" env_file: actions-310-numpydev.yaml From 085dd70e388277b754755dd57c76c36285b72c4b Mon Sep 17 00:00:00 2001 From: Joris Van den Bossche Date: Mon, 23 Jan 2023 11:36:36 +0100 Subject: [PATCH 08/11] don't try to raise on PYPY + update tests for that --- pandas/_testing/__init__.py | 2 + pandas/_testing/contexts.py | 19 ++++++++ pandas/core/frame.py | 3 +- pandas/core/indexing.py | 3 +- pandas/core/series.py | 3 +- pandas/tests/frame/indexing/test_setitem.py | 3 +- pandas/tests/frame/indexing/test_xs.py | 7 +-- pandas/tests/frame/test_block_internals.py | 7 +-- .../multiindex/test_chaining_and_caching.py | 7 +-- .../tests/indexing/multiindex/test_partial.py | 5 +- .../tests/indexing/multiindex/test_setitem.py | 9 ++-- .../indexing/test_chaining_and_caching.py | 47 +++++++++---------- .../series/accessors/test_dt_accessor.py | 7 +-- scripts/validate_unwanted_patterns.py | 3 -- 14 files changed, 64 insertions(+), 61 deletions(-) diff --git a/pandas/_testing/__init__.py b/pandas/_testing/__init__.py index 6a7a1c7126cd3..eb25566e7983e 100644 --- a/pandas/_testing/__init__.py +++ b/pandas/_testing/__init__.py @@ -103,6 +103,7 @@ 
decompress_file, ensure_clean, ensure_safe_environment_variables, + raises_chained_assignment_error, set_timezone, use_numexpr, with_csv_dialect, @@ -1125,6 +1126,7 @@ def shares_memory(left, right) -> bool: "rands", "reset_display_options", "RNGContext", + "raises_chained_assignment_error", "round_trip_localpath", "round_trip_pathlib", "round_trip_pickle", diff --git a/pandas/_testing/contexts.py b/pandas/_testing/contexts.py index e5f716c62eca7..0511806277e5f 100644 --- a/pandas/_testing/contexts.py +++ b/pandas/_testing/contexts.py @@ -14,6 +14,9 @@ import numpy as np +from pandas.compat import PYPY +from pandas.errors import ChainedAssignmentError + from pandas import set_option from pandas.io.common import get_handle @@ -227,3 +230,19 @@ def __exit__( ) -> None: np.random.set_state(self.start_state) + + +if PYPY: + from contextlib import nullcontext + + raises_chained_assignment_error = nullcontext() +else: + import pytest + + raises_chained_assignment_error = pytest.raises( + ChainedAssignmentError, + match=( + "A value is trying to be set on a copy of a DataFrame or Series " + "through chained assignment" + ), + ) diff --git a/pandas/core/frame.py b/pandas/core/frame.py index 9c5ae62bfcf7c..685ed9f972a32 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -92,6 +92,7 @@ WriteBuffer, npt, ) +from pandas.compat import PYPY from pandas.compat._optional import import_optional_dependency from pandas.compat.numpy import ( function as nv, @@ -3867,7 +3868,7 @@ def isetitem(self, loc, value) -> None: self._iset_item_mgr(loc, arraylike, inplace=False) def __setitem__(self, key, value): - if using_copy_on_write(): + if not PYPY and using_copy_on_write(): if sys.getrefcount(self) <= 3: raise ChainedAssignmentError(_chained_assignment_msg) diff --git a/pandas/core/indexing.py b/pandas/core/indexing.py index b8c077fb2c0db..26b5a4077b0ff 100644 --- a/pandas/core/indexing.py +++ b/pandas/core/indexing.py @@ -21,6 +21,7 @@ Axis, AxisInt, ) +from pandas.compat 
import PYPY from pandas.errors import ( AbstractMethodError, ChainedAssignmentError, @@ -835,7 +836,7 @@ def _ensure_listlike_indexer(self, key, axis=None, value=None) -> None: @final def __setitem__(self, key, value) -> None: - if using_copy_on_write(): + if not PYPY and using_copy_on_write(): if sys.getrefcount(self.obj) <= 2: raise ChainedAssignmentError(_chained_assignment_msg) diff --git a/pandas/core/series.py b/pandas/core/series.py index 55c081644ab1a..106cf166acd14 100644 --- a/pandas/core/series.py +++ b/pandas/core/series.py @@ -68,6 +68,7 @@ WriteBuffer, npt, ) +from pandas.compat import PYPY from pandas.compat.numpy import function as nv from pandas.errors import ( ChainedAssignmentError, @@ -1079,7 +1080,7 @@ def _get_value(self, label, takeable: bool = False): return self.iloc[loc] def __setitem__(self, key, value) -> None: - if using_copy_on_write(): + if not PYPY and using_copy_on_write(): if sys.getrefcount(self) <= 3: raise ChainedAssignmentError(_chained_assignment_msg) diff --git a/pandas/tests/frame/indexing/test_setitem.py b/pandas/tests/frame/indexing/test_setitem.py index b1139c922244f..3260198dcd90d 100644 --- a/pandas/tests/frame/indexing/test_setitem.py +++ b/pandas/tests/frame/indexing/test_setitem.py @@ -3,7 +3,6 @@ import numpy as np import pytest -from pandas.errors import ChainedAssignmentError import pandas.util._test_decorators as td from pandas.core.dtypes.base import _registry as ea_registry @@ -1253,7 +1252,7 @@ def test_setitem_column_update_inplace(self, using_copy_on_write): # diagonal values all updated assert np.all(values[np.arange(10), np.arange(10)] == 1) else: - with pytest.raises(ChainedAssignmentError): + with tm.raises_chained_assignment_error: for label in df.columns: df[label][label] = 1 # original dataframe not updated diff --git a/pandas/tests/frame/indexing/test_xs.py b/pandas/tests/frame/indexing/test_xs.py index cb0308cac6f29..fbf4b928f8b8b 100644 --- a/pandas/tests/frame/indexing/test_xs.py +++ 
b/pandas/tests/frame/indexing/test_xs.py @@ -3,10 +3,7 @@ import numpy as np import pytest -from pandas.errors import ( - ChainedAssignmentError, - SettingWithCopyError, -) +from pandas.errors import SettingWithCopyError from pandas import ( DataFrame, @@ -127,7 +124,7 @@ def test_xs_view(self, using_array_manager, using_copy_on_write): df_orig = dm.copy() if using_copy_on_write: - with pytest.raises(ChainedAssignmentError): + with tm.raises_chained_assignment_error: dm.xs(2)[:] = 20 tm.assert_frame_equal(dm, df_orig) elif using_array_manager: diff --git a/pandas/tests/frame/test_block_internals.py b/pandas/tests/frame/test_block_internals.py index 24997e67f5752..d8002cd3cf8a0 100644 --- a/pandas/tests/frame/test_block_internals.py +++ b/pandas/tests/frame/test_block_internals.py @@ -7,10 +7,7 @@ import numpy as np import pytest -from pandas.errors import ( - ChainedAssignmentError, - PerformanceWarning, -) +from pandas.errors import PerformanceWarning import pandas.util._test_decorators as td import pandas as pd @@ -344,7 +341,7 @@ def test_stale_cached_series_bug_473(self, using_copy_on_write): repr(Y) Y["e"] = Y["e"].astype("object") if using_copy_on_write: - with pytest.raises(ChainedAssignmentError): + with tm.raises_chained_assignment_error: Y["g"]["c"] = np.NaN else: Y["g"]["c"] = np.NaN diff --git a/pandas/tests/indexing/multiindex/test_chaining_and_caching.py b/pandas/tests/indexing/multiindex/test_chaining_and_caching.py index 70e5f4ce5d9c3..2e773a16d417e 100644 --- a/pandas/tests/indexing/multiindex/test_chaining_and_caching.py +++ b/pandas/tests/indexing/multiindex/test_chaining_and_caching.py @@ -1,10 +1,7 @@ import numpy as np import pytest -from pandas.errors import ( - ChainedAssignmentError, - SettingWithCopyError, -) +from pandas.errors import SettingWithCopyError import pandas.util._test_decorators as td from pandas import ( @@ -54,7 +51,7 @@ def test_cache_updating(using_copy_on_write): # setting via chained assignment # but actually works, 
since everything is a view if using_copy_on_write: - with pytest.raises(ChainedAssignmentError): + with tm.raises_chained_assignment_error: df.loc[0]["z"].iloc[0] = 1.0 assert df.loc[(0, 0), "z"] == df_original.loc[0, "z"] else: diff --git a/pandas/tests/indexing/multiindex/test_partial.py b/pandas/tests/indexing/multiindex/test_partial.py index f42c748237942..725e3df06696c 100644 --- a/pandas/tests/indexing/multiindex/test_partial.py +++ b/pandas/tests/indexing/multiindex/test_partial.py @@ -1,7 +1,6 @@ import numpy as np import pytest -from pandas.errors import ChainedAssignmentError import pandas.util._test_decorators as td from pandas import ( @@ -130,7 +129,7 @@ def test_partial_set( tm.assert_frame_equal(df, exp) if using_copy_on_write: - with pytest.raises(ChainedAssignmentError): + with tm.raises_chained_assignment_error: df["A"].loc[2000, 4] = 1 df.loc[(2000, 4), "A"] = 1 else: @@ -144,7 +143,7 @@ def test_partial_set( # this works...for now if using_copy_on_write: - with pytest.raises(ChainedAssignmentError): + with tm.raises_chained_assignment_error: df["A"].iloc[14] = 5 df["A"].iloc[14] == exp["A"].iloc[14] else: diff --git a/pandas/tests/indexing/multiindex/test_setitem.py b/pandas/tests/indexing/multiindex/test_setitem.py index 41466a68d2fb1..6ec0f2694c6aa 100644 --- a/pandas/tests/indexing/multiindex/test_setitem.py +++ b/pandas/tests/indexing/multiindex/test_setitem.py @@ -1,10 +1,7 @@ import numpy as np import pytest -from pandas.errors import ( - ChainedAssignmentError, - SettingWithCopyError, -) +from pandas.errors import SettingWithCopyError import pandas.util._test_decorators as td import pandas as pd @@ -504,7 +501,7 @@ def test_frame_setitem_copy_raises( # will raise/warn as its chained assignment df = multiindex_dataframe_random_data.T if using_copy_on_write: - with pytest.raises(ChainedAssignmentError): + with tm.raises_chained_assignment_error: df["foo"]["one"] = 2 else: msg = "A value is trying to be set on a copy of a slice from a 
DataFrame" @@ -519,7 +516,7 @@ def test_frame_setitem_copy_no_write( expected = frame df = frame.copy() if using_copy_on_write: - with pytest.raises(ChainedAssignmentError): + with tm.raises_chained_assignment_error: df["foo"]["one"] = 2 else: msg = "A value is trying to be set on a copy of a slice from a DataFrame" diff --git a/pandas/tests/indexing/test_chaining_and_caching.py b/pandas/tests/indexing/test_chaining_and_caching.py index 822d89d749618..7a71a41aaa491 100644 --- a/pandas/tests/indexing/test_chaining_and_caching.py +++ b/pandas/tests/indexing/test_chaining_and_caching.py @@ -4,7 +4,6 @@ import pytest from pandas.errors import ( - ChainedAssignmentError, SettingWithCopyError, SettingWithCopyWarning, ) @@ -52,7 +51,7 @@ def test_slice_consolidate_invalidate_item_cache(self, using_copy_on_write): # Assignment to wrong series if using_copy_on_write: - with pytest.raises(ChainedAssignmentError): + with tm.raises_chained_assignment_error: df["bb"].iloc[0] = 0.17 else: df["bb"].iloc[0] = 0.17 @@ -105,7 +104,7 @@ def test_setitem_cache_updating_slices(self, using_copy_on_write): for ix, row in df.iterrows(): v = out[row["C"]][six:eix] + row["D"] if using_copy_on_write: - with pytest.raises(ChainedAssignmentError): + with tm.raises_chained_assignment_error: out[row["C"]][six:eix] = v else: out[row["C"]][six:eix] = v @@ -153,7 +152,7 @@ def test_setitem_chained_setfault(self, using_copy_on_write): df = DataFrame({"response": np.array(data)}) mask = df.response == "timeout" if using_copy_on_write: - with pytest.raises(ChainedAssignmentError): + with tm.raises_chained_assignment_error: df.response[mask] = "none" tm.assert_frame_equal(df, DataFrame({"response": data})) else: @@ -164,7 +163,7 @@ def test_setitem_chained_setfault(self, using_copy_on_write): df = DataFrame(recarray) mask = df.response == "timeout" if using_copy_on_write: - with pytest.raises(ChainedAssignmentError): + with tm.raises_chained_assignment_error: df.response[mask] = "none" 
tm.assert_frame_equal(df, DataFrame({"response": data})) else: @@ -175,7 +174,7 @@ def test_setitem_chained_setfault(self, using_copy_on_write): df_original = df.copy() mask = df.response == "timeout" if using_copy_on_write: - with pytest.raises(ChainedAssignmentError): + with tm.raises_chained_assignment_error: df.response[mask] = "none" tm.assert_frame_equal(df, df_original) else: @@ -186,7 +185,7 @@ def test_setitem_chained_setfault(self, using_copy_on_write): expected = DataFrame({"A": [np.nan, "bar", "bah", "foo", "bar"]}) df = DataFrame({"A": np.array(["foo", "bar", "bah", "foo", "bar"])}) if using_copy_on_write: - with pytest.raises(ChainedAssignmentError): + with tm.raises_chained_assignment_error: df["A"].iloc[0] = np.nan expected = DataFrame({"A": ["foo", "bar", "bah", "foo", "bar"]}) else: @@ -197,7 +196,7 @@ def test_setitem_chained_setfault(self, using_copy_on_write): df = DataFrame({"A": np.array(["foo", "bar", "bah", "foo", "bar"])}) if using_copy_on_write: - with pytest.raises(ChainedAssignmentError): + with tm.raises_chained_assignment_error: df.A.iloc[0] = np.nan else: df.A.iloc[0] = np.nan @@ -217,9 +216,9 @@ def test_detect_chained_assignment(self, using_copy_on_write): assert df._is_copy is None if using_copy_on_write: - with pytest.raises(ChainedAssignmentError): + with tm.raises_chained_assignment_error: df["A"][0] = -5 - with pytest.raises(ChainedAssignmentError): + with tm.raises_chained_assignment_error: df["A"][1] = -6 tm.assert_frame_equal(df, df_original) else: @@ -243,9 +242,9 @@ def test_detect_chained_assignment_raises( assert df._is_copy is None if using_copy_on_write: - with pytest.raises(ChainedAssignmentError): + with tm.raises_chained_assignment_error: df["A"][0] = -5 - with pytest.raises(ChainedAssignmentError): + with tm.raises_chained_assignment_error: df["A"][1] = -6 tm.assert_frame_equal(df, df_original) elif not using_array_manager: @@ -277,7 +276,7 @@ def test_detect_chained_assignment_fails(self, using_copy_on_write): ) 
if using_copy_on_write: - with pytest.raises(ChainedAssignmentError): + with tm.raises_chained_assignment_error: df.loc[0]["A"] = -5 else: with pytest.raises(SettingWithCopyError, match=msg): @@ -297,7 +296,7 @@ def test_detect_chained_assignment_doc_example(self, using_copy_on_write): if using_copy_on_write: indexer = df.a.str.startswith("o") - with pytest.raises(ChainedAssignmentError): + with tm.raises_chained_assignment_error: df[indexer]["c"] = 42 else: with pytest.raises(SettingWithCopyError, match=msg): @@ -318,7 +317,7 @@ def test_detect_chained_assignment_object_dtype( df.loc[0]["A"] = 111 if using_copy_on_write: - with pytest.raises(ChainedAssignmentError): + with tm.raises_chained_assignment_error: df["A"][0] = 111 tm.assert_frame_equal(df, df_original) elif not using_array_manager: @@ -447,7 +446,7 @@ def test_detect_chained_assignment_undefined_column(self, using_copy_on_write): df_original = df.copy() if using_copy_on_write: - with pytest.raises(ChainedAssignmentError): + with tm.raises_chained_assignment_error: df.iloc[0:5]["group"] = "a" tm.assert_frame_equal(df, df_original) else: @@ -471,11 +470,11 @@ def test_detect_chained_assignment_changing_dtype( df_original = df.copy() if using_copy_on_write: - with pytest.raises(ChainedAssignmentError): + with tm.raises_chained_assignment_error: df.loc[2]["D"] = "foo" - with pytest.raises(ChainedAssignmentError): + with tm.raises_chained_assignment_error: df.loc[2]["C"] = "foo" - with pytest.raises(ChainedAssignmentError): + with tm.raises_chained_assignment_error: df["C"][2] = "foo" tm.assert_frame_equal(df, df_original) @@ -505,7 +504,7 @@ def test_setting_with_copy_bug(self, using_copy_on_write): mask = pd.isna(df.c) if using_copy_on_write: - with pytest.raises(ChainedAssignmentError): + with tm.raises_chained_assignment_error: df[["c"]][mask] = df[["b"]][mask] tm.assert_frame_equal(df, df_original) else: @@ -524,7 +523,7 @@ def test_setting_with_copy_bug_no_warning(self): def 
test_detect_chained_assignment_warnings_errors(self, using_copy_on_write): df = DataFrame({"A": ["aaa", "bbb", "ccc"], "B": [1, 2, 3]}) if using_copy_on_write: - with pytest.raises(ChainedAssignmentError): + with tm.raises_chained_assignment_error: df.loc[0]["A"] = 111 return @@ -614,7 +613,7 @@ def test_iloc_setitem_chained_assignment(self, using_copy_on_write): ck = [True] * len(df) if using_copy_on_write: - with pytest.raises(ChainedAssignmentError): + with tm.raises_chained_assignment_error: df["bb"].iloc[0] = 0.13 else: df["bb"].iloc[0] = 0.13 @@ -623,7 +622,7 @@ def test_iloc_setitem_chained_assignment(self, using_copy_on_write): df.iloc[ck] if using_copy_on_write: - with pytest.raises(ChainedAssignmentError): + with tm.raises_chained_assignment_error: df["bb"].iloc[0] = 0.15 else: df["bb"].iloc[0] = 0.15 @@ -637,7 +636,7 @@ def test_getitem_loc_assignment_slice_state(self, using_copy_on_write): # GH 13569 df = DataFrame({"a": [10, 20, 30]}) if using_copy_on_write: - with pytest.raises(ChainedAssignmentError): + with tm.raises_chained_assignment_error: df["a"].loc[4] = 40 else: df["a"].loc[4] = 40 diff --git a/pandas/tests/series/accessors/test_dt_accessor.py b/pandas/tests/series/accessors/test_dt_accessor.py index d53e1ef512e35..792dbd7b166ca 100644 --- a/pandas/tests/series/accessors/test_dt_accessor.py +++ b/pandas/tests/series/accessors/test_dt_accessor.py @@ -12,10 +12,7 @@ import pytz from pandas._libs.tslibs.timezones import maybe_get_tz -from pandas.errors import ( - ChainedAssignmentError, - SettingWithCopyError, -) +from pandas.errors import SettingWithCopyError from pandas.core.dtypes.common import ( is_integer_dtype, @@ -290,7 +287,7 @@ def test_dt_accessor_not_writeable(self, using_copy_on_write): msg = "modifications to a property of a datetimelike.+not supported" with pd.option_context("chained_assignment", "raise"): if using_copy_on_write: - with pytest.raises(ChainedAssignmentError): + with tm.raises_chained_assignment_error: ser.dt.hour[0] 
= 5 else: with pytest.raises(SettingWithCopyError, match=msg): diff --git a/scripts/validate_unwanted_patterns.py b/scripts/validate_unwanted_patterns.py index d6a7409abf238..8d4aecd596328 100755 --- a/scripts/validate_unwanted_patterns.py +++ b/scripts/validate_unwanted_patterns.py @@ -133,9 +133,6 @@ def bare_pytest_raises(file_obj: IO[str]) -> Iterable[Tuple[int, str]]: except AttributeError: continue - if node.args[0].id == "ChainedAssignmentError": - continue - if not node.keywords: yield ( node.lineno, From 1c3a4bb0ec8ed0592d48144f17eeb7d150bf97b9 Mon Sep 17 00:00:00 2001 From: Joris Van den Bossche Date: Mon, 23 Jan 2023 15:41:00 +0100 Subject: [PATCH 09/11] try fix typing --- pandas/_testing/contexts.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/_testing/contexts.py b/pandas/_testing/contexts.py index 0511806277e5f..0799e02584fe7 100644 --- a/pandas/_testing/contexts.py +++ b/pandas/_testing/contexts.py @@ -239,7 +239,7 @@ def __exit__( else: import pytest - raises_chained_assignment_error = pytest.raises( + raises_chained_assignment_error = pytest.raises( # type: ignore[assignment] ChainedAssignmentError, match=( "A value is trying to be set on a copy of a DataFrame or Series " From a7089ba1d76158e8d113372b775b227a5d4c83da Mon Sep 17 00:00:00 2001 From: Joris Van den Bossche Date: Mon, 23 Jan 2023 17:18:34 +0100 Subject: [PATCH 10/11] remove PyPy with CoW build again --- .github/workflows/ubuntu.yml | 6 ------ 1 file changed, 6 deletions(-) diff --git a/.github/workflows/ubuntu.yml b/.github/workflows/ubuntu.yml index 38a43da205a00..6726139ed5fa4 100644 --- a/.github/workflows/ubuntu.yml +++ b/.github/workflows/ubuntu.yml @@ -73,12 +73,6 @@ jobs: env_file: actions-pypy-38.yaml pattern: "not slow and not network and not single_cpu" test_args: "--max-worker-restart 0" - - name: "Pypy with Copy-on-Write" - env_file: actions-pypy-38.yaml - pattern: "not slow and not network and not single_cpu" - test_args: "--max-worker-restart 0" 
- pandas_copy_on_write: "1" - error_on_warnings: "0" - name: "Numpy Dev" env_file: actions-310-numpydev.yaml pattern: "not slow and not network and not single_cpu" From e2bb7e1fb0ee6325e9584647d2137a79bcc7ed81 Mon Sep 17 00:00:00 2001 From: Joris Van den Bossche Date: Tue, 24 Jan 2023 08:57:10 +0100 Subject: [PATCH 11/11] convert raises helper to function to avoid pytest import --- pandas/_testing/contexts.py | 30 ++++++------ pandas/tests/frame/indexing/test_setitem.py | 2 +- pandas/tests/frame/indexing/test_xs.py | 2 +- pandas/tests/frame/test_block_internals.py | 2 +- .../multiindex/test_chaining_and_caching.py | 2 +- .../tests/indexing/multiindex/test_partial.py | 4 +- .../tests/indexing/multiindex/test_setitem.py | 4 +- .../indexing/test_chaining_and_caching.py | 46 +++++++++---------- .../series/accessors/test_dt_accessor.py | 2 +- 9 files changed, 48 insertions(+), 46 deletions(-) diff --git a/pandas/_testing/contexts.py b/pandas/_testing/contexts.py index 0799e02584fe7..d0de085788782 100644 --- a/pandas/_testing/contexts.py +++ b/pandas/_testing/contexts.py @@ -232,17 +232,19 @@ def __exit__( np.random.set_state(self.start_state) -if PYPY: - from contextlib import nullcontext - - raises_chained_assignment_error = nullcontext() -else: - import pytest - - raises_chained_assignment_error = pytest.raises( # type: ignore[assignment] - ChainedAssignmentError, - match=( - "A value is trying to be set on a copy of a DataFrame or Series " - "through chained assignment" - ), - ) +def raises_chained_assignment_error(): + + if PYPY: + from contextlib import nullcontext + + return nullcontext() + else: + import pytest + + return pytest.raises( + ChainedAssignmentError, + match=( + "A value is trying to be set on a copy of a DataFrame or Series " + "through chained assignment" + ), + ) diff --git a/pandas/tests/frame/indexing/test_setitem.py b/pandas/tests/frame/indexing/test_setitem.py index 3260198dcd90d..62f05cb523b1b 100644 --- 
a/pandas/tests/frame/indexing/test_setitem.py +++ b/pandas/tests/frame/indexing/test_setitem.py @@ -1252,7 +1252,7 @@ def test_setitem_column_update_inplace(self, using_copy_on_write): # diagonal values all updated assert np.all(values[np.arange(10), np.arange(10)] == 1) else: - with tm.raises_chained_assignment_error: + with tm.raises_chained_assignment_error(): for label in df.columns: df[label][label] = 1 # original dataframe not updated diff --git a/pandas/tests/frame/indexing/test_xs.py b/pandas/tests/frame/indexing/test_xs.py index fbf4b928f8b8b..7e0623f7beaa9 100644 --- a/pandas/tests/frame/indexing/test_xs.py +++ b/pandas/tests/frame/indexing/test_xs.py @@ -124,7 +124,7 @@ def test_xs_view(self, using_array_manager, using_copy_on_write): df_orig = dm.copy() if using_copy_on_write: - with tm.raises_chained_assignment_error: + with tm.raises_chained_assignment_error(): dm.xs(2)[:] = 20 tm.assert_frame_equal(dm, df_orig) elif using_array_manager: diff --git a/pandas/tests/frame/test_block_internals.py b/pandas/tests/frame/test_block_internals.py index d8002cd3cf8a0..04f4766e49227 100644 --- a/pandas/tests/frame/test_block_internals.py +++ b/pandas/tests/frame/test_block_internals.py @@ -341,7 +341,7 @@ def test_stale_cached_series_bug_473(self, using_copy_on_write): repr(Y) Y["e"] = Y["e"].astype("object") if using_copy_on_write: - with tm.raises_chained_assignment_error: + with tm.raises_chained_assignment_error(): Y["g"]["c"] = np.NaN else: Y["g"]["c"] = np.NaN diff --git a/pandas/tests/indexing/multiindex/test_chaining_and_caching.py b/pandas/tests/indexing/multiindex/test_chaining_and_caching.py index 2e773a16d417e..932457eebcd8e 100644 --- a/pandas/tests/indexing/multiindex/test_chaining_and_caching.py +++ b/pandas/tests/indexing/multiindex/test_chaining_and_caching.py @@ -51,7 +51,7 @@ def test_cache_updating(using_copy_on_write): # setting via chained assignment # but actually works, since everything is a view if using_copy_on_write: - with 
tm.raises_chained_assignment_error: + with tm.raises_chained_assignment_error(): df.loc[0]["z"].iloc[0] = 1.0 assert df.loc[(0, 0), "z"] == df_original.loc[0, "z"] else: diff --git a/pandas/tests/indexing/multiindex/test_partial.py b/pandas/tests/indexing/multiindex/test_partial.py index 725e3df06696c..d34daaf640305 100644 --- a/pandas/tests/indexing/multiindex/test_partial.py +++ b/pandas/tests/indexing/multiindex/test_partial.py @@ -129,7 +129,7 @@ def test_partial_set( tm.assert_frame_equal(df, exp) if using_copy_on_write: - with tm.raises_chained_assignment_error: + with tm.raises_chained_assignment_error(): df["A"].loc[2000, 4] = 1 df.loc[(2000, 4), "A"] = 1 else: @@ -143,7 +143,7 @@ def test_partial_set( # this works...for now if using_copy_on_write: - with tm.raises_chained_assignment_error: + with tm.raises_chained_assignment_error(): df["A"].iloc[14] = 5 df["A"].iloc[14] == exp["A"].iloc[14] else: diff --git a/pandas/tests/indexing/multiindex/test_setitem.py b/pandas/tests/indexing/multiindex/test_setitem.py index 6ec0f2694c6aa..3ca057b80e578 100644 --- a/pandas/tests/indexing/multiindex/test_setitem.py +++ b/pandas/tests/indexing/multiindex/test_setitem.py @@ -501,7 +501,7 @@ def test_frame_setitem_copy_raises( # will raise/warn as its chained assignment df = multiindex_dataframe_random_data.T if using_copy_on_write: - with tm.raises_chained_assignment_error: + with tm.raises_chained_assignment_error(): df["foo"]["one"] = 2 else: msg = "A value is trying to be set on a copy of a slice from a DataFrame" @@ -516,7 +516,7 @@ def test_frame_setitem_copy_no_write( expected = frame df = frame.copy() if using_copy_on_write: - with tm.raises_chained_assignment_error: + with tm.raises_chained_assignment_error(): df["foo"]["one"] = 2 else: msg = "A value is trying to be set on a copy of a slice from a DataFrame" diff --git a/pandas/tests/indexing/test_chaining_and_caching.py b/pandas/tests/indexing/test_chaining_and_caching.py index 7a71a41aaa491..5e7abeb86705b 
100644 --- a/pandas/tests/indexing/test_chaining_and_caching.py +++ b/pandas/tests/indexing/test_chaining_and_caching.py @@ -51,7 +51,7 @@ def test_slice_consolidate_invalidate_item_cache(self, using_copy_on_write): # Assignment to wrong series if using_copy_on_write: - with tm.raises_chained_assignment_error: + with tm.raises_chained_assignment_error(): df["bb"].iloc[0] = 0.17 else: df["bb"].iloc[0] = 0.17 @@ -104,7 +104,7 @@ def test_setitem_cache_updating_slices(self, using_copy_on_write): for ix, row in df.iterrows(): v = out[row["C"]][six:eix] + row["D"] if using_copy_on_write: - with tm.raises_chained_assignment_error: + with tm.raises_chained_assignment_error(): out[row["C"]][six:eix] = v else: out[row["C"]][six:eix] = v @@ -152,7 +152,7 @@ def test_setitem_chained_setfault(self, using_copy_on_write): df = DataFrame({"response": np.array(data)}) mask = df.response == "timeout" if using_copy_on_write: - with tm.raises_chained_assignment_error: + with tm.raises_chained_assignment_error(): df.response[mask] = "none" tm.assert_frame_equal(df, DataFrame({"response": data})) else: @@ -163,7 +163,7 @@ def test_setitem_chained_setfault(self, using_copy_on_write): df = DataFrame(recarray) mask = df.response == "timeout" if using_copy_on_write: - with tm.raises_chained_assignment_error: + with tm.raises_chained_assignment_error(): df.response[mask] = "none" tm.assert_frame_equal(df, DataFrame({"response": data})) else: @@ -174,7 +174,7 @@ def test_setitem_chained_setfault(self, using_copy_on_write): df_original = df.copy() mask = df.response == "timeout" if using_copy_on_write: - with tm.raises_chained_assignment_error: + with tm.raises_chained_assignment_error(): df.response[mask] = "none" tm.assert_frame_equal(df, df_original) else: @@ -185,7 +185,7 @@ def test_setitem_chained_setfault(self, using_copy_on_write): expected = DataFrame({"A": [np.nan, "bar", "bah", "foo", "bar"]}) df = DataFrame({"A": np.array(["foo", "bar", "bah", "foo", "bar"])}) if 
using_copy_on_write: - with tm.raises_chained_assignment_error: + with tm.raises_chained_assignment_error(): df["A"].iloc[0] = np.nan expected = DataFrame({"A": ["foo", "bar", "bah", "foo", "bar"]}) else: @@ -196,7 +196,7 @@ def test_setitem_chained_setfault(self, using_copy_on_write): df = DataFrame({"A": np.array(["foo", "bar", "bah", "foo", "bar"])}) if using_copy_on_write: - with tm.raises_chained_assignment_error: + with tm.raises_chained_assignment_error(): df.A.iloc[0] = np.nan else: df.A.iloc[0] = np.nan @@ -216,9 +216,9 @@ def test_detect_chained_assignment(self, using_copy_on_write): assert df._is_copy is None if using_copy_on_write: - with tm.raises_chained_assignment_error: + with tm.raises_chained_assignment_error(): df["A"][0] = -5 - with tm.raises_chained_assignment_error: + with tm.raises_chained_assignment_error(): df["A"][1] = -6 tm.assert_frame_equal(df, df_original) else: @@ -242,9 +242,9 @@ def test_detect_chained_assignment_raises( assert df._is_copy is None if using_copy_on_write: - with tm.raises_chained_assignment_error: + with tm.raises_chained_assignment_error(): df["A"][0] = -5 - with tm.raises_chained_assignment_error: + with tm.raises_chained_assignment_error(): df["A"][1] = -6 tm.assert_frame_equal(df, df_original) elif not using_array_manager: @@ -276,7 +276,7 @@ def test_detect_chained_assignment_fails(self, using_copy_on_write): ) if using_copy_on_write: - with tm.raises_chained_assignment_error: + with tm.raises_chained_assignment_error(): df.loc[0]["A"] = -5 else: with pytest.raises(SettingWithCopyError, match=msg): @@ -296,7 +296,7 @@ def test_detect_chained_assignment_doc_example(self, using_copy_on_write): if using_copy_on_write: indexer = df.a.str.startswith("o") - with tm.raises_chained_assignment_error: + with tm.raises_chained_assignment_error(): df[indexer]["c"] = 42 else: with pytest.raises(SettingWithCopyError, match=msg): @@ -317,7 +317,7 @@ def test_detect_chained_assignment_object_dtype( df.loc[0]["A"] = 111 if 
using_copy_on_write: - with tm.raises_chained_assignment_error: + with tm.raises_chained_assignment_error(): df["A"][0] = 111 tm.assert_frame_equal(df, df_original) elif not using_array_manager: @@ -446,7 +446,7 @@ def test_detect_chained_assignment_undefined_column(self, using_copy_on_write): df_original = df.copy() if using_copy_on_write: - with tm.raises_chained_assignment_error: + with tm.raises_chained_assignment_error(): df.iloc[0:5]["group"] = "a" tm.assert_frame_equal(df, df_original) else: @@ -470,11 +470,11 @@ def test_detect_chained_assignment_changing_dtype( df_original = df.copy() if using_copy_on_write: - with tm.raises_chained_assignment_error: + with tm.raises_chained_assignment_error(): df.loc[2]["D"] = "foo" - with tm.raises_chained_assignment_error: + with tm.raises_chained_assignment_error(): df.loc[2]["C"] = "foo" - with tm.raises_chained_assignment_error: + with tm.raises_chained_assignment_error(): df["C"][2] = "foo" tm.assert_frame_equal(df, df_original) @@ -504,7 +504,7 @@ def test_setting_with_copy_bug(self, using_copy_on_write): mask = pd.isna(df.c) if using_copy_on_write: - with tm.raises_chained_assignment_error: + with tm.raises_chained_assignment_error(): df[["c"]][mask] = df[["b"]][mask] tm.assert_frame_equal(df, df_original) else: @@ -523,7 +523,7 @@ def test_setting_with_copy_bug_no_warning(self): def test_detect_chained_assignment_warnings_errors(self, using_copy_on_write): df = DataFrame({"A": ["aaa", "bbb", "ccc"], "B": [1, 2, 3]}) if using_copy_on_write: - with tm.raises_chained_assignment_error: + with tm.raises_chained_assignment_error(): df.loc[0]["A"] = 111 return @@ -613,7 +613,7 @@ def test_iloc_setitem_chained_assignment(self, using_copy_on_write): ck = [True] * len(df) if using_copy_on_write: - with tm.raises_chained_assignment_error: + with tm.raises_chained_assignment_error(): df["bb"].iloc[0] = 0.13 else: df["bb"].iloc[0] = 0.13 @@ -622,7 +622,7 @@ def test_iloc_setitem_chained_assignment(self, using_copy_on_write): 
df.iloc[ck] if using_copy_on_write: - with tm.raises_chained_assignment_error: + with tm.raises_chained_assignment_error(): df["bb"].iloc[0] = 0.15 else: df["bb"].iloc[0] = 0.15 @@ -636,7 +636,7 @@ def test_getitem_loc_assignment_slice_state(self, using_copy_on_write): # GH 13569 df = DataFrame({"a": [10, 20, 30]}) if using_copy_on_write: - with tm.raises_chained_assignment_error: + with tm.raises_chained_assignment_error(): df["a"].loc[4] = 40 else: df["a"].loc[4] = 40 diff --git a/pandas/tests/series/accessors/test_dt_accessor.py b/pandas/tests/series/accessors/test_dt_accessor.py index 792dbd7b166ca..1914bdae07e4b 100644 --- a/pandas/tests/series/accessors/test_dt_accessor.py +++ b/pandas/tests/series/accessors/test_dt_accessor.py @@ -287,7 +287,7 @@ def test_dt_accessor_not_writeable(self, using_copy_on_write): msg = "modifications to a property of a datetimelike.+not supported" with pd.option_context("chained_assignment", "raise"): if using_copy_on_write: - with tm.raises_chained_assignment_error: + with tm.raises_chained_assignment_error(): ser.dt.hour[0] = 5 else: with pytest.raises(SettingWithCopyError, match=msg):