diff --git a/.github/workflows/ubuntu.yml b/.github/workflows/ubuntu.yml index d3ad2710a0efa..6726139ed5fa4 100644 --- a/.github/workflows/ubuntu.yml +++ b/.github/workflows/ubuntu.yml @@ -73,7 +73,6 @@ jobs: env_file: actions-pypy-38.yaml pattern: "not slow and not network and not single_cpu" test_args: "--max-worker-restart 0" - error_on_warnings: "0" - name: "Numpy Dev" env_file: actions-310-numpydev.yaml pattern: "not slow and not network and not single_cpu" diff --git a/doc/source/reference/testing.rst b/doc/source/reference/testing.rst index 07624e87d82e0..edfafee430d1d 100644 --- a/doc/source/reference/testing.rst +++ b/doc/source/reference/testing.rst @@ -28,6 +28,7 @@ Exceptions and warnings errors.AccessorRegistrationWarning errors.AttributeConflictWarning errors.CategoricalConversionWarning + errors.ChainedAssignmentError errors.ClosedFileError errors.CSSWarning errors.DatabaseError diff --git a/doc/source/whatsnew/v2.0.0.rst b/doc/source/whatsnew/v2.0.0.rst index 605f1d4b26e13..513d9494f23cc 100644 --- a/doc/source/whatsnew/v2.0.0.rst +++ b/doc/source/whatsnew/v2.0.0.rst @@ -125,6 +125,14 @@ Copy-on-Write improvements a modification to the data happens) when constructing a Series from an existing Series with the default of ``copy=False`` (:issue:`50471`) +- Trying to set values using chained assignment (for example, ``df["a"][1:3] = 0``) + will now always raise an exception when Copy-on-Write is enabled. In this mode, + chained assignment can never work because we are always setting into a temporary + object that is the result of an indexing operation (getitem), which under + Copy-on-Write always behaves as a copy. Thus, assigning through a chain + can never update the original Series or DataFrame. Therefore, an informative + error is raised to the user instead of silently doing nothing (:issue:`49467`) + Copy-on-Write can be enabled through .. 
code-block:: python diff --git a/pandas/_testing/__init__.py b/pandas/_testing/__init__.py index 6a7a1c7126cd3..eb25566e7983e 100644 --- a/pandas/_testing/__init__.py +++ b/pandas/_testing/__init__.py @@ -103,6 +103,7 @@ decompress_file, ensure_clean, ensure_safe_environment_variables, + raises_chained_assignment_error, set_timezone, use_numexpr, with_csv_dialect, @@ -1125,6 +1126,7 @@ def shares_memory(left, right) -> bool: "rands", "reset_display_options", "RNGContext", + "raises_chained_assignment_error", "round_trip_localpath", "round_trip_pathlib", "round_trip_pickle", diff --git a/pandas/_testing/contexts.py b/pandas/_testing/contexts.py index e5f716c62eca7..d0de085788782 100644 --- a/pandas/_testing/contexts.py +++ b/pandas/_testing/contexts.py @@ -14,6 +14,9 @@ import numpy as np +from pandas.compat import PYPY +from pandas.errors import ChainedAssignmentError + from pandas import set_option from pandas.io.common import get_handle @@ -227,3 +230,26 @@ def __exit__( ) -> None: np.random.set_state(self.start_state) + + +def raises_chained_assignment_error(): + """ + Return a context manager that expects a ChainedAssignmentError. + + On PyPy the refcount-based chained-assignment check is skipped, so a + no-op ``nullcontext`` is returned there instead of ``pytest.raises``. + """ + if PYPY: + from contextlib import nullcontext + + return nullcontext() + else: + import pytest + + return pytest.raises( + ChainedAssignmentError, + match=( + "A value is trying to be set on a copy of a DataFrame or Series " + "through chained assignment" + ), + ) diff --git a/pandas/core/frame.py b/pandas/core/frame.py index 9efc07628cccd..685ed9f972a32 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -16,6 +16,7 @@ import functools from io import StringIO import itertools +import sys from textwrap import dedent from typing import ( TYPE_CHECKING, @@ -91,12 +92,17 @@ WriteBuffer, npt, ) +from pandas.compat import PYPY from pandas.compat._optional import import_optional_dependency from pandas.compat.numpy import ( function as nv, np_percentile_argname, ) -from pandas.errors import InvalidIndexError +from pandas.errors import ( + ChainedAssignmentError, + InvalidIndexError, + 
_chained_assignment_msg, +) from pandas.util._decorators import ( Appender, Substitution, @@ -3862,6 +3868,10 @@ def isetitem(self, loc, value) -> None: self._iset_item_mgr(loc, arraylike, inplace=False) def __setitem__(self, key, value): + if not PYPY and using_copy_on_write(): + if sys.getrefcount(self) <= 3: + raise ChainedAssignmentError(_chained_assignment_msg) + key = com.apply_if_callable(key, self) # see if we can slice the rows diff --git a/pandas/core/indexing.py b/pandas/core/indexing.py index a7b19e3180fff..26b5a4077b0ff 100644 --- a/pandas/core/indexing.py +++ b/pandas/core/indexing.py @@ -1,6 +1,7 @@ from __future__ import annotations from contextlib import suppress +import sys from typing import ( TYPE_CHECKING, Hashable, @@ -12,17 +13,22 @@ import numpy as np +from pandas._config import using_copy_on_write + from pandas._libs.indexing import NDFrameIndexerBase from pandas._libs.lib import item_from_zerodim from pandas._typing import ( Axis, AxisInt, ) +from pandas.compat import PYPY from pandas.errors import ( AbstractMethodError, + ChainedAssignmentError, IndexingError, InvalidIndexError, LossySetitemError, + _chained_assignment_msg, ) from pandas.util._decorators import doc @@ -830,6 +836,10 @@ def _ensure_listlike_indexer(self, key, axis=None, value=None) -> None: @final def __setitem__(self, key, value) -> None: + if not PYPY and using_copy_on_write(): + if sys.getrefcount(self.obj) <= 2: + raise ChainedAssignmentError(_chained_assignment_msg) + check_dict_or_set_indexers(key) if isinstance(key, tuple): key = tuple(list(x) if is_iterator(x) else x for x in key) diff --git a/pandas/core/series.py b/pandas/core/series.py index c6ba217042353..106cf166acd14 100644 --- a/pandas/core/series.py +++ b/pandas/core/series.py @@ -3,6 +3,7 @@ """ from __future__ import annotations +import sys from textwrap import dedent from typing import ( IO, @@ -67,8 +68,13 @@ WriteBuffer, npt, ) +from pandas.compat import PYPY from pandas.compat.numpy import function as 
nv -from pandas.errors import InvalidIndexError +from pandas.errors import ( + ChainedAssignmentError, + InvalidIndexError, + _chained_assignment_msg, +) from pandas.util._decorators import ( Appender, Substitution, @@ -1074,6 +1080,10 @@ def _get_value(self, label, takeable: bool = False): return self.iloc[loc] def __setitem__(self, key, value) -> None: + if not PYPY and using_copy_on_write(): + if sys.getrefcount(self) <= 3: + raise ChainedAssignmentError(_chained_assignment_msg) + check_dict_or_set_indexers(key) key = com.apply_if_callable(key, self) cacher_needs_updating = self._check_is_chained_assignment_possible() diff --git a/pandas/errors/__init__.py b/pandas/errors/__init__.py index 89ac1c10254cb..5a1915956616c 100644 --- a/pandas/errors/__init__.py +++ b/pandas/errors/__init__.py @@ -320,6 +320,42 @@ class SettingWithCopyWarning(Warning): """ +class ChainedAssignmentError(ValueError): + """ + Exception raised when trying to set using chained assignment. + + When the ``mode.copy_on_write`` option is enabled, chained assignment can + never work. In such a situation, we are always setting into a temporary + object that is the result of an indexing operation (getitem), which under + Copy-on-Write always behaves as a copy. Thus, assigning through a chain + can never update the original Series or DataFrame. + + For more information on view vs. copy, + see :ref:`the user guide<indexing.view_versus_copy>`. + + Examples + -------- + >>> pd.options.mode.copy_on_write = True + >>> df = pd.DataFrame({'A': [1, 1, 1, 2, 2]}, columns=['A']) + >>> df["A"][0:3] = 10 # doctest: +SKIP + ... # ChainedAssignmentError: ... 
+ """ + + +_chained_assignment_msg = ( + "A value is trying to be set on a copy of a DataFrame or Series " + "through chained assignment.\n" + "When using the Copy-on-Write mode, such chained assignment never works " + "to update the original DataFrame or Series, because the intermediate " + "object on which we are setting values always behaves as a copy.\n\n" + "Try using '.loc[row_indexer, col_indexer] = value' instead, to perform " + "the assignment in a single step.\n\n" + "See the caveats in the documentation: " + "https://pandas.pydata.org/pandas-docs/stable/user_guide/" + "indexing.html#returning-a-view-versus-a-copy" +) + + class NumExprClobberingError(NameError): """ Exception raised when trying to use a built-in numexpr name as a variable name. diff --git a/pandas/tests/frame/indexing/test_setitem.py b/pandas/tests/frame/indexing/test_setitem.py index d8d626b3af84a..62f05cb523b1b 100644 --- a/pandas/tests/frame/indexing/test_setitem.py +++ b/pandas/tests/frame/indexing/test_setitem.py @@ -1245,12 +1245,15 @@ def test_setitem_column_update_inplace(self, using_copy_on_write): df = DataFrame({col: np.zeros(len(labels)) for col in labels}, index=labels) values = df._mgr.blocks[0].values - for label in df.columns: - df[label][label] = 1 - if not using_copy_on_write: + for label in df.columns: + df[label][label] = 1 + # diagonal values all updated assert np.all(values[np.arange(10), np.arange(10)] == 1) else: + with tm.raises_chained_assignment_error(): + for label in df.columns: + df[label][label] = 1 # original dataframe not updated assert np.all(values[np.arange(10), np.arange(10)] == 0) diff --git a/pandas/tests/frame/indexing/test_xs.py b/pandas/tests/frame/indexing/test_xs.py index b7549771c7cc5..7e0623f7beaa9 100644 --- a/pandas/tests/frame/indexing/test_xs.py +++ b/pandas/tests/frame/indexing/test_xs.py @@ -124,7 +124,8 @@ def test_xs_view(self, using_array_manager, using_copy_on_write): df_orig = dm.copy() if using_copy_on_write: - dm.xs(2)[:] = 20 + 
with tm.raises_chained_assignment_error(): + dm.xs(2)[:] = 20 tm.assert_frame_equal(dm, df_orig) elif using_array_manager: # INFO(ArrayManager) with ArrayManager getting a row as a view is diff --git a/pandas/tests/frame/test_block_internals.py b/pandas/tests/frame/test_block_internals.py index 5fca8d0568a67..04f4766e49227 100644 --- a/pandas/tests/frame/test_block_internals.py +++ b/pandas/tests/frame/test_block_internals.py @@ -340,7 +340,11 @@ def test_stale_cached_series_bug_473(self, using_copy_on_write): ) repr(Y) Y["e"] = Y["e"].astype("object") - Y["g"]["c"] = np.NaN + if using_copy_on_write: + with tm.raises_chained_assignment_error(): + Y["g"]["c"] = np.NaN + else: + Y["g"]["c"] = np.NaN repr(Y) result = Y.sum() # noqa exp = Y["g"].sum() # noqa diff --git a/pandas/tests/indexing/multiindex/test_chaining_and_caching.py b/pandas/tests/indexing/multiindex/test_chaining_and_caching.py index 2efb288a73f8d..932457eebcd8e 100644 --- a/pandas/tests/indexing/multiindex/test_chaining_and_caching.py +++ b/pandas/tests/indexing/multiindex/test_chaining_and_caching.py @@ -50,11 +50,13 @@ def test_cache_updating(using_copy_on_write): # setting via chained assignment # but actually works, since everything is a view - df.loc[0]["z"].iloc[0] = 1.0 - result = df.loc[(0, 0), "z"] if using_copy_on_write: - assert result == df_original.loc[0, "z"] + with tm.raises_chained_assignment_error(): + df.loc[0]["z"].iloc[0] = 1.0 + assert df.loc[(0, 0), "z"] == df_original.loc[0, "z"] else: + df.loc[0]["z"].iloc[0] = 1.0 + result = df.loc[(0, 0), "z"] assert result == 1 # correct setting diff --git a/pandas/tests/indexing/multiindex/test_partial.py b/pandas/tests/indexing/multiindex/test_partial.py index 0c63326118ac3..d34daaf640305 100644 --- a/pandas/tests/indexing/multiindex/test_partial.py +++ b/pandas/tests/indexing/multiindex/test_partial.py @@ -128,9 +128,13 @@ def test_partial_set( exp.iloc[65:85] = 0 tm.assert_frame_equal(df, exp) - df["A"].loc[2000, 4] = 1 - if not 
using_copy_on_write: - exp["A"].loc[2000, 4].values[:] = 1 + if using_copy_on_write: + with tm.raises_chained_assignment_error(): + df["A"].loc[2000, 4] = 1 + df.loc[(2000, 4), "A"] = 1 + else: + df["A"].loc[2000, 4] = 1 + exp.iloc[65:85, 0] = 1 tm.assert_frame_equal(df, exp) df.loc[2000] = 5 @@ -138,10 +142,12 @@ def test_partial_set( tm.assert_frame_equal(df, exp) # this works...for now - df["A"].iloc[14] = 5 if using_copy_on_write: + with tm.raises_chained_assignment_error(): + df["A"].iloc[14] = 5 - df["A"].iloc[14] == exp["A"].iloc[14] + assert df["A"].iloc[14] == exp["A"].iloc[14] else: + df["A"].iloc[14] = 5 assert df["A"].iloc[14] == 5 @pytest.mark.parametrize("dtype", [int, float]) diff --git a/pandas/tests/indexing/multiindex/test_setitem.py b/pandas/tests/indexing/multiindex/test_setitem.py index ac10a6d82dc89..3ca057b80e578 100644 --- a/pandas/tests/indexing/multiindex/test_setitem.py +++ b/pandas/tests/indexing/multiindex/test_setitem.py @@ -501,8 +501,8 @@ def test_frame_setitem_copy_raises( # will raise/warn as its chained assignment df = multiindex_dataframe_random_data.T if using_copy_on_write: - # TODO(CoW) it would be nice if this could still warn/raise - df["foo"]["one"] = 2 + with tm.raises_chained_assignment_error(): + df["foo"]["one"] = 2 else: msg = "A value is trying to be set on a copy of a slice from a DataFrame" with pytest.raises(SettingWithCopyError, match=msg): @@ -516,7 +516,8 @@ def test_frame_setitem_copy_no_write( expected = frame df = frame.copy() if using_copy_on_write: - df["foo"]["one"] = 2 + with tm.raises_chained_assignment_error(): + df["foo"]["one"] = 2 else: msg = "A value is trying to be set on a copy of a slice from a DataFrame" with pytest.raises(SettingWithCopyError, match=msg): diff --git a/pandas/tests/indexing/test_chaining_and_caching.py b/pandas/tests/indexing/test_chaining_and_caching.py index 2656cc77c2a9d..5e7abeb86705b 100644 --- a/pandas/tests/indexing/test_chaining_and_caching.py +++ b/pandas/tests/indexing/test_chaining_and_caching.py @@ -50,7 +50,11 @@ 
def test_slice_consolidate_invalidate_item_cache(self, using_copy_on_write): repr(df) # Assignment to wrong series - df["bb"].iloc[0] = 0.17 + if using_copy_on_write: + with tm.raises_chained_assignment_error(): + df["bb"].iloc[0] = 0.17 + else: + df["bb"].iloc[0] = 0.17 df._clear_item_cache() if not using_copy_on_write: tm.assert_almost_equal(df["bb"][0], 0.17) @@ -99,7 +103,11 @@ def test_setitem_cache_updating_slices(self, using_copy_on_write): out_original = out.copy() for ix, row in df.iterrows(): v = out[row["C"]][six:eix] + row["D"] - out[row["C"]][six:eix] = v + if using_copy_on_write: + with tm.raises_chained_assignment_error(): + out[row["C"]][six:eix] = v + else: + out[row["C"]][six:eix] = v if not using_copy_on_write: tm.assert_frame_equal(out, expected) @@ -143,43 +151,55 @@ def test_setitem_chained_setfault(self, using_copy_on_write): df = DataFrame({"response": np.array(data)}) mask = df.response == "timeout" - df.response[mask] = "none" if using_copy_on_write: + with tm.raises_chained_assignment_error(): + df.response[mask] = "none" tm.assert_frame_equal(df, DataFrame({"response": data})) else: + df.response[mask] = "none" tm.assert_frame_equal(df, DataFrame({"response": mdata})) recarray = np.rec.fromarrays([data], names=["response"]) df = DataFrame(recarray) mask = df.response == "timeout" - df.response[mask] = "none" if using_copy_on_write: + with tm.raises_chained_assignment_error(): + df.response[mask] = "none" tm.assert_frame_equal(df, DataFrame({"response": data})) else: + df.response[mask] = "none" tm.assert_frame_equal(df, DataFrame({"response": mdata})) df = DataFrame({"response": data, "response1": data}) df_original = df.copy() mask = df.response == "timeout" - df.response[mask] = "none" if using_copy_on_write: + with tm.raises_chained_assignment_error(): + df.response[mask] = "none" tm.assert_frame_equal(df, df_original) else: + df.response[mask] = "none" tm.assert_frame_equal(df, DataFrame({"response": mdata, "response1": data})) # GH 
6056 expected = DataFrame({"A": [np.nan, "bar", "bah", "foo", "bar"]}) df = DataFrame({"A": np.array(["foo", "bar", "bah", "foo", "bar"])}) - df["A"].iloc[0] = np.nan - result = df.head() if using_copy_on_write: + with tm.raises_chained_assignment_error(): + df["A"].iloc[0] = np.nan expected = DataFrame({"A": ["foo", "bar", "bah", "foo", "bar"]}) else: + df["A"].iloc[0] = np.nan expected = DataFrame({"A": [np.nan, "bar", "bah", "foo", "bar"]}) + result = df.head() tm.assert_frame_equal(result, expected) df = DataFrame({"A": np.array(["foo", "bar", "bah", "foo", "bar"])}) - df.A.iloc[0] = np.nan + if using_copy_on_write: + with tm.raises_chained_assignment_error(): + df.A.iloc[0] = np.nan + else: + df.A.iloc[0] = np.nan result = df.head() tm.assert_frame_equal(result, expected) @@ -195,11 +215,15 @@ def test_detect_chained_assignment(self, using_copy_on_write): df_original = df.copy() assert df._is_copy is None - df["A"][0] = -5 - df["A"][1] = -6 if using_copy_on_write: + with tm.raises_chained_assignment_error(): + df["A"][0] = -5 + with tm.raises_chained_assignment_error(): + df["A"][1] = -6 tm.assert_frame_equal(df, df_original) else: + df["A"][0] = -5 + df["A"][1] = -6 tm.assert_frame_equal(df, expected) @pytest.mark.arm_slow @@ -218,8 +242,10 @@ def test_detect_chained_assignment_raises( assert df._is_copy is None if using_copy_on_write: - df["A"][0] = -5 - df["A"][1] = -6 + with tm.raises_chained_assignment_error(): + df["A"][0] = -5 + with tm.raises_chained_assignment_error(): + df["A"][1] = -6 tm.assert_frame_equal(df, df_original) elif not using_array_manager: with pytest.raises(SettingWithCopyError, match=msg): @@ -250,8 +276,8 @@ def test_detect_chained_assignment_fails(self, using_copy_on_write): ) if using_copy_on_write: - # TODO(CoW) can we still warn here? 
- df.loc[0]["A"] = -5 + with tm.raises_chained_assignment_error(): + df.loc[0]["A"] = -5 else: with pytest.raises(SettingWithCopyError, match=msg): df.loc[0]["A"] = -5 @@ -269,9 +295,9 @@ def test_detect_chained_assignment_doc_example(self, using_copy_on_write): assert df._is_copy is None if using_copy_on_write: - # TODO(CoW) can we still warn here? indexer = df.a.str.startswith("o") - df[indexer]["c"] = 42 + with tm.raises_chained_assignment_error(): + df[indexer]["c"] = 42 else: with pytest.raises(SettingWithCopyError, match=msg): indexer = df.a.str.startswith("o") @@ -291,8 +317,8 @@ def test_detect_chained_assignment_object_dtype( df.loc[0]["A"] = 111 if using_copy_on_write: - # TODO(CoW) can we still warn here? - df["A"][0] = 111 + with tm.raises_chained_assignment_error(): + df["A"][0] = 111 tm.assert_frame_equal(df, df_original) elif not using_array_manager: with pytest.raises(SettingWithCopyError, match=msg): @@ -420,8 +446,8 @@ def test_detect_chained_assignment_undefined_column(self, using_copy_on_write): df_original = df.copy() if using_copy_on_write: - # TODO(CoW) can we still warn here? 
- df.iloc[0:5]["group"] = "a" + with tm.raises_chained_assignment_error(): + df.iloc[0:5]["group"] = "a" tm.assert_frame_equal(df, df_original) else: with pytest.raises(SettingWithCopyError, match=msg): @@ -444,9 +470,12 @@ def test_detect_chained_assignment_changing_dtype( df_original = df.copy() if using_copy_on_write: - df.loc[2]["D"] = "foo" - df.loc[2]["C"] = "foo" - df["C"][2] = "foo" + with tm.raises_chained_assignment_error(): + df.loc[2]["D"] = "foo" + with tm.raises_chained_assignment_error(): + df.loc[2]["C"] = "foo" + with tm.raises_chained_assignment_error(): + df["C"][2] = "foo" tm.assert_frame_equal(df, df_original) if not using_copy_on_write: @@ -475,7 +504,8 @@ def test_setting_with_copy_bug(self, using_copy_on_write): mask = pd.isna(df.c) if using_copy_on_write: - df[["c"]][mask] = df[["b"]][mask] + with tm.raises_chained_assignment_error(): + df[["c"]][mask] = df[["b"]][mask] tm.assert_frame_equal(df, df_original) else: with pytest.raises(SettingWithCopyError, match=msg): @@ -493,7 +523,8 @@ def test_setting_with_copy_bug_no_warning(self): def test_detect_chained_assignment_warnings_errors(self, using_copy_on_write): df = DataFrame({"A": ["aaa", "bbb", "ccc"], "B": [1, 2, 3]}) if using_copy_on_write: - df.loc[0]["A"] = 111 + with tm.raises_chained_assignment_error(): + df.loc[0]["A"] = 111 return with option_context("chained_assignment", "warn"): @@ -559,6 +590,7 @@ def test_cache_updating2(self): index=range(5), ) df["f"] = 0 + # TODO(CoW) protect underlying values of being written to? 
df.f.values[3] = 1 df.f.values[3] = 2 @@ -580,20 +612,33 @@ def test_iloc_setitem_chained_assignment(self, using_copy_on_write): ck = [True] * len(df) - df["bb"].iloc[0] = 0.13 + if using_copy_on_write: + with tm.raises_chained_assignment_error(): + df["bb"].iloc[0] = 0.13 + else: + df["bb"].iloc[0] = 0.13 # GH#3970 this lookup used to break the chained setting to 0.15 df.iloc[ck] - df["bb"].iloc[0] = 0.15 + if using_copy_on_write: + with tm.raises_chained_assignment_error(): + df["bb"].iloc[0] = 0.15 + else: + df["bb"].iloc[0] = 0.15 + if not using_copy_on_write: assert df["bb"].iloc[0] == 0.15 else: assert df["bb"].iloc[0] == 2.2 - def test_getitem_loc_assignment_slice_state(self): + def test_getitem_loc_assignment_slice_state(self, using_copy_on_write): # GH 13569 df = DataFrame({"a": [10, 20, 30]}) - df["a"].loc[4] = 40 + if using_copy_on_write: + with tm.raises_chained_assignment_error(): + df["a"].loc[4] = 40 + else: + df["a"].loc[4] = 40 tm.assert_frame_equal(df, DataFrame({"a": [10, 20, 30]})) tm.assert_series_equal(df["a"], Series([10, 20, 30], name="a")) diff --git a/pandas/tests/io/test_spss.py b/pandas/tests/io/test_spss.py index a4894ff66ab9f..d507ab07b7cd1 100644 --- a/pandas/tests/io/test_spss.py +++ b/pandas/tests/io/test_spss.py @@ -3,12 +3,15 @@ import numpy as np import pytest +import pandas.util._test_decorators as td + import pandas as pd import pandas._testing as tm pyreadstat = pytest.importorskip("pyreadstat") +@td.skip_copy_on_write_not_yet_implemented @pytest.mark.parametrize("path_klass", [lambda p: p, Path]) def test_spss_labelled_num(path_klass, datapath): # test file from the Haven project (https://haven.tidyverse.org/) @@ -24,6 +27,7 @@ def test_spss_labelled_num(path_klass, datapath): tm.assert_frame_equal(df, expected) +@td.skip_copy_on_write_not_yet_implemented def test_spss_labelled_num_na(datapath): # test file from the Haven project (https://haven.tidyverse.org/) fname = datapath("io", "data", "spss", "labelled-num-na.sav") @@ 
-38,6 +42,7 @@ def test_spss_labelled_num_na(datapath): tm.assert_frame_equal(df, expected) +@td.skip_copy_on_write_not_yet_implemented def test_spss_labelled_str(datapath): # test file from the Haven project (https://haven.tidyverse.org/) fname = datapath("io", "data", "spss", "labelled-str.sav") @@ -52,6 +57,7 @@ def test_spss_labelled_str(datapath): tm.assert_frame_equal(df, expected) +@td.skip_copy_on_write_not_yet_implemented def test_spss_umlauts(datapath): # test file from the Haven project (https://haven.tidyverse.org/) fname = datapath("io", "data", "spss", "umlauts.sav") diff --git a/pandas/tests/series/accessors/test_dt_accessor.py b/pandas/tests/series/accessors/test_dt_accessor.py index adb11b88cf667..1914bdae07e4b 100644 --- a/pandas/tests/series/accessors/test_dt_accessor.py +++ b/pandas/tests/series/accessors/test_dt_accessor.py @@ -287,8 +287,8 @@ def test_dt_accessor_not_writeable(self, using_copy_on_write): msg = "modifications to a property of a datetimelike.+not supported" with pd.option_context("chained_assignment", "raise"): if using_copy_on_write: - # TODO(CoW) it would be nice to keep a warning/error for this case - ser.dt.hour[0] = 5 + with tm.raises_chained_assignment_error(): + ser.dt.hour[0] = 5 else: with pytest.raises(SettingWithCopyError, match=msg): ser.dt.hour[0] = 5 diff --git a/pandas/util/_test_decorators.py b/pandas/util/_test_decorators.py index 5bc55ee789fe6..de100dba8144d 100644 --- a/pandas/util/_test_decorators.py +++ b/pandas/util/_test_decorators.py @@ -294,3 +294,8 @@ def mark_array_manager_not_yet_implemented(request) -> None: get_option("mode.data_manager") == "array", reason="Test that relies on BlockManager internals or specific behaviour", ) + +skip_copy_on_write_not_yet_implemented = pytest.mark.xfail( + get_option("mode.copy_on_write"), + reason="Not yet implemented/adapted for Copy-on-Write mode", +) diff --git a/scripts/validate_unwanted_patterns.py b/scripts/validate_unwanted_patterns.py index 
68a376956429b..8d4aecd596328 100755 --- a/scripts/validate_unwanted_patterns.py +++ b/scripts/validate_unwanted_patterns.py @@ -53,6 +53,7 @@ "__version__", # check np.__version__ in compat.numpy.function "_arrow_dtype_mapping", "_global_config", + "_chained_assignment_msg", }