From b2183baa4f1b9c68e7eda37d15e6f3be3a295719 Mon Sep 17 00:00:00 2001 From: Patrick Hoefler Date: Sun, 24 Dec 2023 19:16:35 +0100 Subject: [PATCH 1/7] DEPR: Remove SettingWithCopyWarning --- pandas/_config/__init__.py | 6 +- pandas/conftest.py | 5 +- pandas/core/frame.py | 98 +------ pandas/core/generic.py | 219 +------------- pandas/core/groupby/groupby.py | 2 +- pandas/core/indexes/accessors.py | 22 +- pandas/core/indexing.py | 14 +- pandas/core/interchange/from_dataframe.py | 5 - pandas/core/series.py | 92 ------ pandas/errors/__init__.py | 46 --- pandas/io/json/_json.py | 2 - .../test_chained_assignment_deprecation.py | 20 +- pandas/tests/copy_view/test_indexing.py | 264 ++++------------- pandas/tests/copy_view/test_internals.py | 31 -- pandas/tests/copy_view/test_methods.py | 51 +--- pandas/tests/extension/conftest.py | 12 +- pandas/tests/frame/indexing/test_indexing.py | 14 +- pandas/tests/frame/indexing/test_xs.py | 53 +--- pandas/tests/frame/methods/test_asof.py | 13 - pandas/tests/frame/methods/test_copy.py | 20 -- .../tests/frame/methods/test_sort_values.py | 20 +- pandas/tests/indexes/multi/test_get_set.py | 2 - .../multiindex/test_chaining_and_caching.py | 17 +- .../tests/indexing/multiindex/test_setitem.py | 32 +- .../indexing/test_chaining_and_caching.py | 273 +++--------------- .../series/accessors/test_dt_accessor.py | 18 +- .../tests/series/methods/test_sort_values.py | 13 +- pandas/tests/series/test_ufunc.py | 4 +- pandas/tests/test_downstream.py | 2 + pandas/tests/test_errors.py | 2 - 30 files changed, 164 insertions(+), 1208 deletions(-) diff --git a/pandas/_config/__init__.py b/pandas/_config/__init__.py index 97784c924dab4..8d3a7b6a1c28f 100644 --- a/pandas/_config/__init__.py +++ b/pandas/_config/__init__.py @@ -32,11 +32,7 @@ def using_copy_on_write() -> bool: - _mode_options = _global_config["mode"] - return ( - _mode_options["copy_on_write"] is True - and _mode_options["data_manager"] == "block" - ) + return True def warn_copy_on_write() 
-> bool: diff --git a/pandas/conftest.py b/pandas/conftest.py index 983272d79081e..9a564bb066146 100644 --- a/pandas/conftest.py +++ b/pandas/conftest.py @@ -1890,10 +1890,7 @@ def using_copy_on_write() -> bool: """ Fixture to check if Copy-on-Write is enabled. """ - return ( - pd.options.mode.copy_on_write is True - and _get_option("mode.data_manager", silent=True) == "block" - ) + return True @pytest.fixture diff --git a/pandas/core/frame.py b/pandas/core/frame.py index 3e2e589440bd9..1cc94484f36a1 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -1456,12 +1456,8 @@ def style(self) -> Styler: @Appender(_shared_docs["items"]) def items(self) -> Iterable[tuple[Hashable, Series]]: - if self.columns.is_unique and hasattr(self, "_item_cache"): - for k in self.columns: - yield k, self._get_item_cache(k) - else: - for i, k in enumerate(self.columns): - yield k, self._ixs(i, axis=1) + for i, k in enumerate(self.columns): + yield k, self._ixs(i, axis=1) def iterrows(self) -> Iterable[tuple[Hashable, Series]]: """ @@ -3956,24 +3952,14 @@ def _ixs(self, i: int, axis: AxisInt = 0) -> Series: if axis == 0: new_mgr = self._mgr.fast_xs(i) - # if we are a copy, mark as such - copy = isinstance(new_mgr.array, np.ndarray) and new_mgr.array.base is None result = self._constructor_sliced_from_mgr(new_mgr, axes=new_mgr.axes) result._name = self.index[i] - result = result.__finalize__(self) - result._set_is_copy(self, copy=copy) - return result + return result.__finalize__(self) # icol else: - label = self.columns[i] - col_mgr = self._mgr.iget(i) - result = self._box_col_values(col_mgr, i) - - # this is a cached value, mark it so - result._set_as_cached(label, self) - return result + return self._box_col_values(col_mgr, i) def _get_column_array(self, i: int) -> ArrayLike: """ @@ -4034,7 +4020,7 @@ def __getitem__(self, key): and key in self.columns or key in self.columns.drop_duplicates(keep=False) ): - return self._get_item_cache(key) + return self._get_item(key) elif 
is_mi and self.columns.is_unique and key in self.columns: return self._getitem_multilevel(key) @@ -4073,7 +4059,7 @@ def __getitem__(self, key): if isinstance(indexer, slice): return self._slice(indexer, axis=1) - data = self._take_with_is_copy(indexer, axis=1) + data = self.take(indexer, axis=1) if is_single_key: # What does looking for a single key in a non-unique index return? @@ -4082,7 +4068,7 @@ def __getitem__(self, key): # - we have a MultiIndex on columns (test on self.columns, #21309) if data.shape[1] == 1 and not isinstance(self.columns, MultiIndex): # GH#26490 using data[key] can cause RecursionError - return data._get_item_cache(key) + return data._get_item(key) return data @@ -4111,7 +4097,7 @@ def _getitem_bool_array(self, key): return self.copy(deep=None) indexer = key.nonzero()[0] - return self._take_with_is_copy(indexer, axis=0) + return self.take(indexer, axis=0) def _getitem_multilevel(self, key): # self.columns is a MultiIndex @@ -4141,7 +4127,6 @@ def _getitem_multilevel(self, key): result, index=self.index, name=key ) - result._set_is_copy(self) return result else: # loc is neither a slice nor ndarray, so must be an int @@ -4170,7 +4155,7 @@ def _get_value(self, index, col, takeable: bool = False) -> Scalar: series = self._ixs(col, axis=1) return series._values[index] - series = self._get_item_cache(col) + series = self[col] engine = self.index._engine if not isinstance(self.index, MultiIndex): @@ -4273,7 +4258,6 @@ def _setitem_slice(self, key: slice, value) -> None: # NB: we can't just use self.loc[key] = value because that # operates on labels and we need to operate positional for # backwards-compat, xref GH#31469 - self._check_setitem_copy() self.iloc[key] = value def _setitem_array(self, key, value): @@ -4286,7 +4270,6 @@ def _setitem_array(self, key, value): ) key = check_bool_indexer(self.index, key) indexer = key.nonzero()[0] - self._check_setitem_copy() if isinstance(value, DataFrame): # GH#39931 reindex since iloc does not align 
value = value.reindex(self.index.take(indexer)) @@ -4373,7 +4356,6 @@ def _setitem_frame(self, key, value): "Must pass DataFrame or 2-d ndarray with boolean values only" ) - self._check_setitem_copy() self._where(-key, value, inplace=True) def _set_item_frame_value(self, key, value: DataFrame) -> None: @@ -4435,7 +4417,6 @@ def _iset_item_mgr( ) -> None: # when called from _set_item_mgr loc can be anything returned from get_loc self._mgr.iset(loc, value, inplace=inplace, refs=refs) - self._clear_item_cache() def _set_item_mgr( self, key, value: ArrayLike, refs: BlockValuesRefs | None = None @@ -4448,12 +4429,6 @@ def _set_item_mgr( else: self._iset_item_mgr(loc, value, refs=refs) - # check if we are modifying a copy - # try to set first as we want an invalid - # value exception to occur first - if len(self): - self._check_setitem_copy() - def _iset_item(self, loc: int, value: Series, inplace: bool = True) -> None: # We are only called from _replace_columnwise which guarantees that # no reindex is necessary @@ -4464,12 +4439,6 @@ def _iset_item(self, loc: int, value: Series, inplace: bool = True) -> None: else: self._iset_item_mgr(loc, value._values.copy(), inplace=True) - # check if we are modifying a copy - # try to set first as we want an invalid - # value exception to occur first - if len(self): - self._check_setitem_copy() - def _set_item(self, key, value) -> None: """ Add series to DataFrame in specified column. 
@@ -4520,7 +4489,6 @@ def _set_value( icol = self.columns.get_loc(col) iindex = self.index.get_loc(index) self._mgr.column_setitem(icol, iindex, value, inplace_only=True) - self._clear_item_cache() except (KeyError, TypeError, ValueError, LossySetitemError): # get_loc might raise a KeyError for missing labels (falling back @@ -4532,7 +4500,6 @@ def _set_value( self.iloc[index, col] = value else: self.loc[index, col] = value - self._item_cache.pop(col, None) except InvalidIndexError as ii_err: # GH48729: Seems like you are trying to assign a value to a @@ -4576,50 +4543,9 @@ def _box_col_values(self, values: SingleDataManager, loc: int) -> Series: obj._name = name return obj.__finalize__(self) - # ---------------------------------------------------------------------- - # Lookup Caching - - def _clear_item_cache(self) -> None: - self._item_cache.clear() - - def _get_item_cache(self, item: Hashable) -> Series: - """Return the cached item, item represents a label indexer.""" - if using_copy_on_write() or warn_copy_on_write(): - loc = self.columns.get_loc(item) - return self._ixs(loc, axis=1) - - cache = self._item_cache - res = cache.get(item) - if res is None: - # All places that call _get_item_cache have unique columns, - # pending resolution of GH#33047 - - loc = self.columns.get_loc(item) - res = self._ixs(loc, axis=1) - - cache[item] = res - - # for a chain - res._is_copy = self._is_copy - return res - - def _reset_cacher(self) -> None: - # no-op for DataFrame - pass - - def _maybe_cache_changed(self, item, value: Series, inplace: bool) -> None: - """ - The object has called back to us saying maybe it has changed. 
- """ - loc = self._info_axis.get_loc(item) - arraylike = value._values - - old = self._ixs(loc, axis=1) - if old._values is value._values and inplace: - # GH#46149 avoid making unnecessary copies/block-splitting - return - - self._mgr.iset(loc, arraylike, inplace=inplace) + def _get_item(self, item: Hashable) -> Series: + loc = self.columns.get_loc(item) + return self._ixs(loc, axis=1) # ---------------------------------------------------------------------- # Unsorted diff --git a/pandas/core/generic.py b/pandas/core/generic.py index de25a02c6b37c..1048918bb73c7 100644 --- a/pandas/core/generic.py +++ b/pandas/core/generic.py @@ -5,7 +5,6 @@ from copy import deepcopy import datetime as dt from functools import partial -import gc from json import loads import operator import pickle @@ -23,7 +22,6 @@ overload, ) import warnings -import weakref import numpy as np @@ -97,8 +95,6 @@ AbstractMethodError, ChainedAssignmentError, InvalidIndexError, - SettingWithCopyError, - SettingWithCopyWarning, _chained_assignment_method_msg, _chained_assignment_warning_method_msg, _check_cacher, @@ -172,11 +168,7 @@ default_index, ensure_index, ) -from pandas.core.internals import ( - ArrayManager, - BlockManager, - SingleArrayManager, -) +from pandas.core.internals import BlockManager from pandas.core.internals.construction import ( mgr_to_mgr, ndarray_to_mgr, @@ -255,10 +247,8 @@ class NDFrame(PandasObject, indexing.IndexingMixin): _internal_names: list[str] = [ "_mgr", - "_cacher", "_item_cache", "_cache", - "_is_copy", "_name", "_metadata", "_flags", @@ -267,7 +257,6 @@ class NDFrame(PandasObject, indexing.IndexingMixin): _accessors: set[str] = set() _hidden_attrs: frozenset[str] = frozenset([]) _metadata: list[str] = [] - _is_copy: weakref.ReferenceType[NDFrame] | str | None = None _mgr: Manager _attrs: dict[Hashable, Any] _typ: str @@ -276,9 +265,7 @@ class NDFrame(PandasObject, indexing.IndexingMixin): # Constructors def __init__(self, data: Manager) -> None: - 
object.__setattr__(self, "_is_copy", None) object.__setattr__(self, "_mgr", data) - object.__setattr__(self, "_item_cache", {}) object.__setattr__(self, "_attrs", {}) object.__setattr__(self, "_flags", Flags(self, allows_duplicate_labels=True)) @@ -811,7 +798,6 @@ def _set_axis(self, axis: AxisInt, labels: AnyArrayLike | list) -> None: """ labels = ensure_index(labels) self._mgr.set_axis(axis, labels) - self._clear_item_cache() @final def swapaxes(self, axis1: Axis, axis2: Axis, copy: bool_t | None = None) -> Self: @@ -1130,7 +1116,6 @@ def _rename( new_index = ax._transform_index(f, level=level) result._set_axis_nocheck(new_index, axis=axis_no, inplace=True, copy=False) - result._clear_item_cache() if inplace: self._update_inplace(result) @@ -2218,8 +2203,6 @@ def __setstate__(self, state) -> None: elif len(state) == 2: raise NotImplementedError("Pre-0.12 pickles are no longer supported") - self._item_cache: dict[Hashable, Series] = {} - # ---------------------------------------------------------------------- # Rendering Methods @@ -3977,44 +3960,6 @@ def to_csv( storage_options=storage_options, ) - # ---------------------------------------------------------------------- - # Lookup Caching - - def _reset_cacher(self) -> None: - """ - Reset the cacher. - """ - raise AbstractMethodError(self) - - def _maybe_update_cacher( - self, - clear: bool_t = False, - verify_is_copy: bool_t = True, - inplace: bool_t = False, - ) -> None: - """ - See if we need to update our parent cacher if clear, then clear our - cache. - - Parameters - ---------- - clear : bool, default False - Clear the item cache. - verify_is_copy : bool, default True - Provide is_copy checks. 
- """ - if using_copy_on_write(): - return - - if verify_is_copy: - self._check_setitem_copy(t="referent") - - if clear: - self._clear_item_cache() - - def _clear_item_cache(self) -> None: - raise AbstractMethodError(self) - # ---------------------------------------------------------------------- # Indexing Methods @@ -4133,23 +4078,6 @@ class max_speed self, method="take" ) - @final - def _take_with_is_copy(self, indices, axis: Axis = 0) -> Self: - """ - Internal version of the `take` method that sets the `_is_copy` - attribute to keep track of the parent dataframe (using in indexing - for the SettingWithCopyWarning). - - For Series this does the same as the public take (it never sets `_is_copy`). - - See the docstring of `take` for full explanation of the parameters. - """ - result = self.take(indices=indices, axis=axis) - # Maybe set copy if we didn't actually change the index. - if self.ndim == 2 and not result._get_axis(axis).equals(self._get_axis(axis)): - result._set_is_copy(self) - return result - @final def xs( self, @@ -4297,9 +4225,9 @@ class animal locomotion if isinstance(loc, np.ndarray): if loc.dtype == np.bool_: (inds,) = loc.nonzero() - return self._take_with_is_copy(inds, axis=axis) + return self.take(inds, axis=axis) else: - return self._take_with_is_copy(loc, axis=axis) + return self.take(loc, axis=axis) if not is_scalar(loc): new_index = index[loc] @@ -4325,9 +4253,6 @@ class animal locomotion result = self.iloc[loc] result.index = new_index - # this could be a view - # but only in a single-dtyped view sliceable case - result._set_is_copy(self, copy=not result._is_view) return result def __getitem__(self, item): @@ -4363,111 +4288,8 @@ def _slice(self, slobj: slice, axis: AxisInt = 0) -> Self: new_mgr = self._mgr.get_slice(slobj, axis=axis) result = self._constructor_from_mgr(new_mgr, axes=new_mgr.axes) result = result.__finalize__(self) - - # this could be a view - # but only in a single-dtyped view sliceable case - is_copy = axis != 0 or 
result._is_view - result._set_is_copy(self, copy=is_copy) return result - @final - def _set_is_copy(self, ref: NDFrame, copy: bool_t = True) -> None: - if not copy: - self._is_copy = None - else: - assert ref is not None - self._is_copy = weakref.ref(ref) - - def _check_is_chained_assignment_possible(self) -> bool_t: - """ - Check if we are a view, have a cacher, and are of mixed type. - If so, then force a setitem_copy check. - - Should be called just near setting a value - - Will return a boolean if it we are a view and are cached, but a - single-dtype meaning that the cacher should be updated following - setting. - """ - if self._is_copy: - self._check_setitem_copy(t="referent") - return False - - @final - def _check_setitem_copy(self, t: str = "setting", force: bool_t = False): - """ - - Parameters - ---------- - t : str, the type of setting error - force : bool, default False - If True, then force showing an error. - - validate if we are doing a setitem on a chained copy. - - It is technically possible to figure out that we are setting on - a copy even WITH a multi-dtyped pandas object. In other words, some - blocks may be views while other are not. Currently _is_view will ALWAYS - return False for multi-blocks to avoid having to handle this case. - - df = DataFrame(np.arange(0,9), columns=['count']) - df['group'] = 'b' - - # This technically need not raise SettingWithCopy if both are view - # (which is not generally guaranteed but is usually True. However, - # this is in general not a good practice and we recommend using .loc. 
- df.iloc[0:5]['group'] = 'a' - - """ - if using_copy_on_write() or warn_copy_on_write(): - return - - # return early if the check is not needed - if not (force or self._is_copy): - return - - value = config.get_option("mode.chained_assignment") - if value is None: - return - - # see if the copy is not actually referred; if so, then dissolve - # the copy weakref - if self._is_copy is not None and not isinstance(self._is_copy, str): - r = self._is_copy() - if not gc.get_referents(r) or (r is not None and r.shape == self.shape): - self._is_copy = None - return - - # a custom message - if isinstance(self._is_copy, str): - t = self._is_copy - - elif t == "referent": - t = ( - "\n" - "A value is trying to be set on a copy of a slice from a " - "DataFrame\n\n" - "See the caveats in the documentation: " - "https://pandas.pydata.org/pandas-docs/stable/user_guide/" - "indexing.html#returning-a-view-versus-a-copy" - ) - - else: - t = ( - "\n" - "A value is trying to be set on a copy of a slice from a " - "DataFrame.\n" - "Try using .loc[row_indexer,col_indexer] = value " - "instead\n\nSee the caveats in the documentation: " - "https://pandas.pydata.org/pandas-docs/stable/user_guide/" - "indexing.html#returning-a-view-versus-a-copy" - ) - - if value == "raise": - raise SettingWithCopyError(t) - if value == "warn": - warnings.warn(t, SettingWithCopyWarning, stacklevel=find_stack_level()) - @final def __delitem__(self, key) -> None: """ @@ -4500,12 +4322,6 @@ def __delitem__(self, key) -> None: loc = self.axes[-1].get_loc(key) self._mgr = self._mgr.idelete(loc) - # delete from the caches - try: - del self._item_cache[key] - except KeyError: - pass - # ---------------------------------------------------------------------- # Unsorted @@ -4875,22 +4691,17 @@ def _drop_axis( return result.__finalize__(self) @final - def _update_inplace(self, result, verify_is_copy: bool_t = True) -> None: + def _update_inplace(self, result) -> None: """ Replace self internals with result. 
Parameters ---------- result : same type as self - verify_is_copy : bool, default True - Provide is_copy checks. """ # NOTE: This does *not* call __finalize__ and that's an explicit # decision that we may revisit in the future. - self._reset_cache() - self._clear_item_cache() self._mgr = result._mgr - self._maybe_update_cacher(verify_is_copy=verify_is_copy, inplace=True) @final def add_prefix(self, prefix: str, axis: Axis | None = None) -> Self: @@ -6354,13 +6165,7 @@ def _protect_consolidate(self, f): Consolidate _mgr -- if the blocks have changed, then clear the cache """ - if isinstance(self._mgr, (ArrayManager, SingleArrayManager)): - return f() - blocks_before = len(self._mgr.blocks) - result = f() - if len(self._mgr.blocks) != blocks_before: - self._clear_item_cache() - return result + return f() @final def _consolidate_inplace(self) -> None: @@ -6803,7 +6608,6 @@ def copy(self, deep: bool_t | None = True) -> Self: dtype: int64 """ data = self._mgr.copy(deep=deep) - self._clear_item_cache() return self._constructor_from_mgr(data, axes=data.axes).__finalize__( self, method="copy" ) @@ -9280,7 +9084,7 @@ def at_time(self, time, asof: bool_t = False, axis: Axis | None = None) -> Self: raise TypeError("Index must be DatetimeIndex") indexer = index.indexer_at_time(time, asof=asof) - return self._take_with_is_copy(indexer, axis=axis) + return self.take(indexer, axis=axis) @final def between_time( @@ -9365,7 +9169,7 @@ def between_time( include_start=left_inclusive, include_end=right_inclusive, ) - return self._take_with_is_copy(indexer, axis=axis) + return self.take(indexer, axis=axis) @final @doc(klass=_shared_doc_kwargs["klass"]) @@ -12690,14 +12494,9 @@ def _inplace_method(self, other, op) -> Self: ) return self - # Delete cacher - self._reset_cacher() - # this makes sure that we are aligned like the input - # we are updating inplace so we want to ignore is_copy - self._update_inplace( - result.reindex_like(self, copy=False), verify_is_copy=False - ) + # we are 
updating inplace + self._update_inplace(result.reindex_like(self, copy=False)) return self @final diff --git a/pandas/core/groupby/groupby.py b/pandas/core/groupby/groupby.py index 089e15afd465b..5fdb9072e4ba8 100644 --- a/pandas/core/groupby/groupby.py +++ b/pandas/core/groupby/groupby.py @@ -1124,7 +1124,7 @@ def get_group(self, name, obj=None) -> DataFrame | Series: FutureWarning, stacklevel=find_stack_level(), ) - return obj._take_with_is_copy(inds, axis=self.axis) + return obj.take(inds, axis=self.axis) @final def __iter__(self) -> Iterator[tuple[Hashable, NDFrameT]]: diff --git a/pandas/core/indexes/accessors.py b/pandas/core/indexes/accessors.py index 929c7f4a63f8f..64ad765ce007f 100644 --- a/pandas/core/indexes/accessors.py +++ b/pandas/core/indexes/accessors.py @@ -106,16 +106,7 @@ def _delegate_property_get(self, name: str): else: index = self._parent.index # return the result as a Series - result = Series(result, index=index, name=self.name).__finalize__(self._parent) - - # setting this object will show a SettingWithCopyWarning/Error - result._is_copy = ( - "modifications to a property of a datetimelike " - "object are not supported and are discarded. " - "Change values on the original." - ) - - return result + return Series(result, index=index, name=self.name).__finalize__(self._parent) def _delegate_property_set(self, name: str, value, *args, **kwargs): raise ValueError( @@ -134,19 +125,10 @@ def _delegate_method(self, name: str, *args, **kwargs): if not is_list_like(result): return result - result = Series(result, index=self._parent.index, name=self.name).__finalize__( + return Series(result, index=self._parent.index, name=self.name).__finalize__( self._parent ) - # setting this object will show a SettingWithCopyWarning/Error - result._is_copy = ( - "modifications to a method of a datetimelike " - "object are not supported and are discarded. " - "Change values on the original." 
- ) - - return result - @delegate_names( delegate=ArrowExtensionArray, diff --git a/pandas/core/indexing.py b/pandas/core/indexing.py index 4be7e17035128..37e0894476e87 100644 --- a/pandas/core/indexing.py +++ b/pandas/core/indexing.py @@ -1209,7 +1209,7 @@ def _getbool_axis(self, key, axis: AxisInt): labels = self.obj._get_axis(axis) key = check_bool_indexer(labels, key) inds = key.nonzero()[0] - return self.obj._take_with_is_copy(inds, axis=axis) + return self.obj.take(inds, axis=axis) @doc(IndexingMixin.loc) @@ -1712,7 +1712,7 @@ def _get_list_axis(self, key, axis: AxisInt): `axis` can only be zero. """ try: - return self.obj._take_with_is_copy(key, axis=axis) + return self.obj.take(key, axis=axis) except IndexError as err: # re-raise with different error message, e.g. test_getitem_ndarray_3d raise IndexError("positional indexers are out-of-bounds") from err @@ -1920,8 +1920,6 @@ def _setitem_with_indexer(self, indexer, value, name: str = "iloc"): reindexers, allow_dups=True ) self.obj._mgr = new_obj._mgr - self.obj._maybe_update_cacher(clear=True) - self.obj._is_copy = None nindexer.append(labels.get_loc(key)) @@ -2147,8 +2145,6 @@ def _setitem_single_column(self, loc: int, value, plane_indexer) -> None: # falling back to casting if necessary) self.obj._mgr.column_setitem(loc, plane_indexer, value) - self.obj._clear_item_cache() - def _setitem_single_block(self, indexer, value, name: str) -> None: """ _setitem_with_indexer for the case when we have a single Block. 
@@ -2184,12 +2180,8 @@ def _setitem_single_block(self, indexer, value, name: str) -> None: if isinstance(value, ABCDataFrame) and name != "iloc": value = self._align_frame(indexer, value)._values - # check for chained assignment - self.obj._check_is_chained_assignment_possible() - # actually do the set self.obj._mgr = self.obj._mgr.setitem(indexer=indexer, value=value) - self.obj._maybe_update_cacher(clear=True, inplace=True) def _setitem_with_indexer_missing(self, indexer, value): """ @@ -2255,7 +2247,6 @@ def _setitem_with_indexer_missing(self, indexer, value): self.obj._mgr = self.obj._constructor( new_values, index=new_index, name=self.obj.name )._mgr - self.obj._maybe_update_cacher(clear=True) elif self.ndim == 2: if not len(self.obj.columns): @@ -2299,7 +2290,6 @@ def _setitem_with_indexer_missing(self, indexer, value): self.obj._mgr = df._mgr else: self.obj._mgr = self.obj._append(value)._mgr - self.obj._maybe_update_cacher(clear=True) def _ensure_iterable_column_indexer(self, column_indexer): """ diff --git a/pandas/core/interchange/from_dataframe.py b/pandas/core/interchange/from_dataframe.py index d45ae37890ba7..99d1ec033b753 100644 --- a/pandas/core/interchange/from_dataframe.py +++ b/pandas/core/interchange/from_dataframe.py @@ -7,7 +7,6 @@ import numpy as np from pandas.compat._optional import import_optional_dependency -from pandas.errors import SettingWithCopyError import pandas as pd from pandas.core.interchange.dataframe_protocol import ( @@ -515,9 +514,5 @@ def set_nulls( # cast the `data` to nullable float dtype. data = data.astype(float) data[null_pos] = None - except SettingWithCopyError: - # `SettingWithCopyError` may happen for datetime-like with missing values. 
- data = data.copy() - data[null_pos] = None return data diff --git a/pandas/core/series.py b/pandas/core/series.py index e3b401cd3c88b..0257a6d78a0ac 100644 --- a/pandas/core/series.py +++ b/pandas/core/series.py @@ -22,7 +22,6 @@ overload, ) import warnings -import weakref import numpy as np @@ -1268,7 +1267,6 @@ def __setitem__(self, key, value) -> None: check_dict_or_set_indexers(key) key = com.apply_if_callable(key, self) - cacher_needs_updating = self._check_is_chained_assignment_possible() if key is Ellipsis: key = slice(None) @@ -1346,9 +1344,6 @@ def __setitem__(self, key, value) -> None: else: self._set_with(key, value, warn=warn) - if cacher_needs_updating: - self._maybe_update_cacher(inplace=True) - def _set_with_engine(self, key, value, warn: bool = True) -> None: loc = self.index.get_loc(key) @@ -1400,7 +1395,6 @@ def _set_values(self, key, value, warn: bool = True) -> None: key = key._values self._mgr = self._mgr.setitem(indexer=key, value=value, warn=warn) - self._maybe_update_cacher() def _set_value(self, label, value, takeable: bool = False) -> None: """ @@ -1429,84 +1423,6 @@ def _set_value(self, label, value, takeable: bool = False) -> None: self._set_values(loc, value) - # ---------------------------------------------------------------------- - # Lookup Caching - - @property - def _is_cached(self) -> bool: - """Return boolean indicating if self is cached or not.""" - return getattr(self, "_cacher", None) is not None - - def _get_cacher(self): - """return my cacher or None""" - cacher = getattr(self, "_cacher", None) - if cacher is not None: - cacher = cacher[1]() - return cacher - - def _reset_cacher(self) -> None: - """ - Reset the cacher. - """ - if hasattr(self, "_cacher"): - del self._cacher - - def _set_as_cached(self, item, cacher) -> None: - """ - Set the _cacher attribute on the calling object with a weakref to - cacher. 
- """ - if using_copy_on_write(): - return - self._cacher = (item, weakref.ref(cacher)) - - def _clear_item_cache(self) -> None: - # no-op for Series - pass - - def _check_is_chained_assignment_possible(self) -> bool: - """ - See NDFrame._check_is_chained_assignment_possible.__doc__ - """ - if self._is_view and self._is_cached: - ref = self._get_cacher() - if ref is not None and ref._is_mixed_type: - self._check_setitem_copy(t="referent", force=True) - return True - return super()._check_is_chained_assignment_possible() - - def _maybe_update_cacher( - self, clear: bool = False, verify_is_copy: bool = True, inplace: bool = False - ) -> None: - """ - See NDFrame._maybe_update_cacher.__doc__ - """ - # for CoW, we never want to update the parent DataFrame cache - # if the Series changed, but don't keep track of any cacher - if using_copy_on_write(): - return - cacher = getattr(self, "_cacher", None) - if cacher is not None: - ref: DataFrame = cacher[1]() - - # we are trying to reference a dead referent, hence - # a copy - if ref is None: - del self._cacher - elif len(self) == len(ref) and self.name in ref.columns: - # GH#42530 self.name must be in ref.columns - # to ensure column still in dataframe - # otherwise, either self or ref has swapped in new arrays - ref._maybe_cache_changed(cacher[0], self, inplace=inplace) - else: - # GH#33675 we have swapped in a new array, so parent - # reference to self is now invalid - ref._item_cache.pop(cacher[0], None) - - super()._maybe_update_cacher( - clear=clear, verify_is_copy=verify_is_copy, inplace=inplace - ) - # ---------------------------------------------------------------------- # Unsorted @@ -3615,7 +3531,6 @@ def update(self, other: Series | Sequence | Mapping) -> None: mask = notna(other) self._mgr = self._mgr.putmask(mask=mask, new=other) - self._maybe_update_cacher() # ---------------------------------------------------------------------- # Reindexing, sorting @@ -3819,13 +3734,6 @@ def sort_values( # Validate the 
axis parameter self._get_axis_number(axis) - # GH 5856/5853 - if inplace and self._is_cached: - raise ValueError( - "This Series is a view of some other array, to " - "sort in-place you must create a copy" - ) - if is_list_like(ascending): ascending = cast(Sequence[bool], ascending) if len(ascending) != 1: diff --git a/pandas/errors/__init__.py b/pandas/errors/__init__.py index 01094ba36b9dd..3b8ea14e764c8 100644 --- a/pandas/errors/__init__.py +++ b/pandas/errors/__init__.py @@ -408,50 +408,6 @@ class SpecificationError(Exception): """ -class SettingWithCopyError(ValueError): - """ - Exception raised when trying to set on a copied slice from a ``DataFrame``. - - The ``mode.chained_assignment`` needs to be set to set to 'raise.' This can - happen unintentionally when chained indexing. - - For more information on evaluation order, - see :ref:`the user guide`. - - For more information on view vs. copy, - see :ref:`the user guide`. - - Examples - -------- - >>> pd.options.mode.chained_assignment = 'raise' - >>> df = pd.DataFrame({'A': [1, 1, 1, 2, 2]}, columns=['A']) - >>> df.loc[0:3]['A'] = 'a' # doctest: +SKIP - ... # SettingWithCopyError: A value is trying to be set on a copy of a... - """ - - -class SettingWithCopyWarning(Warning): - """ - Warning raised when trying to set on a copied slice from a ``DataFrame``. - - The ``mode.chained_assignment`` needs to be set to set to 'warn.' - 'Warn' is the default option. This can happen unintentionally when - chained indexing. - - For more information on evaluation order, - see :ref:`the user guide`. - - For more information on view vs. copy, - see :ref:`the user guide`. - - Examples - -------- - >>> df = pd.DataFrame({'A': [1, 1, 1, 2, 2]}, columns=['A']) - >>> df.loc[0:3]['A'] = 'a' # doctest: +SKIP - ... # SettingWithCopyWarning: A value is trying to be set on a copy of a... - """ - - class ChainedAssignmentError(Warning): """ Warning raised when trying to set using chained assignment. 
@@ -840,8 +796,6 @@ class InvalidComparison(Exception): "PossiblePrecisionLoss", "PyperclipException", "PyperclipWindowsException", - "SettingWithCopyError", - "SettingWithCopyWarning", "SpecificationError", "UndefinedVariableError", "UnsortedIndexError", diff --git a/pandas/io/json/_json.py b/pandas/io/json/_json.py index ed66e46b300f7..9a36464dbf556 100644 --- a/pandas/io/json/_json.py +++ b/pandas/io/json/_json.py @@ -251,8 +251,6 @@ def __init__( self.default_handler = default_handler self.index = index self.indent = indent - - self.is_copy = None self._format_axes() def _format_axes(self): diff --git a/pandas/tests/copy_view/test_chained_assignment_deprecation.py b/pandas/tests/copy_view/test_chained_assignment_deprecation.py index 80e38380ed27c..e3203dd7c4d65 100644 --- a/pandas/tests/copy_view/test_chained_assignment_deprecation.py +++ b/pandas/tests/copy_view/test_chained_assignment_deprecation.py @@ -1,15 +1,9 @@ import numpy as np import pytest -from pandas.errors import ( - ChainedAssignmentError, - SettingWithCopyWarning, -) +from pandas.errors import ChainedAssignmentError -from pandas import ( - DataFrame, - option_context, -) +from pandas import DataFrame import pandas._testing as tm @@ -97,15 +91,11 @@ def test_series_setitem(indexer, using_copy_on_write, warn_copy_on_write): assert "ChainedAssignmentError" in record[0].message.args[0] -@pytest.mark.filterwarnings("ignore::pandas.errors.SettingWithCopyWarning") @pytest.mark.parametrize( "indexer", ["a", ["a", "b"], slice(0, 2), np.array([True, False, True])] ) -def test_frame_setitem(indexer, using_copy_on_write): +def test_frame_setitem(indexer): df = DataFrame({"a": [1, 2, 3, 4, 5], "b": 1}) - extra_warnings = () if using_copy_on_write else (SettingWithCopyWarning,) - - with option_context("chained_assignment", "warn"): - with tm.raises_chained_assignment_error(extra_warnings=extra_warnings): - df[0:3][indexer] = 10 + with tm.raises_chained_assignment_error(): + df[0:3][indexer] = 10 diff --git 
a/pandas/tests/copy_view/test_indexing.py b/pandas/tests/copy_view/test_indexing.py index 6f3850ab64daa..c241da56992c6 100644 --- a/pandas/tests/copy_view/test_indexing.py +++ b/pandas/tests/copy_view/test_indexing.py @@ -1,8 +1,6 @@ import numpy as np import pytest -from pandas.errors import SettingWithCopyWarning - from pandas.core.dtypes.common import is_float_dtype import pandas as pd @@ -59,17 +57,10 @@ def test_subset_column_selection(backend, using_copy_on_write): subset = df[["a", "c"]] - if using_copy_on_write: - # the subset shares memory ... - assert np.shares_memory(get_array(subset, "a"), get_array(df, "a")) - # ... but uses CoW when being modified - subset.iloc[0, 0] = 0 - else: - assert not np.shares_memory(get_array(subset, "a"), get_array(df, "a")) - # INFO this no longer raise warning since pandas 1.4 - # with pd.option_context("chained_assignment", "warn"): - # with tm.assert_produces_warning(SettingWithCopyWarning): - subset.iloc[0, 0] = 0 + # the subset shares memory ... + assert np.shares_memory(get_array(subset, "a"), get_array(df, "a")) + # ... 
but uses CoW when being modified + subset.iloc[0, 0] = 0 assert not np.shares_memory(get_array(subset, "a"), get_array(df, "a")) @@ -113,42 +104,24 @@ def test_subset_row_slice(backend, using_copy_on_write, warn_copy_on_write): assert np.shares_memory(get_array(subset, "a"), get_array(df, "a")) - if using_copy_on_write: - subset.iloc[0, 0] = 0 - assert not np.shares_memory(get_array(subset, "a"), get_array(df, "a")) - - else: - # INFO this no longer raise warning since pandas 1.4 - # with pd.option_context("chained_assignment", "warn"): - # with tm.assert_produces_warning(SettingWithCopyWarning): - with tm.assert_cow_warning(warn_copy_on_write): - subset.iloc[0, 0] = 0 + subset.iloc[0, 0] = 0 + assert not np.shares_memory(get_array(subset, "a"), get_array(df, "a")) subset._mgr._verify_integrity() expected = DataFrame({"a": [0, 3], "b": [5, 6], "c": [0.2, 0.3]}, index=range(1, 3)) tm.assert_frame_equal(subset, expected) - if using_copy_on_write: - # original parent dataframe is not modified (CoW) - tm.assert_frame_equal(df, df_orig) - else: - # original parent dataframe is actually updated - df_orig.iloc[1, 0] = 0 - tm.assert_frame_equal(df, df_orig) + # original parent dataframe is not modified (CoW) + tm.assert_frame_equal(df, df_orig) @pytest.mark.parametrize( "dtype", ["int64", "float64"], ids=["single-block", "mixed-block"] ) -def test_subset_column_slice( - backend, using_copy_on_write, warn_copy_on_write, using_array_manager, dtype -): +def test_subset_column_slice(backend, dtype): # Case: taking a subset of the columns of a DataFrame using a slice # + afterwards modifying the subset dtype_backend, DataFrame, _ = backend - single_block = ( - dtype == "int64" and dtype_backend == "numpy" - ) and not using_array_manager df = DataFrame( {"a": [1, 2, 3], "b": [4, 5, 6], "c": np.array([7, 8, 9], dtype=dtype)} ) @@ -157,30 +130,16 @@ def test_subset_column_slice( subset = df.iloc[:, 1:] subset._mgr._verify_integrity() - if using_copy_on_write: - assert 
np.shares_memory(get_array(subset, "b"), get_array(df, "b")) + assert np.shares_memory(get_array(subset, "b"), get_array(df, "b")) - subset.iloc[0, 0] = 0 - assert not np.shares_memory(get_array(subset, "b"), get_array(df, "b")) - elif warn_copy_on_write: - with tm.assert_cow_warning(single_block): - subset.iloc[0, 0] = 0 - else: - # we only get a warning in case of a single block - warn = SettingWithCopyWarning if single_block else None - with pd.option_context("chained_assignment", "warn"): - with tm.assert_produces_warning(warn): - subset.iloc[0, 0] = 0 + subset.iloc[0, 0] = 0 + assert not np.shares_memory(get_array(subset, "b"), get_array(df, "b")) expected = DataFrame({"b": [0, 5, 6], "c": np.array([7, 8, 9], dtype=dtype)}) tm.assert_frame_equal(subset, expected) # original parent dataframe is not modified (also not for BlockManager case, # except for single block) - if not using_copy_on_write and (using_array_manager or single_block): - df_orig.iloc[0, 1] = 0 - tm.assert_frame_equal(df, df_orig) - else: - tm.assert_frame_equal(df, df_orig) + tm.assert_frame_equal(df, df_orig) @pytest.mark.parametrize( @@ -316,9 +275,7 @@ def test_subset_iloc_rows_columns( [slice(0, 2), np.array([True, True, False]), np.array([0, 1])], ids=["slice", "mask", "array"], ) -def test_subset_set_with_row_indexer( - backend, indexer_si, indexer, using_copy_on_write, warn_copy_on_write -): +def test_subset_set_with_row_indexer(backend, indexer_si, indexer): # Case: setting values with a row indexer on a viewing subset # subset[indexer] = value and subset.iloc[indexer] = value _, DataFrame, _ = backend @@ -333,32 +290,17 @@ def test_subset_set_with_row_indexer( ): pytest.skip("setitem with labels selects on columns") - if using_copy_on_write: - indexer_si(subset)[indexer] = 0 - elif warn_copy_on_write: - with tm.assert_cow_warning(): - indexer_si(subset)[indexer] = 0 - else: - # INFO iloc no longer raises warning since pandas 1.4 - warn = SettingWithCopyWarning if indexer_si is 
tm.setitem else None - with pd.option_context("chained_assignment", "warn"): - with tm.assert_produces_warning(warn): - indexer_si(subset)[indexer] = 0 + indexer_si(subset)[indexer] = 0 expected = DataFrame( {"a": [0, 0, 4], "b": [0, 0, 7], "c": [0.0, 0.0, 0.4]}, index=range(1, 4) ) tm.assert_frame_equal(subset, expected) - if using_copy_on_write: - # original parent dataframe is not modified (CoW) - tm.assert_frame_equal(df, df_orig) - else: - # original parent dataframe is actually updated - df_orig[1:3] = 0 - tm.assert_frame_equal(df, df_orig) + # original parent dataframe is not modified (CoW) + tm.assert_frame_equal(df, df_orig) -def test_subset_set_with_mask(backend, using_copy_on_write, warn_copy_on_write): +def test_subset_set_with_mask(backend): # Case: setting values with a mask on a viewing subset: subset[mask] = value _, DataFrame, _ = backend df = DataFrame({"a": [1, 2, 3, 4], "b": [4, 5, 6, 7], "c": [0.1, 0.2, 0.3, 0.4]}) @@ -367,31 +309,16 @@ def test_subset_set_with_mask(backend, using_copy_on_write, warn_copy_on_write): mask = subset > 3 - if using_copy_on_write: - subset[mask] = 0 - elif warn_copy_on_write: - with tm.assert_cow_warning(): - subset[mask] = 0 - else: - with pd.option_context("chained_assignment", "warn"): - with tm.assert_produces_warning(SettingWithCopyWarning): - subset[mask] = 0 + subset[mask] = 0 expected = DataFrame( {"a": [2, 3, 0], "b": [0, 0, 0], "c": [0.20, 0.3, 0.4]}, index=range(1, 4) ) tm.assert_frame_equal(subset, expected) - if using_copy_on_write: - # original parent dataframe is not modified (CoW) - tm.assert_frame_equal(df, df_orig) - else: - # original parent dataframe is actually updated - df_orig.loc[3, "a"] = 0 - df_orig.loc[1:3, "b"] = 0 - tm.assert_frame_equal(df, df_orig) + tm.assert_frame_equal(df, df_orig) -def test_subset_set_column(backend, using_copy_on_write, warn_copy_on_write): +def test_subset_set_column(backend): # Case: setting a single column on a viewing subset -> subset[col] = value 
dtype_backend, DataFrame, _ = backend df = DataFrame({"a": [1, 2, 3], "b": [4, 5, 6], "c": [0.1, 0.2, 0.3]}) @@ -403,13 +330,7 @@ def test_subset_set_column(backend, using_copy_on_write, warn_copy_on_write): else: arr = pd.array([10, 11], dtype="Int64") - if using_copy_on_write or warn_copy_on_write: - subset["a"] = arr - else: - with pd.option_context("chained_assignment", "warn"): - with tm.assert_produces_warning(SettingWithCopyWarning): - subset["a"] = arr - + subset["a"] = arr subset._mgr._verify_integrity() expected = DataFrame( {"a": [10, 11], "b": [5, 6], "c": [0.2, 0.3]}, index=range(1, 3) @@ -511,17 +432,11 @@ def test_subset_set_columns(backend, using_copy_on_write, warn_copy_on_write, dt df_orig = df.copy() subset = df[1:3] - if using_copy_on_write or warn_copy_on_write: - subset[["a", "c"]] = 0 - else: - with pd.option_context("chained_assignment", "warn"): - with tm.assert_produces_warning(SettingWithCopyWarning): - subset[["a", "c"]] = 0 + subset[["a", "c"]] = 0 subset._mgr._verify_integrity() - if using_copy_on_write: - # first and third column should certainly have no references anymore - assert all(subset._mgr._has_no_reference(i) for i in [0, 2]) + # first and third column should certainly have no references anymore + assert all(subset._mgr._has_no_reference(i) for i in [0, 2]) expected = DataFrame({"a": [0, 0], "b": [5, 6], "c": [0, 0]}, index=range(1, 3)) if dtype_backend == "nullable": # there is not yet a global option, so overriding a column by setting a scalar @@ -650,9 +565,7 @@ def test_subset_chained_getitem( @pytest.mark.parametrize( "dtype", ["int64", "float64"], ids=["single-block", "mixed-block"] ) -def test_subset_chained_getitem_column( - backend, dtype, using_copy_on_write, warn_copy_on_write -): +def test_subset_chained_getitem_column(backend, dtype, warn_copy_on_write): # Case: creating a subset using multiple, chained getitem calls using views # still needs to guarantee proper CoW behaviour dtype_backend, DataFrame, Series = 
backend @@ -663,13 +576,9 @@ def test_subset_chained_getitem_column( # modify subset -> don't modify parent subset = df[:]["a"][0:2] - df._clear_item_cache() with tm.assert_cow_warning(warn_copy_on_write): subset.iloc[0] = 0 - if using_copy_on_write: - tm.assert_frame_equal(df, df_orig) - else: - assert df.iloc[0, 0] == 0 + tm.assert_frame_equal(df, df_orig) # modify parent -> don't modify subset subset = df[:]["a"][0:2] @@ -677,10 +586,7 @@ def test_subset_chained_getitem_column( with tm.assert_cow_warning(warn_copy_on_write): df.iloc[0, 0] = 0 expected = Series([1, 2], name="a") - if using_copy_on_write: - tm.assert_series_equal(subset, expected) - else: - assert subset.iloc[0] == 0 + tm.assert_series_equal(subset, expected) @pytest.mark.parametrize( @@ -967,9 +873,7 @@ def test_del_series(backend): # Accessing column as Series -def test_column_as_series( - backend, using_copy_on_write, warn_copy_on_write, using_array_manager -): +def test_column_as_series(backend): # Case: selecting a single column now also uses Copy-on-Write dtype_backend, DataFrame, Series = backend df = DataFrame({"a": [1, 2, 3], "b": [4, 5, 6], "c": [0.1, 0.2, 0.3]}) @@ -978,34 +882,17 @@ def test_column_as_series( s = df["a"] assert np.shares_memory(get_array(s, "a"), get_array(df, "a")) - - if using_copy_on_write or using_array_manager: - s[0] = 0 - else: - if warn_copy_on_write: - with tm.assert_cow_warning(): - s[0] = 0 - else: - warn = SettingWithCopyWarning if dtype_backend == "numpy" else None - with pd.option_context("chained_assignment", "warn"): - with tm.assert_produces_warning(warn): - s[0] = 0 + s[0] = 0 expected = Series([0, 2, 3], name="a") tm.assert_series_equal(s, expected) - if using_copy_on_write: - # assert not np.shares_memory(s.values, get_array(df, "a")) - tm.assert_frame_equal(df, df_orig) - # ensure cached series on getitem is not the changed series - tm.assert_series_equal(df["a"], df_orig["a"]) - else: - df_orig.iloc[0, 0] = 0 - tm.assert_frame_equal(df, df_orig) + 
# assert not np.shares_memory(s.values, get_array(df, "a")) + tm.assert_frame_equal(df, df_orig) + # ensure cached series on getitem is not the changed series + tm.assert_series_equal(df["a"], df_orig["a"]) -def test_column_as_series_set_with_upcast( - backend, using_copy_on_write, using_array_manager, warn_copy_on_write -): +def test_column_as_series_set_with_upcast(backend, warn_copy_on_write): # Case: selecting a single column now also uses Copy-on-Write -> when # setting a value causes an upcast, we don't need to update the parent # DataFrame through the cache mechanism @@ -1019,33 +906,17 @@ def test_column_as_series_set_with_upcast( with pytest.raises(TypeError, match="Invalid value"): s[0] = "foo" expected = Series([1, 2, 3], name="a") - elif using_copy_on_write or warn_copy_on_write or using_array_manager: + else: # TODO(CoW-warn) assert the FutureWarning for CoW is also raised with tm.assert_produces_warning(FutureWarning, match="incompatible dtype"): s[0] = "foo" expected = Series(["foo", 2, 3], dtype=object, name="a") - else: - with pd.option_context("chained_assignment", "warn"): - msg = "|".join( - [ - "A value is trying to be set on a copy of a slice from a DataFrame", - "Setting an item of incompatible dtype is deprecated", - ] - ) - with tm.assert_produces_warning( - (SettingWithCopyWarning, FutureWarning), match=msg - ): - s[0] = "foo" expected = Series(["foo", 2, 3], dtype=object, name="a") tm.assert_series_equal(s, expected) - if using_copy_on_write: - tm.assert_frame_equal(df, df_orig) - # ensure cached series on getitem is not the changed series - tm.assert_series_equal(df["a"], df_orig["a"]) - else: - df_orig["a"] = expected - tm.assert_frame_equal(df, df_orig) + tm.assert_frame_equal(df, df_orig) + # ensure cached series on getitem is not the changed series + tm.assert_series_equal(df["a"], df_orig["a"]) @pytest.mark.parametrize( @@ -1057,14 +928,7 @@ def test_column_as_series_set_with_upcast( ], ids=["getitem", "loc", "iloc"], ) -def 
test_column_as_series_no_item_cache( - request, - backend, - method, - using_copy_on_write, - warn_copy_on_write, - using_array_manager, -): +def test_column_as_series_no_item_cache(request, backend, method): # Case: selecting a single column (which now also uses Copy-on-Write to protect # the view) should always give a new object (i.e. not make use of a cache) dtype_backend, DataFrame, _ = backend @@ -1074,28 +938,12 @@ def test_column_as_series_no_item_cache( s1 = method(df) s2 = method(df) - is_iloc = "iloc" in request.node.name - if using_copy_on_write or warn_copy_on_write or is_iloc: - assert s1 is not s2 - else: - assert s1 is s2 + assert s1 is not s2 - if using_copy_on_write or using_array_manager: - s1.iloc[0] = 0 - elif warn_copy_on_write: - with tm.assert_cow_warning(): - s1.iloc[0] = 0 - else: - warn = SettingWithCopyWarning if dtype_backend == "numpy" else None - with pd.option_context("chained_assignment", "warn"): - with tm.assert_produces_warning(warn): - s1.iloc[0] = 0 + s1.iloc[0] = 0 - if using_copy_on_write: - tm.assert_series_equal(s2, df_orig["a"]) - tm.assert_frame_equal(df, df_orig) - else: - assert s2.iloc[0] == 0 + tm.assert_series_equal(s2, df_orig["a"]) + tm.assert_frame_equal(df, df_orig) # TODO add tests for other indexing methods on the Series @@ -1181,30 +1029,16 @@ def test_series_midx_slice(using_copy_on_write, warn_copy_on_write): tm.assert_series_equal(ser, expected) -def test_getitem_midx_slice( - using_copy_on_write, warn_copy_on_write, using_array_manager -): +def test_getitem_midx_slice(): df = DataFrame({("a", "x"): [1, 2], ("a", "y"): 1, ("b", "x"): 2}) df_orig = df.copy() new_df = df[("a",)] - if using_copy_on_write: - assert not new_df._mgr._has_no_reference(0) + assert not new_df._mgr._has_no_reference(0) - if not using_array_manager: - assert np.shares_memory(get_array(df, ("a", "x")), get_array(new_df, "x")) - if using_copy_on_write: - new_df.iloc[0, 0] = 100 - tm.assert_frame_equal(df_orig, df) - else: - if 
warn_copy_on_write: - with tm.assert_cow_warning(): - new_df.iloc[0, 0] = 100 - else: - with pd.option_context("chained_assignment", "warn"): - with tm.assert_produces_warning(SettingWithCopyWarning): - new_df.iloc[0, 0] = 100 - assert df.iloc[0, 0] == 100 + assert np.shares_memory(get_array(df, ("a", "x")), get_array(new_df, "x")) + new_df.iloc[0, 0] = 100 + tm.assert_frame_equal(df_orig, df) def test_series_midx_tuples_slice(using_copy_on_write, warn_copy_on_write): diff --git a/pandas/tests/copy_view/test_internals.py b/pandas/tests/copy_view/test_internals.py index a727331307d7e..e1c56ee02df06 100644 --- a/pandas/tests/copy_view/test_internals.py +++ b/pandas/tests/copy_view/test_internals.py @@ -3,7 +3,6 @@ import pandas.util._test_decorators as td -import pandas as pd from pandas import DataFrame import pandas._testing as tm from pandas.tests.copy_view.util import get_array @@ -45,36 +44,6 @@ def test_consolidate(using_copy_on_write): assert df.loc[0, "b"] == 0.1 -@pytest.mark.single_cpu -@td.skip_array_manager_invalid_test -def test_switch_options(): - # ensure we can switch the value of the option within one session - # (assuming data is constructed after switching) - - # using the option_context to ensure we set back to global option value - # after running the test - with pd.option_context("mode.copy_on_write", False): - df = DataFrame({"a": [1, 2, 3], "b": [0.1, 0.2, 0.3]}) - subset = df[:] - subset.iloc[0, 0] = 0 - # df updated with CoW disabled - assert df.iloc[0, 0] == 0 - - pd.options.mode.copy_on_write = True - df = DataFrame({"a": [1, 2, 3], "b": [0.1, 0.2, 0.3]}) - subset = df[:] - subset.iloc[0, 0] = 0 - # df not updated with CoW enabled - assert df.iloc[0, 0] == 1 - - pd.options.mode.copy_on_write = False - df = DataFrame({"a": [1, 2, 3], "b": [0.1, 0.2, 0.3]}) - subset = df[:] - subset.iloc[0, 0] = 0 - # df updated with CoW disabled - assert df.iloc[0, 0] == 0 - - @td.skip_array_manager_invalid_test @pytest.mark.parametrize("dtype", [np.intp, 
np.int8]) @pytest.mark.parametrize( diff --git a/pandas/tests/copy_view/test_methods.py b/pandas/tests/copy_view/test_methods.py index 862aebdc70a9d..f83d5b078b3b1 100644 --- a/pandas/tests/copy_view/test_methods.py +++ b/pandas/tests/copy_view/test_methods.py @@ -1,8 +1,6 @@ import numpy as np import pytest -from pandas.errors import SettingWithCopyWarning - import pandas as pd from pandas import ( DataFrame, @@ -1692,26 +1690,10 @@ def test_get(using_copy_on_write, warn_copy_on_write, key): result = df.get(key) - if using_copy_on_write: - assert np.shares_memory(get_array(result, "a"), get_array(df, "a")) - result.iloc[0] = 0 - assert not np.shares_memory(get_array(result, "a"), get_array(df, "a")) - tm.assert_frame_equal(df, df_orig) - else: - # for non-CoW it depends on whether we got a Series or DataFrame if it - # is a view or copy or triggers a warning or not - if warn_copy_on_write: - warn = FutureWarning if isinstance(key, str) else None - else: - warn = SettingWithCopyWarning if isinstance(key, list) else None - with option_context("chained_assignment", "warn"): - with tm.assert_produces_warning(warn): - result.iloc[0] = 0 - - if isinstance(key, list): - tm.assert_frame_equal(df, df_orig) - else: - assert df.iloc[0, 0] == 0 + assert np.shares_memory(get_array(result, "a"), get_array(df, "a")) + result.iloc[0] = 0 + assert not np.shares_memory(get_array(result, "a"), get_array(df, "a")) + tm.assert_frame_equal(df, df_orig) @pytest.mark.parametrize("axis, key", [(0, 0), (1, "a")]) @@ -1732,23 +1714,13 @@ def test_xs( if axis == 1 or single_block: assert np.shares_memory(get_array(df, "a"), get_array(result)) - elif using_copy_on_write: + else: assert result._mgr._has_no_reference(0) if using_copy_on_write or (is_view and not warn_copy_on_write): result.iloc[0] = 0 - elif warn_copy_on_write: - with tm.assert_cow_warning(single_block or axis == 1): - result.iloc[0] = 0 - else: - with option_context("chained_assignment", "warn"): - with 
tm.assert_produces_warning(SettingWithCopyWarning): - result.iloc[0] = 0 - if using_copy_on_write or (not single_block and axis == 0): - tm.assert_frame_equal(df, df_orig) - else: - assert df.iloc[0, 0] == 0 + tm.assert_frame_equal(df, df_orig) @pytest.mark.parametrize("axis", [0, 1]) @@ -1769,16 +1741,7 @@ def test_xs_multiindex( assert np.shares_memory( get_array(df, df.columns[0]), get_array(result, result.columns[0]) ) - - if warn_copy_on_write: - warn = FutureWarning if level == 0 else None - elif not using_copy_on_write and not using_array_manager: - warn = SettingWithCopyWarning - else: - warn = None - with option_context("chained_assignment", "warn"): - with tm.assert_produces_warning(warn): - result.iloc[0, 0] = 0 + result.iloc[0, 0] = 0 tm.assert_frame_equal(df, df_orig) diff --git a/pandas/tests/extension/conftest.py b/pandas/tests/extension/conftest.py index c5b1295ee4a7d..3a3844d5a8b7a 100644 --- a/pandas/tests/extension/conftest.py +++ b/pandas/tests/extension/conftest.py @@ -2,12 +2,7 @@ import pytest -from pandas._config.config import _get_option - -from pandas import ( - Series, - options, -) +from pandas import Series @pytest.fixture @@ -224,7 +219,4 @@ def using_copy_on_write() -> bool: """ Fixture to check if Copy-on-Write is enabled. 
""" - return ( - options.mode.copy_on_write is True - and _get_option("mode.data_manager", silent=True) == "block" - ) + return True diff --git a/pandas/tests/frame/indexing/test_indexing.py b/pandas/tests/frame/indexing/test_indexing.py index 97e7ae15c6c63..3277543b0088d 100644 --- a/pandas/tests/frame/indexing/test_indexing.py +++ b/pandas/tests/frame/indexing/test_indexing.py @@ -13,7 +13,6 @@ from pandas.errors import ( InvalidIndexError, PerformanceWarning, - SettingWithCopyError, ) import pandas.util._test_decorators as td @@ -288,9 +287,7 @@ def test_setattr_column(self): df.foobar = 5 assert (df.foobar == 5).all() - def test_setitem( - self, float_frame, using_copy_on_write, warn_copy_on_write, using_infer_string - ): + def test_setitem(self, float_frame, using_infer_string): # not sure what else to do here series = float_frame["A"][::2] float_frame["col5"] = series @@ -325,13 +322,8 @@ def test_setitem( # so raise/warn smaller = float_frame[:2] - msg = r"\nA value is trying to be set on a copy of a slice from a DataFrame" - if using_copy_on_write or warn_copy_on_write: - # With CoW, adding a new column doesn't raise a warning - smaller["col10"] = ["1", "2"] - else: - with pytest.raises(SettingWithCopyError, match=msg): - smaller["col10"] = ["1", "2"] + # With CoW, adding a new column doesn't raise a warning + smaller["col10"] = ["1", "2"] if using_infer_string: assert smaller["col10"].dtype == "string" diff --git a/pandas/tests/frame/indexing/test_xs.py b/pandas/tests/frame/indexing/test_xs.py index be809e3a17c8e..c3d7c7eedff24 100644 --- a/pandas/tests/frame/indexing/test_xs.py +++ b/pandas/tests/frame/indexing/test_xs.py @@ -3,8 +3,6 @@ import numpy as np import pytest -from pandas.errors import SettingWithCopyError - from pandas import ( DataFrame, Index, @@ -122,30 +120,16 @@ def test_xs_keep_level(self): result = df.xs((2008, "sat"), level=["year", "day"], drop_level=False) tm.assert_frame_equal(result, expected) - def test_xs_view( - self, 
using_array_manager, using_copy_on_write, warn_copy_on_write - ): + def test_xs_view(self): # in 0.14 this will return a view if possible a copy otherwise, but # this is numpy dependent dm = DataFrame(np.arange(20.0).reshape(4, 5), index=range(4), columns=range(5)) df_orig = dm.copy() - if using_copy_on_write: - with tm.raises_chained_assignment_error(): - dm.xs(2)[:] = 20 - tm.assert_frame_equal(dm, df_orig) - elif using_array_manager: - # INFO(ArrayManager) with ArrayManager getting a row as a view is - # not possible - msg = r"\nA value is trying to be set on a copy of a slice from a DataFrame" - with pytest.raises(SettingWithCopyError, match=msg): - dm.xs(2)[:] = 20 - assert not (dm.xs(2) == 20).any() - else: - with tm.raises_chained_assignment_error(): - dm.xs(2)[:] = 20 - assert (dm.xs(2) == 20).all() + with tm.raises_chained_assignment_error(): + dm.xs(2)[:] = 20 + tm.assert_frame_equal(dm, df_orig) class TestXSWithMultiIndex: @@ -203,43 +187,22 @@ def test_xs_level_eq_2(self): result = df.xs("c", level=2) tm.assert_frame_equal(result, expected) - def test_xs_setting_with_copy_error( - self, - multiindex_dataframe_random_data, - using_copy_on_write, - warn_copy_on_write, - ): + def test_xs_setting_with_copy_error(self, multiindex_dataframe_random_data): # this is a copy in 0.14 df = multiindex_dataframe_random_data df_orig = df.copy() result = df.xs("two", level="second") - if using_copy_on_write or warn_copy_on_write: - result[:] = 10 - else: - # setting this will give a SettingWithCopyError - # as we are trying to write a view - msg = "A value is trying to be set on a copy of a slice from a DataFrame" - with pytest.raises(SettingWithCopyError, match=msg): - result[:] = 10 + result[:] = 10 tm.assert_frame_equal(df, df_orig) - def test_xs_setting_with_copy_error_multiple( - self, four_level_index_dataframe, using_copy_on_write, warn_copy_on_write - ): + def test_xs_setting_with_copy_error_multiple(self, four_level_index_dataframe): # this is a copy in 0.14 
df = four_level_index_dataframe df_orig = df.copy() result = df.xs(("a", 4), level=["one", "four"]) - if using_copy_on_write or warn_copy_on_write: - result[:] = 10 - else: - # setting this will give a SettingWithCopyError - # as we are trying to write a view - msg = "A value is trying to be set on a copy of a slice from a DataFrame" - with pytest.raises(SettingWithCopyError, match=msg): - result[:] = 10 + result[:] = 10 tm.assert_frame_equal(df, df_orig) @pytest.mark.parametrize("key, level", [("one", "second"), (["one"], ["second"])]) diff --git a/pandas/tests/frame/methods/test_asof.py b/pandas/tests/frame/methods/test_asof.py index 4a8adf89b3aef..029aa3a5b8f05 100644 --- a/pandas/tests/frame/methods/test_asof.py +++ b/pandas/tests/frame/methods/test_asof.py @@ -163,19 +163,6 @@ def test_time_zone_aware_index(self, stamp, expected): result = df.asof(stamp) tm.assert_series_equal(result, expected) - def test_is_copy(self, date_range_frame): - # GH-27357, GH-30784: ensure the result of asof is an actual copy and - # doesn't track the parent dataframe / doesn't give SettingWithCopy warnings - df = date_range_frame.astype({"A": "float"}) - N = 50 - df.loc[df.index[15:30], "A"] = np.nan - dates = date_range("1/1/1990", periods=N * 3, freq="25s") - - result = df.asof(dates) - - with tm.assert_produces_warning(None): - result["C"] = 1 - def test_asof_periodindex_mismatched_freq(self): N = 50 rng = period_range("1/1/1990", periods=N, freq="h") diff --git a/pandas/tests/frame/methods/test_copy.py b/pandas/tests/frame/methods/test_copy.py index e7901ed363106..9d0dd91e18094 100644 --- a/pandas/tests/frame/methods/test_copy.py +++ b/pandas/tests/frame/methods/test_copy.py @@ -4,7 +4,6 @@ import pandas.util._test_decorators as td from pandas import DataFrame -import pandas._testing as tm class TestCopy: @@ -18,25 +17,6 @@ def test_copy_index_name_checking(self, float_frame, attr): getattr(cp, attr).name = "foo" assert getattr(float_frame, attr).name is None - 
@td.skip_copy_on_write_invalid_test - def test_copy_cache(self): - # GH#31784 _item_cache not cleared on copy causes incorrect reads after updates - df = DataFrame({"a": [1]}) - - df["x"] = [0] - df["a"] - - df.copy() - - df["a"].values[0] = -1 - - tm.assert_frame_equal(df, DataFrame({"a": [-1], "x": [0]})) - - df["y"] = [0] - - assert df["a"].values[0] == -1 - tm.assert_frame_equal(df, DataFrame({"a": [-1], "x": [0], "y": [0]})) - def test_copy(self, float_frame, float_string_frame): cop = float_frame.copy() cop["E"] = cop["A"] diff --git a/pandas/tests/frame/methods/test_sort_values.py b/pandas/tests/frame/methods/test_sort_values.py index f2f02058a534e..d58a9972a02de 100644 --- a/pandas/tests/frame/methods/test_sort_values.py +++ b/pandas/tests/frame/methods/test_sort_values.py @@ -331,21 +331,15 @@ def test_sort_values_datetimes(self): df2 = df.sort_values(by=["C", "B"]) tm.assert_frame_equal(df1, df2) - def test_sort_values_frame_column_inplace_sort_exception( - self, float_frame, using_copy_on_write - ): + def test_sort_values_frame_column_inplace_sort_exception(self, float_frame): s = float_frame["A"] float_frame_orig = float_frame.copy() - if using_copy_on_write: - # INFO(CoW) Series is a new object, so can be changed inplace - # without modifying original datafame - s.sort_values(inplace=True) - tm.assert_series_equal(s, float_frame_orig["A"].sort_values()) - # column in dataframe is not changed - tm.assert_frame_equal(float_frame, float_frame_orig) - else: - with pytest.raises(ValueError, match="This Series is a view"): - s.sort_values(inplace=True) + # INFO(CoW) Series is a new object, so can be changed inplace + # without modifying original dataframe + s.sort_values(inplace=True) + tm.assert_series_equal(s, float_frame_orig["A"].sort_values()) + # column in dataframe is not changed + tm.assert_frame_equal(float_frame, float_frame_orig) cp = s.copy() cp.sort_values() # it works!
diff --git a/pandas/tests/indexes/multi/test_get_set.py b/pandas/tests/indexes/multi/test_get_set.py index 6eeaeb6711d03..dd4bba42eda6f 100644 --- a/pandas/tests/indexes/multi/test_get_set.py +++ b/pandas/tests/indexes/multi/test_get_set.py @@ -332,10 +332,8 @@ def test_set_value_keeps_names(): index=idx, ) df = df.sort_index() - assert df._is_copy is None assert df.index.names == ("Name", "Number") df.at[("grethe", "4"), "one"] = 99.34 - assert df._is_copy is None assert df.index.names == ("Name", "Number") diff --git a/pandas/tests/indexing/multiindex/test_chaining_and_caching.py b/pandas/tests/indexing/multiindex/test_chaining_and_caching.py index 0dd1a56890fee..c160f4d717e06 100644 --- a/pandas/tests/indexing/multiindex/test_chaining_and_caching.py +++ b/pandas/tests/indexing/multiindex/test_chaining_and_caching.py @@ -1,8 +1,6 @@ import numpy as np -import pytest from pandas._libs import index as libindex -from pandas.errors import SettingWithCopyError import pandas.util._test_decorators as td from pandas import ( @@ -13,7 +11,7 @@ import pandas._testing as tm -def test_detect_chained_assignment(using_copy_on_write, warn_copy_on_write): +def test_detect_chained_assignment(): # Inplace ops, originally from: # https://stackoverflow.com/questions/20508968/series-fillna-in-a-multiindex-dataframe-does-not-fill-is-this-a-bug a = [12, 23] @@ -30,17 +28,8 @@ def test_detect_chained_assignment(using_copy_on_write, warn_copy_on_write): multiind = MultiIndex.from_tuples(tuples, names=["part", "side"]) zed = DataFrame(events, index=["a", "b"], columns=multiind) - if using_copy_on_write: - with tm.raises_chained_assignment_error(): - zed["eyes"]["right"].fillna(value=555, inplace=True) - elif warn_copy_on_write: - with tm.assert_produces_warning(None): - zed["eyes"]["right"].fillna(value=555, inplace=True) - else: - msg = "A value is trying to be set on a copy of a slice from a DataFrame" - with pytest.raises(SettingWithCopyError, match=msg): - with 
tm.assert_produces_warning(None): - zed["eyes"]["right"].fillna(value=555, inplace=True) + with tm.raises_chained_assignment_error(): + zed["eyes"]["right"].fillna(value=555, inplace=True) @td.skip_array_manager_invalid_test # with ArrayManager df.loc[0] is not a view diff --git a/pandas/tests/indexing/multiindex/test_setitem.py b/pandas/tests/indexing/multiindex/test_setitem.py index 53ad4d6b41687..5249f29b32d42 100644 --- a/pandas/tests/indexing/multiindex/test_setitem.py +++ b/pandas/tests/indexing/multiindex/test_setitem.py @@ -1,7 +1,6 @@ import numpy as np import pytest -from pandas.errors import SettingWithCopyError import pandas.util._test_decorators as td import pandas as pd @@ -537,38 +536,21 @@ def test_frame_setitem_view_direct( assert (df["foo"].values == 0).all() -def test_frame_setitem_copy_raises( - multiindex_dataframe_random_data, using_copy_on_write, warn_copy_on_write -): +def test_frame_setitem_copy_raises(multiindex_dataframe_random_data): # will raise/warn as its chained assignment df = multiindex_dataframe_random_data.T - if using_copy_on_write or warn_copy_on_write: - with tm.raises_chained_assignment_error(): - df["foo"]["one"] = 2 - else: - msg = "A value is trying to be set on a copy of a slice from a DataFrame" - with pytest.raises(SettingWithCopyError, match=msg): - with tm.raises_chained_assignment_error(): - df["foo"]["one"] = 2 + with tm.raises_chained_assignment_error(): + df["foo"]["one"] = 2 -def test_frame_setitem_copy_no_write( - multiindex_dataframe_random_data, using_copy_on_write, warn_copy_on_write -): +def test_frame_setitem_copy_no_write(multiindex_dataframe_random_data): frame = multiindex_dataframe_random_data.T expected = frame df = frame.copy() - if using_copy_on_write or warn_copy_on_write: - with tm.raises_chained_assignment_error(): - df["foo"]["one"] = 2 - else: - msg = "A value is trying to be set on a copy of a slice from a DataFrame" - with pytest.raises(SettingWithCopyError, match=msg): - with 
tm.raises_chained_assignment_error(): - df["foo"]["one"] = 2 + with tm.raises_chained_assignment_error(): + df["foo"]["one"] = 2 - result = df - tm.assert_frame_equal(result, expected) + tm.assert_frame_equal(df, expected) def test_frame_setitem_partial_multiindex(): diff --git a/pandas/tests/indexing/test_chaining_and_caching.py b/pandas/tests/indexing/test_chaining_and_caching.py index b97df376ac47f..4c700215e41af 100644 --- a/pandas/tests/indexing/test_chaining_and_caching.py +++ b/pandas/tests/indexing/test_chaining_and_caching.py @@ -3,10 +3,6 @@ import numpy as np import pytest -from pandas.errors import ( - SettingWithCopyError, - SettingWithCopyWarning, -) import pandas.util._test_decorators as td import pandas as pd @@ -48,12 +44,7 @@ def test_slice_consolidate_invalidate_item_cache(self, using_copy_on_write): # Assignment to wrong series with tm.raises_chained_assignment_error(): df["bb"].iloc[0] = 0.17 - df._clear_item_cache() - if not using_copy_on_write: - tm.assert_almost_equal(df["bb"][0], 0.17) - else: - # with ArrayManager, parent is not mutated with chained assignment - tm.assert_almost_equal(df["bb"][0], 2.2) + tm.assert_almost_equal(df["bb"][0], 2.2) @pytest.mark.parametrize("do_ref", [True, False]) def test_setitem_cache_updating(self, do_ref): @@ -124,16 +115,10 @@ def test_altering_series_clears_parent_cache( df = DataFrame([[1, 2], [3, 4]], index=["a", "b"], columns=["A", "B"]) ser = df["A"] - if using_copy_on_write or warn_copy_on_write: - assert "A" not in df._item_cache - else: - assert "A" in df._item_cache - # Adding a new entry to ser swaps in a new array, so "A" needs to # be removed from df._item_cache ser["c"] = 5 assert len(ser) == 3 - assert "A" not in df._item_cache assert df["A"] is not ser assert len(df["A"]) == 2 @@ -200,7 +185,6 @@ def test_detect_chained_assignment(self, using_copy_on_write): np.arange(4).reshape(2, 2), columns=list("AB"), dtype="int64" ) df_original = df.copy() - assert df._is_copy is None with 
tm.raises_chained_assignment_error(): df["A"][0] = -5 @@ -212,9 +196,7 @@ def test_detect_chained_assignment(self, using_copy_on_write): tm.assert_frame_equal(df, expected) @pytest.mark.arm_slow - def test_detect_chained_assignment_raises( - self, using_array_manager, using_copy_on_write, warn_copy_on_write - ): + def test_detect_chained_assignment_raises(self): # test with the chaining df = DataFrame( { @@ -223,42 +205,14 @@ def test_detect_chained_assignment_raises( } ) df_original = df.copy() - assert df._is_copy is None - - if using_copy_on_write: - with tm.raises_chained_assignment_error(): - df["A"][0] = -5 - with tm.raises_chained_assignment_error(): - df["A"][1] = -6 - tm.assert_frame_equal(df, df_original) - elif warn_copy_on_write: - with tm.raises_chained_assignment_error(): - df["A"][0] = -5 - with tm.raises_chained_assignment_error(): - df["A"][1] = np.nan - elif not using_array_manager: - with pytest.raises(SettingWithCopyError, match=msg): - with tm.raises_chained_assignment_error(): - df["A"][0] = -5 - - with pytest.raises(SettingWithCopyError, match=msg): - with tm.raises_chained_assignment_error(): - df["A"][1] = np.nan - - assert df["A"]._is_copy is None - else: - # INFO(ArrayManager) for ArrayManager it doesn't matter that it's - # a mixed dataframe + with tm.raises_chained_assignment_error(): df["A"][0] = -5 + with tm.raises_chained_assignment_error(): df["A"][1] = -6 - expected = DataFrame([[-5, 2], [-6, 3]], columns=list("AB")) - expected["B"] = expected["B"].astype("float64") - tm.assert_frame_equal(df, expected) + tm.assert_frame_equal(df, df_original) @pytest.mark.arm_slow - def test_detect_chained_assignment_fails( - self, using_copy_on_write, warn_copy_on_write - ): + def test_detect_chained_assignment_fails(self): # Using a copy (the chain), fails df = DataFrame( { @@ -267,17 +221,11 @@ def test_detect_chained_assignment_fails( } ) - if using_copy_on_write or warn_copy_on_write: - with tm.raises_chained_assignment_error(): - 
df.loc[0]["A"] = -5 - else: - with pytest.raises(SettingWithCopyError, match=msg): - df.loc[0]["A"] = -5 + with tm.raises_chained_assignment_error(): + df.loc[0]["A"] = -5 @pytest.mark.arm_slow - def test_detect_chained_assignment_doc_example( - self, using_copy_on_write, warn_copy_on_write - ): + def test_detect_chained_assignment_doc_example(self): # Doc example df = DataFrame( { @@ -285,56 +233,26 @@ def test_detect_chained_assignment_doc_example( "c": Series(range(7), dtype="int64"), } ) - assert df._is_copy is None indexer = df.a.str.startswith("o") - if using_copy_on_write or warn_copy_on_write: - with tm.raises_chained_assignment_error(): - df[indexer]["c"] = 42 - else: - with pytest.raises(SettingWithCopyError, match=msg): - df[indexer]["c"] = 42 + with tm.raises_chained_assignment_error(): + df[indexer]["c"] = 42 @pytest.mark.arm_slow - def test_detect_chained_assignment_object_dtype( - self, using_array_manager, using_copy_on_write, warn_copy_on_write - ): - expected = DataFrame({"A": [111, "bbb", "ccc"], "B": [1, 2, 3]}) + def test_detect_chained_assignment_object_dtype(self): df = DataFrame( {"A": Series(["aaa", "bbb", "ccc"], dtype=object), "B": [1, 2, 3]} ) df_original = df.copy() - if not using_copy_on_write and not warn_copy_on_write: - with pytest.raises(SettingWithCopyError, match=msg): - df.loc[0]["A"] = 111 - - if using_copy_on_write: - with tm.raises_chained_assignment_error(): - df["A"][0] = 111 - tm.assert_frame_equal(df, df_original) - elif warn_copy_on_write: - with tm.raises_chained_assignment_error(): - df["A"][0] = 111 - tm.assert_frame_equal(df, expected) - elif not using_array_manager: - with pytest.raises(SettingWithCopyError, match=msg): - with tm.raises_chained_assignment_error(): - df["A"][0] = 111 - - df.loc[0, "A"] = 111 - tm.assert_frame_equal(df, expected) - else: - # INFO(ArrayManager) for ArrayManager it doesn't matter that it's - # a mixed dataframe + with tm.raises_chained_assignment_error(): df["A"][0] = 111 - 
tm.assert_frame_equal(df, expected) + tm.assert_frame_equal(df, df_original) @pytest.mark.arm_slow def test_detect_chained_assignment_is_copy_pickle(self): # gh-5475: Make sure that is_copy is picked up reconstruction df = DataFrame({"A": [1, 2]}) - assert df._is_copy is None with tm.ensure_clean("__tmp__pickle") as path: df.to_pickle(path) @@ -342,70 +260,12 @@ def test_detect_chained_assignment_is_copy_pickle(self): df2["B"] = df2["A"] df2["B"] = df2["A"] - @pytest.mark.arm_slow - def test_detect_chained_assignment_setting_entire_column(self): - # gh-5597: a spurious raise as we are setting the entire column here - - df = random_text(100000) - - # Always a copy - x = df.iloc[[0, 1, 2]] - assert x._is_copy is not None - - x = df.iloc[[0, 1, 2, 4]] - assert x._is_copy is not None - - # Explicitly copy - indexer = df.letters.apply(lambda x: len(x) > 10) - df = df.loc[indexer].copy() - - assert df._is_copy is None - df["letters"] = df["letters"].apply(str.lower) - - @pytest.mark.arm_slow - def test_detect_chained_assignment_implicit_take(self): - # Implicitly take - df = random_text(100000) - indexer = df.letters.apply(lambda x: len(x) > 10) - df = df.loc[indexer] - - assert df._is_copy is not None - df["letters"] = df["letters"].apply(str.lower) - - @pytest.mark.arm_slow - def test_detect_chained_assignment_implicit_take2( - self, using_copy_on_write, warn_copy_on_write - ): - if using_copy_on_write or warn_copy_on_write: - pytest.skip("_is_copy is not always set for CoW") - # Implicitly take 2 - df = random_text(100000) - indexer = df.letters.apply(lambda x: len(x) > 10) - - df = df.loc[indexer] - assert df._is_copy is not None - df.loc[:, "letters"] = df["letters"].apply(str.lower) - - # with the enforcement of #45333 in 2.0, the .loc[:, letters] setting - # is inplace, so df._is_copy remains non-None. 
- assert df._is_copy is not None - - df["letters"] = df["letters"].apply(str.lower) - assert df._is_copy is None - @pytest.mark.arm_slow def test_detect_chained_assignment_str(self): df = random_text(100000) indexer = df.letters.apply(lambda x: len(x) > 10) df.loc[indexer, "letters"] = df.loc[indexer, "letters"].apply(str.lower) - @pytest.mark.arm_slow - def test_detect_chained_assignment_is_copy(self): - # an identical take, so no copy - df = DataFrame({"a": [1]}).dropna() - assert df._is_copy is None - df["a"] += 1 - @pytest.mark.arm_slow def test_detect_chained_assignment_sorting(self): df = DataFrame(np.random.default_rng(2).standard_normal((10, 4))) @@ -430,31 +290,18 @@ def test_detect_chained_assignment_false_positives(self): str(df) @pytest.mark.arm_slow - def test_detect_chained_assignment_undefined_column( - self, using_copy_on_write, warn_copy_on_write - ): + def test_detect_chained_assignment_undefined_column(self): # from SO: # https://stackoverflow.com/questions/24054495/potential-bug-setting-value-for-undefined-column-using-iloc df = DataFrame(np.arange(0, 9), columns=["count"]) df["group"] = "b" df_original = df.copy() - - if using_copy_on_write: - with tm.raises_chained_assignment_error(): - df.iloc[0:5]["group"] = "a" - tm.assert_frame_equal(df, df_original) - elif warn_copy_on_write: - with tm.raises_chained_assignment_error(): - df.iloc[0:5]["group"] = "a" - else: - with pytest.raises(SettingWithCopyError, match=msg): - with tm.raises_chained_assignment_error(): - df.iloc[0:5]["group"] = "a" + with tm.raises_chained_assignment_error(): + df.iloc[0:5]["group"] = "a" + tm.assert_frame_equal(df, df_original) @pytest.mark.arm_slow - def test_detect_chained_assignment_changing_dtype( - self, using_array_manager, using_copy_on_write, warn_copy_on_write - ): + def test_detect_chained_assignment_changing_dtype(self): # Mixed type setting but same dtype & changing dtype df = DataFrame( { @@ -466,53 +313,25 @@ def 
test_detect_chained_assignment_changing_dtype( ) df_original = df.copy() - if using_copy_on_write or warn_copy_on_write: - with tm.raises_chained_assignment_error(): - df.loc[2]["D"] = "foo" - with tm.raises_chained_assignment_error(): - df.loc[2]["C"] = "foo" - tm.assert_frame_equal(df, df_original) - with tm.raises_chained_assignment_error(extra_warnings=(FutureWarning,)): - df["C"][2] = "foo" - if using_copy_on_write: - tm.assert_frame_equal(df, df_original) - else: - assert df.loc[2, "C"] == "foo" - else: - with pytest.raises(SettingWithCopyError, match=msg): - df.loc[2]["D"] = "foo" - - with pytest.raises(SettingWithCopyError, match=msg): - df.loc[2]["C"] = "foo" - - if not using_array_manager: - with pytest.raises(SettingWithCopyError, match=msg): - with tm.raises_chained_assignment_error(): - df["C"][2] = "foo" - else: - # INFO(ArrayManager) for ArrayManager it doesn't matter if it's - # changing the dtype or not - df["C"][2] = "foo" - assert df.loc[2, "C"] == "foo" + with tm.raises_chained_assignment_error(): + df.loc[2]["D"] = "foo" + with tm.raises_chained_assignment_error(): + df.loc[2]["C"] = "foo" + tm.assert_frame_equal(df, df_original) + with tm.raises_chained_assignment_error(extra_warnings=(FutureWarning,)): + df["C"][2] = "foo" + tm.assert_frame_equal(df, df_original) - def test_setting_with_copy_bug(self, using_copy_on_write, warn_copy_on_write): + def test_setting_with_copy_bug(self): # operating on a copy df = DataFrame( {"a": list(range(4)), "b": list("ab.."), "c": ["a", "b", np.nan, "d"]} ) df_original = df.copy() mask = pd.isna(df.c) - - if using_copy_on_write: - with tm.raises_chained_assignment_error(): - df[["c"]][mask] = df[["b"]][mask] - tm.assert_frame_equal(df, df_original) - elif warn_copy_on_write: - with tm.raises_chained_assignment_error(): - df[["c"]][mask] = df[["b"]][mask] - else: - with pytest.raises(SettingWithCopyError, match=msg): - df[["c"]][mask] = df[["b"]][mask] + with tm.raises_chained_assignment_error(): + 
df[["c"]][mask] = df[["b"]][mask] + tm.assert_frame_equal(df, df_original) def test_setting_with_copy_bug_no_warning(self): # invalid warning as we are returning a new object @@ -523,22 +342,10 @@ def test_setting_with_copy_bug_no_warning(self): # this should not raise df2["y"] = ["g", "h", "i"] - def test_detect_chained_assignment_warnings_errors( - self, using_copy_on_write, warn_copy_on_write - ): + def test_detect_chained_assignment_warnings_errors(self): df = DataFrame({"A": ["aaa", "bbb", "ccc"], "B": [1, 2, 3]}) - if using_copy_on_write or warn_copy_on_write: - with tm.raises_chained_assignment_error(): - df.loc[0]["A"] = 111 - return - - with option_context("chained_assignment", "warn"): - with tm.assert_produces_warning(SettingWithCopyWarning): - df.loc[0]["A"] = 111 - - with option_context("chained_assignment", "raise"): - with pytest.raises(SettingWithCopyError, match=msg): - df.loc[0]["A"] = 111 + with tm.raises_chained_assignment_error(): + df.loc[0]["A"] = 111 @pytest.mark.parametrize("rhs", [3, DataFrame({0: [1, 2, 3, 4]})]) def test_detect_chained_assignment_warning_stacklevel( @@ -548,15 +355,9 @@ def test_detect_chained_assignment_warning_stacklevel( df = DataFrame(np.arange(25).reshape(5, 5)) df_original = df.copy() chained = df.loc[:3] - with option_context("chained_assignment", "warn"): - if not using_copy_on_write and not warn_copy_on_write: - with tm.assert_produces_warning(SettingWithCopyWarning) as t: - chained[2] = rhs - assert t[0].filename == __file__ - else: - # INFO(CoW) no warning, and original dataframe not changed - chained[2] = rhs - tm.assert_frame_equal(df, df_original) + # INFO(CoW) no warning, and original dataframe not changed + chained[2] = rhs + tm.assert_frame_equal(df, df_original) # TODO(ArrayManager) fast_xs with array-like scalars is not yet working @td.skip_array_manager_not_yet_implemented diff --git a/pandas/tests/series/accessors/test_dt_accessor.py b/pandas/tests/series/accessors/test_dt_accessor.py index 
34465a7c12c18..feae190a0886c 100644 --- a/pandas/tests/series/accessors/test_dt_accessor.py +++ b/pandas/tests/series/accessors/test_dt_accessor.py @@ -12,7 +12,6 @@ import pytz from pandas._libs.tslibs.timezones import maybe_get_tz -from pandas.errors import SettingWithCopyError from pandas.core.dtypes.common import ( is_integer_dtype, @@ -281,26 +280,15 @@ def test_dt_accessor_ambiguous_freq_conversions(self): expected = Series(exp_values, name="xxx") tm.assert_series_equal(ser, expected) - def test_dt_accessor_not_writeable(self, using_copy_on_write, warn_copy_on_write): + def test_dt_accessor_not_writeable(self): # no setting allowed ser = Series(date_range("20130101", periods=5, freq="D"), name="xxx") with pytest.raises(ValueError, match="modifications"): ser.dt.hour = 5 # trying to set a copy - msg = "modifications to a property of a datetimelike.+not supported" - with pd.option_context("chained_assignment", "raise"): - if using_copy_on_write: - with tm.raises_chained_assignment_error(): - ser.dt.hour[0] = 5 - elif warn_copy_on_write: - with tm.assert_produces_warning( - FutureWarning, match="ChainedAssignmentError" - ): - ser.dt.hour[0] = 5 - else: - with pytest.raises(SettingWithCopyError, match=msg): - ser.dt.hour[0] = 5 + with tm.raises_chained_assignment_error(): + ser.dt.hour[0] = 5 @pytest.mark.parametrize( "method, dates", diff --git a/pandas/tests/series/methods/test_sort_values.py b/pandas/tests/series/methods/test_sort_values.py index 4808272879071..f898c89649040 100644 --- a/pandas/tests/series/methods/test_sort_values.py +++ b/pandas/tests/series/methods/test_sort_values.py @@ -79,17 +79,8 @@ def test_sort_values(self, datetime_series, using_copy_on_write): # Series.sort_values operating on a view df = DataFrame(np.random.default_rng(2).standard_normal((10, 4))) s = df.iloc[:, 0] - - msg = ( - "This Series is a view of some other array, to sort in-place " - "you must create a copy" - ) - if using_copy_on_write: - s.sort_values(inplace=True) - 
tm.assert_series_equal(s, df.iloc[:, 0].sort_values()) - else: - with pytest.raises(ValueError, match=msg): - s.sort_values(inplace=True) + s.sort_values(inplace=True) + tm.assert_series_equal(s, df.iloc[:, 0].sort_values()) def test_sort_values_categorical(self): c = Categorical(["a", "b", "b", "a"], ordered=False) diff --git a/pandas/tests/series/test_ufunc.py b/pandas/tests/series/test_ufunc.py index 9d13ebf740eab..fb160d6aa0d5a 100644 --- a/pandas/tests/series/test_ufunc.py +++ b/pandas/tests/series/test_ufunc.py @@ -5,8 +5,6 @@ import numpy as np import pytest -import pandas.util._test_decorators as td - import pandas as pd import pandas._testing as tm from pandas.arrays import SparseArray @@ -450,7 +448,7 @@ def add3(x, y, z): # TODO(CoW) see https://github.com/pandas-dev/pandas/pull/51082 -@td.skip_copy_on_write_not_yet_implemented +@pytest.mark.skip(reason="not implemented with CoW") def test_np_fix(): # np.fix is not a ufunc but is composed of several ufunc calls under the hood # with `out` and `where` keywords diff --git a/pandas/tests/test_downstream.py b/pandas/tests/test_downstream.py index 51ce73ef54300..5f18079fc87a8 100644 --- a/pandas/tests/test_downstream.py +++ b/pandas/tests/test_downstream.py @@ -46,6 +46,8 @@ def test_dask(df): pd.set_option("compute.use_numexpr", olduse) +# TODO(CoW) see https://github.com/pandas-dev/pandas/pull/51082 +@pytest.mark.skip(reason="not implemented with CoW") def test_dask_ufunc(): # dask sets "compute.use_numexpr" to False, so catch the current value # and ensure to reset it afterwards to avoid impacting other tests diff --git a/pandas/tests/test_errors.py b/pandas/tests/test_errors.py index aeddc08e4b888..ac621e409fe55 100644 --- a/pandas/tests/test_errors.py +++ b/pandas/tests/test_errors.py @@ -37,8 +37,6 @@ "PossibleDataLossError", "PossiblePrecisionLoss", "PyperclipException", - "SettingWithCopyError", - "SettingWithCopyWarning", "SpecificationError", "UnsortedIndexError", "UnsupportedFunctionCall", From 
53c3f7694edfbb8a0ab3b327a595eaa00651d505 Mon Sep 17 00:00:00 2001 From: Patrick Hoefler Date: Mon, 25 Dec 2023 02:10:50 +0100 Subject: [PATCH 2/7] Fixup --- pandas/tests/copy_view/test_indexing.py | 1 - 1 file changed, 1 deletion(-) diff --git a/pandas/tests/copy_view/test_indexing.py b/pandas/tests/copy_view/test_indexing.py index c241da56992c6..858964502bdf0 100644 --- a/pandas/tests/copy_view/test_indexing.py +++ b/pandas/tests/copy_view/test_indexing.py @@ -582,7 +582,6 @@ def test_subset_chained_getitem_column(backend, dtype, warn_copy_on_write): # modify parent -> don't modify subset subset = df[:]["a"][0:2] - df._clear_item_cache() with tm.assert_cow_warning(warn_copy_on_write): df.iloc[0, 0] = 0 expected = Series([1, 2], name="a") From a0ddaa17c176ed6d2a3bef26b7514cdf822b9f3d Mon Sep 17 00:00:00 2001 From: Patrick Hoefler Date: Mon, 25 Dec 2023 21:34:36 +0100 Subject: [PATCH 3/7] Remove docs --- ci/code_checks.sh | 2 - doc/source/reference/testing.rst | 2 - doc/source/user_guide/advanced.rst | 8 +- doc/source/user_guide/indexing.rst | 252 +----------------- doc/source/whatsnew/v0.13.0.rst | 2 +- doc/source/whatsnew/v0.13.1.rst | 4 +- doc/source/whatsnew/v1.5.0.rst | 2 +- pandas/core/apply.py | 16 +- pandas/core/groupby/groupby.py | 50 ++-- pandas/core/internals/managers.py | 2 +- pandas/errors/__init__.py | 4 +- pandas/tests/copy_view/test_clip.py | 11 +- pandas/tests/copy_view/test_interp_fillna.py | 13 +- pandas/tests/copy_view/test_methods.py | 13 +- pandas/tests/copy_view/test_replace.py | 7 +- pandas/tests/frame/methods/test_sample.py | 2 +- .../tests/groupby/transform/test_transform.py | 7 +- 17 files changed, 58 insertions(+), 339 deletions(-) diff --git a/ci/code_checks.sh b/ci/code_checks.sh index e41f625e583c0..6cf572e07adf9 100755 --- a/ci/code_checks.sh +++ b/ci/code_checks.sh @@ -82,8 +82,6 @@ if [[ -z "$CHECK" || "$CHECK" == "docstrings" ]]; then pandas.errors.NumExprClobberingError \ pandas.errors.PossibleDataLossError \ 
pandas.errors.PossiblePrecisionLoss \ - pandas.errors.SettingWithCopyError \ - pandas.errors.SettingWithCopyWarning \ pandas.errors.SpecificationError \ pandas.errors.UndefinedVariableError \ pandas.errors.ValueLabelTypeMismatch \ diff --git a/doc/source/reference/testing.rst b/doc/source/reference/testing.rst index a5d61703aceed..1f164d1aa98b4 100644 --- a/doc/source/reference/testing.rst +++ b/doc/source/reference/testing.rst @@ -58,8 +58,6 @@ Exceptions and warnings errors.PossiblePrecisionLoss errors.PyperclipException errors.PyperclipWindowsException - errors.SettingWithCopyError - errors.SettingWithCopyWarning errors.SpecificationError errors.UndefinedVariableError errors.UnsortedIndexError diff --git a/doc/source/user_guide/advanced.rst b/doc/source/user_guide/advanced.rst index 453536098cfbb..f7ab466e92d93 100644 --- a/doc/source/user_guide/advanced.rst +++ b/doc/source/user_guide/advanced.rst @@ -11,13 +11,6 @@ and :ref:`other advanced indexing features `. See the :ref:`Indexing and Selecting Data ` for general indexing documentation. -.. warning:: - - Whether a copy or a reference is returned for a setting operation may - depend on the context. This is sometimes called ``chained assignment`` and - should be avoided. See :ref:`Returning a View versus Copy - `. - See the :ref:`cookbook` for some advanced strategies. .. _advanced.hierarchical: @@ -402,6 +395,7 @@ slicers on a single axis. Furthermore, you can *set* the values using the following methods. .. ipython:: python + :okwarning: df2 = dfmi.copy() df2.loc(axis=0)[:, :, ["C1", "C3"]] = -10 diff --git a/doc/source/user_guide/indexing.rst b/doc/source/user_guide/indexing.rst index 4954ee1538697..eb414e4bb74d6 100644 --- a/doc/source/user_guide/indexing.rst +++ b/doc/source/user_guide/indexing.rst @@ -29,13 +29,6 @@ this area. production code, we recommended that you take advantage of the optimized pandas data access methods exposed in this chapter. -.. 
warning:: - - Whether a copy or a reference is returned for a setting operation, may - depend on the context. This is sometimes called ``chained assignment`` and - should be avoided. See :ref:`Returning a View versus Copy - `. - See the :ref:`MultiIndex / Advanced Indexing ` for ``MultiIndex`` and more advanced indexing documentation. See the :ref:`cookbook` for some advanced strategies. @@ -299,12 +292,6 @@ largely as a convenience since it is such a common operation. Selection by label ------------------ -.. warning:: - - Whether a copy or a reference is returned for a setting operation, may depend on the context. - This is sometimes called ``chained assignment`` and should be avoided. - See :ref:`Returning a View versus Copy `. - .. warning:: ``.loc`` is strict when you present slicers that are not compatible (or convertible) with the index type. For example @@ -445,12 +432,6 @@ For more information about duplicate labels, see Selection by position --------------------- -.. warning:: - - Whether a copy or a reference is returned for a setting operation, may depend on the context. - This is sometimes called ``chained assignment`` and should be avoided. - See :ref:`Returning a View versus Copy `. - pandas provides a suite of methods in order to get **purely integer based indexing**. The semantics follow closely Python and NumPy slicing. These are ``0-based`` indexing. When slicing, the start bound is *included*, while the upper bound is *excluded*. Trying to use a non-integer, even a **valid** label will raise an ``IndexError``. The ``.iloc`` attribute is the primary access method. The following are valid inputs: @@ -1722,234 +1703,11 @@ You can assign a custom index to the ``index`` attribute: df_idx.index = pd.Index([10, 20, 30, 40], name="a") df_idx -.. _indexing.view_versus_copy: - -Returning a view versus a copy ------------------------------- - -.. warning:: - - :ref:`Copy-on-Write ` - will become the new default in pandas 3.0. 
This means than chained indexing will - never work. As a consequence, the ``SettingWithCopyWarning`` won't be necessary - anymore. - See :ref:`this section ` - for more context. - We recommend turning Copy-on-Write on to leverage the improvements with - - ``` - pd.options.mode.copy_on_write = True - ``` - - even before pandas 3.0 is available. - -When setting values in a pandas object, care must be taken to avoid what is called -``chained indexing``. Here is an example. - -.. ipython:: python - - dfmi = pd.DataFrame([list('abcd'), - list('efgh'), - list('ijkl'), - list('mnop')], - columns=pd.MultiIndex.from_product([['one', 'two'], - ['first', 'second']])) - dfmi - -Compare these two access methods: - -.. ipython:: python - - dfmi['one']['second'] - -.. ipython:: python - - dfmi.loc[:, ('one', 'second')] - -These both yield the same results, so which should you use? It is instructive to understand the order -of operations on these and why method 2 (``.loc``) is much preferred over method 1 (chained ``[]``). - -``dfmi['one']`` selects the first level of the columns and returns a DataFrame that is singly-indexed. -Then another Python operation ``dfmi_with_one['second']`` selects the series indexed by ``'second'``. -This is indicated by the variable ``dfmi_with_one`` because pandas sees these operations as separate events. -e.g. separate calls to ``__getitem__``, so it has to treat them as linear operations, they happen one after another. - -Contrast this to ``df.loc[:,('one','second')]`` which passes a nested tuple of ``(slice(None),('one','second'))`` to a single call to -``__getitem__``. This allows pandas to deal with this as a single entity. Furthermore this order of operations *can* be significantly -faster, and allows one to index *both* axes if so desired. - Why does assignment fail when using chained indexing? ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ -.. warning:: - - :ref:`Copy-on-Write ` - will become the new default in pandas 3.0. 
This means than chained indexing will - never work. As a consequence, the ``SettingWithCopyWarning`` won't be necessary - anymore. - See :ref:`this section ` - for more context. - We recommend turning Copy-on-Write on to leverage the improvements with - - ``` - pd.options.mode.copy_on_write = True - ``` - - even before pandas 3.0 is available. - -The problem in the previous section is just a performance issue. What's up with -the ``SettingWithCopy`` warning? We don't **usually** throw warnings around when -you do something that might cost a few extra milliseconds! - -But it turns out that assigning to the product of chained indexing has -inherently unpredictable results. To see this, think about how the Python -interpreter executes this code: - -.. code-block:: python - - dfmi.loc[:, ('one', 'second')] = value - # becomes - dfmi.loc.__setitem__((slice(None), ('one', 'second')), value) - -But this code is handled differently: - -.. code-block:: python - - dfmi['one']['second'] = value - # becomes - dfmi.__getitem__('one').__setitem__('second', value) - -See that ``__getitem__`` in there? Outside of simple cases, it's very hard to -predict whether it will return a view or a copy (it depends on the memory layout -of the array, about which pandas makes no guarantees), and therefore whether -the ``__setitem__`` will modify ``dfmi`` or a temporary object that gets thrown -out immediately afterward. **That's** what ``SettingWithCopy`` is warning you -about! - -.. note:: You may be wondering whether we should be concerned about the ``loc`` - property in the first example. But ``dfmi.loc`` is guaranteed to be ``dfmi`` - itself with modified indexing behavior, so ``dfmi.loc.__getitem__`` / - ``dfmi.loc.__setitem__`` operate on ``dfmi`` directly. Of course, - ``dfmi.loc.__getitem__(idx)`` may be a view or a copy of ``dfmi``. - -Sometimes a ``SettingWithCopy`` warning will arise at times when there's no -obvious chained indexing going on. 
**These** are the bugs that -``SettingWithCopy`` is designed to catch! pandas is probably trying to warn you -that you've done this: - -.. code-block:: python - - def do_something(df): - foo = df[['bar', 'baz']] # Is foo a view? A copy? Nobody knows! - # ... many lines here ... - # We don't know whether this will modify df or not! - foo['quux'] = value - return foo - -Yikes! - -.. _indexing.evaluation_order: - -Evaluation order matters -~~~~~~~~~~~~~~~~~~~~~~~~ - -.. warning:: - - :ref:`Copy-on-Write ` - will become the new default in pandas 3.0. This means than chained indexing will - never work. As a consequence, the ``SettingWithCopyWarning`` won't be necessary - anymore. - See :ref:`this section ` - for more context. - We recommend turning Copy-on-Write on to leverage the improvements with - - ``` - pd.options.mode.copy_on_write = True - ``` - - even before pandas 3.0 is available. - -When you use chained indexing, the order and type of the indexing operation -partially determine whether the result is a slice into the original object, or -a copy of the slice. - -pandas has the ``SettingWithCopyWarning`` because assigning to a copy of a -slice is frequently not intentional, but a mistake caused by chained indexing -returning a copy where a slice was expected. - -If you would like pandas to be more or less trusting about assignment to a -chained indexing expression, you can set the :ref:`option ` -``mode.chained_assignment`` to one of these values: - -* ``'warn'``, the default, means a ``SettingWithCopyWarning`` is printed. -* ``'raise'`` means pandas will raise a ``SettingWithCopyError`` - you have to deal with. -* ``None`` will suppress the warnings entirely. - -.. 
ipython:: python - :okwarning: - - dfb = pd.DataFrame({'a': ['one', 'one', 'two', - 'three', 'two', 'one', 'six'], - 'c': np.arange(7)}) - - # This will show the SettingWithCopyWarning - # but the frame values will be set - dfb['c'][dfb['a'].str.startswith('o')] = 42 - -This however is operating on a copy and will not work. - -.. ipython:: python - :okwarning: - :okexcept: - - with pd.option_context('mode.chained_assignment','warn'): - dfb[dfb['a'].str.startswith('o')]['c'] = 42 - -A chained assignment can also crop up in setting in a mixed dtype frame. - -.. note:: - - These setting rules apply to all of ``.loc/.iloc``. - -The following is the recommended access method using ``.loc`` for multiple items (using ``mask``) and a single item using a fixed index: - -.. ipython:: python - - dfc = pd.DataFrame({'a': ['one', 'one', 'two', - 'three', 'two', 'one', 'six'], - 'c': np.arange(7)}) - dfd = dfc.copy() - # Setting multiple items using a mask - mask = dfd['a'].str.startswith('o') - dfd.loc[mask, 'c'] = 42 - dfd - - # Setting a single item - dfd = dfc.copy() - dfd.loc[2, 'a'] = 11 - dfd - -The following *can* work at times, but it is not guaranteed to, and therefore should be avoided: - -.. ipython:: python - :okwarning: - - dfd = dfc.copy() - dfd['a'][2] = 111 - dfd - -Last, the subsequent example will **not** work at all, and so should be avoided: - -.. ipython:: python - :okwarning: - :okexcept: - - with pd.option_context('mode.chained_assignment','raise'): - dfd.loc[0]['a'] = 1111 - -.. warning:: - - The chained assignment warnings / exceptions are aiming to inform the user of a possibly invalid - assignment. There may be false positives; situations where a chained assignment is inadvertently - reported. +:ref:`Copy-on-Write ` is the new default with pandas 3.0. +This means than chained indexing will never work. As a consequence, +the ``SettingWithCopyWarning`` is not necessary anymore. +See :ref:`this section ` +for more context. 
diff --git a/doc/source/whatsnew/v0.13.0.rst b/doc/source/whatsnew/v0.13.0.rst index f2e29121760ab..a624e81d17db9 100644 --- a/doc/source/whatsnew/v0.13.0.rst +++ b/doc/source/whatsnew/v0.13.0.rst @@ -172,7 +172,7 @@ API changes statistical mode(s) by axis/Series. (:issue:`5367`) - Chained assignment will now by default warn if the user is assigning to a copy. This can be changed - with the option ``mode.chained_assignment``, allowed options are ``raise/warn/None``. See :ref:`the docs`. + with the option ``mode.chained_assignment``, allowed options are ``raise/warn/None``. .. ipython:: python diff --git a/doc/source/whatsnew/v0.13.1.rst b/doc/source/whatsnew/v0.13.1.rst index 8c85868e1aedb..483dd15a8467a 100644 --- a/doc/source/whatsnew/v0.13.1.rst +++ b/doc/source/whatsnew/v0.13.1.rst @@ -24,8 +24,8 @@ Highlights include: .. warning:: 0.13.1 fixes a bug that was caused by a combination of having numpy < 1.8, and doing - chained assignment on a string-like array. Please review :ref:`the docs`, - chained indexing can have unexpected results and should generally be avoided. + chained assignment on a string-like array. + Chained indexing can have unexpected results and should generally be avoided. 
This would previously segfault: diff --git a/doc/source/whatsnew/v1.5.0.rst b/doc/source/whatsnew/v1.5.0.rst index 8fa1361cc30c1..f4cd57af105dd 100644 --- a/doc/source/whatsnew/v1.5.0.rst +++ b/doc/source/whatsnew/v1.5.0.rst @@ -383,7 +383,7 @@ Other enhancements - Added ``validate`` argument to :meth:`DataFrame.join` (:issue:`46622`) - Added ``numeric_only`` argument to :meth:`.Resampler.sum`, :meth:`.Resampler.prod`, :meth:`.Resampler.min`, :meth:`.Resampler.max`, :meth:`.Resampler.first`, and :meth:`.Resampler.last` (:issue:`46442`) - ``times`` argument in :class:`.ExponentialMovingWindow` now accepts ``np.timedelta64`` (:issue:`47003`) -- :class:`.DataError`, :class:`.SpecificationError`, :class:`.SettingWithCopyError`, :class:`.SettingWithCopyWarning`, :class:`.NumExprClobberingError`, :class:`.UndefinedVariableError`, :class:`.IndexingError`, :class:`.PyperclipException`, :class:`.PyperclipWindowsException`, :class:`.CSSWarning`, :class:`.PossibleDataLossError`, :class:`.ClosedFileError`, :class:`.IncompatibilityWarning`, :class:`.AttributeConflictWarning`, :class:`.DatabaseError`, :class:`.PossiblePrecisionLoss`, :class:`.ValueLabelTypeMismatch`, :class:`.InvalidColumnName`, and :class:`.CategoricalConversionWarning` are now exposed in ``pandas.errors`` (:issue:`27656`) +- :class:`.DataError`, :class:`.SpecificationError`, ``SettingWithCopyError``, ``SettingWithCopyWarning``, :class:`.NumExprClobberingError`, :class:`.UndefinedVariableError`, :class:`.IndexingError`, :class:`.PyperclipException`, :class:`.PyperclipWindowsException`, :class:`.CSSWarning`, :class:`.PossibleDataLossError`, :class:`.ClosedFileError`, :class:`.IncompatibilityWarning`, :class:`.AttributeConflictWarning`, :class:`.DatabaseError`, :class:`.PossiblePrecisionLoss`, :class:`.ValueLabelTypeMismatch`, :class:`.InvalidColumnName`, and :class:`.CategoricalConversionWarning` are now exposed in ``pandas.errors`` (:issue:`27656`) - Added ``check_like`` argument to 
:func:`testing.assert_series_equal` (:issue:`47247`) - Add support for :meth:`.DataFrameGroupBy.ohlc` and :meth:`.SeriesGroupBy.ohlc` for extension array dtypes (:issue:`37493`) - Allow reading compressed SAS files with :func:`read_sas` (e.g., ``.sas7bdat.gz`` files) diff --git a/pandas/core/apply.py b/pandas/core/apply.py index 25a71ce5b5f4f..6334ad687e279 100644 --- a/pandas/core/apply.py +++ b/pandas/core/apply.py @@ -16,8 +16,6 @@ import numpy as np -from pandas._config import option_context - from pandas._libs import lib from pandas._libs.internals import BlockValuesRefs from pandas._typing import ( @@ -1075,14 +1073,12 @@ def apply_series_generator(self) -> tuple[ResType, Index]: results = {} - with option_context("mode.chained_assignment", None): - for i, v in enumerate(series_gen): - # ignore SettingWithCopy here in case the user mutates - results[i] = self.func(v, *self.args, **self.kwargs) - if isinstance(results[i], ABCSeries): - # If we have a view on v, we need to make a copy because - # series_generator will swap out the underlying data - results[i] = results[i].copy(deep=False) + for i, v in enumerate(series_gen): + results[i] = self.func(v, *self.args, **self.kwargs) + if isinstance(results[i], ABCSeries): + # If we have a view on v, we need to make a copy because + # series_generator will swap out the underlying data + results[i] = results[i].copy(deep=False) return results, res_index diff --git a/pandas/core/groupby/groupby.py b/pandas/core/groupby/groupby.py index 5fdb9072e4ba8..02133bd34f57c 100644 --- a/pandas/core/groupby/groupby.py +++ b/pandas/core/groupby/groupby.py @@ -34,8 +34,6 @@ class providing the base-class of operations. 
import numpy as np -from pandas._config.config import option_context - from pandas._libs import ( Timestamp, lib, @@ -1818,32 +1816,28 @@ def f(g): if not include_groups: return self._python_apply_general(f, self._obj_with_exclusions) - # ignore SettingWithCopy here in case the user mutates - with option_context("mode.chained_assignment", None): - try: - result = self._python_apply_general(f, self._selected_obj) - if ( - not isinstance(self.obj, Series) - and self._selection is None - and self._selected_obj.shape != self._obj_with_exclusions.shape - ): - warnings.warn( - message=_apply_groupings_depr.format( - type(self).__name__, "apply" - ), - category=FutureWarning, - stacklevel=find_stack_level(), - ) - except TypeError: - # gh-20949 - # try again, with .apply acting as a filtering - # operation, by excluding the grouping column - # This would normally not be triggered - # except if the udf is trying an operation that - # fails on *some* columns, e.g. a numeric operation - # on a string grouper column - - return self._python_apply_general(f, self._obj_with_exclusions) + try: + result = self._python_apply_general(f, self._selected_obj) + if ( + not isinstance(self.obj, Series) + and self._selection is None + and self._selected_obj.shape != self._obj_with_exclusions.shape + ): + warnings.warn( + message=_apply_groupings_depr.format(type(self).__name__, "apply"), + category=FutureWarning, + stacklevel=find_stack_level(), + ) + except TypeError: + # gh-20949 + # try again, with .apply acting as a filtering + # operation, by excluding the grouping column + # This would normally not be triggered + # except if the udf is trying an operation that + # fails on *some* columns, e.g. 
a numeric operation + # on a string grouper column + + return self._python_apply_general(f, self._obj_with_exclusions) return result diff --git a/pandas/core/internals/managers.py b/pandas/core/internals/managers.py index 3719bf1f77f85..0e5eed3cab851 100644 --- a/pandas/core/internals/managers.py +++ b/pandas/core/internals/managers.py @@ -488,7 +488,7 @@ def is_view(self) -> bool: # e.g. [ b.values.base is not None for b in self.blocks ] # but then we have the case of possibly some blocks being a view # and some blocks not. setting in theory is possible on the non-view - # blocks w/o causing a SettingWithCopy raise/warn. But this is a bit + # blocks. But this is a bit # complicated return False diff --git a/pandas/errors/__init__.py b/pandas/errors/__init__.py index 3b8ea14e764c8..f90c73f9000f0 100644 --- a/pandas/errors/__init__.py +++ b/pandas/errors/__init__.py @@ -418,8 +418,8 @@ class ChainedAssignmentError(Warning): Copy-on-Write always behaves as a copy. Thus, assigning through a chain can never update the original Series or DataFrame. - For more information on view vs. copy, - see :ref:`the user guide`. + For more information on Copy-on-Write, + see :ref:`the user guide`. 
Examples -------- diff --git a/pandas/tests/copy_view/test_clip.py b/pandas/tests/copy_view/test_clip.py index 7c87646424e2f..d083847a5fd4a 100644 --- a/pandas/tests/copy_view/test_clip.py +++ b/pandas/tests/copy_view/test_clip.py @@ -1,9 +1,6 @@ import numpy as np -from pandas import ( - DataFrame, - option_context, -) +from pandas import DataFrame import pandas._testing as tm from pandas.tests.copy_view.util import get_array @@ -93,9 +90,7 @@ def test_clip_chained_inplace(using_copy_on_write): df["a"].clip(1, 2, inplace=True) with tm.assert_produces_warning(None): - with option_context("mode.chained_assignment", None): - df[["a"]].clip(1, 2, inplace=True) + df[["a"]].clip(1, 2, inplace=True) with tm.assert_produces_warning(None): - with option_context("mode.chained_assignment", None): - df[df["a"] > 1].clip(1, 2, inplace=True) + df[df["a"] > 1].clip(1, 2, inplace=True) diff --git a/pandas/tests/copy_view/test_interp_fillna.py b/pandas/tests/copy_view/test_interp_fillna.py index ddc5879a56d54..e3a65c2627523 100644 --- a/pandas/tests/copy_view/test_interp_fillna.py +++ b/pandas/tests/copy_view/test_interp_fillna.py @@ -10,7 +10,6 @@ Series, Timestamp, interval_range, - option_context, ) import pandas._testing as tm from pandas.tests.copy_view.util import get_array @@ -396,12 +395,10 @@ def test_fillna_chained_assignment(using_copy_on_write): tm.assert_frame_equal(df, df_orig) else: with tm.assert_produces_warning(None): - with option_context("mode.chained_assignment", None): - df[["a"]].fillna(100, inplace=True) + df[["a"]].fillna(100, inplace=True) with tm.assert_produces_warning(None): - with option_context("mode.chained_assignment", None): - df[df.a > 5].fillna(100, inplace=True) + df[df.a > 5].fillna(100, inplace=True) with tm.assert_produces_warning(FutureWarning, match="inplace method"): df["a"].fillna(100, inplace=True) @@ -424,9 +421,7 @@ def test_interpolate_chained_assignment(using_copy_on_write, func): getattr(df["a"], func)(inplace=True) with 
tm.assert_produces_warning(None): - with option_context("mode.chained_assignment", None): - getattr(df[["a"]], func)(inplace=True) + getattr(df[["a"]], func)(inplace=True) with tm.assert_produces_warning(None): - with option_context("mode.chained_assignment", None): - getattr(df[df["a"] > 1], func)(inplace=True) + getattr(df[df["a"] > 1], func)(inplace=True) diff --git a/pandas/tests/copy_view/test_methods.py b/pandas/tests/copy_view/test_methods.py index f83d5b078b3b1..babf6683a5ab5 100644 --- a/pandas/tests/copy_view/test_methods.py +++ b/pandas/tests/copy_view/test_methods.py @@ -10,7 +10,6 @@ Series, Timestamp, date_range, - option_context, period_range, ) import pandas._testing as tm @@ -1561,12 +1560,10 @@ def test_chained_where_mask(using_copy_on_write, func): getattr(df["a"], func)(df["a"] > 2, 5, inplace=True) with tm.assert_produces_warning(None): - with option_context("mode.chained_assignment", None): - getattr(df[["a"]], func)(df["a"] > 2, 5, inplace=True) + getattr(df[["a"]], func)(df["a"] > 2, 5, inplace=True) with tm.assert_produces_warning(None): - with option_context("mode.chained_assignment", None): - getattr(df[df["a"] > 1], func)(df["a"] > 2, 5, inplace=True) + getattr(df[df["a"] > 1], func)(df["a"] > 2, 5, inplace=True) def test_asfreq_noop(using_copy_on_write): @@ -1805,12 +1802,10 @@ def test_update_chained_assignment(using_copy_on_write): df["a"].update(ser2) with tm.assert_produces_warning(None): - with option_context("mode.chained_assignment", None): - df[["a"]].update(ser2.to_frame()) + df[["a"]].update(ser2.to_frame()) with tm.assert_produces_warning(None): - with option_context("mode.chained_assignment", None): - df[df["a"] > 1].update(ser2.to_frame()) + df[df["a"] > 1].update(ser2.to_frame()) def test_inplace_arithmetic_series(using_copy_on_write): diff --git a/pandas/tests/copy_view/test_replace.py b/pandas/tests/copy_view/test_replace.py index 6d16bc3083883..af6650b8e0976 100644 --- a/pandas/tests/copy_view/test_replace.py +++ 
b/pandas/tests/copy_view/test_replace.py @@ -4,7 +4,6 @@ from pandas import ( Categorical, DataFrame, - option_context, ) import pandas._testing as tm from pandas.tests.copy_view.util import get_array @@ -434,12 +433,10 @@ def test_replace_chained_assignment(using_copy_on_write): tm.assert_frame_equal(df, df_orig) else: with tm.assert_produces_warning(None): - with option_context("mode.chained_assignment", None): - df[["a"]].replace(1, 100, inplace=True) + df[["a"]].replace(1, 100, inplace=True) with tm.assert_produces_warning(None): - with option_context("mode.chained_assignment", None): - df[df.a > 5].replace(1, 100, inplace=True) + df[df.a > 5].replace(1, 100, inplace=True) with tm.assert_produces_warning(FutureWarning, match="inplace method"): df["a"].replace(1, 100, inplace=True) diff --git a/pandas/tests/frame/methods/test_sample.py b/pandas/tests/frame/methods/test_sample.py index 6b3459fbdc035..e711f54c915fc 100644 --- a/pandas/tests/frame/methods/test_sample.py +++ b/pandas/tests/frame/methods/test_sample.py @@ -335,7 +335,7 @@ def test_sample_aligns_weights_with_frame(self): def test_sample_is_copy(self): # GH#27357, GH#30784: ensure the result of sample is an actual copy and - # doesn't track the parent dataframe / doesn't give SettingWithCopy warnings + # doesn't track the parent dataframe df = DataFrame( np.random.default_rng(2).standard_normal((10, 3)), columns=["a", "b", "c"] ) diff --git a/pandas/tests/groupby/transform/test_transform.py b/pandas/tests/groupby/transform/test_transform.py index a2ecd6c65db60..4f680200b2f67 100644 --- a/pandas/tests/groupby/transform/test_transform.py +++ b/pandas/tests/groupby/transform/test_transform.py @@ -674,10 +674,9 @@ def f(group): assert result["d"].dtype == np.float64 # this is by definition a mutating operation! 
- with pd.option_context("mode.chained_assignment", None): - for key, group in grouped: - res = f(group) - tm.assert_frame_equal(res, result.loc[key]) + for key, group in grouped: + res = f(group) + tm.assert_frame_equal(res, result.loc[key]) @pytest.mark.parametrize( From 7929800bc03f7a5c3a456fbc953121799951b6ed Mon Sep 17 00:00:00 2001 From: Patrick Hoefler <61934744+phofl@users.noreply.github.com> Date: Wed, 27 Dec 2023 17:13:58 +0100 Subject: [PATCH 4/7] CoW: Boolean indexer in MultiIndex raising read-only error --- pandas/core/indexes/multi.py | 2 ++ pandas/tests/copy_view/test_indexing.py | 18 ++++++++++++++++++ 2 files changed, 20 insertions(+) diff --git a/pandas/core/indexes/multi.py b/pandas/core/indexes/multi.py index 2a4e027e2b806..681a3287e46e6 100644 --- a/pandas/core/indexes/multi.py +++ b/pandas/core/indexes/multi.py @@ -3487,6 +3487,8 @@ def _to_bool_indexer(indexer) -> npt.NDArray[np.bool_]: "cannot index with a boolean indexer that " "is not the same length as the index" ) + if isinstance(k, (ABCSeries, Index)): + k = k._values lvl_indexer = np.asarray(k) elif is_list_like(k): diff --git a/pandas/tests/copy_view/test_indexing.py b/pandas/tests/copy_view/test_indexing.py index 9afc98e558c11..7f7be355b76c7 100644 --- a/pandas/tests/copy_view/test_indexing.py +++ b/pandas/tests/copy_view/test_indexing.py @@ -1180,6 +1180,24 @@ def test_series_midx_tuples_slice(using_copy_on_write, warn_copy_on_write): tm.assert_series_equal(ser, expected) +def test_midx_read_only_bool_indexer(): + def mklbl(prefix, n): + return [f"{prefix}{i}" for i in range(n)] + + idx = pd.MultiIndex.from_product( + [mklbl("A", 4), mklbl("B", 2), mklbl("C", 4), mklbl("D", 2)] + ) + cols = pd.MultiIndex.from_tuples( + [("a", "foo"), ("a", "bar"), ("b", "foo"), ("b", "bah")], names=["lvl0", "lvl1"] + ) + df = DataFrame(1, index=idx, columns=cols).sort_index().sort_index(axis=1) + + mask = df[("a", "foo")] == 1 + result = df.loc[pd.IndexSlice[mask, :, ["C1", "C3"]], :] + expected = 
df.loc[pd.IndexSlice[:, :, ["C1", "C3"]], :] + tm.assert_frame_equal(result, expected) + + def test_loc_enlarging_with_dataframe(using_copy_on_write): df = DataFrame({"a": [1, 2, 3]}) rhs = DataFrame({"b": [1, 2, 3], "c": [4, 5, 6]}) From 60653ad993c25666bec2ad23997568ee4e14e4a4 Mon Sep 17 00:00:00 2001 From: Patrick Hoefler Date: Sat, 3 Feb 2024 22:40:21 +0000 Subject: [PATCH 5/7] Update --- doc/source/user_guide/advanced.rst | 2 -- doc/source/user_guide/indexing.rst | 3 +-- pandas/_config/__init__.py | 5 ----- pandas/core/frame.py | 2 +- pandas/core/generic.py | 13 +------------ 5 files changed, 3 insertions(+), 22 deletions(-) diff --git a/doc/source/user_guide/advanced.rst b/doc/source/user_guide/advanced.rst index f7ab466e92d93..3455a0770b1ee 100644 --- a/doc/source/user_guide/advanced.rst +++ b/doc/source/user_guide/advanced.rst @@ -395,8 +395,6 @@ slicers on a single axis. Furthermore, you can *set* the values using the following methods. .. ipython:: python - :okwarning: - df2 = dfmi.copy() df2.loc(axis=0)[:, :, ["C1", "C3"]] = -10 df2 diff --git a/doc/source/user_guide/indexing.rst b/doc/source/user_guide/indexing.rst index eb414e4bb74d6..24cdbad41fe60 100644 --- a/doc/source/user_guide/indexing.rst +++ b/doc/source/user_guide/indexing.rst @@ -1707,7 +1707,6 @@ Why does assignment fail when using chained indexing? ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ :ref:`Copy-on-Write ` is the new default with pandas 3.0. -This means than chained indexing will never work. As a consequence, -the ``SettingWithCopyWarning`` is not necessary anymore. +This means that chained indexing will never work. See :ref:`this section ` for more context.
diff --git a/pandas/_config/__init__.py b/pandas/_config/__init__.py index 0594d1c190a72..aeaff1af1f852 100644 --- a/pandas/_config/__init__.py +++ b/pandas/_config/__init__.py @@ -39,11 +39,6 @@ def warn_copy_on_write() -> bool: return False -def using_nullable_dtypes() -> bool: - _mode_options = _global_config["mode"] - return _mode_options["nullable_dtypes"] - - def using_pyarrow_string_dtype() -> bool: _mode_options = _global_config["future"] return _mode_options["infer_string"] diff --git a/pandas/core/frame.py b/pandas/core/frame.py index fea047607c938..0fec3d1c7795d 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -4121,7 +4121,7 @@ def _get_value(self, index, col, takeable: bool = False) -> Scalar: series = self._ixs(col, axis=1) return series._values[index] - series = self[col] + series = self._get_item(col) engine = self.index._engine if not isinstance(self.index, MultiIndex): diff --git a/pandas/core/generic.py b/pandas/core/generic.py index eb2f972f54686..cbaf330c305a1 100644 --- a/pandas/core/generic.py +++ b/pandas/core/generic.py @@ -6163,14 +6163,6 @@ def _dir_additions(self) -> set[str]: # ---------------------------------------------------------------------- # Consolidation of internals - @final - def _protect_consolidate(self, f): - """ - Consolidate _mgr -- if the blocks have changed, then clear the - cache - """ - return f() - @final def _consolidate_inplace(self) -> None: """Consolidate data in place and return None""" @@ -6178,8 +6170,6 @@ def _consolidate_inplace(self) -> None: def f() -> None: self._mgr = self._mgr.consolidate() - self._protect_consolidate(f) - @final def _consolidate(self): """ @@ -6190,8 +6180,7 @@ def _consolidate(self): ------- consolidated : same type as caller """ - f = lambda: self._mgr.consolidate() - cons_data = self._protect_consolidate(f) + cons_data = self._mgr.consolidate() return self._constructor_from_mgr(cons_data, axes=cons_data.axes).__finalize__( self ) From 
00cb5ff62abe301aa3268504d7c897426d989e61 Mon Sep 17 00:00:00 2001 From: Patrick Hoefler Date: Sun, 4 Feb 2024 00:11:30 +0000 Subject: [PATCH 6/7] Update --- doc/source/user_guide/advanced.rst | 2 ++ pandas/core/generic.py | 3 +-- 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/doc/source/user_guide/advanced.rst b/doc/source/user_guide/advanced.rst index 3455a0770b1ee..f7ab466e92d93 100644 --- a/doc/source/user_guide/advanced.rst +++ b/doc/source/user_guide/advanced.rst @@ -395,6 +395,8 @@ slicers on a single axis. Furthermore, you can *set* the values using the following methods. .. ipython:: python + :okwarning: + df2 = dfmi.copy() df2.loc(axis=0)[:, :, ["C1", "C3"]] = -10 df2 diff --git a/pandas/core/generic.py b/pandas/core/generic.py index cbaf330c305a1..83d3b87bfd9aa 100644 --- a/pandas/core/generic.py +++ b/pandas/core/generic.py @@ -6167,8 +6167,7 @@ def _dir_additions(self) -> set[str]: def _consolidate_inplace(self) -> None: """Consolidate data in place and return None""" - def f() -> None: - self._mgr = self._mgr.consolidate() + self._mgr = self._mgr.consolidate() @final def _consolidate(self): From 516bd88a02ddaff87dc01b66e0081b8b145c5f3d Mon Sep 17 00:00:00 2001 From: Patrick Hoefler Date: Sun, 4 Feb 2024 12:14:16 +0000 Subject: [PATCH 7/7] Update --- pandas/tests/copy_view/test_indexing.py | 11 ++++------- 1 file changed, 4 insertions(+), 7 deletions(-) diff --git a/pandas/tests/copy_view/test_indexing.py b/pandas/tests/copy_view/test_indexing.py index 000bf691ba0a9..da72e89b23ca0 100644 --- a/pandas/tests/copy_view/test_indexing.py +++ b/pandas/tests/copy_view/test_indexing.py @@ -513,7 +513,7 @@ def test_subset_chained_getitem( @pytest.mark.parametrize( "dtype", ["int64", "float64"], ids=["single-block", "mixed-block"] ) -def test_subset_chained_getitem_column(backend, dtype, warn_copy_on_write): +def test_subset_chained_getitem_column(backend, dtype): # Case: creating a subset using multiple, chained getitem calls using views # still 
needs to guarantee proper CoW behaviour dtype_backend, DataFrame, Series = backend @@ -524,14 +524,12 @@ def test_subset_chained_getitem_column(backend, dtype, warn_copy_on_write): # modify subset -> don't modify parent subset = df[:]["a"][0:2] - with tm.assert_cow_warning(warn_copy_on_write): - subset.iloc[0] = 0 + subset.iloc[0] = 0 tm.assert_frame_equal(df, df_orig) # modify parent -> don't modify subset subset = df[:]["a"][0:2] - with tm.assert_cow_warning(warn_copy_on_write): - df.iloc[0, 0] = 0 + df.iloc[0, 0] = 0 expected = Series([1, 2], name="a") tm.assert_series_equal(subset, expected) @@ -821,7 +819,7 @@ def test_column_as_series(backend): tm.assert_series_equal(df["a"], df_orig["a"]) -def test_column_as_series_set_with_upcast(backend, warn_copy_on_write): +def test_column_as_series_set_with_upcast(backend): # Case: selecting a single column now also uses Copy-on-Write -> when # setting a value causes an upcast, we don't need to update the parent # DataFrame through the cache mechanism @@ -838,7 +836,6 @@ def test_column_as_series_set_with_upcast(backend, warn_copy_on_write): with tm.assert_produces_warning(FutureWarning, match="incompatible dtype"): s[0] = "foo" expected = Series(["foo", 2, 3], dtype=object, name="a") - expected = Series(["foo", 2, 3], dtype=object, name="a") tm.assert_series_equal(s, expected) tm.assert_frame_equal(df, df_orig)