From 858753f9bbc5cf2cb825d50a3499d63becc5ae49 Mon Sep 17 00:00:00 2001 From: Patrick Hoefler Date: Sun, 4 Feb 2024 23:43:27 +0000 Subject: [PATCH 1/3] CoW: Enforce some deprecations on the dataframe level --- pandas/core/frame.py | 52 +++------- pandas/core/generic.py | 172 +++++++-------------------------- pandas/core/groupby/grouper.py | 25 ++--- pandas/core/series.py | 55 ++++------- 4 files changed, 75 insertions(+), 229 deletions(-) diff --git a/pandas/core/frame.py b/pandas/core/frame.py index afa680d064c4a..d0c4c781e6cbd 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -62,7 +62,6 @@ from pandas.errors.cow import ( _chained_assignment_method_msg, _chained_assignment_msg, - _chained_assignment_warning_method_msg, ) from pandas.util._decorators import ( Appender, @@ -706,8 +705,7 @@ def __init__( stacklevel=1, # bump to 2 once pyarrow 15.0 is released with fix ) - if using_copy_on_write(): - data = data.copy(deep=False) + data = data.copy(deep=False) # first check if a Manager is passed without any other arguments # -> use fastpath (without checking Manager type) if index is None and columns is None and dtype is None and not copy: @@ -729,9 +727,7 @@ def __init__( if isinstance(data, dict): # retain pre-GH#38939 default behavior copy = True - elif using_copy_on_write() and not isinstance( - data, (Index, DataFrame, Series) - ): + elif not isinstance(data, (Index, DataFrame, Series)): copy = True else: copy = False @@ -784,7 +780,6 @@ def __init__( ) elif getattr(data, "name", None) is not None: # i.e. 
Series/Index with non-None name - _copy = copy if using_copy_on_write() else True mgr = dict_to_mgr( # error: Item "ndarray" of "Union[ndarray, Series, Index]" has no # attribute "name" @@ -792,7 +787,7 @@ def __init__( index, columns, dtype=dtype, - copy=_copy, + copy=copy, ) else: mgr = ndarray_to_mgr( @@ -1496,10 +1491,9 @@ def iterrows(self) -> Iterable[tuple[Hashable, Series]]: """ columns = self.columns klass = self._constructor_sliced - using_cow = using_copy_on_write() for k, v in zip(self.index, self.values): s = klass(v, index=columns, name=k).__finalize__(self) - if using_cow and self._mgr.is_single_block: + if self._mgr.is_single_block: s._mgr.add_references(self._mgr) yield k, s @@ -3803,8 +3797,6 @@ def transpose(self, *args, copy: bool = False) -> DataFrame: if self._can_fast_transpose: # Note: tests pass without this, but this improves perf quite a bit. new_vals = self._values.T - if copy and not using_copy_on_write(): - new_vals = new_vals.copy() result = self._constructor( new_vals, @@ -3813,7 +3805,7 @@ def transpose(self, *args, copy: bool = False) -> DataFrame: copy=False, dtype=new_vals.dtype, ) - if using_copy_on_write() and len(self) > 0: + if len(self) > 0: result._mgr.add_references(self._mgr) elif ( @@ -3857,8 +3849,6 @@ def transpose(self, *args, copy: bool = False) -> DataFrame: else: new_arr = self.values.T - if copy and not using_copy_on_write(): - new_arr = new_arr.copy() result = self._constructor( new_arr, index=self.columns, @@ -4177,7 +4167,7 @@ def isetitem(self, loc, value) -> None: self._iset_item_mgr(loc, arraylike, inplace=False, refs=refs) def __setitem__(self, key, value) -> None: - if not PYPY and using_copy_on_write(): + if not PYPY: if sys.getrefcount(self) <= 3: warnings.warn( _chained_assignment_msg, ChainedAssignmentError, stacklevel=2 @@ -4385,12 +4375,7 @@ def _set_item_mgr( def _iset_item(self, loc: int, value: Series, inplace: bool = True) -> None: # We are only called from _replace_columnwise which guarantees 
that # no reindex is necessary - if using_copy_on_write(): - self._iset_item_mgr( - loc, value._values, inplace=inplace, refs=value._references - ) - else: - self._iset_item_mgr(loc, value._values.copy(), inplace=True) + self._iset_item_mgr(loc, value._values, inplace=inplace, refs=value._references) def _set_item(self, key, value) -> None: """ @@ -5125,9 +5110,7 @@ def _series(self): # ---------------------------------------------------------------------- # Reindexing and alignment - def _reindex_multi( - self, axes: dict[str, Index], copy: bool, fill_value - ) -> DataFrame: + def _reindex_multi(self, axes: dict[str, Index], fill_value) -> DataFrame: """ We are guaranteed non-Nones in the axes. """ @@ -5149,7 +5132,7 @@ def _reindex_multi( else: return self._reindex_with_indexers( {0: [new_index, row_indexer], 1: [new_columns, col_indexer]}, - copy=copy, + copy=False, fill_value=fill_value, ) @@ -7041,7 +7024,7 @@ def sort_values( return self.copy(deep=None) if is_range_indexer(indexer, len(indexer)): - result = self.copy(deep=(not inplace and not using_copy_on_write())) + result = self.copy(deep=False) if ignore_index: result.index = default_index(len(result)) @@ -8801,20 +8784,13 @@ def update( 1 2 500.0 2 3 6.0 """ - if not PYPY and using_copy_on_write(): + if not PYPY: if sys.getrefcount(self) <= REF_COUNT: warnings.warn( _chained_assignment_method_msg, ChainedAssignmentError, stacklevel=2, ) - elif not PYPY and not using_copy_on_write() and self._is_view_after_cow_rules(): - if sys.getrefcount(self) <= REF_COUNT: - warnings.warn( - _chained_assignment_warning_method_msg, - FutureWarning, - stacklevel=2, - ) # TODO: Support other joins if join != "left": # pragma: no cover @@ -12078,7 +12054,7 @@ def to_timestamp( >>> df2.index DatetimeIndex(['2023-01-31', '2024-01-31'], dtype='datetime64[ns]', freq=None) """ - new_obj = self.copy(deep=copy and not using_copy_on_write()) + new_obj = self.copy(deep=False) axis_name = self._get_axis_name(axis) old_ax = 
getattr(self, axis_name) @@ -12147,7 +12123,7 @@ def to_period( >>> idx.to_period("Y") PeriodIndex(['2001', '2002', '2003'], dtype='period[Y-DEC]') """ - new_obj = self.copy(deep=copy and not using_copy_on_write()) + new_obj = self.copy(deep=False) axis_name = self._get_axis_name(axis) old_ax = getattr(self, axis_name) @@ -12464,7 +12440,7 @@ def _reindex_for_setitem( # reindex if necessary if value.index.equals(index) or not len(index): - if using_copy_on_write() and isinstance(value, Series): + if isinstance(value, Series): return value._values, value._references return value._values.copy(), None diff --git a/pandas/core/generic.py b/pandas/core/generic.py index 7bb07694c34a5..73efe571ab412 100644 --- a/pandas/core/generic.py +++ b/pandas/core/generic.py @@ -96,10 +96,7 @@ ChainedAssignmentError, InvalidIndexError, ) -from pandas.errors.cow import ( - _chained_assignment_method_msg, - _chained_assignment_warning_method_msg, -) +from pandas.errors.cow import _chained_assignment_method_msg from pandas.util._decorators import ( deprecate_nonkeyword_arguments, doc, @@ -467,7 +464,7 @@ def set_flags( >>> df2.flags.allows_duplicate_labels False """ - df = self.copy(deep=copy and not using_copy_on_write()) + df = self.copy(deep=False) if allows_duplicate_labels is not None: df.flags["allows_duplicate_labels"] = allows_duplicate_labels return df @@ -635,14 +632,6 @@ def _get_cleaned_column_resolvers(self) -> dict[Hashable, Series]: def _info_axis(self) -> Index: return getattr(self, self._info_axis_name) - def _is_view_after_cow_rules(self): - # Only to be used in cases of chained assignment checks, this is a - # simplified check that assumes that either the whole object is a view - # or a copy - if len(self._mgr.blocks) == 0: - return False - return self._mgr.blocks[0].refs.has_reference() - @property def shape(self) -> tuple[int, ...]: """ @@ -766,7 +755,7 @@ def _set_axis_nocheck( else: # With copy=False, we create a new object but don't copy the # underlying data. 
- obj = self.copy(deep=copy and not using_copy_on_write()) + obj = self.copy(deep=False) setattr(obj, obj._get_axis_name(axis), labels) return obj @@ -809,7 +798,7 @@ def swapaxes(self, axis1: Axis, axis2: Axis, copy: bool_t | None = None) -> Self j = self._get_axis_number(axis2) if i == j: - return self.copy(deep=copy and not using_copy_on_write()) + return self.copy(deep=False) mapping = {i: j, j: i} @@ -829,9 +818,6 @@ def swapaxes(self, axis1: Axis, axis2: Axis, copy: bool_t | None = None) -> Self assert isinstance(self._mgr, BlockManager) new_mgr.blocks[0].refs = self._mgr.blocks[0].refs new_mgr.blocks[0].refs.add_reference(new_mgr.blocks[0]) - if not using_copy_on_write() and copy is not False: - new_mgr = new_mgr.copy(deep=True) - out = self._constructor_from_mgr(new_mgr, axes=new_mgr.axes) return out.__finalize__(self, method="swapaxes") @@ -1066,7 +1052,7 @@ def _rename( index = mapper self._check_inplace_and_allows_duplicate_labels(inplace) - result = self if inplace else self.copy(deep=copy and not using_copy_on_write()) + result = self if inplace else self.copy(deep=False) for axis_no, replacements in enumerate((index, columns)): if replacements is None: @@ -1278,24 +1264,19 @@ class name inplace = validate_bool_kwarg(inplace, "inplace") - if copy and using_copy_on_write(): - copy = False - if mapper is not lib.no_default: # Use v0.23 behavior if a scalar or list non_mapper = is_scalar(mapper) or ( is_list_like(mapper) and not is_dict_like(mapper) ) if non_mapper: - return self._set_axis_name( - mapper, axis=axis, inplace=inplace, copy=copy - ) + return self._set_axis_name(mapper, axis=axis, inplace=inplace) else: raise ValueError("Use `.rename` to alter labels with a mapper.") else: # Use new behavior. 
Means that index and/or columns # is specified - result = self if inplace else self.copy(deep=copy) + result = self if inplace else self.copy(deep=False) for axis in range(self._AXIS_LEN): v = axes.get(self._get_axis_name(axis)) @@ -1308,15 +1289,13 @@ class name f = common.get_rename_function(v) curnames = self._get_axis(axis).names newnames = [f(name) for name in curnames] - result._set_axis_name(newnames, axis=axis, inplace=True, copy=copy) + result._set_axis_name(newnames, axis=axis, inplace=True) if not inplace: return result return None @final - def _set_axis_name( - self, name, axis: Axis = 0, inplace: bool_t = False, copy: bool_t | None = True - ): + def _set_axis_name(self, name, axis: Axis = 0, inplace: bool_t = False): """ Set the name(s) of the axis. @@ -1329,8 +1308,6 @@ def _set_axis_name( and the value 1 or 'columns' specifies columns. inplace : bool, default False If `True`, do operation inplace and return None. - copy: - Whether to make a copy of the result. Returns ------- @@ -1372,7 +1349,7 @@ def _set_axis_name( idx = self._get_axis(axis).set_names(name) inplace = validate_bool_kwarg(inplace, "inplace") - renamed = self if inplace else self.copy(deep=copy) + renamed = self if inplace else self.copy(deep=False) if axis == 0: renamed.index = idx else: @@ -2097,11 +2074,7 @@ def empty(self) -> bool_t: def __array__(self, dtype: npt.DTypeLike | None = None) -> np.ndarray: values = self._values arr = np.asarray(values, dtype=dtype) - if ( - astype_is_view(values.dtype, arr.dtype) - and using_copy_on_write() - and self._mgr.is_single_block - ): + if astype_is_view(values.dtype, arr.dtype) and self._mgr.is_single_block: # Check if both conversions can be done without a copy if astype_is_view(self.dtypes.iloc[0], values.dtype) and astype_is_view( values.dtype, arr.dtype @@ -4017,13 +3990,8 @@ class max_speed if not isinstance(indices, slice): indices = np.asarray(indices, dtype=np.intp) - if ( - axis == 0 - and indices.ndim == 1 - and 
using_copy_on_write() - and is_range_indexer(indices, len(self)) - ): - return self.copy(deep=None) + if axis == 0 and indices.ndim == 1 and is_range_indexer(indices, len(self)): + return self.copy(deep=False) elif self.ndim == 1: raise TypeError( f"{type(self).__name__}.take requires a sequence of integers, " @@ -5372,22 +5340,20 @@ def reindex( # if all axes that are requested to reindex are equal, then only copy # if indicated must have index names equal here as well as values - if copy and using_copy_on_write(): - copy = False if all( self._get_axis(axis_name).identical(ax) for axis_name, ax in axes.items() if ax is not None ): - return self.copy(deep=copy) + return self.copy(deep=False) # check if we are a multi reindex if self._needs_reindex_multi(axes, method, level): - return self._reindex_multi(axes, copy, fill_value) + return self._reindex_multi(axes, fill_value) # perform the reindex on the axes return self._reindex_axes( - axes, level, limit, tolerance, method, fill_value, copy + axes, level, limit, tolerance, method, fill_value, False ).__finalize__(self, method="reindex") @final @@ -5436,7 +5402,7 @@ def _needs_reindex_multi(self, axes, method, level: Level | None) -> bool_t: and self._can_fast_transpose ) - def _reindex_multi(self, axes, copy, fill_value): + def _reindex_multi(self, axes, fill_value): raise AbstractMethodError(self) @final @@ -5473,13 +5439,7 @@ def _reindex_with_indexers( # If we've made a copy once, no need to make another one copy = False - if ( - (copy or copy is None) - and new_data is self._mgr - and not using_copy_on_write() - ): - new_data = new_data.copy(deep=copy) - elif using_copy_on_write() and new_data is self._mgr: + if new_data is self._mgr: new_data = new_data.copy(deep=False) return self._constructor_from_mgr(new_data, axes=new_data.axes).__finalize__( @@ -5667,9 +5627,7 @@ def head(self, n: int = 5) -> Self: 4 monkey 5 parrot """ - if using_copy_on_write(): - return self.iloc[:n].copy() - return self.iloc[:n] + 
return self.iloc[:n].copy() @final def tail(self, n: int = 5) -> Self: @@ -5744,13 +5702,9 @@ def tail(self, n: int = 5) -> Self: 7 whale 8 zebra """ - if using_copy_on_write(): - if n == 0: - return self.iloc[0:0].copy() - return self.iloc[-n:].copy() if n == 0: - return self.iloc[0:0] - return self.iloc[-n:] + return self.iloc[0:0].copy() + return self.iloc[-n:].copy() @final def sample( @@ -6023,9 +5977,7 @@ def pipe( 1 6997.32 NaN 2 3682.80 1473.12 """ - if using_copy_on_write(): - return common.pipe(self.copy(deep=None), func, *args, **kwargs) - return common.pipe(self, func, *args, **kwargs) + return common.pipe(self.copy(deep=False), func, *args, **kwargs) # ---------------------------------------------------------------------- # Attribute access @@ -7029,7 +6981,7 @@ def fillna( """ inplace = validate_bool_kwarg(inplace, "inplace") if inplace: - if not PYPY and using_copy_on_write(): + if not PYPY: if sys.getrefcount(self) <= REF_COUNT: warnings.warn( _chained_assignment_method_msg, @@ -7104,10 +7056,7 @@ def fillna( "with dict/Series column " "by column" ) - if using_copy_on_write(): - result = self.copy(deep=None) - else: - result = self if inplace else self.copy() + result = self.copy(deep=False) is_dict = isinstance(downcast, dict) for k, v in value.items(): if k not in result: @@ -7316,7 +7265,7 @@ def ffill( downcast = self._deprecate_downcast(downcast, "ffill") inplace = validate_bool_kwarg(inplace, "inplace") if inplace: - if not PYPY and using_copy_on_write(): + if not PYPY: if sys.getrefcount(self) <= REF_COUNT: warnings.warn( _chained_assignment_method_msg, @@ -7504,7 +7453,7 @@ def bfill( downcast = self._deprecate_downcast(downcast, "bfill") inplace = validate_bool_kwarg(inplace, "inplace") if inplace: - if not PYPY and using_copy_on_write(): + if not PYPY: if sys.getrefcount(self) <= REF_COUNT: warnings.warn( _chained_assignment_method_msg, @@ -7659,7 +7608,7 @@ def replace( inplace = validate_bool_kwarg(inplace, "inplace") if inplace: - if 
not PYPY and using_copy_on_write(): + if not PYPY: if sys.getrefcount(self) <= REF_COUNT: warnings.warn( _chained_assignment_method_msg, @@ -8089,7 +8038,7 @@ def interpolate( inplace = validate_bool_kwarg(inplace, "inplace") if inplace: - if not PYPY and using_copy_on_write(): + if not PYPY: if sys.getrefcount(self) <= REF_COUNT: warnings.warn( _chained_assignment_method_msg, @@ -8711,29 +8660,13 @@ def clip( inplace = validate_bool_kwarg(inplace, "inplace") if inplace: - if not PYPY and using_copy_on_write(): + if not PYPY: if sys.getrefcount(self) <= REF_COUNT: warnings.warn( _chained_assignment_method_msg, ChainedAssignmentError, stacklevel=2, ) - elif ( - not PYPY - and not using_copy_on_write() - and self._is_view_after_cow_rules() - ): - ctr = sys.getrefcount(self) - ref_count = REF_COUNT - if isinstance(self, ABCSeries) and hasattr(self, "_cacher"): - # see https://github.com/pandas-dev/pandas/pull/56060#discussion_r1399245221 - ref_count += 1 - if ctr <= ref_count: - warnings.warn( - _chained_assignment_warning_method_msg, - FutureWarning, - stacklevel=2, - ) axis = nv.validate_clip_with_axis(axis, (), kwargs) if axis is not None: @@ -10028,8 +9961,7 @@ def _align_series( fill_axis: Axis = 0, ) -> tuple[Self, Series, Index | None]: is_series = isinstance(self, ABCSeries) - if copy and using_copy_on_write(): - copy = False + copy = False if (not is_series and axis is None) or axis not in [None, 0, 1]: raise ValueError("Must specify axis=0 or 1") @@ -10055,7 +9987,7 @@ def _align_series( new_mgr = self._mgr.reindex_indexer(join_index, lidx, axis=1, copy=copy) left = self._constructor_from_mgr(new_mgr, axes=new_mgr.axes) - right = other._reindex_indexer(join_index, ridx, copy) + right = other._reindex_indexer(join_index, ridx) else: # one has > 1 ndim @@ -10440,29 +10372,13 @@ def where( """ inplace = validate_bool_kwarg(inplace, "inplace") if inplace: - if not PYPY and using_copy_on_write(): + if not PYPY: if sys.getrefcount(self) <= REF_COUNT: 
warnings.warn( _chained_assignment_method_msg, ChainedAssignmentError, stacklevel=2, ) - elif ( - not PYPY - and not using_copy_on_write() - and self._is_view_after_cow_rules() - ): - ctr = sys.getrefcount(self) - ref_count = REF_COUNT - if isinstance(self, ABCSeries) and hasattr(self, "_cacher"): - # see https://github.com/pandas-dev/pandas/pull/56060#discussion_r1399245221 - ref_count += 1 - if ctr <= ref_count: - warnings.warn( - _chained_assignment_warning_method_msg, - FutureWarning, - stacklevel=2, - ) other = common.apply_if_callable(other, self) return self._where(cond, other, inplace, axis, level) @@ -10523,29 +10439,13 @@ def mask( ) -> Self | None: inplace = validate_bool_kwarg(inplace, "inplace") if inplace: - if not PYPY and using_copy_on_write(): + if not PYPY: if sys.getrefcount(self) <= REF_COUNT: warnings.warn( _chained_assignment_method_msg, ChainedAssignmentError, stacklevel=2, ) - elif ( - not PYPY - and not using_copy_on_write() - and self._is_view_after_cow_rules() - ): - ctr = sys.getrefcount(self) - ref_count = REF_COUNT - if isinstance(self, ABCSeries) and hasattr(self, "_cacher"): - # see https://github.com/pandas-dev/pandas/pull/56060#discussion_r1399245221 - ref_count += 1 - if ctr <= ref_count: - warnings.warn( - _chained_assignment_warning_method_msg, - FutureWarning, - stacklevel=2, - ) cond = common.apply_if_callable(cond, self) other = common.apply_if_callable(other, self) @@ -10921,7 +10821,7 @@ def truncate( if isinstance(ax, MultiIndex): setattr(result, self._get_axis_name(axis), ax.truncate(before, after)) - result = result.copy(deep=copy and not using_copy_on_write()) + result = result.copy(deep=False) return result @@ -11014,7 +10914,7 @@ def _tz_convert(ax, tz): raise ValueError(f"The level {level} is not valid") ax = _tz_convert(ax, tz) - result = self.copy(deep=copy and not using_copy_on_write()) + result = self.copy(deep=False) result = result.set_axis(ax, axis=axis, copy=False) return result.__finalize__(self, 
method="tz_convert") @@ -11208,7 +11108,7 @@ def _tz_localize(ax, tz, ambiguous, nonexistent): raise ValueError(f"The level {level} is not valid") ax = _tz_localize(ax, tz, ambiguous, nonexistent) - result = self.copy(deep=copy and not using_copy_on_write()) + result = self.copy(deep=False) result = result.set_axis(ax, axis=axis, copy=False) return result.__finalize__(self, method="tz_localize") diff --git a/pandas/core/groupby/grouper.py b/pandas/core/groupby/grouper.py index 1e6658e5dfd39..6a6141422b80d 100644 --- a/pandas/core/groupby/grouper.py +++ b/pandas/core/groupby/grouper.py @@ -12,8 +12,6 @@ import numpy as np -from pandas._config import using_copy_on_write - from pandas._libs.tslibs import OutOfBoundsDatetime from pandas.errors import InvalidIndexError from pandas.util._decorators import cache_readonly @@ -959,26 +957,15 @@ def is_in_axis(key) -> bool: def is_in_obj(gpr) -> bool: if not hasattr(gpr, "name"): return False - if using_copy_on_write(): - # For the CoW case, we check the references to determine if the - # series is part of the object - try: - obj_gpr_column = obj[gpr.name] - except (KeyError, IndexError, InvalidIndexError, OutOfBoundsDatetime): - return False - if isinstance(gpr, Series) and isinstance(obj_gpr_column, Series): - return gpr._mgr.references_same_values(obj_gpr_column._mgr, 0) - return False + # We check the references to determine if the + # series is part of the object try: - return gpr is obj[gpr.name] + obj_gpr_column = obj[gpr.name] except (KeyError, IndexError, InvalidIndexError, OutOfBoundsDatetime): - # IndexError reached in e.g. test_skip_group_keys when we pass - # lambda here - # InvalidIndexError raised on key-types inappropriate for index, - # e.g. 
DatetimeIndex.get_loc(tuple()) - # OutOfBoundsDatetime raised when obj is a Series with DatetimeIndex - # and gpr.name is month str return False + if isinstance(gpr, Series) and isinstance(obj_gpr_column, Series): + return gpr._mgr.references_same_values(obj_gpr_column._mgr, 0) + return False for gpr, level in zip(keys, levels): if is_in_obj(gpr): # df.groupby(df['name']) diff --git a/pandas/core/series.py b/pandas/core/series.py index d3c199286931f..d04266a223ae0 100644 --- a/pandas/core/series.py +++ b/pandas/core/series.py @@ -413,8 +413,7 @@ def __init__( DeprecationWarning, stacklevel=2, ) - if using_copy_on_write(): - data = data.copy(deep=False) + data = data.copy(deep=False) # GH#33357 called with just the SingleBlockManager NDFrame.__init__(self, data) if fastpath: @@ -429,7 +428,7 @@ def __init__( original_dtype = dtype if isinstance(data, (ExtensionArray, np.ndarray)): - if copy is not False and using_copy_on_write(): + if copy is not False: if dtype is None or astype_is_view(data.dtype, pandas_dtype(dtype)): data = data.copy() if copy is None: @@ -441,7 +440,7 @@ def __init__( if not isinstance(data, SingleBlockManager): data = SingleBlockManager.from_array(data, index) allow_mgr = True - elif using_copy_on_write() and not copy: + elif not copy: data = data.copy(deep=False) if not allow_mgr: @@ -460,7 +459,7 @@ def __init__( NDFrame.__init__(self, data) return - if isinstance(data, SingleBlockManager) and using_copy_on_write() and not copy: + if isinstance(data, SingleBlockManager) and not copy: data = data.copy(deep=False) if not allow_mgr: @@ -497,12 +496,8 @@ def __init__( if dtype is not None: data = data.astype(dtype, copy=False) - if using_copy_on_write(): - refs = data._references - data = data._values - else: - # GH#24096 we need to ensure the index remains immutable - data = data._values.copy() + refs = data._references + data = data._values copy = False elif isinstance(data, np.ndarray): @@ -884,7 +879,7 @@ def ravel(self, order: str = "C") -> 
ArrayLike: stacklevel=2, ) arr = self._values.ravel(order=order) - if isinstance(arr, np.ndarray) and using_copy_on_write(): + if isinstance(arr, np.ndarray): arr.flags.writeable = False return arr @@ -1000,7 +995,7 @@ def __array__(self, dtype: npt.DTypeLike | None = None) -> np.ndarray: """ values = self._values arr = np.asarray(values, dtype=dtype) - if using_copy_on_write() and astype_is_view(values.dtype, arr.dtype): + if astype_is_view(values.dtype, arr.dtype): arr = arr.view() arr.flags.writeable = False return arr @@ -1068,9 +1063,7 @@ def __getitem__(self, key): key = com.apply_if_callable(key, self) if key is Ellipsis: - if using_copy_on_write(): - return self.copy(deep=False) - return self + return self.copy(deep=False) key_is_scalar = is_scalar(key) if isinstance(key, (list, tuple)): @@ -1230,7 +1223,7 @@ def _get_value(self, label, takeable: bool = False): return self.iloc[loc] def __setitem__(self, key, value) -> None: - if not PYPY and using_copy_on_write(): + if not PYPY: if sys.getrefcount(self) <= 3: warnings.warn( _chained_assignment_msg, ChainedAssignmentError, stacklevel=2 @@ -1615,14 +1608,10 @@ def reset_index( if inplace: self.index = new_index - elif using_copy_on_write(): + else: new_ser = self.copy(deep=False) new_ser.index = new_index return new_ser.__finalize__(self, method="reset_index") - else: - return self._constructor( - self._values.copy(), index=new_index, copy=False, dtype=self.dtype - ).__finalize__(self, method="reset_index") elif inplace: raise TypeError( "Cannot reset_index inplace on a Series to create a DataFrame" @@ -2000,11 +1989,9 @@ def _set_name( name : str inplace : bool Whether to modify `self` directly or return a copy. 
- deep : bool|None, default None - Whether to do a deep copy, a shallow copy, or Copy on Write(None) """ inplace = validate_bool_kwarg(inplace, "inplace") - ser = self if inplace else self.copy(deep and not using_copy_on_write()) + ser = self if inplace else self.copy(deep=False) ser.name = name return ser @@ -3479,7 +3466,7 @@ def update(self, other: Series | Sequence | Mapping) -> None: 2 3 dtype: int64 """ - if not PYPY and using_copy_on_write(): + if not PYPY: if sys.getrefcount(self) <= REF_COUNT: warnings.warn( _chained_assignment_method_msg, @@ -4293,7 +4280,7 @@ def swaplevel( {examples} """ assert isinstance(self.index, MultiIndex) - result = self.copy(deep=copy and not using_copy_on_write()) + result = self.copy(deep=False) result.index = self.index.swaplevel(i, j) return result @@ -4627,7 +4614,7 @@ def transform( ) -> DataFrame | Series: # Validate axis argument self._get_axis_number(axis) - ser = self.copy(deep=False) if using_copy_on_write() else self + ser = self.copy(deep=False) result = SeriesApply(ser, func=func, args=args, kwargs=kwargs).transform() return result @@ -4779,11 +4766,7 @@ def _reindex_indexer( if indexer is None and ( new_index is None or new_index.names == self.index.names ): - if using_copy_on_write(): - return self.copy(deep=copy) - if copy or copy is None: - return self.copy(deep=copy) - return self + return self.copy(deep=False) new_values = algorithms.take_nd( self._values, indexer, allow_fill=True, fill_value=None @@ -4940,7 +4923,7 @@ def rename( errors=errors, ) else: - return self._set_name(index, inplace=inplace, deep=copy) + return self._set_name(index, inplace=inplace) @Appender( """ @@ -5887,7 +5870,7 @@ def to_timestamp( if not isinstance(self.index, PeriodIndex): raise TypeError(f"unsupported Type {type(self.index).__name__}") - new_obj = self.copy(deep=copy and not using_copy_on_write()) + new_obj = self.copy(deep=False) new_index = self.index.to_timestamp(freq=freq, how=how) setattr(new_obj, "index", new_index) 
return new_obj @@ -5939,7 +5922,7 @@ def to_period(self, freq: str | None = None, copy: bool | None = None) -> Series if not isinstance(self.index, DatetimeIndex): raise TypeError(f"unsupported Type {type(self.index).__name__}") - new_obj = self.copy(deep=copy and not using_copy_on_write()) + new_obj = self.copy(deep=False) new_index = self.index.to_period(freq=freq) setattr(new_obj, "index", new_index) return new_obj From 9f599f854753c875d19ccbdd4901c6db25134642 Mon Sep 17 00:00:00 2001 From: Patrick Hoefler Date: Sun, 4 Feb 2024 23:58:51 +0000 Subject: [PATCH 2/3] Remove copy keyword --- pandas/core/series.py | 1 - 1 file changed, 1 deletion(-) diff --git a/pandas/core/series.py b/pandas/core/series.py index d04266a223ae0..503b853c27055 100644 --- a/pandas/core/series.py +++ b/pandas/core/series.py @@ -4759,7 +4759,6 @@ def _reindex_indexer( self, new_index: Index | None, indexer: npt.NDArray[np.intp] | None, - copy: bool | None, ) -> Series: # Note: new_index is None iff indexer is None # if not None, indexer is np.intp From 144def075d311cee877e0de14fb3890a4377f902 Mon Sep 17 00:00:00 2001 From: Patrick Hoefler Date: Tue, 6 Feb 2024 00:30:23 +0100 Subject: [PATCH 3/3] Fixup --- pandas/core/generic.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/core/generic.py b/pandas/core/generic.py index 73efe571ab412..87386455337ce 100644 --- a/pandas/core/generic.py +++ b/pandas/core/generic.py @@ -9980,7 +9980,7 @@ def _align_series( ) if is_series: - left = self._reindex_indexer(join_index, lidx, copy) + left = self._reindex_indexer(join_index, lidx) elif lidx is None or join_index is None: left = self.copy(deep=copy) else: