From 038e45f28f67a69ee9dd5669cd6f78574adfbe4a Mon Sep 17 00:00:00 2001 From: Patrick Hoefler Date: Sun, 25 Feb 2024 17:01:22 +0100 Subject: [PATCH 1/4] DEPR: Deprecate remaining copy usages --- doc/source/whatsnew/v3.0.0.rst | 7 +++ pandas/core/frame.py | 51 ++++++++++++++----- pandas/core/generic.py | 31 ++++++----- pandas/core/reshape/concat.py | 38 +++++++++++--- pandas/core/reshape/merge.py | 3 +- pandas/core/series.py | 49 +++++++++++------- .../tests/copy_view/test_copy_deprecation.py | 50 +++++++++++++++--- pandas/tests/frame/methods/test_rename.py | 2 +- pandas/tests/frame/methods/test_set_axis.py | 8 +-- pandas/tests/frame/test_api.py | 4 +- pandas/tests/reshape/concat/test_concat.py | 6 +-- pandas/tests/reshape/concat/test_index.py | 4 +- pandas/tests/reshape/merge/test_merge.py | 4 +- pandas/tests/series/methods/test_rename.py | 2 +- 14 files changed, 182 insertions(+), 77 deletions(-) diff --git a/doc/source/whatsnew/v3.0.0.rst b/doc/source/whatsnew/v3.0.0.rst index 879d71add98e4..c588e317a0acc 100644 --- a/doc/source/whatsnew/v3.0.0.rst +++ b/doc/source/whatsnew/v3.0.0.rst @@ -117,6 +117,13 @@ will be removed in a future version: - :meth:`DataFrame.astype` / :meth:`Series.astype` - :meth:`DataFrame.reindex` / :meth:`Series.reindex` - :meth:`DataFrame.reindex_like` / :meth:`Series.reindex_like` +- :meth:`DataFrame.set_axis` / :meth:`Series.set_axis` +- :meth:`DataFrame.to_period` / :meth:`Series.to_period` +- :meth:`DataFrame.to_timestamp` / :meth:`Series.to_timestamp` +- :meth:`DataFrame.rename` / :meth:`Series.rename` +- :meth:`DataFrame.transpose` +- :meth:`DataFrame.swaplevel` +- :meth:`DataFrame.merge` / :func:`pd.merge` Copy-on-Write utilizes a lazy copy mechanism that defers copying the data until necessary. Use ``.copy`` to trigger an eager copy. The copy keyword has no effect diff --git a/pandas/core/frame.py b/pandas/core/frame.py index df413fda0255a..78cdda08a3d82 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -355,7 +355,7 @@ of a string to indicate that the column name from `left` or `right` should be left as-is, with no suffix. At least one of the values must not be None. -copy : bool, default True +copy : bool, default False If False, avoid copy if possible. .. note:: @@ -369,6 +369,8 @@ You can already get the future behavior and improvements through enabling copy on write ``pd.options.mode.copy_on_write = True`` + + .. deprecated:: 3.0.0 indicator : bool or str, default False If True, adds a column to the output DataFrame called "_merge" with information on the source of each row. The column can be given a different @@ -3580,7 +3582,11 @@ def memory_usage(self, index: bool = True, deep: bool = False) -> Series: result = index_memory_usage._append(result) return result - def transpose(self, *args, copy: bool = False) -> DataFrame: + def transpose( + self, + *args, + copy: bool | lib.NoDefault = lib.no_default, + ) -> DataFrame: """ Transpose index and columns. @@ -3611,6 +3617,8 @@ def transpose(self, *args, copy: bool = False) -> DataFrame: You can already get the future behavior and improvements through enabling copy on write ``pd.options.mode.copy_on_write = True`` + .. deprecated:: 3.0.0 + Returns ------- DataFrame @@ -3691,6 +3699,7 @@ def transpose(self, *args, copy: bool = False) -> DataFrame: 1 object dtype: object """ + self._check_copy_deprecation(copy) nv.validate_transpose(args, {}) # construct the args @@ -5053,9 +5062,9 @@ def set_axis( labels, *, axis: Axis = 0, - copy: bool | None = None, + copy: bool | lib.NoDefault = lib.no_default, ) -> DataFrame: - return super().set_axis(labels, axis=axis) + return super().set_axis(labels, axis=axis, copy=copy) @doc( NDFrame.reindex, @@ -5307,7 +5316,7 @@ def rename( index: Renamer | None = ..., columns: Renamer | None = ..., axis: Axis | None = ..., - copy: bool | None = ..., + copy: bool | lib.NoDefault = lib.no_default, inplace: Literal[True], level: Level = ..., errors: IgnoreRaise = ..., @@ -5322,7 +5331,7 @@ def rename( index: Renamer | None = ..., columns: Renamer | None = ..., axis: Axis | None = ..., - copy: bool | None = ..., + copy: bool | lib.NoDefault = lib.no_default, inplace: Literal[False] = ..., level: Level = ..., errors: IgnoreRaise = ..., @@ -5337,7 +5346,7 @@ def rename( index: Renamer | None = ..., columns: Renamer | None = ..., axis: Axis | None = ..., - copy: bool | None = ..., + copy: bool | lib.NoDefault = lib.no_default, inplace: bool = ..., level: Level = ..., errors: IgnoreRaise = ..., @@ -5351,7 +5360,7 @@ def rename( index: Renamer | None = None, columns: Renamer | None = None, axis: Axis | None = None, - copy: bool | None = None, + copy: bool | lib.NoDefault = lib.no_default, inplace: bool = False, level: Level | None = None, errors: IgnoreRaise = "ignore", @@ -5381,7 +5390,7 @@ def rename( axis : {0 or 'index', 1 or 'columns'}, default 0 Axis to target with ``mapper``. Can be either the axis name ('index', 'columns') or number (0, 1). The default is 'index'. - copy : bool, default True + copy : bool, default False Also copy underlying data. .. note:: @@ -5395,6 +5404,8 @@ def rename( You can already get the future behavior and improvements through enabling copy on write ``pd.options.mode.copy_on_write = True`` + + .. deprecated:: 3.0.0 inplace : bool, default False Whether to modify the DataFrame rather than creating a new one. If True then value of copy is ignored. @@ -5475,6 +5486,7 @@ def rename( 2 2 5 4 3 6 """ + self._check_copy_deprecation(copy) return super()._rename( mapper=mapper, index=index, @@ -10502,10 +10514,12 @@ def merge( right_index: bool = False, sort: bool = False, suffixes: Suffixes = ("_x", "_y"), - copy: bool | None = None, + copy: bool | lib.NoDefault = lib.no_default, indicator: str | bool = False, validate: MergeValidate | None = None, ) -> DataFrame: + self._check_copy_deprecation(copy) + from pandas.core.reshape.merge import merge return merge( @@ -12332,7 +12346,7 @@ def to_timestamp( freq: Frequency | None = None, how: ToTimestampHow = "start", axis: Axis = 0, - copy: bool | None = None, + copy: bool | lib.NoDefault = lib.no_default, ) -> DataFrame: """ Cast to DatetimeIndex of timestamps, at *beginning* of period. @@ -12346,7 +12360,7 @@ def to_timestamp( vs. end. axis : {0 or 'index', 1 or 'columns'}, default 0 The axis to convert (the index by default). - copy : bool, default True + copy : bool, default False If False then underlying input data is not copied. .. note:: @@ -12361,6 +12375,8 @@ def to_timestamp( You can already get the future behavior and improvements through enabling copy on write ``pd.options.mode.copy_on_write = True`` + .. deprecated:: 3.0.0 + Returns ------- DataFrame @@ -12397,6 +12413,7 @@ def to_timestamp( >>> df2.index DatetimeIndex(['2023-01-31', '2024-01-31'], dtype='datetime64[ns]', freq=None) """ + self._check_copy_deprecation(copy) new_obj = self.copy(deep=False) axis_name = self._get_axis_name(axis) @@ -12410,7 +12427,10 @@ def to_timestamp( return new_obj def to_period( - self, freq: Frequency | None = None, axis: Axis = 0, copy: bool | None = None + self, + freq: Frequency | None = None, + axis: Axis = 0, + copy: bool | lib.NoDefault = lib.no_default, ) -> DataFrame: """ Convert DataFrame from DatetimeIndex to PeriodIndex. @@ -12424,7 +12444,7 @@ def to_period( Frequency of the PeriodIndex. axis : {0 or 'index', 1 or 'columns'}, default 0 The axis to convert (the index by default). - copy : bool, default True + copy : bool, default False If False then underlying input data is not copied. .. note:: @@ -12439,6 +12459,8 @@ def to_period( You can already get the future behavior and improvements through enabling copy on write ``pd.options.mode.copy_on_write = True`` + .. deprecated:: 3.0.0 + Returns ------- DataFrame @@ -12466,6 +12488,7 @@ def to_period( >>> idx.to_period("Y") PeriodIndex(['2001', '2002', '2003'], dtype='period[Y-DEC]') """ + self._check_copy_deprecation(copy) new_obj = self.copy(deep=False) axis_name = self._get_axis_name(axis) diff --git a/pandas/core/generic.py b/pandas/core/generic.py index 9424e421fd85f..453c1829ef943 100644 --- a/pandas/core/generic.py +++ b/pandas/core/generic.py @@ -398,7 +398,7 @@ def flags(self) -> Flags: def set_flags( self, *, - copy: bool = False, + copy: bool | lib.NoDefault = lib.no_default, allows_duplicate_labels: bool | None = None, ) -> Self: """ @@ -420,6 +420,8 @@ def set_flags( You can already get the future behavior and improvements through enabling copy on write ``pd.options.mode.copy_on_write = True`` + + .. deprecated:: 3.0.0 allows_duplicate_labels : bool, optional Whether the returned object allows duplicate labels. @@ -454,6 +456,7 @@ def set_flags( >>> df2.flags.allows_duplicate_labels False """ + self._check_copy_deprecation(copy) df = self.copy(deep=False) if allows_duplicate_labels is not None: df.flags["allows_duplicate_labels"] = allows_duplicate_labels @@ -679,7 +682,7 @@ def set_axis( labels, *, axis: Axis = 0, - copy: bool | None = None, + copy: bool | lib.NoDefault = lib.no_default, ) -> Self: """ Assign desired index to given axis. @@ -696,7 +699,7 @@ def set_axis( The axis to update. The value 0 identifies the rows. For `Series` this parameter is unused and defaults to 0. - copy : bool, default True + copy : bool, default False Whether to make a copy of the underlying data. .. note:: @@ -711,6 +714,8 @@ def set_axis( You can already get the future behavior and improvements through enabling copy on write ``pd.options.mode.copy_on_write = True`` + .. deprecated:: 3.0.0 + Returns ------- %(klass)s @@ -720,6 +725,7 @@ def set_axis( -------- %(klass)s.rename_axis : Alter the name of the index%(see_also_sub)s. """ + self._check_copy_deprecation(copy) return self._set_axis_nocheck(labels, axis, inplace=False) @overload @@ -949,7 +955,6 @@ def _rename( index: Renamer | None = ..., columns: Renamer | None = ..., axis: Axis | None = ..., - copy: bool | None = ..., inplace: Literal[False] = ..., level: Level | None = ..., errors: str = ..., @@ -964,7 +969,6 @@ def _rename( index: Renamer | None = ..., columns: Renamer | None = ..., axis: Axis | None = ..., - copy: bool | None = ..., inplace: Literal[True], level: Level | None = ..., errors: str = ..., @@ -979,7 +983,6 @@ def _rename( index: Renamer | None = ..., columns: Renamer | None = ..., axis: Axis | None = ..., - copy: bool | None = ..., inplace: bool, level: Level | None = ..., errors: str = ..., @@ -994,7 +997,6 @@ def _rename( index: Renamer | None = None, columns: Renamer | None = None, axis: Axis | None = None, - copy: bool | None = None, inplace: bool = False, level: Level | None = None, errors: str = "ignore", @@ -1065,7 +1067,7 @@ def rename_axis( index=..., columns=..., axis: Axis = ..., - copy: bool | None = ..., + copy: bool | lib.NoDefault = lib.no_default, inplace: Literal[False] = ..., ) -> Self: ... @@ -1078,7 +1080,7 @@ def rename_axis( index=..., columns=..., axis: Axis = ..., - copy: bool | None = ..., + copy: bool | lib.NoDefault = lib.no_default, inplace: Literal[True], ) -> None: ... @@ -1091,7 +1093,7 @@ def rename_axis( index=..., columns=..., axis: Axis = ..., - copy: bool | None = ..., + copy: bool | lib.NoDefault = lib.no_default, inplace: bool = ..., ) -> Self | None: ... @@ -1103,7 +1105,7 @@ def rename_axis( index=lib.no_default, columns=lib.no_default, axis: Axis = 0, - copy: bool | None = None, + copy: bool | lib.NoDefault = lib.no_default, inplace: bool = False, ) -> Self | None: """ @@ -1125,7 +1127,7 @@ def rename_axis( apply to that axis' values. axis : {0 or 'index', 1 or 'columns'}, default 0 The axis to rename. - copy : bool, default None + copy : bool, default False Also copy underlying data. .. note:: @@ -1139,6 +1141,8 @@ def rename_axis( You can already get the future behavior and improvements through enabling copy on write ``pd.options.mode.copy_on_write = True`` + + .. deprecated:: 3.0.0 inplace : bool, default False Modifies the object directly, instead of creating a new Series or DataFrame. @@ -1226,6 +1230,7 @@ class name cat 4 0 monkey 2 2 """ + self._check_copy_deprecation(copy) axes = {"index": index, "columns": columns} if axis is not None: @@ -6341,7 +6346,7 @@ def astype( return self.copy(deep=False) # GH 19920: retain column metadata after concat - result = concat(results, axis=1, copy=False) + result = concat(results, axis=1) # GH#40810 retain subclass # error: Incompatible types in assignment # (expression has type "Self", variable has type "DataFrame") diff --git a/pandas/core/reshape/concat.py b/pandas/core/reshape/concat.py index 88323e5304cc4..9dc4673765450 100644 --- a/pandas/core/reshape/concat.py +++ b/pandas/core/reshape/concat.py @@ -15,6 +15,7 @@ import numpy as np +from pandas._libs import lib from pandas.util._decorators import cache_readonly from pandas.util._exceptions import find_stack_level @@ -79,7 +80,7 @@ def concat( names: list[HashableT] | None = ..., verify_integrity: bool = ..., sort: bool = ..., - copy: bool | None = ..., + copy: bool | lib.NoDefault = ..., ) -> DataFrame: ... @@ -96,7 +97,7 @@ def concat( names: list[HashableT] | None = ..., verify_integrity: bool = ..., sort: bool = ..., - copy: bool | None = ..., + copy: bool | lib.NoDefault = ..., ) -> Series: ... @@ -113,7 +114,7 @@ def concat( names: list[HashableT] | None = ..., verify_integrity: bool = ..., sort: bool = ..., - copy: bool | None = ..., + copy: bool | lib.NoDefault = ..., ) -> DataFrame | Series: ... @@ -130,7 +131,7 @@ def concat( names: list[HashableT] | None = ..., verify_integrity: bool = ..., sort: bool = ..., - copy: bool | None = ..., + copy: bool | lib.NoDefault = ..., ) -> DataFrame: ... @@ -147,7 +148,7 @@ def concat( names: list[HashableT] | None = ..., verify_integrity: bool = ..., sort: bool = ..., - copy: bool | None = ..., + copy: bool | lib.NoDefault = ..., ) -> DataFrame | Series: ... @@ -163,7 +164,7 @@ def concat( names: list[HashableT] | None = None, verify_integrity: bool = False, sort: bool = False, - copy: bool | None = None, + copy: bool | lib.NoDefault = lib.no_default, ) -> DataFrame | Series: """ Concatenate pandas objects along a particular axis. @@ -207,9 +208,23 @@ def concat( this is when the non-concatentation axis is a DatetimeIndex and join='outer' and the axis is not already aligned. In that case, the non-concatenation axis is always sorted lexicographically. - copy : bool, default True + copy : bool, default False If False, do not copy data unnecessarily. + .. note:: + The `copy` keyword will change behavior in pandas 3.0. + `Copy-on-Write + `__ + will be enabled by default, which means that all methods with a + `copy` keyword will use a lazy copy mechanism to defer the copy and + ignore the `copy` keyword. The `copy` keyword will be removed in a + future version of pandas. + + You can already get the future behavior and improvements through + enabling copy on write ``pd.options.mode.copy_on_write = True`` + + .. deprecated:: 3.0.0 + Returns ------- object, type of objs @@ -368,6 +383,15 @@ def concat( 0 1 2 1 3 4 """ + if copy is not lib.no_default: + warnings.warn( + "The copy keyword is deprecated and will be removed in a future " + "version. Copy-on-Write is active in pandas since 3.0 which utilizes " + "a lazy copy mechanism that defers copies until necessary. Use " + ".copy() to make an eager copy if necessary.", + DeprecationWarning, + stacklevel=find_stack_level(), + ) op = _Concatenator( objs, diff --git a/pandas/core/reshape/merge.py b/pandas/core/reshape/merge.py index 0494138d1e16f..b0e30658b9c19 100644 --- a/pandas/core/reshape/merge.py +++ b/pandas/core/reshape/merge.py @@ -144,11 +144,12 @@ def merge( right_index: bool = False, sort: bool = False, suffixes: Suffixes = ("_x", "_y"), - copy: bool | None = None, + copy: bool | lib.NoDefault = lib.no_default, indicator: str | bool = False, validate: str | None = None, ) -> DataFrame: left_df = _validate_operand(left) + left._check_copy_deprecation(copy) right_df = _validate_operand(right) if how == "cross": return _cross_merge( diff --git a/pandas/core/series.py b/pandas/core/series.py index 5c9bc428e256f..b53721360318e 100644 --- a/pandas/core/series.py +++ b/pandas/core/series.py @@ -4141,7 +4141,7 @@ def nsmallest( ), ) def swaplevel( - self, i: Level = -2, j: Level = -1, copy: bool | None = None + self, i: Level = -2, j: Level = -1, copy: bool | lib.NoDefault = lib.no_default ) -> Series: """ Swap levels i and j in a :class:`MultiIndex`. @@ -4161,6 +4161,7 @@ def swaplevel( {examples} """ + self._check_copy_deprecation(copy) assert isinstance(self.index, MultiIndex) result = self.copy(deep=False) result.index = self.index.swaplevel(i, j) @@ -4659,7 +4660,7 @@ def rename( index: Renamer | Hashable | None = ..., *, axis: Axis | None = ..., - copy: bool = ..., + copy: bool | lib.NoDefault = ..., inplace: Literal[True], level: Level | None = ..., errors: IgnoreRaise = ..., @@ -4672,7 +4673,7 @@ def rename( index: Renamer | Hashable | None = ..., *, axis: Axis | None = ..., - copy: bool = ..., + copy: bool | lib.NoDefault = ..., inplace: Literal[False] = ..., level: Level | None = ..., errors: IgnoreRaise = ..., @@ -4685,7 +4686,7 @@ def rename( index: Renamer | Hashable | None = ..., *, axis: Axis | None = ..., - copy: bool = ..., + copy: bool | lib.NoDefault = ..., inplace: bool = ..., level: Level | None = ..., errors: IgnoreRaise = ..., @@ -4697,7 +4698,7 @@ def rename( index: Renamer | Hashable | None = None, *, axis: Axis | None = None, - copy: bool | None = None, + copy: bool | lib.NoDefault = lib.no_default, inplace: bool = False, level: Level | None = None, errors: IgnoreRaise = "ignore", @@ -4722,7 +4723,7 @@ def rename( attribute. axis : {0 or 'index'} Unused. Parameter needed for compatibility with DataFrame. - copy : bool, default True + copy : bool, default False Also copy underlying data. .. note:: @@ -4736,6 +4737,8 @@ def rename( You can already get the future behavior and improvements through enabling copy on write ``pd.options.mode.copy_on_write = True`` + + .. deprecated:: 3.0.0 inplace : bool, default False Whether to return a new Series. If True the value of copy is ignored. level : int or level name, default None @@ -4779,6 +4782,7 @@ def rename( 5 3 dtype: int64 """ + self._check_copy_deprecation(copy) if axis is not None: # Make sure we raise if an invalid 'axis' is passed. axis = self._get_axis_number(axis) @@ -4828,9 +4832,9 @@ def set_axis( labels, *, axis: Axis = 0, - copy: bool | None = None, + copy: bool | lib.NoDefault = lib.no_default, ) -> Series: - return super().set_axis(labels, axis=axis) + return super().set_axis(labels, axis=axis, copy=copy) # error: Cannot determine type of 'reindex' @doc( @@ -4867,7 +4871,7 @@ def rename_axis( *, index=..., axis: Axis = ..., - copy: bool = ..., + copy: bool | lib.NoDefault = ..., inplace: Literal[True], ) -> None: ... @@ -4879,7 +4883,7 @@ def rename_axis( *, index=..., axis: Axis = ..., - copy: bool = ..., + copy: bool | lib.NoDefault = ..., inplace: Literal[False] = ..., ) -> Self: ... @@ -4891,7 +4895,7 @@ def rename_axis( *, index=..., axis: Axis = ..., - copy: bool = ..., + copy: bool | lib.NoDefault = ..., inplace: bool = ..., ) -> Self | None: ... @@ -4902,7 +4906,7 @@ def rename_axis( *, index=lib.no_default, axis: Axis = 0, - copy: bool = True, + copy: bool | lib.NoDefault = lib.no_default, inplace: bool = False, ) -> Self | None: """ @@ -4921,7 +4925,7 @@ def rename_axis( apply to that axis' values. axis : {0 or 'index'}, default 0 The axis to rename. For `Series` this parameter is unused and defaults to 0. - copy : bool, default None + copy : bool, default False Also copy underlying data. .. note:: @@ -4971,6 +4975,7 @@ def rename_axis( index=index, axis=axis, inplace=inplace, + copy=copy, ) @overload @@ -5702,7 +5707,7 @@ def to_timestamp( self, freq: Frequency | None = None, how: Literal["s", "e", "start", "end"] = "start", - copy: bool | None = None, + copy: bool | lib.NoDefault = lib.no_default, ) -> Series: """ Cast to DatetimeIndex of Timestamps, at *beginning* of period. @@ -5714,7 +5719,7 @@ def to_timestamp( how : {'s', 'e', 'start', 'end'} Convention for converting period to timestamp; start of period vs. end. - copy : bool, default True + copy : bool, default False Whether or not to return a copy. .. note:: @@ -5729,6 +5734,8 @@ def to_timestamp( You can already get the future behavior and improvements through enabling copy on write ``pd.options.mode.copy_on_write = True`` + .. deprecated:: 3.0.0 + Returns ------- Series with DatetimeIndex @@ -5762,6 +5769,7 @@ def to_timestamp( 2025-01-31 3 Freq: YE-JAN, dtype: int64 """ + self._check_copy_deprecation(copy) if not isinstance(self.index, PeriodIndex): raise TypeError(f"unsupported Type {type(self.index).__name__}") @@ -5770,7 +5778,11 @@ def to_timestamp( setattr(new_obj, "index", new_index) return new_obj - def to_period(self, freq: str | None = None, copy: bool | None = None) -> Series: + def to_period( + self, + freq: str | None = None, + copy: bool | lib.NoDefault = lib.no_default, + ) -> Series: """ Convert Series from DatetimeIndex to PeriodIndex. @@ -5778,7 +5790,7 @@ def to_period(self, freq: str | None = None, copy: bool | None = None) -> Series ---------- freq : str, default None Frequency associated with the PeriodIndex. - copy : bool, default True + copy : bool, default False Whether or not to return a copy. .. note:: @@ -5793,6 +5805,8 @@ def to_period(self, freq: str | None = None, copy: bool | None = None) -> Series You can already get the future behavior and improvements through enabling copy on write ``pd.options.mode.copy_on_write = True`` + .. deprecated:: 3.0.0 + Returns ------- Series @@ -5814,6 +5828,7 @@ def to_period(self, freq: str | None = None, copy: bool | None = None) -> Series >>> s.index PeriodIndex(['2023', '2024', '2025'], dtype='period[Y-DEC]') """ + self._check_copy_deprecation(copy) if not isinstance(self.index, DatetimeIndex): raise TypeError(f"unsupported Type {type(self.index).__name__}") diff --git a/pandas/tests/copy_view/test_copy_deprecation.py b/pandas/tests/copy_view/test_copy_deprecation.py index ca57c02112131..8ee37213b92ab 100644 --- a/pandas/tests/copy_view/test_copy_deprecation.py +++ b/pandas/tests/copy_view/test_copy_deprecation.py @@ -1,6 +1,10 @@ import pytest import pandas as pd +from pandas import ( + concat, + merge, +) import pandas._testing as tm @@ -13,20 +17,33 @@ ("infer_objects", {}), ("astype", {"dtype": "float64"}), ("reindex", {"index": [2, 0, 1]}), + ("transpose", {}), + ("set_axis", {"labels": [1, 2, 3]}), + ("rename", {"index": {1: 2}}), + ("set_flags", {}), + ("to_period", {}), + ("to_timestamp", {}), + ("swaplevel", {"i": 0, "j": 1}), ], ) def test_copy_deprecation(meth, kwargs): - df = pd.DataFrame({"a": [1, 2, 3], "b": [4, 5, 6]}) + df = pd.DataFrame({"a": [1, 2, 3], "b": [4, 5, 6], "c": 1}) - if meth in ("tz_convert", "tz_localize"): - tz = None if meth == "tz_localize" else "US/Eastern" + if meth in ("tz_convert", "tz_localize", "to_period"): + tz = None if meth in ("tz_localize", "to_period") else "US/Eastern" df.index = pd.date_range("2020-01-01", freq="D", periods=len(df), tz=tz) + elif meth == "to_timestamp": + df.index = pd.period_range("2020-01-01", freq="D", periods=len(df)) + elif meth == "swaplevel": + df = df.set_index(["b", "c"]) - with tm.assert_produces_warning(DeprecationWarning, match="copy"): - getattr(df, meth)(copy=False, **kwargs) + if meth != "swaplevel": + with tm.assert_produces_warning(DeprecationWarning, match="copy"): + getattr(df, meth)(copy=False, **kwargs) - with tm.assert_produces_warning(DeprecationWarning, match="copy"): - getattr(df.a, meth)(copy=False, **kwargs) + if meth != "transpose": + with tm.assert_produces_warning(DeprecationWarning, match="copy"): + getattr(df.a, meth)(copy=False, **kwargs) def test_copy_deprecation_reindex_like_align(): @@ -51,3 +68,22 @@ def test_copy_deprecation_reindex_like_align(): DeprecationWarning, match="copy", check_stacklevel=False ): df.a.align(df.a, copy=False) + + +def test_copy_deprecation_merge_concat(): + df = pd.DataFrame({"a": [1, 2, 3], "b": [4, 5, 6]}) + + with tm.assert_produces_warning( + DeprecationWarning, match="copy", check_stacklevel=False + ): + df.merge(df, copy=False) + + with tm.assert_produces_warning( + DeprecationWarning, match="copy", check_stacklevel=False + ): + merge(df, df, copy=False) + + with tm.assert_produces_warning( + DeprecationWarning, match="copy", check_stacklevel=False + ): + concat([df, df], copy=False) diff --git a/pandas/tests/frame/methods/test_rename.py b/pandas/tests/frame/methods/test_rename.py index 996fc30552bc4..6153a168476d4 100644 --- a/pandas/tests/frame/methods/test_rename.py +++ b/pandas/tests/frame/methods/test_rename.py @@ -165,7 +165,7 @@ def test_rename_multiindex(self): tm.assert_index_equal(renamed.index, new_index) def test_rename_nocopy(self, float_frame): - renamed = float_frame.rename(columns={"C": "foo"}, copy=False) + renamed = float_frame.rename(columns={"C": "foo"}) assert np.shares_memory(renamed["foo"]._values, float_frame["C"]._values) diff --git a/pandas/tests/frame/methods/test_set_axis.py b/pandas/tests/frame/methods/test_set_axis.py index 8c42498b45621..1967941bca9f0 100644 --- a/pandas/tests/frame/methods/test_set_axis.py +++ b/pandas/tests/frame/methods/test_set_axis.py @@ -29,10 +29,7 @@ def test_set_axis_copy(self, obj): expected = obj.copy() expected.index = new_index - result = obj.set_axis(new_index, axis=0, copy=True) - tm.assert_equal(expected, result) - assert result is not obj - result = obj.set_axis(new_index, axis=0, copy=False) + result = obj.set_axis(new_index, axis=0) tm.assert_equal(expected, result) assert result is not obj # check we did NOT make a copy @@ -44,7 +41,6 @@ def test_set_axis_copy(self, obj): for i in range(obj.shape[1]) ) - # copy defaults to True result = obj.set_axis(new_index, axis=0) tm.assert_equal(expected, result) assert result is not obj @@ -57,7 +53,7 @@ def test_set_axis_copy(self, obj): for i in range(obj.shape[1]) ) - res = obj.set_axis(new_index, copy=False) + res = obj.set_axis(new_index) tm.assert_equal(expected, res) # check we did NOT make a copy if res.ndim == 1: diff --git a/pandas/tests/frame/test_api.py b/pandas/tests/frame/test_api.py index 680800d7f5e4c..48f51dfa981ca 100644 --- a/pandas/tests/frame/test_api.py +++ b/pandas/tests/frame/test_api.py @@ -356,9 +356,7 @@ def test_set_flags( assert obj.iloc[key] == 1 # Now we do copy. - result = obj.set_flags( - copy=True, allows_duplicate_labels=allows_duplicate_labels - ) + result = obj.set_flags(allows_duplicate_labels=allows_duplicate_labels) result.iloc[key] = 10 assert obj.iloc[key] == 1 diff --git a/pandas/tests/reshape/concat/test_concat.py b/pandas/tests/reshape/concat/test_concat.py index e104b99370f07..e6bfc1ed9be03 100644 --- a/pandas/tests/reshape/concat/test_concat.py +++ b/pandas/tests/reshape/concat/test_concat.py @@ -49,12 +49,12 @@ def test_concat_copy(self): df3 = DataFrame({5: "foo"}, index=range(4)) # These are actual copies. - result = concat([df, df2, df3], axis=1, copy=True) + result = concat([df, df2, df3], axis=1) for arr in result._mgr.arrays: assert arr.base is not None # These are the same. - result = concat([df, df2, df3], axis=1, copy=False) + result = concat([df, df2, df3], axis=1) for arr in result._mgr.arrays: if arr.dtype.kind == "f": @@ -66,7 +66,7 @@ def test_concat_copy(self): # Float block was consolidated. df4 = DataFrame(np.random.default_rng(2).standard_normal((4, 1))) - result = concat([df, df2, df3, df4], axis=1, copy=False) + result = concat([df, df2, df3, df4], axis=1) for arr in result._mgr.arrays: if arr.dtype.kind == "f": # this is a view on some array in either df or df4 diff --git a/pandas/tests/reshape/concat/test_index.py b/pandas/tests/reshape/concat/test_index.py index ca544c5d42a25..68d77b79a59e7 100644 --- a/pandas/tests/reshape/concat/test_index.py +++ b/pandas/tests/reshape/concat/test_index.py @@ -101,7 +101,7 @@ def test_concat_rename_index(self): def test_concat_copy_index_series(self, axis): # GH 29879 ser = Series([1, 2]) - comb = concat([ser, ser], axis=axis, copy=True) + comb = concat([ser, ser], axis=axis) if axis in [0, "index"]: assert comb.index is not ser.index else: @@ -110,7 +110,7 @@ def test_concat_copy_index_series(self, axis): def test_concat_copy_index_frame(self, axis): # GH 29879 df = DataFrame([[1, 2], [3, 4]], columns=["a", "b"]) - comb = concat([df, df], axis=axis, copy=True) + comb = concat([df, df], axis=axis) if axis in [0, "index"]: assert not comb.index.is_(df.index) assert comb.columns.is_(df.columns) diff --git a/pandas/tests/reshape/merge/test_merge.py b/pandas/tests/reshape/merge/test_merge.py index f063f333ac889..1cd52ab1ae8b4 100644 --- a/pandas/tests/reshape/merge/test_merge.py +++ b/pandas/tests/reshape/merge/test_merge.py @@ -260,7 +260,7 @@ def test_merge_copy(self): left = DataFrame({"a": 0, "b": 1}, index=range(10)) right = DataFrame({"c": "foo", "d": "bar"}, index=range(10)) - merged = merge(left, right, left_index=True, right_index=True, copy=True) + merged = merge(left, right, left_index=True, right_index=True) merged["a"] = 6 assert (left["a"] == 0).all() @@ -272,7 +272,7 @@ def test_merge_nocopy(self, using_infer_string): left = DataFrame({"a": 0, "b": 1}, index=range(10)) right = DataFrame({"c": "foo", "d": "bar"}, index=range(10)) - merged = merge(left, right, left_index=True, right_index=True, copy=False) + merged = merge(left, right, left_index=True, right_index=True) assert np.shares_memory(merged["a"]._values, left["a"]._values) if not using_infer_string: diff --git a/pandas/tests/series/methods/test_rename.py b/pandas/tests/series/methods/test_rename.py index c67298b777f6d..1da98b3a273be 100644 --- a/pandas/tests/series/methods/test_rename.py +++ b/pandas/tests/series/methods/test_rename.py @@ -173,7 +173,7 @@ def test_rename_copy_false(self): # GH 46889 ser = Series(["foo", "bar"]) ser_orig = ser.copy() - shallow_copy = ser.rename({1: 9}, copy=False) + shallow_copy = ser.rename({1: 9}) ser[0] = "foobar" assert ser_orig[0] == shallow_copy[0] assert ser_orig[1] == shallow_copy[9] From 044e0acf9c6bd89c5b16c8472aa2341a1892fbd5 Mon Sep 17 00:00:00 2001 From: Patrick Hoefler <61934744+phofl@users.noreply.github.com> Date: Sat, 16 Mar 2024 19:43:52 -0500 Subject: [PATCH 2/4] Fixup --- pandas/core/interchange/dataframe.py | 2 +- pandas/tests/extension/base/reshaping.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/pandas/core/interchange/dataframe.py b/pandas/core/interchange/dataframe.py index 1ffe0e8e8dbb0..c67ed34e598ad 100644 --- a/pandas/core/interchange/dataframe.py +++ b/pandas/core/interchange/dataframe.py @@ -32,7 +32,7 @@ def __init__(self, df: DataFrame, allow_copy: bool = True) -> None: Constructor - an instance of this (private) class is returned from `pd.DataFrame.__dataframe__`. """ - self._df = df.rename(columns=str, copy=False) + self._df = df.rename(columns=str) self._allow_copy = allow_copy def __dataframe__( diff --git a/pandas/tests/extension/base/reshaping.py b/pandas/tests/extension/base/reshaping.py index 4550e3b055cfe..489cd15644d04 100644 --- a/pandas/tests/extension/base/reshaping.py +++ b/pandas/tests/extension/base/reshaping.py @@ -106,7 +106,7 @@ def test_concat_extension_arrays_copy_false(self, data, na_value): "B": data[3:7], } ) - result = pd.concat([df1, df2], axis=1, copy=False) + result = pd.concat([df1, df2], axis=1) tm.assert_frame_equal(result, expected) def test_concat_with_reindex(self, data): From e3c0b63df877306f9d80f446133f6f8ad2448de3 Mon Sep 17 00:00:00 2001 From: Patrick Hoefler Date: Sun, 17 Mar 2024 19:37:44 -0500 Subject: [PATCH 3/4] Fixup tests --- pandas/tests/copy_view/test_functions.py | 3 +-- pandas/tests/copy_view/test_methods.py | 12 ++---------- pandas/tests/dtypes/test_concat.py | 5 ++--- 3 files changed, 5 insertions(+), 15 deletions(-) diff --git a/pandas/tests/copy_view/test_functions.py b/pandas/tests/copy_view/test_functions.py index eeb19103f7bd5..e3fcdf6be82d8 100644 --- a/pandas/tests/copy_view/test_functions.py +++ b/pandas/tests/copy_view/test_functions.py @@ -234,12 +234,11 @@ def test_merge_on_key_enlarging_one(func, how): tm.assert_frame_equal(df2, df2_orig) -@pytest.mark.parametrize("copy", [True, None, False]) def test_merge_copy_keyword(copy): df = DataFrame({"a": [1, 2]}) df2 = DataFrame({"b": [3, 4.5]}) - result = df.merge(df2, copy=copy, left_index=True, right_index=True) + result = df.merge(df2, left_index=True, right_index=True) assert np.shares_memory(get_array(df, "a"), get_array(result, "a")) assert np.shares_memory(get_array(df2, "b"), get_array(result, "b")) diff --git a/pandas/tests/copy_view/test_methods.py b/pandas/tests/copy_view/test_methods.py index 8bf0e81e74e25..3712a74fe54ed 100644 --- a/pandas/tests/copy_view/test_methods.py +++ b/pandas/tests/copy_view/test_methods.py @@ -176,13 +176,6 @@ def test_methods_series_copy_keyword(request, method, copy): assert np.shares_memory(get_array(ser2), get_array(ser)) -@pytest.mark.parametrize("copy", [True, None, False]) -def test_transpose_copy_keyword(copy): - df = DataFrame({"a": [1, 2, 3], "b": [4, 5, 6]}) - result = df.transpose(copy=copy) - assert np.shares_memory(get_array(df, "a"), get_array(result, 0)) - - # ----------------------------------------------------------------------------- # DataFrame methods returning new DataFrame using shallow copy @@ -1415,11 +1408,10 @@ def test_inplace_arithmetic_series_with_reference(): tm.assert_series_equal(ser_orig, view) -@pytest.mark.parametrize("copy", [True, False]) -def test_transpose(copy): +def test_transpose(): df = DataFrame({"a": [1, 2, 3], "b": 1}) df_orig = df.copy() - result = df.transpose(copy=copy) + result = df.transpose() assert np.shares_memory(get_array(df, "a"), get_array(result, 0)) result.iloc[0, 0] = 100 diff --git a/pandas/tests/dtypes/test_concat.py b/pandas/tests/dtypes/test_concat.py index 4f7ae6fa2a0a0..1652c9254061b 100644 --- a/pandas/tests/dtypes/test_concat.py +++ b/pandas/tests/dtypes/test_concat.py @@ -20,14 +20,13 @@ def test_concat_mismatched_categoricals_with_empty(): tm.assert_categorical_equal(result, expected) -@pytest.mark.parametrize("copy", [True, False]) -def test_concat_single_dataframe_tz_aware(copy): +def test_concat_single_dataframe_tz_aware(): # https://github.com/pandas-dev/pandas/issues/25257 df = pd.DataFrame( {"timestamp": [pd.Timestamp("2020-04-08 09:00:00.709949+0000", tz="UTC")]} ) expected = df.copy() - result = pd.concat([df], copy=copy) + result = pd.concat([df]) tm.assert_frame_equal(result, expected) From fd5932cc1746a45e883c3d93edf83c36756bdbe1 Mon Sep 17 00:00:00 2001 From: Patrick Hoefler Date: Sun, 17 Mar 2024 19:57:02 -0500 Subject: [PATCH 4/4] Fixup tests --- pandas/tests/copy_view/test_functions.py | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/pandas/tests/copy_view/test_functions.py b/pandas/tests/copy_view/test_functions.py index e3fcdf6be82d8..196d908a44a46 100644 --- a/pandas/tests/copy_view/test_functions.py +++ b/pandas/tests/copy_view/test_functions.py @@ -139,12 +139,11 @@ def test_concat_mixed_series_frame(): tm.assert_frame_equal(result, expected) -@pytest.mark.parametrize("copy", [True, None, False]) -def test_concat_copy_keyword(copy): +def test_concat_copy_keyword(): df = DataFrame({"a": [1, 2]}) df2 = DataFrame({"b": [1.5, 2.5]}) - result = concat([df, df2], axis=1, copy=copy) + result = concat([df, df2], axis=1) assert np.shares_memory(get_array(df, "a"), get_array(result, "a")) assert np.shares_memory(get_array(df2, "b"), get_array(result, "b")) @@ -234,7 +233,7 @@ def test_merge_on_key_enlarging_one(func, how): tm.assert_frame_equal(df2, df2_orig) -def test_merge_copy_keyword(copy): +def test_merge_copy_keyword(): df = DataFrame({"a": [1, 2]}) df2 = DataFrame({"b": [3, 4.5]})