diff --git a/doc/source/whatsnew/v1.5.0.rst b/doc/source/whatsnew/v1.5.0.rst
index 02db4cbe0e8a5..1913e3857f2d3 100644
--- a/doc/source/whatsnew/v1.5.0.rst
+++ b/doc/source/whatsnew/v1.5.0.rst
@@ -294,6 +294,7 @@ Other enhancements
 - :meth:`RangeIndex.union` now can return a :class:`RangeIndex` instead of a :class:`Int64Index` if the resulting values are equally spaced (:issue:`47557`, :issue:`43885`)
 - :meth:`DataFrame.compare` now accepts an argument ``result_names`` to allow the user to specify the result's names of both left and right DataFrame which are being compared. This is by default ``'self'`` and ``'other'`` (:issue:`44354`)
 - :class:`Interval` now supports checking whether one interval is contained by another interval (:issue:`46613`)
+- Added ``copy`` keyword to :meth:`Series.set_axis` and :meth:`DataFrame.set_axis` to allow user to set axis on a new object without necessarily copying the underlying data (:issue:`47932`)
 - :meth:`Series.add_suffix`, :meth:`DataFrame.add_suffix`, :meth:`Series.add_prefix` and :meth:`DataFrame.add_prefix` support a ``copy`` argument. If ``False``, the underlying data is not copied in the returned object (:issue:`47934`)
 - :meth:`DataFrame.set_index` now supports a ``copy`` keyword. If ``False``, the underlying data is not copied when a new :class:`DataFrame` is returned (:issue:`48043`)
 
diff --git a/pandas/core/frame.py b/pandas/core/frame.py
index 6cfca4ebdc612..9adcfddc4006c 100644
--- a/pandas/core/frame.py
+++ b/pandas/core/frame.py
@@ -5036,17 +5036,34 @@ def align(
 
     @overload
     def set_axis(
-        self, labels, *, axis: Axis = ..., inplace: Literal[False] = ...
+        self,
+        labels,
+        *,
+        axis: Axis = ...,
+        inplace: Literal[False] = ...,
+        copy: bool | lib.NoDefault = ...,
     ) -> DataFrame:
         ...
 
     @overload
-    def set_axis(self, labels, *, axis: Axis = ..., inplace: Literal[True]) -> None:
+    def set_axis(
+        self,
+        labels,
+        *,
+        axis: Axis = ...,
+        inplace: Literal[True],
+        copy: bool | lib.NoDefault = ...,
+    ) -> None:
         ...
 
@overload def set_axis( - self, labels, *, axis: Axis = ..., inplace: bool = ... + self, + labels, + *, + axis: Axis = ..., + inplace: bool = ..., + copy: bool | lib.NoDefault = ..., ) -> DataFrame | None: ... @@ -5091,10 +5108,15 @@ def set_axis( see_also_sub=" or columns", ) @Appender(NDFrame.set_axis.__doc__) - def set_axis( # type: ignore[override] - self, labels, axis: Axis = 0, inplace: bool = False - ) -> DataFrame | None: - return super().set_axis(labels, axis=axis, inplace=inplace) + def set_axis( + self, + labels, + axis: Axis = 0, + inplace: bool = False, + *, + copy: bool | lib.NoDefault = lib.no_default, + ): + return super().set_axis(labels, axis=axis, inplace=inplace, copy=copy) @Substitution(**_shared_doc_kwargs) @Appender(NDFrame.reindex.__doc__) diff --git a/pandas/core/generic.py b/pandas/core/generic.py index d9264e8a18f2e..7de75be78e6dd 100644 --- a/pandas/core/generic.py +++ b/pandas/core/generic.py @@ -711,23 +711,45 @@ def size(self) -> int: @overload def set_axis( - self: NDFrameT, labels, *, axis: Axis = ..., inplace: Literal[False] = ... + self: NDFrameT, + labels, + *, + axis: Axis = ..., + inplace: Literal[False] = ..., + copy: bool_t | lib.NoDefault = ..., ) -> NDFrameT: ... @overload - def set_axis(self, labels, *, axis: Axis = ..., inplace: Literal[True]) -> None: + def set_axis( + self, + labels, + *, + axis: Axis = ..., + inplace: Literal[True], + copy: bool_t | lib.NoDefault = ..., + ) -> None: ... @overload def set_axis( - self: NDFrameT, labels, *, axis: Axis = ..., inplace: bool_t = ... + self: NDFrameT, + labels, + *, + axis: Axis = ..., + inplace: bool_t = ..., + copy: bool_t | lib.NoDefault = ..., ) -> NDFrameT | None: ... 
@deprecate_nonkeyword_arguments(version=None, allowed_args=["self", "labels"]) def set_axis( - self: NDFrameT, labels, axis: Axis = 0, inplace: bool_t = False + self: NDFrameT, + labels, + axis: Axis = 0, + inplace: bool_t = False, + *, + copy: bool_t | lib.NoDefault = lib.no_default, ) -> NDFrameT | None: """ Assign desired index to given axis. @@ -747,6 +769,11 @@ def set_axis( inplace : bool, default False Whether to return a new %(klass)s instance. + copy : bool, default True + Whether to make a copy of the underlying data. + + .. versionadded:: 1.5.0 + Returns ------- renamed : %(klass)s or None @@ -756,16 +783,25 @@ def set_axis( -------- %(klass)s.rename_axis : Alter the name of the index%(see_also_sub)s. """ + if inplace: + if copy is True: + raise ValueError("Cannot specify both inplace=True and copy=True") + copy = False + elif copy is lib.no_default: + copy = True + self._check_inplace_and_allows_duplicate_labels(inplace) - return self._set_axis_nocheck(labels, axis, inplace) + return self._set_axis_nocheck(labels, axis, inplace, copy=copy) @final - def _set_axis_nocheck(self, labels, axis: Axis, inplace: bool_t): + def _set_axis_nocheck(self, labels, axis: Axis, inplace: bool_t, copy: bool_t): # NDFrame.rename with inplace=False calls set_axis(inplace=True) on a copy. if inplace: setattr(self, self._get_axis_name(axis), labels) else: - obj = self.copy() + # With copy=False, we create a new object but don't copy the + # underlying data. 
+ obj = self.copy(deep=copy) obj.set_axis(labels, axis=axis, inplace=True) return obj @@ -1053,7 +1089,7 @@ def _rename( raise KeyError(f"{missing_labels} not found in axis") new_index = ax._transform_index(f, level=level) - result._set_axis_nocheck(new_index, axis=axis_no, inplace=True) + result._set_axis_nocheck(new_index, axis=axis_no, inplace=True, copy=False) result._clear_item_cache() if inplace: diff --git a/pandas/core/series.py b/pandas/core/series.py index b1ad3ab175d1b..f55d6a26255a0 100644 --- a/pandas/core/series.py +++ b/pandas/core/series.py @@ -4976,17 +4976,34 @@ def rename( @overload def set_axis( - self, labels, *, axis: Axis = ..., inplace: Literal[False] = ... + self, + labels, + *, + axis: Axis = ..., + inplace: Literal[False] = ..., + copy: bool | lib.NoDefault = ..., ) -> Series: ... @overload - def set_axis(self, labels, *, axis: Axis = ..., inplace: Literal[True]) -> None: + def set_axis( + self, + labels, + *, + axis: Axis = ..., + inplace: Literal[True], + copy: bool | lib.NoDefault = ..., + ) -> None: ... @overload def set_axis( - self, labels, *, axis: Axis = ..., inplace: bool = ... + self, + labels, + *, + axis: Axis = ..., + inplace: bool = ..., + copy: bool | lib.NoDefault = ..., ) -> Series | None: ... 
@@ -5018,9 +5035,14 @@ def set_axis(
     )
     @Appender(NDFrame.set_axis.__doc__)
     def set_axis(  # type: ignore[override]
-        self, labels, axis: Axis = 0, inplace: bool = False
+        self,
+        labels,
+        axis: Axis = 0,
+        inplace: bool = False,
+        *,
+        copy: bool | lib.NoDefault = lib.no_default,
     ) -> Series | None:
-        return super().set_axis(labels, axis=axis, inplace=inplace)
+        return super().set_axis(labels, axis=axis, inplace=inplace, copy=copy)
 
     # error: Cannot determine type of 'reindex'
     @doc(
diff --git a/pandas/tests/frame/methods/test_set_axis.py b/pandas/tests/frame/methods/test_set_axis.py
index 3284243ddac48..67488dff3c335 100644
--- a/pandas/tests/frame/methods/test_set_axis.py
+++ b/pandas/tests/frame/methods/test_set_axis.py
@@ -24,6 +24,69 @@ def test_set_axis(self, obj):
         result = obj.set_axis(new_index, axis=0, inplace=False)
         tm.assert_equal(expected, result)
 
+    def test_set_axis_copy(self, obj):
+        # Test copy keyword GH#47932
+        new_index = list("abcd")[: len(obj)]
+
+        orig = obj.iloc[:]
+        expected = obj.copy()
+        expected.index = new_index
+
+        with pytest.raises(
+            ValueError, match="Cannot specify both inplace=True and copy=True"
+        ):
+            obj.set_axis(new_index, axis=0, inplace=True, copy=True)
+
+        result = obj.set_axis(new_index, axis=0, copy=True)
+        tm.assert_equal(expected, result)
+        assert result is not obj
+        # check we DID make a copy
+        if obj.ndim == 1:
+            assert not tm.shares_memory(result, obj)
+        else:
+            assert not any(
+                tm.shares_memory(result.iloc[:, i], obj.iloc[:, i])
+                for i in range(obj.shape[1])
+            )
+
+        result = obj.set_axis(new_index, axis=0, copy=False)
+        tm.assert_equal(expected, result)
+        assert result is not obj
+        # check we did NOT make a copy
+        if obj.ndim == 1:
+            assert tm.shares_memory(result, obj)
+        else:
+            assert all(
+                tm.shares_memory(result.iloc[:, i], obj.iloc[:, i])
+                for i in range(obj.shape[1])
+            )
+
+        # copy defaults to True
+        result = obj.set_axis(new_index, axis=0)
+        tm.assert_equal(expected, result)
+        assert result is not obj
+        # check we DID make a copy
+        if obj.ndim == 1:
+            assert not tm.shares_memory(result, obj)
+        else:
+            assert not any(
+                tm.shares_memory(result.iloc[:, i], obj.iloc[:, i])
+                for i in range(obj.shape[1])
+            )
+
+        # Do this last since it alters obj inplace
+        res = obj.set_axis(new_index, inplace=True, copy=False)
+        assert res is None
+        tm.assert_equal(expected, obj)
+        # check we did NOT make a copy
+        if obj.ndim == 1:
+            assert tm.shares_memory(obj, orig)
+        else:
+            assert all(
+                tm.shares_memory(obj.iloc[:, i], orig.iloc[:, i])
+                for i in range(obj.shape[1])
+            )
+
     @pytest.mark.parametrize("axis", [0, "index", 1, "columns"])
     def test_set_axis_inplace_axis(self, axis, obj):
         # GH#14636