Skip to content

ENH: copy keyword to set_axis #47932

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 11 commits into from
Aug 17, 2022
Merged
1 change: 1 addition & 0 deletions doc/source/whatsnew/v1.5.0.rst
Original file line number Diff line number Diff line change
Expand Up @@ -294,6 +294,7 @@ Other enhancements
- :meth:`RangeIndex.union` can now return a :class:`RangeIndex` instead of an :class:`Int64Index` if the resulting values are equally spaced (:issue:`47557`, :issue:`43885`)
- :meth:`DataFrame.compare` now accepts an argument ``result_names`` that lets the user specify the names used in the result for the left and right DataFrames being compared. The defaults are ``'self'`` and ``'other'`` (:issue:`44354`)
- :class:`Interval` now supports checking whether one interval is contained by another interval (:issue:`46613`)
- Added a ``copy`` keyword to :meth:`Series.set_axis` and :meth:`DataFrame.set_axis` to allow the user to set an axis on a new object without necessarily copying the underlying data (:issue:`47932`)
- :meth:`Series.add_suffix`, :meth:`DataFrame.add_suffix`, :meth:`Series.add_prefix` and :meth:`DataFrame.add_prefix` support a ``copy`` argument. If ``False``, the underlying data is not copied in the returned object (:issue:`47934`)
- :meth:`DataFrame.set_index` now supports a ``copy`` keyword. If ``False``, the underlying data is not copied when a new :class:`DataFrame` is returned (:issue:`48043`)

Expand Down
36 changes: 29 additions & 7 deletions pandas/core/frame.py
Original file line number Diff line number Diff line change
Expand Up @@ -5036,17 +5036,34 @@ def align(

@overload
def set_axis(
self, labels, *, axis: Axis = ..., inplace: Literal[False] = ...
self,
labels,
*,
axis: Axis = ...,
inplace: Literal[False] = ...,
copy: bool | lib.NoDefault = ...,
) -> DataFrame:
...

@overload
def set_axis(self, labels, *, axis: Axis = ..., inplace: Literal[True]) -> None:
def set_axis(
self,
labels,
*,
axis: Axis = ...,
inplace: Literal[True],
copy: bool | lib.NoDefault = ...,
) -> None:
...

@overload
def set_axis(
self, labels, *, axis: Axis = ..., inplace: bool = ...
self,
labels,
*,
axis: Axis = ...,
inplace: bool = ...,
copy: bool | lib.NoDefault = ...,
) -> DataFrame | None:
...

Expand Down Expand Up @@ -5091,10 +5108,15 @@ def set_axis(
see_also_sub=" or columns",
)
@Appender(NDFrame.set_axis.__doc__)
def set_axis( # type: ignore[override]
self, labels, axis: Axis = 0, inplace: bool = False
) -> DataFrame | None:
return super().set_axis(labels, axis=axis, inplace=inplace)
def set_axis(
self,
labels,
axis: Axis = 0,
inplace: bool = False,
*,
copy: bool | lib.NoDefault = lib.no_default,
):
return super().set_axis(labels, axis=axis, inplace=inplace, copy=copy)

@Substitution(**_shared_doc_kwargs)
@Appender(NDFrame.reindex.__doc__)
Expand Down
52 changes: 44 additions & 8 deletions pandas/core/generic.py
Original file line number Diff line number Diff line change
Expand Up @@ -711,23 +711,45 @@ def size(self) -> int:

@overload
def set_axis(
self: NDFrameT, labels, *, axis: Axis = ..., inplace: Literal[False] = ...
self: NDFrameT,
labels,
*,
axis: Axis = ...,
inplace: Literal[False] = ...,
copy: bool_t | lib.NoDefault = ...,
) -> NDFrameT:
...

@overload
def set_axis(self, labels, *, axis: Axis = ..., inplace: Literal[True]) -> None:
def set_axis(
self,
labels,
*,
axis: Axis = ...,
inplace: Literal[True],
copy: bool_t | lib.NoDefault = ...,
) -> None:
...

@overload
def set_axis(
self: NDFrameT, labels, *, axis: Axis = ..., inplace: bool_t = ...
self: NDFrameT,
labels,
*,
axis: Axis = ...,
inplace: bool_t = ...,
copy: bool_t | lib.NoDefault = ...,
) -> NDFrameT | None:
...

@deprecate_nonkeyword_arguments(version=None, allowed_args=["self", "labels"])
def set_axis(
self: NDFrameT, labels, axis: Axis = 0, inplace: bool_t = False
self: NDFrameT,
labels,
axis: Axis = 0,
inplace: bool_t = False,
*,
copy: bool_t | lib.NoDefault = lib.no_default,
) -> NDFrameT | None:
"""
Assign desired index to given axis.
Expand All @@ -747,6 +769,11 @@ def set_axis(
inplace : bool, default False
Whether to modify the %(klass)s in place instead of returning a new instance.

copy : bool, default True
Whether to make a copy of the underlying data. Cannot be ``True`` when ``inplace=True``.

.. versionadded:: 1.5.0

Returns
-------
renamed : %(klass)s or None
Expand All @@ -756,16 +783,25 @@ def set_axis(
--------
%(klass)s.rename_axis : Alter the name of the index%(see_also_sub)s.
"""
if inplace:
if copy is True:
raise ValueError("Cannot specify both inplace=True and copy=True")
copy = False
elif copy is lib.no_default:
copy = True

self._check_inplace_and_allows_duplicate_labels(inplace)
return self._set_axis_nocheck(labels, axis, inplace)
return self._set_axis_nocheck(labels, axis, inplace, copy=copy)

@final
def _set_axis_nocheck(self, labels, axis: Axis, inplace: bool_t):
def _set_axis_nocheck(self, labels, axis: Axis, inplace: bool_t, copy: bool_t):
# NDFrame.rename with inplace=False calls set_axis(inplace=True) on a copy.
if inplace:
setattr(self, self._get_axis_name(axis), labels)
else:
obj = self.copy()
# With copy=False, we create a new object but don't copy the
# underlying data.
obj = self.copy(deep=copy)
obj.set_axis(labels, axis=axis, inplace=True)
return obj

Expand Down Expand Up @@ -1053,7 +1089,7 @@ def _rename(
raise KeyError(f"{missing_labels} not found in axis")

new_index = ax._transform_index(f, level=level)
result._set_axis_nocheck(new_index, axis=axis_no, inplace=True)
result._set_axis_nocheck(new_index, axis=axis_no, inplace=True, copy=False)
result._clear_item_cache()

if inplace:
Expand Down
31 changes: 26 additions & 5 deletions pandas/core/series.py
Original file line number Diff line number Diff line change
Expand Up @@ -4976,17 +4976,34 @@ def rename(

@overload
def set_axis(
self, labels, *, axis: Axis = ..., inplace: Literal[False] = ...
self,
labels,
*,
axis: Axis = ...,
inplace: Literal[False] = ...,
copy: bool | lib.NoDefault = ...,
) -> Series:
...

@overload
def set_axis(self, labels, *, axis: Axis = ..., inplace: Literal[True]) -> None:
def set_axis(
self,
labels,
*,
axis: Axis = ...,
inplace: Literal[True],
copy: bool | lib.NoDefault = ...,
) -> None:
...

@overload
def set_axis(
self, labels, *, axis: Axis = ..., inplace: bool = ...
self,
labels,
*,
axis: Axis = ...,
inplace: bool = ...,
copy: bool | lib.NoDefault = ...,
) -> Series | None:
...

Expand Down Expand Up @@ -5018,9 +5035,13 @@ def set_axis(
)
@Appender(NDFrame.set_axis.__doc__)
def set_axis( # type: ignore[override]
self, labels, axis: Axis = 0, inplace: bool = False
self,
labels,
axis: Axis = 0,
inplace: bool = False,
copy: bool | lib.NoDefault = lib.no_default,
) -> Series | None:
return super().set_axis(labels, axis=axis, inplace=inplace)
return super().set_axis(labels, axis=axis, inplace=inplace, copy=copy)

# error: Cannot determine type of 'reindex'
@doc(
Expand Down
63 changes: 63 additions & 0 deletions pandas/tests/frame/methods/test_set_axis.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,69 @@ def test_set_axis(self, obj):
result = obj.set_axis(new_index, axis=0, inplace=False)
tm.assert_equal(expected, result)

def test_set_axis_copy(self, obj):
    """
    Exercise the ``copy`` keyword of ``set_axis`` (GH#47932).

    Covers, in order: the rejected ``inplace=True, copy=True`` combination,
    an explicit ``copy=True``, an explicit ``copy=False``, the default
    (which is expected to copy), and finally the in-place path.
    """
    labels = list("abcd")[: len(obj)]

    # Keep a handle on the original data so the in-place case at the end
    # can be compared against it.
    orig = obj.iloc[:]
    expected = obj.copy()
    expected.index = labels

    def columnwise_sharing(left, right):
        # One lazily-evaluated flag per column: does this column of
        # ``left`` share memory with the same column of ``right``?
        return (
            tm.shares_memory(left.iloc[:, i], right.iloc[:, i])
            for i in range(obj.shape[1])
        )

    def assert_copied(left, right):
        # The data must have been copied: nothing may share memory.
        if obj.ndim == 1:
            assert not tm.shares_memory(left, right)
        else:
            assert not any(columnwise_sharing(left, right))

    def assert_shared(left, right):
        # The data must NOT have been copied: everything shares memory.
        if obj.ndim == 1:
            assert tm.shares_memory(left, right)
        else:
            assert all(columnwise_sharing(left, right))

    # Asking for an in-place update and a copy at once is contradictory.
    with pytest.raises(
        ValueError, match="Cannot specify both inplace=True and copy=True"
    ):
        obj.set_axis(labels, axis=0, inplace=True, copy=True)

    # Explicit copy=True
    copied = obj.set_axis(labels, axis=0, copy=True)
    tm.assert_equal(expected, copied)
    assert copied is not obj
    assert_copied(copied, obj)

    # Explicit copy=False: new object, same underlying data
    viewed = obj.set_axis(labels, axis=0, copy=False)
    tm.assert_equal(expected, viewed)
    assert viewed is not obj
    assert_shared(viewed, obj)

    # copy defaults to True
    defaulted = obj.set_axis(labels, axis=0)
    tm.assert_equal(expected, defaulted)
    assert defaulted is not obj
    assert_copied(defaulted, obj)

    # Do this last since it alters obj inplace
    returned = obj.set_axis(labels, inplace=True, copy=False)
    assert returned is None
    tm.assert_equal(expected, obj)
    assert_shared(obj, orig)

@pytest.mark.parametrize("axis", [0, "index", 1, "columns"])
def test_set_axis_inplace_axis(self, axis, obj):
# GH#14636
Expand Down