Skip to content

Commit 9716fcb

Browse files
authored
ENH: set_index copy kwd (#48043)
* ENH: set_index copy kwd * GH ref * mypy fixup
1 parent ddbb6c2 commit 9716fcb

File tree

3 files changed

+36
-1
lines changed

3 files changed

+36
-1
lines changed

doc/source/whatsnew/v1.5.0.rst

+1
Original file line numberDiff line numberDiff line change
@@ -293,6 +293,7 @@ Other enhancements
293293
- :meth:`RangeIndex.union` can now return a :class:`RangeIndex` instead of an :class:`Int64Index` if the resulting values are equally spaced (:issue:`47557`, :issue:`43885`)
294294
- :meth:`DataFrame.compare` now accepts an argument ``result_names`` that lets the user specify the names used in the result for the left and right DataFrames being compared. The defaults are ``'self'`` and ``'other'`` (:issue:`44354`)
295295
- :meth:`Series.add_suffix`, :meth:`DataFrame.add_suffix`, :meth:`Series.add_prefix` and :meth:`DataFrame.add_prefix` support a ``copy`` argument. If ``False``, the underlying data is not copied in the returned object (:issue:`47934`)
296+
- :meth:`DataFrame.set_index` now supports a ``copy`` keyword. If ``False``, the underlying data is not copied when a new :class:`DataFrame` is returned (:issue:`48043`)
296297

297298
.. ---------------------------------------------------------------------------
298299
.. _whatsnew_150.notable_bug_fixes:

pandas/core/frame.py

+16-1
Original file line numberDiff line numberDiff line change
@@ -5824,6 +5824,7 @@ def set_index(
58245824
append: bool = ...,
58255825
inplace: Literal[False] = ...,
58265826
verify_integrity: bool = ...,
5827+
copy: bool | lib.NoDefault = ...,
58275828
) -> DataFrame:
58285829
...
58295830

@@ -5836,6 +5837,7 @@ def set_index(
58365837
append: bool = ...,
58375838
inplace: Literal[True],
58385839
verify_integrity: bool = ...,
5840+
copy: bool | lib.NoDefault = ...,
58395841
) -> None:
58405842
...
58415843

@@ -5847,6 +5849,7 @@ def set_index(
58475849
append: bool = False,
58485850
inplace: bool = False,
58495851
verify_integrity: bool = False,
5852+
copy: bool | lib.NoDefault = lib.no_default,
58505853
) -> DataFrame | None:
58515854
"""
58525855
Set the DataFrame index using existing columns.
@@ -5873,6 +5876,11 @@ def set_index(
58735876
Check the new index for duplicates. Otherwise defer the check until
58745877
necessary. Setting to False will improve the performance of this
58755878
method.
5879+
copy : bool, default True
5880+
Whether to make a copy of the underlying data when returning a new
5881+
DataFrame.
5882+
5883+
.. versionadded:: 1.5.0
58765884
58775885
Returns
58785886
-------
@@ -5938,6 +5946,13 @@ def set_index(
59385946
4 16 10 2014 31
59395947
"""
59405948
inplace = validate_bool_kwarg(inplace, "inplace")
5949+
if inplace:
5950+
if copy is not lib.no_default:
5951+
raise ValueError("Cannot specify copy when inplace=True")
5952+
copy = False
5953+
elif copy is lib.no_default:
5954+
copy = True
5955+
59415956
self._check_inplace_and_allows_duplicate_labels(inplace)
59425957
if not isinstance(keys, list):
59435958
keys = [keys]
@@ -5973,7 +5988,7 @@ def set_index(
59735988
if inplace:
59745989
frame = self
59755990
else:
5976-
frame = self.copy()
5991+
frame = self.copy(deep=copy)
59775992

59785993
arrays = []
59795994
names: list[Hashable] = []

pandas/tests/frame/methods/test_set_index.py

+19
Original file line numberDiff line numberDiff line change
@@ -25,6 +25,25 @@
2525

2626

2727
class TestSetIndex:
28+
def test_set_index_copy(self):
29+
# GH#48043
30+
df = DataFrame({"A": [1, 2], "B": [3, 4], "C": [5, 6]})
31+
expected = DataFrame({"B": [3, 4], "C": [5, 6]}, index=Index([1, 2], name="A"))
32+
33+
res = df.set_index("A", copy=True)
34+
tm.assert_frame_equal(res, expected)
35+
assert not any(tm.shares_memory(df[col], res[col]) for col in res.columns)
36+
37+
res = df.set_index("A", copy=False)
38+
tm.assert_frame_equal(res, expected)
39+
assert all(tm.shares_memory(df[col], res[col]) for col in res.columns)
40+
41+
msg = "Cannot specify copy when inplace=True"
42+
with pytest.raises(ValueError, match=msg):
43+
df.set_index("A", inplace=True, copy=True)
44+
with pytest.raises(ValueError, match=msg):
45+
df.set_index("A", inplace=True, copy=False)
46+
2847
def test_set_index_multiindex(self):
2948
# segfault in GH#3308
3049
d = {"t1": [2, 2.5, 3], "t2": [4, 5, 6]}

0 commit comments

Comments
 (0)