Skip to content

Commit e5ab0ab

Browse files
Manual Backport PR #48417 on branch 1.5.x (Revert set_index inplace and copy keyword changes) (#48552)
Backport PR #48417: Revert set_index inplace and copy keyword changes Co-authored-by: Joris Van den Bossche <[email protected]>
1 parent e020bb6 commit e5ab0ab

File tree

24 files changed

+51
-121
lines changed

24 files changed

+51
-121
lines changed

doc/source/user_guide/indexing.rst

+3-2
Original file line numberDiff line numberDiff line change
@@ -1723,12 +1723,13 @@ the given columns to a MultiIndex:
17231723
frame
17241724
17251725
Other options in ``set_index`` allow you to not drop the index columns or to add
1726-
the index without creating a copy of the underlying data:
1726+
the index in-place (without creating a new object):
17271727

17281728
.. ipython:: python
17291729
17301730
data.set_index('c', drop=False)
1731-
data.set_index(['a', 'b'], copy=False)
1731+
data.set_index(['a', 'b'], inplace=True)
1732+
data
17321733
17331734
Reset the index
17341735
~~~~~~~~~~~~~~~

doc/source/whatsnew/v1.5.0.rst

-2
Original file line numberDiff line numberDiff line change
@@ -330,7 +330,6 @@ Other enhancements
330330
- :meth:`DataFrame.quantile` gained a ``method`` argument that can accept ``table`` to evaluate multi-column quantiles (:issue:`43881`)
331331
- :class:`Interval` now supports checking whether one interval is contained by another interval (:issue:`46613`)
332332
- Added ``copy`` keyword to :meth:`Series.set_axis` and :meth:`DataFrame.set_axis` to allow user to set axis on a new object without necessarily copying the underlying data (:issue:`47932`)
333-
- :meth:`DataFrame.set_index` now supports a ``copy`` keyword. If ``False``, the underlying data is not copied when a new :class:`DataFrame` is returned (:issue:`48043`)
334333
- The method :meth:`.ExtensionArray.factorize` accepts ``use_na_sentinel=False`` for determining how null values are to be treated (:issue:`46601`)
335334
- The ``Dockerfile`` now installs a dedicated ``pandas-dev`` virtual environment for pandas development instead of using the ``base`` environment (:issue:`48427`)
336335

@@ -934,7 +933,6 @@ Other Deprecations
934933
- Deprecated the ``inplace`` keyword in :meth:`DataFrame.set_axis` and :meth:`Series.set_axis`, use ``obj = obj.set_axis(..., copy=False)`` instead (:issue:`48130`)
935934
- Deprecated producing a single element when iterating over a :class:`DataFrameGroupBy` or a :class:`SeriesGroupBy` that has been grouped by a list of length 1; a tuple of length one will be returned instead (:issue:`42795`)
936935
- Fixed up warning message of deprecation of :meth:`MultiIndex.lexsort_depth` as public method, as the message previously referred to :meth:`MultiIndex.is_lexsorted` instead (:issue:`38701`)
937-
- Deprecated the ``inplace`` keyword in :meth:`DataFrame.set_index`, use ``df = df.set_index(..., copy=False)`` instead (:issue:`48115`)
938936
- Deprecated the ``sort_columns`` argument in :meth:`DataFrame.plot` and :meth:`Series.plot` (:issue:`47563`).
939937
- Deprecated positional arguments for all but the first argument of :meth:`DataFrame.to_stata` and :func:`read_stata`, use keyword arguments instead (:issue:`48128`).
940938
- Deprecated the ``mangle_dupe_cols`` argument in :func:`read_csv`, :func:`read_fwf`, :func:`read_table` and :func:`read_excel`. The argument was never implemented, and a new argument where the renaming pattern can be specified will be added instead (:issue:`47718`)

pandas/core/frame.py

+4-33
Original file line numberDiff line numberDiff line change
@@ -5855,9 +5855,8 @@ def set_index(
58555855
*,
58565856
drop: bool = ...,
58575857
append: bool = ...,
5858-
inplace: Literal[False] | lib.NoDefault = ...,
5858+
inplace: Literal[False] = ...,
58595859
verify_integrity: bool = ...,
5860-
copy: bool | lib.NoDefault = ...,
58615860
) -> DataFrame:
58625861
...
58635862

@@ -5870,7 +5869,6 @@ def set_index(
58705869
append: bool = ...,
58715870
inplace: Literal[True],
58725871
verify_integrity: bool = ...,
5873-
copy: bool | lib.NoDefault = ...,
58745872
) -> None:
58755873
...
58765874

@@ -5880,9 +5878,8 @@ def set_index(
58805878
keys,
58815879
drop: bool = True,
58825880
append: bool = False,
5883-
inplace: bool | lib.NoDefault = lib.no_default,
5881+
inplace: bool = False,
58845882
verify_integrity: bool = False,
5885-
copy: bool | lib.NoDefault = lib.no_default,
58865883
) -> DataFrame | None:
58875884
"""
58885885
Set the DataFrame index using existing columns.
@@ -5905,18 +5902,10 @@ def set_index(
59055902
Whether to append columns to existing index.
59065903
inplace : bool, default False
59075904
Whether to modify the DataFrame rather than creating a new one.
5908-
5909-
.. deprecated:: 1.5.0
5910-
59115905
verify_integrity : bool, default False
59125906
Check the new index for duplicates. Otherwise defer the check until
59135907
necessary. Setting to False will improve the performance of this
59145908
method.
5915-
copy : bool, default True
5916-
Whether to make a copy of the underlying data when returning a new
5917-
DataFrame.
5918-
5919-
.. versionadded:: 1.5.0
59205909
59215910
Returns
59225911
-------
@@ -5981,25 +5970,7 @@ def set_index(
59815970
3 9 7 2013 84
59825971
4 16 10 2014 31
59835972
"""
5984-
if inplace is not lib.no_default:
5985-
inplace = validate_bool_kwarg(inplace, "inplace")
5986-
warnings.warn(
5987-
"The 'inplace' keyword in DataFrame.set_index is deprecated "
5988-
"and will be removed in a future version. Use "
5989-
"`df = df.set_index(..., copy=False)` instead.",
5990-
FutureWarning,
5991-
stacklevel=find_stack_level(inspect.currentframe()),
5992-
)
5993-
else:
5994-
inplace = False
5995-
5996-
if inplace:
5997-
if copy is not lib.no_default:
5998-
raise ValueError("Cannot specify copy when inplace=True")
5999-
copy = False
6000-
elif copy is lib.no_default:
6001-
copy = True
6002-
5973+
inplace = validate_bool_kwarg(inplace, "inplace")
60035974
self._check_inplace_and_allows_duplicate_labels(inplace)
60045975
if not isinstance(keys, list):
60055976
keys = [keys]
@@ -6035,7 +6006,7 @@ def set_index(
60356006
if inplace:
60366007
frame = self
60376008
else:
6038-
frame = self.copy(deep=copy)
6009+
frame = self.copy()
60396010

60406011
arrays = []
60416012
names: list[Hashable] = []

pandas/core/reshape/merge.py

+8-11
Original file line numberDiff line numberDiff line change
@@ -783,9 +783,9 @@ def get_result(self, copy: bool = True) -> DataFrame:
783783
if self.indicator:
784784
result = self._indicator_post_merge(result)
785785

786-
result = self._maybe_add_join_keys(result, left_indexer, right_indexer)
786+
self._maybe_add_join_keys(result, left_indexer, right_indexer)
787787

788-
result = self._maybe_restore_index_levels(result)
788+
self._maybe_restore_index_levels(result)
789789

790790
self._maybe_drop_cross_column(result, self._cross)
791791

@@ -852,7 +852,7 @@ def _indicator_post_merge(self, result: DataFrame) -> DataFrame:
852852
result = result.drop(labels=["_left_indicator", "_right_indicator"], axis=1)
853853
return result
854854

855-
def _maybe_restore_index_levels(self, result: DataFrame) -> DataFrame:
855+
def _maybe_restore_index_levels(self, result: DataFrame) -> None:
856856
"""
857857
Restore index levels specified as `on` parameters
858858
@@ -870,7 +870,7 @@ def _maybe_restore_index_levels(self, result: DataFrame) -> DataFrame:
870870
871871
Returns
872872
-------
873-
DataFrame
873+
None
874874
"""
875875
names_to_restore = []
876876
for name, left_key, right_key in zip(
@@ -894,15 +894,14 @@ def _maybe_restore_index_levels(self, result: DataFrame) -> DataFrame:
894894
names_to_restore.append(name)
895895

896896
if names_to_restore:
897-
result = result.set_index(names_to_restore, copy=False)
898-
return result
897+
result.set_index(names_to_restore, inplace=True)
899898

900899
def _maybe_add_join_keys(
901900
self,
902901
result: DataFrame,
903902
left_indexer: np.ndarray | None,
904903
right_indexer: np.ndarray | None,
905-
) -> DataFrame:
904+
) -> None:
906905

907906
left_has_missing = None
908907
right_has_missing = None
@@ -993,12 +992,11 @@ def _maybe_add_join_keys(
993992
for level_name in result.index.names
994993
]
995994

996-
result = result.set_index(idx_list, copy=False)
995+
result.set_index(idx_list, inplace=True)
997996
else:
998997
result.index = Index(key_col, name=name)
999998
else:
1000999
result.insert(i, name or f"key_{i}", key_col)
1001-
return result
10021000

10031001
def _get_join_indexers(self) -> tuple[npt.NDArray[np.intp], npt.NDArray[np.intp]]:
10041002
"""return the join indexers"""
@@ -1768,8 +1766,7 @@ def get_result(self, copy: bool = True) -> DataFrame:
17681766
result = self._reindex_and_concat(
17691767
join_index, left_join_indexer, right_join_indexer, copy=copy
17701768
)
1771-
1772-
result = self._maybe_add_join_keys(result, left_indexer, right_indexer)
1769+
self._maybe_add_join_keys(result, left_indexer, right_indexer)
17731770

17741771
return result
17751772

pandas/io/parsers/arrow_parser_wrapper.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -117,7 +117,7 @@ def _finalize_output(self, frame: DataFrame) -> DataFrame:
117117
# String case
118118
if item not in frame.columns:
119119
raise ValueError(f"Index {item} invalid")
120-
frame = frame.set_index(self.index_col, drop=True, copy=False)
120+
frame.set_index(self.index_col, drop=True, inplace=True)
121121
# Clear names if headerless and no name given
122122
if self.header is None and not multi_index_named:
123123
frame.index.names = [None] * len(frame.index.names)

pandas/io/pytables.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -4667,7 +4667,7 @@ def read(
46674667
columns.insert(0, n)
46684668
s = super().read(where=where, columns=columns, start=start, stop=stop)
46694669
if is_multi_index:
4670-
s = s.set_index(self.levels, copy=False)
4670+
s.set_index(self.levels, inplace=True)
46714671

46724672
s = s.iloc[:, 0]
46734673

pandas/io/sql.py

+3-3
Original file line numberDiff line numberDiff line change
@@ -152,7 +152,7 @@ def _wrap_result(
152152
frame = _parse_date_columns(frame, parse_dates)
153153

154154
if index_col is not None:
155-
frame = frame.set_index(index_col, copy=False)
155+
frame.set_index(index_col, inplace=True)
156156

157157
return frame
158158

@@ -980,7 +980,7 @@ def _query_iterator(
980980
self._harmonize_columns(parse_dates=parse_dates)
981981

982982
if self.index is not None:
983-
self.frame = self.frame.set_index(self.index, copy=False)
983+
self.frame.set_index(self.index, inplace=True)
984984

985985
yield self.frame
986986

@@ -1021,7 +1021,7 @@ def read(
10211021
self._harmonize_columns(parse_dates=parse_dates)
10221022

10231023
if self.index is not None:
1024-
self.frame = self.frame.set_index(self.index, copy=False)
1024+
self.frame.set_index(self.index, inplace=True)
10251025

10261026
return self.frame
10271027

pandas/tests/frame/methods/test_combine_first.py

+2-2
Original file line numberDiff line numberDiff line change
@@ -387,8 +387,8 @@ def test_combine_first_string_dtype_only_na(self, nullable_string_dtype):
387387
{"a": ["962", "85"], "b": [pd.NA] * 2}, dtype=nullable_string_dtype
388388
)
389389
df2 = DataFrame({"a": ["85"], "b": [pd.NA]}, dtype=nullable_string_dtype)
390-
df = df.set_index(["a", "b"], copy=False)
391-
df2 = df2.set_index(["a", "b"], copy=False)
390+
df.set_index(["a", "b"], inplace=True)
391+
df2.set_index(["a", "b"], inplace=True)
392392
result = df.combine_first(df2)
393393
expected = DataFrame(
394394
{"a": ["962", "85"], "b": [pd.NA] * 2}, dtype=nullable_string_dtype

pandas/tests/frame/methods/test_set_index.py

+1-25
Original file line numberDiff line numberDiff line change
@@ -25,27 +25,6 @@
2525

2626

2727
class TestSetIndex:
28-
def test_set_index_copy(self):
29-
# GH#48043
30-
df = DataFrame({"A": [1, 2], "B": [3, 4], "C": [5, 6]})
31-
expected = DataFrame({"B": [3, 4], "C": [5, 6]}, index=Index([1, 2], name="A"))
32-
33-
res = df.set_index("A", copy=True)
34-
tm.assert_frame_equal(res, expected)
35-
assert not any(tm.shares_memory(df[col], res[col]) for col in res.columns)
36-
37-
res = df.set_index("A", copy=False)
38-
tm.assert_frame_equal(res, expected)
39-
assert all(tm.shares_memory(df[col], res[col]) for col in res.columns)
40-
41-
msg = "Cannot specify copy when inplace=True"
42-
with pytest.raises(ValueError, match=msg):
43-
with tm.assert_produces_warning(FutureWarning, match="The 'inplace'"):
44-
df.set_index("A", inplace=True, copy=True)
45-
with pytest.raises(ValueError, match=msg):
46-
with tm.assert_produces_warning(FutureWarning, match="The 'inplace'"):
47-
df.set_index("A", inplace=True, copy=False)
48-
4928
def test_set_index_multiindex(self):
5029
# segfault in GH#3308
5130
d = {"t1": [2, 2.5, 3], "t2": [4, 5, 6]}
@@ -199,10 +178,7 @@ def test_set_index_drop_inplace(self, frame_of_index_cols, drop, inplace, keys):
199178

200179
if inplace:
201180
result = df.copy()
202-
with tm.assert_produces_warning(
203-
FutureWarning, match="The 'inplace' keyword"
204-
):
205-
return_value = result.set_index(keys, drop=drop, inplace=True)
181+
return_value = result.set_index(keys, drop=drop, inplace=True)
206182
assert return_value is None
207183
else:
208184
result = df.set_index(keys, drop=drop)

pandas/tests/frame/test_api.py

+1-2
Original file line numberDiff line numberDiff line change
@@ -244,8 +244,7 @@ def _check_f(base, f):
244244

245245
# set_index
246246
f = lambda x: x.set_index("a", inplace=True)
247-
with tm.assert_produces_warning(FutureWarning, match="The 'inplace' keyword"):
248-
_check_f(data.copy(), f)
247+
_check_f(data.copy(), f)
249248

250249
# reset_index
251250
f = lambda x: x.reset_index(inplace=True)

pandas/tests/frame/test_query_eval.py

+6-12
Original file line numberDiff line numberDiff line change
@@ -436,8 +436,7 @@ def test_date_index_query(self):
436436
df = DataFrame(np.random.randn(n, 3))
437437
df["dates1"] = date_range("1/1/2012", periods=n)
438438
df["dates3"] = date_range("1/1/2014", periods=n)
439-
with tm.assert_produces_warning(FutureWarning, match="The 'inplace' keyword"):
440-
return_value = df.set_index("dates1", inplace=True, drop=True)
439+
return_value = df.set_index("dates1", inplace=True, drop=True)
441440
assert return_value is None
442441
res = df.query("index < 20130101 < dates3", engine=engine, parser=parser)
443442
expec = df[(df.index < "20130101") & ("20130101" < df.dates3)]
@@ -450,8 +449,7 @@ def test_date_index_query_with_NaT(self):
450449
df["dates1"] = date_range("1/1/2012", periods=n)
451450
df["dates3"] = date_range("1/1/2014", periods=n)
452451
df.iloc[0, 0] = pd.NaT
453-
with tm.assert_produces_warning(FutureWarning, match="The 'inplace' keyword"):
454-
return_value = df.set_index("dates1", inplace=True, drop=True)
452+
return_value = df.set_index("dates1", inplace=True, drop=True)
455453
assert return_value is None
456454
res = df.query("index < 20130101 < dates3", engine=engine, parser=parser)
457455
expec = df[(df.index < "20130101") & ("20130101" < df.dates3)]
@@ -465,8 +463,7 @@ def test_date_index_query_with_NaT_duplicates(self):
465463
d["dates3"] = date_range("1/1/2014", periods=n)
466464
df = DataFrame(d)
467465
df.loc[np.random.rand(n) > 0.5, "dates1"] = pd.NaT
468-
with tm.assert_produces_warning(FutureWarning, match="The 'inplace' keyword"):
469-
return_value = df.set_index("dates1", inplace=True, drop=True)
466+
return_value = df.set_index("dates1", inplace=True, drop=True)
470467
assert return_value is None
471468
res = df.query("dates1 < 20130101 < dates3", engine=engine, parser=parser)
472469
expec = df[(df.index.to_series() < "20130101") & ("20130101" < df.dates3)]
@@ -797,8 +794,7 @@ def test_date_index_query(self):
797794
df = DataFrame(np.random.randn(n, 3))
798795
df["dates1"] = date_range("1/1/2012", periods=n)
799796
df["dates3"] = date_range("1/1/2014", periods=n)
800-
with tm.assert_produces_warning(FutureWarning, match="The 'inplace' keyword"):
801-
return_value = df.set_index("dates1", inplace=True, drop=True)
797+
return_value = df.set_index("dates1", inplace=True, drop=True)
802798
assert return_value is None
803799
res = df.query(
804800
"(index < 20130101) & (20130101 < dates3)", engine=engine, parser=parser
@@ -813,8 +809,7 @@ def test_date_index_query_with_NaT(self):
813809
df["dates1"] = date_range("1/1/2012", periods=n)
814810
df["dates3"] = date_range("1/1/2014", periods=n)
815811
df.iloc[0, 0] = pd.NaT
816-
with tm.assert_produces_warning(FutureWarning, match="The 'inplace' keyword"):
817-
return_value = df.set_index("dates1", inplace=True, drop=True)
812+
return_value = df.set_index("dates1", inplace=True, drop=True)
818813
assert return_value is None
819814
res = df.query(
820815
"(index < 20130101) & (20130101 < dates3)", engine=engine, parser=parser
@@ -829,8 +824,7 @@ def test_date_index_query_with_NaT_duplicates(self):
829824
df["dates1"] = date_range("1/1/2012", periods=n)
830825
df["dates3"] = date_range("1/1/2014", periods=n)
831826
df.loc[np.random.rand(n) > 0.5, "dates1"] = pd.NaT
832-
with tm.assert_produces_warning(FutureWarning, match="The 'inplace' keyword"):
833-
return_value = df.set_index("dates1", inplace=True, drop=True)
827+
return_value = df.set_index("dates1", inplace=True, drop=True)
834828
assert return_value is None
835829
msg = r"'BoolOp' nodes are not implemented"
836830
with pytest.raises(NotImplementedError, match=msg):

pandas/tests/groupby/test_apply.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -678,7 +678,7 @@ def test_apply_groupby_datetimeindex():
678678
result = df.groupby("Name").sum()
679679

680680
expected = DataFrame({"Name": ["A", "B", "C"], "Value": [10, 50, 90]})
681-
expected = expected.set_index("Name", copy=False)
681+
expected.set_index("Name", inplace=True)
682682

683683
tm.assert_frame_equal(result, expected)
684684

pandas/tests/groupby/test_function.py

+2-2
Original file line numberDiff line numberDiff line change
@@ -98,7 +98,7 @@ def test_builtins_apply(keys, f):
9898

9999
if f != sum:
100100
expected = gb.agg(fname).reset_index()
101-
expected = expected.set_index(keys, copy=False, drop=False)
101+
expected.set_index(keys, inplace=True, drop=False)
102102
tm.assert_frame_equal(result, expected, check_dtype=False)
103103

104104
tm.assert_series_equal(getattr(result, fname)(), getattr(df, fname)())
@@ -454,7 +454,7 @@ def test_groupby_non_arithmetic_agg_types(dtype, method, data):
454454
df_out = DataFrame(exp)
455455

456456
df_out["b"] = df_out.b.astype(out_type)
457-
df_out = df_out.set_index("a", copy=False)
457+
df_out.set_index("a", inplace=True)
458458

459459
grpd = df.groupby("a")
460460
t = getattr(grpd, method)(*data["args"])

pandas/tests/indexes/multi/test_reshape.py

+2-2
Original file line numberDiff line numberDiff line change
@@ -35,7 +35,7 @@ def test_insert(idx):
3535
idx.insert(0, ("foo2",))
3636

3737
left = pd.DataFrame([["a", "b", 0], ["b", "d", 1]], columns=["1st", "2nd", "3rd"])
38-
left = left.set_index(["1st", "2nd"], copy=False)
38+
left.set_index(["1st", "2nd"], inplace=True)
3939
ts = left["3rd"].copy(deep=True)
4040

4141
left.loc[("b", "x"), "3rd"] = 2
@@ -65,7 +65,7 @@ def test_insert(idx):
6565
],
6666
columns=["1st", "2nd", "3rd"],
6767
)
68-
right = right.set_index(["1st", "2nd"], copy=False)
68+
right.set_index(["1st", "2nd"], inplace=True)
6969
# FIXME data types changes to float because
7070
# of intermediate nan insertion;
7171
tm.assert_frame_equal(left, right, check_dtype=False)

0 commit comments

Comments
 (0)