Skip to content

Commit 9de1f0b

Browse files
authored
DEPR: inplace kwarg in set_index (#48115)
* DEPR: inplace kwarg in set_index * GH ref
1 parent 5ad44e7 commit 9de1f0b

File tree

24 files changed

+87
-52
lines changed

24 files changed

+87
-52
lines changed

doc/source/user_guide/indexing.rst

+2-3
Original file line numberDiff line numberDiff line change
@@ -1723,13 +1723,12 @@ the given columns to a MultiIndex:
17231723
frame
17241724
17251725
Other options in ``set_index`` allow you to not drop the index columns or to add
1726-
the index in-place (without creating a new object):
1726+
the index without creating a copy of the underlying data:
17271727

17281728
.. ipython:: python
17291729
17301730
data.set_index('c', drop=False)
1731-
data.set_index(['a', 'b'], inplace=True)
1732-
data
1731+
data.set_index(['a', 'b'], copy=False)
17331732
17341733
Reset the index
17351734
~~~~~~~~~~~~~~~

doc/source/whatsnew/v1.5.0.rst

+1
Original file line numberDiff line numberDiff line change
@@ -849,6 +849,7 @@ Other Deprecations
849849
- Deprecated unused arguments ``encoding`` and ``verbose`` in :meth:`Series.to_excel` and :meth:`DataFrame.to_excel` (:issue:`47912`)
850850
- Deprecated producing a single element when iterating over a :class:`DataFrameGroupBy` or a :class:`SeriesGroupBy` that has been grouped by a list of length 1; A tuple of length one will be returned instead (:issue:`42795`)
851851
- Fixed up warning message of deprecation of :meth:`MultiIndex.lexsort_depth` as public method, as the message previously referred to :meth:`MultiIndex.is_lexsorted` instead (:issue:`38701`)
852+
- Deprecated the ``inplace`` keyword in :meth:`DataFrame.set_index`, use ``df = df.set_index(..., copy=False)`` instead (:issue:`48115`)
852853
- Deprecated the ``sort_columns`` argument in :meth:`DataFrame.plot` and :meth:`Series.plot` (:issue:`47563`).
853854
- Deprecated positional arguments for all but the first argument of :meth:`DataFrame.to_stata` and :func:`read_stata`, use keyword arguments instead (:issue:`48128`).
854855

pandas/core/frame.py

+17-3
Original file line numberDiff line numberDiff line change
@@ -5819,7 +5819,7 @@ def set_index(
58195819
*,
58205820
drop: bool = ...,
58215821
append: bool = ...,
5822-
inplace: Literal[False] = ...,
5822+
inplace: Literal[False] | lib.NoDefault = ...,
58235823
verify_integrity: bool = ...,
58245824
copy: bool | lib.NoDefault = ...,
58255825
) -> DataFrame:
@@ -5844,7 +5844,7 @@ def set_index(
58445844
keys,
58455845
drop: bool = True,
58465846
append: bool = False,
5847-
inplace: bool = False,
5847+
inplace: bool | lib.NoDefault = lib.no_default,
58485848
verify_integrity: bool = False,
58495849
copy: bool | lib.NoDefault = lib.no_default,
58505850
) -> DataFrame | None:
@@ -5869,6 +5869,9 @@ def set_index(
58695869
Whether to append columns to existing index.
58705870
inplace : bool, default False
58715871
Whether to modify the DataFrame rather than creating a new one.
5872+
5873+
.. deprecated:: 1.5.0
5874+
58725875
verify_integrity : bool, default False
58735876
Check the new index for duplicates. Otherwise defer the check until
58745877
necessary. Setting to False will improve the performance of this
@@ -5942,7 +5945,18 @@ def set_index(
59425945
3 9 7 2013 84
59435946
4 16 10 2014 31
59445947
"""
5945-
inplace = validate_bool_kwarg(inplace, "inplace")
5948+
if inplace is not lib.no_default:
5949+
inplace = validate_bool_kwarg(inplace, "inplace")
5950+
warnings.warn(
5951+
"The 'inplace' keyword in DataFrame.set_index is deprecated "
5952+
"and will be removed in a future version. Use "
5953+
"`df = df.set_index(..., copy=False)` instead.",
5954+
FutureWarning,
5955+
stacklevel=find_stack_level(inspect.currentframe()),
5956+
)
5957+
else:
5958+
inplace = False
5959+
59465960
if inplace:
59475961
if copy is not lib.no_default:
59485962
raise ValueError("Cannot specify copy when inplace=True")

pandas/core/reshape/merge.py

+11-8
Original file line numberDiff line numberDiff line change
@@ -782,9 +782,9 @@ def get_result(self, copy: bool = True) -> DataFrame:
782782
if self.indicator:
783783
result = self._indicator_post_merge(result)
784784

785-
self._maybe_add_join_keys(result, left_indexer, right_indexer)
785+
result = self._maybe_add_join_keys(result, left_indexer, right_indexer)
786786

787-
self._maybe_restore_index_levels(result)
787+
result = self._maybe_restore_index_levels(result)
788788

789789
self._maybe_drop_cross_column(result, self._cross)
790790

@@ -851,7 +851,7 @@ def _indicator_post_merge(self, result: DataFrame) -> DataFrame:
851851
result = result.drop(labels=["_left_indicator", "_right_indicator"], axis=1)
852852
return result
853853

854-
def _maybe_restore_index_levels(self, result: DataFrame) -> None:
854+
def _maybe_restore_index_levels(self, result: DataFrame) -> DataFrame:
855855
"""
856856
Restore index levels specified as `on` parameters
857857
@@ -869,7 +869,7 @@ def _maybe_restore_index_levels(self, result: DataFrame) -> None:
869869
870870
Returns
871871
-------
872-
None
872+
DataFrame
873873
"""
874874
names_to_restore = []
875875
for name, left_key, right_key in zip(
@@ -893,14 +893,15 @@ def _maybe_restore_index_levels(self, result: DataFrame) -> None:
893893
names_to_restore.append(name)
894894

895895
if names_to_restore:
896-
result.set_index(names_to_restore, inplace=True)
896+
result = result.set_index(names_to_restore, copy=False)
897+
return result
897898

898899
def _maybe_add_join_keys(
899900
self,
900901
result: DataFrame,
901902
left_indexer: np.ndarray | None,
902903
right_indexer: np.ndarray | None,
903-
) -> None:
904+
) -> DataFrame:
904905

905906
left_has_missing = None
906907
right_has_missing = None
@@ -996,11 +997,12 @@ def _maybe_add_join_keys(
996997
for level_name in result.index.names
997998
]
998999

999-
result.set_index(idx_list, inplace=True)
1000+
result = result.set_index(idx_list, copy=False)
10001001
else:
10011002
result.index = Index(key_col, name=name)
10021003
else:
10031004
result.insert(i, name or f"key_{i}", key_col)
1005+
return result
10041006

10051007
def _get_join_indexers(self) -> tuple[npt.NDArray[np.intp], npt.NDArray[np.intp]]:
10061008
"""return the join indexers"""
@@ -1768,7 +1770,8 @@ def get_result(self, copy: bool = True) -> DataFrame:
17681770
result = self._reindex_and_concat(
17691771
join_index, left_join_indexer, right_join_indexer, copy=copy
17701772
)
1771-
self._maybe_add_join_keys(result, left_indexer, right_indexer)
1773+
1774+
result = self._maybe_add_join_keys(result, left_indexer, right_indexer)
17721775

17731776
return result
17741777

pandas/io/parsers/arrow_parser_wrapper.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -117,7 +117,7 @@ def _finalize_output(self, frame: DataFrame) -> DataFrame:
117117
# String case
118118
if item not in frame.columns:
119119
raise ValueError(f"Index {item} invalid")
120-
frame.set_index(self.index_col, drop=True, inplace=True)
120+
frame = frame.set_index(self.index_col, drop=True, copy=False)
121121
# Clear names if headerless and no name given
122122
if self.header is None and not multi_index_named:
123123
frame.index.names = [None] * len(frame.index.names)

pandas/io/pytables.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -4663,7 +4663,7 @@ def read(
46634663
columns.insert(0, n)
46644664
s = super().read(where=where, columns=columns, start=start, stop=stop)
46654665
if is_multi_index:
4666-
s.set_index(self.levels, inplace=True)
4666+
s = s.set_index(self.levels, copy=False)
46674667

46684668
s = s.iloc[:, 0]
46694669

pandas/io/sql.py

+3-3
Original file line numberDiff line numberDiff line change
@@ -152,7 +152,7 @@ def _wrap_result(
152152
frame = _parse_date_columns(frame, parse_dates)
153153

154154
if index_col is not None:
155-
frame.set_index(index_col, inplace=True)
155+
frame = frame.set_index(index_col, copy=False)
156156

157157
return frame
158158

@@ -979,7 +979,7 @@ def _query_iterator(
979979
self._harmonize_columns(parse_dates=parse_dates)
980980

981981
if self.index is not None:
982-
self.frame.set_index(self.index, inplace=True)
982+
self.frame = self.frame.set_index(self.index, copy=False)
983983

984984
yield self.frame
985985

@@ -1020,7 +1020,7 @@ def read(
10201020
self._harmonize_columns(parse_dates=parse_dates)
10211021

10221022
if self.index is not None:
1023-
self.frame.set_index(self.index, inplace=True)
1023+
self.frame = self.frame.set_index(self.index, copy=False)
10241024

10251025
return self.frame
10261026

pandas/tests/frame/methods/test_combine_first.py

+2-2
Original file line numberDiff line numberDiff line change
@@ -387,8 +387,8 @@ def test_combine_first_string_dtype_only_na(self, nullable_string_dtype):
387387
{"a": ["962", "85"], "b": [pd.NA] * 2}, dtype=nullable_string_dtype
388388
)
389389
df2 = DataFrame({"a": ["85"], "b": [pd.NA]}, dtype=nullable_string_dtype)
390-
df.set_index(["a", "b"], inplace=True)
391-
df2.set_index(["a", "b"], inplace=True)
390+
df = df.set_index(["a", "b"], copy=False)
391+
df2 = df2.set_index(["a", "b"], copy=False)
392392
result = df.combine_first(df2)
393393
expected = DataFrame(
394394
{"a": ["962", "85"], "b": [pd.NA] * 2}, dtype=nullable_string_dtype

pandas/tests/frame/methods/test_set_index.py

+8-3
Original file line numberDiff line numberDiff line change
@@ -40,9 +40,11 @@ def test_set_index_copy(self):
4040

4141
msg = "Cannot specify copy when inplace=True"
4242
with pytest.raises(ValueError, match=msg):
43-
df.set_index("A", inplace=True, copy=True)
43+
with tm.assert_produces_warning(FutureWarning, match="The 'inplace'"):
44+
df.set_index("A", inplace=True, copy=True)
4445
with pytest.raises(ValueError, match=msg):
45-
df.set_index("A", inplace=True, copy=False)
46+
with tm.assert_produces_warning(FutureWarning, match="The 'inplace'"):
47+
df.set_index("A", inplace=True, copy=False)
4648

4749
def test_set_index_multiindex(self):
4850
# segfault in GH#3308
@@ -197,7 +199,10 @@ def test_set_index_drop_inplace(self, frame_of_index_cols, drop, inplace, keys):
197199

198200
if inplace:
199201
result = df.copy()
200-
return_value = result.set_index(keys, drop=drop, inplace=True)
202+
with tm.assert_produces_warning(
203+
FutureWarning, match="The 'inplace' keyword"
204+
):
205+
return_value = result.set_index(keys, drop=drop, inplace=True)
201206
assert return_value is None
202207
else:
203208
result = df.set_index(keys, drop=drop)

pandas/tests/frame/test_api.py

+2-1
Original file line numberDiff line numberDiff line change
@@ -244,7 +244,8 @@ def _check_f(base, f):
244244

245245
# set_index
246246
f = lambda x: x.set_index("a", inplace=True)
247-
_check_f(data.copy(), f)
247+
with tm.assert_produces_warning(FutureWarning, match="The 'inplace' keyword"):
248+
_check_f(data.copy(), f)
248249

249250
# reset_index
250251
f = lambda x: x.reset_index(inplace=True)

pandas/tests/frame/test_query_eval.py

+12-6
Original file line numberDiff line numberDiff line change
@@ -436,7 +436,8 @@ def test_date_index_query(self):
436436
df = DataFrame(np.random.randn(n, 3))
437437
df["dates1"] = date_range("1/1/2012", periods=n)
438438
df["dates3"] = date_range("1/1/2014", periods=n)
439-
return_value = df.set_index("dates1", inplace=True, drop=True)
439+
with tm.assert_produces_warning(FutureWarning, match="The 'inplace' keyword"):
440+
return_value = df.set_index("dates1", inplace=True, drop=True)
440441
assert return_value is None
441442
res = df.query("index < 20130101 < dates3", engine=engine, parser=parser)
442443
expec = df[(df.index < "20130101") & ("20130101" < df.dates3)]
@@ -449,7 +450,8 @@ def test_date_index_query_with_NaT(self):
449450
df["dates1"] = date_range("1/1/2012", periods=n)
450451
df["dates3"] = date_range("1/1/2014", periods=n)
451452
df.iloc[0, 0] = pd.NaT
452-
return_value = df.set_index("dates1", inplace=True, drop=True)
453+
with tm.assert_produces_warning(FutureWarning, match="The 'inplace' keyword"):
454+
return_value = df.set_index("dates1", inplace=True, drop=True)
453455
assert return_value is None
454456
res = df.query("index < 20130101 < dates3", engine=engine, parser=parser)
455457
expec = df[(df.index < "20130101") & ("20130101" < df.dates3)]
@@ -463,7 +465,8 @@ def test_date_index_query_with_NaT_duplicates(self):
463465
d["dates3"] = date_range("1/1/2014", periods=n)
464466
df = DataFrame(d)
465467
df.loc[np.random.rand(n) > 0.5, "dates1"] = pd.NaT
466-
return_value = df.set_index("dates1", inplace=True, drop=True)
468+
with tm.assert_produces_warning(FutureWarning, match="The 'inplace' keyword"):
469+
return_value = df.set_index("dates1", inplace=True, drop=True)
467470
assert return_value is None
468471
res = df.query("dates1 < 20130101 < dates3", engine=engine, parser=parser)
469472
expec = df[(df.index.to_series() < "20130101") & ("20130101" < df.dates3)]
@@ -794,7 +797,8 @@ def test_date_index_query(self):
794797
df = DataFrame(np.random.randn(n, 3))
795798
df["dates1"] = date_range("1/1/2012", periods=n)
796799
df["dates3"] = date_range("1/1/2014", periods=n)
797-
return_value = df.set_index("dates1", inplace=True, drop=True)
800+
with tm.assert_produces_warning(FutureWarning, match="The 'inplace' keyword"):
801+
return_value = df.set_index("dates1", inplace=True, drop=True)
798802
assert return_value is None
799803
res = df.query(
800804
"(index < 20130101) & (20130101 < dates3)", engine=engine, parser=parser
@@ -809,7 +813,8 @@ def test_date_index_query_with_NaT(self):
809813
df["dates1"] = date_range("1/1/2012", periods=n)
810814
df["dates3"] = date_range("1/1/2014", periods=n)
811815
df.iloc[0, 0] = pd.NaT
812-
return_value = df.set_index("dates1", inplace=True, drop=True)
816+
with tm.assert_produces_warning(FutureWarning, match="The 'inplace' keyword"):
817+
return_value = df.set_index("dates1", inplace=True, drop=True)
813818
assert return_value is None
814819
res = df.query(
815820
"(index < 20130101) & (20130101 < dates3)", engine=engine, parser=parser
@@ -824,7 +829,8 @@ def test_date_index_query_with_NaT_duplicates(self):
824829
df["dates1"] = date_range("1/1/2012", periods=n)
825830
df["dates3"] = date_range("1/1/2014", periods=n)
826831
df.loc[np.random.rand(n) > 0.5, "dates1"] = pd.NaT
827-
return_value = df.set_index("dates1", inplace=True, drop=True)
832+
with tm.assert_produces_warning(FutureWarning, match="The 'inplace' keyword"):
833+
return_value = df.set_index("dates1", inplace=True, drop=True)
828834
assert return_value is None
829835
msg = r"'BoolOp' nodes are not implemented"
830836
with pytest.raises(NotImplementedError, match=msg):

pandas/tests/groupby/test_apply.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -678,7 +678,7 @@ def test_apply_groupby_datetimeindex():
678678
result = df.groupby("Name").sum()
679679

680680
expected = DataFrame({"Name": ["A", "B", "C"], "Value": [10, 50, 90]})
681-
expected.set_index("Name", inplace=True)
681+
expected = expected.set_index("Name", copy=False)
682682

683683
tm.assert_frame_equal(result, expected)
684684

pandas/tests/groupby/test_function.py

+2-2
Original file line numberDiff line numberDiff line change
@@ -98,7 +98,7 @@ def test_builtins_apply(keys, f):
9898

9999
if f != sum:
100100
expected = gb.agg(fname).reset_index()
101-
expected.set_index(keys, inplace=True, drop=False)
101+
expected = expected.set_index(keys, copy=False, drop=False)
102102
tm.assert_frame_equal(result, expected, check_dtype=False)
103103

104104
tm.assert_series_equal(getattr(result, fname)(), getattr(df, fname)())
@@ -454,7 +454,7 @@ def test_groupby_non_arithmetic_agg_types(dtype, method, data):
454454
df_out = DataFrame(exp)
455455

456456
df_out["b"] = df_out.b.astype(out_type)
457-
df_out.set_index("a", inplace=True)
457+
df_out = df_out.set_index("a", copy=False)
458458

459459
grpd = df.groupby("a")
460460
t = getattr(grpd, method)(*data["args"])

pandas/tests/indexes/multi/test_reshape.py

+2-2
Original file line numberDiff line numberDiff line change
@@ -35,7 +35,7 @@ def test_insert(idx):
3535
idx.insert(0, ("foo2",))
3636

3737
left = pd.DataFrame([["a", "b", 0], ["b", "d", 1]], columns=["1st", "2nd", "3rd"])
38-
left.set_index(["1st", "2nd"], inplace=True)
38+
left = left.set_index(["1st", "2nd"], copy=False)
3939
ts = left["3rd"].copy(deep=True)
4040

4141
left.loc[("b", "x"), "3rd"] = 2
@@ -65,7 +65,7 @@ def test_insert(idx):
6565
],
6666
columns=["1st", "2nd", "3rd"],
6767
)
68-
right.set_index(["1st", "2nd"], inplace=True)
68+
right = right.set_index(["1st", "2nd"], copy=False)
6969
# FIXME data types changes to float because
7070
# of intermediate nan insertion;
7171
tm.assert_frame_equal(left, right, check_dtype=False)

pandas/tests/indexing/multiindex/test_indexing_slow.py

+5-2
Original file line numberDiff line numberDiff line change
@@ -60,15 +60,18 @@ def validate(mi, df, key):
6060
assert key[: i + 1] in mi.index
6161
right = df[mask].copy()
6262

63+
msg = "The 'inplace' keyword in DataFrame.set_index is deprecated"
6364
if i + 1 != len(key): # partial key
6465
return_value = right.drop(cols[: i + 1], axis=1, inplace=True)
6566
assert return_value is None
66-
return_value = right.set_index(cols[i + 1 : -1], inplace=True)
67+
with tm.assert_produces_warning(FutureWarning, match=msg):
68+
return_value = right.set_index(cols[i + 1 : -1], inplace=True)
6769
assert return_value is None
6870
tm.assert_frame_equal(mi.loc[key[: i + 1]], right)
6971

7072
else: # full key
71-
return_value = right.set_index(cols[:-1], inplace=True)
73+
with tm.assert_produces_warning(FutureWarning, match=msg):
74+
return_value = right.set_index(cols[:-1], inplace=True)
7275
assert return_value is None
7376
if len(right) == 1: # single hit
7477
right = Series(

pandas/tests/indexing/multiindex/test_multiindex.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -131,7 +131,7 @@ def test_multiindex_complex(self):
131131
"z": non_complex_data,
132132
}
133133
)
134-
result.set_index(["x", "y"], inplace=True)
134+
result = result.set_index(["x", "y"], copy=False)
135135
expected = DataFrame(
136136
{"z": non_complex_data},
137137
index=MultiIndex.from_arrays(

pandas/tests/io/pytables/test_append.py

+2-2
Original file line numberDiff line numberDiff line change
@@ -137,7 +137,7 @@ def test_append_series(setup_path):
137137
mi["B"] = np.arange(len(mi))
138138
mi["C"] = "foo"
139139
mi.loc[3:5, "C"] = "bar"
140-
mi.set_index(["C", "B"], inplace=True)
140+
mi = mi.set_index(["C", "B"], copy=False)
141141
s = mi.stack()
142142
s.index = s.index.droplevel(2)
143143
store.append("mi", s)
@@ -326,7 +326,7 @@ def test_append_with_different_block_ordering(setup_path):
326326
a = df.pop("A")
327327
df["A"] = a
328328

329-
df.set_index("index", inplace=True)
329+
df = df.set_index("index", copy=False)
330330

331331
store.append("df", df)
332332

0 commit comments

Comments
 (0)