Skip to content

Commit d3e84c5

Browse files
authored
ENH/CoW: use lazy copy in set_index method (#49557)
1 parent 12ff4f4 commit d3e84c5

File tree

2 files changed

+19
-1
lines changed

2 files changed

+19
-1
lines changed

pandas/core/frame.py

+2 -1
Original file line numberDiff line numberDiff line change
@@ -5836,7 +5836,8 @@ def set_index(
58365836
if inplace:
58375837
frame = self
58385838
else:
5839-
frame = self.copy()
5839+
# GH 49473 Use "lazy copy" with Copy-on-Write
5840+
frame = self.copy(deep=None)
58405841

58415842
arrays = []
58425843
names: list[Hashable] = []

pandas/tests/copy_view/test_methods.py

+17
Original file line numberDiff line numberDiff line change
@@ -214,3 +214,20 @@ def test_chained_methods(request, method, idx, using_copy_on_write):
214214
df.iloc[0, 0] = 0
215215
if not df2_is_view:
216216
tm.assert_frame_equal(df2.iloc[:, idx:], df_orig)
217+
218+
219+
def test_set_index(using_copy_on_write):
220+
# GH 49473
221+
df = DataFrame({"a": [1, 2, 3], "b": [4, 5, 6], "c": [0.1, 0.2, 0.3]})
222+
df_orig = df.copy()
223+
df2 = df.set_index("a")
224+
225+
if using_copy_on_write:
226+
assert np.shares_memory(get_array(df2, "b"), get_array(df, "b"))
227+
else:
228+
assert not np.shares_memory(get_array(df2, "b"), get_array(df, "b"))
229+
230+
# mutating df2 triggers a copy-on-write for that column / block
231+
df2.iloc[0, 1] = 0
232+
assert not np.shares_memory(get_array(df2, "c"), get_array(df, "c"))
233+
tm.assert_frame_equal(df, df_orig)

0 commit comments

Comments
 (0)