diff --git a/pandas/core/frame.py b/pandas/core/frame.py
index a8631f42fb2d6..00a03f6e4619f 100644
--- a/pandas/core/frame.py
+++ b/pandas/core/frame.py
@@ -5855,7 +5855,8 @@ def set_index(
         if inplace:
             frame = self
         else:
-            frame = self.copy()
+            # GH 49473 Use "lazy copy" with Copy-on-Write
+            frame = self.copy(deep=None)
 
         arrays = []
         names: list[Hashable] = []
diff --git a/pandas/tests/copy_view/test_methods.py b/pandas/tests/copy_view/test_methods.py
index 956e2cf98c9b6..9488211e2a0e6 100644
--- a/pandas/tests/copy_view/test_methods.py
+++ b/pandas/tests/copy_view/test_methods.py
@@ -214,3 +214,20 @@ def test_chained_methods(request, method, idx, using_copy_on_write):
     df.iloc[0, 0] = 0
     if not df2_is_view:
         tm.assert_frame_equal(df2.iloc[:, idx:], df_orig)
+
+
+def test_set_index(using_copy_on_write):
+    # GH 49473
+    df = DataFrame({"a": [1, 2, 3], "b": [4, 5, 6], "c": [0.1, 0.2, 0.3]})
+    df_orig = df.copy()
+    df2 = df.set_index("a")
+
+    if using_copy_on_write:
+        assert np.shares_memory(get_array(df2, "b"), get_array(df, "b"))
+    else:
+        assert not np.shares_memory(get_array(df2, "b"), get_array(df, "b"))
+
+    # mutating df2 triggers a copy-on-write for that column / block
+    df2.iloc[0, 1] = 0
+    assert not np.shares_memory(get_array(df2, "c"), get_array(df, "c"))
+    tm.assert_frame_equal(df, df_orig)