diff --git a/doc/source/whatsnew/v2.1.0.rst b/doc/source/whatsnew/v2.1.0.rst index 44e091e12bfa6..8fa9c28c4bad2 100644 --- a/doc/source/whatsnew/v2.1.0.rst +++ b/doc/source/whatsnew/v2.1.0.rst @@ -65,6 +65,7 @@ Copy-on-Write improvements - The :class:`DataFrame` constructor, when constructing a DataFrame from a dictionary of Index objects and specifying ``copy=False``, will now use a lazy copy of those Index objects for the columns of the DataFrame (:issue:`52947`) +- :meth:`DataFrame.head` and :meth:`DataFrame.tail` will now return deep copies (:issue:`54011`) - Add lazy copy mechanism to :meth:`DataFrame.eval` (:issue:`53746`) - Trying to operate inplace on a temporary column selection diff --git a/pandas/core/generic.py b/pandas/core/generic.py index 9084395871675..a61bc0e97cf1a 100644 --- a/pandas/core/generic.py +++ b/pandas/core/generic.py @@ -5786,6 +5786,8 @@ def head(self, n: int = 5) -> Self: 4 monkey 5 parrot """ + if using_copy_on_write(): + return self.iloc[:n].copy() return self.iloc[:n] @final @@ -5861,6 +5863,10 @@ def tail(self, n: int = 5) -> Self: 7 whale 8 zebra """ + if using_copy_on_write(): + if n == 0: + return self.iloc[0:0].copy() + return self.iloc[-n:].copy() if n == 0: return self.iloc[0:0] return self.iloc[-n:] diff --git a/pandas/tests/copy_view/test_methods.py b/pandas/tests/copy_view/test_methods.py index e9952e5f4d977..bd5895bc5d970 100644 --- a/pandas/tests/copy_view/test_methods.py +++ b/pandas/tests/copy_view/test_methods.py @@ -895,16 +895,19 @@ def test_head_tail(method, using_copy_on_write): df2._mgr._verify_integrity() if using_copy_on_write: - assert np.shares_memory(get_array(df2, "a"), get_array(df, "a")) - assert np.shares_memory(get_array(df2, "b"), get_array(df, "b")) + # We are explicitly deviating for CoW here to make an eager copy (avoids + # tracking references for very cheap ops) + assert not np.shares_memory(get_array(df2, "a"), get_array(df, "a")) + assert not np.shares_memory(get_array(df2, "b"), get_array(df, "b")) # modify df2 to trigger CoW for that block df2.iloc[0, 0] = 0 - assert np.shares_memory(get_array(df2, "b"), get_array(df, "b")) if using_copy_on_write: + assert not np.shares_memory(get_array(df2, "b"), get_array(df, "b")) assert not np.shares_memory(get_array(df2, "a"), get_array(df, "a")) else: # without CoW enabled, head and tail return views. Mutating df2 also mutates df. + assert np.shares_memory(get_array(df2, "b"), get_array(df, "b")) df2.iloc[0, 0] = 1 tm.assert_frame_equal(df, df_orig)