From 31b7acbee3d6a6d0bb2ace5232c44fd4a063117b Mon Sep 17 00:00:00 2001 From: Patrick Hoefler Date: Wed, 5 Jul 2023 22:54:49 +0200 Subject: [PATCH 1/4] API/CoW: Return copies for head and tail --- doc/source/whatsnew/v2.1.0.rst | 1 + pandas/core/generic.py | 6 ++++++ pandas/tests/copy_view/test_methods.py | 4 ++-- 3 files changed, 9 insertions(+), 2 deletions(-) diff --git a/doc/source/whatsnew/v2.1.0.rst b/doc/source/whatsnew/v2.1.0.rst index 198a7155e1a1e..36d12bb6aba0f 100644 --- a/doc/source/whatsnew/v2.1.0.rst +++ b/doc/source/whatsnew/v2.1.0.rst @@ -25,6 +25,7 @@ Copy-on-Write improvements - The :class:`DataFrame` constructor, when constructing a DataFrame from a dictionary of Index objects and specifying ``copy=False``, will now use a lazy copy of those Index objects for the columns of the DataFrame (:issue:`52947`) +- :meth:`DataFrame.head` and :meth:`DataFrame.tail` will now return copies (:issue:`54011`) - Add lazy copy mechanism to :meth:`DataFrame.eval` (:issue:`53746`) .. _whatsnew_210.enhancements.enhancement2: diff --git a/pandas/core/generic.py b/pandas/core/generic.py index f049e9d479b26..4991b06a5eb66 100644 --- a/pandas/core/generic.py +++ b/pandas/core/generic.py @@ -5773,6 +5773,8 @@ def head(self, n: int = 5) -> Self: 4 monkey 5 parrot """ + if using_copy_on_write(): + return self.iloc[:n].copy() return self.iloc[:n] @final @@ -5848,6 +5850,10 @@ def tail(self, n: int = 5) -> Self: 7 whale 8 zebra """ + if using_copy_on_write(): + if n == 0: + return self.iloc[0:0].copy() + return self.iloc[-n:].copy() if n == 0: return self.iloc[0:0] return self.iloc[-n:] diff --git a/pandas/tests/copy_view/test_methods.py b/pandas/tests/copy_view/test_methods.py index 9e7ae9942ea90..c9e50cc3242ad 100644 --- a/pandas/tests/copy_view/test_methods.py +++ b/pandas/tests/copy_view/test_methods.py @@ -895,8 +895,8 @@ def test_head_tail(method, using_copy_on_write): df2._mgr._verify_integrity() if using_copy_on_write: - assert np.shares_memory(get_array(df2, "a"), 
get_array(df, "a")) - assert np.shares_memory(get_array(df2, "b"), get_array(df, "b")) + assert not np.shares_memory(get_array(df2, "a"), get_array(df, "a")) + assert not np.shares_memory(get_array(df2, "b"), get_array(df, "b")) # modify df2 to trigger CoW for that block df2.iloc[0, 0] = 0 From 0241064b5c9af1437e6c4aa2d51995ad2c13d298 Mon Sep 17 00:00:00 2001 From: Patrick Hoefler Date: Thu, 6 Jul 2023 21:33:07 +0200 Subject: [PATCH 2/4] Fix --- pandas/conftest.py | 1 + pandas/tests/copy_view/test_methods.py | 3 ++- 2 files changed, 3 insertions(+), 1 deletion(-) diff --git a/pandas/conftest.py b/pandas/conftest.py index b2f1377a9fb32..7e448ca34337c 100644 --- a/pandas/conftest.py +++ b/pandas/conftest.py @@ -1987,6 +1987,7 @@ def using_copy_on_write() -> bool: """ Fixture to check if Copy-on-Write is enabled. """ + pd.options.mode.copy_on_write = True return pd.options.mode.copy_on_write and pd.options.mode.data_manager == "block" diff --git a/pandas/tests/copy_view/test_methods.py b/pandas/tests/copy_view/test_methods.py index c9e50cc3242ad..7f7a02575c44e 100644 --- a/pandas/tests/copy_view/test_methods.py +++ b/pandas/tests/copy_view/test_methods.py @@ -900,11 +900,12 @@ def test_head_tail(method, using_copy_on_write): # modify df2 to trigger CoW for that block df2.iloc[0, 0] = 0 - assert np.shares_memory(get_array(df2, "b"), get_array(df, "b")) if using_copy_on_write: + assert not np.shares_memory(get_array(df2, "b"), get_array(df, "b")) assert not np.shares_memory(get_array(df2, "a"), get_array(df, "a")) else: # without CoW enabled, head and tail return views. Mutating df2 also mutates df. 
+ assert np.shares_memory(get_array(df2, "b"), get_array(df, "b")) df2.iloc[0, 0] = 1 tm.assert_frame_equal(df, df_orig) From faa416db73f82b340f220144186e414ea662dea3 Mon Sep 17 00:00:00 2001 From: Patrick Hoefler Date: Thu, 6 Jul 2023 21:33:17 +0200 Subject: [PATCH 3/4] Fix --- pandas/conftest.py | 1 - 1 file changed, 1 deletion(-) diff --git a/pandas/conftest.py b/pandas/conftest.py index 7e448ca34337c..b2f1377a9fb32 100644 --- a/pandas/conftest.py +++ b/pandas/conftest.py @@ -1987,7 +1987,6 @@ def using_copy_on_write() -> bool: """ Fixture to check if Copy-on-Write is enabled. """ - pd.options.mode.copy_on_write = True return pd.options.mode.copy_on_write and pd.options.mode.data_manager == "block" From 0a846756c316839d13755e5bf7c0df8d0b22af32 Mon Sep 17 00:00:00 2001 From: Patrick Hoefler <61934744+phofl@users.noreply.github.com> Date: Thu, 13 Jul 2023 15:28:01 -0500 Subject: [PATCH 4/4] Update --- doc/source/whatsnew/v2.1.0.rst | 2 +- pandas/tests/copy_view/test_methods.py | 2 ++ 2 files changed, 3 insertions(+), 1 deletion(-) diff --git a/doc/source/whatsnew/v2.1.0.rst b/doc/source/whatsnew/v2.1.0.rst index 36d12bb6aba0f..40258a2667a4d 100644 --- a/doc/source/whatsnew/v2.1.0.rst +++ b/doc/source/whatsnew/v2.1.0.rst @@ -25,7 +25,7 @@ Copy-on-Write improvements - The :class:`DataFrame` constructor, when constructing a DataFrame from a dictionary of Index objects and specifying ``copy=False``, will now use a lazy copy of those Index objects for the columns of the DataFrame (:issue:`52947`) -- :meth:`DataFrame.head` and :meth:`DataFrame.tail` will now return copies (:issue:`54011`) +- :meth:`DataFrame.head` and :meth:`DataFrame.tail` will now return deep copies (:issue:`54011`) - Add lazy copy mechanism to :meth:`DataFrame.eval` (:issue:`53746`) .. 
_whatsnew_210.enhancements.enhancement2: diff --git a/pandas/tests/copy_view/test_methods.py b/pandas/tests/copy_view/test_methods.py index 7f7a02575c44e..597e8140caa74 100644 --- a/pandas/tests/copy_view/test_methods.py +++ b/pandas/tests/copy_view/test_methods.py @@ -895,6 +895,8 @@ def test_head_tail(method, using_copy_on_write): df2._mgr._verify_integrity() if using_copy_on_write: + # Under CoW we deliberately deviate from the usual lazy copy and make an eager + # copy here (avoids tracking references for very cheap ops) assert not np.shares_memory(get_array(df2, "a"), get_array(df, "a")) assert not np.shares_memory(get_array(df2, "b"), get_array(df, "b"))