From aec8dd93c806915ad3ead35e830ea45af5dc7bd5 Mon Sep 17 00:00:00 2001 From: Patrick Hoefler Date: Wed, 4 Jan 2023 22:24:16 +0100 Subject: [PATCH 1/3] ENH: Add lazy copy to pipe --- pandas/core/generic.py | 5 ++++- pandas/tests/copy_view/test_methods.py | 24 ++++++++++++++++++++++++ 2 files changed, 28 insertions(+), 1 deletion(-) diff --git a/pandas/core/generic.py b/pandas/core/generic.py index 3517f3ee9183d..5b4ecf6a4175a 100644 --- a/pandas/core/generic.py +++ b/pandas/core/generic.py @@ -5771,7 +5771,10 @@ def pipe( ... .pipe((func, 'arg2'), arg1=a, arg3=c) ... ) # doctest: +SKIP """ - return common.pipe(self, func, *args, **kwargs) + result = common.pipe(self, func, *args, **kwargs) + if _using_copy_on_write(): + return result.copy(deep=None) + return result # ---------------------------------------------------------------------- # Attribute access diff --git a/pandas/tests/copy_view/test_methods.py b/pandas/tests/copy_view/test_methods.py index 63b0be281fe53..346ed94265589 100644 --- a/pandas/tests/copy_view/test_methods.py +++ b/pandas/tests/copy_view/test_methods.py @@ -115,6 +115,30 @@ def test_rename_columns_modify_parent(using_copy_on_write): tm.assert_frame_equal(df2, df2_orig) +def test_pipe(using_copy_on_write): + df = DataFrame({"a": [1, 2, 3], "b": 1.5}) + df_orig = df.copy() + + def testfunc(df): + return df + + df2 = df.pipe(testfunc) + + assert np.shares_memory(get_array(df2, "a"), get_array(df, "a")) + + # mutating df2 triggers a copy-on-write for that column + df2.iloc[0, 0] = 0 + if using_copy_on_write: + tm.assert_frame_equal(df, df_orig) + assert not np.shares_memory(get_array(df2, "a"), get_array(df, "a")) + else: + expected = DataFrame({"a": [0, 2, 3], "b": 1.5}) + tm.assert_frame_equal(df, expected) + + assert np.shares_memory(get_array(df2, "a"), get_array(df, "a")) + assert np.shares_memory(get_array(df2, "b"), get_array(df, "b")) + + def test_reindex_columns(using_copy_on_write): # Case: reindexing the column returns a new dataframe # + afterwards modifying the result From 6e7cf365119a1598e9d3edbe780a0493cd3ddd4b Mon Sep 17 00:00:00 2001 From: Patrick Hoefler Date: Thu, 5 Jan 2023 23:06:35 +0100 Subject: [PATCH 2/3] Add test and move copy --- pandas/core/generic.py | 5 ++--- pandas/tests/copy_view/test_methods.py | 23 +++++++++++++++++++++++ 2 files changed, 25 insertions(+), 3 deletions(-) diff --git a/pandas/core/generic.py b/pandas/core/generic.py index 5b4ecf6a4175a..5a90f1fc14d3c 100644 --- a/pandas/core/generic.py +++ b/pandas/core/generic.py @@ -5771,10 +5771,9 @@ def pipe( ... .pipe((func, 'arg2'), arg1=a, arg3=c) ... ) # doctest: +SKIP """ - result = common.pipe(self, func, *args, **kwargs) if _using_copy_on_write(): - return result.copy(deep=None) - return result + return common.pipe(self.copy(deep=None), func, *args, **kwargs) + return common.pipe(self, func, *args, **kwargs) # ---------------------------------------------------------------------- # Attribute access diff --git a/pandas/tests/copy_view/test_methods.py b/pandas/tests/copy_view/test_methods.py index 346ed94265589..3cab14234f5dd 100644 --- a/pandas/tests/copy_view/test_methods.py +++ b/pandas/tests/copy_view/test_methods.py @@ -139,6 +139,29 @@ def testfunc(df): assert np.shares_memory(get_array(df2, "b"), get_array(df, "b")) +def test_pipe_modify_df(using_copy_on_write): + df = DataFrame({"a": [1, 2, 3], "b": 1.5}) + df_orig = df.copy() + + def testfunc(df): + df.iloc[0, 0] = 100 + return df + + df2 = df.pipe(testfunc) + + assert np.shares_memory(get_array(df2, "b"), get_array(df, "b")) + + if using_copy_on_write: + tm.assert_frame_equal(df, df_orig) + assert not np.shares_memory(get_array(df2, "a"), get_array(df, "a")) + else: + expected = DataFrame({"a": [100, 2, 3], "b": 1.5}) + tm.assert_frame_equal(df, expected) + + assert np.shares_memory(get_array(df2, "a"), get_array(df, "a")) + assert np.shares_memory(get_array(df2, "b"), get_array(df, "b")) + + def test_reindex_columns(using_copy_on_write): # Case: reindexing the column returns a new dataframe # + afterwards modifying the result From e9f302070617c97d55ca1e54247a04fcdee30f6f Mon Sep 17 00:00:00 2001 From: Patrick Hoefler Date: Fri, 6 Jan 2023 14:29:36 +0100 Subject: [PATCH 3/3] Rename --- pandas/core/generic.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/core/generic.py b/pandas/core/generic.py index 9658bc2736f89..3cade2568d921 100644 --- a/pandas/core/generic.py +++ b/pandas/core/generic.py @@ -5771,7 +5771,7 @@ def pipe( ... .pipe((func, 'arg2'), arg1=a, arg3=c) ... ) # doctest: +SKIP """ - if _using_copy_on_write(): + if using_copy_on_write(): return common.pipe(self.copy(deep=None), func, *args, **kwargs) return common.pipe(self, func, *args, **kwargs)