From 92f57073490610aed667a517826cae2e0112f3f1 Mon Sep 17 00:00:00 2001 From: seljaks <33955366+seljaks@users.noreply.github.com> Date: Mon, 14 Nov 2022 00:22:14 +0100 Subject: [PATCH 1/4] ENH/TST: add CoW to df.drop and test for it GH49473 --- pandas/core/generic.py | 1 + pandas/tests/copy_view/test_methods.py | 19 ++++++++++++++++++- 2 files changed, 19 insertions(+), 1 deletion(-) diff --git a/pandas/core/generic.py b/pandas/core/generic.py index 6288ebe77c8c0..c51cc235d9fcd 100644 --- a/pandas/core/generic.py +++ b/pandas/core/generic.py @@ -4483,6 +4483,7 @@ def _drop_axis( indexer, axis=bm_axis, allow_dups=True, + copy=None, only_slice=only_slice, ) result = self._constructor(new_mgr) diff --git a/pandas/tests/copy_view/test_methods.py b/pandas/tests/copy_view/test_methods.py index 956e2cf98c9b6..7905e7f49763b 100644 --- a/pandas/tests/copy_view/test_methods.py +++ b/pandas/tests/copy_view/test_methods.py @@ -52,6 +52,24 @@ def test_copy_shallow(using_copy_on_write): # DataFrame methods returning new DataFrame using shallow copy +def test_drop_on_column(using_copy_on_write): + df = DataFrame( + {"a": [1, 2, 3], "b": [4, 5, 6], "c": [0.1, 0.2, 0.3]}, index=[10, 11, 12] + ) + df_orig = df.copy() + df2 = df.drop(columns="a") + df2._mgr._verify_integrity() + + if using_copy_on_write: + assert np.shares_memory(get_array(df2, "b"), get_array(df, "b")) + assert np.shares_memory(get_array(df2, "c"), get_array(df, "c")) + df2.iloc[0, 0] = 0 + assert not np.shares_memory(get_array(df2, "b"), get_array(df, "b")) + if using_copy_on_write: + assert np.shares_memory(get_array(df2, "c"), get_array(df, "c")) + tm.assert_frame_equal(df, df_orig) + + def test_reset_index(using_copy_on_write): # Case: resetting the index (i.e. adding a new column) + mutating the # resulting dataframe @@ -61,7 +79,6 @@ def test_reset_index(using_copy_on_write): df_orig = df.copy() df2 = df.reset_index() df2._mgr._verify_integrity() - if using_copy_on_write: # still shares memory (df2 is a shallow copy) assert np.shares_memory(get_array(df2, "b"), get_array(df, "b")) From 36a2b51d5de16a60857c091db37f0b955de7a73c Mon Sep 17 00:00:00 2001 From: seljaks <33955366+seljaks@users.noreply.github.com> Date: Mon, 14 Nov 2022 01:17:11 +0100 Subject: [PATCH 2/4] black --- pandas/tests/copy_view/test_methods.py | 1 + 1 file changed, 1 insertion(+) diff --git a/pandas/tests/copy_view/test_methods.py b/pandas/tests/copy_view/test_methods.py index 7905e7f49763b..582cf48fc349e 100644 --- a/pandas/tests/copy_view/test_methods.py +++ b/pandas/tests/copy_view/test_methods.py @@ -79,6 +79,7 @@ def test_reset_index(using_copy_on_write): df_orig = df.copy() df2 = df.reset_index() df2._mgr._verify_integrity() + if using_copy_on_write: # still shares memory (df2 is a shallow copy) assert np.shares_memory(get_array(df2, "b"), get_array(df, "b")) From d2d47b0b43ef9471d6490019b342643b50163525 Mon Sep 17 00:00:00 2001 From: seljaks <33955366+seljaks@users.noreply.github.com> Date: Mon, 14 Nov 2022 09:44:44 +0100 Subject: [PATCH 3/4] fix mypy error --- pandas/core/internals/array_manager.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/pandas/core/internals/array_manager.py b/pandas/core/internals/array_manager.py index d325e5e9b92cc..e15743bea02ad 100644 --- a/pandas/core/internals/array_manager.py +++ b/pandas/core/internals/array_manager.py @@ -546,7 +546,7 @@ def reindex_indexer( axis: AxisInt, fill_value=None, allow_dups: bool = False, - copy: bool = True, + copy: bool | None = True, # ignored keywords only_slice: bool = False, # ArrayManager specific keywords @@ -570,7 +570,7 @@ def _reindex_indexer( axis: AxisInt, fill_value=None, allow_dups: bool = False, - copy: bool = True, + copy: bool | None = True, use_na_proxy: bool = False, ) -> T: """ From b32e60614ea62be68622723aadc596273879983c Mon Sep 17 00:00:00 2001 From: seljaks <33955366+seljaks@users.noreply.github.com> Date: Tue, 15 Nov 2022 20:55:55 +0100 Subject: [PATCH 4/4] black --- pandas/tests/copy_view/test_methods.py | 37 +++++++++++++------------- 1 file changed, 19 insertions(+), 18 deletions(-) diff --git a/pandas/tests/copy_view/test_methods.py b/pandas/tests/copy_view/test_methods.py index 582cf48fc349e..4c3b2c4beefba 100644 --- a/pandas/tests/copy_view/test_methods.py +++ b/pandas/tests/copy_view/test_methods.py @@ -52,24 +52,6 @@ def test_copy_shallow(using_copy_on_write): # DataFrame methods returning new DataFrame using shallow copy -def test_drop_on_column(using_copy_on_write): - df = DataFrame( - {"a": [1, 2, 3], "b": [4, 5, 6], "c": [0.1, 0.2, 0.3]}, index=[10, 11, 12] - ) - df_orig = df.copy() - df2 = df.drop(columns="a") - df2._mgr._verify_integrity() - - if using_copy_on_write: - assert np.shares_memory(get_array(df2, "b"), get_array(df, "b")) - assert np.shares_memory(get_array(df2, "c"), get_array(df, "c")) - df2.iloc[0, 0] = 0 - assert not np.shares_memory(get_array(df2, "b"), get_array(df, "b")) - if using_copy_on_write: - assert np.shares_memory(get_array(df2, "c"), get_array(df, "c")) - tm.assert_frame_equal(df, df_orig) - - def test_reset_index(using_copy_on_write): # Case: resetting the index (i.e. adding a new column) + mutating the # resulting dataframe @@ -150,6 +132,25 @@ def test_reindex_columns(using_copy_on_write): tm.assert_frame_equal(df, df_orig) +def test_drop_on_column(using_copy_on_write): + df = DataFrame({"a": [1, 2, 3], "b": [4, 5, 6], "c": [0.1, 0.2, 0.3]}) + df_orig = df.copy() + df2 = df.drop(columns="a") + df2._mgr._verify_integrity() + + if using_copy_on_write: + assert np.shares_memory(get_array(df2, "b"), get_array(df, "b")) + assert np.shares_memory(get_array(df2, "c"), get_array(df, "c")) + else: + assert not np.shares_memory(get_array(df2, "b"), get_array(df, "b")) + assert not np.shares_memory(get_array(df2, "c"), get_array(df, "c")) + df2.iloc[0, 0] = 0 + assert not np.shares_memory(get_array(df2, "b"), get_array(df, "b")) + if using_copy_on_write: + assert np.shares_memory(get_array(df2, "c"), get_array(df, "c")) + tm.assert_frame_equal(df, df_orig) + + def test_select_dtypes(using_copy_on_write): # Case: selecting columns using `select_dtypes()` returns a new dataframe # + afterwards modifying the result