From a1a6a9dd2a020d644233342508c781249b653045 Mon Sep 17 00:00:00 2001 From: Patrick Hoefler Date: Fri, 20 Jan 2023 20:57:15 +0000 Subject: [PATCH 1/2] ENH: Make shallow copy for align nocopy with CoW --- pandas/core/generic.py | 13 +++++++-- pandas/core/series.py | 2 ++ pandas/tests/copy_view/test_methods.py | 35 +++++++++++++++++++++++ pandas/tests/frame/methods/test_align.py | 7 +++-- pandas/tests/series/methods/test_align.py | 12 ++++++-- 5 files changed, 62 insertions(+), 7 deletions(-) diff --git a/pandas/core/generic.py b/pandas/core/generic.py index 028fdacd7444e..a12d7fbdc93f5 100644 --- a/pandas/core/generic.py +++ b/pandas/core/generic.py @@ -5432,6 +5432,8 @@ def _reindex_with_indexers( if (copy or copy is None) and new_data is self._mgr: new_data = new_data.copy(deep=copy) + elif using_copy_on_write() and new_data is self._mgr: + new_data = new_data.copy(deep=copy) return self._constructor(new_data).__finalize__(self) @@ -9472,6 +9474,7 @@ def _align_series( limit=None, fill_axis: Axis = 0, ): + uses_cow = using_copy_on_write() is_series = isinstance(self, ABCSeries) @@ -9495,7 +9498,10 @@ def _align_series( if is_series: left = self._reindex_indexer(join_index, lidx, copy) elif lidx is None or join_index is None: - left = self.copy(deep=copy) if copy or copy is None else self + if uses_cow: + left = self.copy(deep=copy) + else: + left = self.copy(deep=copy) if copy or copy is None else self else: left = self._constructor( self._mgr.reindex_indexer(join_index, lidx, axis=1, copy=copy) @@ -9524,7 +9530,10 @@ def _align_series( left = self._constructor(fdata) if ridx is None: - right = other.copy(deep=copy) if copy or copy is None else other + if uses_cow: + right = other.copy(deep=copy) + else: + right = other.copy(deep=copy) if copy or copy is None else other else: right = other.reindex(join_index, level=level) diff --git a/pandas/core/series.py b/pandas/core/series.py index 92f53736b7ec9..b835745ce2304 100644 --- a/pandas/core/series.py +++ b/pandas/core/series.py @@ -4645,6 +4645,8 @@ def _reindex_indexer( if indexer is None and ( new_index is None or new_index.names == self.index.names ): + if using_copy_on_write(): + return self.copy(deep=copy) if copy or copy is None: return self.copy(deep=copy) return self diff --git a/pandas/tests/copy_view/test_methods.py b/pandas/tests/copy_view/test_methods.py index 8fd9d5c5126c1..d1085c964b26f 100644 --- a/pandas/tests/copy_view/test_methods.py +++ b/pandas/tests/copy_view/test_methods.py @@ -1064,3 +1064,38 @@ def test_isetitem(using_copy_on_write): assert np.shares_memory(get_array(df, "c"), get_array(df2, "c")) else: assert not np.shares_memory(get_array(df, "c"), get_array(df2, "c")) + + +def test_align_copy_false(using_copy_on_write): + df = DataFrame({"a": [1, 2, 3], "b": [4, 5, 6]}) + df_orig = df.copy() + df2, df3 = df.align(df, copy=False) + + assert np.shares_memory(get_array(df, "b"), get_array(df2, "b")) + assert np.shares_memory(get_array(df, "a"), get_array(df2, "a")) + + if using_copy_on_write: + df2.loc[0, "a"] = 0 + tm.assert_frame_equal(df, df_orig) # Original is unchanged + + df3.loc[0, "a"] = 0 + tm.assert_frame_equal(df, df_orig) # Original is unchanged + + +def test_align_with_series_copy_false(using_copy_on_write): + df = DataFrame({"a": [1, 2, 3], "b": [4, 5, 6]}) + ser = Series([1, 2, 3], name="x") + ser_orig = ser.copy() + df_orig = df.copy() + df2, ser2 = df.align(ser, copy=False, axis=0) + + assert np.shares_memory(get_array(df, "b"), get_array(df2, "b")) + assert np.shares_memory(get_array(df, "a"), get_array(df2, "a")) + assert np.shares_memory(get_array(ser, "x"), get_array(ser2, "x")) + + if using_copy_on_write: + df2.loc[0, "a"] = 0 + tm.assert_frame_equal(df, df_orig) # Original is unchanged + + ser2.loc[0] = 0 + tm.assert_series_equal(ser, ser_orig) # Original is unchanged diff --git a/pandas/tests/frame/methods/test_align.py b/pandas/tests/frame/methods/test_align.py index 88963dcc4b0f7..d4883fd854e07 100644 --- a/pandas/tests/frame/methods/test_align.py +++ b/pandas/tests/frame/methods/test_align.py @@ -40,12 +40,15 @@ def test_frame_align_aware(self): assert new1.index.tz is timezone.utc assert new2.index.tz is timezone.utc - def test_align_float(self, float_frame): + def test_align_float(self, float_frame, using_copy_on_write): af, bf = float_frame.align(float_frame) assert af._mgr is not float_frame._mgr af, bf = float_frame.align(float_frame, copy=False) - assert af._mgr is float_frame._mgr + if not using_copy_on_write: + assert af._mgr is float_frame._mgr + else: + assert af._mgr is not float_frame._mgr # axis = 0 other = float_frame.iloc[:-5, :3] diff --git a/pandas/tests/series/methods/test_align.py b/pandas/tests/series/methods/test_align.py index f3fc46e1e39af..b2e03684bc902 100644 --- a/pandas/tests/series/methods/test_align.py +++ b/pandas/tests/series/methods/test_align.py @@ -82,7 +82,7 @@ def test_align_fill_method( tm.assert_series_equal(ab, eb) -def test_align_nocopy(datetime_series): +def test_align_nocopy(datetime_series, using_copy_on_write): b = datetime_series[:5].copy() # do copy @@ -95,7 +95,10 @@ def test_align_nocopy(datetime_series): a = datetime_series.copy() ra, _ = a.align(b, join="left", copy=False) ra[:5] = 5 - assert (a[:5] == 5).all() + if using_copy_on_write: + assert not (a[:5] == 5).any() + else: + assert (a[:5] == 5).all() # do copy a = datetime_series.copy() @@ -109,7 +112,10 @@ def test_align_nocopy(datetime_series): b = datetime_series[:5].copy() _, rb = a.align(b, join="right", copy=False) rb[:2] = 5 - assert (b[:2] == 5).all() + if using_copy_on_write: + assert not (b[:2] == 5).any() + else: + assert (b[:2] == 5).all() def test_align_same_index(datetime_series): From 2aacc427d32bc9339a7b621c3ed0427b2404ef31 Mon Sep 17 00:00:00 2001 From: Patrick Hoefler Date: Mon, 23 Jan 2023 20:45:19 -0500 Subject: [PATCH 2/2] Move test --- pandas/tests/copy_view/test_methods.py | 70 +++++++++++++------------- 1 file changed, 35 insertions(+), 35 deletions(-) diff --git a/pandas/tests/copy_view/test_methods.py b/pandas/tests/copy_view/test_methods.py index b1eb28d67b7a5..0051b5db1a65a 100644 --- a/pandas/tests/copy_view/test_methods.py +++ b/pandas/tests/copy_view/test_methods.py @@ -456,6 +456,41 @@ def test_align_series(using_copy_on_write): tm.assert_series_equal(ser_other, ser_orig) +def test_align_copy_false(using_copy_on_write): + df = DataFrame({"a": [1, 2, 3], "b": [4, 5, 6]}) + df_orig = df.copy() + df2, df3 = df.align(df, copy=False) + + assert np.shares_memory(get_array(df, "b"), get_array(df2, "b")) + assert np.shares_memory(get_array(df, "a"), get_array(df2, "a")) + + if using_copy_on_write: + df2.loc[0, "a"] = 0 + tm.assert_frame_equal(df, df_orig) # Original is unchanged + + df3.loc[0, "a"] = 0 + tm.assert_frame_equal(df, df_orig) # Original is unchanged + + +def test_align_with_series_copy_false(using_copy_on_write): + df = DataFrame({"a": [1, 2, 3], "b": [4, 5, 6]}) + ser = Series([1, 2, 3], name="x") + ser_orig = ser.copy() + df_orig = df.copy() + df2, ser2 = df.align(ser, copy=False, axis=0) + + assert np.shares_memory(get_array(df, "b"), get_array(df2, "b")) + assert np.shares_memory(get_array(df, "a"), get_array(df2, "a")) + assert np.shares_memory(get_array(ser, "x"), get_array(ser2, "x")) + + if using_copy_on_write: + df2.loc[0, "a"] = 0 + tm.assert_frame_equal(df, df_orig) # Original is unchanged + + ser2.loc[0] = 0 + tm.assert_series_equal(ser, ser_orig) # Original is unchanged + + def test_to_frame(using_copy_on_write): # Case: converting a Series to a DataFrame with to_frame ser = Series([1, 2, 3]) @@ -1137,38 +1172,3 @@ def test_isetitem(using_copy_on_write): assert np.shares_memory(get_array(df, "c"), get_array(df2, "c")) else: assert not np.shares_memory(get_array(df, "c"), get_array(df2, "c")) - - -def test_align_copy_false(using_copy_on_write): - df = DataFrame({"a": [1, 2, 3], "b": [4, 5, 6]}) - df_orig = df.copy() - df2, df3 = df.align(df, copy=False) - - assert np.shares_memory(get_array(df, "b"), get_array(df2, "b")) - assert np.shares_memory(get_array(df, "a"), get_array(df2, "a")) - - if using_copy_on_write: - df2.loc[0, "a"] = 0 - tm.assert_frame_equal(df, df_orig) # Original is unchanged - - df3.loc[0, "a"] = 0 - tm.assert_frame_equal(df, df_orig) # Original is unchanged - - -def test_align_with_series_copy_false(using_copy_on_write): - df = DataFrame({"a": [1, 2, 3], "b": [4, 5, 6]}) - ser = Series([1, 2, 3], name="x") - ser_orig = ser.copy() - df_orig = df.copy() - df2, ser2 = df.align(ser, copy=False, axis=0) - - assert np.shares_memory(get_array(df, "b"), get_array(df2, "b")) - assert np.shares_memory(get_array(df, "a"), get_array(df2, "a")) - assert np.shares_memory(get_array(ser, "x"), get_array(ser2, "x")) - - if using_copy_on_write: - df2.loc[0, "a"] = 0 - tm.assert_frame_equal(df, df_orig) # Original is unchanged - - ser2.loc[0] = 0 - tm.assert_series_equal(ser, ser_orig) # Original is unchanged