diff --git a/pandas/core/generic.py b/pandas/core/generic.py index 998c57b66509d..ce123c704ba33 100644 --- a/pandas/core/generic.py +++ b/pandas/core/generic.py @@ -5429,6 +5429,8 @@ def _reindex_with_indexers( if (copy or copy is None) and new_data is self._mgr: new_data = new_data.copy(deep=copy) + elif using_copy_on_write() and new_data is self._mgr: + new_data = new_data.copy(deep=copy) return self._constructor(new_data).__finalize__(self) @@ -9469,6 +9471,7 @@ def _align_series( limit=None, fill_axis: Axis = 0, ): + uses_cow = using_copy_on_write() is_series = isinstance(self, ABCSeries) @@ -9492,7 +9495,10 @@ def _align_series( if is_series: left = self._reindex_indexer(join_index, lidx, copy) elif lidx is None or join_index is None: - left = self.copy(deep=copy) if copy or copy is None else self + if uses_cow: + left = self.copy(deep=copy) + else: + left = self.copy(deep=copy) if copy or copy is None else self else: left = self._constructor( self._mgr.reindex_indexer(join_index, lidx, axis=1, copy=copy) @@ -9521,7 +9527,10 @@ def _align_series( left = self._constructor(fdata) if ridx is None: - right = other.copy(deep=copy) if copy or copy is None else other + if uses_cow: + right = other.copy(deep=copy) + else: + right = other.copy(deep=copy) if copy or copy is None else other else: right = other.reindex(join_index, level=level) diff --git a/pandas/core/series.py b/pandas/core/series.py index c6ba217042353..e2fc75dda02a5 100644 --- a/pandas/core/series.py +++ b/pandas/core/series.py @@ -4643,6 +4643,8 @@ def _reindex_indexer( if indexer is None and ( new_index is None or new_index.names == self.index.names ): + if using_copy_on_write(): + return self.copy(deep=copy) if copy or copy is None: return self.copy(deep=copy) return self diff --git a/pandas/tests/copy_view/test_methods.py b/pandas/tests/copy_view/test_methods.py index 0cec5522e39cd..0051b5db1a65a 100644 --- a/pandas/tests/copy_view/test_methods.py +++ b/pandas/tests/copy_view/test_methods.py @@ -456,6 +456,41 @@ def test_align_series(using_copy_on_write): tm.assert_series_equal(ser_other, ser_orig) +def test_align_copy_false(using_copy_on_write): + df = DataFrame({"a": [1, 2, 3], "b": [4, 5, 6]}) + df_orig = df.copy() + df2, df3 = df.align(df, copy=False) + + assert np.shares_memory(get_array(df, "b"), get_array(df2, "b")) + assert np.shares_memory(get_array(df, "a"), get_array(df2, "a")) + + if using_copy_on_write: + df2.loc[0, "a"] = 0 + tm.assert_frame_equal(df, df_orig) # Original is unchanged + + df3.loc[0, "a"] = 0 + tm.assert_frame_equal(df, df_orig) # Original is unchanged + + +def test_align_with_series_copy_false(using_copy_on_write): + df = DataFrame({"a": [1, 2, 3], "b": [4, 5, 6]}) + ser = Series([1, 2, 3], name="x") + ser_orig = ser.copy() + df_orig = df.copy() + df2, ser2 = df.align(ser, copy=False, axis=0) + + assert np.shares_memory(get_array(df, "b"), get_array(df2, "b")) + assert np.shares_memory(get_array(df, "a"), get_array(df2, "a")) + assert np.shares_memory(get_array(ser, "x"), get_array(ser2, "x")) + + if using_copy_on_write: + df2.loc[0, "a"] = 0 + tm.assert_frame_equal(df, df_orig) # Original is unchanged + + ser2.loc[0] = 0 + tm.assert_series_equal(ser, ser_orig) # Original is unchanged + + def test_to_frame(using_copy_on_write): # Case: converting a Series to a DataFrame with to_frame ser = Series([1, 2, 3]) diff --git a/pandas/tests/frame/methods/test_align.py b/pandas/tests/frame/methods/test_align.py index 88963dcc4b0f7..d4883fd854e07 100644 --- a/pandas/tests/frame/methods/test_align.py +++ b/pandas/tests/frame/methods/test_align.py @@ -40,12 +40,15 @@ def test_frame_align_aware(self): assert new1.index.tz is timezone.utc assert new2.index.tz is timezone.utc - def test_align_float(self, float_frame): + def test_align_float(self, float_frame, using_copy_on_write): af, bf = float_frame.align(float_frame) assert af._mgr is not float_frame._mgr af, bf = float_frame.align(float_frame, copy=False) - assert af._mgr is float_frame._mgr + if not using_copy_on_write: + assert af._mgr is float_frame._mgr + else: + assert af._mgr is not float_frame._mgr # axis = 0 other = float_frame.iloc[:-5, :3] diff --git a/pandas/tests/series/methods/test_align.py b/pandas/tests/series/methods/test_align.py index f3fc46e1e39af..b2e03684bc902 100644 --- a/pandas/tests/series/methods/test_align.py +++ b/pandas/tests/series/methods/test_align.py @@ -82,7 +82,7 @@ def test_align_fill_method( tm.assert_series_equal(ab, eb) -def test_align_nocopy(datetime_series): +def test_align_nocopy(datetime_series, using_copy_on_write): b = datetime_series[:5].copy() # do copy @@ -95,7 +95,10 @@ def test_align_nocopy(datetime_series): a = datetime_series.copy() ra, _ = a.align(b, join="left", copy=False) ra[:5] = 5 - assert (a[:5] == 5).all() + if using_copy_on_write: + assert not (a[:5] == 5).any() + else: + assert (a[:5] == 5).all() # do copy a = datetime_series.copy() @@ -109,7 +112,10 @@ def test_align_nocopy(datetime_series): b = datetime_series[:5].copy() _, rb = a.align(b, join="right", copy=False) rb[:2] = 5 - assert (b[:2] == 5).all() + if using_copy_on_write: + assert not (b[:2] == 5).any() + else: + assert (b[:2] == 5).all() def test_align_same_index(datetime_series):