Skip to content

Commit 7674874

Browse files
authored
ENH: Make shallow copy for align nocopy with CoW (#50917)
1 parent 2096725 commit 7674874

File tree

5 files changed: 62 additions (+62) and 7 deletions (-7).

pandas/core/generic.py

+11 -2
Original file line number | Diff line number | Diff line change
@@ -5429,6 +5429,8 @@ def _reindex_with_indexers(
54295429

54305430
if (copy or copy is None) and new_data is self._mgr:
54315431
new_data = new_data.copy(deep=copy)
5432+
elif using_copy_on_write() and new_data is self._mgr:
5433+
new_data = new_data.copy(deep=copy)
54325434

54335435
return self._constructor(new_data).__finalize__(self)
54345436

@@ -9469,6 +9471,7 @@ def _align_series(
94699471
limit=None,
94709472
fill_axis: Axis = 0,
94719473
):
9474+
uses_cow = using_copy_on_write()
94729475

94739476
is_series = isinstance(self, ABCSeries)
94749477

@@ -9492,7 +9495,10 @@ def _align_series(
94929495
if is_series:
94939496
left = self._reindex_indexer(join_index, lidx, copy)
94949497
elif lidx is None or join_index is None:
9495-
left = self.copy(deep=copy) if copy or copy is None else self
9498+
if uses_cow:
9499+
left = self.copy(deep=copy)
9500+
else:
9501+
left = self.copy(deep=copy) if copy or copy is None else self
94969502
else:
94979503
left = self._constructor(
94989504
self._mgr.reindex_indexer(join_index, lidx, axis=1, copy=copy)
@@ -9521,7 +9527,10 @@ def _align_series(
95219527
left = self._constructor(fdata)
95229528

95239529
if ridx is None:
9524-
right = other.copy(deep=copy) if copy or copy is None else other
9530+
if uses_cow:
9531+
right = other.copy(deep=copy)
9532+
else:
9533+
right = other.copy(deep=copy) if copy or copy is None else other
95259534
else:
95269535
right = other.reindex(join_index, level=level)
95279536

pandas/core/series.py

+2
Original file line number | Diff line number | Diff line change
@@ -4653,6 +4653,8 @@ def _reindex_indexer(
46534653
if indexer is None and (
46544654
new_index is None or new_index.names == self.index.names
46554655
):
4656+
if using_copy_on_write():
4657+
return self.copy(deep=copy)
46564658
if copy or copy is None:
46574659
return self.copy(deep=copy)
46584660
return self

pandas/tests/copy_view/test_methods.py

+35
Original file line number | Diff line number | Diff line change
@@ -456,6 +456,41 @@ def test_align_series(using_copy_on_write):
456456
tm.assert_series_equal(ser_other, ser_orig)
457457

458458

459+
def test_align_copy_false(using_copy_on_write):
460+
df = DataFrame({"a": [1, 2, 3], "b": [4, 5, 6]})
461+
df_orig = df.copy()
462+
df2, df3 = df.align(df, copy=False)
463+
464+
assert np.shares_memory(get_array(df, "b"), get_array(df2, "b"))
465+
assert np.shares_memory(get_array(df, "a"), get_array(df2, "a"))
466+
467+
if using_copy_on_write:
468+
df2.loc[0, "a"] = 0
469+
tm.assert_frame_equal(df, df_orig) # Original is unchanged
470+
471+
df3.loc[0, "a"] = 0
472+
tm.assert_frame_equal(df, df_orig) # Original is unchanged
473+
474+
475+
def test_align_with_series_copy_false(using_copy_on_write):
476+
df = DataFrame({"a": [1, 2, 3], "b": [4, 5, 6]})
477+
ser = Series([1, 2, 3], name="x")
478+
ser_orig = ser.copy()
479+
df_orig = df.copy()
480+
df2, ser2 = df.align(ser, copy=False, axis=0)
481+
482+
assert np.shares_memory(get_array(df, "b"), get_array(df2, "b"))
483+
assert np.shares_memory(get_array(df, "a"), get_array(df2, "a"))
484+
assert np.shares_memory(get_array(ser, "x"), get_array(ser2, "x"))
485+
486+
if using_copy_on_write:
487+
df2.loc[0, "a"] = 0
488+
tm.assert_frame_equal(df, df_orig) # Original is unchanged
489+
490+
ser2.loc[0] = 0
491+
tm.assert_series_equal(ser, ser_orig) # Original is unchanged
492+
493+
459494
def test_to_frame(using_copy_on_write):
460495
# Case: converting a Series to a DataFrame with to_frame
461496
ser = Series([1, 2, 3])

pandas/tests/frame/methods/test_align.py

+5 -2
Original file line number | Diff line number | Diff line change
@@ -40,12 +40,15 @@ def test_frame_align_aware(self):
4040
assert new1.index.tz is timezone.utc
4141
assert new2.index.tz is timezone.utc
4242

43-
def test_align_float(self, float_frame):
43+
def test_align_float(self, float_frame, using_copy_on_write):
4444
af, bf = float_frame.align(float_frame)
4545
assert af._mgr is not float_frame._mgr
4646

4747
af, bf = float_frame.align(float_frame, copy=False)
48-
assert af._mgr is float_frame._mgr
48+
if not using_copy_on_write:
49+
assert af._mgr is float_frame._mgr
50+
else:
51+
assert af._mgr is not float_frame._mgr
4952

5053
# axis = 0
5154
other = float_frame.iloc[:-5, :3]

pandas/tests/series/methods/test_align.py

+9 -3
Original file line number | Diff line number | Diff line change
@@ -82,7 +82,7 @@ def test_align_fill_method(
8282
tm.assert_series_equal(ab, eb)
8383

8484

85-
def test_align_nocopy(datetime_series):
85+
def test_align_nocopy(datetime_series, using_copy_on_write):
8686
b = datetime_series[:5].copy()
8787

8888
# do copy
@@ -95,7 +95,10 @@ def test_align_nocopy(datetime_series):
9595
a = datetime_series.copy()
9696
ra, _ = a.align(b, join="left", copy=False)
9797
ra[:5] = 5
98-
assert (a[:5] == 5).all()
98+
if using_copy_on_write:
99+
assert not (a[:5] == 5).any()
100+
else:
101+
assert (a[:5] == 5).all()
99102

100103
# do copy
101104
a = datetime_series.copy()
@@ -109,7 +112,10 @@ def test_align_nocopy(datetime_series):
109112
b = datetime_series[:5].copy()
110113
_, rb = a.align(b, join="right", copy=False)
111114
rb[:2] = 5
112-
assert (b[:2] == 5).all()
115+
if using_copy_on_write:
116+
assert not (b[:2] == 5).any()
117+
else:
118+
assert (b[:2] == 5).all()
113119

114120

115121
def test_align_same_index(datetime_series):

0 commit comments

Comments
 (0)