Skip to content

Commit 69a5073

Browse files
authored
CoW: Delay copy when enlarging df with loc (#52062)
1 parent 016c868 commit 69a5073

File tree

2 files changed

+29
-2
lines changed

2 files changed

+29
-2
lines changed

pandas/core/indexing.py

+13 -2
Original file line numberDiff line numberDiff line change
@@ -1978,7 +1978,10 @@ def _setitem_with_indexer_frame_value(self, indexer, value: DataFrame, name: str
19781978
if item in value:
19791979
sub_indexer[1] = item
19801980
val = self._align_series(
1981-
tuple(sub_indexer), value[item], multiindex_indexer
1981+
tuple(sub_indexer),
1982+
value[item],
1983+
multiindex_indexer,
1984+
using_cow=using_copy_on_write(),
19821985
)
19831986
else:
19841987
val = np.nan
@@ -2184,7 +2187,13 @@ def _ensure_iterable_column_indexer(self, column_indexer):
21842187
ilocs = column_indexer
21852188
return ilocs
21862189

2187-
def _align_series(self, indexer, ser: Series, multiindex_indexer: bool = False):
2190+
def _align_series(
2191+
self,
2192+
indexer,
2193+
ser: Series,
2194+
multiindex_indexer: bool = False,
2195+
using_cow: bool = False,
2196+
):
21882197
"""
21892198
Parameters
21902199
----------
@@ -2253,6 +2262,8 @@ def ravel(i):
22532262
else:
22542263
new_ix = Index(new_ix)
22552264
if ser.index.equals(new_ix):
2265+
if using_cow:
2266+
return ser
22562267
return ser._values.copy()
22572268

22582269
return ser.reindex(new_ix)._values

pandas/tests/copy_view/test_indexing.py

+16
Original file line numberDiff line numberDiff line change
@@ -1073,3 +1073,19 @@ def test_series_midx_tuples_slice(using_copy_on_write):
10731073
index=pd.MultiIndex.from_tuples([((1, 2), 3), ((1, 2), 4), ((2, 3), 4)]),
10741074
)
10751075
tm.assert_series_equal(ser, expected)
1076+
1077+
1078+
def test_loc_enlarging_with_dataframe(using_copy_on_write):
1079+
df = DataFrame({"a": [1, 2, 3]})
1080+
rhs = DataFrame({"b": [1, 2, 3], "c": [4, 5, 6]})
1081+
rhs_orig = rhs.copy()
1082+
df.loc[:, ["b", "c"]] = rhs
1083+
if using_copy_on_write:
1084+
assert np.shares_memory(get_array(df, "b"), get_array(rhs, "b"))
1085+
assert np.shares_memory(get_array(df, "c"), get_array(rhs, "c"))
1086+
assert not df._mgr._has_no_reference(1)
1087+
else:
1088+
assert not np.shares_memory(get_array(df, "b"), get_array(rhs, "b"))
1089+
1090+
df.iloc[0, 1] = 100
1091+
tm.assert_frame_equal(rhs, rhs_orig)

0 commit comments

Comments
 (0)