Skip to content

Commit d3870de

Browse files
authored
CoW: Avoid unnecessary copies for columnwise replace (#54117)
1 parent 4d9a6e4 commit d3870de

File tree

3 files changed

+23
-7
lines changed

3 files changed

+23
-7
lines changed

pandas/core/frame.py

+8 -4
Original file line number | Diff line number | Diff line change
@@ -4188,11 +4188,15 @@ def _set_item_mgr(
41884188
if len(self):
41894189
self._check_setitem_copy()
41904190

4191-
def _iset_item(self, loc: int, value: Series) -> None:
4191+
def _iset_item(self, loc: int, value: Series, inplace: bool = True) -> None:
41924192
# We are only called from _replace_columnwise which guarantees that
41934193
# no reindex is necessary
4194-
# TODO(CoW): Optimize to avoid copy here, but have to track refs
4195-
self._iset_item_mgr(loc, value._values.copy(), inplace=True)
4194+
if using_copy_on_write():
4195+
self._iset_item_mgr(
4196+
loc, value._values, inplace=inplace, refs=value._references
4197+
)
4198+
else:
4199+
self._iset_item_mgr(loc, value._values.copy(), inplace=True)
41964200

41974201
# check if we are modifying a copy
41984202
# try to set first as we want an invalid
@@ -5510,7 +5514,7 @@ def _replace_columnwise(
55105514
target, value = mapping[ax_value]
55115515
newobj = ser.replace(target, value, regex=regex)
55125516

5513-
res._iset_item(i, newobj)
5517+
res._iset_item(i, newobj, inplace=inplace)
55145518

55155519
if inplace:
55165520
return

pandas/core/internals/managers.py

+3 -1
Original file line number | Diff line number | Diff line change
@@ -1101,7 +1101,9 @@ def value_getitem(placement):
11011101
if inplace and blk.should_store(value):
11021102
# Updating inplace -> check if we need to do Copy-on-Write
11031103
if using_copy_on_write() and not self._has_no_reference_block(blkno_l):
1104-
self._iset_split_block(blkno_l, blk_locs, value_getitem(val_locs))
1104+
self._iset_split_block(
1105+
blkno_l, blk_locs, value_getitem(val_locs), refs=refs
1106+
)
11051107
else:
11061108
blk.set_inplace(blk_locs, value_getitem(val_locs))
11071109
continue

pandas/tests/copy_view/test_replace.py

+12 -2
Original file line number | Diff line number | Diff line change
@@ -364,13 +364,23 @@ def test_replace_list_none_inplace_refs(using_copy_on_write):
364364
assert np.shares_memory(arr, get_array(df, "a"))
365365

366366

367+
def test_replace_columnwise_no_op_inplace(using_copy_on_write):
368+
df = DataFrame({"a": [1, 2, 3], "b": [1, 2, 3]})
369+
view = df[:]
370+
df_orig = df.copy()
371+
df.replace({"a": 10}, 100, inplace=True)
372+
if using_copy_on_write:
373+
assert np.shares_memory(get_array(view, "a"), get_array(df, "a"))
374+
df.iloc[0, 0] = 100
375+
tm.assert_frame_equal(view, df_orig)
376+
377+
367378
def test_replace_columnwise_no_op(using_copy_on_write):
368379
df = DataFrame({"a": [1, 2, 3], "b": [1, 2, 3]})
369380
df_orig = df.copy()
370381
df2 = df.replace({"a": 10}, 100)
371382
if using_copy_on_write:
372-
# TODO(CoW): This should share memory
373-
assert not np.shares_memory(get_array(df2, "a"), get_array(df, "a"))
383+
assert np.shares_memory(get_array(df2, "a"), get_array(df, "a"))
374384
df2.iloc[0, 0] = 100
375385
tm.assert_frame_equal(df, df_orig)
376386

0 commit comments

Comments
 (0)