Skip to content

Commit cdb9cff

Browse files
authored
CoW: Copy less in replace implementation with listlikes (#54116)
1 parent cd9d391 commit cdb9cff

File tree

2 files changed

+50
-4
lines changed

2 files changed

+50
-4
lines changed

pandas/core/internals/blocks.py

+14-2
Original file line numberDiff line numberDiff line change
@@ -11,6 +11,7 @@
1111
final,
1212
)
1313
import warnings
14+
import weakref
1415

1516
import numpy as np
1617

@@ -839,7 +840,7 @@ def replace_list(
839840
if inplace:
840841
masks = list(masks)
841842

842-
if using_cow and inplace:
843+
if using_cow:
843844
# Don't set up refs here, otherwise we will think that we have
844845
# references when we check again later
845846
rb = [self]
@@ -872,6 +873,17 @@ def replace_list(
872873
regex=regex,
873874
using_cow=using_cow,
874875
)
876+
877+
if using_cow and i != src_len:
878+
# This is ugly, but we have to get rid of intermediate refs
879+
# that did not go out of scope yet, otherwise we will trigger
880+
# many unnecessary copies
881+
for b in result:
882+
ref = weakref.ref(b)
883+
b.refs.referenced_blocks.pop(
884+
b.refs.referenced_blocks.index(ref)
885+
)
886+
875887
if convert and blk.is_object and not all(x is None for x in dest_list):
876888
# GH#44498 avoid unwanted cast-back
877889
result = extend_blocks(
@@ -936,7 +948,7 @@ def _replace_coerce(
936948
putmask_inplace(nb.values, mask, value)
937949
return [nb]
938950
if using_cow:
939-
return [self.copy(deep=False)]
951+
return [self]
940952
return [self] if inplace else [self.copy()]
941953
return self.replace(
942954
to_replace=to_replace,

pandas/tests/copy_view/test_replace.py

+36-2
Original file line numberDiff line numberDiff line change
@@ -333,8 +333,7 @@ def test_replace_list_multiple_elements_inplace(using_copy_on_write):
333333
arr = get_array(df, "a")
334334
df.replace([1, 2], 4, inplace=True)
335335
if using_copy_on_write:
336-
# TODO(CoW): This should share memory
337-
assert not np.shares_memory(arr, get_array(df, "a"))
336+
assert np.shares_memory(arr, get_array(df, "a"))
338337
assert df._mgr._has_no_reference(0)
339338
else:
340339
assert np.shares_memory(arr, get_array(df, "a"))
@@ -396,3 +395,38 @@ def test_replace_chained_assignment(using_copy_on_write):
396395
with tm.raises_chained_assignment_error():
397396
df[["a"]].replace(1, 100, inplace=True)
398397
tm.assert_frame_equal(df, df_orig)
398+
399+
400+
def test_replace_listlike(using_copy_on_write):
401+
df = DataFrame({"a": [1, 2, 3], "b": [1, 2, 3]})
402+
df_orig = df.copy()
403+
404+
result = df.replace([200, 201], [11, 11])
405+
if using_copy_on_write:
406+
assert np.shares_memory(get_array(result, "a"), get_array(df, "a"))
407+
else:
408+
assert not np.shares_memory(get_array(result, "a"), get_array(df, "a"))
409+
410+
result.iloc[0, 0] = 100
411+
tm.assert_frame_equal(df, df)
412+
413+
result = df.replace([200, 2], [10, 10])
414+
assert not np.shares_memory(get_array(df, "a"), get_array(result, "a"))
415+
tm.assert_frame_equal(df, df_orig)
416+
417+
418+
def test_replace_listlike_inplace(using_copy_on_write):
419+
df = DataFrame({"a": [1, 2, 3], "b": [1, 2, 3]})
420+
arr = get_array(df, "a")
421+
df.replace([200, 2], [10, 11], inplace=True)
422+
assert np.shares_memory(get_array(df, "a"), arr)
423+
424+
view = df[:]
425+
df_orig = df.copy()
426+
df.replace([200, 3], [10, 11], inplace=True)
427+
if using_copy_on_write:
428+
assert not np.shares_memory(get_array(df, "a"), arr)
429+
tm.assert_frame_equal(view, df_orig)
430+
else:
431+
assert np.shares_memory(get_array(df, "a"), arr)
432+
tm.assert_frame_equal(df, view)

0 commit comments

Comments
 (0)