Skip to content

Commit aeafdd6

Browse files
authored
ENH: Improve replace lazy copy handling (#51658)
1 parent ac70f23 commit aeafdd6

File tree

2 files changed

+92
-6
lines changed

2 files changed

+92
-6
lines changed

pandas/core/internals/blocks.py

+22-5
Original file line numberDiff line numberDiff line change
@@ -714,6 +714,8 @@ def replace_list(
714714
(x, y) for x, y in zip(src_list, dest_list) if self._can_hold_element(x)
715715
]
716716
if not len(pairs):
717+
if using_cow:
718+
return [self.copy(deep=False)]
717719
# shortcut, nothing to replace
718720
return [self] if inplace else [self.copy()]
719721

@@ -734,8 +736,9 @@ def replace_list(
734736
masks = [extract_bool_array(x) for x in masks]
735737

736738
if using_cow and inplace:
737-
# TODO(CoW): Optimize
738-
rb = [self.copy()]
739+
# Don't set up refs here, otherwise we will think that we have
740+
# references when we check again later
741+
rb = [self]
739742
else:
740743
rb = [self if inplace else self.copy()]
741744
for i, (src, dest) in enumerate(pairs):
@@ -762,10 +765,16 @@ def replace_list(
762765
mask=m, # type: ignore[arg-type]
763766
inplace=inplace,
764767
regex=regex,
768+
using_cow=using_cow,
765769
)
766770
if convert and blk.is_object and not all(x is None for x in dest_list):
767771
# GH#44498 avoid unwanted cast-back
768-
result = extend_blocks([b.convert(copy=True) for b in result])
772+
result = extend_blocks(
773+
[
774+
b.convert(copy=True and not using_cow, using_cow=using_cow)
775+
for b in result
776+
]
777+
)
769778
new_rb.extend(result)
770779
rb = new_rb
771780
return rb
@@ -778,6 +787,7 @@ def _replace_coerce(
778787
mask: npt.NDArray[np.bool_],
779788
inplace: bool = True,
780789
regex: bool = False,
790+
using_cow: bool = False,
781791
) -> list[Block]:
782792
"""
783793
Replace value corresponding to the given boolean array with another
@@ -811,17 +821,24 @@ def _replace_coerce(
811821
if value is None:
812822
# gh-45601, gh-45836, gh-46634
813823
if mask.any():
814-
nb = self.astype(np.dtype(object), copy=False)
815-
if nb is self and not inplace:
824+
has_ref = self.refs.has_reference()
825+
nb = self.astype(np.dtype(object), copy=False, using_cow=using_cow)
826+
if (nb is self or using_cow) and not inplace:
827+
nb = nb.copy()
828+
elif inplace and has_ref and nb.refs.has_reference():
829+
# no copy in astype and we had refs before
816830
nb = nb.copy()
817831
putmask_inplace(nb.values, mask, value)
818832
return [nb]
833+
if using_cow:
834+
return [self.copy(deep=False)]
819835
return [self] if inplace else [self.copy()]
820836
return self.replace(
821837
to_replace=to_replace,
822838
value=value,
823839
inplace=inplace,
824840
mask=mask,
841+
using_cow=using_cow,
825842
)
826843

827844
# ---------------------------------------------------------------------

pandas/tests/copy_view/test_replace.py

+70-1
Original file line numberDiff line numberDiff line change
@@ -112,7 +112,8 @@ def test_replace_to_replace_wrong_dtype(using_copy_on_write):
112112
assert not np.shares_memory(get_array(df, "b"), get_array(df2, "b"))
113113

114114

115-
def test_replace_inplace(using_copy_on_write):
115+
@pytest.mark.parametrize("to_replace", [1.5, [1.5], []])
116+
def test_replace_inplace(using_copy_on_write, to_replace):
116117
df = DataFrame({"a": [1.5, 2, 3]})
117118
arr_a = get_array(df, "a")
118119
df.replace(to_replace=1.5, value=15.5, inplace=True)
@@ -216,3 +217,71 @@ def test_masking_inplace(using_copy_on_write, method):
216217
tm.assert_frame_equal(view, df_orig)
217218
else:
218219
assert np.shares_memory(get_array(df, "a"), arr_a)
220+
221+
222+
def test_replace_empty_list(using_copy_on_write):
223+
df = DataFrame({"a": [1, 2]})
224+
225+
df2 = df.replace([], [])
226+
if using_copy_on_write:
227+
assert np.shares_memory(get_array(df2, "a"), get_array(df, "a"))
228+
assert not df._mgr._has_no_reference(0)
229+
else:
230+
assert not np.shares_memory(get_array(df2, "a"), get_array(df, "a"))
231+
232+
arr_a = get_array(df, "a")
233+
df.replace([], [])
234+
if using_copy_on_write:
235+
assert np.shares_memory(get_array(df, "a"), arr_a)
236+
assert not df._mgr._has_no_reference(0)
237+
assert not df2._mgr._has_no_reference(0)
238+
239+
240+
@pytest.mark.parametrize("value", ["d", None])
241+
def test_replace_object_list_inplace(using_copy_on_write, value):
242+
df = DataFrame({"a": ["a", "b", "c"]})
243+
arr = get_array(df, "a")
244+
df.replace(["c"], value, inplace=True)
245+
if using_copy_on_write or value is None:
246+
assert np.shares_memory(arr, get_array(df, "a"))
247+
else:
248+
# This could be inplace
249+
assert not np.shares_memory(arr, get_array(df, "a"))
250+
if using_copy_on_write:
251+
assert df._mgr._has_no_reference(0)
252+
253+
254+
def test_replace_list_multiple_elements_inplace(using_copy_on_write):
255+
df = DataFrame({"a": [1, 2, 3]})
256+
arr = get_array(df, "a")
257+
df.replace([1, 2], 4, inplace=True)
258+
if using_copy_on_write:
259+
# TODO(CoW): This should share memory
260+
assert not np.shares_memory(arr, get_array(df, "a"))
261+
assert df._mgr._has_no_reference(0)
262+
else:
263+
assert np.shares_memory(arr, get_array(df, "a"))
264+
265+
266+
def test_replace_list_none(using_copy_on_write):
267+
df = DataFrame({"a": ["a", "b", "c"]})
268+
269+
df_orig = df.copy()
270+
df2 = df.replace(["b"], value=None)
271+
tm.assert_frame_equal(df, df_orig)
272+
273+
assert not np.shares_memory(get_array(df, "a"), get_array(df2, "a"))
274+
275+
276+
def test_replace_list_none_inplace_refs(using_copy_on_write):
277+
df = DataFrame({"a": ["a", "b", "c"]})
278+
arr = get_array(df, "a")
279+
df_orig = df.copy()
280+
view = df[:]
281+
df.replace(["a"], value=None, inplace=True)
282+
if using_copy_on_write:
283+
assert df._mgr._has_no_reference(0)
284+
assert not np.shares_memory(arr, get_array(df, "a"))
285+
tm.assert_frame_equal(df_orig, view)
286+
else:
287+
assert np.shares_memory(arr, get_array(df, "a"))

0 commit comments

Comments
 (0)