Skip to content

Commit 45ac7da

Browse files
authored
PERF: replace_list (#38097)
1 parent 11a5a4b commit 45ac7da

File tree

2 files changed

+22
-11
lines changed

2 files changed

+22
-11
lines changed

pandas/core/internals/blocks.py

+19 -7
Original file line numberDiff line numberDiff line change
@@ -861,7 +861,15 @@ def _replace_list(
861861
"""
862862
See BlockManager._replace_list docstring.
863863
"""
864-
src_len = len(src_list) - 1
864+
# Exclude anything that we know we won't contain
865+
pairs = [
866+
(x, y) for x, y in zip(src_list, dest_list) if self._can_hold_element(x)
867+
]
868+
if not len(pairs):
869+
# shortcut, nothing to replace
870+
return [self] if inplace else [self.copy()]
871+
872+
src_len = len(pairs) - 1
865873

866874
def comp(s: Scalar, mask: np.ndarray, regex: bool = False) -> np.ndarray:
867875
"""
@@ -874,15 +882,19 @@ def comp(s: Scalar, mask: np.ndarray, regex: bool = False) -> np.ndarray:
874882
s = maybe_box_datetimelike(s)
875883
return compare_or_regex_search(self.values, s, regex, mask)
876884

877-
# Calculate the mask once, prior to the call of comp
878-
# in order to avoid repeating the same computations
879-
mask = ~isna(self.values)
885+
if self.is_object:
886+
# Calculate the mask once, prior to the call of comp
887+
# in order to avoid repeating the same computations
888+
mask = ~isna(self.values)
889+
masks = [comp(s[0], mask, regex) for s in pairs]
890+
else:
891+
# GH#38086 faster if we know we don't need to check for regex
892+
masks = [missing.mask_missing(self.values, s[0]) for s in pairs]
880893

881-
masks = [comp(s, mask, regex) for s in src_list]
882894
masks = [_extract_bool_array(x) for x in masks]
883895

884896
rb = [self if inplace else self.copy()]
885-
for i, (src, dest) in enumerate(zip(src_list, dest_list)):
897+
for i, (src, dest) in enumerate(pairs):
886898
new_rb: List["Block"] = []
887899
for blk in rb:
888900
m = masks[i]
@@ -1037,7 +1049,7 @@ def _putmask_simple(self, mask: np.ndarray, value: Any):
10371049
if lib.is_scalar(value) and isinstance(values, np.ndarray):
10381050
value = convert_scalar_for_putitemlike(value, values.dtype)
10391051

1040-
if self.is_extension or self.is_object:
1052+
if self.is_extension or (self.is_object and not lib.is_scalar(value)):
10411053
# GH#19266 using np.putmask gives unexpected results with listlike value
10421054
if is_list_like(value) and len(value) == len(values):
10431055
values[mask] = value[mask]

pandas/tests/series/methods/test_replace.py

+3 -4
Original file line numberDiff line numberDiff line change
@@ -75,10 +75,9 @@ def test_replace(self, datetime_series):
7575
with pytest.raises(ValueError, match=msg):
7676
ser.replace([1, 2, 3], [np.nan, 0])
7777

78-
# make sure that we aren't just masking a TypeError because bools don't
79-
# implement indexing
80-
with pytest.raises(TypeError, match="Cannot compare types .+"):
81-
ser.replace([1, 2], [np.nan, 0])
78+
# ser is dt64 so can't hold 1 or 2, so this replace is a no-op
79+
result = ser.replace([1, 2], [np.nan, 0])
80+
tm.assert_series_equal(result, ser)
8281

8382
ser = pd.Series([0, 1, 2, 3, 4])
8483
result = ser.replace([0, 1, 2, 3, 4], [4, 3, 2, 1, 0])

0 commit comments

Comments
 (0)