Skip to content

Commit 8090479

Browse files
authored
Place the calculation of mask prior to the calls of comp in replace_list to improve performance (#35229)
1 parent 66003e4 commit 8090479

File tree

1 file changed

+12
-7
lines changed

1 file changed

+12
-7
lines changed

pandas/core/internals/managers.py

+12-7
Original file line numberDiff line numberDiff line change
@@ -596,18 +596,22 @@ def replace_list(
596596
# figure out our mask a priori to avoid repeated replacements
597597
values = self.as_array()
598598

599-
def comp(s, regex=False):
599+
def comp(s: Scalar, mask: np.ndarray, regex: bool = False):
600600
"""
601601
Generate a bool array by performing an equality check, or by performing
602602
an element-wise regular expression matching
603603
"""
604604
if isna(s):
605-
return isna(values)
605+
return ~mask
606606

607607
s = com.maybe_box_datetimelike(s)
608-
return _compare_or_regex_search(values, s, regex)
608+
return _compare_or_regex_search(values, s, regex, mask)
609609

610-
masks = [comp(s, regex) for s in src_list]
610+
# Calculate the mask once, prior to the call of comp
611+
# in order to avoid repeating the same computations
612+
mask = ~isna(values)
613+
614+
masks = [comp(s, mask, regex) for s in src_list]
611615

612616
result_blocks = []
613617
src_len = len(src_list) - 1
@@ -1895,7 +1899,7 @@ def _merge_blocks(
18951899

18961900

18971901
def _compare_or_regex_search(
1898-
a: ArrayLike, b: Scalar, regex: bool = False
1902+
a: ArrayLike, b: Scalar, regex: bool = False, mask: Optional[ArrayLike] = None
18991903
) -> Union[ArrayLike, bool]:
19001904
"""
19011905
Compare two array_like inputs of the same shape or two scalar values
@@ -1908,6 +1912,7 @@ def _compare_or_regex_search(
19081912
a : array_like
19091913
b : scalar
19101914
regex : bool, default False
1915+
mask : array_like or None (default)
19111916
19121917
Returns
19131918
-------
@@ -1941,7 +1946,7 @@ def _check_comparison_types(
19411946
)
19421947

19431948
# GH#32621 use mask to avoid comparing to NAs
1944-
if isinstance(a, np.ndarray) and not isinstance(b, np.ndarray):
1949+
if mask is None and isinstance(a, np.ndarray) and not isinstance(b, np.ndarray):
19451950
mask = np.reshape(~(isna(a)), a.shape)
19461951
if isinstance(a, np.ndarray):
19471952
a = a[mask]
@@ -1953,7 +1958,7 @@ def _check_comparison_types(
19531958

19541959
result = op(a)
19551960

1956-
if isinstance(result, np.ndarray):
1961+
if isinstance(result, np.ndarray) and mask is not None:
19571962
# The shape of the mask can differ from that of the result
19581963
# since we may compare only a subset of a's or b's elements
19591964
tmp = np.zeros(mask.shape, dtype=np.bool_)

0 commit comments

Comments
 (0)