Skip to content

Commit b554bb3

Browse files
authored
REF: move replace code out of Blocks (pandas-dev#39559)
1 parent aa97540 commit b554bb3

File tree

2 files changed

+38
-47
lines changed

2 files changed

+38
-47
lines changed

pandas/core/array_algos/replace.py

+20-3
Original file line numberDiff line numberDiff line change
@@ -3,7 +3,7 @@
33
"""
44
import operator
55
import re
6-
from typing import Optional, Pattern, Union
6+
from typing import Any, Optional, Pattern, Union
77

88
import numpy as np
99

@@ -13,13 +13,28 @@
1313
is_datetimelike_v_numeric,
1414
is_numeric_v_string_like,
1515
is_re,
16+
is_re_compilable,
1617
is_scalar,
1718
)
1819
from pandas.core.dtypes.missing import isna
1920

2021

22+
def should_use_regex(regex: bool, to_replace: Any) -> bool:
    """
    Decide whether to treat `to_replace` as a regular expression.

    Parameters
    ----------
    regex : bool
        Whether the caller asked for regex handling.
    to_replace : Any
        The candidate pattern.

    Returns
    -------
    bool
        True only when regex handling is enabled, `to_replace` passes
        ``is_re_compilable``, and its compiled pattern is not the empty
        string.
    """
    # A target that passes ``is_re`` switches regex handling on regardless
    # of the flag that was passed in.
    use_regex = True if is_re(to_replace) else regex
    if not use_regex:
        return use_regex

    if not is_re_compilable(to_replace):
        return False

    # Don't use regex if the pattern is empty.
    return re.compile(to_replace).pattern != ""
34+
35+
2136
def compare_or_regex_search(
22-
a: ArrayLike, b: Union[Scalar, Pattern], regex: bool, mask: ArrayLike
37+
a: ArrayLike, b: Union[Scalar, Pattern], regex: bool, mask: np.ndarray
2338
) -> Union[ArrayLike, bool]:
2439
"""
2540
Compare two array_like inputs of the same shape or two scalar values
@@ -32,12 +47,14 @@ def compare_or_regex_search(
3247
a : array_like
3348
b : scalar or regex pattern
3449
regex : bool
35-
mask : array_like
50+
mask : np.ndarray[bool]
3651
3752
Returns
3853
-------
3954
mask : array_like of bool
4055
"""
56+
if isna(b):
57+
return ~mask
4158

4259
def _check_comparison_types(
4360
result: Union[ArrayLike, bool], a: ArrayLike, b: Union[Scalar, Pattern]

pandas/core/internals/blocks.py

+18-44
Original file line numberDiff line numberDiff line change
@@ -17,7 +17,7 @@
1717
)
1818
from pandas._libs.internals import BlockPlacement
1919
from pandas._libs.tslibs import conversion
20-
from pandas._typing import ArrayLike, Dtype, DtypeObj, Scalar, Shape
20+
from pandas._typing import ArrayLike, Dtype, DtypeObj, Shape
2121
from pandas.util._validators import validate_bool_kwarg
2222

2323
from pandas.core.dtypes.cast import (
@@ -44,8 +44,6 @@
4444
is_integer,
4545
is_list_like,
4646
is_object_dtype,
47-
is_re,
48-
is_re_compilable,
4947
is_sparse,
5048
pandas_dtype,
5149
)
@@ -59,7 +57,11 @@
5957
putmask_smart,
6058
putmask_without_repeat,
6159
)
62-
from pandas.core.array_algos.replace import compare_or_regex_search, replace_regex
60+
from pandas.core.array_algos.replace import (
61+
compare_or_regex_search,
62+
replace_regex,
63+
should_use_regex,
64+
)
6365
from pandas.core.array_algos.transforms import shift
6466
from pandas.core.arrays import (
6567
Categorical,
@@ -817,6 +819,12 @@ def _replace_list(
817819
"""
818820
See BlockManager._replace_list docstring.
819821
"""
822+
# TODO: don't special-case Categorical
823+
if self.is_categorical and len(algos.unique(dest_list)) == 1:
824+
# We likely got here by tiling value inside NDFrame.replace,
825+
# so un-tile here
826+
return self.replace(src_list, dest_list[0], inplace, regex)
827+
820828
# Exclude anything that we know we won't contain
821829
pairs = [
822830
(x, y) for x, y in zip(src_list, dest_list) if self._can_hold_element(x)
@@ -827,21 +835,14 @@ def _replace_list(
827835

828836
src_len = len(pairs) - 1
829837

830-
def comp(s: Scalar, mask: np.ndarray, regex: bool = False) -> np.ndarray:
831-
"""
832-
Generate a bool array by perform an equality check, or perform
833-
an element-wise regular expression matching
834-
"""
835-
if isna(s):
836-
return ~mask
837-
838-
return compare_or_regex_search(self.values, s, regex, mask)
839-
840838
if self.is_object:
841839
# Calculate the mask once, prior to the calls to compare_or_regex_search
842840
# in order to avoid repeating the same computations
843841
mask = ~isna(self.values)
844-
masks = [comp(s[0], mask, regex) for s in pairs]
842+
masks = [
843+
compare_or_regex_search(self.values, s[0], regex=regex, mask=mask)
844+
for s in pairs
845+
]
845846
else:
846847
# GH#38086 faster if we know we don't need to check for regex
847848
masks = [missing.mask_missing(self.values, s[0]) for s in pairs]
@@ -1464,7 +1465,7 @@ def _replace_coerce(
14641465
putmask_inplace(nb.values, mask, value)
14651466
return [nb]
14661467
else:
1467-
regex = _should_use_regex(regex, to_replace)
1468+
regex = should_use_regex(regex, to_replace)
14681469
if regex:
14691470
return self._replace_regex(
14701471
to_replace,
@@ -2353,44 +2354,17 @@ def replace(
23532354
# here with listlike to_replace or value, as those cases
23542355
# go through _replace_list
23552356

2356-
regex = _should_use_regex(regex, to_replace)
2357+
regex = should_use_regex(regex, to_replace)
23572358

23582359
if regex:
23592360
return self._replace_regex(to_replace, value, inplace=inplace)
23602361
else:
23612362
return super().replace(to_replace, value, inplace=inplace, regex=False)
23622363

23632364

2364-
def _should_use_regex(regex: bool, to_replace: Any) -> bool:
2365-
"""
2366-
Decide whether to treat `to_replace` as a regular expression.
2367-
"""
2368-
if is_re(to_replace):
2369-
regex = True
2370-
2371-
regex = regex and is_re_compilable(to_replace)
2372-
2373-
# Don't use regex if the pattern is empty.
2374-
regex = regex and re.compile(to_replace).pattern != ""
2375-
return regex
2376-
2377-
23782365
class CategoricalBlock(ExtensionBlock):
23792366
__slots__ = ()
23802367

2381-
def _replace_list(
2382-
self,
2383-
src_list: List[Any],
2384-
dest_list: List[Any],
2385-
inplace: bool = False,
2386-
regex: bool = False,
2387-
) -> List[Block]:
2388-
if len(algos.unique(dest_list)) == 1:
2389-
# We likely got here by tiling value inside NDFrame.replace,
2390-
# so un-tile here
2391-
return self.replace(src_list, dest_list[0], inplace, regex)
2392-
return super()._replace_list(src_list, dest_list, inplace, regex)
2393-
23942368
def replace(
23952369
self,
23962370
to_replace,

0 commit comments

Comments
 (0)