Skip to content

Commit 82cd86c

Browse files
authored
REF: implement replace_regex, remove unreachable branch in ObjectBlock.replace (#37696)
1 parent 54256b5 commit 82cd86c

File tree

2 files changed

+47
-28
lines changed

2 files changed

+47
-28
lines changed

pandas/core/array_algos/replace.py

+45-1
Original file line numberDiff line numberDiff line change
@@ -3,7 +3,7 @@
33
"""
44
import operator
55
import re
6-
from typing import Pattern, Union
6+
from typing import Optional, Pattern, Union
77

88
import numpy as np
99

@@ -12,8 +12,10 @@
1212
from pandas.core.dtypes.common import (
1313
is_datetimelike_v_numeric,
1414
is_numeric_v_string_like,
15+
is_re,
1516
is_scalar,
1617
)
18+
from pandas.core.dtypes.missing import isna
1719

1820

1921
def compare_or_regex_search(
@@ -87,3 +89,45 @@ def _check_comparison_types(
8789

8890
_check_comparison_types(result, a, b)
8991
return result
92+
93+
94+
def replace_regex(
    values: ArrayLike, rx: re.Pattern, value, mask: Optional[np.ndarray]
) -> None:
    """
    Replace, in place, the string elements of ``values`` that match ``rx``.

    Parameters
    ----------
    values : ArrayLike
        Object dtype.
    rx : re.Pattern
        Compiled pattern searched for in every string element. If ``rx`` is
        not a compiled pattern, every element is left as it is.
    value : Any
        Replacement. When it is a string, each match inside an element is
        substituted via ``rx.sub``. When it is anything else (numeric, nan,
        object), an element containing a match is replaced wholesale.
    mask : np.ndarray[bool], optional
        When given, only the elements of ``values`` selected by ``mask`` are
        processed; ``None`` processes every element.

    Notes
    -----
    Alters values in-place. Elements that are not ``str`` are never changed.
    """
    # deal with replacing values with objects (strings) that match but
    # whose replacement is not a string (numeric, nan, object)
    if isna(value) or not isinstance(value, str):

        def _replace_match(s):
            return value if rx.search(s) is not None else s

    else:
        # value is guaranteed to be a string here, so a regular regex
        # substitution is well defined
        def _replace_match(s):
            return rx.sub(value, s)

    # ``rx`` does not change between elements: check it once rather than
    # once per element inside the vectorized callback.
    rx_is_pattern = is_re(rx)

    def re_replacer(s):
        # s can be either a string or null/other; non-strings are returned
        # unchanged
        if rx_is_pattern and isinstance(s, str):
            return _replace_match(s)
        return s

    # otypes keeps the result dtype equal to the input dtype and lets
    # np.vectorize be called on empty input.
    f = np.vectorize(re_replacer, otypes=[values.dtype])

    if mask is None:
        values[:] = f(values)
    else:
        values[mask] = f(values[mask])

pandas/core/internals/blocks.py

+2-27
Original file line numberDiff line numberDiff line change
@@ -60,7 +60,7 @@
6060
from pandas.core.dtypes.missing import is_valid_nat_for_dtype, isna, isna_compat
6161

6262
import pandas.core.algorithms as algos
63-
from pandas.core.array_algos.replace import compare_or_regex_search
63+
from pandas.core.array_algos.replace import compare_or_regex_search, replace_regex
6464
from pandas.core.array_algos.transforms import shift
6565
from pandas.core.arrays import (
6666
Categorical,
@@ -2563,32 +2563,7 @@ def _replace_single(
25632563
return super().replace(to_replace, value, inplace=inplace, regex=regex)
25642564

25652565
new_values = self.values if inplace else self.values.copy()
2566-
2567-
# deal with replacing values with objects (strings) that match but
2568-
# whose replacement is not a string (numeric, nan, object)
2569-
if isna(value) or not isinstance(value, str):
2570-
2571-
def re_replacer(s):
2572-
if is_re(rx) and isinstance(s, str):
2573-
return value if rx.search(s) is not None else s
2574-
else:
2575-
return s
2576-
2577-
else:
2578-
# value is guaranteed to be a string here, s can be either a string
2579-
# or null if it's null it gets returned
2580-
def re_replacer(s):
2581-
if is_re(rx) and isinstance(s, str):
2582-
return rx.sub(value, s)
2583-
else:
2584-
return s
2585-
2586-
f = np.vectorize(re_replacer, otypes=[self.dtype])
2587-
2588-
if mask is None:
2589-
new_values[:] = f(new_values)
2590-
else:
2591-
new_values[mask] = f(new_values[mask])
2566+
replace_regex(new_values, rx, value, mask)
25922567

25932568
# convert
25942569
block = self.make_block(new_values)

0 commit comments

Comments
 (0)