def replace_regex(
    values: ArrayLike, rx: re.Pattern, value, mask: Optional[np.ndarray]
) -> None:
    """
    Apply a compiled-regex replacement to the string elements of ``values``.

    Parameters
    ----------
    values : ArrayLike
        Object dtype.
    rx : re.Pattern
        Compiled pattern searched for in each string element. If ``rx`` is
        not a compiled pattern, ``values`` is left unchanged.
    value : Any
        Replacement. When it is a ``str`` it is used as the ``repl`` argument
        of ``rx.sub``, so only the matched portion of an element changes.
        Otherwise (numeric, nan, arbitrary object) any string element in
        which ``rx`` finds a match is replaced wholesale by ``value``.
    mask : np.ndarray[bool], optional
        When given, only ``values[mask]`` is processed and written back;
        when None, every element is processed.

    Notes
    -----
    Alters values in-place. Elements that are not ``str`` instances are
    never modified.
    """
    # deal with replacing values with objects (strings) that match but
    # whose replacement is not a string (numeric, nan, object)
    swap_whole_element = isna(value) or not isinstance(value, str)

    # The pattern check does not depend on the element being visited, so do
    # it once here instead of once per element inside the vectorized
    # callable. Without a compiled pattern every element maps to itself,
    # hence there is nothing to write back.
    if not is_re(rx):
        return

    if swap_whole_element:

        def re_replacer(s):
            # non-strings (including nulls) pass through untouched
            if isinstance(s, str) and rx.search(s) is not None:
                return value
            return s

    else:
        # value is guaranteed to be a string here, s can be either a string
        # or null; if it's null it gets returned as-is
        def re_replacer(s):
            if isinstance(s, str):
                return rx.sub(value, s)
            return s

    # otypes pins the output dtype so the result can be assigned straight
    # back into ``values`` without an inferred-dtype surprise
    f = np.vectorize(re_replacer, otypes=[values.dtype])

    if mask is None:
        values[:] = f(values)
    else:
        values[mask] = f(values[mask])
import compare_or_regex_search +from pandas.core.array_algos.replace import compare_or_regex_search, replace_regex from pandas.core.array_algos.transforms import shift from pandas.core.arrays import ( Categorical, @@ -2563,32 +2563,7 @@ def _replace_single( return super().replace(to_replace, value, inplace=inplace, regex=regex) new_values = self.values if inplace else self.values.copy() - - # deal with replacing values with objects (strings) that match but - # whose replacement is not a string (numeric, nan, object) - if isna(value) or not isinstance(value, str): - - def re_replacer(s): - if is_re(rx) and isinstance(s, str): - return value if rx.search(s) is not None else s - else: - return s - - else: - # value is guaranteed to be a string here, s can be either a string - # or null if it's null it gets returned - def re_replacer(s): - if is_re(rx) and isinstance(s, str): - return rx.sub(value, s) - else: - return s - - f = np.vectorize(re_replacer, otypes=[self.dtype]) - - if mask is None: - new_values[:] = f(new_values) - else: - new_values[mask] = f(new_values[mask]) + replace_regex(new_values, rx, value, mask) # convert block = self.make_block(new_values)