Skip to content

Commit 99524e6

Browse files
authored
REF: get regex logic out of Block.replace (#44932)
1 parent 97b2890 commit 99524e6

File tree

4 files changed

+38
-22
lines changed

4 files changed

+38
-22
lines changed

pandas/core/generic.py

+12-3
Original file line number | Diff line number | Diff line change
@@ -118,6 +118,7 @@
118118
nanops,
119119
)
120120
import pandas.core.algorithms as algos
121+
from pandas.core.array_algos.replace import should_use_regex
121122
from pandas.core.arrays import ExtensionArray
122123
from pandas.core.base import PandasObject
123124
import pandas.core.common as com
@@ -6688,9 +6689,17 @@ def replace(
66886689
return self._replace_columnwise(mapping, inplace, regex)
66896690

66906691
elif not is_list_like(value): # NA -> 0
6691-
new_data = self._mgr.replace(
6692-
to_replace=to_replace, value=value, inplace=inplace, regex=regex
6693-
)
6692+
regex = should_use_regex(regex, to_replace)
6693+
if regex:
6694+
new_data = self._mgr.replace_regex(
6695+
to_replace=to_replace,
6696+
value=value,
6697+
inplace=inplace,
6698+
)
6699+
else:
6700+
new_data = self._mgr.replace(
6701+
to_replace=to_replace, value=value, inplace=inplace
6702+
)
66946703
else:
66956704
raise TypeError(
66966705
f'Invalid "to_replace" type: {repr(type(to_replace).__name__)}'

pandas/core/internals/array_manager.py

+8-2
Original file line number | Diff line number | Diff line change
@@ -413,11 +413,17 @@ def _convert(arr):
413413

414414
return self.apply(_convert)
415415

416-
def replace(self: T, value, **kwargs) -> T:
416+
def replace_regex(self: T, **kwargs) -> T:
417+
return self.apply_with_block("_replace_regex", **kwargs)
418+
419+
def replace(self: T, to_replace, value, inplace: bool) -> T:
420+
inplace = validate_bool_kwarg(inplace, "inplace")
417421
assert np.ndim(value) == 0, value
418422
# TODO "replace" is right now implemented on the blocks, we should move
419423
# it to general array algos so it can be reused here
420-
return self.apply_with_block("replace", value=value, **kwargs)
424+
return self.apply_with_block(
425+
"replace", value=value, to_replace=to_replace, inplace=inplace
426+
)
421427

422428
def replace_list(
423429
self: T,

pandas/core/internals/blocks.py

+9-14
Original file line number | Diff line number | Diff line change
@@ -640,14 +640,11 @@ def replace(
640640
to_replace,
641641
value,
642642
inplace: bool = False,
643-
regex: bool = False,
644643
) -> list[Block]:
645644
"""
646645
replace the to_replace value with value, possible to create new
647-
blocks here this is just a call to putmask. regex is not used here.
648-
It is used in ObjectBlocks. It is here for API compatibility.
646+
blocks here this is just a call to putmask.
649647
"""
650-
inplace = validate_bool_kwarg(inplace, "inplace")
651648

652649
# Note: the checks we do in NDFrame.replace ensure we never get
653650
# here with listlike to_replace or value, as those cases
@@ -661,11 +658,6 @@ def replace(
661658
blk.values._replace(to_replace=to_replace, value=value, inplace=True)
662659
return [blk]
663660

664-
regex = should_use_regex(regex, to_replace)
665-
666-
if regex:
667-
return self._replace_regex(to_replace, value, inplace=inplace)
668-
669661
if not self._can_hold_element(to_replace):
670662
# We cannot hold `to_replace`, so we know immediately that
671663
# replacing it is a no-op.
@@ -691,13 +683,12 @@ def replace(
691683
to_replace=to_replace,
692684
value=value,
693685
inplace=True,
694-
regex=regex,
695686
)
696687

697688
else:
698689
# split so that we only upcast where necessary
699690
return self.split_and_operate(
700-
type(self).replace, to_replace, value, inplace=True, regex=regex
691+
type(self).replace, to_replace, value, inplace=True
701692
)
702693

703694
@final
@@ -756,10 +747,14 @@ def replace_list(
756747
values = self.values
757748

758749
# TODO: dont special-case Categorical
759-
if isinstance(values, Categorical) and len(algos.unique(dest_list)) == 1:
750+
if (
751+
isinstance(values, Categorical)
752+
and len(algos.unique(dest_list)) == 1
753+
and not regex
754+
):
760755
# We likely got here by tiling value inside NDFrame.replace,
761756
# so un-tile here
762-
return self.replace(src_list, dest_list[0], inplace, regex)
757+
return self.replace(src_list, dest_list[0], inplace)
763758

764759
# Exclude anything that we know we won't contain
765760
pairs = [
@@ -866,7 +861,7 @@ def _replace_coerce(
866861
convert=False,
867862
mask=mask,
868863
)
869-
return self.replace(to_replace, value, inplace=inplace, regex=False)
864+
return self.replace(to_replace, value, inplace=inplace)
870865
return [self]
871866

872867
# ---------------------------------------------------------------------

pandas/core/internals/managers.py

+9-3
Original file line number | Diff line number | Diff line change
@@ -432,12 +432,18 @@ def convert(
432432
timedelta=timedelta,
433433
)
434434

435-
def replace(self: T, to_replace, value, inplace: bool, regex: bool) -> T:
436-
assert np.ndim(value) == 0, value
435+
def replace(self: T, to_replace, value, inplace: bool) -> T:
436+
inplace = validate_bool_kwarg(inplace, "inplace")
437+
# NDFrame.replace ensures the not-is_list_likes here
438+
assert not is_list_like(to_replace)
439+
assert not is_list_like(value)
437440
return self.apply(
438-
"replace", to_replace=to_replace, value=value, inplace=inplace, regex=regex
441+
"replace", to_replace=to_replace, value=value, inplace=inplace
439442
)
440443

444+
def replace_regex(self, **kwargs):
445+
return self.apply("_replace_regex", **kwargs)
446+
441447
def replace_list(
442448
self: T,
443449
src_list: list[Any],

0 commit comments

Comments
 (0)