Skip to content

Commit 19d6a61

Browse files
authored
REF: simplify NDFrame.replace, ObjectBlock.replace (#37704)
1 parent 0686736 commit 19d6a61

File tree

3 files changed

+37
-48
lines changed

3 files changed

+37
-48
lines changed

pandas/core/generic.py

+17-17
Original file line numberDiff line numberDiff line change
@@ -6744,25 +6744,25 @@ def replace(
67446744
else:
67456745
raise TypeError("value argument must be scalar, dict, or Series")
67466746

6747-
elif is_list_like(to_replace): # [NA, ''] -> [0, 'missing']
6748-
if is_list_like(value):
6749-
if len(to_replace) != len(value):
6750-
raise ValueError(
6751-
f"Replacement lists must match in length. "
6752-
f"Expecting {len(to_replace)} got {len(value)} "
6753-
)
6754-
self._consolidate_inplace()
6755-
new_data = self._mgr.replace_list(
6756-
src_list=to_replace,
6757-
dest_list=value,
6758-
inplace=inplace,
6759-
regex=regex,
6747+
elif is_list_like(to_replace):
6748+
if not is_list_like(value):
6749+
# e.g. to_replace = [NA, ''] and value is 0,
6750+
# so we replace NA with 0 and then replace '' with 0
6751+
value = [value] * len(to_replace)
6752+
6753+
# e.g. we have to_replace = [NA, ''] and value = [0, 'missing']
6754+
if len(to_replace) != len(value):
6755+
raise ValueError(
6756+
f"Replacement lists must match in length. "
6757+
f"Expecting {len(to_replace)} got {len(value)} "
67606758
)
6759+
new_data = self._mgr.replace_list(
6760+
src_list=to_replace,
6761+
dest_list=value,
6762+
inplace=inplace,
6763+
regex=regex,
6764+
)
67616765

6762-
else: # [NA, ''] -> 0
6763-
new_data = self._mgr.replace(
6764-
to_replace=to_replace, value=value, inplace=inplace, regex=regex
6765-
)
67666766
elif to_replace is None:
67676767
if not (
67686768
is_re_compilable(regex)

pandas/core/internals/blocks.py

+18-30
Original file line numberDiff line numberDiff line change
@@ -2502,39 +2502,14 @@ def replace(
25022502
inplace: bool = False,
25032503
regex: bool = False,
25042504
) -> List["Block"]:
2505-
to_rep_is_list = is_list_like(to_replace)
2506-
value_is_list = is_list_like(value)
2507-
both_lists = to_rep_is_list and value_is_list
2508-
either_list = to_rep_is_list or value_is_list
2505+
# Note: the checks we do in NDFrame.replace ensure we never get
2506+
# here with listlike to_replace or value, as those cases
2507+
# go through _replace_list
25092508

2510-
result_blocks: List["Block"] = []
2511-
blocks: List["Block"] = [self]
2512-
2513-
if not either_list and is_re(to_replace):
2509+
if is_re(to_replace) or regex:
25142510
return self._replace_single(to_replace, value, inplace=inplace, regex=True)
2515-
elif not (either_list or regex):
2511+
else:
25162512
return super().replace(to_replace, value, inplace=inplace, regex=regex)
2517-
elif both_lists:
2518-
for to_rep, v in zip(to_replace, value):
2519-
result_blocks = []
2520-
for b in blocks:
2521-
result = b._replace_single(to_rep, v, inplace=inplace, regex=regex)
2522-
result_blocks.extend(result)
2523-
blocks = result_blocks
2524-
return result_blocks
2525-
2526-
elif to_rep_is_list and regex:
2527-
for to_rep in to_replace:
2528-
result_blocks = []
2529-
for b in blocks:
2530-
result = b._replace_single(
2531-
to_rep, value, inplace=inplace, regex=regex
2532-
)
2533-
result_blocks.extend(result)
2534-
blocks = result_blocks
2535-
return result_blocks
2536-
2537-
return self._replace_single(to_replace, value, inplace=inplace, regex=regex)
25382513

25392514
def _replace_single(
25402515
self,
@@ -2627,6 +2602,19 @@ def re_replacer(s):
26272602
class CategoricalBlock(ExtensionBlock):
26282603
__slots__ = ()
26292604

2605+
def _replace_list(
2606+
self,
2607+
src_list: List[Any],
2608+
dest_list: List[Any],
2609+
inplace: bool = False,
2610+
regex: bool = False,
2611+
) -> List["Block"]:
2612+
if len(algos.unique(dest_list)) == 1:
2613+
# We likely got here by tiling value inside NDFrame.replace,
2614+
# so un-tile here
2615+
return self.replace(src_list, dest_list[0], inplace, regex)
2616+
return super()._replace_list(src_list, dest_list, inplace, regex)
2617+
26302618
def replace(
26312619
self,
26322620
to_replace,

pandas/tests/arrays/categorical/test_replace.py

+2-1
Original file line numberDiff line numberDiff line change
@@ -21,6 +21,7 @@
2121
((1, 2, 4), 5, [5, 5, 3], False),
2222
((5, 6), 2, [1, 2, 3], False),
2323
# many-to-many, handled outside of Categorical and resulting in a separate dtype
24+
# except for cases with only 1 unique entry in `value`
2425
([1], [2], [2, 2, 3], True),
2526
([1, 4], [5, 2], [5, 2, 3], True),
2627
# check_categorical sorts categories, which crashes on mixed dtypes
@@ -30,7 +31,7 @@
3031
)
3132
def test_replace(to_replace, value, expected, flip_categories):
3233
# GH 31720
33-
stays_categorical = not isinstance(value, list)
34+
stays_categorical = not isinstance(value, list) or len(pd.unique(value)) == 1
3435

3536
s = pd.Series([1, 2, 3], dtype="category")
3637
result = s.replace(to_replace, value)

0 commit comments

Comments
 (0)