Skip to content

REF: simplify NDFrame.replace, ObjectBlock.replace #37704

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 1 commit into from
Nov 9, 2020
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
34 changes: 17 additions & 17 deletions pandas/core/generic.py
Original file line number Diff line number Diff line change
Expand Up @@ -6744,25 +6744,25 @@ def replace(
else:
raise TypeError("value argument must be scalar, dict, or Series")

elif is_list_like(to_replace): # [NA, ''] -> [0, 'missing']
if is_list_like(value):
if len(to_replace) != len(value):
raise ValueError(
f"Replacement lists must match in length. "
f"Expecting {len(to_replace)} got {len(value)} "
)
self._consolidate_inplace()
new_data = self._mgr.replace_list(
src_list=to_replace,
dest_list=value,
inplace=inplace,
regex=regex,
elif is_list_like(to_replace):
if not is_list_like(value):
# e.g. to_replace = [NA, ''] and value is 0,
# so we replace NA with 0 and then replace '' with 0
value = [value] * len(to_replace)

# e.g. we have to_replace = [NA, ''] and value = [0, 'missing']
if len(to_replace) != len(value):
raise ValueError(
f"Replacement lists must match in length. "
f"Expecting {len(to_replace)} got {len(value)} "
)
new_data = self._mgr.replace_list(
src_list=to_replace,
dest_list=value,
inplace=inplace,
regex=regex,
)

else: # [NA, ''] -> 0
new_data = self._mgr.replace(
to_replace=to_replace, value=value, inplace=inplace, regex=regex
)
elif to_replace is None:
if not (
is_re_compilable(regex)
Expand Down
48 changes: 18 additions & 30 deletions pandas/core/internals/blocks.py
Original file line number Diff line number Diff line change
Expand Up @@ -2502,39 +2502,14 @@ def replace(
inplace: bool = False,
regex: bool = False,
) -> List["Block"]:
to_rep_is_list = is_list_like(to_replace)
value_is_list = is_list_like(value)
both_lists = to_rep_is_list and value_is_list
either_list = to_rep_is_list or value_is_list
# Note: the checks we do in NDFrame.replace ensure we never get
# here with listlike to_replace or value, as those cases
# go through _replace_list

result_blocks: List["Block"] = []
blocks: List["Block"] = [self]

if not either_list and is_re(to_replace):
if is_re(to_replace) or regex:
return self._replace_single(to_replace, value, inplace=inplace, regex=True)
elif not (either_list or regex):
else:
return super().replace(to_replace, value, inplace=inplace, regex=regex)
elif both_lists:
for to_rep, v in zip(to_replace, value):
result_blocks = []
for b in blocks:
result = b._replace_single(to_rep, v, inplace=inplace, regex=regex)
result_blocks.extend(result)
blocks = result_blocks
return result_blocks

elif to_rep_is_list and regex:
for to_rep in to_replace:
result_blocks = []
for b in blocks:
result = b._replace_single(
to_rep, value, inplace=inplace, regex=regex
)
result_blocks.extend(result)
blocks = result_blocks
return result_blocks

return self._replace_single(to_replace, value, inplace=inplace, regex=regex)

def _replace_single(
self,
Expand Down Expand Up @@ -2627,6 +2602,19 @@ def re_replacer(s):
class CategoricalBlock(ExtensionBlock):
__slots__ = ()

def _replace_list(
    self,
    src_list: List[Any],
    dest_list: List[Any],
    inplace: bool = False,
    regex: bool = False,
) -> List["Block"]:
    """
    Route a list-based replacement for a categorical block.

    When ``dest_list`` contains exactly one unique value, the call is
    forwarded to ``self.replace`` with that value as a scalar; otherwise
    it is delegated unchanged to the parent class implementation.
    """
    distinct_dest = algos.unique(dest_list)
    if len(distinct_dest) != 1:
        return super()._replace_list(src_list, dest_list, inplace, regex)

    # A single unique destination most likely means NDFrame.replace tiled a
    # scalar ``value`` into a list, so un-tile it again here.
    return self.replace(src_list, dest_list[0], inplace, regex)

def replace(
self,
to_replace,
Expand Down
3 changes: 2 additions & 1 deletion pandas/tests/arrays/categorical/test_replace.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@
((1, 2, 4), 5, [5, 5, 3], False),
((5, 6), 2, [1, 2, 3], False),
# many-to-many: handled outside of Categorical and results in a separate dtype,
# except when `value` contains only one unique entry
([1], [2], [2, 2, 3], True),
([1, 4], [5, 2], [5, 2, 3], True),
# check_categorical sorts categories, which crashes on mixed dtypes
Expand All @@ -30,7 +31,7 @@
)
def test_replace(to_replace, value, expected, flip_categories):
# GH 31720
stays_categorical = not isinstance(value, list)
stays_categorical = not isinstance(value, list) or len(pd.unique(value)) == 1

s = pd.Series([1, 2, 3], dtype="category")
result = s.replace(to_replace, value)
Expand Down