diff --git a/pandas/core/internals/blocks.py b/pandas/core/internals/blocks.py index 1b42df1b0147c..ad388ef3f53b0 100644 --- a/pandas/core/internals/blocks.py +++ b/pandas/core/internals/blocks.py @@ -788,6 +788,43 @@ def _replace_single(self, *args, **kwargs): """ no-op on a non-ObjectBlock """ return self if kwargs["inplace"] else self.copy() + def _replace_list( + self, + src_list: List[Any], + dest_list: List[Any], + masks: List[np.ndarray], + inplace: bool = False, + regex: bool = False, + ) -> List["Block"]: + """ + See BlockManager._replace_list docstring. + """ + src_len = len(src_list) - 1 + + rb = [self if inplace else self.copy()] + for i, (src, dest) in enumerate(zip(src_list, dest_list)): + new_rb: List["Block"] = [] + for blk in rb: + m = masks[i][blk.mgr_locs.indexer] + convert = i == src_len # only convert once at the end + result = blk._replace_coerce( + mask=m, + to_replace=src, + value=dest, + inplace=inplace, + convert=convert, + regex=regex, + ) + if m.any() or convert: + if isinstance(result, list): + new_rb.extend(result) + else: + new_rb.append(result) + else: + new_rb.append(blk) + rb = new_rb + return rb + def setitem(self, indexer, value): """ Attempt self.values[indexer] = value, possibly creating a new array. diff --git a/pandas/core/internals/managers.py b/pandas/core/internals/managers.py index 00321b76cb6bf..389252e7ef0f2 100644 --- a/pandas/core/internals/managers.py +++ b/pandas/core/internals/managers.py @@ -3,6 +3,7 @@ import operator import re from typing import ( + Any, DefaultDict, Dict, List, @@ -600,8 +601,12 @@ def replace(self, value, **kwargs) -> "BlockManager": return self.apply("replace", value=value, **kwargs) def replace_list( - self, src_list, dest_list, inplace: bool = False, regex: bool = False - ) -> "BlockManager": + self: T, + src_list: List[Any], + dest_list: List[Any], + inplace: bool = False, + regex: bool = False, + ) -> T: """ do a list replace """ inplace = validate_bool_kwarg(inplace, "inplace") @@ -625,34 +630,14 @@ def comp(s: Scalar, mask: np.ndarray, regex: bool = False): masks = [comp(s, mask, regex) for s in src_list] - result_blocks = [] - src_len = len(src_list) - 1 - for blk in self.blocks: - - # its possible to get multiple result blocks here - # replace ALWAYS will return a list - rb = [blk if inplace else blk.copy()] - for i, (s, d) in enumerate(zip(src_list, dest_list)): - new_rb: List[Block] = [] - for b in rb: - m = masks[i][b.mgr_locs.indexer] - convert = i == src_len # only convert once at the end - result = b._replace_coerce( - mask=m, - to_replace=s, - value=d, - inplace=inplace, - convert=convert, - regex=regex, - ) - if m.any() or convert: - new_rb = _extend_blocks(result, new_rb) - else: - new_rb.append(b) - rb = new_rb - result_blocks.extend(rb) - - bm = type(self).from_blocks(result_blocks, self.axes) + bm = self.apply( + "_replace_list", + src_list=src_list, + dest_list=dest_list, + masks=masks, + inplace=inplace, + regex=regex, + ) bm._consolidate_inplace() return bm