From 07a2a7e8eccaa61b90b68dca399ef3fc79afacf5 Mon Sep 17 00:00:00 2001 From: Brock Date: Fri, 6 Nov 2020 20:26:33 -0800 Subject: [PATCH 1/3] REF: simplify _replace_single by noting regex kwarg is bool --- pandas/core/internals/blocks.py | 49 ++++++++++++++++--------------- pandas/core/internals/managers.py | 6 ++-- 2 files changed, 29 insertions(+), 26 deletions(-) diff --git a/pandas/core/internals/blocks.py b/pandas/core/internals/blocks.py index 1f34e91d71077..bf4e2c3ef4d84 100644 --- a/pandas/core/internals/blocks.py +++ b/pandas/core/internals/blocks.py @@ -819,7 +819,13 @@ def replace( return blocks def _replace_single( - self, to_replace, value, inplace=False, regex=False, convert=True, mask=None + self, + to_replace, + value, + inplace: bool = False, + regex: bool = False, + convert=True, + mask=None, ) -> List["Block"]: """ no-op on a non-ObjectBlock """ return [self] if inplace else [self.copy()] @@ -860,9 +866,9 @@ def comp(s: Scalar, mask: np.ndarray, regex: bool = False) -> np.ndarray: m = masks[i] convert = i == src_len # only convert once at the end result = blk._replace_coerce( - mask=m, to_replace=src, value=dest, + mask=m, inplace=inplace, regex=regex, ) @@ -1567,9 +1573,9 @@ def _replace_coerce( self, to_replace, value, + mask: np.ndarray, inplace: bool = True, regex: bool = False, - mask=None, ) -> List["Block"]: """ Replace value corresponding to the given boolean array with another @@ -1581,12 +1587,12 @@ def _replace_coerce( Scalar to replace or regular expression to match. value : object Replacement object. + mask : np.ndarray[bool] + True indicate corresponding element is ignored. inplace : bool, default True Perform inplace modification. regex : bool, default False If true, perform regular expression substitution. - mask : array-like of bool, optional - True indicate corresponding element is ignored. Returns ------- @@ -2495,7 +2501,12 @@ def _can_hold_element(self, element: Any) -> bool: return True def replace( - self, to_replace, value, inplace=False, regex=False, convert=True + self, + to_replace, + value, + inplace: bool = False, + regex: bool = False, + convert=True, ) -> List["Block"]: to_rep_is_list = is_list_like(to_replace) value_is_list = is_list_like(value) @@ -2540,7 +2551,13 @@ def replace( ) def _replace_single( - self, to_replace, value, inplace=False, regex=False, convert=True, mask=None + self, + to_replace, + value, + inplace: bool = False, + regex: bool = False, + convert=True, + mask=None, ) -> List["Block"]: """ Replace elements by the given value. @@ -2567,23 +2584,7 @@ def _replace_single( inplace = validate_bool_kwarg(inplace, "inplace") # to_replace is regex compilable - to_rep_re = regex and is_re_compilable(to_replace) - - # regex is regex compilable - regex_re = is_re_compilable(regex) - - # only one will survive - if to_rep_re and regex_re: - raise AssertionError( - "only one of to_replace and regex can be regex compilable" - ) - - # if regex was passed as something that can be a regex (rather than a - # boolean) - if regex_re: - to_replace = regex - - regex = regex_re or to_rep_re + regex = regex and is_re_compilable(to_replace) # try to get the pattern attribute (compiled re) or it's a string if is_re(to_replace): diff --git a/pandas/core/internals/managers.py b/pandas/core/internals/managers.py index a06d57e268fe2..fda4da8694ea3 100644 --- a/pandas/core/internals/managers.py +++ b/pandas/core/internals/managers.py @@ -638,9 +638,11 @@ def convert( coerce=coerce, ) - def replace(self, value, **kwargs) -> "BlockManager": + def replace(self, to_replace, value, inplace: bool, regex: bool) -> "BlockManager": assert np.ndim(value) == 0, value - return self.apply("replace", value=value, **kwargs) + return self.apply( + "replace", to_replace=to_replace, value=value, inplace=inplace, regex=regex + ) def replace_list( self: T, From f1a4bcdeec5812d093165d1bfb785469c7048a1b Mon Sep 17 00:00:00 2001 From: Brock Date: Fri, 6 Nov 2020 20:28:09 -0800 Subject: [PATCH 2/3] Annotate --- pandas/core/internals/blocks.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/pandas/core/internals/blocks.py b/pandas/core/internals/blocks.py index bf4e2c3ef4d84..4b88267daebd7 100644 --- a/pandas/core/internals/blocks.py +++ b/pandas/core/internals/blocks.py @@ -824,7 +824,7 @@ def _replace_single( value, inplace: bool = False, regex: bool = False, - convert=True, + convert: bool = True, mask=None, ) -> List["Block"]: """ no-op on a non-ObjectBlock """ @@ -2506,7 +2506,7 @@ def replace( value, inplace: bool = False, regex: bool = False, - convert=True, + convert: bool = True, ) -> List["Block"]: to_rep_is_list = is_list_like(to_replace) value_is_list = is_list_like(value) @@ -2556,7 +2556,7 @@ def _replace_single( value, inplace: bool = False, regex: bool = False, - convert=True, + convert: bool = True, mask=None, ) -> List["Block"]: """ From 64a67aecdb5e8c0452371c55386d1dcafe445c94 Mon Sep 17 00:00:00 2001 From: Brock Date: Fri, 6 Nov 2020 20:46:13 -0800 Subject: [PATCH 3/3] CLN: remove never-False convert kwarg --- pandas/core/internals/blocks.py | 35 ++++++++++----------------------- 1 file changed, 10 insertions(+), 25 deletions(-) diff --git a/pandas/core/internals/blocks.py b/pandas/core/internals/blocks.py index 4b88267daebd7..8e01aaa396265 100644 --- a/pandas/core/internals/blocks.py +++ b/pandas/core/internals/blocks.py @@ -726,7 +726,6 @@ def replace( value, inplace: bool = False, regex: bool = False, - convert: bool = True, ) -> List["Block"]: """ replace the to_replace value with value, possible to create new @@ -755,9 +754,7 @@ def replace( if len(to_replace) == 1: # _can_hold_element checks have reduced this back to the # scalar case and we can avoid a costly object cast - return self.replace( - to_replace[0], value, inplace=inplace, regex=regex, convert=convert - ) + return self.replace(to_replace[0], value, inplace=inplace, regex=regex) # GH 22083, TypeError or ValueError occurred within error handling # causes infinite loop. Cast and retry only if not objectblock. @@ -771,7 +768,6 @@ def replace( value=value, inplace=inplace, regex=regex, - convert=convert, ) values = self.values @@ -810,12 +806,11 @@ def replace( value=value, inplace=inplace, regex=regex, - convert=convert, - ) - if convert: - blocks = extend_blocks( - [b.convert(numeric=False, copy=not inplace) for b in blocks] ) + + blocks = extend_blocks( + [b.convert(numeric=False, copy=not inplace) for b in blocks] + ) return blocks def _replace_single( @@ -2506,7 +2501,6 @@ def replace( value, inplace: bool = False, regex: bool = False, - convert: bool = True, ) -> List["Block"]: to_rep_is_list = is_list_like(to_replace) value_is_list = is_list_like(value) @@ -2517,20 +2511,14 @@ def replace( blocks: List["Block"] = [self] if not either_list and is_re(to_replace): - return self._replace_single( - to_replace, value, inplace=inplace, regex=True, convert=convert - ) + return self._replace_single(to_replace, value, inplace=inplace, regex=True) elif not (either_list or regex): - return super().replace( - to_replace, value, inplace=inplace, regex=regex, convert=convert - ) + return super().replace(to_replace, value, inplace=inplace, regex=regex) elif both_lists: for to_rep, v in zip(to_replace, value): result_blocks = [] for b in blocks: - result = b._replace_single( - to_rep, v, inplace=inplace, regex=regex, convert=convert - ) + result = b._replace_single(to_rep, v, inplace=inplace, regex=regex) result_blocks.extend(result) blocks = result_blocks return result_blocks @@ -2540,15 +2528,13 @@ def replace( result_blocks = [] for b in blocks: result = b._replace_single( - to_rep, value, inplace=inplace, regex=regex, convert=convert + to_rep, value, inplace=inplace, regex=regex ) result_blocks.extend(result) blocks = result_blocks return result_blocks - return self._replace_single( - to_replace, value, inplace=inplace, convert=convert, regex=regex - ) + return self._replace_single(to_replace, value, inplace=inplace, regex=regex) def _replace_single( self, @@ -2647,7 +2633,6 @@ def replace( value, inplace: bool = False, regex: bool = False, - convert: bool = True, ) -> List["Block"]: inplace = validate_bool_kwarg(inplace, "inplace") result = self if inplace else self.copy()