From d9910afe93177cfd6f2efd8d4ed2cfa6c460e1fd Mon Sep 17 00:00:00 2001 From: jbrockmendel Date: Mon, 6 Apr 2020 14:26:08 -0700 Subject: [PATCH 1/3] REF: replace column-wise, remove BlockManager.apply filter kwarg --- pandas/core/generic.py | 52 +++++++++--------- pandas/core/internals/blocks.py | 87 +++++-------------------------- pandas/core/internals/managers.py | 19 ++----- 3 files changed, 44 insertions(+), 114 deletions(-) diff --git a/pandas/core/generic.py b/pandas/core/generic.py index fac4ca6768ece..92ef7049dd0eb 100644 --- a/pandas/core/generic.py +++ b/pandas/core/generic.py @@ -6458,7 +6458,6 @@ def replace( ): if not ( is_scalar(to_replace) - or isinstance(to_replace, pd.Series) or is_re_compilable(to_replace) or is_list_like(to_replace) ): @@ -6549,18 +6548,20 @@ def replace( # {'A': NA} -> 0 elif not is_list_like(value): - keys = [(k, src) for k, src in to_replace.items() if k in self] - keys_len = len(keys) - 1 - for i, (k, src) in enumerate(keys): - convert = i == keys_len - new_data = new_data.replace( - to_replace=src, - value=value, - filter=[k], - inplace=inplace, - regex=regex, - convert=convert, - ) + + # Operate column-wise + res = self if inplace else self.copy() + ax = self._info_axis + # Note: we only get here with self.ndim == 2 + for i in range(len(ax)): + if ax[i] in to_replace: + ser = self._ixs(i, axis=1) + newobj = ser.replace(to_replace[ax[i]], value, regex=regex) + res.iloc[:, i] = newobj + + if inplace: + return + return res.__finalize__(self) else: raise TypeError("value argument must be scalar, dict, or Series") @@ -6601,17 +6602,20 @@ def replace( # dest iterable dict-like if is_dict_like(value): # NA -> {'A' : 0, 'B' : -1} - new_data = self._mgr - - for k, v in value.items(): - if k in self: - new_data = new_data.replace( - to_replace=to_replace, - value=v, - filter=[k], - inplace=inplace, - regex=regex, - ) + + # Operate column-wise + res = self if inplace else self.copy() + ax = self._info_axis + for i in range(len(ax)): + if ax[i] in value: + v = value[ax[i]] + ser = self._ixs(i, axis=1) + newobj = ser.replace(to_replace, value=v, regex=regex) + res.iloc[:, i] = newobj + + if inplace: + return + return res.__finalize__(self) elif not is_list_like(value): # NA -> 0 new_data = self._mgr.replace( diff --git a/pandas/core/internals/blocks.py b/pandas/core/internals/blocks.py index fe58fd3af966c..c0c40f0cd0d3a 100644 --- a/pandas/core/internals/blocks.py +++ b/pandas/core/internals/blocks.py @@ -679,7 +679,6 @@ def replace( to_replace, value, inplace: bool = False, - filter=None, regex: bool = False, convert: bool = True, ): @@ -711,12 +710,7 @@ def replace( # _can_hold_element checks have reduced this back to the # scalar case and we can avoid a costly object cast return self.replace( - to_replace[0], - value, - inplace=inplace, - filter=filter, - regex=regex, - convert=convert, + to_replace[0], value, inplace=inplace, regex=regex, convert=convert, ) # GH 22083, TypeError or ValueError occurred within error handling @@ -730,7 +724,6 @@ def replace( to_replace=to_replace, value=value, inplace=inplace, - filter=filter, regex=regex, convert=convert, ) @@ -743,9 +736,6 @@ def replace( to_replace = convert_scalar_for_putitemlike(to_replace, values.dtype) mask = missing.mask_missing(values, to_replace) - if filter is not None: - filtered_out = ~self.mgr_locs.isin(filter) - mask[filtered_out.nonzero()[0]] = False if not mask.any(): if inplace: @@ -774,7 +764,6 @@ def replace( to_replace=original_to_replace, value=value, inplace=inplace, - filter=filter, regex=regex, convert=convert, ) @@ -2391,20 +2380,13 @@ def _can_hold_element(self, element: Any) -> bool: return issubclass(tipo.type, np.bool_) return isinstance(element, (bool, np.bool_)) - def replace( - self, to_replace, value, inplace=False, filter=None, regex=False, convert=True - ): + def replace(self, to_replace, value, inplace=False, regex=False, convert=True): inplace = validate_bool_kwarg(inplace, "inplace") to_replace_values = np.atleast_1d(to_replace) if not np.can_cast(to_replace_values, bool): return self return super().replace( - to_replace, - value, - inplace=inplace, - filter=filter, - regex=regex, - convert=convert, + to_replace, value, inplace=inplace, regex=regex, convert=convert, ) @@ -2478,9 +2460,7 @@ def _maybe_downcast(self, blocks: List["Block"], downcast=None) -> List["Block"] def _can_hold_element(self, element: Any) -> bool: return True - def replace( - self, to_replace, value, inplace=False, filter=None, regex=False, convert=True - ): + def replace(self, to_replace, value, inplace=False, regex=False, convert=True): to_rep_is_list = is_list_like(to_replace) value_is_list = is_list_like(value) both_lists = to_rep_is_list and value_is_list @@ -2491,33 +2471,18 @@ def replace( if not either_list and is_re(to_replace): return self._replace_single( - to_replace, - value, - inplace=inplace, - filter=filter, - regex=True, - convert=convert, + to_replace, value, inplace=inplace, regex=True, convert=convert, ) elif not (either_list or regex): return super().replace( - to_replace, - value, - inplace=inplace, - filter=filter, - regex=regex, - convert=convert, + to_replace, value, inplace=inplace, regex=regex, convert=convert, ) elif both_lists: for to_rep, v in zip(to_replace, value): result_blocks = [] for b in blocks: result = b._replace_single( - to_rep, - v, - inplace=inplace, - filter=filter, - regex=regex, - convert=convert, + to_rep, v, inplace=inplace, regex=regex, convert=convert, ) result_blocks = _extend_blocks(result, result_blocks) blocks = result_blocks @@ -2528,35 +2493,18 @@ def replace( result_blocks = [] for b in blocks: result = b._replace_single( - to_rep, - value, - inplace=inplace, - filter=filter, - regex=regex, - convert=convert, + to_rep, value, inplace=inplace, regex=regex, convert=convert, ) result_blocks = _extend_blocks(result, result_blocks) blocks = result_blocks return result_blocks return self._replace_single( - to_replace, - value, - inplace=inplace, - filter=filter, - convert=convert, - regex=regex, + to_replace, value, inplace=inplace, convert=convert, regex=regex, ) def _replace_single( - self, - to_replace, - value, - inplace=False, - filter=None, - regex=False, - convert=True, - mask=None, + self, to_replace, value, inplace=False, regex=False, convert=True, mask=None, ): """ Replace elements by the given value. @@ -2569,7 +2517,6 @@ def _replace_single( Replacement object. inplace : bool, default False Perform inplace modification. - filter : list, optional regex : bool, default False If true, perform regular expression substitution. convert : bool, default True @@ -2615,9 +2562,7 @@ def _replace_single( else: # if the thing to replace is not a string or compiled regex call # the superclass method -> to_replace is some kind of object - return super().replace( - to_replace, value, inplace=inplace, filter=filter, regex=regex - ) + return super().replace(to_replace, value, inplace=inplace, regex=regex) new_values = self.values if inplace else self.values.copy() @@ -2642,15 +2587,10 @@ def re_replacer(s): f = np.vectorize(re_replacer, otypes=[self.dtype]) - if filter is None: - filt = slice(None) - else: - filt = self.mgr_locs.isin(filter).nonzero()[0] - if mask is None: - new_values[filt] = f(new_values[filt]) + new_values[:] = f(new_values) else: - new_values[filt][mask] = f(new_values[filt][mask]) + new_values[mask] = f(new_values[mask]) # convert block = self.make_block(new_values) @@ -2747,7 +2687,6 @@ def replace( to_replace, value, inplace: bool = False, - filter=None, regex: bool = False, convert: bool = True, ): diff --git a/pandas/core/internals/managers.py b/pandas/core/internals/managers.py index c6efd6a2ac6a7..7e49a02627456 100644 --- a/pandas/core/internals/managers.py +++ b/pandas/core/internals/managers.py @@ -375,7 +375,7 @@ def reduce(self, func, *args, **kwargs): return res - def apply(self: T, f, filter=None, align_keys=None, **kwargs) -> T: + def apply(self: T, f, align_keys=None, **kwargs) -> T: """ Iterate over the blocks, collect and create a new BlockManager. @@ -383,26 +383,17 @@ def apply(self: T, f, filter=None, align_keys=None, **kwargs) -> T: ---------- f : str or callable Name of the Block method to apply. - filter : list, if supplied, only call the block if the filter is in - the block Returns ------- BlockManager """ + assert "filter" not in kwargs + align_keys = align_keys or [] result_blocks = [] # fillna: Series/DataFrame is responsible for making sure value is aligned - # filter kwarg is used in replace-* family of methods - if filter is not None: - filter_locs = set(self.items.get_indexer_for(filter)) - if len(filter_locs) == len(self.items): - # All items are included, as if there were no filtering - filter = None - else: - kwargs["filter"] = filter_locs - self._consolidate_inplace() align_copy = False @@ -416,10 +407,6 @@ def apply(self: T, f, filter=None, align_keys=None, **kwargs) -> T: } for b in self.blocks: - if filter is not None: - if not b.mgr_locs.isin(filter_locs).any(): - result_blocks.append(b) - continue if aligned_args: b_items = self.items[b.mgr_locs.indexer] From f94e2a2f9728596f72521c292cc8a3e4993a68b0 Mon Sep 17 00:00:00 2001 From: jbrockmendel Date: Mon, 6 Apr 2020 14:53:54 -0700 Subject: [PATCH 2/3] mypy fixup --- pandas/core/internals/managers.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/core/internals/managers.py b/pandas/core/internals/managers.py index 606969e5a460e..72f0635a8a6de 100644 --- a/pandas/core/internals/managers.py +++ b/pandas/core/internals/managers.py @@ -391,7 +391,7 @@ def apply(self: T, f, align_keys=None, **kwargs) -> T: assert "filter" not in kwargs align_keys = align_keys or [] - result_blocks = [] + result_blocks: List[Block] = [] # fillna: Series/DataFrame is responsible for making sure value is aligned self._consolidate_inplace() From dec71b0b14202f857c15447dcfc0201945ba45d1 Mon Sep 17 00:00:00 2001 From: jbrockmendel Date: Mon, 6 Apr 2020 17:11:08 -0700 Subject: [PATCH 3/3] implement _replace_columnwise --- pandas/core/frame.py | 35 +++++++++++++++++ pandas/core/generic.py | 42 ++++++++------------- pandas/tests/series/methods/test_replace.py | 16 ++++++++ 3 files changed, 67 insertions(+), 26 deletions(-) diff --git a/pandas/core/frame.py b/pandas/core/frame.py index aedbba755227d..d2da52ba7bdd0 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -4025,6 +4025,41 @@ def replace( method=method, ) + def _replace_columnwise( + self, mapping: Dict[Label, Tuple[Any, Any]], inplace: bool, regex + ): + """ + Dispatch to Series.replace column-wise. + + + Parameters + ---------- + mapping : dict + of the form {col: (target, value)} + inplace : bool + regex : bool or same types as `to_replace` in DataFrame.replace + + Returns + ------- + DataFrame or None + """ + # Operate column-wise + res = self if inplace else self.copy() + ax = self.columns + + for i in range(len(ax)): + if ax[i] in mapping: + ser = self.iloc[:, i] + + target, value = mapping[ax[i]] + newobj = ser.replace(target, value, regex=regex) + + res.iloc[:, i] = newobj + + if inplace: + return + return res.__finalize__(self) + @Appender(_shared_docs["shift"] % _shared_doc_kwargs) def shift(self, periods=1, freq=None, axis=0, fill_value=None) -> "DataFrame": return super().shift( diff --git a/pandas/core/generic.py b/pandas/core/generic.py index b62e2eee05ada..a36aca5ea7f1c 100644 --- a/pandas/core/generic.py +++ b/pandas/core/generic.py @@ -6558,20 +6558,16 @@ def replace( # {'A': NA} -> 0 elif not is_list_like(value): - # Operate column-wise - res = self if inplace else self.copy() - ax = self._info_axis - # Note: we only get here with self.ndim == 2 - for i in range(len(ax)): - if ax[i] in to_replace: - ser = self._ixs(i, axis=1) - newobj = ser.replace(to_replace[ax[i]], value, regex=regex) - res.iloc[:, i] = newobj - - if inplace: - return - return res.__finalize__(self) + if self.ndim == 1: + raise ValueError( + "Series.replace cannot use dict-like to_replace " + "and non-None value" + ) + mapping = { + col: (to_rep, value) for col, to_rep in to_replace.items() + } + return self._replace_columnwise(mapping, inplace, regex) else: raise TypeError("value argument must be scalar, dict, or Series") @@ -6612,20 +6608,14 @@ def replace( # dest iterable dict-like if is_dict_like(value): # NA -> {'A' : 0, 'B' : -1} - # Operate column-wise - res = self if inplace else self.copy() - ax = self._info_axis - for i in range(len(ax)): - if ax[i] in value: - v = value[ax[i]] - ser = self._ixs(i, axis=1) - newobj = ser.replace(to_replace, value=v, regex=regex) - res.iloc[:, i] = newobj - - if inplace: - return - return res.__finalize__(self) + if self.ndim == 1: + raise ValueError( + "Series.replace cannot use dict-value and " + "non-None to_replace" + ) + mapping = {col: (to_replace, val) for col, val in value.items()} + return self._replace_columnwise(mapping, inplace, regex) elif not is_list_like(value): # NA -> 0 new_data = self._mgr.replace( diff --git a/pandas/tests/series/methods/test_replace.py b/pandas/tests/series/methods/test_replace.py index 904a455870ab1..bea8cb8b105e7 100644 --- a/pandas/tests/series/methods/test_replace.py +++ b/pandas/tests/series/methods/test_replace.py @@ -373,3 +373,19 @@ def test_replace_invalid_to_replace(self): ) with pytest.raises(TypeError, match=msg): series.replace(lambda x: x.strip()) + + def test_replace_only_one_dictlike_arg(self): + # GH#33340 + + ser = pd.Series([1, 2, "A", pd.Timestamp.now(), True]) + to_replace = {0: 1, 2: "A"} + value = "foo" + msg = "Series.replace cannot use dict-like to_replace and non-None value" + with pytest.raises(ValueError, match=msg): + ser.replace(to_replace, value) + + to_replace = 1 + value = {0: "foo", 2: "bar"} + msg = "Series.replace cannot use dict-value and non-None to_replace" + with pytest.raises(ValueError, match=msg): + ser.replace(to_replace, value)