diff --git a/pandas/core/frame.py b/pandas/core/frame.py index aedbba755227d..d2da52ba7bdd0 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -4025,6 +4025,41 @@ def replace( method=method, ) + def _replace_columnwise( + self, mapping: Dict[Label, Tuple[Any, Any]], inplace: bool, regex + ): + """ + Dispatch to Series.replace column-wise. + + + Parameters + ---------- + mapping : dict + of the form {col: (target, value)} + inplace : bool + regex : bool or same types as `to_replace` in DataFrame.replace + + Returns + ------- + DataFrame or None + """ + # Operate column-wise + res = self if inplace else self.copy() + ax = self.columns + + for i in range(len(ax)): + if ax[i] in mapping: + ser = self.iloc[:, i] + + target, value = mapping[ax[i]] + newobj = ser.replace(target, value, regex=regex) + + res.iloc[:, i] = newobj + + if inplace: + return + return res.__finalize__(self) + @Appender(_shared_docs["shift"] % _shared_doc_kwargs) def shift(self, periods=1, freq=None, axis=0, fill_value=None) -> "DataFrame": return super().shift( diff --git a/pandas/core/generic.py b/pandas/core/generic.py index 052a4adddca27..a36aca5ea7f1c 100644 --- a/pandas/core/generic.py +++ b/pandas/core/generic.py @@ -6468,7 +6468,6 @@ def replace( ): if not ( is_scalar(to_replace) - or isinstance(to_replace, pd.Series) or is_re_compilable(to_replace) or is_list_like(to_replace) ): @@ -6559,18 +6558,16 @@ def replace( # {'A': NA} -> 0 elif not is_list_like(value): - keys = [(k, src) for k, src in to_replace.items() if k in self] - keys_len = len(keys) - 1 - for i, (k, src) in enumerate(keys): - convert = i == keys_len - new_data = new_data.replace( - to_replace=src, - value=value, - filter=[k], - inplace=inplace, - regex=regex, - convert=convert, + # Operate column-wise + if self.ndim == 1: + raise ValueError( + "Series.replace cannot use dict-like to_replace " + "and non-None value" ) + mapping = { + col: (to_rep, value) for col, to_rep in to_replace.items() + } + return self._replace_columnwise(mapping, inplace, regex) else: raise TypeError("value argument must be scalar, dict, or Series") @@ -6611,17 +6608,14 @@ def replace( # dest iterable dict-like if is_dict_like(value): # NA -> {'A' : 0, 'B' : -1} - new_data = self._mgr - - for k, v in value.items(): - if k in self: - new_data = new_data.replace( - to_replace=to_replace, - value=v, - filter=[k], - inplace=inplace, - regex=regex, - ) + # Operate column-wise + if self.ndim == 1: + raise ValueError( + "Series.replace cannot use dict-value and " + "non-None to_replace" + ) + mapping = {col: (to_replace, val) for col, val in value.items()} + return self._replace_columnwise(mapping, inplace, regex) elif not is_list_like(value): # NA -> 0 new_data = self._mgr.replace( diff --git a/pandas/core/internals/blocks.py b/pandas/core/internals/blocks.py index ba2fd037901a2..d22209e61df0c 100644 --- a/pandas/core/internals/blocks.py +++ b/pandas/core/internals/blocks.py @@ -679,7 +679,6 @@ def replace( to_replace, value, inplace: bool = False, - filter=None, regex: bool = False, convert: bool = True, ): @@ -711,12 +710,7 @@ def replace( # _can_hold_element checks have reduced this back to the # scalar case and we can avoid a costly object cast return self.replace( - to_replace[0], - value, - inplace=inplace, - filter=filter, - regex=regex, - convert=convert, + to_replace[0], value, inplace=inplace, regex=regex, convert=convert, ) # GH 22083, TypeError or ValueError occurred within error handling @@ -730,7 +724,6 @@ def replace( to_replace=to_replace, value=value, inplace=inplace, - filter=filter, regex=regex, convert=convert, ) @@ -743,9 +736,6 @@ def replace( to_replace = convert_scalar_for_putitemlike(to_replace, values.dtype) mask = missing.mask_missing(values, to_replace) - if filter is not None: - filtered_out = ~self.mgr_locs.isin(filter) - mask[filtered_out.nonzero()[0]] = False if not mask.any(): if inplace: @@ -774,7 +764,6 @@ def replace( to_replace=original_to_replace, value=value, inplace=inplace, - filter=filter, regex=regex, convert=convert, ) @@ -2374,20 +2363,13 @@ def _can_hold_element(self, element: Any) -> bool: return issubclass(tipo.type, np.bool_) return isinstance(element, (bool, np.bool_)) - def replace( - self, to_replace, value, inplace=False, filter=None, regex=False, convert=True - ): + def replace(self, to_replace, value, inplace=False, regex=False, convert=True): inplace = validate_bool_kwarg(inplace, "inplace") to_replace_values = np.atleast_1d(to_replace) if not np.can_cast(to_replace_values, bool): return self return super().replace( - to_replace, - value, - inplace=inplace, - filter=filter, - regex=regex, - convert=convert, + to_replace, value, inplace=inplace, regex=regex, convert=convert, ) @@ -2461,9 +2443,7 @@ def _maybe_downcast(self, blocks: List["Block"], downcast=None) -> List["Block"] def _can_hold_element(self, element: Any) -> bool: return True - def replace( - self, to_replace, value, inplace=False, filter=None, regex=False, convert=True - ): + def replace(self, to_replace, value, inplace=False, regex=False, convert=True): to_rep_is_list = is_list_like(to_replace) value_is_list = is_list_like(value) both_lists = to_rep_is_list and value_is_list @@ -2474,33 +2454,18 @@ def replace( if not either_list and is_re(to_replace): return self._replace_single( - to_replace, - value, - inplace=inplace, - filter=filter, - regex=True, - convert=convert, + to_replace, value, inplace=inplace, regex=True, convert=convert, ) elif not (either_list or regex): return super().replace( - to_replace, - value, - inplace=inplace, - filter=filter, - regex=regex, - convert=convert, + to_replace, value, inplace=inplace, regex=regex, convert=convert, ) elif both_lists: for to_rep, v in zip(to_replace, value): result_blocks = [] for b in blocks: result = b._replace_single( - to_rep, - v, - inplace=inplace, - filter=filter, - regex=regex, - convert=convert, + to_rep, v, inplace=inplace, regex=regex, convert=convert, ) result_blocks = _extend_blocks(result, result_blocks) blocks = result_blocks @@ -2511,35 +2476,18 @@ def replace( result_blocks = [] for b in blocks: result = b._replace_single( - to_rep, - value, - inplace=inplace, - filter=filter, - regex=regex, - convert=convert, + to_rep, value, inplace=inplace, regex=regex, convert=convert, ) result_blocks = _extend_blocks(result, result_blocks) blocks = result_blocks return result_blocks return self._replace_single( - to_replace, - value, - inplace=inplace, - filter=filter, - convert=convert, - regex=regex, + to_replace, value, inplace=inplace, convert=convert, regex=regex, ) def _replace_single( - self, - to_replace, - value, - inplace=False, - filter=None, - regex=False, - convert=True, - mask=None, + self, to_replace, value, inplace=False, regex=False, convert=True, mask=None, ): """ Replace elements by the given value. @@ -2552,7 +2500,6 @@ def _replace_single( Replacement object. inplace : bool, default False Perform inplace modification. - filter : list, optional regex : bool, default False If true, perform regular expression substitution. convert : bool, default True @@ -2598,9 +2545,7 @@ def _replace_single( else: # if the thing to replace is not a string or compiled regex call # the superclass method -> to_replace is some kind of object - return super().replace( - to_replace, value, inplace=inplace, filter=filter, regex=regex - ) + return super().replace(to_replace, value, inplace=inplace, regex=regex) new_values = self.values if inplace else self.values.copy() @@ -2625,15 +2570,10 @@ def re_replacer(s): f = np.vectorize(re_replacer, otypes=[self.dtype]) - if filter is None: - filt = slice(None) - else: - filt = self.mgr_locs.isin(filter).nonzero()[0] - if mask is None: - new_values[filt] = f(new_values[filt]) + new_values[:] = f(new_values) else: - new_values[filt][mask] = f(new_values[filt][mask]) + new_values[mask] = f(new_values[mask]) # convert block = self.make_block(new_values) @@ -2730,7 +2670,6 @@ def replace( to_replace, value, inplace: bool = False, - filter=None, regex: bool = False, convert: bool = True, ): diff --git a/pandas/core/internals/managers.py b/pandas/core/internals/managers.py index e2c9212ae9576..f241410b25a82 100644 --- a/pandas/core/internals/managers.py +++ b/pandas/core/internals/managers.py @@ -369,7 +369,7 @@ def reduce(self, func, *args, **kwargs): return res - def apply(self: T, f, filter=None, align_keys=None, **kwargs) -> T: + def apply(self: T, f, align_keys=None, **kwargs) -> T: """ Iterate over the blocks, collect and create a new BlockManager. @@ -377,26 +377,17 @@ def apply(self: T, f, filter=None, align_keys=None, **kwargs) -> T: ---------- f : str or callable Name of the Block method to apply. - filter : list, if supplied, only call the block if the filter is in - the block Returns ------- BlockManager """ + assert "filter" not in kwargs + align_keys = align_keys or [] - result_blocks = [] + result_blocks: List[Block] = [] # fillna: Series/DataFrame is responsible for making sure value is aligned - # filter kwarg is used in replace-* family of methods - if filter is not None: - filter_locs = set(self.items.get_indexer_for(filter)) - if len(filter_locs) == len(self.items): - # All items are included, as if there were no filtering - filter = None - else: - kwargs["filter"] = filter_locs - self._consolidate_inplace() align_copy = False @@ -410,10 +401,6 @@ def apply(self: T, f, filter=None, align_keys=None, **kwargs) -> T: } for b in self.blocks: - if filter is not None: - if not b.mgr_locs.isin(filter_locs).any(): - result_blocks.append(b) - continue if aligned_args: b_items = self.items[b.mgr_locs.indexer] diff --git a/pandas/tests/series/methods/test_replace.py b/pandas/tests/series/methods/test_replace.py index 904a455870ab1..bea8cb8b105e7 100644 --- a/pandas/tests/series/methods/test_replace.py +++ b/pandas/tests/series/methods/test_replace.py @@ -373,3 +373,19 @@ def test_replace_invalid_to_replace(self): ) with pytest.raises(TypeError, match=msg): series.replace(lambda x: x.strip()) + + def test_replace_only_one_dictlike_arg(self): + # GH#33340 + + ser = pd.Series([1, 2, "A", pd.Timestamp.now(), True]) + to_replace = {0: 1, 2: "A"} + value = "foo" + msg = "Series.replace cannot use dict-like to_replace and non-None value" + with pytest.raises(ValueError, match=msg): + ser.replace(to_replace, value) + + to_replace = 1 + value = {0: "foo", 2: "bar"} + msg = "Series.replace cannot use dict-value and non-None to_replace" + with pytest.raises(ValueError, match=msg): + ser.replace(to_replace, value)