Skip to content

REF: replace column-wise, remove BlockManager.apply filter kwarg #33340

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 5 commits into from
Apr 8, 2020
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
35 changes: 35 additions & 0 deletions pandas/core/frame.py
Original file line number Diff line number Diff line change
Expand Up @@ -4025,6 +4025,41 @@ def replace(
method=method,
)

def _replace_columnwise(
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

i think may make sense to move this somewhere, maybe missing.py just to try to reduce size in this module. anyhow for another day.

self, mapping: Dict[Label, Tuple[Any, Any]], inplace: bool, regex
):
"""
Dispatch to Series.replace column-wise.


Parameters
----------
mapping : dict
of the form {col: (target, value)}
inplace : bool
regex : bool or same types as `to_replace` in DataFrame.replace

Returns
-------
DataFrame or None
"""
# Operate column-wise
res = self if inplace else self.copy()
ax = self.columns

for i in range(len(ax)):
if ax[i] in mapping:
ser = self.iloc[:, i]

target, value = mapping[ax[i]]
newobj = ser.replace(target, value, regex=regex)

res.iloc[:, i] = newobj

if inplace:
return
return res.__finalize__(self)

@Appender(_shared_docs["shift"] % _shared_doc_kwargs)
def shift(self, periods=1, freq=None, axis=0, fill_value=None) -> "DataFrame":
return super().shift(
Expand Down
40 changes: 17 additions & 23 deletions pandas/core/generic.py
Original file line number Diff line number Diff line change
Expand Up @@ -6468,7 +6468,6 @@ def replace(
):
if not (
is_scalar(to_replace)
or isinstance(to_replace, pd.Series)
or is_re_compilable(to_replace)
or is_list_like(to_replace)
):
Expand Down Expand Up @@ -6559,18 +6558,16 @@ def replace(

# {'A': NA} -> 0
elif not is_list_like(value):
keys = [(k, src) for k, src in to_replace.items() if k in self]
keys_len = len(keys) - 1
for i, (k, src) in enumerate(keys):
convert = i == keys_len
new_data = new_data.replace(
to_replace=src,
value=value,
filter=[k],
inplace=inplace,
regex=regex,
convert=convert,
# Operate column-wise
if self.ndim == 1:
raise ValueError(
"Series.replace cannot use dict-like to_replace "
"and non-None value"
)
mapping = {
col: (to_rep, value) for col, to_rep in to_replace.items()
}
return self._replace_columnwise(mapping, inplace, regex)
else:
raise TypeError("value argument must be scalar, dict, or Series")

Expand Down Expand Up @@ -6611,17 +6608,14 @@ def replace(

# dest iterable dict-like
if is_dict_like(value): # NA -> {'A' : 0, 'B' : -1}
new_data = self._mgr

for k, v in value.items():
if k in self:
new_data = new_data.replace(
to_replace=to_replace,
value=v,
filter=[k],
inplace=inplace,
regex=regex,
)
# Operate column-wise
if self.ndim == 1:
raise ValueError(
"Series.replace cannot use dict-value and "
"non-None to_replace"
)
mapping = {col: (to_replace, val) for col, val in value.items()}
return self._replace_columnwise(mapping, inplace, regex)

elif not is_list_like(value): # NA -> 0
new_data = self._mgr.replace(
Expand Down
87 changes: 13 additions & 74 deletions pandas/core/internals/blocks.py
Original file line number Diff line number Diff line change
Expand Up @@ -679,7 +679,6 @@ def replace(
to_replace,
value,
inplace: bool = False,
filter=None,
regex: bool = False,
convert: bool = True,
):
Expand Down Expand Up @@ -711,12 +710,7 @@ def replace(
# _can_hold_element checks have reduced this back to the
# scalar case and we can avoid a costly object cast
return self.replace(
to_replace[0],
value,
inplace=inplace,
filter=filter,
regex=regex,
convert=convert,
to_replace[0], value, inplace=inplace, regex=regex, convert=convert,
)

# GH 22083, TypeError or ValueError occurred within error handling
Expand All @@ -730,7 +724,6 @@ def replace(
to_replace=to_replace,
value=value,
inplace=inplace,
filter=filter,
regex=regex,
convert=convert,
)
Expand All @@ -743,9 +736,6 @@ def replace(
to_replace = convert_scalar_for_putitemlike(to_replace, values.dtype)

mask = missing.mask_missing(values, to_replace)
if filter is not None:
filtered_out = ~self.mgr_locs.isin(filter)
mask[filtered_out.nonzero()[0]] = False

if not mask.any():
if inplace:
Expand Down Expand Up @@ -774,7 +764,6 @@ def replace(
to_replace=original_to_replace,
value=value,
inplace=inplace,
filter=filter,
regex=regex,
convert=convert,
)
Expand Down Expand Up @@ -2374,20 +2363,13 @@ def _can_hold_element(self, element: Any) -> bool:
return issubclass(tipo.type, np.bool_)
return isinstance(element, (bool, np.bool_))

def replace(
self, to_replace, value, inplace=False, filter=None, regex=False, convert=True
):
def replace(self, to_replace, value, inplace=False, regex=False, convert=True):
inplace = validate_bool_kwarg(inplace, "inplace")
to_replace_values = np.atleast_1d(to_replace)
if not np.can_cast(to_replace_values, bool):
return self
return super().replace(
to_replace,
value,
inplace=inplace,
filter=filter,
regex=regex,
convert=convert,
to_replace, value, inplace=inplace, regex=regex, convert=convert,
)


Expand Down Expand Up @@ -2461,9 +2443,7 @@ def _maybe_downcast(self, blocks: List["Block"], downcast=None) -> List["Block"]
def _can_hold_element(self, element: Any) -> bool:
return True

def replace(
self, to_replace, value, inplace=False, filter=None, regex=False, convert=True
):
def replace(self, to_replace, value, inplace=False, regex=False, convert=True):
to_rep_is_list = is_list_like(to_replace)
value_is_list = is_list_like(value)
both_lists = to_rep_is_list and value_is_list
Expand All @@ -2474,33 +2454,18 @@ def replace(

if not either_list and is_re(to_replace):
return self._replace_single(
to_replace,
value,
inplace=inplace,
filter=filter,
regex=True,
convert=convert,
to_replace, value, inplace=inplace, regex=True, convert=convert,
)
elif not (either_list or regex):
return super().replace(
to_replace,
value,
inplace=inplace,
filter=filter,
regex=regex,
convert=convert,
to_replace, value, inplace=inplace, regex=regex, convert=convert,
)
elif both_lists:
for to_rep, v in zip(to_replace, value):
result_blocks = []
for b in blocks:
result = b._replace_single(
to_rep,
v,
inplace=inplace,
filter=filter,
regex=regex,
convert=convert,
to_rep, v, inplace=inplace, regex=regex, convert=convert,
)
result_blocks = _extend_blocks(result, result_blocks)
blocks = result_blocks
Expand All @@ -2511,35 +2476,18 @@ def replace(
result_blocks = []
for b in blocks:
result = b._replace_single(
to_rep,
value,
inplace=inplace,
filter=filter,
regex=regex,
convert=convert,
to_rep, value, inplace=inplace, regex=regex, convert=convert,
)
result_blocks = _extend_blocks(result, result_blocks)
blocks = result_blocks
return result_blocks

return self._replace_single(
to_replace,
value,
inplace=inplace,
filter=filter,
convert=convert,
regex=regex,
to_replace, value, inplace=inplace, convert=convert, regex=regex,
)

def _replace_single(
self,
to_replace,
value,
inplace=False,
filter=None,
regex=False,
convert=True,
mask=None,
self, to_replace, value, inplace=False, regex=False, convert=True, mask=None,
):
"""
Replace elements by the given value.
Expand All @@ -2552,7 +2500,6 @@ def _replace_single(
Replacement object.
inplace : bool, default False
Perform inplace modification.
filter : list, optional
regex : bool, default False
If true, perform regular expression substitution.
convert : bool, default True
Expand Down Expand Up @@ -2598,9 +2545,7 @@ def _replace_single(
else:
# if the thing to replace is not a string or compiled regex call
# the superclass method -> to_replace is some kind of object
return super().replace(
to_replace, value, inplace=inplace, filter=filter, regex=regex
)
return super().replace(to_replace, value, inplace=inplace, regex=regex)

new_values = self.values if inplace else self.values.copy()

Expand All @@ -2625,15 +2570,10 @@ def re_replacer(s):

f = np.vectorize(re_replacer, otypes=[self.dtype])

if filter is None:
filt = slice(None)
else:
filt = self.mgr_locs.isin(filter).nonzero()[0]

if mask is None:
new_values[filt] = f(new_values[filt])
new_values[:] = f(new_values)
else:
new_values[filt][mask] = f(new_values[filt][mask])
new_values[mask] = f(new_values[mask])

# convert
block = self.make_block(new_values)
Expand Down Expand Up @@ -2730,7 +2670,6 @@ def replace(
to_replace,
value,
inplace: bool = False,
filter=None,
regex: bool = False,
convert: bool = True,
):
Expand Down
21 changes: 4 additions & 17 deletions pandas/core/internals/managers.py
Original file line number Diff line number Diff line change
Expand Up @@ -369,34 +369,25 @@ def reduce(self, func, *args, **kwargs):

return res

def apply(self: T, f, filter=None, align_keys=None, **kwargs) -> T:
def apply(self: T, f, align_keys=None, **kwargs) -> T:
"""
Iterate over the blocks, collect and create a new BlockManager.

Parameters
----------
f : str or callable
Name of the Block method to apply.
filter : list, if supplied, only call the block if the filter is in
the block

Returns
-------
BlockManager
"""
assert "filter" not in kwargs

align_keys = align_keys or []
result_blocks = []
result_blocks: List[Block] = []
# fillna: Series/DataFrame is responsible for making sure value is aligned

# filter kwarg is used in replace-* family of methods
if filter is not None:
filter_locs = set(self.items.get_indexer_for(filter))
if len(filter_locs) == len(self.items):
# All items are included, as if there were no filtering
filter = None
else:
kwargs["filter"] = filter_locs

self._consolidate_inplace()

align_copy = False
Expand All @@ -410,10 +401,6 @@ def apply(self: T, f, filter=None, align_keys=None, **kwargs) -> T:
}

for b in self.blocks:
if filter is not None:
if not b.mgr_locs.isin(filter_locs).any():
result_blocks.append(b)
continue

if aligned_args:
b_items = self.items[b.mgr_locs.indexer]
Expand Down
16 changes: 16 additions & 0 deletions pandas/tests/series/methods/test_replace.py
Original file line number Diff line number Diff line change
Expand Up @@ -373,3 +373,19 @@ def test_replace_invalid_to_replace(self):
)
with pytest.raises(TypeError, match=msg):
series.replace(lambda x: x.strip())

def test_replace_only_one_dictlike_arg(self):
# GH#33340

ser = pd.Series([1, 2, "A", pd.Timestamp.now(), True])
to_replace = {0: 1, 2: "A"}
value = "foo"
msg = "Series.replace cannot use dict-like to_replace and non-None value"
with pytest.raises(ValueError, match=msg):
ser.replace(to_replace, value)

to_replace = 1
value = {0: "foo", 2: "bar"}
msg = "Series.replace cannot use dict-value and non-None to_replace"
with pytest.raises(ValueError, match=msg):
ser.replace(to_replace, value)