Skip to content

REF: replace column-wise, remove BlockManager.apply filter kwarg #33340

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 5 commits into from
Apr 8, 2020
Merged
Show file tree
Hide file tree
Changes from 3 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
52 changes: 28 additions & 24 deletions pandas/core/generic.py
Original file line number Diff line number Diff line change
Expand Up @@ -6459,7 +6459,6 @@ def replace(
):
if not (
is_scalar(to_replace)
or isinstance(to_replace, pd.Series)
or is_re_compilable(to_replace)
or is_list_like(to_replace)
):
Expand Down Expand Up @@ -6550,18 +6549,20 @@ def replace(

# {'A': NA} -> 0
elif not is_list_like(value):
keys = [(k, src) for k, src in to_replace.items() if k in self]
keys_len = len(keys) - 1
for i, (k, src) in enumerate(keys):
convert = i == keys_len
new_data = new_data.replace(
to_replace=src,
value=value,
filter=[k],
inplace=inplace,
regex=regex,
convert=convert,
)

# Operate column-wise
res = self if inplace else self.copy()
ax = self._info_axis
# Note: we only get here with self.ndim == 2
for i in range(len(ax)):
if ax[i] in to_replace:
ser = self._ixs(i, axis=1)
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

i would just use .iloc here

newobj = ser.replace(to_replace[ax[i]], value, regex=regex)
res.iloc[:, i] = newobj

if inplace:
return
return res.__finalize__(self)
else:
raise TypeError("value argument must be scalar, dict, or Series")

Expand Down Expand Up @@ -6602,17 +6603,20 @@ def replace(

# dest iterable dict-like
if is_dict_like(value): # NA -> {'A' : 0, 'B' : -1}
new_data = self._mgr

for k, v in value.items():
if k in self:
new_data = new_data.replace(
to_replace=to_replace,
value=v,
filter=[k],
inplace=inplace,
regex=regex,
)

# Operate column-wise
res = self if inplace else self.copy()
ax = self._info_axis
for i in range(len(ax)):
if ax[i] in value:
v = value[ax[i]]
ser = self._ixs(i, axis=1)
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

same,

can you factor this to a function instead.

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

refactored + green

i think in follow-up might be able to use this new helper method in one or two more places

newobj = ser.replace(to_replace, value=v, regex=regex)
res.iloc[:, i] = newobj

if inplace:
return
return res.__finalize__(self)

elif not is_list_like(value): # NA -> 0
new_data = self._mgr.replace(
Expand Down
87 changes: 13 additions & 74 deletions pandas/core/internals/blocks.py
Original file line number Diff line number Diff line change
Expand Up @@ -679,7 +679,6 @@ def replace(
to_replace,
value,
inplace: bool = False,
filter=None,
regex: bool = False,
convert: bool = True,
):
Expand Down Expand Up @@ -711,12 +710,7 @@ def replace(
# _can_hold_element checks have reduced this back to the
# scalar case and we can avoid a costly object cast
return self.replace(
to_replace[0],
value,
inplace=inplace,
filter=filter,
regex=regex,
convert=convert,
to_replace[0], value, inplace=inplace, regex=regex, convert=convert,
)

# GH 22083, TypeError or ValueError occurred within error handling
Expand All @@ -730,7 +724,6 @@ def replace(
to_replace=to_replace,
value=value,
inplace=inplace,
filter=filter,
regex=regex,
convert=convert,
)
Expand All @@ -743,9 +736,6 @@ def replace(
to_replace = convert_scalar_for_putitemlike(to_replace, values.dtype)

mask = missing.mask_missing(values, to_replace)
if filter is not None:
filtered_out = ~self.mgr_locs.isin(filter)
mask[filtered_out.nonzero()[0]] = False

if not mask.any():
if inplace:
Expand Down Expand Up @@ -774,7 +764,6 @@ def replace(
to_replace=original_to_replace,
value=value,
inplace=inplace,
filter=filter,
regex=regex,
convert=convert,
)
Expand Down Expand Up @@ -2390,20 +2379,13 @@ def _can_hold_element(self, element: Any) -> bool:
return issubclass(tipo.type, np.bool_)
return isinstance(element, (bool, np.bool_))

def replace(
self, to_replace, value, inplace=False, filter=None, regex=False, convert=True
):
def replace(self, to_replace, value, inplace=False, regex=False, convert=True):
inplace = validate_bool_kwarg(inplace, "inplace")
to_replace_values = np.atleast_1d(to_replace)
if not np.can_cast(to_replace_values, bool):
return self
return super().replace(
to_replace,
value,
inplace=inplace,
filter=filter,
regex=regex,
convert=convert,
to_replace, value, inplace=inplace, regex=regex, convert=convert,
)


Expand Down Expand Up @@ -2477,9 +2459,7 @@ def _maybe_downcast(self, blocks: List["Block"], downcast=None) -> List["Block"]
def _can_hold_element(self, element: Any) -> bool:
return True

def replace(
self, to_replace, value, inplace=False, filter=None, regex=False, convert=True
):
def replace(self, to_replace, value, inplace=False, regex=False, convert=True):
to_rep_is_list = is_list_like(to_replace)
value_is_list = is_list_like(value)
both_lists = to_rep_is_list and value_is_list
Expand All @@ -2490,33 +2470,18 @@ def replace(

if not either_list and is_re(to_replace):
return self._replace_single(
to_replace,
value,
inplace=inplace,
filter=filter,
regex=True,
convert=convert,
to_replace, value, inplace=inplace, regex=True, convert=convert,
)
elif not (either_list or regex):
return super().replace(
to_replace,
value,
inplace=inplace,
filter=filter,
regex=regex,
convert=convert,
to_replace, value, inplace=inplace, regex=regex, convert=convert,
)
elif both_lists:
for to_rep, v in zip(to_replace, value):
result_blocks = []
for b in blocks:
result = b._replace_single(
to_rep,
v,
inplace=inplace,
filter=filter,
regex=regex,
convert=convert,
to_rep, v, inplace=inplace, regex=regex, convert=convert,
)
result_blocks = _extend_blocks(result, result_blocks)
blocks = result_blocks
Expand All @@ -2527,35 +2492,18 @@ def replace(
result_blocks = []
for b in blocks:
result = b._replace_single(
to_rep,
value,
inplace=inplace,
filter=filter,
regex=regex,
convert=convert,
to_rep, value, inplace=inplace, regex=regex, convert=convert,
)
result_blocks = _extend_blocks(result, result_blocks)
blocks = result_blocks
return result_blocks

return self._replace_single(
to_replace,
value,
inplace=inplace,
filter=filter,
convert=convert,
regex=regex,
to_replace, value, inplace=inplace, convert=convert, regex=regex,
)

def _replace_single(
self,
to_replace,
value,
inplace=False,
filter=None,
regex=False,
convert=True,
mask=None,
self, to_replace, value, inplace=False, regex=False, convert=True, mask=None,
):
"""
Replace elements by the given value.
Expand All @@ -2568,7 +2516,6 @@ def _replace_single(
Replacement object.
inplace : bool, default False
Perform inplace modification.
filter : list, optional
regex : bool, default False
If true, perform regular expression substitution.
convert : bool, default True
Expand Down Expand Up @@ -2614,9 +2561,7 @@ def _replace_single(
else:
# if the thing to replace is not a string or compiled regex call
# the superclass method -> to_replace is some kind of object
return super().replace(
to_replace, value, inplace=inplace, filter=filter, regex=regex
)
return super().replace(to_replace, value, inplace=inplace, regex=regex)

new_values = self.values if inplace else self.values.copy()

Expand All @@ -2641,15 +2586,10 @@ def re_replacer(s):

f = np.vectorize(re_replacer, otypes=[self.dtype])

if filter is None:
filt = slice(None)
else:
filt = self.mgr_locs.isin(filter).nonzero()[0]

if mask is None:
new_values[filt] = f(new_values[filt])
new_values[:] = f(new_values)
else:
new_values[filt][mask] = f(new_values[filt][mask])
new_values[mask] = f(new_values[mask])

# convert
block = self.make_block(new_values)
Expand Down Expand Up @@ -2746,7 +2686,6 @@ def replace(
to_replace,
value,
inplace: bool = False,
filter=None,
regex: bool = False,
convert: bool = True,
):
Expand Down
21 changes: 4 additions & 17 deletions pandas/core/internals/managers.py
Original file line number Diff line number Diff line change
Expand Up @@ -375,34 +375,25 @@ def reduce(self, func, *args, **kwargs):

return res

def apply(self: T, f, filter=None, align_keys=None, **kwargs) -> T:
def apply(self: T, f, align_keys=None, **kwargs) -> T:
"""
Iterate over the blocks, collect and create a new BlockManager.

Parameters
----------
f : str or callable
Name of the Block method to apply.
filter : list, if supplied, only call the block if the filter is in
the block

Returns
-------
BlockManager
"""
assert "filter" not in kwargs

align_keys = align_keys or []
result_blocks = []
result_blocks: List[Block] = []
# fillna: Series/DataFrame is responsible for making sure value is aligned

# filter kwarg is used in replace-* family of methods
if filter is not None:
filter_locs = set(self.items.get_indexer_for(filter))
if len(filter_locs) == len(self.items):
# All items are included, as if there were no filtering
filter = None
else:
kwargs["filter"] = filter_locs

self._consolidate_inplace()

align_copy = False
Expand All @@ -416,10 +407,6 @@ def apply(self: T, f, filter=None, align_keys=None, **kwargs) -> T:
}

for b in self.blocks:
if filter is not None:
if not b.mgr_locs.isin(filter_locs).any():
result_blocks.append(b)
continue

if aligned_args:
b_items = self.items[b.mgr_locs.indexer]
Expand Down