Skip to content

TYP: ensure Block.putmask, Block.where get arrays, not Series/DataFrame #32962

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 12 commits into from
Mar 26, 2020
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion pandas/core/generic.py
Original file line number Diff line number Diff line change
Expand Up @@ -8609,7 +8609,7 @@ def _where(
# GH 2745 / GH 4192
# treat like a scalar
if len(other) == 1:
other = np.array(other[0])
other = other[0]

# GH 3235
# match True cond to other
Expand Down
58 changes: 36 additions & 22 deletions pandas/core/internals/blocks.py
Original file line number Diff line number Diff line change
Expand Up @@ -55,6 +55,7 @@
from pandas.core.dtypes.generic import (
ABCDataFrame,
ABCExtensionArray,
ABCIndexClass,
ABCPandasArray,
ABCSeries,
)
Expand Down Expand Up @@ -913,7 +914,7 @@ def putmask(

Parameters
----------
mask : the condition to respect
mask : np.ndarray[bool], SparseArray[bool], or BooleanArray
new : a ndarray/object
inplace : bool, default False
Perform inplace modification.
Expand All @@ -925,10 +926,10 @@ def putmask(
-------
List[Block]
"""
new_values = self.values if inplace else self.values.copy()
mask = _extract_bool_array(mask)
assert not isinstance(new, (ABCIndexClass, ABCSeries, ABCDataFrame))

new = getattr(new, "values", new)
mask = getattr(mask, "values", mask)
new_values = self.values if inplace else self.values.copy()

# if we are passed a scalar None, convert it here
if not is_list_like(new) and isna(new) and not self.is_object:
Expand Down Expand Up @@ -1308,18 +1309,21 @@ def where(
Parameters
----------
other : a ndarray/object
cond : the condition to respect
cond : np.ndarray[bool], SparseArray[bool], or BooleanArray
errors : str, {'raise', 'ignore'}, default 'raise'
- ``raise`` : allow exceptions to be raised
- ``ignore`` : suppress exceptions. On error return original object
axis : int, default 0

Returns
-------
a new block(s), the result of the func
List[Block]
"""
import pandas.core.computation.expressions as expressions

cond = _extract_bool_array(cond)
assert not isinstance(other, (ABCIndexClass, ABCSeries, ABCDataFrame))

assert errors in ["raise", "ignore"]
transpose = self.ndim == 2

Expand All @@ -1328,9 +1332,6 @@ def where(
if transpose:
values = values.T

other = getattr(other, "_values", getattr(other, "values", other))
cond = getattr(cond, "values", cond)

# If the default broadcasting would go in the wrong direction, then
# explicitly reshape other instead
if getattr(other, "ndim", 0) >= 1:
Expand Down Expand Up @@ -1628,9 +1629,9 @@ def putmask(
"""
inplace = validate_bool_kwarg(inplace, "inplace")

# use block's copy logic.
# .values may be an Index which does shallow copy by default
new_values = self.values if inplace else self.copy().values
mask = _extract_bool_array(mask)

new_values = self.values if inplace else self.values.copy()

if isinstance(new, np.ndarray) and len(new) == len(mask):
new = new[mask]
Expand Down Expand Up @@ -1859,19 +1860,19 @@ def shift(
def where(
self, other, cond, errors="raise", try_cast: bool = False, axis: int = 0,
) -> List["Block"]:
if isinstance(other, ABCDataFrame):
# ExtensionArrays are 1-D, so if we get here then
# `other` should be a DataFrame with a single column.
assert other.shape[1] == 1
other = other.iloc[:, 0]

other = extract_array(other, extract_numpy=True)
cond = _extract_bool_array(cond)
assert not isinstance(other, (ABCIndexClass, ABCSeries, ABCDataFrame))

if isinstance(cond, ABCDataFrame):
assert cond.shape[1] == 1
cond = cond.iloc[:, 0]
if isinstance(other, np.ndarray) and other.ndim == 2:
# TODO(EA2D): unnecessary with 2D EAs
assert other.shape[1] == 1
other = other[:, 0]

cond = extract_array(cond, extract_numpy=True)
if isinstance(cond, np.ndarray) and cond.ndim == 2:
# TODO(EA2D): unnecessary with 2D EAs
assert cond.shape[1] == 1
cond = cond[:, 0]

if lib.is_scalar(other) and isna(other):
# The default `other` for Series / Frame is np.nan
Expand Down Expand Up @@ -3113,3 +3114,16 @@ def _putmask_preserve(nv, n):
v = v.astype(dtype)

return _putmask_preserve(v, n)


def _extract_bool_array(mask: ArrayLike) -> np.ndarray:
"""
If we have a SparseArray or BooleanArray, convert it to ndarray[bool].
"""
if isinstance(mask, ExtensionArray):
# We could have BooleanArray, Sparse[bool], ...
mask = np.asarray(mask, dtype=np.bool_)

assert isinstance(mask, np.ndarray), type(mask)
assert mask.dtype == bool, mask.dtype
return mask
5 changes: 4 additions & 1 deletion pandas/core/internals/managers.py
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,7 @@
import pandas.core.algorithms as algos
from pandas.core.arrays.sparse import SparseDtype
from pandas.core.base import PandasObject
from pandas.core.construction import extract_array
from pandas.core.indexers import maybe_convert_indices
from pandas.core.indexes.api import Index, ensure_index
from pandas.core.internals.blocks import (
Expand Down Expand Up @@ -426,7 +427,7 @@ def apply(self: T, f, filter=None, align_keys=None, **kwargs) -> T:

for k, obj in aligned_args.items():
axis = obj._info_axis_number
kwargs[k] = obj.reindex(b_items, axis=axis, copy=align_copy)
kwargs[k] = obj.reindex(b_items, axis=axis, copy=align_copy)._values

if callable(f):
applied = b.apply(f, **kwargs)
Expand Down Expand Up @@ -552,6 +553,7 @@ def where(self, **kwargs) -> "BlockManager":
align_keys = ["other", "cond"]
else:
align_keys = ["cond"]
kwargs["other"] = extract_array(kwargs["other"], extract_numpy=True)

return self.apply("where", align_keys=align_keys, **kwargs)

Expand All @@ -567,6 +569,7 @@ def putmask(
align_keys = ["new", "mask"]
else:
align_keys = ["mask"]
new = extract_array(new, extract_numpy=True)

return self.apply(
"putmask",
Expand Down