Skip to content

REF: move actual lookup and dispatch to array_op from frame into internals #39772

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

44 changes: 12 additions & 32 deletions pandas/core/frame.py
Original file line number Diff line number Diff line change
Expand Up @@ -6808,7 +6808,7 @@ def _arith_method(self, other, op):

_logical_method = _arith_method

def _dispatch_frame_op(self, right, func: Callable, axis: int | None = None):
def _dispatch_frame_op(self, right, func: Callable, axis: int = 0):
"""
Evaluate the frame operation func(left, right) by evaluating
column-by-column, dispatching to the Series implementation.
Expand All @@ -6817,21 +6817,18 @@ def _dispatch_frame_op(self, right, func: Callable, axis: int | None = None):
----------
right : scalar, Series, or DataFrame
func : arithmetic or comparison operator
axis : {None, 0, 1}
axis : {0, 1}
Only relevant if `right` is a Series.

Returns
-------
DataFrame
"""
# Get the appropriate array-op to apply to each column/block's values.
array_op = ops.get_array_op(func)

right = lib.item_from_zerodim(right)
if not is_list_like(right):
# i.e. scalar, faster than checking np.ndim(right) == 0
with np.errstate(all="ignore"):
bm = self._mgr.apply(array_op, right=right)
return type(self)(bm)
bm = self._mgr.operate_scalar(right, func)

elif isinstance(right, DataFrame):
assert self.index.equals(right.index)
Expand All @@ -6840,48 +6837,31 @@ def _dispatch_frame_op(self, right, func: Callable, axis: int | None = None):
# fails in cases with empty columns reached via
# _frame_arith_method_with_reindex

# TODO operate_blockwise expects a manager of the same type
# TODO operate_manager expects a manager of the same type
with np.errstate(all="ignore"):
bm = self._mgr.operate_blockwise(
# error: Argument 1 to "operate_blockwise" of "ArrayManager" has
bm = self._mgr.operate_manager(
# error: Argument 1 to "operate_manager" of "ArrayManager" has
# incompatible type "Union[ArrayManager, BlockManager]"; expected
# "ArrayManager"
# error: Argument 1 to "operate_blockwise" of "BlockManager" has
# error: Argument 1 to "operate_manager" of "BlockManager" has
# incompatible type "Union[ArrayManager, BlockManager]"; expected
# "BlockManager"
right._mgr, # type: ignore[arg-type]
array_op,
func,
)
return type(self)(bm)

elif isinstance(right, Series) and axis == 1:
# axis=1 means we want to operate row-by-row
assert right.index.equals(self.columns)

right = right._values
# maybe_align_as_frame ensures we do not have an ndarray here
assert not isinstance(right, np.ndarray)

with np.errstate(all="ignore"):
arrays = [
array_op(_left, _right)
for _left, _right in zip(self._iter_column_arrays(), right)
]

elif isinstance(right, Series):
assert right.index.equals(self.index) # Handle other cases later
assert right.index.equals(self._get_axis(axis))
right = right._values

with np.errstate(all="ignore"):
arrays = [array_op(left, right) for left in self._iter_column_arrays()]
bm = self._mgr.operate_array(right, func, axis)

else:
# Remaining cases have less-obvious dispatch rules
raise NotImplementedError(right)

return type(self)._from_arrays(
arrays, self.columns, self.index, verify_integrity=False
)
return type(self)(bm)

def _combine_frame(self, other: DataFrame, func, fill_value=None):
# at this point we have `self._indexed_same(other)`
Expand Down
68 changes: 66 additions & 2 deletions pandas/core/internals/array_manager.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@
ArrayLike,
DtypeObj,
Hashable,
Scalar,
)
from pandas.util._validators import validate_bool_kwarg

Expand Down Expand Up @@ -54,6 +55,7 @@
na_value_for_dtype,
)

from pandas.core import ops
import pandas.core.algorithms as algos
from pandas.core.array_algos.quantile import quantile_compat
from pandas.core.array_algos.take import take_1d
Expand Down Expand Up @@ -1081,11 +1083,73 @@ def reduce(
new_mgr = type(self)(result_arrays, [index, columns]) # type: ignore[arg-type]
return new_mgr, indexer

def operate_blockwise(self, other: ArrayManager, array_op) -> ArrayManager:
def operate_scalar(self, other: Scalar, op) -> ArrayManager:
    """
    Combine every stored array with a scalar using a binary operator.

    Parameters
    ----------
    other : scalar
        Right-hand operand; handed unchanged to each per-array call.
    op : operator function (e.g. ``operator.add``)
        Translated into an array-level op via ``ops.get_array_op``.

    Returns
    -------
    ArrayManager
        A new manager of the same type as ``self``, holding one result
        per stored array and reusing this manager's ``_axes``.
    """
    # Resolve the operator once; the same array-op is reused for each array.
    column_op = ops.get_array_op(op)
    results = []
    for values in self.arrays:
        results.append(column_op(values, other))
    return type(self)(results, self._axes)

def operate_array(self, other: ArrayLike, op, axis: int) -> ArrayManager:
    """
    Combine every stored array with another array using a binary operator.

    No length check is performed here; the caller is expected to have
    verified that `other` has the right length for the chosen `axis`.

    Parameters
    ----------
    other : np.ndarray or ExtensionArray
    op : operator function (e.g. ``operator.add``)
        Translated into an array-level op via ``ops.get_array_op``.
    axis : int
        With ``axis=1`` the i-th stored array is combined with the i-th
        element of `other` (match on the columns, broadcast along the
        rows). Any other value combines each stored array with the full
        `other` (match on the index, broadcast along the columns).

    Returns
    -------
    ArrayManager
        A new manager of the same type as ``self``, reusing this
        manager's ``_axes``.
    """
    column_op = ops.get_array_op(op)
    results = []
    if axis == 1:
        # Pair each column array with its own single element of `other`.
        for values, element in zip(self.arrays, other):
            results.append(column_op(values, element))
    else:
        # Every column array is combined with the complete `other` array.
        for values in self.arrays:
            results.append(column_op(values, other))
    return type(self)(results, self._axes)

def operate_manager(self, other: ArrayManager, op) -> ArrayManager:
"""
Element-wise (arithmetic/comparison/logical) operation with other ArrayManager.

The other ArrayManager is already aligned with `self`.

Parameters
----------
other : ArrayManager
op : operator function (eg ``operator.add``)

Returns
-------
ArrayManager
"""
# TODO what if `other` is BlockManager ?
array_op = ops.get_array_op(op)
left_arrays = self.arrays
right_arrays = other.arrays
result_arrays = [
Expand Down
69 changes: 67 additions & 2 deletions pandas/core/internals/managers.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,7 @@
ArrayLike,
Dtype,
DtypeObj,
Scalar,
Shape,
type_t,
)
Expand All @@ -46,6 +47,7 @@
isna,
)

from pandas.core import ops
import pandas.core.algorithms as algos
from pandas.core.arrays.sparse import SparseDtype
from pandas.core.construction import (
Expand Down Expand Up @@ -1366,10 +1368,73 @@ def reduce(
new_mgr = type(self).from_blocks(res_blocks, [self.items, index])
return new_mgr, indexer

def operate_blockwise(self, other: BlockManager, array_op) -> BlockManager:
def operate_scalar(self, other: Scalar, op) -> BlockManager:
    """
    Combine the manager's values with a scalar using a binary operator.

    Parameters
    ----------
    other : scalar
        Right-hand operand; forwarded to ``self.apply`` as ``right``.
    op : operator function (e.g. ``operator.add``)
        Translated into an array-level op via ``ops.get_array_op``.

    Returns
    -------
    BlockManager
        Whatever ``self.apply`` returns for the resolved array-op.
    """
    # Resolve the array-level op first, then let `apply` run it with the
    # scalar as the right-hand operand.
    block_op = ops.get_array_op(op)
    return self.apply(block_op, right=other)

def operate_array(self, other: ArrayLike, op, axis: int) -> BlockManager:
    """
    Combine each column's values with another array using a binary operator.

    No length check is performed here; the caller is expected to have
    verified that `other` has the right length for the chosen `axis`.

    Parameters
    ----------
    other : np.ndarray or ExtensionArray
    op : operator function (e.g. ``operator.add``)
        Translated into an array-level op via ``ops.get_array_op``.
    axis : int
        With ``axis=1`` the values at position i (``self.iget_values(i)``)
        are combined with the i-th element of `other` (match on the
        columns, broadcast along the rows). Any other value combines the
        values at each position in ``range(len(self.items))`` with the full
        `other` (match on the index, broadcast along the columns).

    Returns
    -------
    BlockManager
        Built by ``create_block_manager_from_arrays`` from the per-column
        results, ``self.axes[0]`` and ``self.axes``.
    """
    column_op = ops.get_array_op(op)
    results = []
    if axis == 1:
        # One element of `other` per column position.
        for position, element in enumerate(other):
            results.append(column_op(self.iget_values(position), element))
    else:
        # The complete `other` array is used for every column position.
        for position in range(len(self.items)):
            results.append(column_op(self.iget_values(position), other))

    return create_block_manager_from_arrays(results, self.axes[0], self.axes)

def operate_manager(self, other: BlockManager, op) -> BlockManager:
    """
    Combine this manager with another BlockManager using a binary operator.

    `other` is expected to be aligned with `self` already; no alignment
    is done here.

    Parameters
    ----------
    other : BlockManager
    op : operator function (e.g. ``operator.add``)
        Translated into an array-level op via ``ops.get_array_op``.

    Returns
    -------
    BlockManager
        The result of ``operate_blockwise(self, other, <array-op>)``.
    """
    # Resolve the array-level op, then delegate the pairing of the two
    # managers to `operate_blockwise`.
    block_op = ops.get_array_op(op)
    return operate_blockwise(self, other, block_op)

def _equal_values(self: BlockManager, other: BlockManager) -> bool:
Expand Down