Skip to content

REF: implement array_ops #27936

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 1 commit into from
Aug 16, 2019
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 2 additions & 2 deletions pandas/core/arrays/datetimes.py
Original file line number Diff line number Diff line change
Expand Up @@ -195,11 +195,11 @@ def wrapper(self, other):
return invalid_comparison(self, other, op)

if is_object_dtype(other):
# We have to use _comp_method_OBJECT_ARRAY instead of numpy
# We have to use comp_method_OBJECT_ARRAY instead of numpy
# comparison otherwise it would fail to raise when
# comparing tz-aware and tz-naive
with np.errstate(all="ignore"):
result = ops._comp_method_OBJECT_ARRAY(
result = ops.comp_method_OBJECT_ARRAY(
op, self.astype(object), other
)
o_mask = isna(other)
Expand Down
2 changes: 1 addition & 1 deletion pandas/core/indexes/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -109,7 +109,7 @@ def cmp_method(self, other):
elif is_object_dtype(self) and not isinstance(self, ABCMultiIndex):
# don't pass MultiIndex
with np.errstate(all="ignore"):
result = ops._comp_method_OBJECT_ARRAY(op, self.values, other)
result = ops.comp_method_OBJECT_ARRAY(op, self.values, other)

else:
with np.errstate(all="ignore"):
Expand Down
123 changes: 5 additions & 118 deletions pandas/core/ops/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,11 +13,7 @@
from pandas.errors import NullFrequencyError
from pandas.util._decorators import Appender

from pandas.core.dtypes.cast import (
construct_1d_object_array_from_listlike,
find_common_type,
maybe_upcast_putmask,
)
from pandas.core.dtypes.cast import construct_1d_object_array_from_listlike
from pandas.core.dtypes.common import (
ensure_object,
is_bool_dtype,
Expand All @@ -29,15 +25,13 @@
is_integer_dtype,
is_list_like,
is_object_dtype,
is_period_dtype,
is_scalar,
is_timedelta64_dtype,
)
from pandas.core.dtypes.generic import (
ABCDataFrame,
ABCDatetimeArray,
ABCDatetimeIndex,
ABCIndex,
ABCIndexClass,
ABCSeries,
ABCSparseSeries,
Expand All @@ -47,7 +41,7 @@
import pandas as pd
from pandas._typing import ArrayLike
from pandas.core.construction import array, extract_array
from pandas.core.ops import missing
from pandas.core.ops.array_ops import comp_method_OBJECT_ARRAY, define_na_arithmetic_op
from pandas.core.ops.docstrings import (
_arith_doc_FRAME,
_flex_comp_doc_FRAME,
Expand Down Expand Up @@ -398,63 +392,6 @@ def mask_cmp_op(x, y, op):
return result


def masked_arith_op(x, y, op):
"""
If the given arithmetic operation fails, attempt it again on
only the non-null elements of the input array(s).
Parameters
----------
x : np.ndarray
y : np.ndarray, Series, Index
op : binary operator
"""
# For Series `x` is 1D so ravel() is a no-op; calling it anyway makes
# the logic valid for both Series and DataFrame ops.
xrav = x.ravel()
assert isinstance(x, np.ndarray), type(x)
if isinstance(y, np.ndarray):
dtype = find_common_type([x.dtype, y.dtype])
result = np.empty(x.size, dtype=dtype)

# PeriodIndex.ravel() returns int64 dtype, so we have
# to work around that case. See GH#19956
yrav = y if is_period_dtype(y) else y.ravel()
mask = notna(xrav) & notna(yrav)

if yrav.shape != mask.shape:
# FIXME: GH#5284, GH#5035, GH#19448
# Without specifically raising here we get mismatched
# errors in Py3 (TypeError) vs Py2 (ValueError)
# Note: Only = an issue in DataFrame case
raise ValueError("Cannot broadcast operands together.")

if mask.any():
with np.errstate(all="ignore"):
result[mask] = op(xrav[mask], yrav[mask])

else:
assert is_scalar(y), type(y)
assert isinstance(x, np.ndarray), type(x)
# mask is only meaningful for x
result = np.empty(x.size, dtype=x.dtype)
mask = notna(xrav)

# 1 ** np.nan is 1. So we have to unmask those.
if op == pow:
mask = np.where(x == 1, False, mask)
elif op == rpow:
mask = np.where(y == 1, False, mask)

if mask.any():
with np.errstate(all="ignore"):
result[mask] = op(xrav[mask], y)

result, changed = maybe_upcast_putmask(result, ~mask, np.nan)
result = result.reshape(x.shape) # 2D compat
return result


# -----------------------------------------------------------------------------
# Dispatch logic

Expand Down Expand Up @@ -673,33 +610,7 @@ def _arith_method_SERIES(cls, op, special):
_construct_divmod_result if op in [divmod, rdivmod] else _construct_result
)

def na_op(x, y):
"""
Return the result of evaluating op on the passed in values.
If native types are not compatible, try coersion to object dtype.
Parameters
----------
x : array-like
y : array-like or scalar
Returns
-------
array-like
Raises
------
TypeError : invalid operation
"""
import pandas.core.computation.expressions as expressions

try:
result = expressions.evaluate(op, str_rep, x, y, **eval_kwargs)
except TypeError:
result = masked_arith_op(x, y, op)

return missing.dispatch_fill_zeros(op, x, y, result)
na_op = define_na_arithmetic_op(op, str_rep, eval_kwargs)

def wrapper(left, right):
if isinstance(right, ABCDataFrame):
Expand Down Expand Up @@ -735,22 +646,6 @@ def wrapper(left, right):
return wrapper


def _comp_method_OBJECT_ARRAY(op, x, y):
if isinstance(y, list):
y = construct_1d_object_array_from_listlike(y)
if isinstance(y, (np.ndarray, ABCSeries, ABCIndex)):
if not is_object_dtype(y.dtype):
y = y.astype(np.object_)

if isinstance(y, (ABCSeries, ABCIndex)):
y = y.values

result = libops.vec_compare(x, y, op)
else:
result = libops.scalar_compare(x, y, op)
return result


def _comp_method_SERIES(cls, op, special):
"""
Wrapper function for Series arithmetic operations, to avoid
Expand All @@ -764,7 +659,7 @@ def na_op(x, y):
# Extension Dtypes are not called here

if is_object_dtype(x.dtype):
result = _comp_method_OBJECT_ARRAY(op, x, y)
result = comp_method_OBJECT_ARRAY(op, x, y)

elif is_datetimelike_v_numeric(x, y):
return invalid_comparison(x, y, op)
Expand Down Expand Up @@ -1091,15 +986,7 @@ def _arith_method_FRAME(cls, op, special):
eval_kwargs = _gen_eval_kwargs(op_name)
default_axis = _get_frame_op_default_axis(op_name)

def na_op(x, y):
import pandas.core.computation.expressions as expressions

try:
result = expressions.evaluate(op, str_rep, x, y, **eval_kwargs)
except TypeError:
result = masked_arith_op(x, y, op)

return missing.dispatch_fill_zeros(op, x, y, result)
na_op = define_na_arithmetic_op(op, str_rep, eval_kwargs)

if op_name in _op_descriptions:
# i.e. include "add" but not "__add__"
Expand Down
127 changes: 127 additions & 0 deletions pandas/core/ops/array_ops.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,127 @@
"""
Functions for arithmetic and comparison operations on NumPy arrays and
ExtensionArrays.
"""
import numpy as np

from pandas._libs import ops as libops

from pandas.core.dtypes.cast import (
construct_1d_object_array_from_listlike,
find_common_type,
maybe_upcast_putmask,
)
from pandas.core.dtypes.common import is_object_dtype, is_period_dtype, is_scalar
from pandas.core.dtypes.generic import ABCIndex, ABCSeries
from pandas.core.dtypes.missing import notna

from pandas.core.ops import missing
from pandas.core.ops.roperator import rpow


def comp_method_OBJECT_ARRAY(op, x, y):
    """
    Apply the comparison ``op`` between the array ``x`` and ``y``.

    Parameters
    ----------
    op : comparison operator
        Forwarded unchanged to the ``libops`` comparison routine.
    x : np.ndarray
        Left operand, passed to ``libops`` as-is.
    y : list, np.ndarray, Series, Index or scalar
        Right operand.  Lists are first converted to a 1D object array;
        array-likes are cast to object dtype when they are not already.

    Returns
    -------
    The result of ``libops.vec_compare`` for array-like ``y`` and of
    ``libops.scalar_compare`` for everything else.
    """
    if isinstance(y, list):
        y = construct_1d_object_array_from_listlike(y)

    # TODO: should the two isinstance checks below use ABCIndexClass?
    if not isinstance(y, (np.ndarray, ABCSeries, ABCIndex)):
        # Anything that is not array-like is compared as a single value.
        return libops.scalar_compare(x, y, op)

    if not is_object_dtype(y.dtype):
        y = y.astype(np.object_)

    if isinstance(y, (ABCSeries, ABCIndex)):
        # Unwrap to the underlying values before handing off to libops.
        y = y.values

    return libops.vec_compare(x, y, op)


def masked_arith_op(x, y, op):
    """
    If the given arithmetic operation fails, attempt it again on
    only the non-null elements of the input array(s).

    Parameters
    ----------
    x : np.ndarray
    y : np.ndarray or scalar
        Anything that is not an ndarray must satisfy ``is_scalar``.
    op : binary operator

    Returns
    -------
    The flat result, NaN-filled at the positions that were not computed,
    reshaped to ``x.shape``.

    Raises
    ------
    AssertionError
        If ``x`` is not an ndarray, or ``y`` is neither an ndarray nor a
        scalar.
    ValueError
        If ``y`` is an ndarray whose flattened shape differs from the mask's.
    """
    # Validate before touching `x` so that a wrong type surfaces as the
    # intended AssertionError rather than a failure inside ravel().
    assert isinstance(x, np.ndarray), type(x)

    # For Series `x` is 1D so ravel() is a no-op; calling it anyway makes
    # the logic valid for both Series and DataFrame ops.
    xrav = x.ravel()

    if isinstance(y, np.ndarray):
        dtype = find_common_type([x.dtype, y.dtype])
        result = np.empty(x.size, dtype=dtype)

        # PeriodIndex.ravel() returns int64 dtype, so we have
        # to work around that case.  See GH#19956
        yrav = y if is_period_dtype(y) else y.ravel()
        mask = notna(xrav) & notna(yrav)

        if yrav.shape != mask.shape:
            # FIXME: GH#5284, GH#5035, GH#19448
            # Without specifically raising here we get mismatched
            # errors in Py3 (TypeError) vs Py2 (ValueError)
            # Note: only an issue in the DataFrame case
            raise ValueError("Cannot broadcast operands together.")

        if mask.any():
            with np.errstate(all="ignore"):
                result[mask] = op(xrav[mask], yrav[mask])

    else:
        assert is_scalar(y), type(y)
        # mask is only meaningful for x
        result = np.empty(x.size, dtype=x.dtype)
        mask = notna(xrav)

        # 1 ** np.nan is 1.  So we have to unmask those.
        # The condition is taken from the flattened `xrav`, not `x`:
        # `mask` and `result` are flat, so a 2D condition would either fail
        # to broadcast or yield a 2D mask that cannot index `result`.
        # NOTE(review): entries dropped from the mask here are not computed
        # and are handed to the NaN fill below -- confirm that this matches
        # the intent stated above.
        if op is pow:
            mask = np.where(xrav == 1, False, mask)
        elif op is rpow:
            mask = np.where(y == 1, False, mask)

        if mask.any():
            with np.errstate(all="ignore"):
                result[mask] = op(xrav[mask], y)

    # Fill every position that was not computed with NaN; the helper's
    # second return value is not needed here.
    result, _ = maybe_upcast_putmask(result, ~mask, np.nan)
    return result.reshape(x.shape)  # 2D compat


def define_na_arithmetic_op(op, str_rep, eval_kwargs):
    """
    Build the ``na_op(x, y)`` evaluator for one arithmetic operator.

    Parameters
    ----------
    op : binary operator
    str_rep : passed through to ``expressions.evaluate``
    eval_kwargs : dict
        Extra keyword arguments forwarded to ``expressions.evaluate``.

    Returns
    -------
    function
        A closure ``na_op(x, y)`` bound to the arguments above.
    """

    def na_op(x, y):
        """
        Return the result of evaluating op on the passed in values.

        If ``expressions.evaluate`` raises TypeError, fall back to
        ``masked_arith_op``.

        Parameters
        ----------
        x : array-like
        y : array-like or scalar

        Returns
        -------
        array-like

        Raises
        ------
        TypeError : invalid operation
        """
        # Imported at call time rather than at module import.
        import pandas.core.computation.expressions as expressions

        try:
            evaluated = expressions.evaluate(op, str_rep, x, y, **eval_kwargs)
        except TypeError:
            # Retry on the non-null elements only.
            evaluated = masked_arith_op(x, y, op)

        return missing.dispatch_fill_zeros(op, x, y, evaluated)

    return na_op