-
-
Notifications
You must be signed in to change notification settings - Fork 18.4k
De-duplicate masking/fallback logic in ops #19613
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from 2 commits
1c65d80
1e18968
d9e8518
0accde4
221f8a6
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -398,6 +398,85 @@ def _make_flex_doc(op_name, typ): | |
return doc | ||
|
||
|
||
# ----------------------------------------------------------------------------- | ||
# Masking NA values and fallbacks for operations numpy does not support | ||
|
||
def fill_binop(left, right, fill_value): | ||
if fill_value is not None: | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. can you add a doc-string |
||
left_mask = isna(left) | ||
right_mask = isna(right) | ||
left = left.copy() | ||
right = right.copy() | ||
|
||
# one but not both | ||
mask = left_mask ^ right_mask | ||
left[left_mask & mask] = fill_value | ||
right[right_mask & mask] = fill_value | ||
return left, right | ||
|
||
|
||
def mask_arith_op(x, y, op): | ||
xrav = x.ravel() | ||
if isinstance(y, (np.ndarray, ABCSeries, pd.Index)): | ||
# The ABCSeries and pd.Index cases are only reached for Series ops, | ||
# DataFrame casts these inputs to Series in align_method_FRAME | ||
dtype = find_common_type([x.dtype, y.dtype]) | ||
result = np.empty(x.size, dtype=dtype) | ||
yrav = y.ravel() | ||
mask = notna(xrav) & notna(yrav) | ||
xrav = xrav[mask] | ||
|
||
if yrav.shape != mask.shape: | ||
# Only a risk for DataFrame. | ||
# FIXME: GH#5284, GH#5035, GH#19448 | ||
# Without specifically raising here we get mismatched | ||
# errors in Py3 (TypeError) vs Py2 (ValueError) | ||
raise ValueError('Cannot broadcast operands together.') | ||
|
||
yrav = com._values_from_object(y[mask]) | ||
if xrav.size: | ||
# Avoid the operation if it is on an empty array | ||
with np.errstate(all='ignore'): | ||
result[mask] = op(xrav, yrav) | ||
|
||
elif isinstance(x, np.ndarray): | ||
# this always holds for Series, never for DataFrame | ||
result = np.empty(x.size, dtype=x.dtype) | ||
mask = notna(x) | ||
result[mask] = op(x[mask], y) | ||
|
||
else: | ||
# Raise otherwise; only relevant for DataFrame | ||
raise TypeError("cannot perform operation {op} between " | ||
"objects of type {x} and {y}".format(op=name, | ||
x=type(x), | ||
y=type(y))) | ||
|
||
result, changed = maybe_upcast_putmask(result, ~mask, np.nan) | ||
return result.reshape(x.shape) | ||
|
||
|
||
def mask_cmp_op(x, y, op, allowed_types): | ||
# TODO: Can we make the allowed_types arg unnecessary? | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. can you add a doc-string |
||
xrav = x.ravel() | ||
result = np.empty(x.size, dtype=bool) | ||
if isinstance(y, allowed_types): | ||
yrav = y.ravel() | ||
mask = notna(xrav) & notna(yrav) | ||
result[mask] = op(np.array(list(xrav[mask])), | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Do you know why we build the intermediate There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. No idea. I can try removing it and see if that affects any tests. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Looks like a complex-dtype test would be affected. |
||
np.array(list(yrav[mask]))) | ||
else: | ||
mask = notna(xrav) | ||
result[mask] = op(np.array(list(xrav[mask])), y) | ||
|
||
if op == operator.ne: # pragma: no cover | ||
np.putmask(result, ~mask, True) | ||
else: | ||
np.putmask(result, ~mask, False) | ||
result = result.reshape(x.shape) | ||
return result | ||
|
||
|
||
# ----------------------------------------------------------------------------- | ||
# Functions that add arithmetic methods to objects, given arithmetic factory | ||
# methods | ||
|
@@ -642,18 +721,7 @@ def na_op(x, y): | |
try: | ||
result = expressions.evaluate(op, str_rep, x, y, **eval_kwargs) | ||
except TypeError: | ||
if isinstance(y, (np.ndarray, ABCSeries, pd.Index)): | ||
dtype = find_common_type([x.dtype, y.dtype]) | ||
result = np.empty(x.size, dtype=dtype) | ||
mask = notna(x) & notna(y) | ||
result[mask] = op(x[mask], com._values_from_object(y[mask])) | ||
else: | ||
assert isinstance(x, np.ndarray) | ||
result = np.empty(len(x), dtype=x.dtype) | ||
mask = notna(x) | ||
result[mask] = op(x[mask], y) | ||
|
||
result, changed = maybe_upcast_putmask(result, ~mask, np.nan) | ||
result = mask_arith_op(x, y, op) | ||
|
||
result = missing.fill_zeros(result, x, y, name, fill_zeros) | ||
return result | ||
|
@@ -1053,40 +1121,7 @@ def na_op(x, y): | |
try: | ||
result = expressions.evaluate(op, str_rep, x, y, **eval_kwargs) | ||
except TypeError: | ||
xrav = x.ravel() | ||
if isinstance(y, (np.ndarray, ABCSeries)): | ||
dtype = np.find_common_type([x.dtype, y.dtype], []) | ||
result = np.empty(x.size, dtype=dtype) | ||
yrav = y.ravel() | ||
mask = notna(xrav) & notna(yrav) | ||
xrav = xrav[mask] | ||
|
||
if yrav.shape != mask.shape: | ||
# FIXME: GH#5284, GH#5035, GH#19448 | ||
# Without specifically raising here we get mismatched | ||
# errors in Py3 (TypeError) vs Py2 (ValueError) | ||
raise ValueError('Cannot broadcast operands together.') | ||
|
||
yrav = yrav[mask] | ||
if xrav.size: | ||
with np.errstate(all='ignore'): | ||
result[mask] = op(xrav, yrav) | ||
|
||
elif isinstance(x, np.ndarray): | ||
# mask is only meaningful for x | ||
result = np.empty(x.size, dtype=x.dtype) | ||
mask = notna(xrav) | ||
xrav = xrav[mask] | ||
if xrav.size: | ||
with np.errstate(all='ignore'): | ||
result[mask] = op(xrav, y) | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I don't think this case is reached/reachable. If y is an ndarray then most non-trivial cases should have shape mismatches if this were ever reached. |
||
else: | ||
raise TypeError("cannot perform operation {op} between " | ||
"objects of type {x} and {y}".format( | ||
op=name, x=type(x), y=type(y))) | ||
|
||
result, changed = maybe_upcast_putmask(result, ~mask, np.nan) | ||
result = result.reshape(x.shape) | ||
result = mask_arith_op(x, y, op) | ||
|
||
result = missing.fill_zeros(result, x, y, name, fill_zeros) | ||
|
||
|
@@ -1127,23 +1162,7 @@ def na_op(x, y): | |
with np.errstate(invalid='ignore'): | ||
result = op(x, y) | ||
except TypeError: | ||
xrav = x.ravel() | ||
result = np.empty(x.size, dtype=bool) | ||
if isinstance(y, (np.ndarray, ABCSeries)): | ||
yrav = y.ravel() | ||
mask = notna(xrav) & notna(yrav) | ||
result[mask] = op(np.array(list(xrav[mask])), | ||
np.array(list(yrav[mask]))) | ||
else: | ||
mask = notna(xrav) | ||
result[mask] = op(np.array(list(xrav[mask])), y) | ||
|
||
if op == operator.ne: # pragma: no cover | ||
np.putmask(result, ~mask, True) | ||
else: | ||
np.putmask(result, ~mask, False) | ||
result = result.reshape(x.shape) | ||
|
||
result = mask_cmp_op(x, y, op, (np.ndarray, ABCSeries)) | ||
return result | ||
|
||
@Appender('Wrapper for flexible comparison methods {name}' | ||
|
@@ -1221,23 +1240,7 @@ def na_op(x, y): | |
try: | ||
result = expressions.evaluate(op, str_rep, x, y) | ||
except TypeError: | ||
xrav = x.ravel() | ||
result = np.empty(x.size, dtype=bool) | ||
if isinstance(y, np.ndarray): | ||
yrav = y.ravel() | ||
mask = notna(xrav) & notna(yrav) | ||
result[mask] = op(np.array(list(xrav[mask])), | ||
np.array(list(yrav[mask]))) | ||
else: | ||
mask = notna(xrav) | ||
result[mask] = op(np.array(list(xrav[mask])), y) | ||
|
||
if op == operator.ne: # pragma: no cover | ||
np.putmask(result, ~mask, True) | ||
else: | ||
np.putmask(result, ~mask, False) | ||
result = result.reshape(x.shape) | ||
|
||
result = mask_cmp_op(x, y, op, np.ndarray) | ||
return result | ||
|
||
@Appender('Wrapper for comparison method {name}'.format(name=name)) | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
you might be able to simplify this even more here (IOW remove _arith_op) and just in-line it
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
This overlaps with #19611. This will probably get in before that does, so I'll take a look at this suggestion after rebasing there.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
aok