From 1c65d80341860c441cf5c25884b915f259bf3ee7 Mon Sep 17 00:00:00 2001 From: Brock Mendel Date: Thu, 8 Feb 2018 20:17:11 -0800 Subject: [PATCH 1/5] de-dup logic for series/frame/panel methods --- pandas/core/ops.py | 150 +++++++++++++++++++++------------------------ 1 file changed, 70 insertions(+), 80 deletions(-) diff --git a/pandas/core/ops.py b/pandas/core/ops.py index effa35695fcd1..c65be1cfde64d 100644 --- a/pandas/core/ops.py +++ b/pandas/core/ops.py @@ -398,6 +398,72 @@ def _make_flex_doc(op_name, typ): return doc +# ----------------------------------------------------------------------------- +# Masking NA values and fallbacks for operations numpy does not support + + +def mask_arith_op(x, y, op): + xrav = x.ravel() + if isinstance(y, (np.ndarray, ABCSeries, pd.Index)): + # The ABCSeries and pd.Index cases are only reached for Series ops, + # DataFrame casts these inputs to Series in align_method_FRAME + dtype = find_common_type([x.dtype, y.dtype]) + result = np.empty(x.size, dtype=dtype) + yrav = y.ravel() + mask = notna(xrav) & notna(yrav) + xrav = xrav[mask] + + if yrav.shape != mask.shape: + # Only a risk for DataFrame. + # FIXME: GH#5284, GH#5035, GH#19448 + # Without specifically raising here we get mismatched + # errors in Py3 (TypeError) vs Py2 (ValueError) + raise ValueError('Cannot broadcast operands together.') + + yrav = com._values_from_object(y[mask]) + if xrav.size: + # Avoid the operation if it is on an empty array + with np.errstate(all='ignore'): + result[mask] = op(xrav, yrav) + + elif isinstance(x, np.ndarray): + # this always holds for Series, never for DataFrame + result = np.empty(x.size, dtype=x.dtype) + mask = notna(x) + result[mask] = op(x[mask], y) + + else: + # Raise otherwise; only relevant for DataFrame + raise TypeError("cannot perform operation {op} between " + "objects of type {x} and {y}".format(op=name, + x=type(x), + y=type(y))) + + result, changed = maybe_upcast_putmask(result, ~mask, np.nan) + return result.reshape(x.shape) + + +def mask_cmp_op(x, y, op, allowed_types): + # TODO: Can we make the allowed_types arg unnecessary? + xrav = x.ravel() + result = np.empty(x.size, dtype=bool) + if isinstance(y, allowed_types): + yrav = y.ravel() + mask = notna(xrav) & notna(yrav) + result[mask] = op(np.array(list(xrav[mask])), + np.array(list(yrav[mask]))) + else: + mask = notna(xrav) + result[mask] = op(np.array(list(xrav[mask])), y) + + if op == operator.ne: # pragma: no cover + np.putmask(result, ~mask, True) + else: + np.putmask(result, ~mask, False) + result = result.reshape(x.shape) + return result + + # ----------------------------------------------------------------------------- # Functions that add arithmetic methods to objects, given arithmetic factory # methods @@ -642,18 +708,7 @@ def na_op(x, y): try: result = expressions.evaluate(op, str_rep, x, y, **eval_kwargs) except TypeError: - if isinstance(y, (np.ndarray, ABCSeries, pd.Index)): - dtype = find_common_type([x.dtype, y.dtype]) - result = np.empty(x.size, dtype=dtype) - mask = notna(x) & notna(y) - result[mask] = op(x[mask], com._values_from_object(y[mask])) - else: - assert isinstance(x, np.ndarray) - result = np.empty(len(x), dtype=x.dtype) - mask = notna(x) - result[mask] = op(x[mask], y) - - result, changed = maybe_upcast_putmask(result, ~mask, np.nan) + result = mask_arith_op(x, y, op) result = missing.fill_zeros(result, x, y, name, fill_zeros) return result @@ -1053,40 +1108,7 @@ def na_op(x, y): try: result = expressions.evaluate(op, str_rep, x, y, **eval_kwargs) except TypeError: - xrav = x.ravel() - if isinstance(y, (np.ndarray, ABCSeries)): - dtype = np.find_common_type([x.dtype, y.dtype], []) - result = np.empty(x.size, dtype=dtype) - yrav = y.ravel() - mask = notna(xrav) & notna(yrav) - xrav = xrav[mask] - - if yrav.shape != mask.shape: - # FIXME: GH#5284, GH#5035, GH#19448 - # Without specifically raising here we get mismatched - # errors in Py3 (TypeError) vs Py2 (ValueError) - raise ValueError('Cannot broadcast operands together.') - - yrav = yrav[mask] - if xrav.size: - with np.errstate(all='ignore'): - result[mask] = op(xrav, yrav) - - elif isinstance(x, np.ndarray): - # mask is only meaningful for x - result = np.empty(x.size, dtype=x.dtype) - mask = notna(xrav) - xrav = xrav[mask] - if xrav.size: - with np.errstate(all='ignore'): - result[mask] = op(xrav, y) - else: - raise TypeError("cannot perform operation {op} between " - "objects of type {x} and {y}".format( - op=name, x=type(x), y=type(y))) - - result, changed = maybe_upcast_putmask(result, ~mask, np.nan) - result = result.reshape(x.shape) + result = mask_arith_op(x, y, op) result = missing.fill_zeros(result, x, y, name, fill_zeros) @@ -1127,23 +1149,7 @@ def na_op(x, y): with np.errstate(invalid='ignore'): result = op(x, y) except TypeError: - xrav = x.ravel() - result = np.empty(x.size, dtype=bool) - if isinstance(y, (np.ndarray, ABCSeries)): - yrav = y.ravel() - mask = notna(xrav) & notna(yrav) - result[mask] = op(np.array(list(xrav[mask])), - np.array(list(yrav[mask]))) - else: - mask = notna(xrav) - result[mask] = op(np.array(list(xrav[mask])), y) - - if op == operator.ne: # pragma: no cover - np.putmask(result, ~mask, True) - else: - np.putmask(result, ~mask, False) - result = result.reshape(x.shape) - + result = mask_cmp_op(x, y, op, (np.ndarray, ABCSeries)) return result @Appender('Wrapper for flexible comparison methods {name}' @@ -1221,23 +1227,7 @@ def na_op(x, y): try: result = expressions.evaluate(op, str_rep, x, y) except TypeError: - xrav = x.ravel() - result = np.empty(x.size, dtype=bool) - if isinstance(y, np.ndarray): - yrav = y.ravel() - mask = notna(xrav) & notna(yrav) - result[mask] = op(np.array(list(xrav[mask])), - np.array(list(yrav[mask]))) - else: - mask = notna(xrav) - result[mask] = op(np.array(list(xrav[mask])), y) - - if op == operator.ne: # pragma: no cover - np.putmask(result, ~mask, True) - else: - np.putmask(result, ~mask, False) - result = result.reshape(x.shape) - + result = mask_cmp_op(x, y, op, np.ndarray) return result @Appender('Wrapper for comparison method {name}'.format(name=name)) From 1e189680b14a33489a6addd4ab03796a93f6cbc2 Mon Sep 17 00:00:00 2001 From: Brock Mendel Date: Thu, 8 Feb 2018 20:21:38 -0800 Subject: [PATCH 2/5] dedup filling in combine_frame and binop --- pandas/core/frame.py | 12 +----------- pandas/core/ops.py | 13 +++++++++++++ pandas/core/series.py | 15 ++------------- 3 files changed, 16 insertions(+), 24 deletions(-) diff --git a/pandas/core/frame.py b/pandas/core/frame.py index 28923f0fbf240..f0adc77e46e5d 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -3920,17 +3920,7 @@ def _combine_frame(self, other, func, fill_value=None, level=None): new_index, new_columns = this.index, this.columns def _arith_op(left, right): - if fill_value is not None: - left_mask = isna(left) - right_mask = isna(right) - left = left.copy() - right = right.copy() - - # one but not both - mask = left_mask ^ right_mask - left[left_mask & mask] = fill_value - right[right_mask & mask] = fill_value - + left, right = ops.fill_binop(left, right, fill_value) return func(left, right) if this._is_mixed_type or other._is_mixed_type: diff --git a/pandas/core/ops.py b/pandas/core/ops.py index c65be1cfde64d..663c379b3e000 100644 --- a/pandas/core/ops.py +++ b/pandas/core/ops.py @@ -401,6 +401,19 @@ def _make_flex_doc(op_name, typ): # ----------------------------------------------------------------------------- # Masking NA values and fallbacks for operations numpy does not support +def fill_binop(left, right, fill_value): + if fill_value is not None: + left_mask = isna(left) + right_mask = isna(right) + left = left.copy() + right = right.copy() + + # one but not both + mask = left_mask ^ right_mask + left[left_mask & mask] = fill_value + right[right_mask & mask] = fill_value + return left, right + def mask_arith_op(x, y, op): xrav = x.ravel() diff --git a/pandas/core/series.py b/pandas/core/series.py index e4b8979d6393a..655eaa5373f5a 100644 --- a/pandas/core/series.py +++ b/pandas/core/series.py @@ -1725,19 +1725,8 @@ def _binop(self, other, func, level=None, fill_value=None): copy=False) new_index = this.index - this_vals = this.values - other_vals = other.values - - if fill_value is not None: - this_mask = isna(this_vals) - other_mask = isna(other_vals) - this_vals = this_vals.copy() - other_vals = other_vals.copy() - - # one but not both - mask = this_mask ^ other_mask - this_vals[this_mask & mask] = fill_value - other_vals[other_mask & mask] = fill_value + this_vals, other_vals = ops.fill_binop(this.values, other.values, + fill_value) with np.errstate(all='ignore'): result = func(this_vals, other_vals) From d9e8518032d1faa2a775eb3b9ed860d370817554 Mon Sep 17 00:00:00 2001 From: Brock Mendel Date: Thu, 8 Feb 2018 22:09:18 -0800 Subject: [PATCH 3/5] Fix typo --- pandas/core/ops.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/core/ops.py b/pandas/core/ops.py index 663c379b3e000..73e8eddfa8d94 100644 --- a/pandas/core/ops.py +++ b/pandas/core/ops.py @@ -433,7 +433,7 @@ def mask_arith_op(x, y, op): # errors in Py3 (TypeError) vs Py2 (ValueError) raise ValueError('Cannot broadcast operands together.') - yrav = com._values_from_object(y[mask]) + yrav = com._values_from_object(yrav[mask]) if xrav.size: # Avoid the operation if it is on an empty array with np.errstate(all='ignore'): From 0accde4640a806471355d2ba604790501fb191b9 Mon Sep 17 00:00:00 2001 From: Brock Mendel Date: Thu, 8 Feb 2018 23:29:02 -0800 Subject: [PATCH 4/5] revert changes that appeared to break slow tests --- pandas/core/ops.py | 89 ++++++++++++++++++++++++---------------------- 1 file changed, 46 insertions(+), 43 deletions(-) diff --git a/pandas/core/ops.py b/pandas/core/ops.py index 73e8eddfa8d94..091a964a65205 100644 --- a/pandas/core/ops.py +++ b/pandas/core/ops.py @@ -415,47 +415,6 @@ def fill_binop(left, right, fill_value): return left, right -def mask_arith_op(x, y, op): - xrav = x.ravel() - if isinstance(y, (np.ndarray, ABCSeries, pd.Index)): - # The ABCSeries and pd.Index cases are only reached for Series ops, - # DataFrame casts these inputs to Series in align_method_FRAME - dtype = find_common_type([x.dtype, y.dtype]) - result = np.empty(x.size, dtype=dtype) - yrav = y.ravel() - mask = notna(xrav) & notna(yrav) - xrav = xrav[mask] - - if yrav.shape != mask.shape: - # Only a risk for DataFrame. - # FIXME: GH#5284, GH#5035, GH#19448 - # Without specifically raising here we get mismatched - # errors in Py3 (TypeError) vs Py2 (ValueError) - raise ValueError('Cannot broadcast operands together.') - - yrav = com._values_from_object(yrav[mask]) - if xrav.size: - # Avoid the operation if it is on an empty array - with np.errstate(all='ignore'): - result[mask] = op(xrav, yrav) - - elif isinstance(x, np.ndarray): - # this always holds for Series, never for DataFrame - result = np.empty(x.size, dtype=x.dtype) - mask = notna(x) - result[mask] = op(x[mask], y) - - else: - # Raise otherwise; only relevant for DataFrame - raise TypeError("cannot perform operation {op} between " - "objects of type {x} and {y}".format(op=name, - x=type(x), - y=type(y))) - - result, changed = maybe_upcast_putmask(result, ~mask, np.nan) - return result.reshape(x.shape) - - def mask_cmp_op(x, y, op, allowed_types): # TODO: Can we make the allowed_types arg unnecessary? xrav = x.ravel() @@ -721,7 +680,18 @@ def na_op(x, y): try: result = expressions.evaluate(op, str_rep, x, y, **eval_kwargs) except TypeError: - result = mask_arith_op(x, y, op) + if isinstance(y, (np.ndarray, ABCSeries, pd.Index)): + dtype = find_common_type([x.dtype, y.dtype]) + result = np.empty(x.size, dtype=dtype) + mask = notna(x) & notna(y) + result[mask] = op(x[mask], com._values_from_object(y[mask])) + else: + assert isinstance(x, np.ndarray) + result = np.empty(len(x), dtype=x.dtype) + mask = notna(x) + result[mask] = op(x[mask], y) + + result, changed = maybe_upcast_putmask(result, ~mask, np.nan) result = missing.fill_zeros(result, x, y, name, fill_zeros) return result @@ -1121,7 +1091,40 @@ def na_op(x, y): try: result = expressions.evaluate(op, str_rep, x, y, **eval_kwargs) except TypeError: - result = mask_arith_op(x, y, op) + xrav = x.ravel() + if isinstance(y, (np.ndarray, ABCSeries)): + dtype = np.find_common_type([x.dtype, y.dtype], []) + result = np.empty(x.size, dtype=dtype) + yrav = y.ravel() + mask = notna(xrav) & notna(yrav) + xrav = xrav[mask] + + if yrav.shape != mask.shape: + # FIXME: GH#5284, GH#5035, GH#19448 + # Without specifically raising here we get mismatched + # errors in Py3 (TypeError) vs Py2 (ValueError) + raise ValueError('Cannot broadcast operands together.') + + yrav = yrav[mask] + if xrav.size: + with np.errstate(all='ignore'): + result[mask] = op(xrav, yrav) + + elif isinstance(x, np.ndarray): + # mask is only meaningful for x + result = np.empty(x.size, dtype=x.dtype) + mask = notna(xrav) + xrav = xrav[mask] + if xrav.size: + with np.errstate(all='ignore'): + result[mask] = op(xrav, y) + else: + raise TypeError("cannot perform operation {op} between " + "objects of type {x} and {y}".format( + op=name, x=type(x), y=type(y))) + + result, changed = maybe_upcast_putmask(result, ~mask, np.nan) + result = result.reshape(x.shape) result = missing.fill_zeros(result, x, y, name, fill_zeros) From 221f8a63d74445a160e03c77c78e21374a8c3a61 Mon Sep 17 00:00:00 2001 From: Brock Mendel Date: Sat, 10 Feb 2018 10:43:19 -0800 Subject: [PATCH 5/5] docstrings --- pandas/core/ops.py | 35 +++++++++++++++++++++++++++++++++++ 1 file changed, 35 insertions(+) diff --git a/pandas/core/ops.py b/pandas/core/ops.py index 091a964a65205..4c234ccb4dd47 100644 --- a/pandas/core/ops.py +++ b/pandas/core/ops.py @@ -402,6 +402,27 @@ def _make_flex_doc(op_name, typ): # Masking NA values and fallbacks for operations numpy does not support def fill_binop(left, right, fill_value): + """ + If a non-None fill_value is given, replace null entries in left and right + with this value, but only in positions where _one_ of left/right is null, + not both. + + Parameters + ---------- + left : array-like + right : array-like + fill_value : object + + Returns + ------- + left : array-like + right : array-like + + Notes + ----- + Makes copies if fill_value is not None + """ + # TODO: can we make a no-copy implementation? if fill_value is not None: left_mask = isna(left) right_mask = isna(right) @@ -416,6 +437,20 @@ def fill_binop(left, right, fill_value): def mask_cmp_op(x, y, op, allowed_types): + """ + Apply the function `op` to only non-null points in x and y. + + Parameters + ---------- + x : array-like + y : array-like + op : binary operation + allowed_types : class or tuple of classes + + Returns + ------- + result : ndarray[bool] + """ # TODO: Can we make the allowed_types arg unnecessary? xrav = x.ravel() result = np.empty(x.size, dtype=bool)