From a13c16165d718c4fc0f010ef3d6325f125c01fe2 Mon Sep 17 00:00:00 2001 From: Brock Mendel Date: Wed, 25 Jul 2018 21:24:36 -0700 Subject: [PATCH 1/7] fix trailing whitespace --- pandas/_libs/tslibs/period.pyx | 2 +- pandas/_libs/tslibs/timedeltas.pyx | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/pandas/_libs/tslibs/period.pyx b/pandas/_libs/tslibs/period.pyx index 4054154cd285b..517f126e0cd6d 100644 --- a/pandas/_libs/tslibs/period.pyx +++ b/pandas/_libs/tslibs/period.pyx @@ -1539,7 +1539,7 @@ cdef class _Period(object): See Also -------- Period.year : Return the calendar year of the period. - + Examples -------- If the natural and fiscal year are the same, `qyear` and `year` will diff --git a/pandas/_libs/tslibs/timedeltas.pyx b/pandas/_libs/tslibs/timedeltas.pyx index f7a6cf0c6dafc..221c345531208 100644 --- a/pandas/_libs/tslibs/timedeltas.pyx +++ b/pandas/_libs/tslibs/timedeltas.pyx @@ -929,7 +929,7 @@ cdef class _Timedelta(timedelta): def nanoseconds(self): """ Return the number of nanoseconds (n), where 0 <= n < 1 microsecond. - + Returns ------- int From c8948bc4ae2456b14fc1b7b33c3b615f7c6d0bba Mon Sep 17 00:00:00 2001 From: Brock Mendel Date: Wed, 25 Jul 2018 21:25:04 -0700 Subject: [PATCH 2/7] implement dispatch_to_series, remove a layer of closure --- pandas/core/frame.py | 37 +++------------------- pandas/core/ops.py | 74 +++++++++++++++++++++++++++----------------- 2 files changed, 50 insertions(+), 61 deletions(-) diff --git a/pandas/core/frame.py b/pandas/core/frame.py index 078e176ff2b99..35bd6240c7958 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -4901,20 +4901,7 @@ def _arith_op(left, right): if this._is_mixed_type or other._is_mixed_type: # iterate over columns - if this.columns.is_unique: - # unique columns - result = {col: _arith_op(this[col], other[col]) - for col in this} - result = self._constructor(result, index=new_index, - columns=new_columns, copy=False) - else: - # non-unique columns - result = {i: _arith_op(this.iloc[:, i], other.iloc[:, i]) - for i, col in enumerate(this.columns)} - result = self._constructor(result, index=new_index, copy=False) - result.columns = new_columns - return result - + return ops.dispatch_to_series(this, other, _arith_op) else: result = _arith_op(this.values, other.values) @@ -4948,27 +4935,11 @@ def _compare_frame(self, other, func, str_rep): # compare_frame assumes self._indexed_same(other) import pandas.core.computation.expressions as expressions - # unique - if self.columns.is_unique: - def _compare(a, b): - return {col: func(a[col], b[col]) for col in a.columns} + def _compare(a, b): + return expressions.evaluate(func, str_rep, a, b) - new_data = expressions.evaluate(_compare, str_rep, self, other) - return self._constructor(data=new_data, index=self.index, - columns=self.columns, copy=False) - # non-unique - else: - - def _compare(a, b): - return {i: func(a.iloc[:, i], b.iloc[:, i]) - for i, col in enumerate(a.columns)} - - new_data = expressions.evaluate(_compare, str_rep, self, other) - result = self._constructor(data=new_data, index=self.index, - copy=False) - result.columns = self.columns - return result + return ops.dispatch_to_series(self, other, _compare) def combine(self, other, func, fill_value=None, overwrite=True): """ diff --git a/pandas/core/ops.py b/pandas/core/ops.py index c65d2dcdc478c..2384780243e14 100644 --- a/pandas/core/ops.py +++ b/pandas/core/ops.py @@ -1123,30 +1123,6 @@ def na_op(x, y): result = missing.fill_zeros(result, x, y, op_name, fill_zeros) return result - def safe_na_op(lvalues, rvalues): - """ - return the result of evaluating na_op on the passed in values - - try coercion to object type if the native types are not compatible - - Parameters - ---------- - lvalues : array-like - rvalues : array-like - - Raises - ------ - TypeError: invalid operation - """ - try: - with np.errstate(all='ignore'): - return na_op(lvalues, rvalues) - except Exception: - if is_object_dtype(lvalues): - return libalgos.arrmap_object(lvalues, - lambda x: op(x, rvalues)) - raise - def wrapper(left, right): if isinstance(right, ABCDataFrame): return NotImplemented @@ -1174,12 +1150,18 @@ def wrapper(left, right): index=left.index, name=res_name, dtype=result.dtype) + elif is_object_dtype(left): + rvalues = right.values if isinstance(right, ABCSeries) else right + result = libalgos.arrmap_object(left.values, + lambda x: op(x, rvalues)) + lvalues = left.values rvalues = right if isinstance(rvalues, ABCSeries): rvalues = rvalues.values - result = safe_na_op(lvalues, rvalues) + with np.errstate(all='ignore'): + result = na_op(lvalues, rvalues) return construct_result(left, result, index=left.index, name=res_name, dtype=None) @@ -1313,7 +1295,7 @@ def wrapper(self, other, axis=None): return self._constructor(res_values, index=self.index, name=res_name) - if is_datetime64_dtype(self) or is_datetime64tz_dtype(self): + elif is_datetime64_dtype(self) or is_datetime64tz_dtype(self): # Dispatch to DatetimeIndex to ensure identical # Series/Index behavior if (isinstance(other, datetime.date) and @@ -1355,8 +1337,9 @@ def wrapper(self, other, axis=None): name=res_name) elif (is_extension_array_dtype(self) or - (is_extension_array_dtype(other) and - not is_scalar(other))): + (is_extension_array_dtype(other) and not is_scalar(other))): + # Note: the `not is_scalar(other)` condition rules out + # e.g. other == "category" return dispatch_to_extension_op(op, self, other) elif isinstance(other, ABCSeries): @@ -1515,6 +1498,41 @@ def flex_wrapper(self, other, level=None, fill_value=None, axis=0): # ----------------------------------------------------------------------------- # DataFrame +def dispatch_to_series(left, right, func): + """ + Evaluate the frame operation func(left, right) by evaluating + column-by-column, dispatching to the Series implementation. + + Parameters + ---------- + left : DataFrame + right : scalar or DataFrame + func : arithmetic or comparison operator + + Returns + ------- + DataFrame + """ + # Note: we use iloc to access columns for compat with cases + # with non-unique columns. + if lib.is_scalar(right): + new_data = {i: func(left.iloc[:, i], right) + for i in range(len(left.columns))} + elif isinstance(right, ABCDataFrame): + assert right._indexed_same(left) + new_data = {i: func(left.iloc[:, i], right.iloc[:, i]) + for i in range(len(left.columns))} + else: + # Remaining cases have less-obvious dispatch rules + raise NotImplementedError + + result = left._constructor(new_data, index=left.index, copy=False) + # Pin columns instead of passing to constructor for compat with + # non-unique columns case + result.columns = left.columns + return result + + def _combine_series_frame(self, other, func, fill_value=None, axis=None, level=None, try_cast=True): """ From 2f25223ab52051846eaabd9e987340e6c2b06d6e Mon Sep 17 00:00:00 2001 From: Brock Mendel Date: Thu, 26 Jul 2018 09:32:39 -0700 Subject: [PATCH 3/7] revert incorrect change; remove no-longer-reachable cases --- pandas/core/ops.py | 49 +++++++++++++++++++++++++++------------------- 1 file changed, 29 insertions(+), 20 deletions(-) diff --git a/pandas/core/ops.py b/pandas/core/ops.py index 2384780243e14..5f44741080009 100644 --- a/pandas/core/ops.py +++ b/pandas/core/ops.py @@ -1123,6 +1123,30 @@ def na_op(x, y): result = missing.fill_zeros(result, x, y, op_name, fill_zeros) return result + def safe_na_op(lvalues, rvalues): + """ + return the result of evaluating na_op on the passed in values + + try coercion to object type if the native types are not compatible + + Parameters + ---------- + lvalues : array-like + rvalues : array-like + + Raises + ------ + TypeError: invalid operation + """ + try: + with np.errstate(all='ignore'): + return na_op(lvalues, rvalues) + except Exception: + if is_object_dtype(lvalues): + return libalgos.arrmap_object(lvalues, + lambda x: op(x, rvalues)) + raise + def wrapper(left, right): if isinstance(right, ABCDataFrame): return NotImplemented @@ -1150,18 +1174,12 @@ def wrapper(left, right): index=left.index, name=res_name, dtype=result.dtype) - elif is_object_dtype(left): - rvalues = right.values if isinstance(right, ABCSeries) else right - result = libalgos.arrmap_object(left.values, - lambda x: op(x, rvalues)) - lvalues = left.values rvalues = right if isinstance(rvalues, ABCSeries): rvalues = rvalues.values - with np.errstate(all='ignore'): - result = na_op(lvalues, rvalues) + result = safe_na_op(lvalues, rvalues) return construct_result(left, result, index=left.index, name=res_name, dtype=None) @@ -1231,13 +1249,11 @@ def na_op(x, y): # should have guarantess on what x, y can be type-wise # Extension Dtypes are not called here - # dispatch to the categorical if we have a categorical - # in either operand - if is_categorical_dtype(y) and not is_scalar(y): - # The `not is_scalar(y)` check excludes the string "category" - return op(y, x) + # Checking that cases that were once handled here are no longer + # reachable. + assert not (is_categorical_dtype(y) and not is_scalar(y)) - elif is_object_dtype(x.dtype): + if is_object_dtype(x.dtype): result = _comp_method_OBJECT_ARRAY(op, x, y) elif is_datetimelike_v_numeric(x, y): @@ -1362,13 +1378,6 @@ def wrapper(self, other, axis=None): # is not. return result.__finalize__(self).rename(res_name) - elif isinstance(other, pd.Categorical): - # ordering of checks matters; by this point we know - # that not is_categorical_dtype(self) - res_values = op(self.values, other) - return self._constructor(res_values, index=self.index, - name=res_name) - elif is_scalar(other) and isna(other): # numpy does not like comparisons vs None if op is operator.ne: From 8527bd2091abbe7364f5349330bcee8f2655b81d Mon Sep 17 00:00:00 2001 From: Brock Mendel Date: Thu, 26 Jul 2018 11:24:27 -0700 Subject: [PATCH 4/7] Comment and assertion --- pandas/core/ops.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/pandas/core/ops.py b/pandas/core/ops.py index 5f44741080009..6397ce3a314da 100644 --- a/pandas/core/ops.py +++ b/pandas/core/ops.py @@ -1114,6 +1114,7 @@ def na_op(x, y): result[mask] = op(x[mask], com.values_from_object(y[mask])) else: assert isinstance(x, np.ndarray) + assert lib.is_scalar(y) result = np.empty(len(x), dtype=x.dtype) mask = notna(x) result[mask] = op(x[mask], y) @@ -1160,6 +1161,7 @@ def wrapper(left, right): elif (is_extension_array_dtype(left) or is_extension_array_dtype(right)): + # TODO: should this include `not is_scalar(right)`? return dispatch_to_extension_op(op, left, right) elif is_datetime64_dtype(left) or is_datetime64tz_dtype(left): From 2ae4b2ff41ea2091f6da97294613725f554bbd42 Mon Sep 17 00:00:00 2001 From: Brock Mendel Date: Sat, 28 Jul 2018 09:10:56 -0700 Subject: [PATCH 5/7] use imported is_scalar --- pandas/core/ops.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/core/ops.py b/pandas/core/ops.py index 6397ce3a314da..661f7c11617dd 100644 --- a/pandas/core/ops.py +++ b/pandas/core/ops.py @@ -1114,7 +1114,7 @@ def na_op(x, y): result[mask] = op(x[mask], com.values_from_object(y[mask])) else: assert isinstance(x, np.ndarray) - assert lib.is_scalar(y) + assert is_scalar(y) result = np.empty(len(x), dtype=x.dtype) mask = notna(x) result[mask] = op(x[mask], y) From f5bb3b64c42a8e81ecbb2f9805e13f3cbf25ff6f Mon Sep 17 00:00:00 2001 From: Brock Mendel Date: Wed, 1 Aug 2018 17:57:21 -0700 Subject: [PATCH 6/7] dummy commit to force CI --- pandas/core/frame.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/core/frame.py b/pandas/core/frame.py index 46bb8b7045400..b949d5c1bec13 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -4898,7 +4898,7 @@ def _combine_frame(self, other, func, fill_value=None, level=None): new_index, new_columns = this.index, this.columns def _arith_op(left, right): - # for the mixed_type case where we iterate over columns, + # For the mixed_type case where we iterate over columns, # _arith_op(left, right) is equivalent to # left._binop(right, func, fill_value=fill_value) left, right = ops.fill_binop(left, right, fill_value) From c36e6729b1df61559e29e5e9b2dbe0678c0795a6 Mon Sep 17 00:00:00 2001 From: Brock Mendel Date: Wed, 1 Aug 2018 18:37:52 -0700 Subject: [PATCH 7/7] revert change that hurt perf --- pandas/core/frame.py | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) diff --git a/pandas/core/frame.py b/pandas/core/frame.py index b949d5c1bec13..719488a50ca40 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -4898,7 +4898,7 @@ def _combine_frame(self, other, func, fill_value=None, level=None): new_index, new_columns = this.index, this.columns def _arith_op(left, right): - # For the mixed_type case where we iterate over columns, + # for the mixed_type case where we iterate over columns, # _arith_op(left, right) is equivalent to # left._binop(right, func, fill_value=fill_value) left, right = ops.fill_binop(left, right, fill_value) @@ -4942,9 +4942,14 @@ def _compare_frame(self, other, func, str_rep): import pandas.core.computation.expressions as expressions def _compare(a, b): - return expressions.evaluate(func, str_rep, a, b) + return {i: func(a.iloc[:, i], b.iloc[:, i]) + for i in range(len(a.columns))} - return ops.dispatch_to_series(self, other, _compare) + new_data = expressions.evaluate(_compare, str_rep, self, other) + result = self._constructor(data=new_data, index=self.index, + copy=False) + result.columns = self.columns + return result def combine(self, other, func, fill_value=None, overwrite=True): """