From cb79d128a7495190b929a612d791d10b6a96d234 Mon Sep 17 00:00:00 2001 From: Brock Mendel Date: Mon, 29 Oct 2018 10:20:00 -0700 Subject: [PATCH 01/12] implement wrap_dispatched_op --- pandas/core/frame.py | 31 +++++++++++++++++++++++++++---- pandas/core/ops.py | 15 ++++++--------- 2 files changed, 33 insertions(+), 13 deletions(-) diff --git a/pandas/core/frame.py b/pandas/core/frame.py index a58d34574d28d..ae18509d0f225 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -4901,6 +4901,25 @@ def reorder_levels(self, order, axis=0): # ---------------------------------------------------------------------- # Arithmetic / combination related + def _wrap_dispatched_op(self, result): + """ + Wrap the result of an arithmetic/comparison operation performed + via ops.dispatch_to_series in a properly-indexed DataFrame. + + Parameters + ---------- + result : dict[int:Series] + + Returns + ------- + DataFrame + """ + result = left._constructor(result, index=self.index, copy=False) + # Pin columns instead of passing to constructor for compat with + # non-unique columns case + result.columns = self.columns + return result + def _combine_frame(self, other, func, fill_value=None, level=None): this, other = self.align(other, join='outer', level=level, copy=False) new_index, new_columns = this.index, this.columns @@ -4914,7 +4933,8 @@ def _arith_op(left, right): if ops.should_series_dispatch(this, other, func): # iterate over columns - return ops.dispatch_to_series(this, other, _arith_op) + result = ops.dispatch_to_series(this, other, _arith_op) + return this._wrap_dispatched_op(result) else: result = _arith_op(this.values, other.values) return self._constructor(result, @@ -4928,7 +4948,8 @@ def _combine_match_index(self, other, func, level=None): if left._is_mixed_type or right._is_mixed_type: # operate column-wise; avoid costly object-casting in `.values` - return ops.dispatch_to_series(left, right, func) + result = ops.dispatch_to_series(left, right, func) + return left._wrap_dispatched_op(result) else: # fastpath --> operate directly on values with np.errstate(all="ignore"): @@ -4942,11 +4963,13 @@ def _combine_match_columns(self, other, func, level=None): left, right = self.align(other, join='outer', axis=1, level=level, copy=False) assert left.columns.equals(right.index) - return ops.dispatch_to_series(left, right, func, axis="columns") + result = ops.dispatch_to_series(left, right, func, axis="columns") + return left._wrap_dispatched_op(result) def _combine_const(self, other, func, errors='raise'): assert lib.is_scalar(other) or np.ndim(other) == 0 - return ops.dispatch_to_series(self, other, func) + result = ops.dispatch_to_seris(self, other, func) + return self._wrap_dispatched_op(result) def combine(self, other, func, fill_value=None, overwrite=True): """ diff --git a/pandas/core/ops.py b/pandas/core/ops.py index 2335b26c576eb..88c5a4327c08d 100644 --- a/pandas/core/ops.py +++ b/pandas/core/ops.py @@ -958,7 +958,7 @@ def dispatch_to_series(left, right, func, str_rep=None, axis=None): Returns ------- - DataFrame + dict[int:Series] """ # Note: we use iloc to access columns for compat with cases # with non-unique columns. @@ -999,12 +999,7 @@ def column_op(a, b): raise NotImplementedError(right) new_data = expressions.evaluate(column_op, str_rep, left, right) - - result = left._constructor(new_data, index=left.index, copy=False) - # Pin columns instead of passing to constructor for compat with - # non-unique columns case - result.columns = left.columns - return result + return new_data def dispatch_to_index_op(op, left, right, index_class): @@ -1902,7 +1897,8 @@ def f(self, other, axis=default_axis, level=None): if not self._indexed_same(other): self, other = self.align(other, 'outer', level=level, copy=False) - return dispatch_to_series(self, other, na_op, str_rep) + result = dispatch_to_series(self, other, na_op, str_rep) + return self._wrap_dispatched_op(result) elif isinstance(other, ABCSeries): return _combine_series_frame(self, other, na_op, @@ -1931,7 +1927,8 @@ def f(self, other): if not self._indexed_same(other): raise ValueError('Can only compare identically-labeled ' 'DataFrame objects') - return dispatch_to_series(self, other, func, str_rep) + result = dispatch_to_series(self, other, func, str_rep) + return self._wrap_dispatched_op(result) elif isinstance(other, ABCSeries): return _combine_series_frame(self, other, func, From 9c8a9c72176b0873e2ac633a97bd78e263158265 Mon Sep 17 00:00:00 2001 From: Brock Mendel Date: Mon, 29 Oct 2018 10:58:13 -0700 Subject: [PATCH 02/12] typo fixup --- pandas/core/frame.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/core/frame.py b/pandas/core/frame.py index ae18509d0f225..29028313efb26 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -4968,7 +4968,7 @@ def _combine_match_columns(self, other, func, level=None): def _combine_const(self, other, func, errors='raise'): assert lib.is_scalar(other) or np.ndim(other) == 0 - result = ops.dispatch_to_seris(self, other, func) + result = ops.dispatch_to_series(self, other, func) return self._wrap_dispatched_op(result) def combine(self, other, func, fill_value=None, overwrite=True): From 32a327b521c682156b118d8894696a05aa5a6c81 Mon Sep 17 00:00:00 2001 From: Brock Mendel Date: Mon, 29 Oct 2018 10:58:45 -0700 Subject: [PATCH 03/12] fixup copy/paste mixup --- pandas/core/frame.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/core/frame.py b/pandas/core/frame.py index 29028313efb26..7f3c52ea38614 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -4914,7 +4914,7 @@ def _wrap_dispatched_op(self, result): ------- DataFrame """ - result = left._constructor(result, index=self.index, copy=False) + result = self._constructor(result, index=self.index, copy=False) # Pin columns instead of passing to constructor for compat with # non-unique columns case result.columns = self.columns From e737d452238df20e09276d086a91b78e85faa017 Mon Sep 17 00:00:00 2001 From: Brock Mendel Date: Mon, 29 Oct 2018 13:17:55 -0700 Subject: [PATCH 04/12] use wrap_dispatched_op in sparse --- pandas/core/frame.py | 9 ++---- pandas/core/ops.py | 10 +++---- pandas/core/sparse/frame.py | 56 ++++++++++++++++++++++++++----------- 3 files changed, 46 insertions(+), 29 deletions(-) diff --git a/pandas/core/frame.py b/pandas/core/frame.py index 7f3c52ea38614..135396b92f4a0 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -4948,8 +4948,7 @@ def _combine_match_index(self, other, func, level=None): if left._is_mixed_type or right._is_mixed_type: # operate column-wise; avoid costly object-casting in `.values` - result = ops.dispatch_to_series(left, right, func) - return left._wrap_dispatched_op(result) + return ops.dispatch_to_series(left, right, func) else: # fastpath --> operate directly on values with np.errstate(all="ignore"): @@ -4963,13 +4962,11 @@ def _combine_match_columns(self, other, func, level=None): left, right = self.align(other, join='outer', axis=1, level=level, copy=False) assert left.columns.equals(right.index) - result = ops.dispatch_to_series(left, right, func, axis="columns") - return left._wrap_dispatched_op(result) + return ops.dispatch_to_series(left, right, func, axis="columns") def _combine_const(self, other, func, errors='raise'): assert lib.is_scalar(other) or np.ndim(other) == 0 - result = ops.dispatch_to_series(self, other, func) - return self._wrap_dispatched_op(result) + return ops.dispatch_to_series(self, other, func) def combine(self, other, func, fill_value=None, overwrite=True): """ diff --git a/pandas/core/ops.py b/pandas/core/ops.py index 88c5a4327c08d..e654727f0de2c 100644 --- a/pandas/core/ops.py +++ b/pandas/core/ops.py @@ -958,7 +958,7 @@ def dispatch_to_series(left, right, func, str_rep=None, axis=None): Returns ------- - dict[int:Series] + DataFrame """ # Note: we use iloc to access columns for compat with cases # with non-unique columns. @@ -999,7 +999,7 @@ def column_op(a, b): raise NotImplementedError(right) new_data = expressions.evaluate(column_op, str_rep, left, right) - return new_data + return left._wrap_dispatched_op(new_data) def dispatch_to_index_op(op, left, right, index_class): @@ -1897,8 +1897,7 @@ def f(self, other, axis=default_axis, level=None): if not self._indexed_same(other): self, other = self.align(other, 'outer', level=level, copy=False) - result = dispatch_to_series(self, other, na_op, str_rep) - return self._wrap_dispatched_op(result) + return dispatch_to_series(self, other, na_op, str_rep) elif isinstance(other, ABCSeries): return _combine_series_frame(self, other, na_op, @@ -1927,8 +1926,7 @@ def f(self, other): if not self._indexed_same(other): raise ValueError('Can only compare identically-labeled ' 'DataFrame objects') - result = dispatch_to_series(self, other, func, str_rep) - return self._wrap_dispatched_op(result) + return dispatch_to_series(self, other, func, str_rep) elif isinstance(other, ABCSeries): return _combine_series_frame(self, other, func, diff --git a/pandas/core/sparse/frame.py b/pandas/core/sparse/frame.py index c7d8be0d2e9e4..4a6c2210e38a8 100644 --- a/pandas/core/sparse/frame.py +++ b/pandas/core/sparse/frame.py @@ -547,6 +547,31 @@ def xs(self, key, axis=0, copy=False): # ---------------------------------------------------------------------- # Arithmetic-related methods + def _wrap_dispatched_op(self, result, other, func, axis=None): + """ + Wrap the result of an arithmetic/comparison operation performed + via ops.dispatch_to_series in a properly-indexed DataFrame. + + Parameters + ---------- + result : dict[int:Series] + other : DataFrame, Series, scalar + func : binary function + axis : {None, "columns"} + + Returns + ------- + SparseDataFrame + """ + + fill_value = self._get_op_result_fill_value(other, func, axis) + + res = self._constructor(result, index=self.index, + columns=self.columns, + default_fill_value=fill_value) + + return res.__finalize__(self) + def _combine_frame(self, other, func, fill_value=None, level=None): if level is not None: raise NotImplementedError("'level' argument is not supported") @@ -573,12 +598,7 @@ def _combine_frame(self, other, func, fill_value=None, level=None): if col in this and col in other: new_data[col] = func(this[col], other[col]) - new_fill_value = self._get_op_result_fill_value(other, func) - - return self._constructor(data=new_data, index=new_index, - columns=new_columns, - default_fill_value=new_fill_value - ).__finalize__(self) + return this._wrap_dispatched_op(new_data, other, func) def _combine_match_index(self, other, func, level=None): new_data = {} @@ -592,11 +612,7 @@ def _combine_match_index(self, other, func, level=None): for col, series in compat.iteritems(this): new_data[col] = func(series.values, other.values) - fill_value = self._get_op_result_fill_value(other, func) - - return self._constructor( - new_data, index=this.index, columns=self.columns, - default_fill_value=fill_value).__finalize__(self) + return self._wrap_dispatched_op(new_data, other, func) def _combine_match_columns(self, other, func, level=None): # patched version of DataFrame._combine_match_columns to account for @@ -616,24 +632,26 @@ def _combine_match_columns(self, other, func, level=None): for col in left.columns: new_data[col] = func(left[col], float(right[col])) - return self._constructor( - new_data, index=left.index, columns=left.columns, - default_fill_value=self.default_fill_value).__finalize__(self) + return self._wrap_dispatched_op(new_data, other, func, "columns") def _combine_const(self, other, func, errors='raise'): return self._apply_columns(lambda x: func(x, other)) - def _get_op_result_fill_value(self, other, func): + def _get_op_result_fill_value(self, other, func, axis=None): own_default = self.default_fill_value - if isinstance(other, DataFrame): + if axis == "columns": + # i.e. called via _combine_match_columns + fill_value = self.default_fill_value + + elif isinstance(other, DataFrame): # i.e. called from _combine_frame other_default = getattr(other, 'default_fill_value', np.nan) # if the fill values are the same use them? or use a valid one if own_default == other_default: - # TOOD: won't this evaluate as False if both are np.nan? + # TODO: won't this evaluate as False if both are np.nan? fill_value = own_default elif np.isnan(own_default) and not np.isnan(other_default): fill_value = other_default @@ -652,6 +670,10 @@ def _get_op_result_fill_value(self, other, func): fill_value = func(np.float64(own_default), np.float64(other.fill_value)) + elif np.ndim(other) == 0: + # i.e. called via _combine_const + fill_value = self.default_fill_value + else: raise NotImplementedError(type(other)) From 08f80fa6d8609e02d7dd5508fa942ff131d871ff Mon Sep 17 00:00:00 2001 From: Brock Mendel Date: Mon, 29 Oct 2018 13:29:11 -0700 Subject: [PATCH 05/12] remove unnecessary errors kwarg --- pandas/core/frame.py | 2 +- pandas/core/ops.py | 5 ++--- pandas/core/sparse/frame.py | 2 +- 3 files changed, 4 insertions(+), 5 deletions(-) diff --git a/pandas/core/frame.py b/pandas/core/frame.py index 135396b92f4a0..e711232cf3c9d 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -4964,7 +4964,7 @@ def _combine_match_columns(self, other, func, level=None): assert left.columns.equals(right.index) return ops.dispatch_to_series(left, right, func, axis="columns") - def _combine_const(self, other, func, errors='raise'): + def _combine_const(self, other, func): assert lib.is_scalar(other) or np.ndim(other) == 0 return ops.dispatch_to_series(self, other, func) diff --git a/pandas/core/ops.py b/pandas/core/ops.py index e654727f0de2c..6de00557a5ae3 100644 --- a/pandas/core/ops.py +++ b/pandas/core/ops.py @@ -1935,9 +1935,8 @@ def f(self, other): else: # straight boolean comparisons we want to allow all columns - # (regardless of dtype to pass thru) See #4537 for discussion. - res = self._combine_const(other, func, - errors='ignore') + # (regardless of dtype to pass thru) See GH#4537 for discussion. + res = self._combine_const(other, func) return res.fillna(True).astype(bool) f.__name__ = op_name diff --git a/pandas/core/sparse/frame.py b/pandas/core/sparse/frame.py index 4a6c2210e38a8..f665a438472c5 100644 --- a/pandas/core/sparse/frame.py +++ b/pandas/core/sparse/frame.py @@ -634,7 +634,7 @@ def _combine_match_columns(self, other, func, level=None): return self._wrap_dispatched_op(new_data, other, func, "columns") - def _combine_const(self, other, func, errors='raise'): + def _combine_const(self, other, func): return self._apply_columns(lambda x: func(x, other)) def _get_op_result_fill_value(self, other, func, axis=None): From ea4ce51cc960cb52b6a2f4887036ae284619677b Mon Sep 17 00:00:00 2001 From: Brock Mendel Date: Mon, 29 Oct 2018 15:33:05 -0700 Subject: [PATCH 06/12] pass str_rep --- pandas/core/frame.py | 17 +++++++++-------- pandas/core/ops.py | 37 ++++++++++++++++++++----------------- pandas/core/sparse/frame.py | 23 ++++++++++++++--------- 3 files changed, 43 insertions(+), 34 deletions(-) diff --git a/pandas/core/frame.py b/pandas/core/frame.py index e711232cf3c9d..49423455fc9be 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -4901,7 +4901,7 @@ def reorder_levels(self, order, axis=0): # ---------------------------------------------------------------------- # Arithmetic / combination related - def _wrap_dispatched_op(self, result): + def _wrap_dispatched_op(self, result, other, func, axis=None): """ Wrap the result of an arithmetic/comparison operation performed via ops.dispatch_to_series in a properly-indexed DataFrame. @@ -4934,21 +4934,21 @@ def _arith_op(left, right): if ops.should_series_dispatch(this, other, func): # iterate over columns result = ops.dispatch_to_series(this, other, _arith_op) - return this._wrap_dispatched_op(result) + return this._wrap_dispatched_op(result, other, _arith_op) else: result = _arith_op(this.values, other.values) return self._constructor(result, index=new_index, columns=new_columns, copy=False) - def _combine_match_index(self, other, func, level=None): + def _combine_match_index(self, other, func, level=None, str_rep=None): left, right = self.align(other, join='outer', axis=0, level=level, copy=False) assert left.index.equals(right.index) if left._is_mixed_type or right._is_mixed_type: # operate column-wise; avoid costly object-casting in `.values` - return ops.dispatch_to_series(left, right, func) + return ops.dispatch_to_series(left, right, func, str_rep=str_rep) else: # fastpath --> operate directly on values with np.errstate(all="ignore"): @@ -4957,16 +4957,17 @@ def _combine_match_index(self, other, func, level=None): index=left.index, columns=self.columns, copy=False) - def _combine_match_columns(self, other, func, level=None): + def _combine_match_columns(self, other, func, level=None, str_rep=None): assert isinstance(other, Series) left, right = self.align(other, join='outer', axis=1, level=level, copy=False) assert left.columns.equals(right.index) - return ops.dispatch_to_series(left, right, func, axis="columns") + return ops.dispatch_to_series(left, right, func, axis="columns", + str_rep=str_rep) - def _combine_const(self, other, func): + def _combine_const(self, other, func, str_rep=None): assert lib.is_scalar(other) or np.ndim(other) == 0 - return ops.dispatch_to_series(self, other, func) + return ops.dispatch_to_series(self, other, func, str_rep=str_rep) def combine(self, other, func, fill_value=None, overwrite=True): """ diff --git a/pandas/core/ops.py b/pandas/core/ops.py index 6de00557a5ae3..4cd54e6da3f9b 100644 --- a/pandas/core/ops.py +++ b/pandas/core/ops.py @@ -785,7 +785,7 @@ def fill_binop(left, right, fill_value): return left, right -def mask_cmp_op(x, y, op, allowed_types): +def mask_cmp_op(x, y, op): """ Apply the function `op` to only non-null points in x and y. @@ -794,16 +794,15 @@ def mask_cmp_op(x, y, op, allowed_types): x : array-like y : array-like op : binary operation - allowed_types : class or tuple of classes Returns ------- result : ndarray[bool] """ - # TODO: Can we make the allowed_types arg unnecessary? + # TODO: can we do this without casting to list? xrav = x.ravel() result = np.empty(x.size, dtype=bool) - if isinstance(y, allowed_types): + if isinstance(y, (np.ndarray, ABCSeries)): yrav = y.ravel() mask = notna(xrav) & notna(yrav) result[mask] = op(np.array(list(xrav[mask])), @@ -999,7 +998,7 @@ def column_op(a, b): raise NotImplementedError(right) new_data = expressions.evaluate(column_op, str_rep, left, right) - return left._wrap_dispatched_op(new_data) + return left._wrap_dispatched_op(new_data, right, func, axis=axis) def dispatch_to_index_op(op, left, right, index_class): @@ -1722,7 +1721,7 @@ def flex_wrapper(self, other, level=None, fill_value=None, axis=0): def _combine_series_frame(self, other, func, fill_value=None, axis=None, - level=None): + level=None, str_rep=None): """ Apply binary operator `func` to self, other using alignment and fill conventions determined by the fill_value, axis, and level kwargs. @@ -1735,6 +1734,7 @@ def _combine_series_frame(self, other, func, fill_value=None, axis=None, fill_value : object, default None axis : {0, 1, 'columns', 'index', None}, default None level : int or None, default None + str_rep : str Returns ------- @@ -1747,9 +1747,11 @@ def _combine_series_frame(self, other, func, fill_value=None, axis=None, if axis is not None: axis = self._get_axis_number(axis) if axis == 0: - return self._combine_match_index(other, func, level=level) + return self._combine_match_index(other, func, level=level, + str_rep=str_rep) else: - return self._combine_match_columns(other, func, level=level) + return self._combine_match_columns(other, func, level=level, + str_rep=str_rep) else: if not len(other): return self * np.nan @@ -1760,7 +1762,8 @@ def _combine_series_frame(self, other, func, fill_value=None, axis=None, columns=self.columns) # default axis is columns - return self._combine_match_columns(other, func, level=level) + return self._combine_match_columns(other, func, level=level, + str_rep=str_rep) def _align_method_FRAME(left, right, axis): @@ -1860,13 +1863,13 @@ def f(self, other, axis=default_axis, level=None, fill_value=None): pass_op = op if axis in [0, "columns", None] else na_op return _combine_series_frame(self, other, pass_op, fill_value=fill_value, axis=axis, - level=level) + level=level, str_rep=str_rep) else: if fill_value is not None: self = self.fillna(fill_value) assert np.ndim(other) == 0 - return self._combine_const(other, op) + return self._combine_const(other, op, str_rep=str_rep) f.__name__ = op_name @@ -1883,7 +1886,7 @@ def na_op(x, y): with np.errstate(invalid='ignore'): result = op(x, y) except TypeError: - result = mask_cmp_op(x, y, op, (np.ndarray, ABCSeries)) + result = mask_cmp_op(x, y, op) return result @Appender('Wrapper for flexible comparison methods {name}' @@ -1902,10 +1905,10 @@ def f(self, other, axis=default_axis, level=None): elif isinstance(other, ABCSeries): return _combine_series_frame(self, other, na_op, fill_value=None, axis=axis, - level=level) + level=level, str_rep=str_rep) else: assert np.ndim(other) == 0, other - return self._combine_const(other, na_op) + return self._combine_const(other, na_op, str_rep=str_rep) f.__name__ = op_name @@ -1931,12 +1934,12 @@ def f(self, other): elif isinstance(other, ABCSeries): return _combine_series_frame(self, other, func, fill_value=None, axis=None, - level=None) + level=None, str_rep=str_rep) else: # straight boolean comparisons we want to allow all columns # (regardless of dtype to pass thru) See GH#4537 for discussion. - res = self._combine_const(other, func) + res = self._combine_const(other, func, str_rep=str_rep) return res.fillna(True).astype(bool) f.__name__ = op_name @@ -1973,7 +1976,7 @@ def na_op(x, y): try: result = expressions.evaluate(op, str_rep, x, y) except TypeError: - result = mask_cmp_op(x, y, op, np.ndarray) + result = mask_cmp_op(x, y, op) return result @Appender('Wrapper for comparison method {name}'.format(name=op_name)) diff --git a/pandas/core/sparse/frame.py b/pandas/core/sparse/frame.py index f665a438472c5..6bce8c0ab1b99 100644 --- a/pandas/core/sparse/frame.py +++ b/pandas/core/sparse/frame.py @@ -339,9 +339,10 @@ def _apply_columns(self, func): for col, series in compat.iteritems(self): new_data[col] = func(series) - return self._constructor( - data=new_data, index=self.index, columns=self.columns, - default_fill_value=self.default_fill_value).__finalize__(self) + # pass dummy arguments for func and axis; `other` just needs to be + # a scalar. + return self._wrap_dispatched_op(new_data, other=None, + func=None, axis=None) def astype(self, dtype): return self._apply_columns(lambda x: x.astype(dtype)) @@ -600,8 +601,7 @@ def _combine_frame(self, other, func, fill_value=None, level=None): return this._wrap_dispatched_op(new_data, other, func) - def _combine_match_index(self, other, func, level=None): - new_data = {} + def _combine_match_index(self, other, func, level=None, str_rep=None): if level is not None: raise NotImplementedError("'level' argument is not supported") @@ -609,12 +609,13 @@ def _combine_match_index(self, other, func, level=None): this, other = self.align(other, join='outer', axis=0, level=level, copy=False) + new_data = {} for col, series in compat.iteritems(this): new_data[col] = func(series.values, other.values) return self._wrap_dispatched_op(new_data, other, func) - def _combine_match_columns(self, other, func, level=None): + def _combine_match_columns(self, other, func, level=None, str_rep=None): # patched version of DataFrame._combine_match_columns to account for # NumPy circumventing __rsub__ with float64 types, e.g.: 3.0 - series, # where 3.0 is numpy.float64 and series is a SparseSeries. Still @@ -628,14 +629,18 @@ def _combine_match_columns(self, other, func, level=None): assert left.columns.equals(right.index) new_data = {} - for col in left.columns: new_data[col] = func(left[col], float(right[col])) return self._wrap_dispatched_op(new_data, other, func, "columns") - def _combine_const(self, other, func): - return self._apply_columns(lambda x: func(x, other)) + def _combine_const(self, other, func, str_rep=None): + new_data = {} + for col, series in compat.iteritems(self): + new_data[col] = func(series, other) + + return self._wrap_dispatched_op(new_data, other=other, + func=func, axis=None) def _get_op_result_fill_value(self, other, func, axis=None): own_default = self.default_fill_value From 4d99777a6d95d4ab8714edea8c78d218ea15b141 Mon Sep 17 00:00:00 2001 From: Brock Mendel Date: Mon, 29 Oct 2018 17:04:19 -0700 Subject: [PATCH 07/12] fix double wrapping --- pandas/core/frame.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/pandas/core/frame.py b/pandas/core/frame.py index 49423455fc9be..854bfb002d189 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -4933,8 +4933,7 @@ def _arith_op(left, right): if ops.should_series_dispatch(this, other, func): # iterate over columns - result = ops.dispatch_to_series(this, other, _arith_op) - return this._wrap_dispatched_op(result, other, _arith_op) + return ops.dispatch_to_series(this, other, _arith_op) else: result = _arith_op(this.values, other.values) return self._constructor(result, From f4c99edae463f761b09f61027e9f0ccab4255268 Mon Sep 17 00:00:00 2001 From: Brock Mendel Date: Mon, 29 Oct 2018 17:09:23 -0700 Subject: [PATCH 08/12] revert non-central changes --- pandas/core/frame.py | 13 ++++++------- pandas/core/ops.py | 22 +++++++++------------- pandas/core/sparse/frame.py | 6 +++--- 3 files changed, 18 insertions(+), 23 deletions(-) diff --git a/pandas/core/frame.py b/pandas/core/frame.py index 854bfb002d189..4758497d53095 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -4940,14 +4940,14 @@ def _arith_op(left, right): index=new_index, columns=new_columns, copy=False) - def _combine_match_index(self, other, func, level=None, str_rep=None): + def _combine_match_index(self, other, func, level=None): left, right = self.align(other, join='outer', axis=0, level=level, copy=False) assert left.index.equals(right.index) if left._is_mixed_type or right._is_mixed_type: # operate column-wise; avoid costly object-casting in `.values` - return ops.dispatch_to_series(left, right, func, str_rep=str_rep) + return ops.dispatch_to_series(left, right, func) else: # fastpath --> operate directly on values with np.errstate(all="ignore"): @@ -4956,17 +4956,16 @@ def _combine_match_index(self, other, func, level=None, str_rep=None): index=left.index, columns=self.columns, copy=False) - def _combine_match_columns(self, other, func, level=None, str_rep=None): + def _combine_match_columns(self, other, func, level=None): assert isinstance(other, Series) left, right = self.align(other, join='outer', axis=1, level=level, copy=False) assert left.columns.equals(right.index) - return ops.dispatch_to_series(left, right, func, axis="columns", - str_rep=str_rep) + return ops.dispatch_to_series(left, right, func, axis="columns") - def _combine_const(self, other, func, str_rep=None): + def _combine_const(self, other, func): assert lib.is_scalar(other) or np.ndim(other) == 0 - return ops.dispatch_to_series(self, other, func, str_rep=str_rep) + return ops.dispatch_to_series(self, other, func) def combine(self, other, func, fill_value=None, overwrite=True): """ diff --git a/pandas/core/ops.py b/pandas/core/ops.py index 4cd54e6da3f9b..dd90a4123d705 100644 --- a/pandas/core/ops.py +++ b/pandas/core/ops.py @@ -1721,7 +1721,7 @@ def flex_wrapper(self, other, level=None, fill_value=None, axis=0): def _combine_series_frame(self, other, func, fill_value=None, axis=None, - level=None, str_rep=None): + level=None): """ Apply binary operator `func` to self, other using alignment and fill conventions determined by the fill_value, axis, and level kwargs. @@ -1734,7 +1734,6 @@ def _combine_series_frame(self, other, func, fill_value=None, axis=None, fill_value : object, default None axis : {0, 1, 'columns', 'index', None}, default None level : int or None, default None - str_rep : str Returns ------- @@ -1747,11 +1746,9 @@ def _combine_series_frame(self, other, func, fill_value=None, axis=None, if axis is not None: axis = self._get_axis_number(axis) if axis == 0: - return self._combine_match_index(other, func, level=level, - str_rep=str_rep) + return self._combine_match_index(other, func, level=level) else: - return self._combine_match_columns(other, func, level=level, - str_rep=str_rep) + return self._combine_match_columns(other, func, level=level) else: if not len(other): return self * np.nan @@ -1762,8 +1759,7 @@ def _combine_series_frame(self, other, func, fill_value=None, axis=None, columns=self.columns) # default axis is columns - return self._combine_match_columns(other, func, level=level, - str_rep=str_rep) + return self._combine_match_columns(other, func, level=level) def _align_method_FRAME(left, right, axis): @@ -1863,13 +1859,13 @@ def f(self, other, axis=default_axis, level=None, fill_value=None): pass_op = op if axis in [0, "columns", None] else na_op return _combine_series_frame(self, other, pass_op, fill_value=fill_value, axis=axis, - level=level, str_rep=str_rep) + level=level) else: if fill_value is not None: self = self.fillna(fill_value) assert np.ndim(other) == 0 - return self._combine_const(other, op, str_rep=str_rep) + return self._combine_const(other, op) f.__name__ = op_name @@ -1905,10 +1901,10 @@ def f(self, other, axis=default_axis, level=None): elif isinstance(other, ABCSeries): return _combine_series_frame(self, other, na_op, fill_value=None, axis=axis, - level=level, str_rep=str_rep) + level=level) else: assert np.ndim(other) == 0, other - return self._combine_const(other, na_op, str_rep=str_rep) + return self._combine_const(other, na_op) f.__name__ = op_name @@ -1934,7 +1930,7 @@ def f(self, other): elif isinstance(other, ABCSeries): return _combine_series_frame(self, other, func, fill_value=None, axis=None, - level=None, str_rep=str_rep) + level=None) else: # straight boolean comparisons we want to allow all columns diff --git a/pandas/core/sparse/frame.py b/pandas/core/sparse/frame.py index 6bce8c0ab1b99..bf756681036db 100644 --- a/pandas/core/sparse/frame.py +++ b/pandas/core/sparse/frame.py @@ -601,7 +601,7 @@ def _combine_frame(self, other, func, fill_value=None, level=None): return this._wrap_dispatched_op(new_data, other, func) - def _combine_match_index(self, other, func, level=None, str_rep=None): + def _combine_match_index(self, other, func, level=None): if level is not None: raise NotImplementedError("'level' argument is not supported") @@ -615,7 +615,7 @@ def _combine_match_index(self, other, func, level=None, str_rep=None): return self._wrap_dispatched_op(new_data, other, func) - def _combine_match_columns(self, other, func, level=None, str_rep=None): + def _combine_match_columns(self, other, func, level=None): # patched version of DataFrame._combine_match_columns to account for # NumPy circumventing __rsub__ with float64 types, e.g.: 3.0 - series, # where 3.0 is numpy.float64 and series is a SparseSeries. Still @@ -634,7 +634,7 @@ def _combine_match_columns(self, other, func, level=None, str_rep=None): return self._wrap_dispatched_op(new_data, other, func, "columns") - def _combine_const(self, other, func, str_rep=None): + def _combine_const(self, other, func): new_data = {} for col, series in compat.iteritems(self): new_data[col] = func(series, other) From 7810dc694ebb84f0afcd37452350a0f5c0dd6f04 Mon Sep 17 00:00:00 2001 From: Brock Mendel Date: Mon, 29 Oct 2018 17:24:11 -0700 Subject: [PATCH 09/12] revert str_rep --- pandas/core/ops.py | 2 +- pandas/core/sparse/frame.py | 32 ++++++++++++++------------------ 2 files changed, 15 insertions(+), 19 deletions(-) diff --git a/pandas/core/ops.py b/pandas/core/ops.py index dd90a4123d705..e954ea5906d00 100644 --- a/pandas/core/ops.py +++ b/pandas/core/ops.py @@ -1935,7 +1935,7 @@ def f(self, other): # straight boolean comparisons we want to allow all columns # (regardless of dtype to pass thru) See GH#4537 for discussion. - res = self._combine_const(other, func, str_rep=str_rep) + res = self._combine_const(other, func) return res.fillna(True).astype(bool) f.__name__ = op_name diff --git a/pandas/core/sparse/frame.py b/pandas/core/sparse/frame.py index bf756681036db..c25571283b690 100644 --- a/pandas/core/sparse/frame.py +++ b/pandas/core/sparse/frame.py @@ -583,23 +583,20 @@ def _combine_frame(self, other, func, fill_value=None, level=None): if self.empty and other.empty: return self._constructor(index=new_index).__finalize__(self) - new_data = {} - if fill_value is not None: - # TODO: be a bit more intelligent here - for col in new_columns: - if col in this and col in other: - dleft = this[col].to_dense() - dright = other[col].to_dense() - result = dleft._binop(dright, func, fill_value=fill_value) - result = result.to_sparse(fill_value=this[col].fill_value) - new_data[col] = result - else: + def _arith_op(left, right): + # analogous to _arith_op defined in DataFrame._combine_frame + dleft = left.to_dense() + dright = right.to_dense() + result = dleft._binop(dright, func, fill_value=fill_value) + result = result.to_sparse(fill_value=left.fill_value) + return result - for col in new_columns: - if col in this and col in other: - new_data[col] = func(this[col], other[col]) + new_data = {} + for col in new_columns: + assert col in this and col in other + new_data[col] = _arith_op(this[col], other[col]) - return this._wrap_dispatched_op(new_data, other, func) + return this._wrap_dispatched_op(new_data, other, func, axis=None) def _combine_match_index(self, other, func, level=None): @@ -613,7 +610,7 @@ def _combine_match_index(self, other, func, level=None): for col, series in compat.iteritems(this): new_data[col] = func(series.values, other.values) - return self._wrap_dispatched_op(new_data, other, func) + return self._wrap_dispatched_op(new_data, other, func, axis=None) def _combine_match_columns(self, other, func, level=None): # patched version of DataFrame._combine_match_columns to account for @@ -639,8 +636,7 @@ def _combine_const(self, other, func): for col, series in compat.iteritems(self): new_data[col] = func(series, other) - return self._wrap_dispatched_op(new_data, other=other, - func=func, axis=None) + return self._wrap_dispatched_op(new_data, other, func, axis=None) def _get_op_result_fill_value(self, other, func, axis=None): own_default = self.default_fill_value From bf88f232b08d87be0905980da332f909cebe626a Mon Sep 17 00:00:00 2001 From: Brock Mendel Date: Tue, 30 Oct 2018 10:19:11 -0700 Subject: [PATCH 10/12] docstring fixup, validate axis --- pandas/core/frame.py | 8 ++++++++ pandas/core/sparse/frame.py | 2 ++ 2 files changed, 10 insertions(+) diff --git a/pandas/core/frame.py b/pandas/core/frame.py index 4758497d53095..418acc3a655db 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -4909,10 +4909,18 @@ def _wrap_dispatched_op(self, result, other, func, axis=None): Parameters ---------- result : dict[int:Series] + other : DataFrame, Series, or scalar + func : binary operator + axis : {0, 1, "index", "columns"} Returns ------- DataFrame + + Notes + ----- + The `other`, `func`, and `axis` arguments are not used here but are + included to keep the signature consistent with that in SparseDataFrame. """ result = self._constructor(result, index=self.index, copy=False) # Pin columns instead of passing to constructor for compat with diff --git a/pandas/core/sparse/frame.py b/pandas/core/sparse/frame.py index c25571283b690..55c063d20bb0d 100644 --- a/pandas/core/sparse/frame.py +++ b/pandas/core/sparse/frame.py @@ -641,6 +641,8 @@ def _combine_const(self, other, func): def _get_op_result_fill_value(self, other, func, axis=None): own_default = self.default_fill_value + axis = self._get_axis_name(axis) if axis is not None else None + if axis == "columns": # i.e. called via _combine_match_columns fill_value = self.default_fill_value From 3c2f3230d3396a02d08958fa3482e4a321078f45 Mon Sep 17 00:00:00 2001 From: Brock Mendel Date: Wed, 31 Oct 2018 17:33:30 -0700 Subject: [PATCH 11/12] use dict comprehensions --- pandas/core/sparse/frame.py | 17 +++++++---------- 1 file changed, 7 insertions(+), 10 deletions(-) diff --git a/pandas/core/sparse/frame.py b/pandas/core/sparse/frame.py index 55c063d20bb0d..56db228ef982f 100644 --- a/pandas/core/sparse/frame.py +++ b/pandas/core/sparse/frame.py @@ -591,10 +591,10 @@ def _arith_op(left, right): result = result.to_sparse(fill_value=left.fill_value) return result - new_data = {} - for col in new_columns: - assert col in this and col in other - new_data[col] = _arith_op(this[col], other[col]) + assert all(col in this and col in other for col in new_columns) + + new_data = {col: _arith_op(this[col], other[col]) + for col in new_columns} return this._wrap_dispatched_op(new_data, other, func, axis=None) @@ -625,16 +625,13 @@ def _combine_match_columns(self, other, func, level=None): copy=False) assert left.columns.equals(right.index) - new_data = {} - for col in left.columns: - new_data[col] = func(left[col], float(right[col])) + new_data = {col: func(left[col], float(right[col])) + for col in left.columns} return self._wrap_dispatched_op(new_data, other, func, "columns") def _combine_const(self, other, func): - new_data = {} - for col, series in compat.iteritems(self): - new_data[col] = func(series, other) + new_data = {col: func(self[col], other) for col in self.columns} return self._wrap_dispatched_op(new_data, other, func, axis=None) From 63544b44138733c888c094beb5311c344e7a67cd Mon Sep 17 00:00:00 2001 From: Brock Mendel Date: Thu, 1 Nov 2018 08:15:22 -0700 Subject: [PATCH 12/12] dict comphrension --- pandas/core/sparse/frame.py | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/pandas/core/sparse/frame.py b/pandas/core/sparse/frame.py index 56db228ef982f..7ebedd02a5ff3 100644 --- a/pandas/core/sparse/frame.py +++ b/pandas/core/sparse/frame.py @@ -606,9 +606,8 @@ def _combine_match_index(self, other, func, level=None): this, other = self.align(other, join='outer', axis=0, level=level, copy=False) - new_data = {} - for col, series in compat.iteritems(this): - new_data[col] = func(series.values, other.values) + new_data = {col: func(this[col].values, other.values) + for col in this.columns} return self._wrap_dispatched_op(new_data, other, func, axis=None)