From 8e1ad041c84b5eafba0b9402ba14718bbd3eade3 Mon Sep 17 00:00:00 2001 From: jbrockmendel Date: Mon, 19 Aug 2019 18:55:36 -0700 Subject: [PATCH 1/7] implement _construct_result --- pandas/core/frame.py | 31 +++++++++++------ pandas/core/ops/__init__.py | 4 ++- pandas/core/sparse/frame.py | 67 ++++++++++++++++++------------------- 3 files changed, 55 insertions(+), 47 deletions(-) diff --git a/pandas/core/frame.py b/pandas/core/frame.py index 603a615c1f8cb..de811f54de9eb 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -5291,8 +5291,7 @@ def reorder_levels(self, order, axis=0): # Arithmetic / combination related def _combine_frame(self, other, func, fill_value=None, level=None): - this, other = self.align(other, join="outer", level=level, copy=False) - new_index, new_columns = this.index, this.columns + # Assumes we already have matching alignment def _arith_op(left, right): # for the mixed_type case where we iterate over columns, @@ -5301,14 +5300,12 @@ def _arith_op(left, right): left, right = ops.fill_binop(left, right, fill_value) return func(left, right) - if ops.should_series_dispatch(this, other, func): + if ops.should_series_dispatch(self, other, func): # iterate over columns - return ops.dispatch_to_series(this, other, _arith_op) + return ops.dispatch_to_series(self, other, _arith_op) else: - result = _arith_op(this.values, other.values) - return self._constructor( - result, index=new_index, columns=new_columns, copy=False - ) + res_values = _arith_op(self.values, other.values) + return self._construct_result(other, res_values, _arith_op) def _combine_match_index(self, other, func, level=None): left, right = self.align(other, join="outer", axis=0, level=level, copy=False) @@ -5321,9 +5318,7 @@ def _combine_match_index(self, other, func, level=None): # fastpath --> operate directly on values with np.errstate(all="ignore"): new_data = func(left.values.T, right.values).T - return self._constructor( - new_data, index=left.index, columns=self.columns, copy=False - ) + return left._construct_result(other, new_data, func) def _combine_match_columns(self, other, func, level=None): assert isinstance(other, Series) @@ -5335,6 +5330,20 @@ def _combine_const(self, other, func): assert lib.is_scalar(other) or np.ndim(other) == 0 return ops.dispatch_to_series(self, other, func) + def _construct_result(self, other, result, func): + """ + Compat for DataFrame/SparseDataFrame op result wrapping. + + `func` is included for compat with SparseDataFrame signature, is not + needed here. + """ + out = self._constructor(result, index=self.index, copy=False) + # Pin columns instead of passing to constructor for compat with + # non-unique columns case + out.columns = self.columns + return out + # TODO: finalize? we do for SparseDataFrame + def combine(self, other, func, fill_value=None, overwrite=True): """ Perform column-wise combine with another DataFrame. diff --git a/pandas/core/ops/__init__.py b/pandas/core/ops/__init__.py index 0de28f0a4a8b3..054dc2ef4db32 100644 --- a/pandas/core/ops/__init__.py +++ b/pandas/core/ops/__init__.py @@ -513,6 +513,7 @@ def column_op(a, b): new_data = expressions.evaluate(column_op, str_rep, left, right) + return left._construct_result(right, new_data, func) result = left._constructor(new_data, index=left.index, copy=False) # Pin columns instead of passing to constructor for compat with # non-unique columns case @@ -975,8 +976,9 @@ def f(self, other, axis=default_axis, level=None, fill_value=None): if isinstance(other, ABCDataFrame): # Another DataFrame + left, other = self.align(other, join="outer", level=level, copy=False) pass_op = op if should_series_dispatch(self, other, op) else na_op - return self._combine_frame(other, pass_op, fill_value, level) + return left._combine_frame(other, pass_op, fill_value, level) elif isinstance(other, ABCSeries): # For these values of `axis`, we end up dispatching to Series op, # so do not want the masked op. diff --git a/pandas/core/sparse/frame.py b/pandas/core/sparse/frame.py index f5add426297a7..9082790ffc12d 100644 --- a/pandas/core/sparse/frame.py +++ b/pandas/core/sparse/frame.py @@ -534,59 +534,43 @@ def _set_value(self, index, col, value, takeable=False): # Arithmetic-related methods def _combine_frame(self, other, func, fill_value=None, level=None): + # assumes we already have matching alignment if level is not None: raise NotImplementedError("'level' argument is not supported") - this, other = self.align(other, join="outer", level=level, copy=False) - new_index, new_columns = this.index, this.columns - if self.empty and other.empty: - return self._constructor(index=new_index).__finalize__(self) + return self._constructor(index=self.index).__finalize__(self) new_data = {} if fill_value is not None: # TODO: be a bit more intelligent here - for col in new_columns: - if col in this and col in other: - dleft = this[col].to_dense() + for col in self.columns: + if col in self and col in other: + dleft = self[col].to_dense() dright = other[col].to_dense() result = dleft._binop(dright, func, fill_value=fill_value) - result = result.to_sparse(fill_value=this[col].fill_value) + result = result.to_sparse(fill_value=self[col].fill_value) new_data[col] = result else: - for col in new_columns: - if col in this and col in other: - new_data[col] = func(this[col], other[col]) - - new_fill_value = self._get_op_result_fill_value(other, func) + for col in self.columns: + if col in self and col in other: + new_data[col] = func(self[col], other[col]) - return self._constructor( - data=new_data, - index=new_index, - columns=new_columns, - default_fill_value=new_fill_value, - ).__finalize__(self) + return self._construct_result(other, new_data, func) def _combine_match_index(self, other, func, level=None): - new_data = {} if level is not None: raise NotImplementedError("'level' argument is not supported") this, other = self.align(other, join="outer", axis=0, level=level, copy=False) + new_data = {} for col, series in this.items(): new_data[col] = func(series.values, other.values) - fill_value = self._get_op_result_fill_value(other, func) - - return self._constructor( - new_data, - index=this.index, - columns=self.columns, - default_fill_value=fill_value, - ).__finalize__(self) + return this._construct_result(other, new_data, func) def _combine_match_columns(self, other, func, level=None): # patched version of DataFrame._combine_match_columns to account for @@ -601,20 +585,28 @@ def _combine_match_columns(self, other, func, level=None): assert left.columns.equals(right.index) new_data = {} - for col in left.columns: new_data[col] = func(left[col], float(right[col])) - return self._constructor( - new_data, - index=left.index, - columns=left.columns, - default_fill_value=self.default_fill_value, - ).__finalize__(self) + # TODO: using this changed some behavior, see GH#28025 + return left._construct_result(other, new_data, func) def _combine_const(self, other, func): return self._apply_columns(lambda x: func(x, other)) + def _construct_result(self, other, result, func): + """ + Compat for DataFrame/SparseDataFrame op result wrapping. + """ + fill_value = self._get_op_result_fill_value(other, func) + + return self._constructor( + result, + index=self.index, + columns=self.columns, + default_fill_value=fill_value, + ).__finalize__(self) + def _get_op_result_fill_value(self, other, func): own_default = self.default_fill_value @@ -643,6 +635,11 @@ def _get_op_result_fill_value(self, other, func): else: fill_value = func(np.float64(own_default), np.float64(other.fill_value)) fill_value = item_from_zerodim(fill_value) + + elif isinstance(other, Series): + # reached via _combine_match_columns + fill_value = self.default_fill_value + else: raise NotImplementedError(type(other)) From bdc0d722402ac1624923515ec1ca05abe9eed503 Mon Sep 17 00:00:00 2001 From: jbrockmendel Date: Mon, 19 Aug 2019 19:34:38 -0700 Subject: [PATCH 2/7] Change dispatch_to_series return, restore alignment --- pandas/core/frame.py | 27 +++++++++++++++------------ pandas/core/ops/__init__.py | 17 ++++++----------- pandas/core/sparse/frame.py | 29 +++++++++-------------------- 3 files changed, 30 insertions(+), 43 deletions(-) diff --git a/pandas/core/frame.py b/pandas/core/frame.py index de811f54de9eb..0536376a1b8b0 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -5291,8 +5291,10 @@ def reorder_levels(self, order, axis=0): # Arithmetic / combination related def _combine_frame(self, other, func, fill_value=None, level=None): - # Assumes we already have matching alignment + this, other = self.align(other, join="outer", level=level, copy=False) + # TODO: if fill_value is None, just set _arith_op=func? costly since + # called in a loop def _arith_op(left, right): # for the mixed_type case where we iterate over columns, # _arith_op(left, right) is equivalent to @@ -5300,12 +5302,12 @@ def _arith_op(left, right): left, right = ops.fill_binop(left, right, fill_value) return func(left, right) - if ops.should_series_dispatch(self, other, func): + if ops.should_series_dispatch(this, other, func): # iterate over columns - return ops.dispatch_to_series(self, other, _arith_op) + new_data = ops.dispatch_to_series(this, other, _arith_op) else: - res_values = _arith_op(self.values, other.values) - return self._construct_result(other, res_values, _arith_op) + new_data = _arith_op(this.values, other.values) + return this._construct_result(other, new_data, _arith_op) def _combine_match_index(self, other, func, level=None): left, right = self.align(other, join="outer", axis=0, level=level, copy=False) @@ -5313,22 +5315,23 @@ def _combine_match_index(self, other, func, level=None): if left._is_mixed_type or right._is_mixed_type: # operate column-wise; avoid costly object-casting in `.values` - return ops.dispatch_to_series(left, right, func) + new_data = ops.dispatch_to_series(left, right, func) else: # fastpath --> operate directly on values with np.errstate(all="ignore"): new_data = func(left.values.T, right.values).T - return left._construct_result(other, new_data, func) + return left._construct_result(other, new_data, func) - def _combine_match_columns(self, other, func, level=None): - assert isinstance(other, Series) + def _combine_match_columns(self, other: Series, func, level=None): left, right = self.align(other, join="outer", axis=1, level=level, copy=False) assert left.columns.equals(right.index) - return ops.dispatch_to_series(left, right, func, axis="columns") + new_data = ops.dispatch_to_series(left, right, func, axis="columns") + return left._construct_result(right, new_data, func) def _combine_const(self, other, func): - assert lib.is_scalar(other) or np.ndim(other) == 0 - return ops.dispatch_to_series(self, other, func) + # scalar other or np.ndim(other) == 0 + new_data = ops.dispatch_to_series(self, other, func) + return self._construct_result(other, new_data, func) def _construct_result(self, other, result, func): """ diff --git a/pandas/core/ops/__init__.py b/pandas/core/ops/__init__.py index 054dc2ef4db32..da8007c5e4208 100644 --- a/pandas/core/ops/__init__.py +++ b/pandas/core/ops/__init__.py @@ -512,13 +512,7 @@ def column_op(a, b): raise NotImplementedError(right) new_data = expressions.evaluate(column_op, str_rep, left, right) - - return left._construct_result(right, new_data, func) - result = left._constructor(new_data, index=left.index, copy=False) - # Pin columns instead of passing to constructor for compat with - # non-unique columns case - result.columns = left.columns - return result + return new_data def dispatch_to_extension_op(op, left, right): @@ -976,9 +970,8 @@ def f(self, other, axis=default_axis, level=None, fill_value=None): if isinstance(other, ABCDataFrame): # Another DataFrame - left, other = self.align(other, join="outer", level=level, copy=False) pass_op = op if should_series_dispatch(self, other, op) else na_op - return left._combine_frame(other, pass_op, fill_value, level) + return self._combine_frame(other, pass_op, fill_value, level) elif isinstance(other, ABCSeries): # For these values of `axis`, we end up dispatching to Series op, # so do not want the masked op. @@ -1024,7 +1017,8 @@ def f(self, other, axis=default_axis, level=None): # Another DataFrame if not self._indexed_same(other): self, other = self.align(other, "outer", level=level, copy=False) - return dispatch_to_series(self, other, na_op, str_rep) + new_data = dispatch_to_series(self, other, na_op, str_rep) + return self._construct_result(other, new_data, na_op) elif isinstance(other, ABCSeries): return _combine_series_frame( @@ -1054,7 +1048,8 @@ def f(self, other): raise ValueError( "Can only compare identically-labeled DataFrame objects" ) - return dispatch_to_series(self, other, func, str_rep) + new_data = dispatch_to_series(self, other, func, str_rep) + return self._construct_result(other, new_data, func) elif isinstance(other, ABCSeries): return _combine_series_frame( diff --git a/pandas/core/sparse/frame.py b/pandas/core/sparse/frame.py index 9082790ffc12d..e96cf08d130d6 100644 --- a/pandas/core/sparse/frame.py +++ b/pandas/core/sparse/frame.py @@ -534,36 +534,28 @@ def _set_value(self, index, col, value, takeable=False): # Arithmetic-related methods def _combine_frame(self, other, func, fill_value=None, level=None): - # assumes we already have matching alignment - if level is not None: - raise NotImplementedError("'level' argument is not supported") - - if self.empty and other.empty: - return self._constructor(index=self.index).__finalize__(self) + this, other = self.align(other, join="outer", level=level, copy=False) new_data = {} if fill_value is not None: # TODO: be a bit more intelligent here - for col in self.columns: - if col in self and col in other: - dleft = self[col].to_dense() + for col in this.columns: + if col in this and col in other: + dleft = this[col].to_dense() dright = other[col].to_dense() result = dleft._binop(dright, func, fill_value=fill_value) - result = result.to_sparse(fill_value=self[col].fill_value) + result = result.to_sparse(fill_value=this[col].fill_value) new_data[col] = result else: - for col in self.columns: - if col in self and col in other: - new_data[col] = func(self[col], other[col]) + for col in this.columns: + if col in this and col in other: + new_data[col] = func(this[col], other[col]) - return self._construct_result(other, new_data, func) + return this._construct_result(other, new_data, func) def _combine_match_index(self, other, func, level=None): - if level is not None: - raise NotImplementedError("'level' argument is not supported") - this, other = self.align(other, join="outer", axis=0, level=level, copy=False) new_data = {} @@ -578,9 +570,6 @@ def _combine_match_columns(self, other, func, level=None): # where 3.0 is numpy.float64 and series is a SparseSeries. Still # possible for this to happen, which is bothersome - if level is not None: - raise NotImplementedError("'level' argument is not supported") - left, right = self.align(other, join="outer", axis=1, level=level, copy=False) assert left.columns.equals(right.index) From 7a4b8c287b779167af58f265fa7f953e6d021b1b Mon Sep 17 00:00:00 2001 From: jbrockmendel Date: Mon, 19 Aug 2019 19:35:58 -0700 Subject: [PATCH 3/7] remove comment --- pandas/core/frame.py | 2 -- 1 file changed, 2 deletions(-) diff --git a/pandas/core/frame.py b/pandas/core/frame.py index 0536376a1b8b0..47cb0b3966a64 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -5293,8 +5293,6 @@ def reorder_levels(self, order, axis=0): def _combine_frame(self, other, func, fill_value=None, level=None): this, other = self.align(other, join="outer", level=level, copy=False) - # TODO: if fill_value is None, just set _arith_op=func? costly since - # called in a loop def _arith_op(left, right): # for the mixed_type case where we iterate over columns, # _arith_op(left, right) is equivalent to From 6cf996d330380ca00766b6294cb348fdcd496c92 Mon Sep 17 00:00:00 2001 From: jbrockmendel Date: Thu, 22 Aug 2019 19:57:44 -0700 Subject: [PATCH 4/7] pin columns --- pandas/core/sparse/frame.py | 9 +++------ 1 file changed, 3 insertions(+), 6 deletions(-) diff --git a/pandas/core/sparse/frame.py b/pandas/core/sparse/frame.py index e96cf08d130d6..88503c15da807 100644 --- a/pandas/core/sparse/frame.py +++ b/pandas/core/sparse/frame.py @@ -589,12 +589,9 @@ def _construct_result(self, other, result, func): """ fill_value = self._get_op_result_fill_value(other, func) - return self._constructor( - result, - index=self.index, - columns=self.columns, - default_fill_value=fill_value, - ).__finalize__(self) + out = self._constructor(result, index=self.index, default_fill_value=fill_value) + out.columns = self.columns + return out.__finalize__(self) def _get_op_result_fill_value(self, other, func): own_default = self.default_fill_value From d86703405660b387a31ccc5ca4efea6b0c7e5323 Mon Sep 17 00:00:00 2001 From: jbrockmendel Date: Fri, 23 Aug 2019 08:20:48 -0700 Subject: [PATCH 5/7] patch _default_fill_value --- pandas/core/sparse/frame.py | 1 + 1 file changed, 1 insertion(+) diff --git a/pandas/core/sparse/frame.py b/pandas/core/sparse/frame.py index 88503c15da807..510cf048b56bb 100644 --- a/pandas/core/sparse/frame.py +++ b/pandas/core/sparse/frame.py @@ -572,6 +572,7 @@ def _combine_match_columns(self, other, func, level=None): left, right = self.align(other, join="outer", axis=1, level=level, copy=False) assert left.columns.equals(right.index) + left._default_fill_value = self._default_fill_value new_data = {} for col in left.columns: From ca76be429947649fc82418c58dced629cd27bf5f Mon Sep 17 00:00:00 2001 From: jbrockmendel Date: Fri, 23 Aug 2019 14:13:13 -0700 Subject: [PATCH 6/7] pin default_fill_value after align --- pandas/core/sparse/frame.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/pandas/core/sparse/frame.py b/pandas/core/sparse/frame.py index 510cf048b56bb..1202cfddd24d0 100644 --- a/pandas/core/sparse/frame.py +++ b/pandas/core/sparse/frame.py @@ -535,6 +535,7 @@ def _set_value(self, index, col, value, takeable=False): def _combine_frame(self, other, func, fill_value=None, level=None): this, other = self.align(other, join="outer", level=level, copy=False) + this._default_fill_value = self._default_fill_value new_data = {} if fill_value is not None: @@ -557,6 +558,7 @@ def _combine_frame(self, other, func, fill_value=None, level=None): def _combine_match_index(self, other, func, level=None): this, other = self.align(other, join="outer", axis=0, level=level, copy=False) + this._default_fill_value = self._default_fill_value new_data = {} for col, series in this.items(): From 45f5419f42226af8a3150f45aa6b2f462ed7520d Mon Sep 17 00:00:00 2001 From: jbrockmendel Date: Mon, 2 Sep 2019 16:36:30 -0700 Subject: [PATCH 7/7] improve docstring --- pandas/core/frame.py | 14 +++++++++++++- pandas/core/sparse/frame.py | 12 +++++++++++- 2 files changed, 24 insertions(+), 2 deletions(-) diff --git a/pandas/core/frame.py b/pandas/core/frame.py index 2f04e29ee3949..6aa3690ef54ea 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -5356,8 +5356,20 @@ def _combine_const(self, other, func): def _construct_result(self, other, result, func): """ - Compat for DataFrame/SparseDataFrame op result wrapping. + Wrap the result of an arithmetic, comparison, or logical operation. + Parameters + ---------- + other : object + result : DataFrame + func : binary operator + + Returns + ------- + DataFrame + + Notes + ----- `func` is included for compat with SparseDataFrame signature, is not needed here. """ diff --git a/pandas/core/sparse/frame.py b/pandas/core/sparse/frame.py index 6226d43fef16c..aaa99839144b4 100644 --- a/pandas/core/sparse/frame.py +++ b/pandas/core/sparse/frame.py @@ -587,7 +587,17 @@ def _combine_const(self, other, func): def _construct_result(self, other, result, func): """ - Compat for DataFrame/SparseDataFrame op result wrapping. + Wrap the result of an arithmetic, comparison, or logical operation. + + Parameters + ---------- + other : object + result : SparseDataFrame + func : binary operator + + Returns + ------- + SparseDataFrame """ fill_value = self._get_op_result_fill_value(other, func)