diff --git a/pandas/core/frame.py b/pandas/core/frame.py index 16fece1c7eb8b..6aa3690ef54ea 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -5306,7 +5306,6 @@ def reorder_levels(self, order, axis=0): def _combine_frame(self, other, func, fill_value=None, level=None): this, other = self.align(other, join="outer", level=level, copy=False) - new_index, new_columns = this.index, this.columns if fill_value is None: # since _arith_op may be called in a loop, avoid function call @@ -5324,14 +5323,12 @@ def _arith_op(left, right): if ops.should_series_dispatch(this, other, func): # iterate over columns - return ops.dispatch_to_series(this, other, _arith_op) + new_data = ops.dispatch_to_series(this, other, _arith_op) else: with np.errstate(all="ignore"): - result = _arith_op(this.values, other.values) - result = dispatch_fill_zeros(func, this.values, other.values, result) - return self._constructor( - result, index=new_index, columns=new_columns, copy=False - ) + res_values = _arith_op(this.values, other.values) + new_data = dispatch_fill_zeros(func, this.values, other.values, res_values) + return this._construct_result(other, new_data, _arith_op) def _combine_match_index(self, other, func, level=None): left, right = self.align(other, join="outer", axis=0, level=level, copy=False) @@ -5339,23 +5336,49 @@ def _combine_match_index(self, other, func, level=None): if left._is_mixed_type or right._is_mixed_type: # operate column-wise; avoid costly object-casting in `.values` - return ops.dispatch_to_series(left, right, func) + new_data = ops.dispatch_to_series(left, right, func) else: # fastpath --> operate directly on values with np.errstate(all="ignore"): new_data = func(left.values.T, right.values).T - return self._constructor( - new_data, index=left.index, columns=self.columns, copy=False - ) + return left._construct_result(other, new_data, func) def _combine_match_columns(self, other: Series, func, level=None): left, right = self.align(other, join="outer", axis=1, level=level, copy=False) # at this point we have `left.columns.equals(right.index)` - return ops.dispatch_to_series(left, right, func, axis="columns") + new_data = ops.dispatch_to_series(left, right, func, axis="columns") + return left._construct_result(right, new_data, func) def _combine_const(self, other, func): # scalar other or np.ndim(other) == 0 - return ops.dispatch_to_series(self, other, func) + new_data = ops.dispatch_to_series(self, other, func) + return self._construct_result(other, new_data, func) + + def _construct_result(self, other, result, func): + """ + Wrap the result of an arithmetic, comparison, or logical operation. + + Parameters + ---------- + other : object + result : DataFrame + func : binary operator + + Returns + ------- + DataFrame + + Notes + ----- + `func` is included for compat with SparseDataFrame signature, is not + needed here. + """ + out = self._constructor(result, index=self.index, copy=False) + # Pin columns instead of passing to constructor for compat with + # non-unique columns case + out.columns = self.columns + return out + # TODO: finalize? we do for SparseDataFrame def combine(self, other, func, fill_value=None, overwrite=True): """ diff --git a/pandas/core/ops/__init__.py b/pandas/core/ops/__init__.py index 016feff7e3beb..38faf3cea88fd 100644 --- a/pandas/core/ops/__init__.py +++ b/pandas/core/ops/__init__.py @@ -509,12 +509,7 @@ def column_op(a, b): raise NotImplementedError(right) new_data = expressions.evaluate(column_op, str_rep, left, right) - - result = left._constructor(new_data, index=left.index, copy=False) - # Pin columns instead of passing to constructor for compat with - # non-unique columns case - result.columns = left.columns - return result + return new_data def dispatch_to_extension_op( @@ -1055,7 +1050,8 @@ def f(self, other, axis=default_axis, level=None): # Another DataFrame if not self._indexed_same(other): self, other = self.align(other, "outer", level=level, copy=False) - return dispatch_to_series(self, other, na_op, str_rep) + new_data = dispatch_to_series(self, other, na_op, str_rep) + return self._construct_result(other, new_data, na_op) elif isinstance(other, ABCSeries): return _combine_series_frame( @@ -1085,7 +1081,8 @@ def f(self, other): raise ValueError( "Can only compare identically-labeled DataFrame objects" ) - return dispatch_to_series(self, other, func, str_rep) + new_data = dispatch_to_series(self, other, func, str_rep) + return self._construct_result(other, new_data, func) elif isinstance(other, ABCSeries): return _combine_series_frame( diff --git a/pandas/core/sparse/frame.py b/pandas/core/sparse/frame.py index 3d6ba0b8d9774..aaa99839144b4 100644 --- a/pandas/core/sparse/frame.py +++ b/pandas/core/sparse/frame.py @@ -534,19 +534,13 @@ def _set_value(self, index, col, value, takeable=False): # Arithmetic-related methods def _combine_frame(self, other, func, fill_value=None, level=None): - if level is not None: - raise NotImplementedError("'level' argument is not supported") - this, other = self.align(other, join="outer", level=level, copy=False) - new_index, new_columns = this.index, this.columns - - if self.empty and other.empty: - return self._constructor(index=new_index).__finalize__(self) + this._default_fill_value = self._default_fill_value new_data = {} if fill_value is not None: # TODO: be a bit more intelligent here - for col in new_columns: + for col in this.columns: if col in this and col in other: dleft = this[col].to_dense() dright = other[col].to_dense() @@ -555,38 +549,21 @@ def _combine_frame(self, other, func, fill_value=None, level=None): new_data[col] = result else: - for col in new_columns: + for col in this.columns: if col in this and col in other: new_data[col] = func(this[col], other[col]) - new_fill_value = self._get_op_result_fill_value(other, func) - - return self._constructor( - data=new_data, - index=new_index, - columns=new_columns, - default_fill_value=new_fill_value, - ).__finalize__(self) + return this._construct_result(other, new_data, func) def _combine_match_index(self, other, func, level=None): - - if level is not None: - raise NotImplementedError("'level' argument is not supported") - this, other = self.align(other, join="outer", axis=0, level=level, copy=False) + this._default_fill_value = self._default_fill_value new_data = {} for col in this.columns: new_data[col] = func(this[col], other) - fill_value = self._get_op_result_fill_value(other, func) - - return self._constructor( - new_data, - index=this.index, - columns=self.columns, - default_fill_value=fill_value, - ).__finalize__(self) + return this._construct_result(other, new_data, func) def _combine_match_columns(self, other, func, level=None): # patched version of DataFrame._combine_match_columns to account for @@ -594,27 +571,40 @@ def _combine_match_columns(self, other, func, level=None): # where 3.0 is numpy.float64 and series is a SparseSeries. Still # possible for this to happen, which is bothersome - if level is not None: - raise NotImplementedError("'level' argument is not supported") - left, right = self.align(other, join="outer", axis=1, level=level, copy=False) assert left.columns.equals(right.index) + left._default_fill_value = self._default_fill_value new_data = {} - for col in left.columns: new_data[col] = func(left[col], right[col]) - return self._constructor( - new_data, - index=left.index, - columns=left.columns, - default_fill_value=self.default_fill_value, - ).__finalize__(self) + # TODO: using this changed some behavior, see GH#28025 + return left._construct_result(other, new_data, func) def _combine_const(self, other, func): return self._apply_columns(lambda x: func(x, other)) + def _construct_result(self, other, result, func): + """ + Wrap the result of an arithmetic, comparison, or logical operation. + + Parameters + ---------- + other : object + result : SparseDataFrame + func : binary operator + + Returns + ------- + SparseDataFrame + """ + fill_value = self._get_op_result_fill_value(other, func) + + out = self._constructor(result, index=self.index, default_fill_value=fill_value) + out.columns = self.columns + return out.__finalize__(self) + def _get_op_result_fill_value(self, other, func): own_default = self.default_fill_value @@ -643,6 +633,11 @@ def _get_op_result_fill_value(self, other, func): else: fill_value = func(np.float64(own_default), np.float64(other.fill_value)) fill_value = item_from_zerodim(fill_value) + + elif isinstance(other, Series): + # reached via _combine_match_columns + fill_value = self.default_fill_value + else: raise NotImplementedError(type(other))