diff --git a/pandas/core/frame.py b/pandas/core/frame.py index a001037b573d4..b96af6af3707f 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -3944,34 +3944,27 @@ def _combine_frame(self, other, func, fill_value=None, level=None): new_index, new_columns = this.index, this.columns def _arith_op(left, right): + # for the mixed_type case where we iterate over columns, + # _arith_op(left, right) is equivalent to + # left._binop(right, func, fill_value=fill_value) left, right = ops.fill_binop(left, right, fill_value) return func(left, right) if this._is_mixed_type or other._is_mixed_type: - - # unique + # iterate over columns if this.columns.is_unique: - - def f(col): - r = _arith_op(this[col].values, other[col].values) - return self._constructor_sliced(r, index=new_index, - dtype=r.dtype) - - result = {col: f(col) for col in this} - - # non-unique + # unique columns + result = {col: _arith_op(this[col], other[col]) + for col in this} + result = self._constructor(result, index=new_index, + columns=new_columns, copy=False) else: - - def f(i): - r = _arith_op(this.iloc[:, i].values, - other.iloc[:, i].values) - return self._constructor_sliced(r, index=new_index, - dtype=r.dtype) - - result = {i: f(i) for i, col in enumerate(this.columns)} + # non-unique columns + result = {i: _arith_op(this.iloc[:, i], other.iloc[:, i]) + for i, col in enumerate(this.columns)} result = self._constructor(result, index=new_index, copy=False) result.columns = new_columns - return result + return result else: result = _arith_op(this.values, other.values) @@ -3979,36 +3972,11 @@ def f(i): return self._constructor(result, index=new_index, columns=new_columns, copy=False) - def _combine_series(self, other, func, fill_value=None, axis=None, - level=None, try_cast=True): - if fill_value is not None: - raise NotImplementedError("fill_value {fill} not supported." - .format(fill=fill_value)) - - if axis is not None: - axis = self._get_axis_name(axis) - if axis == 'index': - return self._combine_match_index(other, func, level=level) - else: - return self._combine_match_columns(other, func, level=level, - try_cast=try_cast) - else: - if not len(other): - return self * np.nan - - if not len(self): - # Ambiguous case, use _series so works with DataFrame - return self._constructor(data=self._series, index=self.index, - columns=self.columns) - - # default axis is columns - return self._combine_match_columns(other, func, level=level, - try_cast=try_cast) - def _combine_match_index(self, other, func, level=None): left, right = self.align(other, join='outer', axis=0, level=level, copy=False) - return self._constructor(func(left.values.T, right.values).T, + new_data = func(left.values.T, right.values).T + return self._constructor(new_data, index=left.index, columns=self.columns, copy=False) @@ -4027,7 +3995,8 @@ def _combine_const(self, other, func, errors='raise', try_cast=True): try_cast=try_cast) return self._constructor(new_data) - def _compare_frame_evaluate(self, other, func, str_rep, try_cast=True): + def _compare_frame(self, other, func, str_rep, try_cast=True): + # compare_frame assumes self._indexed_same(other) import pandas.core.computation.expressions as expressions # unique @@ -4052,19 +4021,6 @@ def _compare(a, b): result.columns = self.columns return result - def _compare_frame(self, other, func, str_rep, try_cast=True): - if not self._indexed_same(other): - raise ValueError('Can only compare identically-labeled ' - 'DataFrame objects') - return self._compare_frame_evaluate(other, func, str_rep, - try_cast=try_cast) - - def _flex_compare_frame(self, other, func, str_rep, level, try_cast=True): - if not self._indexed_same(other): - self, other = self.align(other, 'outer', level=level, copy=False) - return self._compare_frame_evaluate(other, func, str_rep, - try_cast=try_cast) - def combine(self, other, func, fill_value=None, overwrite=True): """ Add two DataFrame objects and do not propagate NaN values, so if for a diff --git a/pandas/core/indexes/base.py b/pandas/core/indexes/base.py index 81b6b28d3927e..a84c00a6b84ce 100644 --- a/pandas/core/indexes/base.py +++ b/pandas/core/indexes/base.py @@ -55,7 +55,7 @@ import pandas.core.algorithms as algos import pandas.core.sorting as sorting from pandas.io.formats.printing import pprint_thing -from pandas.core.ops import _comp_method_OBJECT_ARRAY +from pandas.core.ops import _comp_method_OBJECT_ARRAY, make_invalid_op from pandas.core.config import get_option from pandas.core.strings import StringMethods @@ -82,26 +82,6 @@ def _try_get_item(x): return x -def _make_invalid_op(name): - """ - Return a binary method that always raises a TypeError. - - Parameters - ---------- - name : str - - Returns - ------- - invalid_op : function - """ - def invalid_op(self, other=None): - raise TypeError("cannot perform {name} with this index type: " - "{typ}".format(name=name, typ=type(self))) - - invalid_op.__name__ = name - return invalid_op - - class InvalidIndexError(Exception): pass @@ -3994,22 +3974,23 @@ def _evaluate_compare(self, other): @classmethod def _add_numeric_methods_add_sub_disabled(cls): """ add in the numeric add/sub methods to disable """ - cls.__add__ = cls.__radd__ = __iadd__ = _make_invalid_op('__add__') # noqa - cls.__sub__ = __isub__ = _make_invalid_op('__sub__') # noqa + cls.__add__ = cls.__radd__ = __iadd__ = make_invalid_op('__add__') # noqa + cls.__sub__ = __isub__ = make_invalid_op('__sub__') # noqa @classmethod def _add_numeric_methods_disabled(cls): """ add in numeric methods to disable other than add/sub """ - cls.__pow__ = cls.__rpow__ = _make_invalid_op('__pow__') - cls.__mul__ = cls.__rmul__ = _make_invalid_op('__mul__') - cls.__floordiv__ = cls.__rfloordiv__ = _make_invalid_op('__floordiv__') - cls.__truediv__ = cls.__rtruediv__ = _make_invalid_op('__truediv__') + cls.__pow__ = make_invalid_op('__pow__') + cls.__rpow__ = make_invalid_op('__rpow__') + cls.__mul__ = cls.__rmul__ = make_invalid_op('__mul__') + cls.__floordiv__ = cls.__rfloordiv__ = make_invalid_op('__floordiv__') + cls.__truediv__ = cls.__rtruediv__ = make_invalid_op('__truediv__') if not compat.PY3: - cls.__div__ = cls.__rdiv__ = _make_invalid_op('__div__') - cls.__neg__ = _make_invalid_op('__neg__') - cls.__pos__ = _make_invalid_op('__pos__') - cls.__abs__ = _make_invalid_op('__abs__') - cls.__inv__ = _make_invalid_op('__inv__') + cls.__div__ = cls.__rdiv__ = make_invalid_op('__div__') + cls.__neg__ = make_invalid_op('__neg__') + cls.__pos__ = make_invalid_op('__pos__') + cls.__abs__ = make_invalid_op('__abs__') + cls.__inv__ = make_invalid_op('__inv__') def _maybe_update_attributes(self, attrs): """ Update Index attributes (e.g. freq) depending on op """ @@ -4207,8 +4188,8 @@ def logical_func(self, *args, **kwargs): @classmethod def _add_logical_methods_disabled(cls): """ add in logical methods to disable """ - cls.all = _make_invalid_op('all') - cls.any = _make_invalid_op('any') + cls.all = make_invalid_op('all') + cls.any = make_invalid_op('any') Index._add_numeric_methods_disabled() diff --git a/pandas/core/ops.py b/pandas/core/ops.py index 4c234ccb4dd47..fd4fc5540fcec 100644 --- a/pandas/core/ops.py +++ b/pandas/core/ops.py @@ -96,6 +96,26 @@ def rxor(left, right): # ----------------------------------------------------------------------------- +def make_invalid_op(name): + """ + Return a binary method that always raises a TypeError. + + Parameters + ---------- + name : str + + Returns + ------- + invalid_op : function + """ + def invalid_op(self, other=None): + raise TypeError("cannot perform {name} with this index type: " + "{typ}".format(name=name, typ=type(self).__name__)) + + invalid_op.__name__ = name + return invalid_op + + def _gen_eval_kwargs(name): """ Find the keyword arguments to pass to numexpr for the given operation. @@ -1047,8 +1067,8 @@ def flex_wrapper(self, other, level=None, fill_value=None, axis=0): elif isinstance(other, (np.ndarray, list, tuple)): if len(other) != len(self): raise ValueError('Lengths must be equal') - return self._binop(self._constructor(other, self.index), op, - level=level, fill_value=fill_value) + other = self._constructor(other, self.index) + return self._binop(other, op, level=level, fill_value=fill_value) else: if fill_value is not None: self = self.fillna(fill_value) @@ -1071,6 +1091,51 @@ def flex_wrapper(self, other, level=None, fill_value=None, axis=0): # ----------------------------------------------------------------------------- # DataFrame +def _combine_series_frame(self, other, func, fill_value=None, axis=None, + level=None, try_cast=True): + """ + Apply binary operator `func` to self, other using alignment and fill + conventions determined by the fill_value, axis, level, and try_cast kwargs. + + Parameters + ---------- + self : DataFrame + other : Series + func : binary operator + fill_value : object, default None + axis : {0, 1, 'columns', 'index', None}, default None + level : int or None, default None + try_cast : bool, default True + + Returns + ------- + result : DataFrame + """ + if fill_value is not None: + raise NotImplementedError("fill_value {fill} not supported." + .format(fill=fill_value)) + + if axis is not None: + axis = self._get_axis_number(axis) + if axis == 0: + return self._combine_match_index(other, func, level=level) + else: + return self._combine_match_columns(other, func, level=level, + try_cast=try_cast) + else: + if not len(other): + return self * np.nan + + if not len(self): + # Ambiguous case, use _series so works with DataFrame + return self._constructor(data=self._series, index=self.index, + columns=self.columns) + + # default axis is columns + return self._combine_match_columns(other, func, level=level, + try_cast=try_cast) + + def _align_method_FRAME(left, right, axis): """ convert rhs to meet lhs dims if input is list, tuple or np.ndarray """ @@ -1179,8 +1244,9 @@ def f(self, other, axis=default_axis, level=None, fill_value=None): if isinstance(other, ABCDataFrame): # Another DataFrame return self._combine_frame(other, na_op, fill_value, level) elif isinstance(other, ABCSeries): - return self._combine_series(other, na_op, fill_value, axis, level, - try_cast=True) + return _combine_series_frame(self, other, na_op, + fill_value=fill_value, axis=axis, + level=level, try_cast=True) else: if fill_value is not None: self = self.fillna(fill_value) @@ -1209,13 +1275,17 @@ def f(self, other, axis=default_axis, level=None): other = _align_method_FRAME(self, other, axis) - if isinstance(other, ABCDataFrame): # Another DataFrame - return self._flex_compare_frame(other, na_op, str_rep, level, - try_cast=False) + if isinstance(other, ABCDataFrame): + # Another DataFrame + if not self._indexed_same(other): + self, other = self.align(other, 'outer', + level=level, copy=False) + return self._compare_frame(other, na_op, str_rep, try_cast=False) elif isinstance(other, ABCSeries): - return self._combine_series(other, na_op, None, axis, level, - try_cast=False) + return _combine_series_frame(self, other, na_op, + fill_value=None, axis=axis, + level=level, try_cast=False) else: return self._combine_const(other, na_op, try_cast=False) @@ -1227,11 +1297,17 @@ def f(self, other, axis=default_axis, level=None): def _comp_method_FRAME(func, name, str_rep): @Appender('Wrapper for comparison method {name}'.format(name=name)) def f(self, other): - if isinstance(other, ABCDataFrame): # Another DataFrame - return self._compare_frame(other, func, str_rep) + if isinstance(other, ABCDataFrame): + # Another DataFrame + if not self._indexed_same(other): + raise ValueError('Can only compare identically-labeled ' + 'DataFrame objects') + return self._compare_frame(other, func, str_rep, try_cast=True) + elif isinstance(other, ABCSeries): - return self._combine_series(other, func, - axis=None, try_cast=False) + return _combine_series_frame(self, other, func, + fill_value=None, axis=None, + level=None, try_cast=False) else: # straight boolean comparisons we want to allow all columns diff --git a/pandas/tests/frame/test_arithmetic.py b/pandas/tests/frame/test_arithmetic.py index a3a799aed1c55..65afe85628f8e 100644 --- a/pandas/tests/frame/test_arithmetic.py +++ b/pandas/tests/frame/test_arithmetic.py @@ -72,6 +72,23 @@ def test_tz_aware_scalar_comparison(self, timestamps): # ------------------------------------------------------------------- # Arithmetic +class TestFrameFlexArithmetic(object): + def test_df_add_flex_filled_mixed_dtypes(self): + # GH#19611 + dti = pd.date_range('2016-01-01', periods=3) + ser = pd.Series(['1 Day', 'NaT', '2 Days'], dtype='timedelta64[ns]') + df = pd.DataFrame({'A': dti, 'B': ser}) + other = pd.DataFrame({'A': ser, 'B': ser}) + fill = pd.Timedelta(days=1).to_timedelta64() + result = df.add(other, fill_value=fill) + + expected = pd.DataFrame( + {'A': pd.Series(['2016-01-02', '2016-01-03', '2016-01-05'], + dtype='datetime64[ns]'), + 'B': ser * 2}) + tm.assert_frame_equal(result, expected) + + class TestFrameMulDiv(object): """Tests for DataFrame multiplication and division""" # ------------------------------------------------------------------