diff --git a/pandas/core/ops.py b/pandas/core/ops.py index ba8a15b60ba56..6ea4a81cb52a1 100644 --- a/pandas/core/ops.py +++ b/pandas/core/ops.py @@ -39,8 +39,7 @@ ABCSeries, ABCDataFrame, ABCIndex, - ABCPeriodIndex, - ABCSparseSeries) + ABCSparseSeries, ABCSparseArray) def _gen_eval_kwargs(name): @@ -445,8 +444,14 @@ def names(x): return new_methods -def add_methods(cls, new_methods, force): +def add_methods(cls, new_methods): for name, method in new_methods.items(): + # For most methods, if we find that the class already has a method + # of the same name, it is OK to over-write it. The exception is + # inplace methods (__iadd__, __isub__, ...) for SparseArray, which + # retain the np.ndarray versions. + force = not (issubclass(cls, ABCSparseArray) and + name.startswith('__i')) if force or name not in cls.__dict__: bind_method(cls, name, method) @@ -454,8 +459,7 @@ def add_methods(cls, new_methods, force): # ---------------------------------------------------------------------- # Arithmetic def add_special_arithmetic_methods(cls, arith_method=None, - comp_method=None, bool_method=None, - force=False): + comp_method=None, bool_method=None): """ Adds the full suite of special arithmetic methods (``__add__``, ``__sub__``, etc.) to the class. @@ -469,9 +473,6 @@ def add_special_arithmetic_methods(cls, arith_method=None, factory for rich comparison - signature: f(op, name, str_rep) bool_method : function (optional) factory for boolean methods - signature: f(op, name, str_rep) - force : bool, default False - if False, checks whether function is defined **on ``cls.__dict__``** - before defining if True, always defines functions on class base """ new_methods = _create_methods(cls, arith_method, comp_method, bool_method, special=True) @@ -512,12 +513,11 @@ def f(self, other): __ior__=_wrap_inplace_method(new_methods["__or__"]), __ixor__=_wrap_inplace_method(new_methods["__xor__"]))) - add_methods(cls, new_methods=new_methods, force=force) + add_methods(cls, new_methods=new_methods) def add_flex_arithmetic_methods(cls, flex_arith_method, - flex_comp_method=None, flex_bool_method=None, - force=False): + flex_comp_method=None, flex_bool_method=None): """ Adds the full suite of flex arithmetic methods (``pow``, ``mul``, ``add``) to the class. @@ -529,9 +529,6 @@ def add_flex_arithmetic_methods(cls, flex_arith_method, f(op, name, str_rep) flex_comp_method : function, optional, factory for rich comparison - signature: f(op, name, str_rep) - force : bool, default False - if False, checks whether function is defined **on ``cls.__dict__``** - before defining if True, always defines functions on class base """ new_methods = _create_methods(cls, flex_arith_method, flex_comp_method, flex_bool_method, @@ -544,7 +541,7 @@ def add_flex_arithmetic_methods(cls, flex_arith_method, if k in new_methods: new_methods.pop(k) - add_methods(cls, new_methods=new_methods, force=force) + add_methods(cls, new_methods=new_methods) # ----------------------------------------------------------------------------- @@ -614,14 +611,11 @@ def na_op(x, y): result = np.empty(x.size, dtype=dtype) mask = notna(x) & notna(y) result[mask] = op(x[mask], com._values_from_object(y[mask])) - elif isinstance(x, np.ndarray): + else: + assert isinstance(x, np.ndarray) result = np.empty(len(x), dtype=x.dtype) mask = notna(x) result[mask] = op(x[mask], y) - else: - raise TypeError("{typ} cannot perform the operation " - "{op}".format(typ=type(x).__name__, - op=str_rep)) result, changed = maybe_upcast_putmask(result, ~mask, np.nan) @@ -658,6 +652,10 @@ def wrapper(left, right, name=name, na_op=na_op): index=left.index, name=res_name, dtype=result.dtype) + elif is_categorical_dtype(left): + raise TypeError("{typ} cannot perform the operation " + "{op}".format(typ=type(left).__name__, op=str_rep)) + lvalues = left.values rvalues = right if isinstance(rvalues, ABCSeries): @@ -745,8 +743,12 @@ def na_op(x, y): elif is_categorical_dtype(y) and not is_scalar(y): return op(y, x) - if is_object_dtype(x.dtype): + elif is_object_dtype(x.dtype): result = _comp_method_OBJECT_ARRAY(op, x, y) + + elif is_datetimelike_v_numeric(x, y): + raise TypeError("invalid type comparison") + else: # we want to compare like types @@ -754,15 +756,6 @@ def na_op(x, y): # we are not NotImplemented, otherwise # we would allow datetime64 (but viewed as i8) against # integer comparisons - if is_datetimelike_v_numeric(x, y): - raise TypeError("invalid type comparison") - - # numpy does not like comparisons vs None - if is_scalar(y) and isna(y): - if name == '__ne__': - return np.ones(len(x), dtype=bool) - else: - return np.zeros(len(x), dtype=bool) # we have a datetime/timedelta and may need to convert mask = None @@ -795,15 +788,18 @@ def wrapper(self, other, axis=None): if axis is not None: self._get_axis_number(axis) - if isinstance(other, ABCSeries): + if isinstance(other, ABCDataFrame): # pragma: no cover + # Defer to DataFrame implementation; fail early + return NotImplemented + + elif isinstance(other, ABCSeries): name = com._maybe_match_name(self, other) if not self._indexed_same(other): msg = 'Can only compare identically-labeled Series objects' raise ValueError(msg) - return self._constructor(na_op(self.values, other.values), - index=self.index, name=name) - elif isinstance(other, ABCDataFrame): # pragma: no cover - return NotImplemented + res_values = na_op(self.values, other.values) + return self._constructor(res_values, index=self.index, name=name) + elif isinstance(other, (np.ndarray, pd.Index)): # do not check length of zerodim array # as it will broadcast @@ -811,23 +807,25 @@ def wrapper(self, other, axis=None): len(self) != len(other)): raise ValueError('Lengths must match to compare') - if isinstance(other, ABCPeriodIndex): - # temp workaround until fixing GH 13637 - # tested in test_nat_comparisons - # (pandas.tests.series.test_operators.TestSeriesOperators) - return self._constructor(na_op(self.values, - other.astype(object).values), - index=self.index) - - return self._constructor(na_op(self.values, np.asarray(other)), + res_values = na_op(self.values, np.asarray(other)) + return self._constructor(res_values, index=self.index).__finalize__(self) - elif isinstance(other, pd.Categorical): - if not is_categorical_dtype(self): - msg = ("Cannot compare a Categorical for op {op} with Series " - "of dtype {typ}.\nIf you want to compare values, use " - "'series np.asarray(other)'.") - raise TypeError(msg.format(op=op, typ=self.dtype)) + elif (isinstance(other, pd.Categorical) and + not is_categorical_dtype(self)): + raise TypeError("Cannot compare a Categorical for op {op} with " + "Series of dtype {typ}.\nIf you want to compare " + "values, use 'series np.asarray(other)'." + .format(op=op, typ=self.dtype)) + + elif is_scalar(other) and isna(other): + # numpy does not like comparisons vs None + if op is operator.ne: + res_values = np.ones(len(self), dtype=bool) + else: + res_values = np.zeros(len(self), dtype=bool) + return self._constructor(res_values, index=self.index, + name=self.name, dtype='bool') if is_categorical_dtype(self): # cats are a special case as get_values() would return an ndarray, @@ -877,11 +875,10 @@ def na_op(x, y): y = _ensure_object(y) result = lib.vec_binop(x, y, op) else: + # let null fall thru + if not isna(y): + y = bool(y) try: - - # let null fall thru - if not isna(y): - y = bool(y) result = lib.scalar_binop(x, y, op) except: msg = ("cannot compare a dtyped [{dtype}] array " @@ -899,26 +896,31 @@ def wrapper(self, other): self, other = _align_method_SERIES(self, other, align_asobject=True) - if isinstance(other, ABCSeries): + if isinstance(other, ABCDataFrame): + # Defer to DataFrame implementation; fail early + return NotImplemented + + elif isinstance(other, ABCSeries): name = com._maybe_match_name(self, other) is_other_int_dtype = is_integer_dtype(other.dtype) other = fill_int(other) if is_other_int_dtype else fill_bool(other) filler = (fill_int if is_self_int_dtype and is_other_int_dtype else fill_bool) - return filler(self._constructor(na_op(self.values, other.values), - index=self.index, name=name)) - elif isinstance(other, ABCDataFrame): - return NotImplemented + res_values = na_op(self.values, other.values) + unfilled = self._constructor(res_values, + index=self.index, name=name) + return filler(unfilled) else: # scalars, list, tuple, np.array filler = (fill_int if is_self_int_dtype and is_integer_dtype(np.asarray(other)) else fill_bool) - return filler(self._constructor( - na_op(self.values, other), - index=self.index)).__finalize__(self) + + res_values = na_op(self.values, other) + unfilled = self._constructor(res_values, index=self.index) + return filler(unfilled).__finalize__(self) return wrapper @@ -1023,21 +1025,23 @@ def na_op(x, y): mask = notna(xrav) & notna(yrav) xrav = xrav[mask] - # we may need to manually - # broadcast a 1 element array if yrav.shape != mask.shape: - yrav = np.empty(mask.shape, dtype=yrav.dtype) - yrav.fill(yrav.item()) + # FIXME: GH#5284, GH#5035, GH#19448 + # Without specifically raising here we get mismatched + # errors in Py3 (TypeError) vs Py2 (ValueError) + raise ValueError('Cannot broadcast operands together.') yrav = yrav[mask] - if np.prod(xrav.shape) and np.prod(yrav.shape): + if xrav.size: with np.errstate(all='ignore'): result[mask] = op(xrav, yrav) - elif hasattr(x, 'size'): + + elif isinstance(x, np.ndarray): + # mask is only meaningful for x result = np.empty(x.size, dtype=x.dtype) mask = notna(xrav) xrav = xrav[mask] - if np.prod(xrav.shape): + if xrav.size: with np.errstate(all='ignore'): result[mask] = op(xrav, y) else: diff --git a/pandas/core/sparse/series.py b/pandas/core/sparse/series.py index 1c23527cf57c4..62a467bec2683 100644 --- a/pandas/core/sparse/series.py +++ b/pandas/core/sparse/series.py @@ -819,4 +819,4 @@ def from_coo(cls, A, dense_index=False): ops.add_special_arithmetic_methods(SparseSeries, ops._arith_method_SPARSE_SERIES, comp_method=ops._arith_method_SPARSE_SERIES, - bool_method=None, force=True) + bool_method=None)