diff --git a/pandas/core/ops.py b/pandas/core/ops.py index fc3ea106252db..cfd75201fcd53 100644 --- a/pandas/core/ops.py +++ b/pandas/core/ops.py @@ -11,7 +11,7 @@ import pandas as pd import datetime -from pandas._libs import (lib, index as libindex, +from pandas._libs import (lib, tslib as libts, algos as libalgos, iNaT) from pandas import compat @@ -39,8 +39,7 @@ from pandas.core.dtypes.generic import ( ABCSeries, ABCDataFrame, - ABCIndex, ABCDatetimeIndex, - ABCPeriodIndex) + ABCIndex, ABCDatetimeIndex, ABCIndexClass) # ----------------------------------------------------------------------------- # Functions that add arithmetic methods to objects, given arithmetic factory @@ -748,7 +747,7 @@ def _get_series_op_result_name(left, right): def _comp_method_OBJECT_ARRAY(op, x, y): if isinstance(y, list): y = construct_1d_object_array_from_listlike(y) - if isinstance(y, (np.ndarray, ABCSeries, ABCIndex)): + if isinstance(y, (np.ndarray, ABCSeries, ABCIndexClass)): if not is_object_dtype(y.dtype): y = y.astype(np.object_) @@ -775,8 +774,9 @@ def na_op(x, y): return op(x, y) elif is_categorical_dtype(y) and not is_scalar(y): return op(y, x) + # TODO: Does this make sense or should op be reversed? - if is_object_dtype(x.dtype): + elif is_object_dtype(x.dtype): result = _comp_method_OBJECT_ARRAY(op, x, y) else: @@ -797,15 +797,9 @@ def na_op(x, y): # we have a datetime/timedelta and may need to convert mask = None - if (needs_i8_conversion(x) or - (not is_scalar(y) and needs_i8_conversion(y))): - - if is_scalar(y): - mask = isna(x) - y = libindex.convert_scalar(x, _values_from_object(y)) - else: - mask = isna(x) | isna(y) - y = y.view('i8') + if not is_scalar(y) and needs_i8_conversion(y): + mask = isna(x) | isna(y) + y = y.view('i8') x = x.view('i8') try: @@ -821,54 +815,72 @@ def na_op(x, y): return result - def wrapper(self, other, axis=None): - # Validate the axis parameter - if axis is not None: - self._get_axis_number(axis) - - if isinstance(other, ABCSeries): - name = _maybe_match_name(self, other) - if not self._indexed_same(other): - msg = 'Can only compare identically-labeled Series objects' - raise ValueError(msg) - return self._constructor(na_op(self.values, other.values), - index=self.index, name=name) - elif isinstance(other, ABCDataFrame): # pragma: no cover + def wrapper(self, other): + if isinstance(other, ABCDataFrame): # pragma: no cover return NotImplemented - elif isinstance(other, (np.ndarray, pd.Index)): - # do not check length of zerodim array - # as it will broadcast - if (not is_scalar(lib.item_from_zerodim(other)) and - len(self) != len(other)): - raise ValueError('Lengths must match to compare') - if isinstance(other, ABCPeriodIndex): - # temp workaround until fixing GH 13637 - # tested in test_nat_comparisons - # (pandas.tests.series.test_operators.TestSeriesOperators) - return self._constructor(na_op(self.values, - other.astype(object).values), - index=self.index) + elif isinstance(other, ABCSeries) and not self._indexed_same(other): + msg = 'Can only compare identically-labeled Series objects' + raise ValueError(msg) - return self._constructor(na_op(self.values, np.asarray(other)), - index=self.index).__finalize__(self) + res_name = _get_series_op_result_name(self, other) - elif isinstance(other, pd.Categorical): - if not is_categorical_dtype(self): - msg = ("Cannot compare a Categorical for op {op} with Series " - "of dtype {typ}.\nIf you want to compare values, use " - "'series np.asarray(other)'.") - raise TypeError(msg.format(op=op, typ=self.dtype)) + if is_timedelta64_dtype(self): + res = op(pd.TimedeltaIndex(self), other) + return self._constructor(res, index=self.index, name=res_name) + + elif is_datetime64_dtype(self) or is_datetime64tz_dtype(self): + # kludge; DatetimeIndex refuses to compare against None or + # datetime.date; until the "right" behavior is resolved, we cast + # these types here to types that DatetimeIndex understand. + if type(other) is datetime.date: + other = datetime.datetime(other.year, other.month, other.day) + elif other is None: + other = pd.NaT + res = op(pd.DatetimeIndex(self), other) + return self._constructor(res, index=self.index, name=res_name) - if is_categorical_dtype(self): + elif isinstance(other, ABCSeries): + # Note: Ordering matters; this needs to go before + # is_categorical_dtype(self) branch. + res = na_op(self.values, other.values) + return self._constructor(res, + index=self.index, name=res_name) + + elif is_categorical_dtype(self): # cats are a special case as get_values() would return an ndarray, # which would then not take categories ordering into account # we can go directly to op, as the na_op would just test again and # dispatch to it. with np.errstate(all='ignore'): res = op(self.values, other) + # Note: self.values is a pd.Categorical object + return self._constructor(res, index=self.index, + name=res_name, dtype='bool') + + elif isinstance(other, pd.Categorical): + # Ordering of conditions here matters; we know at this point + # that not is_categorical_dtype(self) + msg = ("Cannot compare a Categorical for op {op} with Series " + "of dtype {typ}.\nIf you want to compare values, use " + "'series np.asarray(other)'.") + raise TypeError(msg.format(op=op, typ=self.dtype)) + + elif isinstance(other, (np.ndarray, pd.Index)): + # do not check length of zerodim array + # as it will broadcast + if (not is_scalar(lib.item_from_zerodim(other)) and + len(self) != len(other)): + raise ValueError('Lengths must match to compare') + + res = na_op(self.values, np.asarray(other)) + return self._constructor(res, + index=self.index).__finalize__(self) + # TODO: Why __finalize__ here but not elsewhere? + else: values = self.get_values() + # TODO: why get_values() here and just values elsewhere? if isinstance(other, (list, np.ndarray)): other = np.asarray(other) @@ -881,9 +893,10 @@ def wrapper(self, other, axis=None): # always return a full value series here res = _values_from_object(res) - res = pd.Series(res, index=self.index, name=self.name, dtype='bool') - return res - + res = pd.Series(res, index=self.index, name=res_name, dtype='bool') + # TODO: Why not use self._constructor here? + # TODO: pass dtype='bool' in other locations? + return res return wrapper diff --git a/pandas/tests/indexes/common.py b/pandas/tests/indexes/common.py index 8948c5f79900d..bfba134541164 100644 --- a/pandas/tests/indexes/common.py +++ b/pandas/tests/indexes/common.py @@ -760,11 +760,12 @@ def test_equals_op(self): if isinstance(index_a, PeriodIndex): return + len_msg = "Lengths must match|different lengths" n = len(index_a) index_b = index_a[0:-1] index_c = index_a[0:-1].append(index_a[-2:-1]) index_d = index_a[0:1] - with tm.assert_raises_regex(ValueError, "Lengths must match"): + with tm.assert_raises_regex(ValueError, len_msg): index_a == index_b expected1 = np.array([True] * n) expected2 = np.array([True] * (n - 1) + [False]) @@ -776,7 +777,7 @@ def test_equals_op(self): array_b = np.array(index_a[0:-1]) array_c = np.array(index_a[0:-1].append(index_a[-2:-1])) array_d = np.array(index_a[0:1]) - with tm.assert_raises_regex(ValueError, "Lengths must match"): + with tm.assert_raises_regex(ValueError, len_msg): index_a == array_b tm.assert_numpy_array_equal(index_a == array_a, expected1) tm.assert_numpy_array_equal(index_a == array_c, expected2) @@ -786,22 +787,22 @@ def test_equals_op(self): series_b = Series(array_b) series_c = Series(array_c) series_d = Series(array_d) - with tm.assert_raises_regex(ValueError, "Lengths must match"): + with tm.assert_raises_regex(ValueError, len_msg): index_a == series_b tm.assert_numpy_array_equal(index_a == series_a, expected1) tm.assert_numpy_array_equal(index_a == series_c, expected2) # cases where length is 1 for one of them - with tm.assert_raises_regex(ValueError, "Lengths must match"): + with tm.assert_raises_regex(ValueError, len_msg): index_a == index_d - with tm.assert_raises_regex(ValueError, "Lengths must match"): + with tm.assert_raises_regex(ValueError, len_msg): index_a == series_d - with tm.assert_raises_regex(ValueError, "Lengths must match"): + with tm.assert_raises_regex(ValueError, len_msg): index_a == array_d msg = "Can only compare identically-labeled Series objects" with tm.assert_raises_regex(ValueError, msg): series_a == series_d - with tm.assert_raises_regex(ValueError, "Lengths must match"): + with tm.assert_raises_regex(ValueError, len_msg): series_a == array_d # comparing with a scalar should broadcast; note that we are excluding