diff --git a/doc/source/whatsnew/v0.23.0.txt b/doc/source/whatsnew/v0.23.0.txt index ea56ebad7d782..4dfa4bfe637f1 100644 --- a/doc/source/whatsnew/v0.23.0.txt +++ b/doc/source/whatsnew/v0.23.0.txt @@ -686,3 +686,5 @@ Other ^^^^^ - Improved error message when attempting to use a Python keyword as an identifier in a ``numexpr`` backed query (:issue:`18221`) +- Comparisons between :class:`Series` and :class:`Index` would return a ``Series`` with an incorrect name, ignoring the ``Index``'s name attribute (:issue:`19582`) +- diff --git a/pandas/core/arrays/categorical.py b/pandas/core/arrays/categorical.py index 62c6a6b16cbe9..15953e6efbe7f 100644 --- a/pandas/core/arrays/categorical.py +++ b/pandas/core/arrays/categorical.py @@ -53,6 +53,9 @@ def f(self, other): # results depending whether categories are the same or not is kind of # insane, so be a bit stricter here and use the python3 idea of # comparing only things of equal type. + if isinstance(other, ABCSeries): + return NotImplemented + if not self.ordered: if op in ['__lt__', '__gt__', '__le__', '__ge__']: raise TypeError("Unordered Categoricals can only compare " diff --git a/pandas/core/indexes/category.py b/pandas/core/indexes/category.py index 2c7be2b21f959..97cadb2808ad1 100644 --- a/pandas/core/indexes/category.py +++ b/pandas/core/indexes/category.py @@ -1,3 +1,5 @@ +import operator + import numpy as np from pandas._libs import index as libindex @@ -726,7 +728,9 @@ def _codes_for_groupby(self, sort): def _add_comparison_methods(cls): """ add in comparison methods """ - def _make_compare(opname): + def _make_compare(op): + opname = '__{op}__'.format(op=op.__name__) + def _evaluate_compare(self, other): # if we have a Categorical type, then must have the same @@ -749,16 +753,21 @@ def _evaluate_compare(self, other): "have the same categories and ordered " "attributes") - return getattr(self.values, opname)(other) + result = op(self.values, other) + if isinstance(result, ABCSeries): + # Dispatch to pd.Categorical returned NotImplemented + # and we got a Series back; down-cast to ndarray + result = result.values + return result return compat.set_function_name(_evaluate_compare, opname, cls) - cls.__eq__ = _make_compare('__eq__') - cls.__ne__ = _make_compare('__ne__') - cls.__lt__ = _make_compare('__lt__') - cls.__gt__ = _make_compare('__gt__') - cls.__le__ = _make_compare('__le__') - cls.__ge__ = _make_compare('__ge__') + cls.__eq__ = _make_compare(operator.eq) + cls.__ne__ = _make_compare(operator.ne) + cls.__lt__ = _make_compare(operator.lt) + cls.__gt__ = _make_compare(operator.gt) + cls.__le__ = _make_compare(operator.le) + cls.__ge__ = _make_compare(operator.ge) def _delegate_method(self, name, *args, **kwargs): """ method delegation to the ._values """ diff --git a/pandas/core/ops.py b/pandas/core/ops.py index 6db84aedce7e7..974a57a28f196 100644 --- a/pandas/core/ops.py +++ b/pandas/core/ops.py @@ -726,7 +726,7 @@ def dispatch_to_index_op(op, left, right, index_class): # avoid accidentally allowing integer add/sub. For datetime64[tz] dtypes, # left_idx may inherit a freq from a cached DatetimeIndex. # See discussion in GH#19147. - if left_idx.freq is not None: + if getattr(left_idx, 'freq', None) is not None: left_idx = left_idx._shallow_copy(freq=None) try: result = op(left_idx, right) @@ -774,9 +774,8 @@ def na_op(x, y): # dispatch to the categorical if we have a categorical # in either operand - if is_categorical_dtype(x): - return op(x, y) - elif is_categorical_dtype(y) and not is_scalar(y): + if is_categorical_dtype(y) and not is_scalar(y): + # The `not is_scalar(y)` check excludes the string "category" return op(y, x) elif is_object_dtype(x.dtype): @@ -824,17 +823,36 @@ def wrapper(self, other, axis=None): if axis is not None: self._get_axis_number(axis) + res_name = _get_series_op_result_name(self, other) + if isinstance(other, ABCDataFrame): # pragma: no cover # Defer to DataFrame implementation; fail early return NotImplemented + elif isinstance(other, ABCSeries) and not self._indexed_same(other): + raise ValueError("Can only compare identically-labeled " + "Series objects") + + elif is_categorical_dtype(self): + # Dispatch to Categorical implementation; pd.CategoricalIndex + # behavior is non-canonical GH#19513 + res_values = dispatch_to_index_op(op, self, other, pd.Categorical) + return self._constructor(res_values, index=self.index, + name=res_name) + + elif is_timedelta64_dtype(self): + res_values = dispatch_to_index_op(op, self, other, + pd.TimedeltaIndex) + return self._constructor(res_values, index=self.index, + name=res_name) + elif isinstance(other, ABCSeries): - name = com._maybe_match_name(self, other) - if not self._indexed_same(other): - msg = 'Can only compare identically-labeled Series objects' - raise ValueError(msg) + # By this point we have checked that self._indexed_same(other) res_values = na_op(self.values, other.values) - return self._constructor(res_values, index=self.index, name=name) + # rename is needed in case res_name is None and res_values.name + # is not. + return self._constructor(res_values, index=self.index, + name=res_name).rename(res_name) elif isinstance(other, (np.ndarray, pd.Index)): # do not check length of zerodim array @@ -844,15 +862,17 @@ def wrapper(self, other, axis=None): raise ValueError('Lengths must match to compare') res_values = na_op(self.values, np.asarray(other)) - return self._constructor(res_values, - index=self.index).__finalize__(self) - - elif (isinstance(other, pd.Categorical) and - not is_categorical_dtype(self)): - raise TypeError("Cannot compare a Categorical for op {op} with " - "Series of dtype {typ}.\nIf you want to compare " - "values, use 'series np.asarray(other)'." - .format(op=op, typ=self.dtype)) + result = self._constructor(res_values, index=self.index) + # rename is needed in case res_name is None and self.name + # is not. + return result.__finalize__(self).rename(res_name) + + elif isinstance(other, pd.Categorical): + # ordering of checks matters; by this point we know + # that not is_categorical_dtype(self) + res_values = op(self.values, other) + return self._constructor(res_values, index=self.index, + name=res_name) elif is_scalar(other) and isna(other): # numpy does not like comparisons vs None @@ -863,16 +883,9 @@ def wrapper(self, other, axis=None): return self._constructor(res_values, index=self.index, name=self.name, dtype='bool') - if is_categorical_dtype(self): - # cats are a special case as get_values() would return an ndarray, - # which would then not take categories ordering into account - # we can go directly to op, as the na_op would just test again and - # dispatch to it. - with np.errstate(all='ignore'): - res = op(self.values, other) else: values = self.get_values() - if isinstance(other, (list, np.ndarray)): + if isinstance(other, list): other = np.asarray(other) with np.errstate(all='ignore'): @@ -882,10 +895,9 @@ def wrapper(self, other, axis=None): .format(typ=type(other))) # always return a full value series here - res = com._values_from_object(res) - - res = pd.Series(res, index=self.index, name=self.name, dtype='bool') - return res + res_values = com._values_from_object(res) + return pd.Series(res_values, index=self.index, + name=res_name, dtype='bool') return wrapper diff --git a/pandas/tests/indexes/common.py b/pandas/tests/indexes/common.py index 8948c5f79900d..2d644652f1b3d 100644 --- a/pandas/tests/indexes/common.py +++ b/pandas/tests/indexes/common.py @@ -788,6 +788,7 @@ def test_equals_op(self): series_d = Series(array_d) with tm.assert_raises_regex(ValueError, "Lengths must match"): index_a == series_b + tm.assert_numpy_array_equal(index_a == series_a, expected1) tm.assert_numpy_array_equal(index_a == series_c, expected2) diff --git a/pandas/tests/series/test_arithmetic.py b/pandas/tests/series/test_arithmetic.py index 1d9fa9dc15531..cf969a7024563 100644 --- a/pandas/tests/series/test_arithmetic.py +++ b/pandas/tests/series/test_arithmetic.py @@ -43,6 +43,40 @@ def test_ser_flex_cmp_return_dtypes_empty(self, opname): result = getattr(empty, opname)(const).get_dtype_counts() tm.assert_series_equal(result, Series([1], ['bool'])) + @pytest.mark.parametrize('op', [operator.eq, operator.ne, + operator.le, operator.lt, + operator.ge, operator.gt]) + @pytest.mark.parametrize('names', [(None, None, None), + ('foo', 'bar', None), + ('baz', 'baz', 'baz')]) + def test_ser_cmp_result_names(self, names, op): + # datetime64 dtype + dti = pd.date_range('1949-06-07 03:00:00', + freq='H', periods=5, name=names[0]) + ser = Series(dti).rename(names[1]) + result = op(ser, dti) + assert result.name == names[2] + + # datetime64tz dtype + dti = dti.tz_localize('US/Central') + ser = Series(dti).rename(names[1]) + result = op(ser, dti) + assert result.name == names[2] + + # timedelta64 dtype + tdi = dti - dti.shift(1) + ser = Series(tdi).rename(names[1]) + result = op(ser, tdi) + assert result.name == names[2] + + # categorical + if op in [operator.eq, operator.ne]: + # categorical dtype comparisons raise for inequalities + cidx = tdi.astype('category') + ser = Series(cidx).rename(names[1]) + result = op(ser, cidx) + assert result.name == names[2] + class TestTimestampSeriesComparison(object): def test_dt64ser_cmp_period_scalar(self):