From 051ee37f9cc9d0d51fb3e6ed474d1688107cdc0e Mon Sep 17 00:00:00 2001 From: Brock Mendel Date: Wed, 7 Feb 2018 08:32:46 -0800 Subject: [PATCH 1/5] dispatch categorical series op to Categorical --- pandas/core/arrays/categorical.py | 3 ++ pandas/core/ops.py | 62 +++++++++++++++++-------------- pandas/tests/indexes/common.py | 10 ++++- 3 files changed, 46 insertions(+), 29 deletions(-) diff --git a/pandas/core/arrays/categorical.py b/pandas/core/arrays/categorical.py index 62c6a6b16cbe9..15953e6efbe7f 100644 --- a/pandas/core/arrays/categorical.py +++ b/pandas/core/arrays/categorical.py @@ -53,6 +53,9 @@ def f(self, other): # results depending whether categories are the same or not is kind of # insane, so be a bit stricter here and use the python3 idea of # comparing only things of equal type. + if isinstance(other, ABCSeries): + return NotImplemented + if not self.ordered: if op in ['__lt__', '__gt__', '__le__', '__ge__']: raise TypeError("Unordered Categoricals can only compare " diff --git a/pandas/core/ops.py b/pandas/core/ops.py index 6db84aedce7e7..d00604979a183 100644 --- a/pandas/core/ops.py +++ b/pandas/core/ops.py @@ -726,7 +726,7 @@ def dispatch_to_index_op(op, left, right, index_class): # avoid accidentally allowing integer add/sub. For datetime64[tz] dtypes, # left_idx may inherit a freq from a cached DatetimeIndex. # See discussion in GH#19147. - if left_idx.freq is not None: + if getattr(left_idx, 'freq', None) is not None: left_idx = left_idx._shallow_copy(freq=None) try: result = op(left_idx, right) @@ -774,9 +774,8 @@ def na_op(x, y): # dispatch to the categorical if we have a categorical # in either operand - if is_categorical_dtype(x): - return op(x, y) - elif is_categorical_dtype(y) and not is_scalar(y): + if is_categorical_dtype(y) and not is_scalar(y): + # The `not is_scalar(y)` check excludes the string "category" return op(y, x) elif is_object_dtype(x.dtype): @@ -824,17 +823,34 @@ def wrapper(self, other, axis=None): if axis is not None: self._get_axis_number(axis) + res_name = _get_series_op_result_name(self, other) + if isinstance(other, ABCDataFrame): # pragma: no cover # Defer to DataFrame implementation; fail early return NotImplemented + elif isinstance(other, ABCSeries) and not self._indexed_same(other): + raise ValueError("Can only compare identically-labeled " + "Series objects") + + elif is_categorical_dtype(self): + # Dispatch to Categorical implementation; pd.CategoricalIndex + # behavior is non-canonical GH#19513 + res_values = dispatch_to_index_op(op, self, other, pd.Categorical) + return self._constructor(res_values, index=self.index, + name=res_name) + + elif is_timedelta64_dtype(self): + res_values = dispatch_to_index_op(op, self, other, + pd.TimedeltaIndex) + return self._constructor(res_values, index=self.index, + name=res_name) + elif isinstance(other, ABCSeries): - name = com._maybe_match_name(self, other) - if not self._indexed_same(other): - msg = 'Can only compare identically-labeled Series objects' - raise ValueError(msg) + # By this point we have checked that self._indexed_same(other) res_values = na_op(self.values, other.values) - return self._constructor(res_values, index=self.index, name=name) + return self._constructor(res_values, index=self.index, + name=res_name) elif isinstance(other, (np.ndarray, pd.Index)): # do not check length of zerodim array @@ -847,12 +863,12 @@ def wrapper(self, other, axis=None): return self._constructor(res_values, index=self.index).__finalize__(self) - elif (isinstance(other, pd.Categorical) and - not is_categorical_dtype(self)): - raise TypeError("Cannot compare a Categorical for op {op} with " - "Series of dtype {typ}.\nIf you want to compare " - "values, use 'series np.asarray(other)'." - .format(op=op, typ=self.dtype)) + elif isinstance(other, pd.Categorical): + # ordering of checks matters; by this point we know + # that not is_categorical_dtype(self) + res_values = op(self.values, other) + return self._constructor(res_values, index=self.index, + name=res_name) elif is_scalar(other) and isna(other): # numpy does not like comparisons vs None @@ -863,16 +879,9 @@ def wrapper(self, other, axis=None): return self._constructor(res_values, index=self.index, name=self.name, dtype='bool') - if is_categorical_dtype(self): - # cats are a special case as get_values() would return an ndarray, - # which would then not take categories ordering into account - # we can go directly to op, as the na_op would just test again and - # dispatch to it. - with np.errstate(all='ignore'): - res = op(self.values, other) else: values = self.get_values() - if isinstance(other, (list, np.ndarray)): + if isinstance(other, list): other = np.asarray(other) with np.errstate(all='ignore'): @@ -882,10 +891,9 @@ def wrapper(self, other, axis=None): .format(typ=type(other))) # always return a full value series here - res = com._values_from_object(res) - - res = pd.Series(res, index=self.index, name=self.name, dtype='bool') - return res + res_values = com._values_from_object(res) + return pd.Series(res_values, index=self.index, + name=res_name, dtype='bool') return wrapper diff --git a/pandas/tests/indexes/common.py b/pandas/tests/indexes/common.py index 8948c5f79900d..c9d86d378da40 100644 --- a/pandas/tests/indexes/common.py +++ b/pandas/tests/indexes/common.py @@ -788,8 +788,14 @@ def test_equals_op(self): series_d = Series(array_d) with tm.assert_raises_regex(ValueError, "Lengths must match"): index_a == series_b - tm.assert_numpy_array_equal(index_a == series_a, expected1) - tm.assert_numpy_array_equal(index_a == series_c, expected2) + + if isinstance(index_a, CategoricalIndex): + # GH#??? + tm.assert_series_equal(index_a == series_a, Series(expected1)) + tm.assert_series_equal(index_a == series_c, Series(expected2)) + else: + tm.assert_numpy_array_equal(index_a == series_a, expected1) + tm.assert_numpy_array_equal(index_a == series_c, expected2) # cases where length is 1 for one of them with tm.assert_raises_regex(ValueError, "Lengths must match"): From b18254affbd02ad317635601ac7730236384b12d Mon Sep 17 00:00:00 2001 From: Brock Mendel Date: Wed, 7 Feb 2018 09:59:11 -0800 Subject: [PATCH 2/5] tests for series op result names --- pandas/core/ops.py | 10 +++++--- pandas/tests/series/test_arithmetic.py | 34 ++++++++++++++++++++++++++ 2 files changed, 41 insertions(+), 3 deletions(-) diff --git a/pandas/core/ops.py b/pandas/core/ops.py index d00604979a183..974a57a28f196 100644 --- a/pandas/core/ops.py +++ b/pandas/core/ops.py @@ -849,8 +849,10 @@ def wrapper(self, other, axis=None): elif isinstance(other, ABCSeries): # By this point we have checked that self._indexed_same(other) res_values = na_op(self.values, other.values) + # rename is needed in case res_name is None and res_values.name + # is not. return self._constructor(res_values, index=self.index, - name=res_name) + name=res_name).rename(res_name) elif isinstance(other, (np.ndarray, pd.Index)): # do not check length of zerodim array @@ -860,8 +862,10 @@ def wrapper(self, other, axis=None): raise ValueError('Lengths must match to compare') res_values = na_op(self.values, np.asarray(other)) - return self._constructor(res_values, - index=self.index).__finalize__(self) + result = self._constructor(res_values, index=self.index) + # rename is needed in case res_name is None and self.name + # is not. + return result.__finalize__(self).rename(res_name) elif isinstance(other, pd.Categorical): # ordering of checks matters; by this point we know diff --git a/pandas/tests/series/test_arithmetic.py b/pandas/tests/series/test_arithmetic.py index 1d9fa9dc15531..cf969a7024563 100644 --- a/pandas/tests/series/test_arithmetic.py +++ b/pandas/tests/series/test_arithmetic.py @@ -43,6 +43,40 @@ def test_ser_flex_cmp_return_dtypes_empty(self, opname): result = getattr(empty, opname)(const).get_dtype_counts() tm.assert_series_equal(result, Series([1], ['bool'])) + @pytest.mark.parametrize('op', [operator.eq, operator.ne, + operator.le, operator.lt, + operator.ge, operator.gt]) + @pytest.mark.parametrize('names', [(None, None, None), + ('foo', 'bar', None), + ('baz', 'baz', 'baz')]) + def test_ser_cmp_result_names(self, names, op): + # datetime64 dtype + dti = pd.date_range('1949-06-07 03:00:00', + freq='H', periods=5, name=names[0]) + ser = Series(dti).rename(names[1]) + result = op(ser, dti) + assert result.name == names[2] + + # datetime64tz dtype + dti = dti.tz_localize('US/Central') + ser = Series(dti).rename(names[1]) + result = op(ser, dti) + assert result.name == names[2] + + # timedelta64 dtype + tdi = dti - dti.shift(1) + ser = Series(tdi).rename(names[1]) + result = op(ser, tdi) + assert result.name == names[2] + + # categorical + if op in [operator.eq, operator.ne]: + # categorical dtype comparisons raise for inequalities + cidx = tdi.astype('category') + ser = Series(cidx).rename(names[1]) + result = op(ser, cidx) + assert result.name == names[2] + class TestTimestampSeriesComparison(object): def test_dt64ser_cmp_period_scalar(self): From 3276c8d8009b3ce85fe26278bcce1c64511d4cb9 Mon Sep 17 00:00:00 2001 From: Brock Mendel Date: Wed, 7 Feb 2018 11:07:44 -0800 Subject: [PATCH 3/5] fix inconsistetn casting --- pandas/core/indexes/category.py | 26 ++++++++++++++++++-------- pandas/tests/indexes/common.py | 9 ++------- 2 files changed, 20 insertions(+), 15 deletions(-) diff --git a/pandas/core/indexes/category.py b/pandas/core/indexes/category.py index 2c7be2b21f959..806eb13c4b10d 100644 --- a/pandas/core/indexes/category.py +++ b/pandas/core/indexes/category.py @@ -1,3 +1,5 @@ +import operator + import numpy as np from pandas._libs import index as libindex @@ -726,7 +728,9 @@ def _codes_for_groupby(self, sort): def _add_comparison_methods(cls): """ add in comparison methods """ - def _make_compare(opname): + def _make_compare(op): + opname = '__{op}__'.format(op=op.__name__) + def _evaluate_compare(self, other): # if we have a Categorical type, then must have the same @@ -749,16 +753,22 @@ def _evaluate_compare(self, other): "have the same categories and ordered " "attributes") - return getattr(self.values, opname)(other) + result = op(self.values, other) + #result = getattr(self.values, opname)(other) + if isinstance(result, ABCSeries): + # Dispatch to pd.Categorical returned NotImplemented + # and we got a Series back; down-cast to ndarray + result = result.values + return result return compat.set_function_name(_evaluate_compare, opname, cls) - cls.__eq__ = _make_compare('__eq__') - cls.__ne__ = _make_compare('__ne__') - cls.__lt__ = _make_compare('__lt__') - cls.__gt__ = _make_compare('__gt__') - cls.__le__ = _make_compare('__le__') - cls.__ge__ = _make_compare('__ge__') + cls.__eq__ = _make_compare(operator.eq) + cls.__ne__ = _make_compare(operator.ne) + cls.__lt__ = _make_compare(operator.lt) + cls.__gt__ = _make_compare(operator.gt) + cls.__le__ = _make_compare(operator.le) + cls.__ge__ = _make_compare(operator.ge) def _delegate_method(self, name, *args, **kwargs): """ method delegation to the ._values """ diff --git a/pandas/tests/indexes/common.py b/pandas/tests/indexes/common.py index c9d86d378da40..2d644652f1b3d 100644 --- a/pandas/tests/indexes/common.py +++ b/pandas/tests/indexes/common.py @@ -789,13 +789,8 @@ def test_equals_op(self): with tm.assert_raises_regex(ValueError, "Lengths must match"): index_a == series_b - if isinstance(index_a, CategoricalIndex): - # GH#??? - tm.assert_series_equal(index_a == series_a, Series(expected1)) - tm.assert_series_equal(index_a == series_c, Series(expected2)) - else: - tm.assert_numpy_array_equal(index_a == series_a, expected1) - tm.assert_numpy_array_equal(index_a == series_c, expected2) + tm.assert_numpy_array_equal(index_a == series_a, expected1) + tm.assert_numpy_array_equal(index_a == series_c, expected2) # cases where length is 1 for one of them with tm.assert_raises_regex(ValueError, "Lengths must match"): From ce64df8ad9e078b432eaba60305a658878330d93 Mon Sep 17 00:00:00 2001 From: Brock Mendel Date: Wed, 7 Feb 2018 11:24:48 -0800 Subject: [PATCH 4/5] remove commented-out --- pandas/core/indexes/category.py | 1 - 1 file changed, 1 deletion(-) diff --git a/pandas/core/indexes/category.py b/pandas/core/indexes/category.py index 806eb13c4b10d..97cadb2808ad1 100644 --- a/pandas/core/indexes/category.py +++ b/pandas/core/indexes/category.py @@ -754,7 +754,6 @@ def _evaluate_compare(self, other): "attributes") result = op(self.values, other) - #result = getattr(self.values, opname)(other) if isinstance(result, ABCSeries): # Dispatch to pd.Categorical returned NotImplemented # and we got a Series back; down-cast to ndarray From 06bd5bb231bc4e626655019f1f23688235e9720b Mon Sep 17 00:00:00 2001 From: Brock Mendel Date: Tue, 13 Feb 2018 08:33:22 -0800 Subject: [PATCH 5/5] whatsnew note --- doc/source/whatsnew/v0.23.0.txt | 2 ++ 1 file changed, 2 insertions(+) diff --git a/doc/source/whatsnew/v0.23.0.txt b/doc/source/whatsnew/v0.23.0.txt index ea56ebad7d782..4dfa4bfe637f1 100644 --- a/doc/source/whatsnew/v0.23.0.txt +++ b/doc/source/whatsnew/v0.23.0.txt @@ -686,3 +686,5 @@ Other ^^^^^ - Improved error message when attempting to use a Python keyword as an identifier in a ``numexpr`` backed query (:issue:`18221`) +- Comparisons between :class:`Series` and :class:`Index` would return a ``Series`` with an incorrect name, ignoring the ``Index``'s name attribute (:issue:`19582`) +-