diff --git a/pandas/core/arrays/categorical.py b/pandas/core/arrays/categorical.py index d22b4bd4d3f2b..984f1835bd078 100644 --- a/pandas/core/arrays/categorical.py +++ b/pandas/core/arrays/categorical.py @@ -89,6 +89,9 @@ def f(self, other): return NotImplemented other = lib.item_from_zerodim(other) + if is_list_like(other) and len(other) != len(self): + # TODO: Could this fail if the categories are listlike objects? + raise ValueError("Lengths must match.") if not self.ordered: if op in ["__lt__", "__gt__", "__le__", "__ge__"]: diff --git a/pandas/core/indexes/base.py b/pandas/core/indexes/base.py index 356ae20b2240a..bd998656914c6 100644 --- a/pandas/core/indexes/base.py +++ b/pandas/core/indexes/base.py @@ -48,6 +48,7 @@ ) from pandas.core.dtypes.concat import concat_compat from pandas.core.dtypes.generic import ( + ABCCategorical, ABCDataFrame, ABCDateOffset, ABCDatetimeArray, @@ -99,11 +100,14 @@ def _make_comparison_op(op, cls): def cmp_method(self, other): - if isinstance(other, (np.ndarray, Index, ABCSeries)): + if isinstance(other, (np.ndarray, Index, ABCSeries, ExtensionArray)): if other.ndim > 0 and len(self) != len(other): raise ValueError("Lengths must match to compare") - if is_object_dtype(self) and not isinstance(self, ABCMultiIndex): + if is_object_dtype(self) and isinstance(other, ABCCategorical): + left = type(other)(self._values, dtype=other.dtype) + return op(left, other) + elif is_object_dtype(self) and not isinstance(self, ABCMultiIndex): # don't pass MultiIndex with np.errstate(all="ignore"): result = ops._comp_method_OBJECT_ARRAY(op, self.values, other) diff --git a/pandas/core/indexes/category.py b/pandas/core/indexes/category.py index 0f6aa711adc90..8bfa7e8d20b4f 100644 --- a/pandas/core/indexes/category.py +++ b/pandas/core/indexes/category.py @@ -899,31 +899,12 @@ def _make_compare(op): opname = "__{op}__".format(op=op.__name__) def _evaluate_compare(self, other): - - # if we have a Categorical type, then must have the same - # categories - if isinstance(other, CategoricalIndex): - other = other._values - elif isinstance(other, Index): - other = self._create_categorical(other._values, dtype=self.dtype) - - if isinstance(other, (ABCCategorical, np.ndarray, ABCSeries)): - if len(self.values) != len(other): - raise ValueError("Lengths must match to compare") - - if isinstance(other, ABCCategorical): - if not self.values.is_dtype_equal(other): - raise TypeError( - "categorical index comparisons must " - "have the same categories and ordered " - "attributes" - ) - - result = op(self.values, other) + with np.errstate(all="ignore"): + result = op(self.array, other) if isinstance(result, ABCSeries): # Dispatch to pd.Categorical returned NotImplemented # and we got a Series back; down-cast to ndarray - result = result.values + result = result._values return result return compat.set_function_name(_evaluate_compare, opname, cls) diff --git a/pandas/core/ops/__init__.py b/pandas/core/ops/__init__.py index 4ab1941e3493f..c78d5c79453ab 100644 --- a/pandas/core/ops/__init__.py +++ b/pandas/core/ops/__init__.py @@ -1038,8 +1038,14 @@ def wrapper(self, other, axis=None): # Defer to DataFrame implementation; fail early return NotImplemented - elif isinstance(other, ABCSeries) and not self._indexed_same(other): + if isinstance(other, ABCSeries) and not self._indexed_same(other): raise ValueError("Can only compare identically-labeled Series objects") + elif ( + is_list_like(other) + and len(other) != len(self) + and not isinstance(other, (set, frozenset)) + ): + raise ValueError("Lengths must match") elif is_categorical_dtype(self): # Dispatch to Categorical implementation; CategoricalIndex diff --git a/pandas/tests/indexes/test_category.py b/pandas/tests/indexes/test_category.py index 280b0a99c7e68..67bf9bd20e716 100644 --- a/pandas/tests/indexes/test_category.py +++ b/pandas/tests/indexes/test_category.py @@ -823,6 +823,11 @@ def test_equals_categorical(self): msg = ( "categorical index comparisons must have the same categories" " and ordered attributes" + "|" + "Categoricals can only be compared if 'categories' are the same. " + "Categories are different lengths" + "|" + "Categoricals can only be compared if 'ordered' is the same" ) with pytest.raises(TypeError, match=msg): ci1 == ci2