Skip to content

Commit 3e4b196

Browse files
jbrockmendel authored and jreback committed
REF: Make CategoricalIndex comparison defer to Categorical comparison (#27769)
1 parent 6572497 commit 3e4b196

File tree

5 files changed

+24
-25
lines changed

5 files changed

+24
-25
lines changed

pandas/core/arrays/categorical.py

+3
Original file line number | Diff line number | Diff line change
@@ -88,6 +88,9 @@ def f(self, other):
8888
return NotImplemented
8989

9090
other = lib.item_from_zerodim(other)
91+
if is_list_like(other) and len(other) != len(self):
92+
# TODO: Could this fail if the categories are listlike objects?
93+
raise ValueError("Lengths must match.")
9194

9295
if not self.ordered:
9396
if opname in ["__lt__", "__gt__", "__le__", "__ge__"]:

pandas/core/indexes/base.py

+6 -2
Original file line number | Diff line number | Diff line change
@@ -48,6 +48,7 @@
4848
)
4949
from pandas.core.dtypes.concat import concat_compat
5050
from pandas.core.dtypes.generic import (
51+
ABCCategorical,
5152
ABCDataFrame,
5253
ABCDateOffset,
5354
ABCDatetimeArray,
@@ -99,11 +100,14 @@
99100

100101
def _make_comparison_op(op, cls):
101102
def cmp_method(self, other):
102-
if isinstance(other, (np.ndarray, Index, ABCSeries)):
103+
if isinstance(other, (np.ndarray, Index, ABCSeries, ExtensionArray)):
103104
if other.ndim > 0 and len(self) != len(other):
104105
raise ValueError("Lengths must match to compare")
105106

106-
if is_object_dtype(self) and not isinstance(self, ABCMultiIndex):
107+
if is_object_dtype(self) and isinstance(other, ABCCategorical):
108+
left = type(other)(self._values, dtype=other.dtype)
109+
return op(left, other)
110+
elif is_object_dtype(self) and not isinstance(self, ABCMultiIndex):
107111
# don't pass MultiIndex
108112
with np.errstate(all="ignore"):
109113
result = ops._comp_method_OBJECT_ARRAY(op, self.values, other)

pandas/core/indexes/category.py

+3 -22
Original file line number | Diff line number | Diff line change
@@ -899,31 +899,12 @@ def _make_compare(op):
899899
opname = "__{op}__".format(op=op.__name__)
900900

901901
def _evaluate_compare(self, other):
902-
903-
# if we have a Categorical type, then must have the same
904-
# categories
905-
if isinstance(other, CategoricalIndex):
906-
other = other._values
907-
elif isinstance(other, Index):
908-
other = self._create_categorical(other._values, dtype=self.dtype)
909-
910-
if isinstance(other, (ABCCategorical, np.ndarray, ABCSeries)):
911-
if len(self.values) != len(other):
912-
raise ValueError("Lengths must match to compare")
913-
914-
if isinstance(other, ABCCategorical):
915-
if not self.values.is_dtype_equal(other):
916-
raise TypeError(
917-
"categorical index comparisons must "
918-
"have the same categories and ordered "
919-
"attributes"
920-
)
921-
922-
result = op(self.values, other)
902+
with np.errstate(all="ignore"):
903+
result = op(self.array, other)
923904
if isinstance(result, ABCSeries):
924905
# Dispatch to pd.Categorical returned NotImplemented
925906
# and we got a Series back; down-cast to ndarray
926-
result = result.values
907+
result = result._values
927908
return result
928909

929910
return compat.set_function_name(_evaluate_compare, opname, cls)

pandas/core/ops/__init__.py

+7 -1
Original file line number | Diff line number | Diff line change
@@ -832,8 +832,14 @@ def wrapper(self, other, axis=None):
832832
# Defer to DataFrame implementation; fail early
833833
return NotImplemented
834834

835-
elif isinstance(other, ABCSeries) and not self._indexed_same(other):
835+
if isinstance(other, ABCSeries) and not self._indexed_same(other):
836836
raise ValueError("Can only compare identically-labeled Series objects")
837+
elif (
838+
is_list_like(other)
839+
and len(other) != len(self)
840+
and not isinstance(other, (set, frozenset))
841+
):
842+
raise ValueError("Lengths must match")
837843

838844
elif (
839845
is_list_like(other)

pandas/tests/indexes/test_category.py

+5
Original file line number | Diff line number | Diff line change
@@ -823,6 +823,11 @@ def test_equals_categorical(self):
823823
msg = (
824824
"categorical index comparisons must have the same categories"
825825
" and ordered attributes"
826+
"|"
827+
"Categoricals can only be compared if 'categories' are the same. "
828+
"Categories are different lengths"
829+
"|"
830+
"Categoricals can only be compared if 'ordered' is the same"
826831
)
827832
with pytest.raises(TypeError, match=msg):
828833
ci1 == ci2

0 commit comments

Comments
 (0)