Skip to content

Commit 36da210

Browse files
committed
BUG: Categorical comparison with unordered
Fixes categorical comparison operations improperly considering ordering when two unordered categoricals are compared. Closes pandas-dev#16014
1 parent fdc2185 commit 36da210

File tree

3 files changed

+44
-7
lines changed

3 files changed

+44
-7
lines changed

doc/source/whatsnew/v0.20.2.txt

+3
Original file line numberDiff line numberDiff line change
@@ -80,7 +80,10 @@ Numeric
8080
^^^^^^^
8181

8282

83+
Categorical
84+
^^^^^^^^^^^
8385

86+
- Fixed comparison operations considering the order of the categories when both categoricals are unordered (:issue:`16014`)
8487

8588
Other
8689
^^^^^

pandas/core/categorical.py

+20-7
Original file line numberDiff line numberDiff line change
@@ -55,17 +55,30 @@ def f(self, other):
5555
"equality or not")
5656
if isinstance(other, Categorical):
5757
# Two Categoricals can only be compared if the categories are
58-
# the same
59-
if ((len(self.categories) != len(other.categories)) or
60-
not ((self.categories == other.categories).all())):
61-
raise TypeError("Categoricals can only be compared if "
62-
"'categories' are the same")
58+
# the same (maybe up to ordering, depending on `ordered`)
59+
60+
msg = ("Categoricals can only be compared if "
61+
"'categories' are the same")
62+
if len(self.categories) != len(other.categories):
63+
raise TypeError(msg)
64+
elif (self.ordered and not (self.categories ==
65+
other.categories).all()):
66+
raise TypeError(msg)
67+
elif not set(self.categories) == set(other.categories):
68+
raise TypeError(msg)
69+
6370
if not (self.ordered == other.ordered):
6471
raise TypeError("Categoricals can only be compared if "
6572
"'ordered' is the same")
66-
na_mask = (self._codes == -1) | (other._codes == -1)
73+
if not self.ordered:
74+
# Comparison uses codes, so align theirs to ours
75+
other_codes = _get_codes_for_values(other, self.categories)
76+
else:
77+
other_codes = other._codes
78+
79+
na_mask = (self._codes == -1) | (other_codes == -1)
6780
f = getattr(self._codes, op)
68-
ret = f(other._codes)
81+
ret = f(other_codes)
6982
if na_mask.any():
7083
# In other series, this leads to False, so do that here too
7184
ret[na_mask] = False

pandas/tests/test_categorical.py

+21
Original file line numberDiff line numberDiff line change
@@ -3822,6 +3822,27 @@ def test_cat_equality(self):
38223822
pytest.raises(TypeError, lambda: a > b)
38233823
pytest.raises(TypeError, lambda: b > a)
38243824

3825+
def test_unordered_different_order_equal(self):
3826+
# https://github.com/pandas-dev/pandas/issues/16014
3827+
c1 = Categorical(['a', 'b'], categories=['a', 'b'], ordered=False)
3828+
c2 = Categorical(['a', 'b'], categories=['b', 'a'], ordered=False)
3829+
assert (c1 == c2).all()
3830+
3831+
c1 = Categorical(['a', 'b'], categories=['a', 'b'], ordered=False)
3832+
c2 = Categorical(['b', 'a'], categories=['b', 'a'], ordered=False)
3833+
assert (c1 != c2).all()
3834+
3835+
c1 = Categorical(['a', 'a'], categories=['a', 'b'], ordered=False)
3836+
c2 = Categorical(['b', 'b'], categories=['b', 'a'], ordered=False)
3837+
assert (c1 != c2).all()
3838+
3839+
c1 = Categorical(['a', 'a'], categories=['a', 'b'], ordered=False)
3840+
c2 = Categorical(['a', 'b'], categories=['b', 'a'], ordered=False)
3841+
result = c1 == c2
3842+
3843+
assert result[0]
3844+
assert not result[1]
3845+
38253846
def test_concat_append(self):
38263847
cat = pd.Categorical(["a", "b"], categories=["a", "b"])
38273848
vals = [1, 2]

0 commit comments

Comments
 (0)