Skip to content

Commit 4a93245

Browse files
committed
BUG: Categorical comparison with unordered (#16339)
Fixes categorical comparison operations improperly considering ordering when two unordered categoricals are compared. Closes #16014 (cherry picked from commit 91e9e52)
1 parent bf7ceec commit 4a93245

File tree

4 files changed

+69
-7
lines changed

4 files changed

+69
-7
lines changed

doc/source/categorical.rst

+8
Original file line numberDiff line numberDiff line change
@@ -453,6 +453,14 @@ the original values:
453453
454454
np.asarray(cat) > base
455455
456+
When you compare two unordered categoricals with the same categories, the order of the categories is not considered:
457+
458+
.. ipython:: python
459+
460+
c1 = pd.Categorical(['a', 'b'], categories=['a', 'b'], ordered=False)
461+
c2 = pd.Categorical(['a', 'b'], categories=['b', 'a'], ordered=False)
462+
c1 == c2
463+
456464
Operations
457465
----------
458466

doc/source/whatsnew/v0.20.2.txt

+3
Original file line numberDiff line numberDiff line change
@@ -83,7 +83,10 @@ Numeric
8383
^^^^^^^
8484

8585

86+
Categorical
87+
^^^^^^^^^^^
8688

89+
- Fixed comparison operations considering the order of the categories when both categoricals are unordered (:issue:`16014`)
8790

8891
Other
8992
^^^^^

pandas/core/categorical.py

+21-7
Original file line numberDiff line numberDiff line change
@@ -55,17 +55,31 @@ def f(self, other):
5555
"equality or not")
5656
if isinstance(other, Categorical):
5757
# Two Categoricals can only be compared if the categories are
58-
# the same
59-
if ((len(self.categories) != len(other.categories)) or
60-
not ((self.categories == other.categories).all())):
61-
raise TypeError("Categoricals can only be compared if "
62-
"'categories' are the same")
58+
# the same (maybe up to ordering, depending on ordered)
59+
60+
msg = ("Categoricals can only be compared if "
61+
"'categories' are the same.")
62+
if len(self.categories) != len(other.categories):
63+
raise TypeError(msg + " Categories are different lengths")
64+
elif (self.ordered and not (self.categories ==
65+
other.categories).all()):
66+
raise TypeError(msg)
67+
elif not set(self.categories) == set(other.categories):
68+
raise TypeError(msg)
69+
6370
if not (self.ordered == other.ordered):
6471
raise TypeError("Categoricals can only be compared if "
6572
"'ordered' is the same")
66-
na_mask = (self._codes == -1) | (other._codes == -1)
73+
if not self.ordered and not self.categories.equals(
74+
other.categories):
75+
# both unordered and different order
76+
other_codes = _get_codes_for_values(other, self.categories)
77+
else:
78+
other_codes = other._codes
79+
80+
na_mask = (self._codes == -1) | (other_codes == -1)
6781
f = getattr(self._codes, op)
68-
ret = f(other._codes)
82+
ret = f(other_codes)
6983
if na_mask.any():
7084
# In other series, this leads to False, so do that here too
7185
ret[na_mask] = False

pandas/tests/test_categorical.py

+37
Original file line numberDiff line numberDiff line change
@@ -3822,6 +3822,43 @@ def test_cat_equality(self):
38223822
pytest.raises(TypeError, lambda: a > b)
38233823
pytest.raises(TypeError, lambda: b > a)
38243824

3825+
@pytest.mark.parametrize('ctor', [
3826+
lambda *args, **kwargs: Categorical(*args, **kwargs),
3827+
lambda *args, **kwargs: Series(Categorical(*args, **kwargs)),
3828+
])
3829+
def test_unordered_different_order_equal(self, ctor):
3830+
# https://github.com/pandas-dev/pandas/issues/16014
3831+
c1 = ctor(['a', 'b'], categories=['a', 'b'], ordered=False)
3832+
c2 = ctor(['a', 'b'], categories=['b', 'a'], ordered=False)
3833+
assert (c1 == c2).all()
3834+
3835+
c1 = ctor(['a', 'b'], categories=['a', 'b'], ordered=False)
3836+
c2 = ctor(['b', 'a'], categories=['b', 'a'], ordered=False)
3837+
assert (c1 != c2).all()
3838+
3839+
c1 = ctor(['a', 'a'], categories=['a', 'b'], ordered=False)
3840+
c2 = ctor(['b', 'b'], categories=['b', 'a'], ordered=False)
3841+
assert (c1 != c2).all()
3842+
3843+
c1 = ctor(['a', 'a'], categories=['a', 'b'], ordered=False)
3844+
c2 = ctor(['a', 'b'], categories=['b', 'a'], ordered=False)
3845+
result = c1 == c2
3846+
tm.assert_numpy_array_equal(np.array(result), np.array([True, False]))
3847+
3848+
def test_unordered_different_categories_raises(self):
3849+
c1 = Categorical(['a', 'b'], categories=['a', 'b'], ordered=False)
3850+
c2 = Categorical(['a', 'c'], categories=['c', 'a'], ordered=False)
3851+
with tm.assert_raises_regex(TypeError,
3852+
"Categoricals can only be compared"):
3853+
c1 == c2
3854+
3855+
def test_compare_different_lengths(self):
3856+
c1 = Categorical([], categories=['a', 'b'])
3857+
c2 = Categorical([], categories=['a'])
3858+
msg = "Categories are different lengths"
3859+
with tm.assert_raises_regex(TypeError, msg):
3860+
c1 == c2
3861+
38253862
def test_concat_append(self):
38263863
cat = pd.Categorical(["a", "b"], categories=["a", "b"])
38273864
vals = [1, 2]

0 commit comments

Comments
 (0)