Skip to content

Commit 1bf6371

Browse files
committed
BUG: Fixed Categorical.equals with unordered
The original issue was already fixed. I added tests to verify (but no whatsnew entry). This adds tests and a fix for #16603 (comment) about `Categorical.equals`. Closes #16603
1 parent 7a0ee19 commit 1bf6371

File tree

4 files changed

+27
-3
lines changed

4 files changed

+27
-3
lines changed

doc/source/whatsnew/v0.22.0.txt

+1-1
Original file line numberDiff line numberDiff line change
@@ -334,7 +334,7 @@ Numeric
334334
Categorical
335335
^^^^^^^^^^^
336336

337-
-
337+
- Bug in ``Categorical.equals`` between two unordered categoricals with the same categories, but in a different order (:issue:`16603`)
338338
-
339339
-
340340

pandas/core/categorical.py

+10-2
Original file line numberDiff line numberDiff line change
@@ -2092,8 +2092,16 @@ def equals(self, other):
20922092
-------
20932093
are_equal : boolean
20942094
"""
2095-
return (self.is_dtype_equal(other) and
2096-
np.array_equal(self._codes, other._codes))
2095+
if self.is_dtype_equal(other):
2096+
if self.categories.equals(other.categories):
2097+
# fastpath to avoid re-coding
2098+
return np.array_equal(self._codes, other._codes)
2099+
else:
2100+
# coerce their codes to ours
2101+
codes2 = _recode_for_categories(other.codes, other.categories,
2102+
self.categories)
2103+
return np.array_equal(self._codes, codes2)
2104+
return False
20972105

20982106
def is_dtype_equal(self, other):
20992107
"""

pandas/tests/categorical/test_operators.py

+7
Original file line numberDiff line numberDiff line change
@@ -250,6 +250,13 @@ def test_compare_different_lengths(self):
250250
with tm.assert_raises_regex(TypeError, msg):
251251
c1 == c2
252252

253+
def test_compare_unordered_different_order(self):
254+
# https://github.com/pandas-dev/pandas/issues/16603#issuecomment-
255+
# 349290078
256+
a = pd.Categorical(['a'], categories=['a', 'b'])
257+
b = pd.Categorical(['b'], categories=['b', 'a'])
258+
assert not a.equals(b)
259+
253260
def test_numeric_like_ops(self):
254261

255262
df = DataFrame({'value': np.random.randint(0, 10000, 100)})

pandas/tests/indexes/test_category.py

+9
Original file line numberDiff line numberDiff line change
@@ -742,6 +742,15 @@ def test_equals_categorical(self):
742742
ordered=True))
743743
assert ci.equals(ci.copy())
744744

745+
def test_equals_categoridcal_unordered(self):
746+
# https://github.com/pandas-dev/pandas/issues/16603
747+
a = pd.CategoricalIndex(['A'], categories=['A', 'B'])
748+
b = pd.CategoricalIndex(['A'], categories=['B', 'A'])
749+
c = pd.CategoricalIndex(['C'], categories=['B', 'A'])
750+
assert a.equals(b)
751+
assert not a.equals(c)
752+
assert not b.equals(c)
753+
745754
def test_string_categorical_index_repr(self):
746755
# short
747756
idx = pd.CategoricalIndex(['a', 'bb', 'ccc'])

0 commit comments

Comments
 (0)