Skip to content

Commit bbfbe48

Browse files
BUG: Fixed Categorical.Equals with unordered (#18822)
* BUG: Fixed Categorical.Equals with unordered. The original issue was already fixed. I added tests to verify (but no whatsnew entry). This adds tests and a fix for #16603 (comment) about `Categorical.equals`. Closes #16603 * simplify * Release note
1 parent e3251da commit bbfbe48

File tree

4 files changed

+27
-3
lines changed

4 files changed

+27
-3
lines changed

doc/source/whatsnew/v0.23.0.txt

+1-1
Original file line numberDiff line numberDiff line change
@@ -453,7 +453,7 @@ Numeric
453453
Categorical
454454
^^^^^^^^^^^
455455

456-
-
456+
- Bug in ``Categorical.equals`` between two unordered Categoricals with the same categories, but in a different order (:issue:`16603`)
457457
-
458458
-
459459

pandas/core/categorical.py

+10-2
Original file line numberDiff line numberDiff line change
@@ -2082,8 +2082,16 @@ def equals(self, other):
20822082
-------
20832083
are_equal : boolean
20842084
"""
2085-
return (self.is_dtype_equal(other) and
2086-
np.array_equal(self._codes, other._codes))
2085+
if self.is_dtype_equal(other):
2086+
if self.categories.equals(other.categories):
2087+
# fastpath to avoid re-coding
2088+
other_codes = other._codes
2089+
else:
2090+
other_codes = _recode_for_categories(other.codes,
2091+
other.categories,
2092+
self.categories)
2093+
return np.array_equal(self._codes, other_codes)
2094+
return False
20872095

20882096
def is_dtype_equal(self, other):
20892097
"""

pandas/tests/categorical/test_operators.py

+7
Original file line numberDiff line numberDiff line change
@@ -250,6 +250,13 @@ def test_compare_different_lengths(self):
250250
with tm.assert_raises_regex(TypeError, msg):
251251
c1 == c2
252252

253+
def test_compare_unordered_different_order(self):
254+
# https://github.com/pandas-dev/pandas/issues/16603#issuecomment-
255+
# 349290078
256+
a = pd.Categorical(['a'], categories=['a', 'b'])
257+
b = pd.Categorical(['b'], categories=['b', 'a'])
258+
assert not a.equals(b)
259+
253260
def test_numeric_like_ops(self):
254261

255262
df = DataFrame({'value': np.random.randint(0, 10000, 100)})

pandas/tests/indexes/test_category.py

+9
Original file line numberDiff line numberDiff line change
@@ -756,6 +756,15 @@ def test_equals_categorical(self):
756756
ordered=True))
757757
assert ci.equals(ci.copy())
758758

759+
def test_equals_categoridcal_unordered(self):
760+
# https://github.com/pandas-dev/pandas/issues/16603
761+
a = pd.CategoricalIndex(['A'], categories=['A', 'B'])
762+
b = pd.CategoricalIndex(['A'], categories=['B', 'A'])
763+
c = pd.CategoricalIndex(['C'], categories=['B', 'A'])
764+
assert a.equals(b)
765+
assert not a.equals(c)
766+
assert not b.equals(c)
767+
759768
def test_string_categorical_index_repr(self):
760769
# short
761770
idx = pd.CategoricalIndex(['a', 'bb', 'ccc'])

0 commit comments

Comments
 (0)