diff --git a/doc/source/whatsnew/v0.23.0.txt b/doc/source/whatsnew/v0.23.0.txt index bd3bee507baa3..f0083ab46d14d 100644 --- a/doc/source/whatsnew/v0.23.0.txt +++ b/doc/source/whatsnew/v0.23.0.txt @@ -378,7 +378,7 @@ Numeric Categorical ^^^^^^^^^^^ -- +- Bug in ``Categorical.equals`` between two unordered categoricals with the same categories, but in a different order (:issue:`16603`) - - diff --git a/pandas/core/categorical.py b/pandas/core/categorical.py index d47cb0762447b..8b86de285b0fe 100644 --- a/pandas/core/categorical.py +++ b/pandas/core/categorical.py @@ -2081,8 +2081,16 @@ def equals(self, other): ------- are_equal : boolean """ - return (self.is_dtype_equal(other) and - np.array_equal(self._codes, other._codes)) + if self.is_dtype_equal(other): + if self.categories.equals(other.categories): + # fastpath to avoid re-coding + other_codes = other._codes + else: + other_codes = _recode_for_categories(other.codes, + other.categories, + self.categories) + return np.array_equal(self._codes, other_codes) + return False def is_dtype_equal(self, other): """ diff --git a/pandas/tests/categorical/test_operators.py b/pandas/tests/categorical/test_operators.py index 09a0607b67a88..fa8bb817616e4 100644 --- a/pandas/tests/categorical/test_operators.py +++ b/pandas/tests/categorical/test_operators.py @@ -250,6 +250,13 @@ def test_compare_different_lengths(self): with tm.assert_raises_regex(TypeError, msg): c1 == c2 + def test_compare_unordered_different_order(self): + # https://github.com/pandas-dev/pandas/issues/16603#issuecomment- + # 349290078 + a = pd.Categorical(['a'], categories=['a', 'b']) + b = pd.Categorical(['b'], categories=['b', 'a']) + assert not a.equals(b) + def test_numeric_like_ops(self): df = DataFrame({'value': np.random.randint(0, 10000, 100)}) diff --git a/pandas/tests/indexes/test_category.py b/pandas/tests/indexes/test_category.py index f7328a99195b9..d5c0494df030a 100644 --- a/pandas/tests/indexes/test_category.py +++ 
b/pandas/tests/indexes/test_category.py @@ -747,6 +747,15 @@ def test_equals_categorical(self): ordered=True)) assert ci.equals(ci.copy()) + def test_equals_categorical_unordered(self): + # https://github.com/pandas-dev/pandas/issues/16603 + a = pd.CategoricalIndex(['A'], categories=['A', 'B']) + b = pd.CategoricalIndex(['A'], categories=['B', 'A']) + c = pd.CategoricalIndex(['C'], categories=['B', 'A']) + assert a.equals(b) + assert not a.equals(c) + assert not b.equals(c) + def test_string_categorical_index_repr(self): # short idx = pd.CategoricalIndex(['a', 'bb', 'ccc'])