File tree: 4 files changed, +27 -3 lines changed
Original file line number Diff line number Diff line change @@ -453,7 +453,7 @@ Numeric
453
453
Categorical
454
454
^^^^^^^^^^^
455
455
456
- -
456
+ - Bug in ``Categorical.equals`` between two unordered ``Categorical`` objects with the same categories, but in a different order (:issue:`16603`)
457
457
-
458
458
-
459
459
Original file line number Diff line number Diff line change @@ -2082,8 +2082,16 @@ def equals(self, other):
2082
2082
-------
2083
2083
are_equal : boolean
2084
2084
"""
2085
- return (self .is_dtype_equal (other ) and
2086
- np .array_equal (self ._codes , other ._codes ))
2085
+ if self .is_dtype_equal (other ):
2086
+ if self .categories .equals (other .categories ):
2087
+ # fastpath to avoid re-coding
2088
+ other_codes = other ._codes
2089
+ else :
2090
+ other_codes = _recode_for_categories (other .codes ,
2091
+ other .categories ,
2092
+ self .categories )
2093
+ return np .array_equal (self ._codes , other_codes )
2094
+ return False
2087
2095
2088
2096
def is_dtype_equal (self , other ):
2089
2097
"""
Original file line number Diff line number Diff line change @@ -250,6 +250,13 @@ def test_compare_different_lengths(self):
250
250
with tm .assert_raises_regex (TypeError , msg ):
251
251
c1 == c2
252
252
253
+ def test_compare_unordered_different_order (self ):
254
+ # https://github.com/pandas-dev/pandas/issues/16603#issuecomment-
255
+ # 349290078
256
+ a = pd .Categorical (['a' ], categories = ['a' , 'b' ])
257
+ b = pd .Categorical (['b' ], categories = ['b' , 'a' ])
258
+ assert not a .equals (b )
259
+
253
260
def test_numeric_like_ops (self ):
254
261
255
262
df = DataFrame ({'value' : np .random .randint (0 , 10000 , 100 )})
Original file line number Diff line number Diff line change @@ -756,6 +756,15 @@ def test_equals_categorical(self):
756
756
ordered = True ))
757
757
assert ci .equals (ci .copy ())
758
758
759
+ def test_equals_categorical_unordered (self ):
760
+ # https://github.com/pandas-dev/pandas/issues/16603
761
+ a = pd .CategoricalIndex (['A' ], categories = ['A' , 'B' ])
762
+ b = pd .CategoricalIndex (['A' ], categories = ['B' , 'A' ])
763
+ c = pd .CategoricalIndex (['C' ], categories = ['B' , 'A' ])
764
+ assert a .equals (b )
765
+ assert not a .equals (c )
766
+ assert not b .equals (c )
767
+
759
768
def test_string_categorical_index_repr (self ):
760
769
# short
761
770
idx = pd .CategoricalIndex (['a' , 'bb' , 'ccc' ])
You can’t perform that action at this time.
0 commit comments