File tree 4 files changed +27
-3
lines changed
4 files changed +27
-3
lines changed Original file line number Diff line number Diff line change @@ -334,7 +334,7 @@ Numeric
334
334
Categorical
335
335
^^^^^^^^^^^
336
336
337
- -
337
+ - Bug in ``Categorical.equals`` between two unordered ``Categorical`` objects with the same categories, but in a different order (:issue:`16603`)
338
338
-
339
339
-
340
340
Original file line number Diff line number Diff line change @@ -2092,8 +2092,16 @@ def equals(self, other):
2092
2092
-------
2093
2093
are_equal : boolean
2094
2094
"""
2095
- return (self .is_dtype_equal (other ) and
2096
- np .array_equal (self ._codes , other ._codes ))
2095
+ if self .is_dtype_equal (other ):
2096
+ if self .categories .equals (other .categories ):
2097
+ # fastpath to avoid re-coding
2098
+ return np .array_equal (self ._codes , other ._codes )
2099
+ else :
2100
+ # coerce their codes to ours
2101
+ codes2 = _recode_for_categories (other .codes , other .categories ,
2102
+ self .categories )
2103
+ return np .array_equal (self ._codes , codes2 )
2104
+ return False
2097
2105
2098
2106
def is_dtype_equal (self , other ):
2099
2107
"""
Original file line number Diff line number Diff line change @@ -250,6 +250,13 @@ def test_compare_different_lengths(self):
250
250
with tm .assert_raises_regex (TypeError , msg ):
251
251
c1 == c2
252
252
253
+ def test_compare_unordered_different_order (self ):
254
+ # https://github.com/pandas-dev/pandas/issues/16603#issuecomment-
255
+ # 349290078
256
+ a = pd .Categorical (['a' ], categories = ['a' , 'b' ])
257
+ b = pd .Categorical (['b' ], categories = ['b' , 'a' ])
258
+ assert not a .equals (b )
259
+
253
260
def test_numeric_like_ops (self ):
254
261
255
262
df = DataFrame ({'value' : np .random .randint (0 , 10000 , 100 )})
Original file line number Diff line number Diff line change @@ -742,6 +742,15 @@ def test_equals_categorical(self):
742
742
ordered = True ))
743
743
assert ci .equals (ci .copy ())
744
744
745
+ def test_equals_categorical_unordered (self ):
746
+ # https://github.com/pandas-dev/pandas/issues/16603
747
+ a = pd .CategoricalIndex (['A' ], categories = ['A' , 'B' ])
748
+ b = pd .CategoricalIndex (['A' ], categories = ['B' , 'A' ])
749
+ c = pd .CategoricalIndex (['C' ], categories = ['B' , 'A' ])
750
+ assert a .equals (b )
751
+ assert not a .equals (c )
752
+ assert not b .equals (c )
753
+
745
754
def test_string_categorical_index_repr (self ):
746
755
# short
747
756
idx = pd .CategoricalIndex (['a' , 'bb' , 'ccc' ])
You can’t perform that action at this time.
0 commit comments