Skip to content

Commit af2b609

Browse files
alysivji and jreback
authored and committed
BUG: assert_index_equal does not raise error for check_categorical=False when comparing 2 CategoricalIndex objects (#21092)
1 parent ed784a8 commit af2b609

File tree

3 files changed

+60
-12
lines changed

3 files changed

+60
-12
lines changed

doc/source/whatsnew/v0.23.1.txt

+5
Original file line numberDiff line numberDiff line change
@@ -55,6 +55,11 @@ Strings
5555
- Bug in :meth:`Series.str.replace()` where the method throws `TypeError` on Python 3.5.2 (:issue:`21078`)
5656
-
5757

58+
Categorical
59+
^^^^^^^^^^^
60+
61+
- Bug in :func:`pandas.util.testing.assert_index_equal` which raised ``AssertionError`` incorrectly, when comparing two :class:`CategoricalIndex` objects with param ``check_categorical=False`` (:issue:`19776`)
62+
5863
Conversion
5964
^^^^^^^^^^
6065

pandas/tests/util/test_testing.py

+38
Original file line numberDiff line numberDiff line change
@@ -503,6 +503,25 @@ def test_index_equal_metadata_message(self):
503503
with tm.assert_raises_regex(AssertionError, expected):
504504
assert_index_equal(idx1, idx2)
505505

506+
def test_categorical_index_equality(self):
507+
expected = """Index are different
508+
509+
Attribute "dtype" are different
510+
\\[left\\]: CategoricalDtype\\(categories=\\[u?'a', u?'b'\\], ordered=False\\)
511+
\\[right\\]: CategoricalDtype\\(categories=\\[u?'a', u?'b', u?'c'\\], \
512+
ordered=False\\)"""
513+
514+
with tm.assert_raises_regex(AssertionError, expected):
515+
assert_index_equal(pd.Index(pd.Categorical(['a', 'b'])),
516+
pd.Index(pd.Categorical(['a', 'b'],
517+
categories=['a', 'b', 'c'])))
518+
519+
def test_categorical_index_equality_relax_categories_check(self):
520+
assert_index_equal(pd.Index(pd.Categorical(['a', 'b'])),
521+
pd.Index(pd.Categorical(['a', 'b'],
522+
categories=['a', 'b', 'c'])),
523+
check_categorical=False)
524+
506525

507526
class TestAssertSeriesEqual(object):
508527

@@ -600,6 +619,25 @@ def test_series_equal_message(self):
600619
assert_series_equal(pd.Series([1, 2, 3]), pd.Series([1, 2, 4]),
601620
check_less_precise=True)
602621

622+
def test_categorical_series_equality(self):
623+
expected = """Attributes are different
624+
625+
Attribute "dtype" are different
626+
\\[left\\]: CategoricalDtype\\(categories=\\[u?'a', u?'b'\\], ordered=False\\)
627+
\\[right\\]: CategoricalDtype\\(categories=\\[u?'a', u?'b', u?'c'\\], \
628+
ordered=False\\)"""
629+
630+
with tm.assert_raises_regex(AssertionError, expected):
631+
assert_series_equal(pd.Series(pd.Categorical(['a', 'b'])),
632+
pd.Series(pd.Categorical(['a', 'b'],
633+
categories=['a', 'b', 'c'])))
634+
635+
def test_categorical_series_equality_relax_categories_check(self):
636+
assert_series_equal(pd.Series(pd.Categorical(['a', 'b'])),
637+
pd.Series(pd.Categorical(['a', 'b'],
638+
categories=['a', 'b', 'c'])),
639+
check_categorical=False)
640+
603641

604642
class TestAssertFrameEqual(object):
605643

pandas/util/testing.py

+17-12
Original file line numberDiff line numberDiff line change
@@ -778,8 +778,12 @@ def assert_index_equal(left, right, exact='equiv', check_names=True,
778778

779779
def _check_types(l, r, obj='Index'):
780780
if exact:
781-
assert_class_equal(left, right, exact=exact, obj=obj)
782-
assert_attr_equal('dtype', l, r, obj=obj)
781+
assert_class_equal(l, r, exact=exact, obj=obj)
782+
783+
# Skip exact dtype checking when `check_categorical` is False
784+
if check_categorical:
785+
assert_attr_equal('dtype', l, r, obj=obj)
786+
783787
# allow string-like to have different inferred_types
784788
if l.inferred_type in ('string', 'unicode'):
785789
assert r.inferred_type in ('string', 'unicode')
@@ -829,7 +833,8 @@ def _get_ilevel_values(index, level):
829833
# get_level_values may change dtype
830834
_check_types(left.levels[level], right.levels[level], obj=obj)
831835

832-
if check_exact:
836+
# skip exact index checking when `check_categorical` is False
837+
if check_exact and check_categorical:
833838
if not left.equals(right):
834839
diff = np.sum((left.values != right.values)
835840
.astype(int)) * 100.0 / len(left)
@@ -950,23 +955,23 @@ def is_sorted(seq):
950955

951956

952957
def assert_categorical_equal(left, right, check_dtype=True,
953-
obj='Categorical', check_category_order=True):
958+
check_category_order=True, obj='Categorical'):
954959
"""Test that Categoricals are equivalent.
955960
956961
Parameters
957962
----------
958-
left, right : Categorical
959-
Categoricals to compare
963+
left : Categorical
964+
right : Categorical
960965
check_dtype : bool, default True
961966
Check that the integer dtype of the codes is the same
962-
obj : str, default 'Categorical'
963-
Specify object name being compared, internally used to show appropriate
964-
assertion message
965967
check_category_order : bool, default True
966968
Whether the order of the categories should be compared, which
967969
implies identical integer codes. If False, only the resulting
968970
values are compared. The ordered attribute is
969971
checked regardless.
972+
obj : str, default 'Categorical'
973+
Specify object name being compared, internally used to show appropriate
974+
assertion message
970975
"""
971976
_check_isinstance(left, right, Categorical)
972977

@@ -1020,7 +1025,7 @@ def raise_assert_detail(obj, message, left, right, diff=None):
10201025

10211026
def assert_numpy_array_equal(left, right, strict_nan=False,
10221027
check_dtype=True, err_msg=None,
1023-
obj='numpy array', check_same=None):
1028+
check_same=None, obj='numpy array'):
10241029
""" Checks that 'np.ndarray' is equivalent
10251030
10261031
Parameters
@@ -1033,11 +1038,11 @@ def assert_numpy_array_equal(left, right, strict_nan=False,
10331038
check dtype if both left and right are np.ndarray
10341039
err_msg : str, default None
10351040
If provided, used as assertion message
1041+
check_same : None|'copy'|'same', default None
1042+
Ensure left and right refer/do not refer to the same memory area
10361043
obj : str, default 'numpy array'
10371044
Specify object name being compared, internally used to show appropriate
10381045
assertion message
1039-
check_same : None|'copy'|'same', default None
1040-
Ensure left and right refer/do not refer to the same memory area
10411046
"""
10421047

10431048
# instance validation

0 commit comments

Comments
 (0)