diff --git a/doc/source/whatsnew/v0.23.1.txt b/doc/source/whatsnew/v0.23.1.txt index 5c9c3e2931bd9..d211a21546978 100644 --- a/doc/source/whatsnew/v0.23.1.txt +++ b/doc/source/whatsnew/v0.23.1.txt @@ -46,6 +46,11 @@ Bug Fixes - - +Categorical +^^^^^^^^^^^ + +- Bug in :func:`pandas.util.testing.assert_index_equal` which raised ``AssertionError`` incorrectly, when comparing two :class:`CategoricalIndex` objects with param ``check_categorical=False`` (:issue:`19776`) + Conversion ^^^^^^^^^^ diff --git a/pandas/tests/util/test_testing.py b/pandas/tests/util/test_testing.py index d6f58d16bcf64..ab7c4fb528452 100644 --- a/pandas/tests/util/test_testing.py +++ b/pandas/tests/util/test_testing.py @@ -503,6 +503,25 @@ def test_index_equal_metadata_message(self): with tm.assert_raises_regex(AssertionError, expected): assert_index_equal(idx1, idx2) + def test_categorical_index_equality(self): + expected = """Index are different + +Attribute "dtype" are different +\\[left\\]: CategoricalDtype\\(categories=\\[u?'a', u?'b'\\], ordered=False\\) +\\[right\\]: CategoricalDtype\\(categories=\\[u?'a', u?'b', u?'c'\\], \ +ordered=False\\)""" + + with tm.assert_raises_regex(AssertionError, expected): + assert_index_equal(pd.Index(pd.Categorical(['a', 'b'])), + pd.Index(pd.Categorical(['a', 'b'], + categories=['a', 'b', 'c']))) + + def test_categorical_index_equality_relax_categories_check(self): + assert_index_equal(pd.Index(pd.Categorical(['a', 'b'])), + pd.Index(pd.Categorical(['a', 'b'], + categories=['a', 'b', 'c'])), + check_categorical=False) + class TestAssertSeriesEqual(object): @@ -600,6 +619,25 @@ def test_series_equal_message(self): assert_series_equal(pd.Series([1, 2, 3]), pd.Series([1, 2, 4]), check_less_precise=True) + def test_categorical_series_equality(self): + expected = """Attributes are different + +Attribute "dtype" are different +\\[left\\]: CategoricalDtype\\(categories=\\[u?'a', u?'b'\\], ordered=False\\) +\\[right\\]: CategoricalDtype\\(categories=\\[u?'a', u?'b', u?'c'\\], \ +ordered=False\\)""" + + with tm.assert_raises_regex(AssertionError, expected): + assert_series_equal(pd.Series(pd.Categorical(['a', 'b'])), + pd.Series(pd.Categorical(['a', 'b'], + categories=['a', 'b', 'c']))) + + def test_categorical_series_equality_relax_categories_check(self): + assert_series_equal(pd.Series(pd.Categorical(['a', 'b'])), + pd.Series(pd.Categorical(['a', 'b'], + categories=['a', 'b', 'c'])), + check_categorical=False) + class TestAssertFrameEqual(object): diff --git a/pandas/util/testing.py b/pandas/util/testing.py index e1484a9c1b390..233eba6490937 100644 --- a/pandas/util/testing.py +++ b/pandas/util/testing.py @@ -778,8 +778,12 @@ def assert_index_equal(left, right, exact='equiv', check_names=True, def _check_types(l, r, obj='Index'): if exact: - assert_class_equal(left, right, exact=exact, obj=obj) - assert_attr_equal('dtype', l, r, obj=obj) + assert_class_equal(l, r, exact=exact, obj=obj) + + # Skip exact dtype checking when `check_categorical` is False + if check_categorical: + assert_attr_equal('dtype', l, r, obj=obj) + # allow string-like to have different inferred_types if l.inferred_type in ('string', 'unicode'): assert r.inferred_type in ('string', 'unicode') @@ -829,7 +833,8 @@ def _get_ilevel_values(index, level): # get_level_values may change dtype _check_types(left.levels[level], right.levels[level], obj=obj) - if check_exact: + # skip exact index checking when `check_categorical` is False + if check_exact and check_categorical: if not left.equals(right): diff = np.sum((left.values != right.values) .astype(int)) * 100.0 / len(left) @@ -950,23 +955,23 @@ def is_sorted(seq): def assert_categorical_equal(left, right, check_dtype=True, - obj='Categorical', check_category_order=True): + check_category_order=True, obj='Categorical'): """Test that Categoricals are equivalent. Parameters ---------- - left, right : Categorical - Categoricals to compare + left : Categorical + right : Categorical check_dtype : bool, default True Check that integer dtype of the codes are the same - obj : str, default 'Categorical' - Specify object name being compared, internally used to show appropriate - assertion message check_category_order : bool, default True Whether the order of the categories should be compared, which implies identical integer codes. If False, only the resulting values are compared. The ordered attribute is checked regardless. + obj : str, default 'Categorical' + Specify object name being compared, internally used to show appropriate + assertion message """ _check_isinstance(left, right, Categorical) @@ -1020,7 +1025,7 @@ def raise_assert_detail(obj, message, left, right, diff=None): def assert_numpy_array_equal(left, right, strict_nan=False, check_dtype=True, err_msg=None, - obj='numpy array', check_same=None): + check_same=None, obj='numpy array'): """ Checks that 'np.ndarray' is equivalent Parameters @@ -1033,11 +1038,11 @@ def assert_numpy_array_equal(left, right, strict_nan=False, check dtype if both a and b are np.ndarray err_msg : str, default None If provided, used as assertion message + check_same : None|'copy'|'same', default None + Ensure left and right refer/do not refer to the same memory area obj : str, default 'numpy array' Specify object name being compared, internally used to show appropriate assertion message - check_same : None|'copy'|'same', default None - Ensure left and right refer/do not refer to the same memory area """ # instance validation