Skip to content

BUG: assert_index_equal does not raise error for check_categorical=False when comparing 2 CategoricalIndex objects #21092

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 14 commits into from
May 19, 2018
Merged
5 changes: 5 additions & 0 deletions doc/source/whatsnew/v0.23.1.txt
Original file line number Diff line number Diff line change
Expand Up @@ -46,6 +46,11 @@ Bug Fixes
-
-

Categorical
^^^^^^^^^^^

- Bug in :func:`pandas.util.testing.assert_index_equal` which incorrectly raised ``AssertionError`` when comparing two :class:`CategoricalIndex` objects with ``check_categorical=False`` (:issue:`19776`)

Conversion
^^^^^^^^^^

Expand Down
38 changes: 38 additions & 0 deletions pandas/tests/util/test_testing.py
Original file line number Diff line number Diff line change
Expand Up @@ -503,6 +503,25 @@ def test_index_equal_metadata_message(self):
with tm.assert_raises_regex(AssertionError, expected):
assert_index_equal(idx1, idx2)

def test_categorical_index_equality(self):
# Default (strict) comparison: both indexes hold the values 'a' and 'b',
# but the right one is built with the extra category 'c', so
# assert_index_equal is expected to raise AssertionError.
# `expected` is the regex the error message must match; the `u?` parts
# make the unicode prefix on the category reprs optional.
expected = """Index are different

Attribute "dtype" are different
\\[left\\]: CategoricalDtype\\(categories=\\[u?'a', u?'b'\\], ordered=False\\)
\\[right\\]: CategoricalDtype\\(categories=\\[u?'a', u?'b', u?'c'\\], \
ordered=False\\)"""

with tm.assert_raises_regex(AssertionError, expected):
assert_index_equal(pd.Index(pd.Categorical(['a', 'b'])),
pd.Index(pd.Categorical(['a', 'b'],
categories=['a', 'b', 'c'])))

def test_categorical_index_equality_relax_categories_check(self):
# Same pair of indexes as the strict test (right side has the extra
# category 'c'), compared with check_categorical=False. There is no
# explicit assertion: the test passes when the call does not raise.
assert_index_equal(pd.Index(pd.Categorical(['a', 'b'])),
pd.Index(pd.Categorical(['a', 'b'],
categories=['a', 'b', 'c'])),
check_categorical=False)


class TestAssertSeriesEqual(object):

Expand Down Expand Up @@ -600,6 +619,25 @@ def test_series_equal_message(self):
assert_series_equal(pd.Series([1, 2, 3]), pd.Series([1, 2, 4]),
check_less_precise=True)

def test_categorical_series_equality(self):
# Default (strict) comparison: both Series hold the values 'a' and 'b',
# but the right one is built with the extra category 'c', so
# assert_series_equal is expected to raise AssertionError.
# `expected` is the regex the error message must match; the `u?` parts
# make the unicode prefix on the category reprs optional.
expected = """Attributes are different

Attribute "dtype" are different
\\[left\\]: CategoricalDtype\\(categories=\\[u?'a', u?'b'\\], ordered=False\\)
\\[right\\]: CategoricalDtype\\(categories=\\[u?'a', u?'b', u?'c'\\], \
ordered=False\\)"""

with tm.assert_raises_regex(AssertionError, expected):
assert_series_equal(pd.Series(pd.Categorical(['a', 'b'])),
pd.Series(pd.Categorical(['a', 'b'],
categories=['a', 'b', 'c'])))

def test_categorical_series_equality_relax_categories_check(self):
# Same pair of Series as the strict test (right side has the extra
# category 'c'), compared with check_categorical=False. There is no
# explicit assertion: the test passes when the call does not raise.
assert_series_equal(pd.Series(pd.Categorical(['a', 'b'])),
pd.Series(pd.Categorical(['a', 'b'],
categories=['a', 'b', 'c'])),
check_categorical=False)


class TestAssertFrameEqual(object):

Expand Down
29 changes: 17 additions & 12 deletions pandas/util/testing.py
Original file line number Diff line number Diff line change
Expand Up @@ -778,8 +778,12 @@ def assert_index_equal(left, right, exact='equiv', check_names=True,

def _check_types(l, r, obj='Index'):
if exact:
assert_class_equal(left, right, exact=exact, obj=obj)
assert_attr_equal('dtype', l, r, obj=obj)
assert_class_equal(l, r, exact=exact, obj=obj)

# Skip exact dtype checking when `check_categorical` is False
if check_categorical:
assert_attr_equal('dtype', l, r, obj=obj)

# allow string-like to have different inferred_types
if l.inferred_type in ('string', 'unicode'):
assert r.inferred_type in ('string', 'unicode')
Expand Down Expand Up @@ -829,7 +833,8 @@ def _get_ilevel_values(index, level):
# get_level_values may change dtype
_check_types(left.levels[level], right.levels[level], obj=obj)

if check_exact:
# skip exact index checking when `check_categorical` is False
if check_exact and check_categorical:
if not left.equals(right):
diff = np.sum((left.values != right.values)
.astype(int)) * 100.0 / len(left)
Expand Down Expand Up @@ -950,23 +955,23 @@ def is_sorted(seq):


def assert_categorical_equal(left, right, check_dtype=True,
obj='Categorical', check_category_order=True):
check_category_order=True, obj='Categorical'):
"""Test that Categoricals are equivalent.

Parameters
----------
left, right : Categorical
Categoricals to compare
left : Categorical
right : Categorical
check_dtype : bool, default True
Check that integer dtype of the codes are the same
obj : str, default 'Categorical'
Specify object name being compared, internally used to show appropriate
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

did you move this to make this consistent with other function orderings?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yep, you got it. There is at least one more that can be made more consistent. @jreback should I add to this PR?

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

sure

assertion message
check_category_order : bool, default True
Whether the order of the categories should be compared, which
implies identical integer codes. If False, only the resulting
values are compared. The ordered attribute is
checked regardless.
obj : str, default 'Categorical'
Specify object name being compared, internally used to show appropriate
assertion message
"""
_check_isinstance(left, right, Categorical)

Expand Down Expand Up @@ -1020,7 +1025,7 @@ def raise_assert_detail(obj, message, left, right, diff=None):

def assert_numpy_array_equal(left, right, strict_nan=False,
check_dtype=True, err_msg=None,
obj='numpy array', check_same=None):
check_same=None, obj='numpy array'):
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

modified order of params to match other assert_* helper methods

""" Checks that 'np.ndarray' is equivalent

Parameters
Expand All @@ -1033,11 +1038,11 @@ def assert_numpy_array_equal(left, right, strict_nan=False,
check dtype if both a and b are np.ndarray
err_msg : str, default None
If provided, used as assertion message
check_same : None|'copy'|'same', default None
Ensure left and right refer/do not refer to the same memory area
obj : str, default 'numpy array'
Specify object name being compared, internally used to show appropriate
assertion message
check_same : None|'copy'|'same', default None
Ensure left and right refer/do not refer to the same memory area
"""

# instance validation
Expand Down