From 2db2c26038df2a3412d2c22c5218d7c2983fa870 Mon Sep 17 00:00:00 2001 From: Aly Sivji Date: Wed, 16 May 2018 10:41:32 -0500 Subject: [PATCH 01/14] fix categorical check --- pandas/util/testing.py | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) diff --git a/pandas/util/testing.py b/pandas/util/testing.py index e1484a9c1b390..cbea0364f3235 100644 --- a/pandas/util/testing.py +++ b/pandas/util/testing.py @@ -778,8 +778,12 @@ def assert_index_equal(left, right, exact='equiv', check_names=True, def _check_types(l, r, obj='Index'): if exact: - assert_class_equal(left, right, exact=exact, obj=obj) - assert_attr_equal('dtype', l, r, obj=obj) + assert_class_equal(l, r, exact=exact, obj=obj) + + # Skip exact dtype checking with `check_categorical` is False + if check_categorical: + assert_attr_equal('dtype', l, r, obj=obj) + # allow string-like to have different inferred_types if l.inferred_type in ('string', 'unicode'): assert r.inferred_type in ('string', 'unicode') @@ -829,7 +833,8 @@ def _get_ilevel_values(index, level): # get_level_values may change dtype _check_types(left.levels[level], right.levels[level], obj=obj) - if check_exact: + # skip exact index checking when `check_categorical` is False + if check_exact and check_categorical: if not left.equals(right): diff = np.sum((left.values != right.values) .astype(int)) * 100.0 / len(left) From 7cfa95778c22fcb19166d003da7c4bea7078b6dd Mon Sep 17 00:00:00 2001 From: Aly Sivji Date: Wed, 16 May 2018 10:56:59 -0500 Subject: [PATCH 02/14] Add tests for index and series --- pandas/tests/util/test_testing.py | 36 +++++++++++++++++++++++++++++++ 1 file changed, 36 insertions(+) diff --git a/pandas/tests/util/test_testing.py b/pandas/tests/util/test_testing.py index d6f58d16bcf64..c7a4a8d5eba27 100644 --- a/pandas/tests/util/test_testing.py +++ b/pandas/tests/util/test_testing.py @@ -503,6 +503,24 @@ def test_index_equal_metadata_message(self): with tm.assert_raises_regex(AssertionError, expected): assert_index_equal(idx1, idx2) + def test_categorical_index_equality(self): + expected = """Index are different + +Attribute "dtype" are different +\\[left\\]: CategoricalDtype\\(categories=\\['a', 'b'\\], ordered=False\\) +\\[right\\]: CategoricalDtype\\(categories=\\['a', 'b', 'c'\\], ordered=False\\)""" # noqa + + with tm.assert_raises_regex(AssertionError, expected): + assert_index_equal(pd.Index(pd.Categorical(['a', 'b'])), + pd.Index(pd.Categorical(['a', 'b'], + categories=['a', 'b', 'c']))) + + def test_categorical_index_equality_relax_categories_check(self): + assert_index_equal(pd.Index(pd.Categorical(['a', 'b'])), + pd.Index(pd.Categorical(['a', 'b'], + categories=['a', 'b', 'c'])), + check_categorical=False) + class TestAssertSeriesEqual(object): @@ -600,6 +618,24 @@ def test_series_equal_message(self): assert_series_equal(pd.Series([1, 2, 3]), pd.Series([1, 2, 4]), check_less_precise=True) + def test_categorical_series_equality(self): + expected = """Attributes are different + +Attribute "dtype" are different +\\[left\\]: CategoricalDtype\\(categories=\\['a', 'b'\\], ordered=False\\) +\\[right\\]: CategoricalDtype\\(categories=\\['a', 'b', 'c'\\], ordered=False\\)""" # noqa + + with tm.assert_raises_regex(AssertionError, expected): + assert_series_equal(pd.Series(pd.Categorical(['a', 'b'])), + pd.Series(pd.Categorical(['a', 'b'], + categories=['a', 'b', 'c']))) + + def test_categorical_series_equality_relax_categories_check(self): + assert_series_equal(pd.Series(pd.Categorical(['a', 'b'])), + pd.Series(pd.Categorical(['a', 'b'], + categories=['a', 'b', 'c'])), + check_categorical=False) + class TestAssertFrameEqual(object): From 7606152679192cc6a0ddb14fae3358eae9c04cee Mon Sep 17 00:00:00 2001 From: Aly Sivji Date: Wed, 16 May 2018 10:59:22 -0500 Subject: [PATCH 03/14] clean up assert_categoricals_eq to match others --- pandas/util/testing.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/pandas/util/testing.py b/pandas/util/testing.py index cbea0364f3235..69df577bcc279 100644 --- a/pandas/util/testing.py +++ b/pandas/util/testing.py @@ -955,13 +955,13 @@ def is_sorted(seq): def assert_categorical_equal(left, right, check_dtype=True, - obj='Categorical', check_category_order=True): + check_category_order=True, obj='Categorical'): """Test that Categoricals are equivalent. Parameters ---------- - left, right : Categorical - Categoricals to compare + left : Categorical + right : Categorical check_dtype : bool, default True Check that integer dtype of the codes are the same obj : str, default 'Categorical' From f61e5380fc6bdc657ed9dfcec76c06c8e137eb87 Mon Sep 17 00:00:00 2001 From: Aly Sivji Date: Wed, 16 May 2018 11:03:57 -0500 Subject: [PATCH 04/14] Rearrange order of docstring to match params --- pandas/util/testing.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/pandas/util/testing.py b/pandas/util/testing.py index 69df577bcc279..a97a348b8dd52 100644 --- a/pandas/util/testing.py +++ b/pandas/util/testing.py @@ -964,14 +964,14 @@ def assert_categorical_equal(left, right, check_dtype=True, right : Categorical check_dtype : bool, default True Check that integer dtype of the codes are the same - obj : str, default 'Categorical' - Specify object name being compared, internally used to show appropriate - assertion message check_category_order : bool, default True Whether the order of the categories should be compared, which implies identical integer codes. If False, only the resulting values are compared. The ordered attribute is checked regardless. + obj : str, default 'Categorical' + Specify object name being compared, internally used to show appropriate + assertion message """ _check_isinstance(left, right, Categorical) From 53ac4a4a060c3aba832a253d2e9c51da979992fa Mon Sep 17 00:00:00 2001 From: Aly Sivji Date: Wed, 16 May 2018 12:38:58 -0500 Subject: [PATCH 05/14] Refactor comment into boolean check for readability --- pandas/util/testing.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/pandas/util/testing.py b/pandas/util/testing.py index a97a348b8dd52..ce3f29fa508e4 100644 --- a/pandas/util/testing.py +++ b/pandas/util/testing.py @@ -833,8 +833,8 @@ def _get_ilevel_values(index, level): # get_level_values may change dtype _check_types(left.levels[level], right.levels[level], obj=obj) - # skip exact index checking when `check_categorical` is False - if check_exact and check_categorical: + run_exact_index_check = check_exact and check_categorical + if run_exact_index_check: if not left.equals(right): diff = np.sum((left.values != right.values) .astype(int)) * 100.0 / len(left) From 25d9fe6c84f763eeecbda467ca5bbd1f0fd4d833 Mon Sep 17 00:00:00 2001 From: Aly Sivji Date: Wed, 16 May 2018 13:42:38 -0500 Subject: [PATCH 06/14] Add whatsnew entry --- doc/source/whatsnew/v0.24.0.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/source/whatsnew/v0.24.0.txt b/doc/source/whatsnew/v0.24.0.txt index b94377af770f4..9b9638d06f71c 100644 --- a/doc/source/whatsnew/v0.24.0.txt +++ b/doc/source/whatsnew/v0.24.0.txt @@ -76,7 +76,7 @@ Bug Fixes Categorical ^^^^^^^^^^^ -- +- Bug in :func:`pandas.util.testing.assert_index_equal` raised ``AssertionError`` when comparing two :class:`CategoricalIndex` objects when ``check_categorical=False`` (:issue:`19776`) - - From fb24ed542371a47bd05117e47c918e6bd2d5c936 Mon Sep 17 00:00:00 2001 From: Aly Sivji Date: Wed, 16 May 2018 13:45:54 -0500 Subject: [PATCH 07/14] Fix grammar --- doc/source/whatsnew/v0.24.0.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/source/whatsnew/v0.24.0.txt b/doc/source/whatsnew/v0.24.0.txt index 9b9638d06f71c..17b464eb6590b 100644 --- a/doc/source/whatsnew/v0.24.0.txt +++ b/doc/source/whatsnew/v0.24.0.txt @@ -76,7 +76,7 @@ Bug Fixes Categorical ^^^^^^^^^^^ -- Bug in :func:`pandas.util.testing.assert_index_equal` raised ``AssertionError`` when comparing two :class:`CategoricalIndex` objects when ``check_categorical=False`` (:issue:`19776`) +- Bug in :func:`pandas.util.testing.assert_index_equal` raised ``AssertionError`` if comparing two :class:`CategoricalIndex` objects when ``check_categorical=False`` (:issue:`19776`) - - From 14761496252b2acf441d8a6c76cc282be7ba7b76 Mon Sep 17 00:00:00 2001 From: Aly Sivji Date: Wed, 16 May 2018 13:58:44 -0500 Subject: [PATCH 08/14] Fix pep 8 --- pandas/tests/util/test_testing.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/pandas/tests/util/test_testing.py b/pandas/tests/util/test_testing.py index c7a4a8d5eba27..d604a1c47b1c0 100644 --- a/pandas/tests/util/test_testing.py +++ b/pandas/tests/util/test_testing.py @@ -508,7 +508,7 @@ def test_categorical_index_equality(self): Attribute "dtype" are different \\[left\\]: CategoricalDtype\\(categories=\\['a', 'b'\\], ordered=False\\) -\\[right\\]: CategoricalDtype\\(categories=\\['a', 'b', 'c'\\], ordered=False\\)""" # noqa +\\[right\\]: CategoricalDtype\\(categories=\\['a', 'b', 'c'\\], ordered=False\\)""" # noqa with tm.assert_raises_regex(AssertionError, expected): assert_index_equal(pd.Index(pd.Categorical(['a', 'b'])), @@ -623,7 +623,7 @@ def test_categorical_series_equality(self): Attribute "dtype" are different \\[left\\]: CategoricalDtype\\(categories=\\['a', 'b'\\], ordered=False\\) -\\[right\\]: CategoricalDtype\\(categories=\\['a', 'b', 'c'\\], ordered=False\\)""" # noqa +\\[right\\]: CategoricalDtype\\(categories=\\['a', 'b', 'c'\\], ordered=False\\)""" # noqa with tm.assert_raises_regex(AssertionError, expected): assert_series_equal(pd.Series(pd.Categorical(['a', 'b'])), From 0de8800fe004e0d9a4cfcc6b7944d4094acfad17 Mon Sep 17 00:00:00 2001 From: Aly Sivji Date: Wed, 16 May 2018 14:58:28 -0500 Subject: [PATCH 09/14] explicitly state unicode string --- pandas/tests/util/test_testing.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/pandas/tests/util/test_testing.py b/pandas/tests/util/test_testing.py index d604a1c47b1c0..496bbc62e406e 100644 --- a/pandas/tests/util/test_testing.py +++ b/pandas/tests/util/test_testing.py @@ -507,8 +507,8 @@ def test_categorical_index_equality(self): expected = """Index are different Attribute "dtype" are different -\\[left\\]: CategoricalDtype\\(categories=\\['a', 'b'\\], ordered=False\\) -\\[right\\]: CategoricalDtype\\(categories=\\['a', 'b', 'c'\\], ordered=False\\)""" # noqa +\\[left\\]: CategoricalDtype\\(categories=\\[u'a', u'b'\\], ordered=False\\) +\\[right\\]: CategoricalDtype\\(categories=\\[u'a', u'b', u'c'\\], ordered=False\\)""" # noqa with tm.assert_raises_regex(AssertionError, expected): assert_index_equal(pd.Index(pd.Categorical(['a', 'b'])), @@ -622,8 +622,8 @@ def test_categorical_series_equality(self): expected = """Attributes are different Attribute "dtype" are different -\\[left\\]: CategoricalDtype\\(categories=\\['a', 'b'\\], ordered=False\\) -\\[right\\]: CategoricalDtype\\(categories=\\['a', 'b', 'c'\\], ordered=False\\)""" # noqa +\\[left\\]: CategoricalDtype\\(categories=\\[u'a', u'b'\\], ordered=False\\) +\\[right\\]: CategoricalDtype\\(categories=\\[u'a', u'b', u'c'\\], ordered=False\\)""" # noqa with tm.assert_raises_regex(AssertionError, expected): assert_series_equal(pd.Series(pd.Categorical(['a', 'b'])), From d5ab039a861681d60571cd964e8704ab54b7ecc6 Mon Sep 17 00:00:00 2001 From: Aly Sivji Date: Wed, 16 May 2018 15:37:09 -0500 Subject: [PATCH 10/14] properly fix unicode string failures --- pandas/tests/util/test_testing.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/pandas/tests/util/test_testing.py b/pandas/tests/util/test_testing.py index 496bbc62e406e..880c4ea2f77b2 100644 --- a/pandas/tests/util/test_testing.py +++ b/pandas/tests/util/test_testing.py @@ -507,8 +507,8 @@ def test_categorical_index_equality(self): expected = """Index are different Attribute "dtype" are different -\\[left\\]: CategoricalDtype\\(categories=\\[u'a', u'b'\\], ordered=False\\) -\\[right\\]: CategoricalDtype\\(categories=\\[u'a', u'b', u'c'\\], ordered=False\\)""" # noqa +\\[left\\]: CategoricalDtype\\(categories=\\[u?'a', u?'b'\\], ordered=False\\) +\\[right\\]: CategoricalDtype\\(categories=\\[u?'a', u?'b', u?'c'\\], ordered=False\\)""" # noqa with tm.assert_raises_regex(AssertionError, expected): assert_index_equal(pd.Index(pd.Categorical(['a', 'b'])), @@ -622,8 +622,8 @@ def test_categorical_series_equality(self): expected = """Attributes are different Attribute "dtype" are different -\\[left\\]: CategoricalDtype\\(categories=\\[u'a', u'b'\\], ordered=False\\) -\\[right\\]: CategoricalDtype\\(categories=\\[u'a', u'b', u'c'\\], ordered=False\\)""" # noqa +\\[left\\]: CategoricalDtype\\(categories=\\[u?'a', u?'b'\\], ordered=False\\) +\\[right\\]: CategoricalDtype\\(categories=\\[u?'a', u?'b', u?'c'\\], ordered=False\\)""" # noqa with tm.assert_raises_regex(AssertionError, expected): assert_series_equal(pd.Series(pd.Categorical(['a', 'b'])), From dba649f89a7bf79a860351da09c8f31497b99632 Mon Sep 17 00:00:00 2001 From: Aly Sivji Date: Wed, 16 May 2018 18:36:00 -0500 Subject: [PATCH 11/14] Code review suggestions --- doc/source/whatsnew/v0.23.1.txt | 5 +++++ doc/source/whatsnew/v0.24.0.txt | 2 +- pandas/tests/util/test_testing.py | 6 ++++-- 3 files changed, 10 insertions(+), 3 deletions(-) diff --git a/doc/source/whatsnew/v0.23.1.txt b/doc/source/whatsnew/v0.23.1.txt index 5c9c3e2931bd9..4dcfeab002881 100644 --- a/doc/source/whatsnew/v0.23.1.txt +++ b/doc/source/whatsnew/v0.23.1.txt @@ -46,6 +46,11 @@ Bug Fixes - - +Categorical +^^^^^^^^^^^ + +- Bug in :func:`pandas.util.testing.assert_index_equal` raised ``AssertionError`` if comparing two :class:`CategoricalIndex` objects when ``check_categorical=False`` (:issue:`19776`) + Conversion ^^^^^^^^^^ diff --git a/doc/source/whatsnew/v0.24.0.txt b/doc/source/whatsnew/v0.24.0.txt index 17b464eb6590b..b94377af770f4 100644 --- a/doc/source/whatsnew/v0.24.0.txt +++ b/doc/source/whatsnew/v0.24.0.txt @@ -76,7 +76,7 @@ Bug Fixes Categorical ^^^^^^^^^^^ -- Bug in :func:`pandas.util.testing.assert_index_equal` raised ``AssertionError`` if comparing two :class:`CategoricalIndex` objects when ``check_categorical=False`` (:issue:`19776`) +- - - diff --git a/pandas/tests/util/test_testing.py b/pandas/tests/util/test_testing.py index 880c4ea2f77b2..ab7c4fb528452 100644 --- a/pandas/tests/util/test_testing.py +++ b/pandas/tests/util/test_testing.py @@ -508,7 +508,8 @@ def test_categorical_index_equality(self): Attribute "dtype" are different \\[left\\]: CategoricalDtype\\(categories=\\[u?'a', u?'b'\\], ordered=False\\) -\\[right\\]: CategoricalDtype\\(categories=\\[u?'a', u?'b', u?'c'\\], ordered=False\\)""" # noqa +\\[right\\]: CategoricalDtype\\(categories=\\[u?'a', u?'b', u?'c'\\], \ +ordered=False\\)""" with tm.assert_raises_regex(AssertionError, expected): assert_index_equal(pd.Index(pd.Categorical(['a', 'b'])), @@ -623,7 +624,8 @@ def test_categorical_series_equality(self): Attribute "dtype" are different \\[left\\]: CategoricalDtype\\(categories=\\[u?'a', u?'b'\\], ordered=False\\) -\\[right\\]: CategoricalDtype\\(categories=\\[u?'a', u?'b', u?'c'\\], ordered=False\\)""" # noqa +\\[right\\]: CategoricalDtype\\(categories=\\[u?'a', u?'b', u?'c'\\], \ +ordered=False\\)""" with tm.assert_raises_regex(AssertionError, expected): assert_series_equal(pd.Series(pd.Categorical(['a', 'b'])), From 97f300f059c8c6aaf1d9129676f2e8a4754670ef Mon Sep 17 00:00:00 2001 From: Aly Sivji Date: Thu, 17 May 2018 05:53:56 -0500 Subject: [PATCH 12/14] Rearrange order of assert_numpy_array_equals to match other assert_* --- pandas/util/testing.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/pandas/util/testing.py b/pandas/util/testing.py index ce3f29fa508e4..a1f0b88a6565a 100644 --- a/pandas/util/testing.py +++ b/pandas/util/testing.py @@ -1025,7 +1025,7 @@ def raise_assert_detail(obj, message, left, right, diff=None): def assert_numpy_array_equal(left, right, strict_nan=False, check_dtype=True, err_msg=None, - obj='numpy array', check_same=None): + check_same=None, obj='numpy array'): """ Checks that 'np.ndarray' is equivalent Parameters @@ -1038,11 +1038,11 @@ def assert_numpy_array_equal(left, right, strict_nan=False, check dtype if both a and b are np.ndarray err_msg : str, default None If provided, used as assertion message + check_same : None|'copy'|'same', default None + Ensure left and right refer/do not refer to the same memory area obj : str, default 'numpy array' Specify object name being compared, internally used to show appropriate assertion message - check_same : None|'copy'|'same', default None - Ensure left and right refer/do not refer to the same memory area """ # instance validation From 2fb09fee48bac37bde6d923d52c9b8b1eaf4b33a Mon Sep 17 00:00:00 2001 From: Aly Sivji Date: Thu, 17 May 2018 08:13:35 -0500 Subject: [PATCH 13/14] Refactor boolean var into if and add comment --- pandas/util/testing.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/pandas/util/testing.py b/pandas/util/testing.py index a1f0b88a6565a..233eba6490937 100644 --- a/pandas/util/testing.py +++ b/pandas/util/testing.py @@ -780,7 +780,7 @@ def _check_types(l, r, obj='Index'): if exact: assert_class_equal(l, r, exact=exact, obj=obj) - # Skip exact dtype checking with `check_categorical` is False + # Skip exact dtype checking when `check_categorical` is False if check_categorical: assert_attr_equal('dtype', l, r, obj=obj) @@ -833,8 +833,8 @@ def _get_ilevel_values(index, level): # get_level_values may change dtype _check_types(left.levels[level], right.levels[level], obj=obj) - run_exact_index_check = check_exact and check_categorical - if run_exact_index_check: + # skip exact index checking when `check_categorical` is False + if check_exact and check_categorical: if not left.equals(right): diff = np.sum((left.values != right.values) .astype(int)) * 100.0 / len(left) From 86999e387545cbe879b519db88b86cd8ebe0a5bc Mon Sep 17 00:00:00 2001 From: Aly Sivji Date: Thu, 17 May 2018 08:18:31 -0500 Subject: [PATCH 14/14] Improve description in whatsnew --- doc/source/whatsnew/v0.23.1.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/source/whatsnew/v0.23.1.txt b/doc/source/whatsnew/v0.23.1.txt index 4dcfeab002881..d211a21546978 100644 --- a/doc/source/whatsnew/v0.23.1.txt +++ b/doc/source/whatsnew/v0.23.1.txt @@ -49,7 +49,7 @@ Bug Fixes Categorical ^^^^^^^^^^^ -- Bug in :func:`pandas.util.testing.assert_index_equal` raised ``AssertionError`` if comparing two :class:`CategoricalIndex` objects when ``check_categorical=False`` (:issue:`19776`) +- Bug in :func:`pandas.util.testing.assert_index_equal` which raised ``AssertionError`` incorrectly, when comparing two :class:`CategoricalIndex` objects with param ``check_categorical=False`` (:issue:`19776`) Conversion ^^^^^^^^^^