From a9463345ff55a1ed8c3e2bff06b62323a77b9da1 Mon Sep 17 00:00:00 2001 From: Anthony Milbourne <18662115+amilbourne@users.noreply.github.com> Date: Thu, 29 Oct 2020 00:46:34 +0000 Subject: [PATCH 1/4] Fix output from assert_frame_equal when indexes differ and check_like=True --- pandas/_testing.py | 18 +++++++++++++++--- pandas/tests/util/test_assert_frame_equal.py | 10 ++++++---- 2 files changed, 21 insertions(+), 7 deletions(-) diff --git a/pandas/_testing.py b/pandas/_testing.py index a4fdb390abf42..a2ee4c2d8c347 100644 --- a/pandas/_testing.py +++ b/pandas/_testing.py @@ -667,6 +667,7 @@ def assert_index_equal( check_less_precise: Union[bool, int] = no_default, check_exact: bool = True, check_categorical: bool = True, + check_order: bool = True, rtol: float = 1.0e-5, atol: float = 1.0e-8, obj: str = "Index", @@ -696,6 +697,10 @@ def assert_index_equal( Whether to compare number exactly. check_categorical : bool, default True Whether to compare internal Categorical exactly. + check_order : bool, default True + Whether to compare the order of index entries as well as their values. + If True, both indexes must contain the same elements, in the same order. + If False, both indexes must contain the same elements, but in any order. rtol : float, default 1e-5 Relative tolerance. Only used when check_exact is False. 
@@ -762,6 +767,11 @@ def _get_ilevel_values(index, level): msg3 = f"{len(right)}, {right}" raise_assert_detail(obj, msg1, msg2, msg3) + # If order doesn't matter then sort the index entries + if not check_order: + left = left.sort_values() + right = right.sort_values() + # MultiIndex special comparison for little-friendly error messages if left.nlevels > 1: left = cast(MultiIndex, left) @@ -1582,9 +1592,6 @@ def assert_frame_equal( obj, f"{obj} shape mismatch", f"{repr(left.shape)}", f"{repr(right.shape)}" ) - if check_like: - left, right = left.reindex_like(right), right - if check_flags: assert left.flags == right.flags, f"{repr(left.flags)} != {repr(right.flags)}" @@ -1596,6 +1603,7 @@ def assert_frame_equal( check_names=check_names, check_exact=check_exact, check_categorical=check_categorical, + check_order=not check_like, rtol=rtol, atol=atol, obj=f"{obj}.index", @@ -1609,11 +1617,15 @@ def assert_frame_equal( check_names=check_names, check_exact=check_exact, check_categorical=check_categorical, + check_order=not check_like, rtol=rtol, atol=atol, obj=f"{obj}.columns", ) + if check_like: + left, right = left.reindex_like(right), right + # compare by blocks if by_blocks: rblocks = right._to_dict_of_blocks() diff --git a/pandas/tests/util/test_assert_frame_equal.py b/pandas/tests/util/test_assert_frame_equal.py index 6111797d70268..d5161ce37494b 100644 --- a/pandas/tests/util/test_assert_frame_equal.py +++ b/pandas/tests/util/test_assert_frame_equal.py @@ -145,7 +145,8 @@ def test_empty_dtypes(check_dtype): tm.assert_frame_equal(df1, df2, **kwargs) -def test_frame_equal_index_mismatch(obj_fixture): +@pytest.mark.parametrize("check_like", [True, False]) +def test_frame_equal_index_mismatch(check_like, obj_fixture): msg = f"""{obj_fixture}\\.index are different {obj_fixture}\\.index values are different \\(33\\.33333 %\\) @@ -156,10 +157,11 @@ def test_frame_equal_index_mismatch(obj_fixture): df2 = DataFrame({"A": [1, 2, 3], "B": [4, 5, 6]}, index=["a", "b", "d"]) 
with pytest.raises(AssertionError, match=msg): - tm.assert_frame_equal(df1, df2, obj=obj_fixture) + tm.assert_frame_equal(df1, df2, check_like=check_like, obj=obj_fixture) -def test_frame_equal_columns_mismatch(obj_fixture): +@pytest.mark.parametrize("check_like", [True, False]) +def test_frame_equal_columns_mismatch(check_like, obj_fixture): msg = f"""{obj_fixture}\\.columns are different {obj_fixture}\\.columns values are different \\(50\\.0 %\\) @@ -170,7 +172,7 @@ def test_frame_equal_columns_mismatch(obj_fixture): df2 = DataFrame({"A": [1, 2, 3], "b": [4, 5, 6]}, index=["a", "b", "c"]) with pytest.raises(AssertionError, match=msg): - tm.assert_frame_equal(df1, df2, obj=obj_fixture) + tm.assert_frame_equal(df1, df2, check_like=check_like, obj=obj_fixture) def test_frame_equal_block_mismatch(by_blocks_fixture, obj_fixture): From 2157ac7b925445ac51384a245066f915f579f61a Mon Sep 17 00:00:00 2001 From: Anthony Milbourne <18662115+amilbourne@users.noreply.github.com> Date: Thu, 29 Oct 2020 09:53:07 +0000 Subject: [PATCH 2/4] Added whatsnew note for new check_order param --- doc/source/whatsnew/v1.2.0.rst | 1 + 1 file changed, 1 insertion(+) diff --git a/doc/source/whatsnew/v1.2.0.rst b/doc/source/whatsnew/v1.2.0.rst index 6f9e8d6a98d80..6f137302d4994 100644 --- a/doc/source/whatsnew/v1.2.0.rst +++ b/doc/source/whatsnew/v1.2.0.rst @@ -228,6 +228,7 @@ Other enhancements - :class:`Rolling` now supports the ``closed`` argument for fixed windows (:issue:`34315`) - :class:`DatetimeIndex` and :class:`Series` with ``datetime64`` or ``datetime64tz`` dtypes now support ``std`` (:issue:`37436`) - :class:`Window` now supports all Scipy window types in ``win_type`` with flexible keyword argument support (:issue:`34556`) +- :meth:`testing.assert_index_equal` now has a ``check_order`` parameter that allows indexes to be checked in an order-insensitive manner (:issue:`37478`) .. 
_whatsnew_120.api_breaking.python: From 20d67a8af04fdcf8aba9778a3aec4dc536fb835d Mon Sep 17 00:00:00 2001 From: Anthony Milbourne <18662115+amilbourne@users.noreply.github.com> Date: Thu, 29 Oct 2020 09:55:31 +0000 Subject: [PATCH 3/4] Added unit test for new check_order param --- pandas/tests/util/test_assert_index_equal.py | 22 ++++++++++++++++++++ 1 file changed, 22 insertions(+) diff --git a/pandas/tests/util/test_assert_index_equal.py b/pandas/tests/util/test_assert_index_equal.py index 125af6ef78593..21d5a456e20d0 100644 --- a/pandas/tests/util/test_assert_index_equal.py +++ b/pandas/tests/util/test_assert_index_equal.py @@ -115,6 +115,28 @@ def test_index_equal_values_too_far(check_exact, rtol): tm.assert_index_equal(idx1, idx2, **kwargs) +@pytest.mark.parametrize("check_order", [True, False]) +def test_index_equal_value_order_mismatch(check_exact, rtol, check_order): + idx1 = Index([1, 2, 3]) + idx2 = Index([3, 2, 1]) + + msg = """Index are different + +Index values are different \\(66\\.66667 %\\) +\\[left\\]: Int64Index\\(\\[1, 2, 3\\], dtype='int64'\\) +\\[right\\]: Int64Index\\(\\[3, 2, 1\\], dtype='int64'\\)""" + + if check_order: + with pytest.raises(AssertionError, match=msg): + tm.assert_index_equal( + idx1, idx2, check_exact=check_exact, rtol=rtol, check_order=True + ) + else: + tm.assert_index_equal( + idx1, idx2, check_exact=check_exact, rtol=rtol, check_order=False + ) + + def test_index_equal_level_values_mismatch(check_exact, rtol): idx1 = MultiIndex.from_tuples([("A", 2), ("A", 2), ("B", 3), ("B", 4)]) idx2 = MultiIndex.from_tuples([("A", 1), ("A", 2), ("B", 3), ("B", 4)]) From 25066f63b8ef614de63520e4120864872c174380 Mon Sep 17 00:00:00 2001 From: Anthony Milbourne <18662115+amilbourne@users.noreply.github.com> Date: Mon, 2 Nov 2020 12:45:26 +0000 Subject: [PATCH 4/4] Adding versionadded tag --- pandas/_testing.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/pandas/_testing.py b/pandas/_testing.py index a2ee4c2d8c347..427585704ba58 
100644 --- a/pandas/_testing.py +++ b/pandas/_testing.py @@ -701,6 +701,8 @@ def assert_index_equal( Whether to compare the order of index entries as well as their values. If True, both indexes must contain the same elements, in the same order. If False, both indexes must contain the same elements, but in any order. + + .. versionadded:: 1.2.0 rtol : float, default 1e-5 Relative tolerance. Only used when check_exact is False.