diff --git a/doc/source/whatsnew/v1.2.0.rst b/doc/source/whatsnew/v1.2.0.rst index 6f9e8d6a98d80..6f137302d4994 100644 --- a/doc/source/whatsnew/v1.2.0.rst +++ b/doc/source/whatsnew/v1.2.0.rst @@ -228,6 +228,7 @@ Other enhancements - :class:`Rolling` now supports the ``closed`` argument for fixed windows (:issue:`34315`) - :class:`DatetimeIndex` and :class:`Series` with ``datetime64`` or ``datetime64tz`` dtypes now support ``std`` (:issue:`37436`) - :class:`Window` now supports all Scipy window types in ``win_type`` with flexible keyword argument support (:issue:`34556`) +- :meth:`testing.assert_index_equal` now has a ``check_order`` parameter that allows indexes to be checked in an order-insensitive manner (:issue:`37478`) .. _whatsnew_120.api_breaking.python: diff --git a/pandas/_testing.py b/pandas/_testing.py index a4fdb390abf42..427585704ba58 100644 --- a/pandas/_testing.py +++ b/pandas/_testing.py @@ -667,6 +667,7 @@ def assert_index_equal( check_less_precise: Union[bool, int] = no_default, check_exact: bool = True, check_categorical: bool = True, + check_order: bool = True, rtol: float = 1.0e-5, atol: float = 1.0e-8, obj: str = "Index", @@ -696,6 +697,12 @@ def assert_index_equal( Whether to compare number exactly. check_categorical : bool, default True Whether to compare internal Categorical exactly. + check_order : bool, default True + Whether to compare the order of index entries as well as their values. + If True, both indexes must contain the same elements, in the same order. + If False, both indexes must contain the same elements, but in any order. + + .. versionadded:: 1.2.0 rtol : float, default 1e-5 Relative tolerance. Only used when check_exact is False. 
@@ -762,6 +769,11 @@ def _get_ilevel_values(index, level): msg3 = f"{len(right)}, {right}" raise_assert_detail(obj, msg1, msg2, msg3) + # If order doesn't matter then sort the index entries + if not check_order: + left = left.sort_values() + right = right.sort_values() + # MultiIndex special comparison for little-friendly error messages if left.nlevels > 1: left = cast(MultiIndex, left) @@ -1582,9 +1594,6 @@ def assert_frame_equal( obj, f"{obj} shape mismatch", f"{repr(left.shape)}", f"{repr(right.shape)}" ) - if check_like: - left, right = left.reindex_like(right), right - if check_flags: assert left.flags == right.flags, f"{repr(left.flags)} != {repr(right.flags)}" @@ -1596,6 +1605,7 @@ def assert_frame_equal( check_names=check_names, check_exact=check_exact, check_categorical=check_categorical, + check_order=not check_like, rtol=rtol, atol=atol, obj=f"{obj}.index", @@ -1609,11 +1619,15 @@ def assert_frame_equal( check_names=check_names, check_exact=check_exact, check_categorical=check_categorical, + check_order=not check_like, rtol=rtol, atol=atol, obj=f"{obj}.columns", ) + if check_like: + left, right = left.reindex_like(right), right + # compare by blocks if by_blocks: rblocks = right._to_dict_of_blocks() diff --git a/pandas/tests/util/test_assert_frame_equal.py b/pandas/tests/util/test_assert_frame_equal.py index 6111797d70268..d5161ce37494b 100644 --- a/pandas/tests/util/test_assert_frame_equal.py +++ b/pandas/tests/util/test_assert_frame_equal.py @@ -145,7 +145,8 @@ def test_empty_dtypes(check_dtype): tm.assert_frame_equal(df1, df2, **kwargs) -def test_frame_equal_index_mismatch(obj_fixture): +@pytest.mark.parametrize("check_like", [True, False]) +def test_frame_equal_index_mismatch(check_like, obj_fixture): msg = f"""{obj_fixture}\\.index are different {obj_fixture}\\.index values are different \\(33\\.33333 %\\) @@ -156,10 +157,11 @@ def test_frame_equal_index_mismatch(obj_fixture): df2 = DataFrame({"A": [1, 2, 3], "B": [4, 5, 6]}, index=["a", "b", "d"]) 
with pytest.raises(AssertionError, match=msg): - tm.assert_frame_equal(df1, df2, obj=obj_fixture) + tm.assert_frame_equal(df1, df2, check_like=check_like, obj=obj_fixture) -def test_frame_equal_columns_mismatch(obj_fixture): +@pytest.mark.parametrize("check_like", [True, False]) +def test_frame_equal_columns_mismatch(check_like, obj_fixture): msg = f"""{obj_fixture}\\.columns are different {obj_fixture}\\.columns values are different \\(50\\.0 %\\) @@ -170,7 +172,7 @@ def test_frame_equal_columns_mismatch(obj_fixture): df2 = DataFrame({"A": [1, 2, 3], "b": [4, 5, 6]}, index=["a", "b", "c"]) with pytest.raises(AssertionError, match=msg): - tm.assert_frame_equal(df1, df2, obj=obj_fixture) + tm.assert_frame_equal(df1, df2, check_like=check_like, obj=obj_fixture) def test_frame_equal_block_mismatch(by_blocks_fixture, obj_fixture): diff --git a/pandas/tests/util/test_assert_index_equal.py b/pandas/tests/util/test_assert_index_equal.py index 125af6ef78593..21d5a456e20d0 100644 --- a/pandas/tests/util/test_assert_index_equal.py +++ b/pandas/tests/util/test_assert_index_equal.py @@ -115,6 +115,29 @@ def test_index_equal_values_too_far(check_exact, rtol): tm.assert_index_equal(idx1, idx2, **kwargs) +@pytest.mark.parametrize("check_order", [True, False]) +def test_index_equal_value_order_mismatch(check_exact, rtol, check_order): + # GH#37478: reversed indexes compare equal only when check_order=False + idx1 = Index([1, 2, 3]) + idx2 = Index([3, 2, 1]) + + msg = """Index are different + +Index values are different \\(66\\.66667 %\\) +\\[left\\]: Int64Index\\(\\[1, 2, 3\\], dtype='int64'\\) +\\[right\\]: Int64Index\\(\\[3, 2, 1\\], dtype='int64'\\)""" + + if check_order: + with pytest.raises(AssertionError, match=msg): + tm.assert_index_equal( + idx1, idx2, check_exact=check_exact, rtol=rtol, check_order=True + ) + else: + tm.assert_index_equal( + idx1, idx2, check_exact=check_exact, rtol=rtol, check_order=False + ) + + def test_index_equal_level_values_mismatch(check_exact, rtol): idx1 = MultiIndex.from_tuples([("A", 2), ("A", 2), ("B", 3), ("B", 
4)]) idx2 = MultiIndex.from_tuples([("A", 1), ("A", 2), ("B", 3), ("B", 4)])