From 6be91b72c7ae8fd440a4d20733cf8e6272e97248 Mon Sep 17 00:00:00 2001 From: Abdulaziz Aloqeely <52792999+DAzVise@users.noreply.github.com> Date: Tue, 9 Apr 2024 00:29:48 +0300 Subject: [PATCH 1/5] Default check_exact_index to True for integers --- pandas/_testing/asserters.py | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/pandas/_testing/asserters.py b/pandas/_testing/asserters.py index 3aacd3099c334..2314521245339 100644 --- a/pandas/_testing/asserters.py +++ b/pandas/_testing/asserters.py @@ -902,7 +902,7 @@ def assert_series_equal( >>> tm.assert_series_equal(a, b) """ __tracebackhide__ = True - check_exact_index = False if check_exact is lib.no_default else check_exact + check_exact_index = check_exact if ( check_exact is lib.no_default and rtol is lib.no_default @@ -914,8 +914,15 @@ def assert_series_equal( or is_numeric_dtype(right.dtype) and not is_float_dtype(right.dtype) ) + check_exact_index = ( + is_numeric_dtype(left.index.dtype) + and not is_float_dtype(left.index.dtype) + or is_numeric_dtype(right.index.dtype) + and not is_float_dtype(right.index.dtype) + ) elif check_exact is lib.no_default: check_exact = False + check_exact_index = False rtol = rtol if rtol is not lib.no_default else 1.0e-5 atol = atol if atol is not lib.no_default else 1.0e-8 From 40a656c276e275722f1fd272aefdb4bd17859377 Mon Sep 17 00:00:00 2001 From: Abdulaziz Aloqeely <52792999+DAzVise@users.noreply.github.com> Date: Tue, 9 Apr 2024 01:00:39 +0300 Subject: [PATCH 2/5] Fix pyright issue --- pandas/_testing/asserters.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/pandas/_testing/asserters.py b/pandas/_testing/asserters.py index 2314521245339..820f866248e13 100644 --- a/pandas/_testing/asserters.py +++ b/pandas/_testing/asserters.py @@ -902,7 +902,6 @@ def assert_series_equal( >>> tm.assert_series_equal(a, b) """ __tracebackhide__ = True - check_exact_index = check_exact if ( check_exact is lib.no_default and rtol is lib.no_default 
@@ -923,6 +922,8 @@ def assert_series_equal( elif check_exact is lib.no_default: check_exact = False check_exact_index = False + else: + check_exact_index = check_exact rtol = rtol if rtol is not lib.no_default else 1.0e-5 atol = atol if atol is not lib.no_default else 1.0e-8 From 0a4478d484fd21ae5a12578d5749133237c55724 Mon Sep 17 00:00:00 2001 From: Abdulaziz Aloqeely <52792999+Aloqeely@users.noreply.github.com> Date: Tue, 23 Apr 2024 21:56:11 +0300 Subject: [PATCH 3/5] fix logic for multiindex --- doc/source/whatsnew/v3.0.0.rst | 1 + pandas/_testing/asserters.py | 23 +++++++++--- pandas/tests/util/test_assert_series_equal.py | 37 +++++++++++++++++-- 3 files changed, 53 insertions(+), 8 deletions(-) diff --git a/doc/source/whatsnew/v3.0.0.rst b/doc/source/whatsnew/v3.0.0.rst index 19b448a1871c2..d85cd690046b5 100644 --- a/doc/source/whatsnew/v3.0.0.rst +++ b/doc/source/whatsnew/v3.0.0.rst @@ -152,6 +152,7 @@ Other API changes - Updated :meth:`DataFrame.to_excel` so that the output spreadsheet has no styling. Custom styling can still be done using :meth:`Styler.to_excel` (:issue:`54154`) - pickle and HDF (``.h5``) files created with Python 2 are no longer explicitly supported (:issue:`57387`) - pickled objects from pandas version less than ``1.0.0`` are no longer supported (:issue:`57155`) +- when comparing the indexes in :func:`testing.assert_series_equal`, the default value for check_exact is decided based on the :class:`Index` dtype, instead of the :class:`Series` dtype. (:issue:`57386`) .. --------------------------------------------------------------------------- .. _whatsnew_300.deprecations: diff --git a/pandas/_testing/asserters.py b/pandas/_testing/asserters.py index 820f866248e13..8efec24cc1380 100644 --- a/pandas/_testing/asserters.py +++ b/pandas/_testing/asserters.py @@ -861,12 +861,19 @@ def assert_series_equal( check_names : bool, default True Whether to check the Series and Index names attribute. 
check_exact : bool, default False - Whether to compare number exactly. + Whether to compare number exactly. This also applies when checking + Index equivalence. .. versionchanged:: 2.2.0 Defaults to True for integer dtypes if none of ``check_exact``, ``rtol`` and ``atol`` are specified. + + .. versionchanged:: 3.0.0 + + When checking Index equivalence, the default value for check_exact + is based off the Index dtype, instead of the Series dtype. + check_datetimelike_compat : bool, default False Compare datetime-like which is comparable ignoring dtype. check_categorical : bool, default True @@ -913,11 +920,17 @@ def assert_series_equal( or is_numeric_dtype(right.dtype) and not is_float_dtype(right.dtype) ) + left_index_dtypes = ( + [left.index.dtype] if left.index.nlevels == 1 else left.index.dtypes + ) + right_index_dtypes = ( + [right.index.dtype] if right.index.nlevels == 1 else right.index.dtypes + ) check_exact_index = ( - is_numeric_dtype(left.index.dtype) - and not is_float_dtype(left.index.dtype) - or is_numeric_dtype(right.index.dtype) - and not is_float_dtype(right.index.dtype) + all(is_numeric_dtype(dtype) for dtype in left_index_dtypes) + and not any(is_float_dtype(dtype) for dtype in left_index_dtypes) + or all(is_numeric_dtype(dtype) for dtype in right_index_dtypes) + and not any(is_float_dtype(dtype) for dtype in right_index_dtypes) ) elif check_exact is lib.no_default: check_exact = False diff --git a/pandas/tests/util/test_assert_series_equal.py b/pandas/tests/util/test_assert_series_equal.py index 0b3bc07c17452..fff2bf94a80ee 100644 --- a/pandas/tests/util/test_assert_series_equal.py +++ b/pandas/tests/util/test_assert_series_equal.py @@ -475,9 +475,40 @@ def test_assert_series_equal_int_tol(): ) -def test_assert_series_equal_index_exact_default(): +@pytest.mark.parametrize( + "left_idx, right_idx", + [ + ( + pd.Index([0, 0.2, 0.4, 0.6, 0.8, 1]), + pd.Index(np.linspace(0, 1, 6)), + ), + ( + pd.MultiIndex.from_arrays([[0, 0, 0, 0, 1, 1], [0, 0.2, 
0.4, 0.6, 0.8, 1]]), + pd.MultiIndex.from_arrays([[0, 0, 0, 0, 1, 1], np.linspace(0, 1, 6)]), + ), + ( + pd.MultiIndex.from_arrays( + [["a", "a", "a", "b", "b", "b"], [1, 2, 3, 4, 5, 10000000000001]] + ), + pd.MultiIndex.from_arrays( + [["a", "a", "a", "b", "b", "b"], [1, 2, 3, 4, 5, 10000000000002]] + ), + ), + pytest.param( + pd.Index([1, 2, 3, 4, 5, 10000000000001]), + pd.Index([1, 2, 3, 4, 5, 10000000000002]), + marks=pytest.mark.xfail(reason="check_exact_index defaults to True"), + ), + pytest.param( + pd.MultiIndex.from_arrays([[0, 0, 0, 0, 1, 1], [1, 2, 3, 4, 5, 10000000000001]]), + pd.MultiIndex.from_arrays([[0, 0, 0, 0, 1, 1], [1, 2, 3, 4, 5, 10000000000002]]), + marks=pytest.mark.xfail(reason="check_exact_index defaults to True"), + ), + ], +) +def test_assert_series_equal_check_exact_index_default(left_idx, right_idx): # GH#57067 - ser1 = Series(np.zeros(6, dtype=int), [0, 0.2, 0.4, 0.6, 0.8, 1]) - ser2 = Series(np.zeros(6, dtype=int), np.linspace(0, 1, 6)) + ser1 = Series(np.zeros(6, dtype=int), left_idx) + ser2 = Series(np.zeros(6, dtype=int), right_idx) tm.assert_series_equal(ser1, ser2) tm.assert_frame_equal(ser1.to_frame(), ser2.to_frame()) From b991ebfbca4a10b8358f5e59943eff9610a395a6 Mon Sep 17 00:00:00 2001 From: Abdulaziz Aloqeely <52792999+Aloqeely@users.noreply.github.com> Date: Tue, 23 Apr 2024 21:58:42 +0300 Subject: [PATCH 4/5] Pre-commit stuff --- pandas/tests/util/test_assert_series_equal.py | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/pandas/tests/util/test_assert_series_equal.py b/pandas/tests/util/test_assert_series_equal.py index fff2bf94a80ee..f75f48157aad2 100644 --- a/pandas/tests/util/test_assert_series_equal.py +++ b/pandas/tests/util/test_assert_series_equal.py @@ -500,8 +500,12 @@ def test_assert_series_equal_int_tol(): marks=pytest.mark.xfail(reason="check_exact_index defaults to True"), ), pytest.param( - pd.MultiIndex.from_arrays([[0, 0, 0, 0, 1, 1], [1, 2, 3, 4, 5, 10000000000001]]), - 
pd.MultiIndex.from_arrays([[0, 0, 0, 0, 1, 1], [1, 2, 3, 4, 5, 10000000000002]]), + pd.MultiIndex.from_arrays( + [[0, 0, 0, 0, 1, 1], [1, 2, 3, 4, 5, 10000000000001]] + ), + pd.MultiIndex.from_arrays( + [[0, 0, 0, 0, 1, 1], [1, 2, 3, 4, 5, 10000000000002]] + ), marks=pytest.mark.xfail(reason="check_exact_index defaults to True"), ), ], From ac593ea2a6c0d17b8b3eb55a91411a2e958eb9cb Mon Sep 17 00:00:00 2001 From: Abdulaziz Aloqeely <52792999+Aloqeely@users.noreply.github.com> Date: Wed, 24 Apr 2024 07:09:45 +0300 Subject: [PATCH 5/5] Address review comments --- doc/source/whatsnew/v3.0.0.rst | 2 +- pandas/_testing/asserters.py | 13 +++++-------- 2 files changed, 6 insertions(+), 9 deletions(-) diff --git a/doc/source/whatsnew/v3.0.0.rst b/doc/source/whatsnew/v3.0.0.rst index d85cd690046b5..dbfcd9b35ef56 100644 --- a/doc/source/whatsnew/v3.0.0.rst +++ b/doc/source/whatsnew/v3.0.0.rst @@ -152,7 +152,7 @@ Other API changes - Updated :meth:`DataFrame.to_excel` so that the output spreadsheet has no styling. Custom styling can still be done using :meth:`Styler.to_excel` (:issue:`54154`) - pickle and HDF (``.h5``) files created with Python 2 are no longer explicitly supported (:issue:`57387`) - pickled objects from pandas version less than ``1.0.0`` are no longer supported (:issue:`57155`) -- when comparing the indexes in :func:`testing.assert_series_equal`, the default value for check_exact is decided based on the :class:`Index` dtype, instead of the :class:`Series` dtype. (:issue:`57386`) +- when comparing the indexes in :func:`testing.assert_series_equal`, check_exact defaults to True if an :class:`Index` is of integer dtypes. (:issue:`57386`) .. --------------------------------------------------------------------------- .. 
_whatsnew_300.deprecations: diff --git a/pandas/_testing/asserters.py b/pandas/_testing/asserters.py index 8efec24cc1380..543d7944e4c5d 100644 --- a/pandas/_testing/asserters.py +++ b/pandas/_testing/asserters.py @@ -871,8 +871,8 @@ def assert_series_equal( .. versionchanged:: 3.0.0 - When checking Index equivalence, the default value for check_exact - is based off the Index dtype, instead of the Series dtype. + When comparing the Indexes, check_exact defaults to True if + either Index has only integer dtypes. check_datetimelike_compat : bool, default False Compare datetime-like which is comparable ignoring dtype. @@ -926,12 +926,9 @@ def assert_series_equal( right_index_dtypes = ( [right.index.dtype] if right.index.nlevels == 1 else right.index.dtypes ) - check_exact_index = ( - all(is_numeric_dtype(dtype) for dtype in left_index_dtypes) - and not any(is_float_dtype(dtype) for dtype in left_index_dtypes) - or all(is_numeric_dtype(dtype) for dtype in right_index_dtypes) - and not any(is_float_dtype(dtype) for dtype in right_index_dtypes) - ) + check_exact_index = all( + dtype.kind in "iu" for dtype in left_index_dtypes + ) or all(dtype.kind in "iu" for dtype in right_index_dtypes) elif check_exact is lib.no_default: check_exact = False check_exact_index = False