From a13946afe02083f8c85b63b47118f4c924e9794f Mon Sep 17 00:00:00 2001 From: patrick <61934744+phofl@users.noreply.github.com> Date: Thu, 28 Jan 2021 03:34:12 +0100 Subject: [PATCH 1/3] BUG: Assert_frame_equal always raising AssertionError when comparing extension dtypes (#39423) (cherry picked from commit 201d263aefecabc84ee49add65f37d06b45966e9) --- doc/source/whatsnew/v1.2.2.rst | 1 + pandas/_testing/asserters.py | 0 pandas/tests/util/test_assert_frame_equal.py | 16 ++++++++++++++++ 3 files changed, 17 insertions(+) create mode 100644 pandas/_testing/asserters.py diff --git a/doc/source/whatsnew/v1.2.2.rst b/doc/source/whatsnew/v1.2.2.rst index 95fbbaf5d566e..656e779055486 100644 --- a/doc/source/whatsnew/v1.2.2.rst +++ b/doc/source/whatsnew/v1.2.2.rst @@ -15,6 +15,7 @@ including other versions of pandas. Fixed regressions ~~~~~~~~~~~~~~~~~ - Fixed regression in :meth:`~DataFrame.to_pickle` failing to create bz2/xz compressed pickle files with ``protocol=5`` (:issue:`39002`) +- Fixed regression in :func:`pandas.testing.assert_series_equal` and :func:`pandas.testing.assert_frame_equal` always raising ``AssertionError`` when comparing extension dtypes (:issue:`39410`) - .. 
--------------------------------------------------------------------------- diff --git a/pandas/_testing/asserters.py b/pandas/_testing/asserters.py new file mode 100644 index 0000000000000..e69de29bb2d1d diff --git a/pandas/tests/util/test_assert_frame_equal.py b/pandas/tests/util/test_assert_frame_equal.py index bf80a1410e7d9..bd3ea12d9a4b6 100644 --- a/pandas/tests/util/test_assert_frame_equal.py +++ b/pandas/tests/util/test_assert_frame_equal.py @@ -305,3 +305,19 @@ def test_assert_frame_equal_columns_mixed_dtype(): # GH#39168 df = DataFrame([[0, 1, 2]], columns=["foo", "bar", 42], index=[1, "test", 2]) tm.assert_frame_equal(df, df, check_like=True) + + +def test_frame_equal_extension_dtype(frame_or_series, any_nullable_numeric_dtype): + # GH#39410 + obj = frame_or_series([1, 2], dtype=any_nullable_numeric_dtype) + tm.assert_equal(obj, obj, check_exact=True) + + +@pytest.mark.parametrize("indexer", [(0, 1), (1, 0)]) +def test_frame_equal_mixed_dtypes(frame_or_series, any_nullable_numeric_dtype, indexer): + dtypes = (any_nullable_numeric_dtype, "int64") + obj1 = frame_or_series([1, 2], dtype=dtypes[indexer[0]]) + obj2 = frame_or_series([1, 2], dtype=dtypes[indexer[1]]) + msg = r'(Series|DataFrame.iloc\[:, 0\] \(column name="0"\) classes) are different' + with pytest.raises(AssertionError, match=msg): + tm.assert_equal(obj1, obj2, check_exact=True, check_dtype=False) From 71fef7c1adf50b466330b0079bbdf9ec0970aa7e Mon Sep 17 00:00:00 2001 From: phofl Date: Thu, 28 Jan 2021 09:44:27 +0100 Subject: [PATCH 2/3] Resolve remaining merge conflicts --- pandas/_testing.py | 26 ++++++++++++++------ pandas/tests/util/test_assert_frame_equal.py | 8 +++--- 2 files changed, 23 insertions(+), 11 deletions(-) diff --git a/pandas/_testing.py b/pandas/_testing.py index 224c8d540c6bb..1df3351a7241c 100644 --- a/pandas/_testing.py +++ b/pandas/_testing.py @@ -1402,14 +1402,26 @@ def assert_series_equal( assert_attr_equal("dtype", left, right, obj=f"Attributes of {obj}") if check_exact 
and is_numeric_dtype(left.dtype) and is_numeric_dtype(right.dtype): + left_values = left._values + right_values = right._values # Only check exact if dtype is numeric - assert_numpy_array_equal( - left._values, - right._values, - check_dtype=check_dtype, - obj=str(obj), - index_values=np.asarray(left.index), - ) + if is_extension_array_dtype(left_values) and is_extension_array_dtype( + right_values + ): + assert_extension_array_equal( + left_values, + right_values, + check_dtype=check_dtype, + index_values=np.asarray(left.index), + ) + else: + assert_numpy_array_equal( + left_values, + right_values, + check_dtype=check_dtype, + obj=str(obj), + index_values=np.asarray(left.index), + ) elif check_datetimelike_compat and ( needs_i8_conversion(left.dtype) or needs_i8_conversion(right.dtype) ): diff --git a/pandas/tests/util/test_assert_frame_equal.py b/pandas/tests/util/test_assert_frame_equal.py index bd3ea12d9a4b6..f8539e9031d28 100644 --- a/pandas/tests/util/test_assert_frame_equal.py +++ b/pandas/tests/util/test_assert_frame_equal.py @@ -307,15 +307,15 @@ def test_assert_frame_equal_columns_mixed_dtype(): tm.assert_frame_equal(df, df, check_like=True) -def test_frame_equal_extension_dtype(frame_or_series, any_nullable_numeric_dtype): +def test_frame_equal_extension_dtype(frame_or_series, any_numeric_dtype): # GH#39410 - obj = frame_or_series([1, 2], dtype=any_nullable_numeric_dtype) + obj = frame_or_series([1, 2], dtype=any_numeric_dtype) tm.assert_equal(obj, obj, check_exact=True) @pytest.mark.parametrize("indexer", [(0, 1), (1, 0)]) -def test_frame_equal_mixed_dtypes(frame_or_series, any_nullable_numeric_dtype, indexer): - dtypes = (any_nullable_numeric_dtype, "int64") +def test_frame_equal_mixed_dtypes(frame_or_series, any_numeric_dtype, indexer): + dtypes = (any_numeric_dtype, "int64") obj1 = frame_or_series([1, 2], dtype=dtypes[indexer[0]]) obj2 = frame_or_series([1, 2], dtype=dtypes[indexer[1]]) msg = r'(Series|DataFrame.iloc\[:, 0\] \(column name="0"\) 
classes) are different' From 0486ebf7bbbc522d134a177d09096a10fea8a46f Mon Sep 17 00:00:00 2001 From: phofl Date: Thu, 28 Jan 2021 09:46:54 +0100 Subject: [PATCH 3/3] Remove file --- pandas/_testing/asserters.py | 0 1 file changed, 0 insertions(+), 0 deletions(-) delete mode 100644 pandas/_testing/asserters.py diff --git a/pandas/_testing/asserters.py b/pandas/_testing/asserters.py deleted file mode 100644 index e69de29bb2d1d..0000000000000