From fcac82c96571768eace02c753bb1da06ee175b1d Mon Sep 17 00:00:00 2001 From: iabhi4 Date: Fri, 30 May 2025 23:03:21 -0700 Subject: [PATCH] BUG: Fix assert_frame_equal dtype handling when check_dtype=False (#61473) --- doc/source/whatsnew/v3.0.0.rst | 1 + pandas/_testing/asserters.py | 29 +++++++++++++++----- pandas/tests/util/test_assert_frame_equal.py | 8 ++++++ 3 files changed, 31 insertions(+), 7 deletions(-) diff --git a/doc/source/whatsnew/v3.0.0.rst b/doc/source/whatsnew/v3.0.0.rst index 099e5bc48353a..16f16da678d0c 100644 --- a/doc/source/whatsnew/v3.0.0.rst +++ b/doc/source/whatsnew/v3.0.0.rst @@ -857,6 +857,7 @@ Sparse ExtensionArray ^^^^^^^^^^^^^^ - Bug in :class:`Categorical` when constructing with an :class:`Index` with :class:`ArrowDtype` (:issue:`60563`) +- Bug in :func:`assert_frame_equal` with ``check_dtype=False`` that failed when comparing columns containing ``pd.NA`` with ``Int32`` and ``object`` dtypes. Now handles such comparisons by coercing to ``dtype="object"`` internally. 
(:issue:`61473`) - Bug in :meth:`.arrays.ArrowExtensionArray.__setitem__` which caused wrong behavior when using an integer array with repeated values as a key (:issue:`58530`) - Bug in :meth:`ArrowExtensionArray.factorize` where NA values were dropped when input was dictionary-encoded even when dropna was set to False(:issue:`60567`) - Bug in :meth:`api.types.is_datetime64_any_dtype` where a custom :class:`ExtensionDtype` would return ``False`` for array-likes (:issue:`57055`) diff --git a/pandas/_testing/asserters.py b/pandas/_testing/asserters.py index daa5187cdb636..87c3c2f1de8ad 100644 --- a/pandas/_testing/asserters.py +++ b/pandas/_testing/asserters.py @@ -1014,6 +1014,7 @@ def assert_series_equal( pass else: assert_attr_equal("dtype", left, right, obj=f"Attributes of {obj}") + if check_exact: left_values = left._values right_values = right._values @@ -1030,11 +1031,18 @@ def assert_series_equal( ) else: # convert both to NumPy if not, check_dtype would raise earlier - lv, rv = left_values, right_values - if isinstance(left_values, ExtensionArray): - lv = left_values.to_numpy() - if isinstance(right_values, ExtensionArray): - rv = right_values.to_numpy() + lv, rv = left._values, right._values + if not check_dtype and not ( + isinstance(lv, ExtensionArray) and isinstance(rv, ExtensionArray) + ): + lv = left.to_numpy(dtype="object") + rv = right.to_numpy(dtype="object") + else: + if isinstance(lv, ExtensionArray): + lv = lv.to_numpy() + if isinstance(rv, ExtensionArray): + rv = rv.to_numpy() + assert_numpy_array_equal( lv, rv, @@ -1105,9 +1113,16 @@ def assert_series_equal( obj=str(obj), ) else: + lv, rv = left._values, right._values + if not check_dtype and not ( + isinstance(left._values, ExtensionArray) + and isinstance(right._values, ExtensionArray) + ): + lv = left.to_numpy(dtype="object") + rv = right.to_numpy(dtype="object") _testing.assert_almost_equal( - left._values, - right._values, + lv, + rv, rtol=rtol, atol=atol, check_dtype=bool(check_dtype), 
diff --git a/pandas/tests/util/test_assert_frame_equal.py b/pandas/tests/util/test_assert_frame_equal.py index ea954756d63c8..333d76af2f92f 100644 --- a/pandas/tests/util/test_assert_frame_equal.py +++ b/pandas/tests/util/test_assert_frame_equal.py @@ -395,3 +395,11 @@ def test_assert_frame_equal_set_mismatch(): msg = r'DataFrame.iloc\[:, 0\] \(column name="set_column"\) values are different' with pytest.raises(AssertionError, match=msg): tm.assert_frame_equal(df1, df2) + + +def test_assert_frame_equal_with_pdNA_and_check_dtype_false(): + # GH#61473 + df1 = DataFrame({"x": pd.Series([pd.NA], dtype="Int32")}) + df2 = DataFrame({"x": pd.Series([pd.NA], dtype="object")}) + + tm.assert_frame_equal(df1, df2, check_dtype=False)