Skip to content

Commit 5e422e4

Browse files
committed
BUG: Fix assert_frame_equal dtype handling when check_dtype=False (#61473)
1 parent cfe54bd commit 5e422e4

File tree

3 files changed

+32
-7
lines changed

3 files changed

+32
-7
lines changed

doc/source/whatsnew/v3.0.0.rst

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -857,6 +857,7 @@ Sparse
857857
ExtensionArray
858858
^^^^^^^^^^^^^^
859859
- Bug in :class:`Categorical` when constructing with an :class:`Index` with :class:`ArrowDtype` (:issue:`60563`)
860+
- Bug in :func:`assert_frame_equal` with ``check_dtype=False`` where comparing columns containing ``pd.NA`` with ``Int32`` and ``object`` dtypes failed. Such comparisons are now handled by coercing both sides to ``dtype="object"`` internally. (:issue:`61473`)
860861
- Bug in :meth:`.arrays.ArrowExtensionArray.__setitem__` which caused wrong behavior when using an integer array with repeated values as a key (:issue:`58530`)
861862
- Bug in :meth:`ArrowExtensionArray.factorize` where NA values were dropped when input was dictionary-encoded even when dropna was set to False (:issue:`60567`)
862863
- Bug in :meth:`api.types.is_datetime64_any_dtype` where a custom :class:`ExtensionDtype` would return ``False`` for array-likes (:issue:`57055`)

pandas/_testing/asserters.py

Lines changed: 23 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -1014,6 +1014,7 @@ def assert_series_equal(
10141014
pass
10151015
else:
10161016
assert_attr_equal("dtype", left, right, obj=f"Attributes of {obj}")
1017+
10171018
if check_exact:
10181019
left_values = left._values
10191020
right_values = right._values
@@ -1030,11 +1031,19 @@ def assert_series_equal(
10301031
)
10311032
else:
10321033
# convert both to NumPy if not, check_dtype would raise earlier
1033-
lv, rv = left_values, right_values
1034-
if isinstance(left_values, ExtensionArray):
1035-
lv = left_values.to_numpy()
1036-
if isinstance(right_values, ExtensionArray):
1037-
rv = right_values.to_numpy()
1034+
lv, rv = left._values, right._values
1035+
if not check_dtype and not (
1036+
isinstance(lv, ExtensionArray)
1037+
and isinstance(rv, ExtensionArray)
1038+
):
1039+
lv = left.to_numpy(dtype="object")
1040+
rv = right.to_numpy(dtype="object")
1041+
else:
1042+
if isinstance(lv, ExtensionArray):
1043+
lv = lv.to_numpy()
1044+
if isinstance(rv, ExtensionArray):
1045+
rv = rv.to_numpy()
1046+
10381047
assert_numpy_array_equal(
10391048
lv,
10401049
rv,
@@ -1105,9 +1114,16 @@ def assert_series_equal(
11051114
obj=str(obj),
11061115
)
11071116
else:
1117+
lv, rv = left._values, right._values
1118+
if not check_dtype and not (
1119+
isinstance(left._values, ExtensionArray)
1120+
and isinstance(right._values, ExtensionArray)
1121+
):
1122+
lv = left.to_numpy(dtype="object")
1123+
rv = right.to_numpy(dtype="object")
11081124
_testing.assert_almost_equal(
1109-
left._values,
1110-
right._values,
1125+
lv,
1126+
rv,
11111127
rtol=rtol,
11121128
atol=atol,
11131129
check_dtype=bool(check_dtype),

pandas/tests/util/test_assert_frame_equal.py

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -395,3 +395,11 @@ def test_assert_frame_equal_set_mismatch():
395395
msg = r'DataFrame.iloc\[:, 0\] \(column name="set_column"\) values are different'
396396
with pytest.raises(AssertionError, match=msg):
397397
tm.assert_frame_equal(df1, df2)
398+
399+
400+
def test_assert_frame_equal_with_pdNA_and_check_dtype_false():
401+
# GH#61473
402+
df1 = DataFrame({"x": pd.Series([pd.NA], dtype="Int32")})
403+
df2 = DataFrame({"x": pd.Series([pd.NA], dtype="object")})
404+
405+
tm.assert_frame_equal(df1, df2, check_dtype=False)

0 commit comments

Comments
 (0)