Skip to content

Commit fcac82c

Browse files
committed
BUG: Fix assert_frame_equal dtype handling when check_dtype=False (#61473)
1 parent cfe54bd commit fcac82c

File tree

3 files changed

+31
-7
lines changed

3 files changed

+31
-7
lines changed

doc/source/whatsnew/v3.0.0.rst

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -857,6 +857,7 @@ Sparse
857857
ExtensionArray
858858
^^^^^^^^^^^^^^
859859
- Bug in :class:`Categorical` when constructing with an :class:`Index` with :class:`ArrowDtype` (:issue:`60563`)
860+
- Bug in :func:`assert_frame_equal` with ``check_dtype=False`` that failed when comparing columns containing ``pd.NA`` with ``Int32`` and ``object`` dtypes. Now handles such comparisons by coercing to ``dtype="object"`` internally. (:issue:`61473`)
860861
- Bug in :meth:`.arrays.ArrowExtensionArray.__setitem__` which caused wrong behavior when using an integer array with repeated values as a key (:issue:`58530`)
861862
- Bug in :meth:`ArrowExtensionArray.factorize` where NA values were dropped when input was dictionary-encoded even when dropna was set to False (:issue:`60567`)
862863
- Bug in :meth:`api.types.is_datetime64_any_dtype` where a custom :class:`ExtensionDtype` would return ``False`` for array-likes (:issue:`57055`)

pandas/_testing/asserters.py

Lines changed: 22 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -1014,6 +1014,7 @@ def assert_series_equal(
10141014
pass
10151015
else:
10161016
assert_attr_equal("dtype", left, right, obj=f"Attributes of {obj}")
1017+
10171018
if check_exact:
10181019
left_values = left._values
10191020
right_values = right._values
@@ -1030,11 +1031,18 @@ def assert_series_equal(
10301031
)
10311032
else:
10321033
# convert both to NumPy if not, check_dtype would raise earlier
1033-
lv, rv = left_values, right_values
1034-
if isinstance(left_values, ExtensionArray):
1035-
lv = left_values.to_numpy()
1036-
if isinstance(right_values, ExtensionArray):
1037-
rv = right_values.to_numpy()
1034+
lv, rv = left._values, right._values
1035+
if not check_dtype and not (
1036+
isinstance(lv, ExtensionArray) and isinstance(rv, ExtensionArray)
1037+
):
1038+
lv = left.to_numpy(dtype="object")
1039+
rv = right.to_numpy(dtype="object")
1040+
else:
1041+
if isinstance(lv, ExtensionArray):
1042+
lv = lv.to_numpy()  # left operand must be converted from its own values
1043+
if isinstance(rv, ExtensionArray):
1044+
rv = rv.to_numpy()
1045+
10381046
assert_numpy_array_equal(
10391047
lv,
10401048
rv,
@@ -1105,9 +1113,16 @@ def assert_series_equal(
11051113
obj=str(obj),
11061114
)
11071115
else:
1116+
lv, rv = left._values, right._values
1117+
if not check_dtype and not (
1118+
isinstance(left._values, ExtensionArray)
1119+
and isinstance(right._values, ExtensionArray)
1120+
):
1121+
lv = left.to_numpy(dtype="object")
1122+
rv = right.to_numpy(dtype="object")
11081123
_testing.assert_almost_equal(
1109-
left._values,
1110-
right._values,
1124+
lv,
1125+
rv,
11111126
rtol=rtol,
11121127
atol=atol,
11131128
check_dtype=bool(check_dtype),

pandas/tests/util/test_assert_frame_equal.py

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -395,3 +395,11 @@ def test_assert_frame_equal_set_mismatch():
395395
msg = r'DataFrame.iloc\[:, 0\] \(column name="set_column"\) values are different'
396396
with pytest.raises(AssertionError, match=msg):
397397
tm.assert_frame_equal(df1, df2)
398+
399+
400+
def test_assert_frame_equal_with_pdNA_and_check_dtype_false():
401+
# GH#61473
402+
df1 = DataFrame({"x": pd.Series([pd.NA], dtype="Int32")})
403+
df2 = DataFrame({"x": pd.Series([pd.NA], dtype="object")})
404+
405+
tm.assert_frame_equal(df1, df2, check_dtype=False)

0 commit comments

Comments
 (0)