Skip to content

Commit ad74d18

Browse files
committed
BUG: Fix assert_frame_equal dtype handling when check_dtype=False (#61473)
1 parent cfe54bd commit ad74d18

File tree

3 files changed

+29
-14
lines changed

3 files changed

+29
-14
lines changed

doc/source/whatsnew/v3.0.0.rst

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -857,6 +857,7 @@ Sparse
857857
ExtensionArray
858858
^^^^^^^^^^^^^^
859859
- Bug in :class:`Categorical` when constructing with an :class:`Index` with :class:`ArrowDtype` (:issue:`60563`)
860+
- Bug in :func:`assert_frame_equal` with ``check_dtype=False`` that failed when comparing columns containing ``pd.NA`` whose dtypes differ (e.g. ``Int32`` and ``object``); such columns are now compared by coercing both sides to ``dtype="object"`` internally (:issue:`61473`)
860861
- Bug in :meth:`.arrays.ArrowExtensionArray.__setitem__` which caused wrong behavior when using an integer array with repeated values as a key (:issue:`58530`)
861862
- Bug in :meth:`ArrowExtensionArray.factorize` where NA values were dropped when input was dictionary-encoded even when dropna was set to False (:issue:`60567`)
862863
- Bug in :meth:`api.types.is_datetime64_any_dtype` where a custom :class:`ExtensionDtype` would return ``False`` for array-likes (:issue:`57055`)

pandas/_testing/asserters.py

Lines changed: 20 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -1014,16 +1014,22 @@ def assert_series_equal(
10141014
pass
10151015
else:
10161016
assert_attr_equal("dtype", left, right, obj=f"Attributes of {obj}")
1017-
if check_exact:
1017+
1018+
if not check_dtype:
1019+
left_values = left.to_numpy(dtype="object")
1020+
right_values = right.to_numpy(dtype="object")
1021+
else:
10181022
left_values = left._values
10191023
right_values = right._values
1024+
1025+
if check_exact:
10201026
# Only check exact if dtype is numeric
1021-
if isinstance(left_values, ExtensionArray) and isinstance(
1022-
right_values, ExtensionArray
1027+
if isinstance(left._values, ExtensionArray) and isinstance(
1028+
right._values, ExtensionArray
10231029
):
10241030
assert_extension_array_equal(
1025-
left_values,
1026-
right_values,
1031+
left._values,
1032+
right._values,
10271033
check_dtype=check_dtype,
10281034
index_values=left.index,
10291035
obj=str(obj),
@@ -1051,10 +1057,10 @@ def assert_series_equal(
10511057

10521058
# datetimelike may have different objects (e.g. datetime.datetime
10531059
# vs Timestamp) but will compare equal
1054-
if not Index(left._values).equals(Index(right._values)):
1060+
if not Index(left_values).equals(Index(right_values)):
10551061
msg = (
1056-
f"[datetimelike_compat=True] {left._values} "
1057-
f"is not equal to {right._values}."
1062+
f"[datetimelike_compat=True] {left_values} "
1063+
f"is not equal to {right_values}."
10581064
)
10591065
raise AssertionError(msg)
10601066
elif isinstance(left.dtype, IntervalDtype) and isinstance(
@@ -1065,8 +1071,8 @@ def assert_series_equal(
10651071
right.dtype, CategoricalDtype
10661072
):
10671073
_testing.assert_almost_equal(
1068-
left._values,
1069-
right._values,
1074+
left_values,
1075+
right_values,
10701076
rtol=rtol,
10711077
atol=atol,
10721078
check_dtype=bool(check_dtype),
@@ -1106,8 +1112,8 @@ def assert_series_equal(
11061112
)
11071113
else:
11081114
_testing.assert_almost_equal(
1109-
left._values,
1110-
right._values,
1115+
left_values,
1116+
right_values,
11111117
rtol=rtol,
11121118
atol=atol,
11131119
check_dtype=bool(check_dtype),
@@ -1124,8 +1130,8 @@ def assert_series_equal(
11241130
right.dtype, CategoricalDtype
11251131
):
11261132
assert_categorical_equal(
1127-
left._values,
1128-
right._values,
1133+
left_values,
1134+
right_values,
11291135
obj=f"{obj} category",
11301136
check_category_order=check_category_order,
11311137
)

pandas/tests/util/test_assert_frame_equal.py

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -395,3 +395,11 @@ def test_assert_frame_equal_set_mismatch():
395395
msg = r'DataFrame.iloc\[:, 0\] \(column name="set_column"\) values are different'
396396
with pytest.raises(AssertionError, match=msg):
397397
tm.assert_frame_equal(df1, df2)
398+
399+
400+
def test_assert_frame_equal_with_pdNA_and_check_dtype_false():
401+
# GH#61473
402+
df1 = DataFrame({"x": pd.Series([pd.NA], dtype="Int32")})
403+
df2 = DataFrame({"x": pd.Series([pd.NA], dtype="object")})
404+
405+
tm.assert_frame_equal(df1, df2, check_dtype=False)

0 commit comments

Comments
 (0)