Skip to content

Commit e738953

Browse files
committed
BUG: Fix assert_frame_equal dtype handling when check_dtype=False (#61473)
1 parent cfe54bd commit e738953

File tree

3 files changed

+34
-16
lines changed

3 files changed

+34
-16
lines changed

doc/source/whatsnew/v3.0.0.rst

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -857,6 +857,7 @@ Sparse
857857
ExtensionArray
858858
^^^^^^^^^^^^^^
859859
- Bug in :class:`Categorical` when constructing with an :class:`Index` with :class:`ArrowDtype` (:issue:`60563`)
860+
- Bug in :func:`assert_frame_equal` where comparisons with ``check_dtype=False`` failed for columns containing ``pd.NA`` when one column had ``Int32`` dtype and the other had ``object`` dtype. Such columns are now compared after being coerced to ``dtype="object"`` internally. (:issue:`61473`)
860861
- Bug in :meth:`.arrays.ArrowExtensionArray.__setitem__` which caused wrong behavior when using an integer array with repeated values as a key (:issue:`58530`)
861862
- Bug in :meth:`ArrowExtensionArray.factorize` where NA values were dropped when input was dictionary-encoded even when ``dropna`` was set to ``False`` (:issue:`60567`)
862863
- Bug in :meth:`api.types.is_datetime64_any_dtype` where a custom :class:`ExtensionDtype` would return ``False`` for array-likes (:issue:`57055`)

pandas/_testing/asserters.py

Lines changed: 25 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -1014,9 +1014,18 @@ def assert_series_equal(
10141014
pass
10151015
else:
10161016
assert_attr_equal("dtype", left, right, obj=f"Attributes of {obj}")
1017-
if check_exact:
1017+
1018+
if not check_dtype and not (
1019+
isinstance(left._values, ExtensionArray)
1020+
and isinstance(right._values, ExtensionArray)
1021+
):
1022+
left_values = left.to_numpy(dtype="object")
1023+
right_values = right.to_numpy(dtype="object")
1024+
else:
10181025
left_values = left._values
10191026
right_values = right._values
1027+
1028+
if check_exact:
10201029
# Only check exact if dtype is numeric
10211030
if isinstance(left_values, ExtensionArray) and isinstance(
10221031
right_values, ExtensionArray
@@ -1051,10 +1060,10 @@ def assert_series_equal(
10511060

10521061
# datetimelike may have different objects (e.g. datetime.datetime
10531062
# vs Timestamp) but will compare equal
1054-
if not Index(left._values).equals(Index(right._values)):
1063+
if not Index(left_values).equals(Index(right_values)):
10551064
msg = (
1056-
f"[datetimelike_compat=True] {left._values} "
1057-
f"is not equal to {right._values}."
1065+
f"[datetimelike_compat=True] {left_values} "
1066+
f"is not equal to {right_values}."
10581067
)
10591068
raise AssertionError(msg)
10601069
elif isinstance(left.dtype, IntervalDtype) and isinstance(
@@ -1065,8 +1074,8 @@ def assert_series_equal(
10651074
right.dtype, CategoricalDtype
10661075
):
10671076
_testing.assert_almost_equal(
1068-
left._values,
1069-
right._values,
1077+
left_values,
1078+
right_values,
10701079
rtol=rtol,
10711080
atol=atol,
10721081
check_dtype=bool(check_dtype),
@@ -1077,8 +1086,8 @@ def assert_series_equal(
10771086
right.dtype, ExtensionDtype
10781087
):
10791088
assert_extension_array_equal(
1080-
left._values,
1081-
right._values,
1089+
left_values,
1090+
right_values,
10821091
rtol=rtol,
10831092
atol=atol,
10841093
check_dtype=check_dtype,
@@ -1089,25 +1098,25 @@ def assert_series_equal(
10891098
left.dtype, right.dtype
10901099
) or is_extension_array_dtype_and_needs_i8_conversion(right.dtype, left.dtype):
10911100
assert_extension_array_equal(
1092-
left._values,
1093-
right._values,
1101+
left_values,
1102+
right_values,
10941103
check_dtype=check_dtype,
10951104
index_values=left.index,
10961105
obj=str(obj),
10971106
)
10981107
elif needs_i8_conversion(left.dtype) and needs_i8_conversion(right.dtype):
10991108
# DatetimeArray or TimedeltaArray
11001109
assert_extension_array_equal(
1101-
left._values,
1102-
right._values,
1110+
left_values,
1111+
right_values,
11031112
check_dtype=check_dtype,
11041113
index_values=left.index,
11051114
obj=str(obj),
11061115
)
11071116
else:
11081117
_testing.assert_almost_equal(
1109-
left._values,
1110-
right._values,
1118+
left_values,
1119+
right_values,
11111120
rtol=rtol,
11121121
atol=atol,
11131122
check_dtype=bool(check_dtype),
@@ -1124,8 +1133,8 @@ def assert_series_equal(
11241133
right.dtype, CategoricalDtype
11251134
):
11261135
assert_categorical_equal(
1127-
left._values,
1128-
right._values,
1136+
left_values,
1137+
right_values,
11291138
obj=f"{obj} category",
11301139
check_category_order=check_category_order,
11311140
)

pandas/tests/util/test_assert_frame_equal.py

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -395,3 +395,11 @@ def test_assert_frame_equal_set_mismatch():
395395
msg = r'DataFrame.iloc\[:, 0\] \(column name="set_column"\) values are different'
396396
with pytest.raises(AssertionError, match=msg):
397397
tm.assert_frame_equal(df1, df2)
398+
399+
400+
def test_assert_frame_equal_with_pdNA_and_check_dtype_false():
401+
# GH#61473
402+
df1 = DataFrame({"x": pd.Series([pd.NA], dtype="Int32")})
403+
df2 = DataFrame({"x": pd.Series([pd.NA], dtype="object")})
404+
405+
tm.assert_frame_equal(df1, df2, check_dtype=False)

0 commit comments

Comments
 (0)