Skip to content

Commit 17eec96

Browse files
parthi-sivaMarcoGorellimroeschke
authored
BUG: Set check_exact to true if dtype is int (#55934)
* BUG: Set check_exact to true if dtype is int * BUG: Add check to ignore dtype difference - If int dtype is different we are ignoring the difference - so added check to set check_exact to true only when dtype is same * TST: Added test cases * TST: Fix failing test cases * DOC: Update function documentation * check_exact only takes effect for floating dtypes * xfail failing test * Update pandas/tests/extension/base/methods.py Co-authored-by: Matthew Roeschke <[email protected]> --------- Co-authored-by: MarcoGorelli <[email protected]> Co-authored-by: Marco Edward Gorelli <[email protected]> Co-authored-by: Matthew Roeschke <[email protected]>
1 parent b19a093 commit 17eec96

File tree

8 files changed

+78
-23
lines changed

8 files changed

+78
-23
lines changed

doc/source/whatsnew/v2.2.0.rst

+1-1
Original file line numberDiff line numberDiff line change
@@ -315,7 +315,7 @@ See :ref:`install.dependencies` and :ref:`install.optional_dependencies` for mor
315315

316316
Other API changes
317317
^^^^^^^^^^^^^^^^^
318-
-
318+
- ``check_exact`` now only takes effect for floating-point dtypes in :func:`testing.assert_frame_equal` and :func:`testing.assert_series_equal`. In particular, integer dtypes are always checked exactly (:issue:`55882`)
319319
-
320320

321321
.. ---------------------------------------------------------------------------

pandas/_testing/asserters.py

+12-6
Original file line numberDiff line numberDiff line change
@@ -16,6 +16,7 @@
1616

1717
from pandas.core.dtypes.common import (
1818
is_bool,
19+
is_float_dtype,
1920
is_integer_dtype,
2021
is_number,
2122
is_numeric_dtype,
@@ -713,7 +714,7 @@ def assert_extension_array_equal(
713714
index_values : Index | numpy.ndarray, default None
714715
Optional index (shared by both left and right), used in output.
715716
check_exact : bool, default False
716-
Whether to compare number exactly.
717+
Whether to compare number exactly. Only takes effect for float dtypes.
717718
rtol : float, default 1e-5
718719
Relative tolerance. Only used when check_exact is False.
719720
atol : float, default 1e-8
@@ -782,7 +783,10 @@ def assert_extension_array_equal(
782783

783784
left_valid = left[~left_na].to_numpy(dtype=object)
784785
right_valid = right[~right_na].to_numpy(dtype=object)
785-
if check_exact:
786+
if check_exact or (
787+
(is_numeric_dtype(left.dtype) and not is_float_dtype(left.dtype))
788+
or (is_numeric_dtype(right.dtype) and not is_float_dtype(right.dtype))
789+
):
786790
assert_numpy_array_equal(
787791
left_valid, right_valid, obj=obj, index_values=index_values
788792
)
@@ -836,7 +840,7 @@ def assert_series_equal(
836840
check_names : bool, default True
837841
Whether to check the Series and Index names attribute.
838842
check_exact : bool, default False
839-
Whether to compare number exactly.
843+
Whether to compare number exactly. Only takes effect for float dtypes.
840844
check_datetimelike_compat : bool, default False
841845
Compare datetime-like which is comparable ignoring dtype.
842846
check_categorical : bool, default True
@@ -929,8 +933,10 @@ def assert_series_equal(
929933
pass
930934
else:
931935
assert_attr_equal("dtype", left, right, obj=f"Attributes of {obj}")
932-
933-
if check_exact and is_numeric_dtype(left.dtype) and is_numeric_dtype(right.dtype):
936+
if check_exact or (
937+
(is_numeric_dtype(left.dtype) and not is_float_dtype(left.dtype))
938+
or (is_numeric_dtype(right.dtype) and not is_float_dtype(right.dtype))
939+
):
934940
left_values = left._values
935941
right_values = right._values
936942
# Only check exact if dtype is numeric
@@ -1093,7 +1099,7 @@ def assert_frame_equal(
10931099
Specify how to compare internal data. If False, compare by columns.
10941100
If True, compare by blocks.
10951101
check_exact : bool, default False
1096-
Whether to compare number exactly.
1102+
Whether to compare number exactly. Only takes effect for float dtypes.
10971103
check_datetimelike_compat : bool, default False
10981104
Compare datetime-like which is comparable ignoring dtype.
10991105
check_categorical : bool, default True

pandas/tests/extension/base/methods.py

+10-5
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,7 @@
77
from pandas._typing import Dtype
88

99
from pandas.core.dtypes.common import is_bool_dtype
10+
from pandas.core.dtypes.dtypes import NumpyEADtype
1011
from pandas.core.dtypes.missing import na_value_for_dtype
1112

1213
import pandas as pd
@@ -331,7 +332,7 @@ def test_fillna_length_mismatch(self, data_missing):
331332
data_missing.fillna(data_missing.take([1]))
332333

333334
# Subclasses can override if we expect e.g Sparse[bool], boolean, pyarrow[bool]
334-
_combine_le_expected_dtype: Dtype = np.dtype(bool)
335+
_combine_le_expected_dtype: Dtype = NumpyEADtype("bool")
335336

336337
def test_combine_le(self, data_repeated):
337338
# GH 20825
@@ -341,16 +342,20 @@ def test_combine_le(self, data_repeated):
341342
s2 = pd.Series(orig_data2)
342343
result = s1.combine(s2, lambda x1, x2: x1 <= x2)
343344
expected = pd.Series(
344-
[a <= b for (a, b) in zip(list(orig_data1), list(orig_data2))],
345-
dtype=self._combine_le_expected_dtype,
345+
pd.array(
346+
[a <= b for (a, b) in zip(list(orig_data1), list(orig_data2))],
347+
dtype=self._combine_le_expected_dtype,
348+
)
346349
)
347350
tm.assert_series_equal(result, expected)
348351

349352
val = s1.iloc[0]
350353
result = s1.combine(val, lambda x1, x2: x1 <= x2)
351354
expected = pd.Series(
352-
[a <= val for a in list(orig_data1)],
353-
dtype=self._combine_le_expected_dtype,
355+
pd.array(
356+
[a <= val for a in list(orig_data1)],
357+
dtype=self._combine_le_expected_dtype,
358+
)
354359
)
355360
tm.assert_series_equal(result, expected)
356361

pandas/tests/io/parser/dtypes/test_dtypes_basic.py

+3
Original file line numberDiff line numberDiff line change
@@ -531,6 +531,9 @@ def test_dtype_backend_pyarrow(all_parsers, request):
531531
tm.assert_frame_equal(result, expected)
532532

533533

534+
# pyarrow engine failing:
535+
# https://github.com/pandas-dev/pandas/issues/56136
536+
@pytest.mark.usefixtures("pyarrow_xfail")
534537
def test_ea_int_avoid_overflow(all_parsers):
535538
# GH#32134
536539
parser = all_parsers

pandas/tests/series/test_constructors.py

+8-2
Original file line numberDiff line numberDiff line change
@@ -572,7 +572,10 @@ def test_constructor_maskedarray(self):
572572
data[1] = 1
573573
result = Series(data, index=index)
574574
expected = Series([0, 1, 2], index=index, dtype=int)
575-
tm.assert_series_equal(result, expected)
575+
with pytest.raises(AssertionError, match="Series classes are different"):
576+
# TODO should this be raising at all?
577+
# https://github.com/pandas-dev/pandas/issues/56131
578+
tm.assert_series_equal(result, expected)
576579

577580
data = ma.masked_all((3,), dtype=bool)
578581
result = Series(data)
@@ -589,7 +592,10 @@ def test_constructor_maskedarray(self):
589592
data[1] = True
590593
result = Series(data, index=index)
591594
expected = Series([True, True, False], index=index, dtype=bool)
592-
tm.assert_series_equal(result, expected)
595+
with pytest.raises(AssertionError, match="Series classes are different"):
596+
# TODO should this be raising at all?
597+
# https://github.com/pandas-dev/pandas/issues/56131
598+
tm.assert_series_equal(result, expected)
593599

594600
data = ma.masked_all((3,), dtype="M8[ns]")
595601
result = Series(data)

pandas/tests/tools/test_to_datetime.py

+7-1
Original file line numberDiff line numberDiff line change
@@ -2118,7 +2118,13 @@ def test_float_to_datetime_raise_near_bounds(self):
21182118
expected = (should_succeed * oneday_in_ns).astype(np.int64)
21192119
for error_mode in ["raise", "coerce", "ignore"]:
21202120
result1 = to_datetime(should_succeed, unit="D", errors=error_mode)
2121-
tm.assert_almost_equal(result1.astype(np.int64), expected, rtol=1e-10)
2121+
# Cast to `np.float64` so that `rtol` and inexact checking kick in
2122+
# (`check_exact` doesn't take place for integer dtypes)
2123+
tm.assert_almost_equal(
2124+
result1.astype(np.int64).astype(np.float64),
2125+
expected.astype(np.float64),
2126+
rtol=1e-10,
2127+
)
21222128
# just out of bounds
21232129
should_fail1 = Series([0, tsmax_in_days + 0.005], dtype=float)
21242130
should_fail2 = Series([0, -tsmax_in_days - 0.005], dtype=float)

pandas/tests/util/test_assert_frame_equal.py

+14-4
Original file line numberDiff line numberDiff line change
@@ -211,7 +211,10 @@ def test_assert_frame_equal_extension_dtype_mismatch():
211211
"\\[right\\]: int[32|64]"
212212
)
213213

214-
tm.assert_frame_equal(left, right, check_dtype=False)
214+
# TODO: this shouldn't raise (or should raise a better error message)
215+
# https://github.com/pandas-dev/pandas/issues/56131
216+
with pytest.raises(AssertionError, match="classes are different"):
217+
tm.assert_frame_equal(left, right, check_dtype=False)
215218

216219
with pytest.raises(AssertionError, match=msg):
217220
tm.assert_frame_equal(left, right, check_dtype=True)
@@ -236,11 +239,18 @@ def test_assert_frame_equal_interval_dtype_mismatch():
236239
tm.assert_frame_equal(left, right, check_dtype=True)
237240

238241

239-
@pytest.mark.parametrize("right_dtype", ["Int32", "int64"])
240-
def test_assert_frame_equal_ignore_extension_dtype_mismatch(right_dtype):
242+
def test_assert_frame_equal_ignore_extension_dtype_mismatch():
243+
# https://github.com/pandas-dev/pandas/issues/35715
244+
left = DataFrame({"a": [1, 2, 3]}, dtype="Int64")
245+
right = DataFrame({"a": [1, 2, 3]}, dtype="Int32")
246+
tm.assert_frame_equal(left, right, check_dtype=False)
247+
248+
249+
@pytest.mark.xfail(reason="https://github.com/pandas-dev/pandas/issues/56131")
250+
def test_assert_frame_equal_ignore_extension_dtype_mismatch_cross_class():
241251
# https://github.com/pandas-dev/pandas/issues/35715
242252
left = DataFrame({"a": [1, 2, 3]}, dtype="Int64")
243-
right = DataFrame({"a": [1, 2, 3]}, dtype=right_dtype)
253+
right = DataFrame({"a": [1, 2, 3]}, dtype="int64")
244254
tm.assert_frame_equal(left, right, check_dtype=False)
245255

246256

pandas/tests/util/test_assert_series_equal.py

+23-4
Original file line numberDiff line numberDiff line change
@@ -290,7 +290,10 @@ def test_assert_series_equal_extension_dtype_mismatch():
290290
\\[left\\]: Int64
291291
\\[right\\]: int[32|64]"""
292292

293-
tm.assert_series_equal(left, right, check_dtype=False)
293+
# TODO: this shouldn't raise (or should raise a better error message)
294+
# https://github.com/pandas-dev/pandas/issues/56131
295+
with pytest.raises(AssertionError, match="Series classes are different"):
296+
tm.assert_series_equal(left, right, check_dtype=False)
294297

295298
with pytest.raises(AssertionError, match=msg):
296299
tm.assert_series_equal(left, right, check_dtype=True)
@@ -362,11 +365,18 @@ def test_series_equal_exact_for_nonnumeric():
362365
tm.assert_series_equal(s3, s1, check_exact=True)
363366

364367

365-
@pytest.mark.parametrize("right_dtype", ["Int32", "int64"])
366-
def test_assert_series_equal_ignore_extension_dtype_mismatch(right_dtype):
368+
def test_assert_series_equal_ignore_extension_dtype_mismatch():
367369
# https://github.com/pandas-dev/pandas/issues/35715
368370
left = Series([1, 2, 3], dtype="Int64")
369-
right = Series([1, 2, 3], dtype=right_dtype)
371+
right = Series([1, 2, 3], dtype="Int32")
372+
tm.assert_series_equal(left, right, check_dtype=False)
373+
374+
375+
@pytest.mark.xfail(reason="https://github.com/pandas-dev/pandas/issues/56131")
376+
def test_assert_series_equal_ignore_extension_dtype_mismatch_cross_class():
377+
# https://github.com/pandas-dev/pandas/issues/35715
378+
left = Series([1, 2, 3], dtype="Int64")
379+
right = Series([1, 2, 3], dtype="int64")
370380
tm.assert_series_equal(left, right, check_dtype=False)
371381

372382

@@ -437,3 +447,12 @@ def test_check_dtype_false_different_reso(dtype):
437447

438448
with pytest.raises(AssertionError, match="Series are different"):
439449
tm.assert_series_equal(ser_s, ser_ms, check_dtype=False)
450+
451+
452+
@pytest.mark.parametrize("dtype", ["Int64", "int64"])
453+
def test_large_unequal_ints(dtype):
454+
# https://github.com/pandas-dev/pandas/issues/55882
455+
left = Series([1577840521123000], dtype=dtype)
456+
right = Series([1577840521123543], dtype=dtype)
457+
with pytest.raises(AssertionError, match="Series are different"):
458+
tm.assert_series_equal(left, right)

0 commit comments

Comments
 (0)