diff --git a/doc/source/whatsnew/v1.3.0.rst b/doc/source/whatsnew/v1.3.0.rst index bed3484793bcc..a3a9a4c5eda08 100644 --- a/doc/source/whatsnew/v1.3.0.rst +++ b/doc/source/whatsnew/v1.3.0.rst @@ -454,7 +454,8 @@ Other - Bug in :func:`pandas.testing.assert_series_equal`, :func:`pandas.testing.assert_frame_equal`, :func:`pandas.testing.assert_index_equal` and :func:`pandas.testing.assert_extension_array_equal` incorrectly raising when an attribute has an unrecognized NA type (:issue:`39461`) - Bug in :class:`Styler` where ``subset`` arg in methods raised an error for some valid multiindex slices (:issue:`33562`) - :class:`Styler` rendered HTML output minor alterations to support w3 good code standard (:issue:`39626`) -- +- Bug in :meth:`DataFrame.equals`, :meth:`Series.equals`, :meth:`Index.equals` with object-dtype containing ``np.datetime64("NaT")`` or ``np.timedelta64("NaT")`` (:issue:`39650`) + .. --------------------------------------------------------------------------- diff --git a/pandas/_libs/lib.pyx b/pandas/_libs/lib.pyx index 0f796b2b65dff..6793ea0ab3bc2 100644 --- a/pandas/_libs/lib.pyx +++ b/pandas/_libs/lib.pyx @@ -73,6 +73,7 @@ from pandas._libs.tslib import array_to_datetime from pandas._libs.missing cimport ( C_NA, checknull, + is_matching_na, is_null_datetime64, is_null_timedelta64, isnaobj, @@ -584,8 +585,10 @@ def array_equivalent_object(left: object[:], right: object[:]) -> bool: return False elif (x is C_NA) ^ (y is C_NA): return False - elif not (PyObject_RichCompareBool(x, y, Py_EQ) or - (x is None or is_nan(x)) and (y is None or is_nan(y))): + elif not ( + PyObject_RichCompareBool(x, y, Py_EQ) + or is_matching_na(x, y, nan_matches_none=True) + ): return False except ValueError: # Avoid raising ValueError when comparing Numpy arrays to other types diff --git a/pandas/_libs/missing.pxd b/pandas/_libs/missing.pxd index e02b84381b62c..ce8e8007e7630 100644 --- a/pandas/_libs/missing.pxd +++ b/pandas/_libs/missing.pxd @@ -1,6 +1,8 @@ from numpy cimport ndarray, uint8_t +cpdef bint is_matching_na(object left, object right, bint nan_matches_none=*) + cpdef bint checknull(object val) cpdef bint checknull_old(object val) cpdef ndarray[uint8_t] isnaobj(ndarray arr) diff --git a/pandas/_libs/missing.pyx b/pandas/_libs/missing.pyx index abf38265ddc6d..d91d0261a1b33 100644 --- a/pandas/_libs/missing.pyx +++ b/pandas/_libs/missing.pyx @@ -29,6 +29,58 @@ cdef: bint is_32bit = not IS64 +cpdef bint is_matching_na(object left, object right, bint nan_matches_none=False): + """ + Check if two scalars are both NA of matching types. + + Parameters + ---------- + left : Any + right : Any + nan_matches_none : bool, default False + For backwards compatibility, consider NaN as matching None. + + Returns + ------- + bool + """ + if left is None: + if nan_matches_none and util.is_nan(right): + return True + return right is None + elif left is C_NA: + return right is C_NA + elif left is NaT: + return right is NaT + elif util.is_float_object(left): + if nan_matches_none and right is None: + return True + return ( + util.is_nan(left) + and util.is_float_object(right) + and util.is_nan(right) + ) + elif util.is_complex_object(left): + return ( + util.is_nan(left) + and util.is_complex_object(right) + and util.is_nan(right) + ) + elif util.is_datetime64_object(left): + return ( + get_datetime64_value(left) == NPY_NAT + and util.is_datetime64_object(right) + and get_datetime64_value(right) == NPY_NAT + ) + elif util.is_timedelta64_object(left): + return ( + get_timedelta64_value(left) == NPY_NAT + and util.is_timedelta64_object(right) + and get_timedelta64_value(right) == NPY_NAT + ) + return False + + cpdef bint checknull(object val): """ Return boolean describing of the input is NA-like, defined here as any diff --git a/pandas/tests/dtypes/test_missing.py b/pandas/tests/dtypes/test_missing.py index 0d92ef02e07c8..f6566d205e65c 100644 --- a/pandas/tests/dtypes/test_missing.py +++ b/pandas/tests/dtypes/test_missing.py @@ -10,7 +10,7 @@ from pandas._libs import missing as libmissing from pandas._libs.tslibs import iNaT, is_null_datetimelike -from pandas.core.dtypes.common import is_scalar +from pandas.core.dtypes.common import is_float, is_scalar from pandas.core.dtypes.dtypes import DatetimeTZDtype, IntervalDtype, PeriodDtype from pandas.core.dtypes.missing import ( array_equivalent, @@ -653,3 +653,25 @@ def test_is_null_datetimelike(self): for value in never_na_vals: assert not is_null_datetimelike(value) + + def test_is_matching_na(self, nulls_fixture, nulls_fixture2): + left = nulls_fixture + right = nulls_fixture2 + + assert libmissing.is_matching_na(left, left) + + if left is right: + assert libmissing.is_matching_na(left, right) + elif is_float(left) and is_float(right): + # np.nan vs float("NaN") we consider as matching + assert libmissing.is_matching_na(left, right) + else: + assert not libmissing.is_matching_na(left, right) + + def test_is_matching_na_nan_matches_none(self): + + assert not libmissing.is_matching_na(None, np.nan) + assert not libmissing.is_matching_na(np.nan, None) + + assert libmissing.is_matching_na(None, np.nan, nan_matches_none=True) + assert libmissing.is_matching_na(np.nan, None, nan_matches_none=True) diff --git a/pandas/tests/series/methods/test_equals.py b/pandas/tests/series/methods/test_equals.py index cf55482fefe22..85d74196e59b4 100644 --- a/pandas/tests/series/methods/test_equals.py +++ b/pandas/tests/series/methods/test_equals.py @@ -1,9 +1,14 @@ from contextlib import nullcontext +import copy import numpy as np import pytest -from pandas import MultiIndex, Series +from pandas._libs.missing import is_matching_na + +from pandas.core.dtypes.common import is_float + +from pandas import Index, MultiIndex, Series import pandas._testing as tm @@ -65,3 +70,54 @@ def test_equals_false_negative(): assert s1.equals(s4) assert s1.equals(s5) assert s5.equals(s6) + + +def test_equals_matching_nas(): + # matching but not identical NAs + left = Series([np.datetime64("NaT")], dtype=object) + right = Series([np.datetime64("NaT")], dtype=object) + assert left.equals(right) + assert Index(left).equals(Index(right)) + assert left.array.equals(right.array) + + left = Series([np.timedelta64("NaT")], dtype=object) + right = Series([np.timedelta64("NaT")], dtype=object) + assert left.equals(right) + assert Index(left).equals(Index(right)) + assert left.array.equals(right.array) + + left = Series([np.float64("NaN")], dtype=object) + right = Series([np.float64("NaN")], dtype=object) + assert left.equals(right) + assert Index(left).equals(Index(right)) + assert left.array.equals(right.array) + + +def test_equals_mismatched_nas(nulls_fixture, nulls_fixture2): + # GH#39650 + left = nulls_fixture + right = nulls_fixture2 + if hasattr(right, "copy"): + right = right.copy() + else: + right = copy.copy(right) + + ser = Series([left], dtype=object) + ser2 = Series([right], dtype=object) + + if is_matching_na(left, right): + assert ser.equals(ser2) + elif (left is None and is_float(right)) or (right is None and is_float(left)): + assert ser.equals(ser2) + else: + assert not ser.equals(ser2) + + +def test_equals_none_vs_nan(): + # GH#39650 + ser = Series([1, None], dtype=object) + ser2 = Series([1, np.nan], dtype=object) + + assert ser.equals(ser2) + assert Index(ser).equals(Index(ser2)) + assert ser.array.equals(ser2.array) diff --git a/pandas/tests/util/test_assert_numpy_array_equal.py b/pandas/tests/util/test_assert_numpy_array_equal.py index d29ddedd2fdd6..660402ee857e3 100644 --- a/pandas/tests/util/test_assert_numpy_array_equal.py +++ b/pandas/tests/util/test_assert_numpy_array_equal.py @@ -1,3 +1,5 @@ +import copy + import numpy as np import pytest @@ -198,6 +200,14 @@ def test_numpy_array_equal_identical_na(nulls_fixture): tm.assert_numpy_array_equal(a, a) + # matching but not the identical object + if hasattr(nulls_fixture, "copy"): + other = nulls_fixture.copy() + else: + other = copy.copy(nulls_fixture) + b = np.array([other], dtype=object) + tm.assert_numpy_array_equal(a, b) + def test_numpy_array_equal_different_na(): a = np.array([np.nan], dtype=object)