Skip to content

BUG: equals/assert_numpy_array_equals with non-singleton NAs #39650

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 8 commits into from
Feb 10, 2021
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 2 additions & 1 deletion doc/source/whatsnew/v1.3.0.rst
Original file line number Diff line number Diff line change
Expand Up @@ -454,7 +454,8 @@ Other
- Bug in :func:`pandas.testing.assert_series_equal`, :func:`pandas.testing.assert_frame_equal`, :func:`pandas.testing.assert_index_equal` and :func:`pandas.testing.assert_extension_array_equal` incorrectly raising when an attribute has an unrecognized NA type (:issue:`39461`)
- Bug in :class:`Styler` where ``subset`` arg in methods raised an error for some valid multiindex slices (:issue:`33562`)
- :class:`Styler` rendered HTML output now has minor alterations to conform to W3C good-code standards (:issue:`39626`)
-
- Bug in :meth:`DataFrame.equals`, :meth:`Series.equals`, :meth:`Index.equals` with object-dtype containing ``np.datetime64("NaT")`` or ``np.timedelta64("NaT")`` (:issue:`39650`)


.. ---------------------------------------------------------------------------

Expand Down
7 changes: 5 additions & 2 deletions pandas/_libs/lib.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -73,6 +73,7 @@ from pandas._libs.tslib import array_to_datetime
from pandas._libs.missing cimport (
C_NA,
checknull,
is_matching_na,
is_null_datetime64,
is_null_timedelta64,
isnaobj,
Expand Down Expand Up @@ -584,8 +585,10 @@ def array_equivalent_object(left: object[:], right: object[:]) -> bool:
return False
elif (x is C_NA) ^ (y is C_NA):
return False
elif not (PyObject_RichCompareBool(x, y, Py_EQ) or
(x is None or is_nan(x)) and (y is None or is_nan(y))):
elif not (
PyObject_RichCompareBool(x, y, Py_EQ)
or is_matching_na(x, y, nan_matches_none=True)
):
return False
except ValueError:
# Avoid raising ValueError when comparing Numpy arrays to other types
Expand Down
2 changes: 2 additions & 0 deletions pandas/_libs/missing.pxd
Original file line number Diff line number Diff line change
@@ -1,6 +1,8 @@
from numpy cimport ndarray, uint8_t


cpdef bint is_matching_na(object left, object right, bint nan_matches_none=*)

cpdef bint checknull(object val)
cpdef bint checknull_old(object val)
cpdef ndarray[uint8_t] isnaobj(ndarray arr)
Expand Down
52 changes: 52 additions & 0 deletions pandas/_libs/missing.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,58 @@ cdef:
bint is_32bit = not IS64


cpdef bint is_matching_na(object left, object right, bint nan_matches_none=False):
    """
    Check if two scalars are both NA of matching types.

    Parameters
    ----------
    left : Any
    right : Any
    nan_matches_none : bool, default False
        For backwards compatibility, consider NaN as matching None.

    Returns
    -------
    bool
        True only when ``left`` and ``right`` are both NA values of the same
        kind (or a NaN/None pair when ``nan_matches_none`` is set).
    """
    if left is None:
        if nan_matches_none and util.is_nan(right):
            return True
        return right is None
    elif left is C_NA:
        return right is C_NA
    elif left is NaT:
        return right is NaT
    elif util.is_float_object(left):
        if not util.is_nan(left):
            # A non-NaN float is not NA at all, so it can never match --
            # in particular it must not match None under nan_matches_none.
            return False
        if nan_matches_none and right is None:
            return True
        return util.is_float_object(right) and util.is_nan(right)
    elif util.is_complex_object(left):
        return (
            util.is_nan(left)
            and util.is_complex_object(right)
            and util.is_nan(right)
        )
    elif util.is_datetime64_object(left):
        # Both sides must be datetime64 scalars holding the NaT sentinel.
        return (
            get_datetime64_value(left) == NPY_NAT
            and util.is_datetime64_object(right)
            and get_datetime64_value(right) == NPY_NAT
        )
    elif util.is_timedelta64_object(left):
        # Both sides must be timedelta64 scalars holding the NaT sentinel.
        return (
            get_timedelta64_value(left) == NPY_NAT
            and util.is_timedelta64_object(right)
            and get_timedelta64_value(right) == NPY_NAT
        )
    # Anything else is not a recognized NA value.
    return False


cpdef bint checknull(object val):
"""
Return boolean describing of the input is NA-like, defined here as any
Expand Down
24 changes: 23 additions & 1 deletion pandas/tests/dtypes/test_missing.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@
from pandas._libs import missing as libmissing
from pandas._libs.tslibs import iNaT, is_null_datetimelike

from pandas.core.dtypes.common import is_scalar
from pandas.core.dtypes.common import is_float, is_scalar
from pandas.core.dtypes.dtypes import DatetimeTZDtype, IntervalDtype, PeriodDtype
from pandas.core.dtypes.missing import (
array_equivalent,
Expand Down Expand Up @@ -653,3 +653,25 @@ def test_is_null_datetimelike(self):

for value in never_na_vals:
assert not is_null_datetimelike(value)

def test_is_matching_na(self, nulls_fixture, nulls_fixture2):
    """Each NA matches itself; two different NAs match only if both are floats."""
    left, right = nulls_fixture, nulls_fixture2

    assert libmissing.is_matching_na(left, left)

    # The identical object always matches, and np.nan vs float("NaN")
    # we consider as matching too.
    expect_match = left is right or (is_float(left) and is_float(right))
    if expect_match:
        assert libmissing.is_matching_na(left, right)
    else:
        assert not libmissing.is_matching_na(left, right)

def test_is_matching_na_nan_matches_none(self):
    """None and NaN match each other only when ``nan_matches_none=True``."""
    pairs = [(None, np.nan), (np.nan, None)]

    # Default: None and NaN are different kinds of NA.
    for left, right in pairs:
        assert not libmissing.is_matching_na(left, right)

    # Opt-in backwards-compatible behavior, checked in both argument orders.
    for left, right in pairs:
        assert libmissing.is_matching_na(left, right, nan_matches_none=True)
58 changes: 57 additions & 1 deletion pandas/tests/series/methods/test_equals.py
Original file line number Diff line number Diff line change
@@ -1,9 +1,14 @@
from contextlib import nullcontext
import copy

import numpy as np
import pytest

from pandas import MultiIndex, Series
from pandas._libs.missing import is_matching_na

from pandas.core.dtypes.common import is_float

from pandas import Index, MultiIndex, Series
import pandas._testing as tm


Expand Down Expand Up @@ -65,3 +70,54 @@ def test_equals_false_negative():
assert s1.equals(s4)
assert s1.equals(s5)
assert s5.equals(s6)


def test_equals_matching_nas():
    """Object-dtype containers holding equal-but-distinct NA scalars compare equal."""
    # matching but not identical NAs: each factory call builds a fresh scalar
    na_factories = (
        lambda: np.datetime64("NaT"),
        lambda: np.timedelta64("NaT"),
        lambda: np.float64("NaN"),
    )

    for make_na in na_factories:
        left = Series([make_na()], dtype=object)
        right = Series([make_na()], dtype=object)

        assert left.equals(right)
        assert Index(left).equals(Index(right))
        assert left.array.equals(right.array)
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Can you also add tests for the case of non-matching NAs?

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

sure



def test_equals_mismatched_nas(nulls_fixture, nulls_fixture2):
    """Series.equals on single-NA object Series agrees with is_matching_na,
    except that None and a float are additionally treated as equal."""
    # GH#39650
    left = nulls_fixture
    # Copy the right-hand value so the comparison does not rely on both
    # Series holding the very same object.
    right = (
        nulls_fixture2.copy()
        if hasattr(nulls_fixture2, "copy")
        else copy.copy(nulls_fixture2)
    )

    ser = Series([left], dtype=object)
    ser2 = Series([right], dtype=object)

    none_vs_float = (left is None and is_float(right)) or (
        right is None and is_float(left)
    )
    if is_matching_na(left, right) or none_vs_float:
        assert ser.equals(ser2)
    else:
        assert not ser.equals(ser2)


def test_equals_none_vs_nan():
    """None and NaN are treated as equal by Series, Index and array ``equals``."""
    # GH#39650
    with_none = Series([1, None], dtype=object)
    with_nan = Series([1, np.nan], dtype=object)

    # Compare as Series, as Index, and as the backing array, in that order.
    views = (lambda obj: obj, Index, lambda obj: obj.array)
    for view in views:
        assert view(with_none).equals(view(with_nan))
10 changes: 10 additions & 0 deletions pandas/tests/util/test_assert_numpy_array_equal.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,5 @@
import copy

import numpy as np
import pytest

Expand Down Expand Up @@ -198,6 +200,14 @@ def test_numpy_array_equal_identical_na(nulls_fixture):

tm.assert_numpy_array_equal(a, a)

# matching but not the identical object
if hasattr(nulls_fixture, "copy"):
other = nulls_fixture.copy()
else:
other = copy.copy(nulls_fixture)
b = np.array([other], dtype=object)
tm.assert_numpy_array_equal(a, b)


def test_numpy_array_equal_different_na():
a = np.array([np.nan], dtype=object)
Expand Down