diff --git a/doc/source/whatsnew/v1.6.0.rst b/doc/source/whatsnew/v1.6.0.rst index a7c9a7eb88221..8891fa77cd989 100644 --- a/doc/source/whatsnew/v1.6.0.rst +++ b/doc/source/whatsnew/v1.6.0.rst @@ -168,7 +168,7 @@ Indexing Missing ^^^^^^^ -- +- Bug in :meth:`Index.equals` raising ``TypeError`` when :class:`Index` consists of tuples that contain ``NA`` (:issue:`48446`) - MultiIndex diff --git a/pandas/_libs/lib.pyx b/pandas/_libs/lib.pyx index f1473392147f9..cc891bcb8a572 100644 --- a/pandas/_libs/lib.pyx +++ b/pandas/_libs/lib.pyx @@ -29,6 +29,8 @@ from cython cimport ( floating, ) +from pandas._libs.missing import check_na_tuples_nonequal + import_datetime() import numpy as np @@ -636,7 +638,7 @@ def array_equivalent_object(left: object[:], right: object[:]) -> bool: or is_matching_na(x, y, nan_matches_none=True) ): return False - except ValueError: + except (ValueError, TypeError): # Avoid raising ValueError when comparing Numpy arrays to other types if cnp.PyArray_IsAnyScalar(x) != cnp.PyArray_IsAnyScalar(y): # Only compare scalars to scalars and non-scalars to non-scalars @@ -645,7 +647,12 @@ def array_equivalent_object(left: object[:], right: object[:]) -> bool: and not (isinstance(x, type(y)) or isinstance(y, type(x)))): # Check if non-scalars have the same type return False + elif check_na_tuples_nonequal(x, y): + # We have tuples where one side has a NA and the other side does not + # Only condition we may end up with a TypeError + return False raise + return True diff --git a/pandas/_libs/missing.pxd b/pandas/_libs/missing.pxd index 854dcf2ec9775..5920649519442 100644 --- a/pandas/_libs/missing.pxd +++ b/pandas/_libs/missing.pxd @@ -5,6 +5,7 @@ from numpy cimport ( cpdef bint is_matching_na(object left, object right, bint nan_matches_none=*) +cpdef bint check_na_tuples_nonequal(object left, object right) cpdef bint checknull(object val, bint inf_as_na=*) cpdef ndarray[uint8_t] isnaobj(ndarray arr, bint inf_as_na=*) diff --git a/pandas/_libs/missing.pyx 
b/pandas/_libs/missing.pyx index 9b470e95dc22b..518f17d840dd5 100644 --- a/pandas/_libs/missing.pyx +++ b/pandas/_libs/missing.pyx @@ -42,6 +42,37 @@ cdef: type cDecimal = Decimal # for faster isinstance checks +cpdef bint check_na_tuples_nonequal(object left, object right): + """ + When we have NA in one of the tuples but not the other we have to check here, + because our regular checks fail before with ambiguous boolean value. + + Parameters + ---------- + left: Any + right: Any + + Returns + ------- + True if we are dealing with tuples that have NA on one side and non NA on + the other side. + + """ + if not isinstance(left, tuple) or not isinstance(right, tuple): + return False + + if len(left) != len(right): + return False + + for left_element, right_element in zip(left, right): + if left_element is C_NA and right_element is not C_NA: + return True + elif right_element is C_NA and left_element is not C_NA: + return True + + return False + + cpdef bint is_matching_na(object left, object right, bint nan_matches_none=False): """ Check if two scalars are both NA of matching types. 
diff --git a/pandas/tests/dtypes/test_missing.py b/pandas/tests/dtypes/test_missing.py index 9c8aeb050ec27..e1dd182a5ae30 100644 --- a/pandas/tests/dtypes/test_missing.py +++ b/pandas/tests/dtypes/test_missing.py @@ -538,6 +538,23 @@ def test_array_equivalent_nested(): assert not array_equivalent(left, right, strict_nan=True) +def test_array_equivalent_index_with_tuples(): + # GH#48446: tuples containing pd.NA must compare unequal instead of raising + idx1 = pd.Index(np.array([(pd.NA, 4), (1, 1)], dtype="object")) + idx2 = pd.Index(np.array([(1, 1), (pd.NA, 4)], dtype="object")) + assert not array_equivalent(idx1, idx2) + assert not idx1.equals(idx2) + assert not array_equivalent(idx2, idx1) + assert not idx2.equals(idx1) + + idx1 = pd.Index(np.array([(4, pd.NA), (1, 1)], dtype="object")) + idx2 = pd.Index(np.array([(1, 1), (4, pd.NA)], dtype="object")) + assert not array_equivalent(idx1, idx2) + assert not idx1.equals(idx2) + assert not array_equivalent(idx2, idx1) + assert not idx2.equals(idx1) + + @pytest.mark.parametrize( "dtype, na_value", [