Skip to content

Commit 58f7d5f

Browse files
phofl authored and noatamir committed
BUG: Index.equals raising with index of tuples that contain NA (pandas-dev#48446)
* BUG: Index.equals raising with index of tuples that contain NA * Address review * Add sanity check
1 parent 32668ec commit 58f7d5f

File tree

5 files changed

+58
-2
lines changed

5 files changed

+58
-2
lines changed

doc/source/whatsnew/v1.6.0.rst

+1-1
Original file line numberDiff line numberDiff line change
@@ -169,7 +169,7 @@ Indexing
169169

170170
Missing
171171
^^^^^^^
172-
-
172+
- Bug in :meth:`Index.equals` raising ``TypeError`` when :class:`Index` consists of tuples that contain ``NA`` (:issue:`48446`)
173173
-
174174

175175
MultiIndex

pandas/_libs/lib.pyx

+8-1
Original file line numberDiff line numberDiff line change
@@ -29,6 +29,8 @@ from cython cimport (
2929
floating,
3030
)
3131

32+
from pandas._libs.missing import check_na_tuples_nonequal
33+
3234
import_datetime()
3335

3436
import numpy as np
@@ -636,7 +638,7 @@ def array_equivalent_object(left: object[:], right: object[:]) -> bool:
636638
or is_matching_na(x, y, nan_matches_none=True)
637639
):
638640
return False
639-
except ValueError:
641+
except (ValueError, TypeError):
640642
# Avoid raising ValueError when comparing Numpy arrays to other types
641643
if cnp.PyArray_IsAnyScalar(x) != cnp.PyArray_IsAnyScalar(y):
642644
# Only compare scalars to scalars and non-scalars to non-scalars
@@ -645,7 +647,12 @@ def array_equivalent_object(left: object[:], right: object[:]) -> bool:
645647
and not (isinstance(x, type(y)) or isinstance(y, type(x)))):
646648
# Check if non-scalars have the same type
647649
return False
650+
elif check_na_tuples_nonequal(x, y):
651+
# We have tuples where one side has an NA and the other side does not
652+
# Only condition we may end up with a TypeError
653+
return False
648654
raise
655+
649656
return True
650657

651658

pandas/_libs/missing.pxd

+1
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,7 @@ from numpy cimport (
55

66

77
cpdef bint is_matching_na(object left, object right, bint nan_matches_none=*)
8+
cpdef bint check_na_tuples_nonequal(object left, object right)
89

910
cpdef bint checknull(object val, bint inf_as_na=*)
1011
cpdef ndarray[uint8_t] isnaobj(ndarray arr, bint inf_as_na=*)

pandas/_libs/missing.pyx

+31
Original file line numberDiff line numberDiff line change
@@ -42,6 +42,37 @@ cdef:
4242
type cDecimal = Decimal # for faster isinstance checks
4343

4444

45+
cpdef bint check_na_tuples_nonequal(object left, object right):
    """
    Detect equal-length tuples where NA sits on one side only.

    This has to be checked explicitly because the regular equality checks
    fail beforehand with an ambiguous boolean value when only one of the
    tuples holds NA at a given position.

    Parameters
    ----------
    left: Any
    right: Any

    Returns
    -------
    True if both arguments are tuples of the same length and, at some
    position, one of them holds NA while the other does not. False in
    every other case, including non-tuple input and tuples of differing
    length.
    """
    if not (isinstance(left, tuple) and isinstance(right, tuple)):
        return False

    if len(left) != len(right):
        return False

    for lval, rval in zip(left, right):
        # Exactly one of the paired elements is the NA singleton.
        if (lval is C_NA) != (rval is C_NA):
            return True

    return False
74+
75+
4576
cpdef bint is_matching_na(object left, object right, bint nan_matches_none=False):
4677
"""
4778
Check if two scalars are both NA of matching types.

pandas/tests/dtypes/test_missing.py

+17
Original file line numberDiff line numberDiff line change
@@ -538,6 +538,23 @@ def test_array_equivalent_nested():
538538
assert not array_equivalent(left, right, strict_nan=True)
539539

540540

541+
def test_array_equivalent_index_with_tuples():
    """Indexes of tuples with NA in different rows must compare as unequal."""
    # GH#48446
    na_in_first_slot = [(pd.NA, 4), (1, 1)]
    na_in_second_slot = [(4, pd.NA), (1, 1)]

    for rows in (na_in_first_slot, na_in_second_slot):
        forward = pd.Index(np.array(rows, dtype="object"))
        # Same tuples in the opposite order, so NA never lines up with NA.
        backward = pd.Index(np.array(rows[::-1], dtype="object"))

        # Check both argument orders; neither may raise or report equality.
        for first, second in ((forward, backward), (backward, forward)):
            assert not array_equivalent(first, second)
            assert not first.equals(second)
556+
557+
541558
@pytest.mark.parametrize(
542559
"dtype, na_value",
543560
[

0 commit comments

Comments
 (0)