We read every piece of feedback, and take your input very seriously.
To see all available qualifiers, see our documentation.
1 parent 32668ec commit 58f7d5fCopy full SHA for 58f7d5f
doc/source/whatsnew/v1.6.0.rst
@@ -169,7 +169,7 @@ Indexing
169
170
Missing
171
^^^^^^^
172
--
+- Bug in :meth:`Index.equals` raising ``TypeError`` when :class:`Index` consists of tuples that contain ``NA`` (:issue:`48446`)
173
-
174
175
MultiIndex
pandas/_libs/lib.pyx
@@ -29,6 +29,8 @@ from cython cimport (
29
floating,
30
)
31
32
+from pandas._libs.missing import check_na_tuples_nonequal
33
+
34
import_datetime()
35
36
import numpy as np
@@ -636,7 +638,7 @@ def array_equivalent_object(left: object[:], right: object[:]) -> bool:
636
638
or is_matching_na(x, y, nan_matches_none=True)
637
639
):
640
return False
- except ValueError:
641
+ except (ValueError, TypeError):
642
# Avoid raising ValueError when comparing Numpy arrays to other types
643
if cnp.PyArray_IsAnyScalar(x) != cnp.PyArray_IsAnyScalar(y):
644
# Only compare scalars to scalars and non-scalars to non-scalars
@@ -645,7 +647,12 @@ def array_equivalent_object(left: object[:], right: object[:]) -> bool:
645
647
and not (isinstance(x, type(y)) or isinstance(y, type(x)))):
646
648
# Check if non-scalars have the same type
649
650
+ elif check_na_tuples_nonequal(x, y):
651
+ # We have tuples where one side has a NA and the other side does not
652
+ # This is the only condition under which we may end up with a TypeError
653
+ return False
654
raise
655
656
return True
657
658
pandas/_libs/missing.pxd
@@ -5,6 +5,7 @@ from numpy cimport (
5
6
7
cpdef bint is_matching_na(object left, object right, bint nan_matches_none=*)
8
+cpdef bint check_na_tuples_nonequal(object left, object right)
9
10
cpdef bint checknull(object val, bint inf_as_na=*)
11
cpdef ndarray[uint8_t] isnaobj(ndarray arr, bint inf_as_na=*)
pandas/_libs/missing.pyx
@@ -42,6 +42,37 @@ cdef:
42
type cDecimal = Decimal # for faster isinstance checks
43
44
45
+cpdef bint check_na_tuples_nonequal(object left, object right):
46
+ """
47
+ When we have NA in one of the tuples but not the other we have to check here,
48
+ because our regular checks fail before with an ambiguous boolean value.
49
50
+ Parameters
51
+ ----------
52
+ left: Any
53
+ right: Any
54
55
+ Returns
56
+ -------
57
+ True if we are dealing with tuples that have NA on one side and non NA on
58
+ the other side.
59
60
61
+ if not isinstance(left, tuple) or not isinstance(right, tuple):
62
63
64
+ if len(left) != len(right):
65
66
67
+ for left_element, right_element in zip(left, right):
68
+ if left_element is C_NA and right_element is not C_NA:
69
+ return True
70
+ elif right_element is C_NA and left_element is not C_NA:
71
72
73
74
75
76
cpdef bint is_matching_na(object left, object right, bint nan_matches_none=False):
77
"""
78
Check if two scalars are both NA of matching types.
pandas/tests/dtypes/test_missing.py
@@ -538,6 +538,23 @@ def test_array_equivalent_nested():
538
assert not array_equivalent(left, right, strict_nan=True)
539
540
541
+def test_array_equivalent_index_with_tuples():
542
+ # GH#48446
543
+ idx1 = pd.Index(np.array([(pd.NA, 4), (1, 1)], dtype="object"))
544
+ idx2 = pd.Index(np.array([(1, 1), (pd.NA, 4)], dtype="object"))
545
+ assert not array_equivalent(idx1, idx2)
546
+ assert not idx1.equals(idx2)
547
+ assert not array_equivalent(idx2, idx1)
548
+ assert not idx2.equals(idx1)
549
550
+ idx1 = pd.Index(np.array([(4, pd.NA), (1, 1)], dtype="object"))
551
+ idx2 = pd.Index(np.array([(1, 1), (4, pd.NA)], dtype="object"))
552
553
554
555
556
557
558
@pytest.mark.parametrize(
559
"dtype, na_value",
560
[
0 commit comments