From 5d60c035a7a059ae66c8b689ecdd7857dd7b2aad Mon Sep 17 00:00:00 2001
From: Egor Dranischnikow <egor.dranischnikow@googlemail.com>
Date: Tue, 8 Jun 2021 23:05:40 +0200
Subject: [PATCH 01/11] adding breaking example with complex

---
 pandas/tests/libs/test_hashtable.py | 18 ++++++++++++++++++
 1 file changed, 18 insertions(+)

diff --git a/pandas/tests/libs/test_hashtable.py b/pandas/tests/libs/test_hashtable.py
index aeff591e3f0dc..8da479cf249d4 100644
--- a/pandas/tests/libs/test_hashtable.py
+++ b/pandas/tests/libs/test_hashtable.py
@@ -178,6 +178,24 @@ def test_no_reallocation(self, table_type, dtype):
             assert n_buckets_start == clean_table.get_state()["n_buckets"]
 
 
+class TestPyObjectHashTableWithNans:
+    def test_nan_float(self):
+        nan1 = float("nan")
+        nan2 = float("nan")
+        assert nan1 is not nan2
+        table = ht.PyObjectHashTable()
+        table.set_item(nan1, 42)
+        assert table.get_item(nan2) == 42
+
+    def test_nan_complex(self):
+        nan1 = 1j * float("nan")
+        nan2 = 1j * float("nan")
+        assert nan1 is not nan2
+        table = ht.PyObjectHashTable()
+        table.set_item(nan1, 42)
+        assert table.get_item(nan2) == 42
+
+
 def test_get_labels_groupby_for_Int64(writable):
     table = ht.Int64HashTable()
     vals = np.array([1, 2, -1, 2, 1, -1], dtype=np.int64)

From dfa9f83ef4462bae8b53c388597a80cfadba43e3 Mon Sep 17 00:00:00 2001
From: Egor Dranischnikow <egor.dranischnikow@googlemail.com>
Date: Tue, 8 Jun 2021 23:26:51 +0200
Subject: [PATCH 02/11] fix complex case

---
 pandas/_libs/src/klib/khash_python.h | 43 ++++++++++++++++++++++++----
 pandas/tests/libs/test_hashtable.py  | 30 +++++++++++++++++--
 2 files changed, 65 insertions(+), 8 deletions(-)

diff --git a/pandas/_libs/src/klib/khash_python.h b/pandas/_libs/src/klib/khash_python.h
index aee018262e3a6..9d0ee7698af6c 100644
--- a/pandas/_libs/src/klib/khash_python.h
+++ b/pandas/_libs/src/klib/khash_python.h
@@ -163,17 +163,50 @@ KHASH_MAP_INIT_COMPLEX128(complex128, size_t)
 #define kh_exist_complex128(h, k) (kh_exist(h, k))
 
 
+int PANDAS_INLINE floatobject_cmp(PyObject* a, PyObject* b){
+    return Py_IS_NAN(PyFloat_AS_DOUBLE(a)) &&
+           Py_IS_NAN(PyFloat_AS_DOUBLE(b));
+}
+
+
+int PANDAS_INLINE complexobject_cmp(PyObject* a, PyObject* b){
+    return (
+                Py_IS_NAN(PyComplex_RealAsDouble(a)) &&
+                Py_IS_NAN(PyComplex_RealAsDouble(b)) &&
+                Py_IS_NAN(PyComplex_ImagAsDouble(a)) &&
+                Py_IS_NAN(PyComplex_ImagAsDouble(b))
+           )
+           ||
+           (
+                Py_IS_NAN(PyComplex_RealAsDouble(a)) &&
+                Py_IS_NAN(PyComplex_RealAsDouble(b)) &&
+                PyComplex_ImagAsDouble(a) == PyComplex_ImagAsDouble(b)
+           )
+           ||
+           (
+                PyComplex_RealAsDouble(a) == PyComplex_RealAsDouble(b) &&
+                Py_IS_NAN(PyComplex_ImagAsDouble(a)) &&
+                Py_IS_NAN(PyComplex_ImagAsDouble(b))
+           );
+}
+
+
 int PANDAS_INLINE pyobject_cmp(PyObject* a, PyObject* b) {
 	int result = PyObject_RichCompareBool(a, b, Py_EQ);
 	if (result < 0) {
 		PyErr_Clear();
 		return 0;
 	}
-    if (result == 0) {  // still could be two NaNs
-        return PyFloat_CheckExact(a) &&
-               PyFloat_CheckExact(b) &&
-               Py_IS_NAN(PyFloat_AS_DOUBLE(a)) &&
-               Py_IS_NAN(PyFloat_AS_DOUBLE(b));
+    if (result == 0) {  // still could be built-ins with NaNs
+        if (Py_TYPE(a) != Py_TYPE(b)) {
+            return 0;
+        }
+        if (PyFloat_CheckExact(a)) {
+            return floatobject_cmp(a, b);
+        }
+        if (PyComplex_CheckExact(a)) {
+            return complexobject_cmp(a, b);
+        }
     }
 	return result;
 }
diff --git a/pandas/tests/libs/test_hashtable.py b/pandas/tests/libs/test_hashtable.py
index 8da479cf249d4..81419891bf00e 100644
--- a/pandas/tests/libs/test_hashtable.py
+++ b/pandas/tests/libs/test_hashtable.py
@@ -187,14 +187,38 @@ def test_nan_float(self):
         table.set_item(nan1, 42)
         assert table.get_item(nan2) == 42
 
-    def test_nan_complex(self):
-        nan1 = 1j * float("nan")
-        nan2 = 1j * float("nan")
+    def test_nan_complex_both(self):
+        nan1 = complex(float("nan"), float("nan"))
+        nan2 = complex(float("nan"), float("nan"))
         assert nan1 is not nan2
         table = ht.PyObjectHashTable()
         table.set_item(nan1, 42)
         assert table.get_item(nan2) == 42
 
+    def test_nan_complex_real(self):
+        nan1 = complex(float("nan"), 1)
+        nan2 = complex(float("nan"), 1)
+        other = complex(float("nan"), 2)
+        assert nan1 is not nan2
+        table = ht.PyObjectHashTable()
+        table.set_item(nan1, 42)
+        assert table.get_item(nan2) == 42
+        with pytest.raises(KeyError, match=None) as error:
+            table.get_item(other)
+        assert str(error.value) == str(other)
+
+    def test_nan_complex_imag(self):
+        nan1 = complex(1, float("nan"))
+        nan2 = complex(1, float("nan"))
+        other = complex(2, float("nan"))
+        assert nan1 is not nan2
+        table = ht.PyObjectHashTable()
+        table.set_item(nan1, 42)
+        assert table.get_item(nan2) == 42
+        with pytest.raises(KeyError, match=None) as error:
+            table.get_item(other)
+        assert str(error.value) == str(other)
+
 
 def test_get_labels_groupby_for_Int64(writable):
     table = ht.Int64HashTable()

From 6de86483ace5b9025d2902c1af04eb3c66d54799 Mon Sep 17 00:00:00 2001
From: Egor Dranischnikow <egor.dranischnikow@googlemail.com>
Date: Tue, 8 Jun 2021 23:44:04 +0200
Subject: [PATCH 03/11] add failing example for tuple

---
 pandas/tests/libs/test_hashtable.py | 19 +++++++++++++++++++
 1 file changed, 19 insertions(+)

diff --git a/pandas/tests/libs/test_hashtable.py b/pandas/tests/libs/test_hashtable.py
index 81419891bf00e..353e074b324c2 100644
--- a/pandas/tests/libs/test_hashtable.py
+++ b/pandas/tests/libs/test_hashtable.py
@@ -219,6 +219,25 @@ def test_nan_complex_imag(self):
             table.get_item(other)
         assert str(error.value) == str(other)
 
+    def test_nan_in_tuple(self):
+        nan1 = (float("nan"),)
+        nan2 = (float("nan"),)
+        assert nan1[0] is not nan2[0]
+        table = ht.PyObjectHashTable()
+        table.set_item(nan1, 42)
+        assert table.get_item(nan2) == 42
+
+    def test_nan_in_nested_tuple(self):
+        nan1 = (1, (2, (float("nan"),)))
+        nan2 = (1, (2, (float("nan"),)))
+        other = (1, 2)
+        table = ht.PyObjectHashTable()
+        table.set_item(nan1, 42)
+        assert table.get_item(nan2) == 42
+        with pytest.raises(KeyError, match=None) as error:
+            table.get_item(other)
+        assert str(error.value) == str(other)
+
 
 def test_get_labels_groupby_for_Int64(writable):
     table = ht.Int64HashTable()

From de6f55857f85a2359ea2bb00b8270f204f4fe9aa Mon Sep 17 00:00:00 2001
From: Egor Dranischnikow <egor.dranischnikow@googlemail.com>
Date: Wed, 9 Jun 2021 00:15:00 +0200
Subject: [PATCH 04/11] fix tuple-case

---
 pandas/_libs/src/klib/khash_python.h | 21 +++++++++++++++++++++
 1 file changed, 21 insertions(+)

diff --git a/pandas/_libs/src/klib/khash_python.h b/pandas/_libs/src/klib/khash_python.h
index 9d0ee7698af6c..464a2f60f3dbd 100644
--- a/pandas/_libs/src/klib/khash_python.h
+++ b/pandas/_libs/src/klib/khash_python.h
@@ -190,6 +190,24 @@ int PANDAS_INLINE complexobject_cmp(PyObject* a, PyObject* b){
            );
 }
 
+int PANDAS_INLINE pyobject_cmp(PyObject* a, PyObject* b);
+
+
+int PANDAS_INLINE tupleobject_cmp(PyObject* a, PyObject* b){
+    Py_ssize_t i;
+
+    if (Py_SIZE(a) != Py_SIZE(b)) {
+        return 0;
+    }
+
+    for (i = 0; i < Py_SIZE(a); ++i) {
+        if (!pyobject_cmp(PyTuple_GET_ITEM(a, i), PyTuple_GET_ITEM(b, i))) {
+            return 0;
+        }
+    }
+    return 1;
+}
+
 
 int PANDAS_INLINE pyobject_cmp(PyObject* a, PyObject* b) {
 	int result = PyObject_RichCompareBool(a, b, Py_EQ);
@@ -207,6 +225,9 @@ int PANDAS_INLINE pyobject_cmp(PyObject* a, PyObject* b) {
         if (PyComplex_CheckExact(a)) {
             return complexobject_cmp(a, b);
         }
+        if (PyTuple_CheckExact(a)) {
+            return tupleobject_cmp(a, b);
+        }
     }
 	return result;
 }

From 81ce1bced3750eba7ca3cd3c1235e5ee02a015ca Mon Sep 17 00:00:00 2001
From: Egor Dranischnikow <egor.dranischnikow@googlemail.com>
Date: Fri, 11 Jun 2021 16:38:02 +0200
Subject: [PATCH 05/11] make functions type-safe

---
 pandas/_libs/src/klib/khash_python.h | 32 ++++++++++++++--------------
 1 file changed, 16 insertions(+), 16 deletions(-)

diff --git a/pandas/_libs/src/klib/khash_python.h b/pandas/_libs/src/klib/khash_python.h
index 464a2f60f3dbd..d6a5c593df26b 100644
--- a/pandas/_libs/src/klib/khash_python.h
+++ b/pandas/_libs/src/klib/khash_python.h
@@ -163,37 +163,37 @@ KHASH_MAP_INIT_COMPLEX128(complex128, size_t)
 #define kh_exist_complex128(h, k) (kh_exist(h, k))
 
 
-int PANDAS_INLINE floatobject_cmp(PyObject* a, PyObject* b){
+int PANDAS_INLINE floatobject_cmp(PyFloatObject* a, PyFloatObject* b){
     return Py_IS_NAN(PyFloat_AS_DOUBLE(a)) &&
            Py_IS_NAN(PyFloat_AS_DOUBLE(b));
 }
 
 
-int PANDAS_INLINE complexobject_cmp(PyObject* a, PyObject* b){
+int PANDAS_INLINE complexobject_cmp(PyComplexObject* a, PyComplexObject* b){
     return (
-                Py_IS_NAN(PyComplex_RealAsDouble(a)) &&
-                Py_IS_NAN(PyComplex_RealAsDouble(b)) &&
-                Py_IS_NAN(PyComplex_ImagAsDouble(a)) &&
-                Py_IS_NAN(PyComplex_ImagAsDouble(b))
+                Py_IS_NAN(a->cval.real) &&
+                Py_IS_NAN(b->cval.real) &&
+                Py_IS_NAN(a->cval.imag) &&
+                Py_IS_NAN(b->cval.imag)
            )
            ||
            (
-                Py_IS_NAN(PyComplex_RealAsDouble(a)) &&
-                Py_IS_NAN(PyComplex_RealAsDouble(b)) &&
-                PyComplex_ImagAsDouble(a) == PyComplex_ImagAsDouble(b)
+                Py_IS_NAN(a->cval.real) &&
+                Py_IS_NAN(b->cval.real) &&
+                a->cval.imag == b->cval.imag
            )
            ||
            (
-                PyComplex_RealAsDouble(a) == PyComplex_RealAsDouble(b) &&
-                Py_IS_NAN(PyComplex_ImagAsDouble(a)) &&
-                Py_IS_NAN(PyComplex_ImagAsDouble(b))
+                a->cval.real == b->cval.real &&
+                Py_IS_NAN(a->cval.imag) &&
+                Py_IS_NAN(b->cval.imag)
            );
 }
 
 int PANDAS_INLINE pyobject_cmp(PyObject* a, PyObject* b);
 
 
-int PANDAS_INLINE tupleobject_cmp(PyObject* a, PyObject* b){
+int PANDAS_INLINE tupleobject_cmp(PyTupleObject* a, PyTupleObject* b){
     Py_ssize_t i;
 
     if (Py_SIZE(a) != Py_SIZE(b)) {
@@ -220,13 +220,13 @@ int PANDAS_INLINE pyobject_cmp(PyObject* a, PyObject* b) {
             return 0;
         }
         if (PyFloat_CheckExact(a)) {
-            return floatobject_cmp(a, b);
+            return floatobject_cmp((PyFloatObject*)a, (PyFloatObject*)b);
         }
         if (PyComplex_CheckExact(a)) {
-            return complexobject_cmp(a, b);
+            return complexobject_cmp((PyComplexObject*)a, (PyComplexObject*)b);
         }
         if (PyTuple_CheckExact(a)) {
-            return tupleobject_cmp(a, b);
+            return tupleobject_cmp((PyTupleObject*)a, (PyTupleObject*)b);
         }
     }
 	return result;

From 9f6fc0360ae0de6c88a483b7e0f88d3b53c99966 Mon Sep 17 00:00:00 2001
From: Egor Dranischnikow <egor.dranischnikow@googlemail.com>
Date: Fri, 11 Jun 2021 16:49:11 +0200
Subject: [PATCH 06/11] performance: first special cases, than all others
 (otherwise tuple will be traversed twice)

---
 pandas/_libs/src/klib/khash_python.h | 31 ++++++++++++++++++----------
 1 file changed, 20 insertions(+), 11 deletions(-)

diff --git a/pandas/_libs/src/klib/khash_python.h b/pandas/_libs/src/klib/khash_python.h
index d6a5c593df26b..3fcf7b7e1c0ee 100644
--- a/pandas/_libs/src/klib/khash_python.h
+++ b/pandas/_libs/src/klib/khash_python.h
@@ -164,8 +164,12 @@ KHASH_MAP_INIT_COMPLEX128(complex128, size_t)
 
 
 int PANDAS_INLINE floatobject_cmp(PyFloatObject* a, PyFloatObject* b){
-    return Py_IS_NAN(PyFloat_AS_DOUBLE(a)) &&
-           Py_IS_NAN(PyFloat_AS_DOUBLE(b));
+    return (
+             Py_IS_NAN(PyFloat_AS_DOUBLE(a)) &&
+             Py_IS_NAN(PyFloat_AS_DOUBLE(b))
+           )
+           ||
+           ( PyFloat_AS_DOUBLE(a) == PyFloat_AS_DOUBLE(b) );
 }
 
 
@@ -187,6 +191,11 @@ int PANDAS_INLINE complexobject_cmp(PyComplexObject* a, PyComplexObject* b){
                 a->cval.real == b->cval.real &&
                 Py_IS_NAN(a->cval.imag) &&
                 Py_IS_NAN(b->cval.imag)
+           )
+           ||
+           (
+                a->cval.real == b->cval.real &&
+                a->cval.imag == b->cval.imag
            );
 }
 
@@ -210,15 +219,8 @@ int PANDAS_INLINE tupleobject_cmp(PyTupleObject* a, PyTupleObject* b){
 
 
 int PANDAS_INLINE pyobject_cmp(PyObject* a, PyObject* b) {
-	int result = PyObject_RichCompareBool(a, b, Py_EQ);
-	if (result < 0) {
-		PyErr_Clear();
-		return 0;
-	}
-    if (result == 0) {  // still could be built-ins with NaNs
-        if (Py_TYPE(a) != Py_TYPE(b)) {
-            return 0;
-        }
+    if (Py_TYPE(a) == Py_TYPE(b)) {
+        // special handling for some built-in types which could have NaNs:
         if (PyFloat_CheckExact(a)) {
             return floatobject_cmp((PyFloatObject*)a, (PyFloatObject*)b);
         }
@@ -228,7 +230,14 @@ int PANDAS_INLINE pyobject_cmp(PyObject* a, PyObject* b) {
         if (PyTuple_CheckExact(a)) {
             return tupleobject_cmp((PyTupleObject*)a, (PyTupleObject*)b);
         }
+        // frozenset isn't yet supported
     }
+
+	int result = PyObject_RichCompareBool(a, b, Py_EQ);
+	if (result < 0) {
+		PyErr_Clear();
+		return 0;
+	}
 	return result;
 }
 

From a8ccfe974a94801cf35bb5a3aa12daefa0384ccc Mon Sep 17 00:00:00 2001
From: Egor Dranischnikow <egor.dranischnikow@googlemail.com>
Date: Fri, 11 Jun 2021 16:59:27 +0200
Subject: [PATCH 07/11] add test case from GH-41836

---
 pandas/tests/libs/test_hashtable.py | 10 ++++++++++
 1 file changed, 10 insertions(+)

diff --git a/pandas/tests/libs/test_hashtable.py b/pandas/tests/libs/test_hashtable.py
index 353e074b324c2..0edcebdc069f4 100644
--- a/pandas/tests/libs/test_hashtable.py
+++ b/pandas/tests/libs/test_hashtable.py
@@ -8,6 +8,7 @@
 
 import pandas as pd
 import pandas._testing as tm
+from pandas.core.algorithms import isin
 
 
 @contextmanager
@@ -487,3 +488,12 @@ def test_mode(self, dtype, type_suffix):
         values = np.array([42, np.nan, np.nan, np.nan], dtype=dtype)
         assert mode(values, True) == 42
         assert np.isnan(mode(values, False))
+
+
+def test_ismember_tuple_with_nans():
+    # GH-41836
+    values = [("a", float("nan")), ("b", 1)]
+    comps = [("a", float("nan"))]
+    result = isin(values, comps)
+    expected = np.array([True, False], dtype=np.bool_)
+    tm.assert_numpy_array_equal(result, expected)

From 5bc56d8d2ed05c9a3630294de02d990769df19a0 Mon Sep 17 00:00:00 2001
From: Egor Dranischnikow <egor.dranischnikow@googlemail.com>
Date: Sun, 13 Jun 2021 14:34:40 +0200
Subject: [PATCH 08/11] there are no differences now for PyPy and CPython -
 both take nan correctly into account

---
 pandas/tests/indexes/multi/test_isin.py | 14 +-------------
 1 file changed, 1 insertion(+), 13 deletions(-)

diff --git a/pandas/tests/indexes/multi/test_isin.py b/pandas/tests/indexes/multi/test_isin.py
index 97eb34e28764b..695458273d16e 100644
--- a/pandas/tests/indexes/multi/test_isin.py
+++ b/pandas/tests/indexes/multi/test_isin.py
@@ -1,14 +1,11 @@
 import numpy as np
 import pytest
 
-from pandas.compat import PYPY
-
 from pandas import MultiIndex
 import pandas._testing as tm
 
 
-@pytest.mark.skipif(not PYPY, reason="tuples cmp recursively on PyPy")
-def test_isin_nan_pypy():
+def test_isin_nan():
     idx = MultiIndex.from_arrays([["foo", "bar"], [1.0, np.nan]])
     tm.assert_numpy_array_equal(idx.isin([("bar", np.nan)]), np.array([False, True]))
     tm.assert_numpy_array_equal(
@@ -31,15 +28,6 @@ def test_isin():
     assert result.dtype == np.bool_
 
 
-@pytest.mark.skipif(PYPY, reason="tuples cmp recursively on PyPy")
-def test_isin_nan_not_pypy():
-    idx = MultiIndex.from_arrays([["foo", "bar"], [1.0, np.nan]])
-    tm.assert_numpy_array_equal(idx.isin([("bar", np.nan)]), np.array([False, False]))
-    tm.assert_numpy_array_equal(
-        idx.isin([("bar", float("nan"))]), np.array([False, False])
-    )
-
-
 def test_isin_level_kwarg():
     idx = MultiIndex.from_arrays([["qux", "baz", "foo", "bar"], np.arange(4)])
 

From b2c870665e80864122a1869762927ca327beccfa Mon Sep 17 00:00:00 2001
From: Egor Dranischnikow <egor.dranischnikow@googlemail.com>
Date: Wed, 16 Jun 2021 07:01:45 +0200
Subject: [PATCH 09/11] adding what is new

---
 doc/source/whatsnew/v1.3.0.rst | 1 +
 1 file changed, 1 insertion(+)

diff --git a/doc/source/whatsnew/v1.3.0.rst b/doc/source/whatsnew/v1.3.0.rst
index 1a5a9980e5e96..4ccda47a98603 100644
--- a/doc/source/whatsnew/v1.3.0.rst
+++ b/doc/source/whatsnew/v1.3.0.rst
@@ -1033,6 +1033,7 @@ Missing
 - Bug in :meth:`DataFrame.fillna` not accepting a dictionary for the ``downcast`` keyword (:issue:`40809`)
 - Bug in :func:`isna` not returning a copy of the mask for nullable types, causing any subsequent mask modification to change the original array (:issue:`40935`)
 - Bug in :class:`DataFrame` construction with float data containing ``NaN`` and an integer ``dtype`` casting instead of retaining the ``NaN`` (:issue:`26919`)
+- Bug in :func:`isin` didn't treat all nans as equivalent if they were in tuples (:issue:`41836`)
 
 MultiIndex
 ^^^^^^^^^^

From 86ad54b01e6e808df5dba291207a416e7d940d31 Mon Sep 17 00:00:00 2001
From: Egor Dranischnikow <egor.dranischnikow@googlemail.com>
Date: Wed, 16 Jun 2021 21:06:55 +0200
Subject: [PATCH 10/11] use Series.isin and MultiIndex.isin rather than isin in
 whatsnew

---
 doc/source/whatsnew/v1.3.0.rst | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/doc/source/whatsnew/v1.3.0.rst b/doc/source/whatsnew/v1.3.0.rst
index 4ccda47a98603..cadc5615cd654 100644
--- a/doc/source/whatsnew/v1.3.0.rst
+++ b/doc/source/whatsnew/v1.3.0.rst
@@ -1033,7 +1033,7 @@ Missing
 - Bug in :meth:`DataFrame.fillna` not accepting a dictionary for the ``downcast`` keyword (:issue:`40809`)
 - Bug in :func:`isna` not returning a copy of the mask for nullable types, causing any subsequent mask modification to change the original array (:issue:`40935`)
 - Bug in :class:`DataFrame` construction with float data containing ``NaN`` and an integer ``dtype`` casting instead of retaining the ``NaN`` (:issue:`26919`)
-- Bug in :func:`isin` didn't treat all nans as equivalent if they were in tuples (:issue:`41836`)
+- Bug in :meth:`Series.isin` and :meth:`MultiIndex.isin` didn't treat all nans as equivalent if they were in tuples (:issue:`41836`)
 
 MultiIndex
 ^^^^^^^^^^

From a35e61725aa05d4ec31611c1e088406508dade4c Mon Sep 17 00:00:00 2001
From: Egor Dranischnikow <egor.dranischnikow@googlemail.com>
Date: Wed, 16 Jun 2021 21:31:08 +0200
Subject: [PATCH 11/11] add  comments

---
 pandas/_libs/src/klib/khash_python.h | 11 ++++++++++-
 1 file changed, 10 insertions(+), 1 deletion(-)

diff --git a/pandas/_libs/src/klib/khash_python.h b/pandas/_libs/src/klib/khash_python.h
index 3fcf7b7e1c0ee..87c6283c19a2f 100644
--- a/pandas/_libs/src/klib/khash_python.h
+++ b/pandas/_libs/src/klib/khash_python.h
@@ -163,6 +163,7 @@ KHASH_MAP_INIT_COMPLEX128(complex128, size_t)
 #define kh_exist_complex128(h, k) (kh_exist(h, k))
 
 
+// NaN-floats should be in the same equivalency class, see GH 22119
 int PANDAS_INLINE floatobject_cmp(PyFloatObject* a, PyFloatObject* b){
     return (
              Py_IS_NAN(PyFloat_AS_DOUBLE(a)) &&
@@ -173,6 +174,9 @@ int PANDAS_INLINE floatobject_cmp(PyFloatObject* a, PyFloatObject* b){
 }
 
 
+// NaNs should be in the same equivalency class, see GH 41836
+// PyObject_RichCompareBool for complexobjects has a different behavior
+// needs to be replaced
 int PANDAS_INLINE complexobject_cmp(PyComplexObject* a, PyComplexObject* b){
     return (
                 Py_IS_NAN(a->cval.real) &&
@@ -202,6 +206,9 @@ int PANDAS_INLINE complexobject_cmp(PyComplexObject* a, PyComplexObject* b){
 int PANDAS_INLINE pyobject_cmp(PyObject* a, PyObject* b);
 
 
+// replacing PyObject_RichCompareBool (NaN!=NaN) with pyobject_cmp (NaN==NaN),
+// which treats NaNs as equivalent
+// see GH 41836
 int PANDAS_INLINE tupleobject_cmp(PyTupleObject* a, PyTupleObject* b){
     Py_ssize_t i;
 
@@ -220,7 +227,9 @@ int PANDAS_INLINE tupleobject_cmp(PyTupleObject* a, PyTupleObject* b){
 
 int PANDAS_INLINE pyobject_cmp(PyObject* a, PyObject* b) {
     if (Py_TYPE(a) == Py_TYPE(b)) {
-        // special handling for some built-in types which could have NaNs:
+        // special handling for some built-in types which could have NaNs
+        // as we would like to have them equivalent, but the usual
+        // PyObject_RichCompareBool would return False
         if (PyFloat_CheckExact(a)) {
             return floatobject_cmp((PyFloatObject*)a, (PyFloatObject*)b);
         }