From 3a3178eebc0c6298bb1e80ccb7f5a8c72c2560c9 Mon Sep 17 00:00:00 2001
From: Egor Dranischnikow <egor.dranischnikow@googlemail.com>
Date: Sat, 4 Aug 2018 22:42:10 +0200
Subject: [PATCH 1/5] BUG: treat nan-objects the same way float64-nans are
 treated - all nans are from the same equivalence class

---
 pandas/_libs/src/klib/khash_python.h | 11 ++++++++++-
 1 file changed, 10 insertions(+), 1 deletion(-)

diff --git a/pandas/_libs/src/klib/khash_python.h b/pandas/_libs/src/klib/khash_python.h
index e9fb49e8a5e42..45a93051f78d3 100644
--- a/pandas/_libs/src/klib/khash_python.h
+++ b/pandas/_libs/src/klib/khash_python.h
@@ -47,10 +47,19 @@ int PANDAS_INLINE pyobject_cmp(PyObject* a, PyObject* b) {
 		PyErr_Clear();
 		return 0;
 	}
+    if (result == 0) {  // still could be two NaNs
+        return PyFloat_CheckExact(a) &&
+               PyFloat_CheckExact(b) &&
+               Py_IS_NAN(PyFloat_AS_DOUBLE(a)) &&
+               Py_IS_NAN(PyFloat_AS_DOUBLE(b));
+    }
 	return result;
 }
 
-
+// For PyObject_Hash holds:
+//    hash(0.0) == 0 == hash(-0.0)
+//    hash(X) == 0 if X is a NaN-value
+// so it is OK to use it directly
 #define kh_python_hash_func(key) (PyObject_Hash(key))
 #define kh_python_hash_equal(a, b) (pyobject_cmp(a, b))
 

From 06976bb09ec3b5778024ffaf4f9cc52ae6ea2bb4 Mon Sep 17 00:00:00 2001
From: Egor Dranischnikow <egor.dranischnikow@googlemail.com>
Date: Sat, 4 Aug 2018 23:39:25 +0200
Subject: [PATCH 2/5] adding performance and unit tests

---
 asv_bench/benchmarks/series_methods.py | 37 ++++++++++++++++++++++++++
 pandas/tests/test_algos.py             |  8 ++++++
 2 files changed, 45 insertions(+)

diff --git a/asv_bench/benchmarks/series_methods.py b/asv_bench/benchmarks/series_methods.py
index a5ccf5c32b876..eb2034728b650 100644
--- a/asv_bench/benchmarks/series_methods.py
+++ b/asv_bench/benchmarks/series_methods.py
@@ -38,6 +38,43 @@ def time_isin(self, dtypes):
         self.s.isin(self.values)
 
 
+class IsInForObjects(object):
+
+    def setup(self):
+        self.s_nans = Series(np.full(10**4, np.nan)).astype(np.object)
+        self.vals_nans = np.full(10**4, np.nan).astype(np.object)
+        self.s_short = Series(np.arange(2)).astype(np.object)
+        self.s_long = Series(np.arange(10**5)).astype(np.object)
+        self.vals_short = np.arange(2).astype(np.object)
+        self.vals_long = np.arange(10**5).astype(np.object)
+        # because of nans floats are special:
+        self.s_long_floats = Series(np.arange(10**5, 
+                                       dtype=np.float)).astype(np.object)
+        self.vals_long_floats = np.arange(10**5, 
+                                       dtype=np.float).astype(np.object)
+
+    def time_isin_nans(self):
+        # if nan-objects are different objects,
+        # this has the potential to trigger O(n^2) running time
+        self.s_nans.isin(self.vals_nans)
+
+    def time_isin_short_series_long_values(self):
+        # running time dominated by the preprocessing
+        self.s_short.isin(self.vals_long)
+
+    def time_isin_long_series_short_values(self):
+        # running time dominated by look-up
+        self.s_long.isin(self.vals_short)
+
+    def time_isin_long_series_long_values(self):
+        # no dominating part
+        self.s_long.isin(self.vals_long)
+
+    def time_isin_long_series_long_values_floats(self):
+        # no dominating part
+        self.s_long_floats.isin(self.vals_long_floats)
+
+
 class NSort(object):
 
     goal_time = 0.2
diff --git a/pandas/tests/test_algos.py b/pandas/tests/test_algos.py
index 7ce2aaf7d7fbb..d8d98318b4c72 100644
--- a/pandas/tests/test_algos.py
+++ b/pandas/tests/test_algos.py
@@ -623,6 +623,14 @@ def test_empty(self, empty):
         result = algos.isin(vals, empty)
         tm.assert_numpy_array_equal(expected, result)
 
+    def test_different_nan_objects(self):
+        # GH 22119
+        comps = np.array(['nan', np.nan * 1j, float('nan')], dtype=np.object)
+        vals = np.array([float('nan')], dtype=np.object)
+        expected = np.array([False, False, True])
+        result = algos.isin(comps, vals)
+        tm.assert_numpy_array_equal(expected, result)
+
 
 class TestValueCounts(object):
 

From e583a0c9fd51c70fd148f85a95f5b05d519208b2 Mon Sep 17 00:00:00 2001
From: Egor Dranischnikow <egor.dranischnikow@googlemail.com>
Date: Sat, 4 Aug 2018 23:49:34 +0200
Subject: [PATCH 3/5] adding whatsnew note

---
 doc/source/whatsnew/v0.24.0.txt | 1 +
 1 file changed, 1 insertion(+)

diff --git a/doc/source/whatsnew/v0.24.0.txt b/doc/source/whatsnew/v0.24.0.txt
index 42e286f487a7d..a513c2bd03ac7 100644
--- a/doc/source/whatsnew/v0.24.0.txt
+++ b/doc/source/whatsnew/v0.24.0.txt
@@ -512,6 +512,7 @@ Missing
 
 - Bug in :func:`DataFrame.fillna` where a ``ValueError`` would raise when one column contained a ``datetime64[ns, tz]`` dtype (:issue:`15522`)
 - Bug in :func:`Series.hasnans` that could be incorrectly cached and return incorrect answers if null elements are introduced after an initial call (:issue:`19700`)
+- :func:`Series.isin` now treats all nans as equal also for `np.object`-dtype. This behavior is consistent with the behavior for float64 (:issue:`22119`)
 
 MultiIndex
 ^^^^^^^^^^

From d6b032463ed14cbabc1028842f81531222501762 Mon Sep 17 00:00:00 2001
From: Egor Dranischnikow <egor.dranischnikow@googlemail.com>
Date: Sun, 5 Aug 2018 22:56:24 +0200
Subject: [PATCH 4/5] making PEP8 happy

---
 asv_bench/benchmarks/series_methods.py | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/asv_bench/benchmarks/series_methods.py b/asv_bench/benchmarks/series_methods.py
index eb2034728b650..34a8d552304d1 100644
--- a/asv_bench/benchmarks/series_methods.py
+++ b/asv_bench/benchmarks/series_methods.py
@@ -48,10 +48,10 @@ def setup(self):
         self.vals_short = np.arange(2).astype(np.object)
         self.vals_long = np.arange(10**5).astype(np.object)
         # because of nans floats are special:
-        self.s_long_floats = Series(np.arange(10**5, 
-                                       dtype=np.float)).astype(np.object)
-        self.vals_long_floats = np.arange(10**5, 
-                                       dtype=np.float).astype(np.object)
+        self.s_long_floats = Series(np.arange(10**5,
+                                    dtype=np.float)).astype(np.object)
+        self.vals_long_floats = np.arange(10**5,
+                                          dtype=np.float).astype(np.object)
 
     def time_isin_nans(self):
         # if nan-objects are different objects,

From 5cc5e0fa90f3eb15787935478c01195b79ce0284 Mon Sep 17 00:00:00 2001
From: Egor Dranischnikow <egor.dranischnikow@googlemail.com>
Date: Tue, 7 Aug 2018 22:44:21 +0200
Subject: [PATCH 5/5] all nans are the same, so we need to update the behavior
 of tests which assumed differently

---
 pandas/tests/indexes/test_base.py | 11 ++++++++---
 1 file changed, 8 insertions(+), 3 deletions(-)

diff --git a/pandas/tests/indexes/test_base.py b/pandas/tests/indexes/test_base.py
index 754703dfc4bee..8ed8d2014f5bc 100644
--- a/pandas/tests/indexes/test_base.py
+++ b/pandas/tests/indexes/test_base.py
@@ -13,7 +13,8 @@
 from pandas.tests.indexes.common import Base
 
 from pandas.compat import (range, lrange, lzip, u,
-                           text_type, zip, PY3, PY35, PY36, PYPY, StringIO)
+                           text_type, zip, PY3, PY35, PY36, StringIO)
+import math
 import operator
 import numpy as np
 
@@ -1661,9 +1662,13 @@ def test_isin_nan_common_object(self, nulls_fixture, nulls_fixture2):
         # Test cartesian product of null fixtures and ensure that we don't
         # mangle the various types (save a corner case with PyPy)
 
-        if PYPY and nulls_fixture is np.nan:  # np.nan is float('nan') on PyPy
+        # all nans are the same
+        if (isinstance(nulls_fixture, float) and
+                isinstance(nulls_fixture2, float) and
+                math.isnan(nulls_fixture) and
+                math.isnan(nulls_fixture2)):
             tm.assert_numpy_array_equal(Index(['a', nulls_fixture]).isin(
-                [float('nan')]), np.array([False, True]))
+                [nulls_fixture2]), np.array([False, True]))
 
         elif nulls_fixture is nulls_fixture2:  # should preserve NA type
             tm.assert_numpy_array_equal(Index(['a', nulls_fixture]).isin(