pandas-dev · jreback · Jul 14, 2021 · Jul 9, 2021 · Jul 9, 2021 · Jul 9, 2021
diff --git a/asv_bench/benchmarks/algos/isin.py b/asv_bench/benchmarks/algos/isin.py
@@ -1,10 +1,5 @@
 import numpy as np
 
-try:
-    from pandas.compat import np_version_under1p20
-except ImportError:
-    from pandas.compat.numpy import _np_version_under1p20 as np_version_under1p20
-
 from pandas import (
     Categorical,
     NaT,
@@ -283,10 +278,6 @@ class IsInLongSeriesLookUpDominates:
     def setup(self, dtype, MaxNumber, series_type):
         N = 10 ** 7
 
-        # https://github.com/pandas-dev/pandas/issues/39844
-        if not np_version_under1p20 and dtype in ("Int64", "Float64"):
-            raise NotImplementedError
-
         if series_type == "random_hits":
             array = np.random.randint(0, MaxNumber, N)
         if series_type == "random_misses":
@@ -297,7 +288,8 @@ def setup(self, dtype, MaxNumber, series_type):
             array = np.arange(N) + MaxNumber
 
         self.series = Series(array).astype(dtype)
-        self.values = np.arange(MaxNumber).astype(dtype)
+
+        self.values = np.arange(MaxNumber).astype(dtype.lower())
 
     def time_isin(self, dtypes, MaxNumber, series_type):
         self.series.isin(self.values)
@@ -313,16 +305,12 @@ class IsInLongSeriesValuesDominate:
     def setup(self, dtype, series_type):
         N = 10 ** 7
 
-        # https://github.com/pandas-dev/pandas/issues/39844
-        if not np_version_under1p20 and dtype in ("Int64", "Float64"):
-            raise NotImplementedError
-
         if series_type == "random":
             vals = np.random.randint(0, 10 * N, N)
         if series_type == "monotone":
             vals = np.arange(N)
 
-        self.values = vals.astype(dtype)
+        self.values = vals.astype(dtype.lower())
         M = 10 ** 6 + 1
         self.series = Series(np.arange(M)).astype(dtype)
 

diff --git a/doc/source/whatsnew/v1.3.1.rst b/doc/source/whatsnew/v1.3.1.rst
@@ -19,6 +19,7 @@ Fixed regressions
 - Performance regression in constructing a :class:`DataFrame` from a dictionary of dictionaries (:issue:`42338`)
 - Fixed regression in :meth:`DataFrame.agg` dropping values when the DataFrame had an Extension Array dtype, a duplicate index, and ``axis=1`` (:issue:`42380`)
 - Fixed regression in indexing with a ``list`` subclass incorrectly raising ``TypeError`` (:issue:`42433`, :issue:42461`)
+- Fixed regression in :meth:`DataFrame.isin` and :meth:`Series.isin` raising ``TypeError`` with nullable data containing at least one missing value (:issue:`42405`)
 -
 
 .. ---------------------------------------------------------------------------

diff --git a/pandas/_libs/missing.pyx b/pandas/_libs/missing.pyx
@@ -334,6 +334,22 @@ def isnaobj2d_old(arr: ndarray) -> ndarray:
     return result.view(np.bool_)
 
 
+@cython.wraparound(False)
+@cython.boundscheck(False)
+def has_NA(ndarray[object, ndim=1] arr) -> bool:
+    """
+    Return whether any element of the 1-D object array arr is C_NA (by identity).
+    """
+    cdef:
+        Py_ssize_t i
+
+    for i in range(len(arr)):
+        if arr[i] is C_NA:
+            return True
+
+    return False
+
+
 def isposinf_scalar(val: object) -> bool:
     return util.is_float_object(val) and val == INF
 

diff --git a/pandas/core/arrays/masked.py b/pandas/core/arrays/masked.py
@@ -403,12 +403,20 @@ def isin(self, values) -> BooleanArray:  # type: ignore[override]
 
         from pandas.core.arrays import BooleanArray
 
-        result = isin(self._data, values)
+        # algorithms.isin will eventually convert values to an ndarray, so no extra
+        # cost to doing it here first
+        values_arr = np.asarray(values)
+        result = isin(self._data, values_arr)
+
         if self._hasna:
-            if libmissing.NA in values:
-                result += self._mask
-            else:
-                result *= np.invert(self._mask)
+            values_have_NA = is_object_dtype(values_arr.dtype) and libmissing.has_NA(
+                values_arr
+            )
+
+            # For now, NA does not propagate so set result according to presence of NA,
+            # see https://github.com/pandas-dev/pandas/pull/38379 for some discussion
+            result[self._mask] = values_have_NA
+
         mask = np.zeros_like(self, dtype=bool)
         return BooleanArray(result, mask, copy=False)
 

diff --git a/pandas/tests/series/methods/test_isin.py b/pandas/tests/series/methods/test_isin.py
@@ -156,6 +156,27 @@ def test_isin_float_in_int_series(self, values):
         expected = Series([True, False])
         tm.assert_series_equal(result, expected)
 
+    @pytest.mark.parametrize("dtype", ["boolean", "Int64", "Float64"])
+    @pytest.mark.parametrize(
+        "data,values,expected",
+        [
+            ([0, 1, 0], [1], [False, True, False]),
+            ([0, 1, 0], [1, pd.NA], [False, True, False]),
+            ([0, pd.NA, 0], [1, 0], [True, False, True]),
+            ([0, 1, pd.NA], [1, pd.NA], [False, True, True]),
+            ([0, 1, pd.NA], [1, np.nan], [False, True, False]),
+            ([0, pd.NA, pd.NA], [np.nan, pd.NaT, None], [False, False, False]),
+        ],
+    )
+    def test_isin_masked_types(self, dtype, data, values, expected):
+        # GH#42405: isin on nullable dtypes raised TypeError when NA was present
+        ser = Series(data, dtype=dtype)
+
+        result = ser.isin(values)
+        expected = Series(expected, dtype="boolean")
+
+        tm.assert_series_equal(result, expected)
+
 
 @pytest.mark.slow
 def test_isin_large_series_mixed_dtypes_and_nan():