Skip to content

Commit bbb7425

Browse files
committed
BUG: Fixed TypeError for Series.isin() when large series and values contains NA (pandas-dev#60678)
1 parent 817b706 commit bbb7425

File tree

2 files changed

+31
-0
lines changed

2 files changed

+31
-0
lines changed

pandas/core/algorithms.py

+13
Original file line numberDiff line numberDiff line change
@@ -23,6 +23,9 @@
2323
iNaT,
2424
lib,
2525
)
26+
27+
from pandas._libs.missing import NA
28+
2629
from pandas._typing import (
2730
AnyArrayLike,
2831
ArrayLike,
@@ -544,10 +547,20 @@ def isin(comps: ListLike, values: ListLike) -> npt.NDArray[np.bool_]:
544547
# Ensure np.isin doesn't get object types or it *may* throw an exception
545548
# Although a hashmap has O(1) look-up (vs. O(log n) in a sorted array),
546549
# isin is faster for small sizes
550+
551+
# GH60678
552+
# Ensure values don't contain <NA>; otherwise the np.isin fast path below raises a TypeError
553+
values_contains_NA = False
554+
555+
if values.size != 0:
556+
vectorized_check = np.vectorize(lambda v: v is NA)
557+
values_contains_NA = vectorized_check(values).any()
558+
547559
if (
548560
len(comps_array) > _MINIMUM_COMP_ARR_LEN
549561
and len(values) <= 26
550562
and comps_array.dtype != object
563+
and values_contains_NA == False
551564
):
552565
# If the values include nan we need to check for nan explicitly
553566
# since np.nan is not equal to np.nan

pandas/tests/series/methods/test_isin.py

+18
Original file line numberDiff line numberDiff line change
@@ -211,6 +211,24 @@ def test_isin_large_series_mixed_dtypes_and_nan(monkeypatch):
211211
tm.assert_series_equal(result, expected)
212212

213213

214+
@pytest.mark.parametrize("dtype, data, values, expected", [
215+
("boolean", [pd.NA, False, True], [False, pd.NA], [True, True, False]),
216+
("Int64", [pd.NA, 2, 1], [1, pd.NA], [True, False, True]),
217+
("Float64", [20.0, 30.0, pd.NA], [pd.NA], [False, False, True])
218+
])
219+
def test_isin_large_series_and_pdNA(dtype, data, values, expected, monkeypatch):
220+
# https://github.com/pandas-dev/pandas/issues/60678
221+
# combination of large series (> _MINIMUM_COMP_ARR_LEN elements) and
222+
# values that contain pd.NA
223+
min_isin_comp = 2
224+
ser = Series(data, dtype=dtype)
225+
expected = pd.Series(expected, dtype="boolean")
226+
227+
with monkeypatch.context() as m:
228+
m.setattr(algorithms, "_MINIMUM_COMP_ARR_LEN", min_isin_comp)
229+
result = ser.isin(values)
230+
tm.assert_series_equal(result, expected)
231+
214232
def test_isin_complex_numbers():
215233
# GH 17927
216234
array = [0, 1j, 1j, 1, 1 + 1j, 1 + 2j, 1 + 1j]

0 commit comments

Comments
 (0)