Skip to content

Commit dbbf613

Browse files
committed
fix isin with nans and large arrays
1 parent 8df0218 commit dbbf613

File tree

2 files changed

+13
-2
lines changed

2 files changed

+13
-2
lines changed

pandas/core/algorithms.py

+6-1
Original file line numberDiff line numberDiff line change
@@ -440,7 +440,12 @@ def isin(comps: AnyArrayLike, values: AnyArrayLike) -> np.ndarray:
440440
# GH16012
441441
# Ensure np.in1d doesn't get object types or it *may* throw an exception
442442
if len(comps) > 1_000_000 and not is_object_dtype(comps):
443-
f = np.in1d
443+
# If the values include nan we need to check for nan explicitly
444+
# since np.nan is not equal to np.nan
445+
if any(np.isnan(values)):
446+
f = lambda c, v: np.logical_or(np.in1d(c, v), np.isnan(c))
447+
else:
448+
f = np.in1d
444449
elif is_integer_dtype(comps):
445450
try:
446451
values = values.astype("int64", copy=False)

pandas/tests/test_algos.py

+7-1
Original file line numberDiff line numberDiff line change
@@ -801,7 +801,6 @@ def test_i8(self):
801801
tm.assert_numpy_array_equal(result, expected)
802802

803803
def test_large(self):
804-
805804
s = pd.date_range("20000101", periods=2000000, freq="s").values
806805
result = algos.isin(s, s[0:2])
807806
expected = np.zeros(len(s), dtype=bool)
@@ -841,6 +840,13 @@ def test_same_nan_is_in(self):
841840
result = algos.isin(comps, values)
842841
tm.assert_numpy_array_equal(expected, result)
843842

843+
def test_same_nan_is_in_large(self):
844+
s = np.tile(1.0, 1_000_001)
845+
s[0] = np.nan
846+
result = algos.isin(s, [np.nan, 1])
847+
expected = np.ones(len(s), dtype=bool)
848+
tm.assert_numpy_array_equal(result, expected)
849+
844850
def test_same_object_is_in(self):
845851
# GH 22160
846852
# there could be special treatment for nans

0 commit comments

Comments
 (0)