# test_isin.py -- tests for Series.isin
# (from a fork of pandas-dev/pandas)
import numpy as np
import pytest
import pandas as pd
from pandas import Series, date_range
import pandas._testing as tm
class TestSeriesIsIn:
    """Tests for ``Series.isin`` membership checks."""

    def test_isin(self):
        """Check basic string membership and the GH#16012 large-input case."""
        s = Series(["A", "B", "C", "a", "B", "B", "A", "C"])

        result = s.isin(["A", "C"])
        expected = Series([True, False, True, False, False, False, True, True])
        tm.assert_series_equal(result, expected)

        # GH#16012
        # This specific issue has to have a series over 1e6 in len, but the
        # comparison array (in_list) must be large enough so that numpy doesn't
        # do a manual masking trick that will avoid this issue altogether
        s = Series(list("abcdefghijk" * 10 ** 5))
        # If numpy doesn't do the manual comparison/mask, these
        # unorderable mixed types are what cause the exception in numpy
        in_list = [-1, "a", "b", "G", "Y", "Z", "E", "K", "E", "S", "I", "R", "R"] * 6

        # Only "a" and "b" from in_list occur in the series, 10 ** 5 times each.
        assert s.isin(in_list).sum() == 200000

    def test_isin_with_string_scalar(self):
        """A bare string passed to ``isin`` must raise ``TypeError``."""
        # GH#4763
        s = Series(["A", "B", "C", "a", "B", "B", "A", "C"])
        # Accept either ``[str]`` or `` `str` `` around the type name: the
        # delimiter is incidental to what this test guards (the TypeError)
        # and is believed to differ between pandas releases.
        msg = (
            r"only list-like objects are allowed to be passed to isin\(\), "
            r"you passed a [\[`]str[\]`]"
        )
        with pytest.raises(TypeError, match=msg):
            s.isin("a")

        s = Series(["aaa", "b", "c"])
        with pytest.raises(TypeError, match=msg):
            s.isin("aaa")

    def test_isin_with_i8(self):
        """Check ``isin`` on datetime64 and timedelta64 Series."""
        # GH#5021

        # expected: the first two entries match; expected2: only the second.
        expected = Series([True, True, False, False, False])
        expected2 = Series([False, True, False, False, False])

        # datetime64[ns]
        s = Series(date_range("jan-01-2013", "jan-05-2013"))

        result = s.isin(s[0:2])
        tm.assert_series_equal(result, expected)

        result = s.isin(s[0:2].values)
        tm.assert_series_equal(result, expected)

        # fails on dtype conversion in the first place
        result = s.isin(s[0:2].values.astype("datetime64[D]"))
        tm.assert_series_equal(result, expected)

        result = s.isin([s[1]])
        tm.assert_series_equal(result, expected2)

        result = s.isin([np.datetime64(s[1])])
        tm.assert_series_equal(result, expected2)

        result = s.isin(set(s[0:2]))
        tm.assert_series_equal(result, expected)

        # timedelta64[ns]
        # "D" is the documented spelling of the day unit for to_timedelta.
        s = Series(pd.to_timedelta(range(5), unit="D"))
        result = s.isin(s[0:2])
        tm.assert_series_equal(result, expected)

    @pytest.mark.parametrize("empty", [[], Series(dtype=object), np.array([])])
    def test_isin_empty(self, empty):
        """``isin`` with an empty list-like yields all ``False``."""
        # see GH#16991
        s = Series(["a", "b"])
        expected = Series([False, False])

        result = s.isin(empty)
        # (result, expected) order, matching every other assertion in the class.
        tm.assert_series_equal(result, expected)

    def test_isin_read_only(self):
        """``isin`` accepts a NumPy array whose write flag is cleared."""
        # https://github.com/pandas-dev/pandas/issues/37174
        arr = np.array([1, 2, 3])
        arr.setflags(write=False)
        s = Series([1, 2, 3])
        result = s.isin(arr)
        expected = Series([True, True, True])
        tm.assert_series_equal(result, expected)
@pytest.mark.slow
def test_isin_large_series_mixed_dtypes_and_nan():
    """Check ``isin`` on a Series of over a million numbers (with NaN).

    The lookup values are strings, none of which occur in the Series, so
    every entry of the result must be ``False``.
    """
    # https://github.com/pandas-dev/pandas/issues/37094
    # combination of object dtype for the values and > 1_000_000 elements
    ser = Series([1, 2, np.nan] * 1_000_000)
    result = ser.isin({"foo", "bar"})
    # Size the expectation from the input itself and build it from a NumPy
    # boolean array instead of a second multi-million-element Python list.
    expected = Series(np.zeros(len(ser), dtype=bool))
    tm.assert_series_equal(result, expected)