forked from pandas-dev/pandas
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathtest_indexing.py
159 lines (138 loc) · 6.24 KB
/
test_indexing.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
from decimal import Decimal
import numpy as np
import pytest
from pandas._libs.missing import is_matching_na
from pandas import Index
import pandas._testing as tm
class TestGetIndexer:
@pytest.mark.parametrize(
"method,expected",
[
("pad", [-1, 0, 1, 1]),
("backfill", [0, 0, 1, -1]),
],
)
def test_get_indexer_strings(self, method, expected):
expected = np.array(expected, dtype=np.intp)
index = Index(["b", "c"], dtype=object)
actual = index.get_indexer(["a", "b", "c", "d"], method=method)
tm.assert_numpy_array_equal(actual, expected)
def test_get_indexer_strings_raises(self):
index = Index(["b", "c"], dtype=object)
msg = "|".join(
[
"operation 'sub' not supported for dtype 'str'",
r"unsupported operand type\(s\) for -: 'str' and 'str'",
]
)
with pytest.raises(TypeError, match=msg):
index.get_indexer(["a", "b", "c", "d"], method="nearest")
with pytest.raises(TypeError, match=msg):
index.get_indexer(["a", "b", "c", "d"], method="pad", tolerance=2)
with pytest.raises(TypeError, match=msg):
index.get_indexer(
["a", "b", "c", "d"], method="pad", tolerance=[2, 2, 2, 2]
)
def test_get_indexer_with_NA_values(
self, unique_nulls_fixture, unique_nulls_fixture2
):
# GH#22332
# check pairwise, that no pair of na values
# is mangled
if unique_nulls_fixture is unique_nulls_fixture2:
return # skip it, values are not unique
arr = np.array([unique_nulls_fixture, unique_nulls_fixture2], dtype=object)
index = Index(arr, dtype=object)
result = index.get_indexer(
Index(
[unique_nulls_fixture, unique_nulls_fixture2, "Unknown"], dtype=object
)
)
expected = np.array([0, 1, -1], dtype=np.intp)
tm.assert_numpy_array_equal(result, expected)
def test_get_indexer_infer_string_missing_values(self):
# ensure the passed list is not cast to string but to object so that
# the None value is matched in the index
# https://github.com/pandas-dev/pandas/issues/55834
idx = Index(["a", "b", None], dtype="object")
result = idx.get_indexer([None, "x"])
expected = np.array([2, -1], dtype=np.intp)
tm.assert_numpy_array_equal(result, expected)
class TestGetIndexerNonUnique:
def test_get_indexer_non_unique_nas(self, nulls_fixture):
# even though this isn't non-unique, this should still work
index = Index(["a", "b", nulls_fixture], dtype=object)
indexer, missing = index.get_indexer_non_unique([nulls_fixture])
expected_indexer = np.array([2], dtype=np.intp)
expected_missing = np.array([], dtype=np.intp)
tm.assert_numpy_array_equal(indexer, expected_indexer)
tm.assert_numpy_array_equal(missing, expected_missing)
# actually non-unique
index = Index(["a", nulls_fixture, "b", nulls_fixture], dtype=object)
indexer, missing = index.get_indexer_non_unique([nulls_fixture])
expected_indexer = np.array([1, 3], dtype=np.intp)
tm.assert_numpy_array_equal(indexer, expected_indexer)
tm.assert_numpy_array_equal(missing, expected_missing)
# matching-but-not-identical nans
if is_matching_na(nulls_fixture, float("NaN")):
index = Index(["a", float("NaN"), "b", float("NaN")], dtype=object)
match_but_not_identical = True
elif is_matching_na(nulls_fixture, Decimal("NaN")):
index = Index(["a", Decimal("NaN"), "b", Decimal("NaN")], dtype=object)
match_but_not_identical = True
else:
match_but_not_identical = False
if match_but_not_identical:
indexer, missing = index.get_indexer_non_unique([nulls_fixture])
expected_indexer = np.array([1, 3], dtype=np.intp)
tm.assert_numpy_array_equal(indexer, expected_indexer)
tm.assert_numpy_array_equal(missing, expected_missing)
@pytest.mark.filterwarnings("ignore:elementwise comp:DeprecationWarning")
def test_get_indexer_non_unique_np_nats(self, np_nat_fixture, np_nat_fixture2):
expected_missing = np.array([], dtype=np.intp)
# matching-but-not-identical nats
if is_matching_na(np_nat_fixture, np_nat_fixture2):
# ensure nats are different objects
index = Index(
np.array(
["2021-10-02", np_nat_fixture.copy(), np_nat_fixture2.copy()],
dtype=object,
),
dtype=object,
)
# pass as index to prevent target from being casted to DatetimeIndex
indexer, missing = index.get_indexer_non_unique(
Index([np_nat_fixture], dtype=object)
)
expected_indexer = np.array([1, 2], dtype=np.intp)
tm.assert_numpy_array_equal(indexer, expected_indexer)
tm.assert_numpy_array_equal(missing, expected_missing)
# dt64nat vs td64nat
else:
try:
np_nat_fixture == np_nat_fixture2
except (TypeError, OverflowError):
# Numpy will raise on uncomparable types, like
# np.datetime64('NaT', 'Y') and np.datetime64('NaT', 'ps')
# https://github.com/numpy/numpy/issues/22762
return
index = Index(
np.array(
[
"2021-10-02",
np_nat_fixture,
np_nat_fixture2,
np_nat_fixture,
np_nat_fixture2,
],
dtype=object,
),
dtype=object,
)
# pass as index to prevent target from being casted to DatetimeIndex
indexer, missing = index.get_indexer_non_unique(
Index([np_nat_fixture], dtype=object)
)
expected_indexer = np.array([1, 3], dtype=np.intp)
tm.assert_numpy_array_equal(indexer, expected_indexer)
tm.assert_numpy_array_equal(missing, expected_missing)