-
-
Notifications
You must be signed in to change notification settings - Fork 18.5k
/
Copy pathindexers.py
416 lines (338 loc) · 11.9 KB
/
indexers.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
"""
Low-dependency indexing utilities.
"""
import warnings
import numpy as np
from pandas._typing import Any, AnyArrayLike
from pandas.core.dtypes.common import (
is_array_like,
is_bool_dtype,
is_integer_dtype,
is_list_like,
)
from pandas.core.dtypes.generic import ABCIndexClass, ABCSeries
# -----------------------------------------------------------
# Indexer Identification
def is_list_like_indexer(key) -> bool:
"""
Check if we have a list-like indexer that is *not* a NamedTuple.
Parameters
----------
key : object
Returns
-------
bool
"""
# allow a list_like, but exclude NamedTuples which can be indexers
return is_list_like(key) and not (isinstance(key, tuple) and type(key) is not tuple)
def is_scalar_indexer(indexer, arr_value) -> bool:
"""
Return True if we are all scalar indexers.
Returns
-------
bool
"""
if arr_value.ndim == 1:
if not isinstance(indexer, tuple):
indexer = tuple([indexer])
return any(isinstance(idx, np.ndarray) and len(idx) == 0 for idx in indexer)
return False
def is_empty_indexer(indexer, arr_value: np.ndarray) -> bool:
"""
Check if we have an empty indexer.
Parameters
----------
indexer : object
arr_value : np.ndarray
Returns
-------
bool
"""
if is_list_like(indexer) and not len(indexer):
return True
if arr_value.ndim == 1:
if not isinstance(indexer, tuple):
indexer = tuple([indexer])
return any(isinstance(idx, np.ndarray) and len(idx) == 0 for idx in indexer)
return False
# -----------------------------------------------------------
# Indexer Validation
def check_setitem_lengths(indexer, value, values) -> None:
"""
Validate that value and indexer are the same length.
An special-case is allowed for when the indexer is a boolean array
and the number of true values equals the length of ``value``. In
this case, no exception is raised.
Parameters
----------
indexer : sequence
Key for the setitem.
value : array-like
Value for the setitem.
values : array-like
Values being set into.
Returns
-------
None
Raises
------
ValueError
When the indexer is an ndarray or list and the lengths don't match.
"""
# boolean with truth values == len of the value is ok too
if isinstance(indexer, (np.ndarray, list)):
if is_list_like(value) and len(indexer) != len(value):
if not (
isinstance(indexer, np.ndarray)
and indexer.dtype == np.bool_
and len(indexer[indexer]) == len(value)
):
raise ValueError(
"cannot set using a list-like indexer "
"with a different length than the value"
)
elif isinstance(indexer, slice):
# slice
if is_list_like(value) and len(values):
if len(value) != length_of_indexer(indexer, values):
raise ValueError(
"cannot set using a slice indexer with a "
"different length than the value"
)
def validate_indices(indices: np.ndarray, n: int) -> None:
"""
Perform bounds-checking for an indexer.
-1 is allowed for indicating missing values.
Parameters
----------
indices : ndarray
n : int
Length of the array being indexed.
Raises
------
ValueError
Examples
--------
>>> validate_indices([1, 2], 3)
# OK
>>> validate_indices([1, -2], 3)
ValueError
>>> validate_indices([1, 2, 3], 3)
IndexError
>>> validate_indices([-1, -1], 0)
# OK
>>> validate_indices([0, 1], 0)
IndexError
"""
if len(indices):
min_idx = indices.min()
if min_idx < -1:
msg = f"'indices' contains values less than allowed ({min_idx} < -1)"
raise ValueError(msg)
max_idx = indices.max()
if max_idx >= n:
raise IndexError("indices are out-of-bounds")
# -----------------------------------------------------------
# Indexer Conversion
def maybe_convert_indices(indices, n: int):
"""
Attempt to convert indices into valid, positive indices.
If we have negative indices, translate to positive here.
If we have indices that are out-of-bounds, raise an IndexError.
Parameters
----------
indices : array-like
Array of indices that we are to convert.
n : int
Number of elements in the array that we are indexing.
Returns
-------
array-like
An array-like of positive indices that correspond to the ones
that were passed in initially to this function.
Raises
------
IndexError
One of the converted indices either exceeded the number of,
elements (specified by `n`), or was still negative.
"""
if isinstance(indices, list):
indices = np.array(indices)
if len(indices) == 0:
# If `indices` is empty, np.array will return a float,
# and will cause indexing errors.
return np.empty(0, dtype=np.intp)
mask = indices < 0
if mask.any():
indices = indices.copy()
indices[mask] += n
mask = (indices >= n) | (indices < 0)
if mask.any():
raise IndexError("indices are out-of-bounds")
return indices
# -----------------------------------------------------------
# Unsorted
def length_of_indexer(indexer, target=None) -> int:
"""
Return the length of a single non-tuple indexer which could be a slice.
Returns
-------
int
"""
if target is not None and isinstance(indexer, slice):
target_len = len(target)
start = indexer.start
stop = indexer.stop
step = indexer.step
if start is None:
start = 0
elif start < 0:
start += target_len
if stop is None or stop > target_len:
stop = target_len
elif stop < 0:
stop += target_len
if step is None:
step = 1
elif step < 0:
start, stop = stop + 1, start + 1
step = -step
return (stop - start + step - 1) // step
elif isinstance(indexer, (ABCSeries, ABCIndexClass, np.ndarray, list)):
return len(indexer)
elif not is_list_like_indexer(indexer):
return 1
raise AssertionError("cannot find the length of the indexer")
def deprecate_ndim_indexing(result):
"""
Helper function to raise the deprecation warning for multi-dimensional
indexing on 1D Series/Index.
GH#27125 indexer like idx[:, None] expands dim, but we cannot do that
and keep an index, so we currently return ndarray, which is deprecated
(Deprecation GH#30588).
"""
if np.ndim(result) > 1:
warnings.warn(
"Support for multi-dimensional indexing (e.g. `index[:, None]`) "
"on an Index is deprecated and will be removed in a future "
"version. Convert to a numpy array before indexing instead.",
DeprecationWarning,
stacklevel=3,
)
# -----------------------------------------------------------
# Public indexer validation
def check_array_indexer(array: AnyArrayLike, indexer: Any) -> Any:
"""
Check if `indexer` is a valid array indexer for `array`.
For a boolean mask, `array` and `indexer` are checked to have the same
length. The dtype is validated, and if it is an integer or boolean
ExtensionArray, it is checked if there are missing values present, and
it is converted to the appropriate numpy array. Other dtypes will raise
an error.
Non-array indexers (integer, slice, Ellipsis, tuples, ..) are passed
through as is.
.. versionadded:: 1.0.0
Parameters
----------
array : array-like
The array that is being indexed (only used for the length).
indexer : array-like or list-like
The array-like that's used to index. List-like input that is not yet
a numpy array or an ExtensionArray is converted to one. Other input
types are passed through as is
Returns
-------
numpy.ndarray
The validated indexer as a numpy array that can be used to index.
Raises
------
IndexError
When the lengths don't match.
ValueError
When `indexer` cannot be converted to a numpy ndarray to index
(e.g. presence of missing values).
See Also
--------
api.types.is_bool_dtype : Check if `key` is of boolean dtype.
Examples
--------
When checking a boolean mask, a boolean ndarray is returned when the
arguments are all valid.
>>> mask = pd.array([True, False])
>>> arr = pd.array([1, 2])
>>> pd.api.indexers.check_array_indexer(arr, mask)
array([ True, False])
An IndexError is raised when the lengths don't match.
>>> mask = pd.array([True, False, True])
>>> pd.api.indexers.check_array_indexer(arr, mask)
Traceback (most recent call last):
...
IndexError: Boolean index has wrong length: 3 instead of 2.
A ValueError is raised when the mask cannot be converted to
a bool-dtype ndarray.
>>> mask = pd.array([True, pd.NA])
>>> pd.api.indexers.check_array_indexer(arr, mask)
Traceback (most recent call last):
...
ValueError: Cannot mask with a boolean indexer containing NA values
A numpy boolean mask will get passed through (if the length is correct):
>>> mask = np.array([True, False])
>>> pd.api.indexers.check_array_indexer(arr, mask)
array([ True, False])
Similarly for integer indexers, an integer ndarray is returned when it is
a valid indexer, otherwise an error is (for integer indexers, a matching
length is not required):
>>> indexer = pd.array([0, 2], dtype="Int64")
>>> arr = pd.array([1, 2, 3])
>>> pd.api.indexers.check_array_indexer(arr, indexer)
array([0, 2])
>>> indexer = pd.array([0, pd.NA], dtype="Int64")
>>> pd.api.indexers.check_array_indexer(arr, indexer)
Traceback (most recent call last):
...
ValueError: Cannot index with an integer indexer containing NA values
For non-integer/boolean dtypes, an appropriate error is raised:
>>> indexer = np.array([0., 2.], dtype="float64")
>>> pd.api.indexers.check_array_indexer(arr, indexer)
Traceback (most recent call last):
...
IndexError: arrays used as indices must be of integer or boolean type
"""
from pandas.core.construction import array as pd_array
# whathever is not an array-like is returned as-is (possible valid array
# indexers that are not array-like: integer, slice, Ellipsis, None)
# In this context, tuples are not considered as array-like, as they have
# a specific meaning in indexing (multi-dimensional indexing)
if is_list_like(indexer):
if isinstance(indexer, tuple):
return indexer
else:
return indexer
# convert list-likes to array
if not is_array_like(indexer):
indexer = pd_array(indexer)
if len(indexer) == 0:
# empty list is converted to float array by pd.array
indexer = np.array([], dtype=np.intp)
dtype = indexer.dtype
if is_bool_dtype(dtype):
try:
indexer = np.asarray(indexer, dtype=bool)
except ValueError:
raise ValueError("Cannot mask with a boolean indexer containing NA values")
# GH26658
if len(indexer) != len(array):
raise IndexError(
f"Boolean index has wrong length: "
f"{len(indexer)} instead of {len(array)}"
)
elif is_integer_dtype(dtype):
try:
indexer = np.asarray(indexer, dtype=np.intp)
except ValueError:
raise ValueError(
"Cannot index with an integer indexer containing NA values"
)
else:
raise IndexError("arrays used as indices must be of integer or boolean type")
return indexer