|
1 | 1 | """
|
2 | 2 | Low-dependency indexing utilities.
|
3 | 3 | """
|
| 4 | +import warnings |
| 5 | + |
4 | 6 | import numpy as np
|
5 | 7 |
|
6 |
| -from pandas._typing import AnyArrayLike |
| 8 | +from pandas._typing import Any, AnyArrayLike |
7 | 9 |
|
8 |
| -from pandas.core.dtypes.common import is_list_like |
| 10 | +from pandas.core.dtypes.common import ( |
| 11 | + is_array_like, |
| 12 | + is_bool_dtype, |
| 13 | + is_integer_dtype, |
| 14 | + is_list_like, |
| 15 | +) |
9 | 16 | from pandas.core.dtypes.generic import ABCIndexClass, ABCSeries
|
10 | 17 |
|
11 | 18 | # -----------------------------------------------------------
|
@@ -244,66 +251,166 @@ def length_of_indexer(indexer, target=None) -> int:
|
244 | 251 | raise AssertionError("cannot find the length of the indexer")
|
245 | 252 |
|
246 | 253 |
|
247 |
| -def check_bool_array_indexer(array: AnyArrayLike, mask: AnyArrayLike) -> np.ndarray: |
def deprecate_ndim_indexing(result, stacklevel: int = 3) -> None:
    """
    Emit the deprecation warning for multi-dimensional indexing on a 1D
    Series/Index.

    GH#27125 indexer like idx[:, None] expands dim, but we cannot do that
    and keep an index, so we currently return ndarray, which is deprecated
    (Deprecation GH#30588).

    Parameters
    ----------
    result : object
        The outcome of the indexing operation. A warning is emitted only
        when ``np.ndim(result)`` is greater than 1; otherwise this function
        does nothing.
    stacklevel : int, default 3
        Forwarded to ``warnings.warn``. Callers that sit at a different
        depth below the user's indexing expression can pass another value
        so that the warning is attributed to the user's code.
    """
    if np.ndim(result) > 1:
        warnings.warn(
            "Support for multi-dimensional indexing (e.g. `index[:, None]`) "
            "on an Index is deprecated and will be removed in a future "
            "version. Convert to a numpy array before indexing instead.",
            DeprecationWarning,
            stacklevel=stacklevel,
        )
| 271 | + |
| 272 | + |
| 273 | +# ----------------------------------------------------------- |
| 274 | +# Public indexer validation |
250 | 275 |
|
251 |
| - `array` and `mask` are checked to have the same length, and the |
252 |
| - dtype is validated. |
| 276 | + |
def check_array_indexer(array: AnyArrayLike, indexer: Any) -> Any:
    """
    Check if `indexer` is a valid array indexer for `array`.

    For a boolean mask, `array` and `indexer` are checked to have the same
    length. The dtype is validated, and if it is an integer or boolean
    ExtensionArray, it is checked if there are missing values present, and
    it is converted to the appropriate numpy array. Other dtypes will raise
    an error.

    Non-array indexers (integer, slice, Ellipsis, tuples, ..) are passed
    through as is.

    .. versionadded:: 1.0.0

    Parameters
    ----------
    array : array-like
        The array that is being indexed (only used for the length).
    indexer : array-like or list-like
        The array-like that's used to index. List-like input that is not yet
        a numpy array or an ExtensionArray is converted to one. Other input
        types are passed through as is.

    Returns
    -------
    numpy.ndarray
        The validated indexer as a numpy array that can be used to index.
        Indexers that are not list-like, and tuples, are returned unchanged.

    Raises
    ------
    IndexError
        When the lengths don't match, or when the indexer has a dtype that
        is neither integer nor boolean.
    ValueError
        When `indexer` cannot be converted to a numpy ndarray to index
        (e.g. presence of missing values).

    See Also
    --------
    api.types.is_bool_dtype : Check if `key` is of boolean dtype.

    Examples
    --------
    When checking a boolean mask, a boolean ndarray is returned when the
    arguments are all valid.

    >>> mask = pd.array([True, False])
    >>> arr = pd.array([1, 2])
    >>> pd.api.indexers.check_array_indexer(arr, mask)
    array([ True, False])

    An IndexError is raised when the lengths don't match.

    >>> mask = pd.array([True, False, True])
    >>> pd.api.indexers.check_array_indexer(arr, mask)
    Traceback (most recent call last):
    ...
    IndexError: Boolean index has wrong length: 3 instead of 2

    A ValueError is raised when the mask cannot be converted to
    a bool-dtype ndarray.

    >>> mask = pd.array([True, pd.NA])
    >>> pd.api.indexers.check_array_indexer(arr, mask)
    Traceback (most recent call last):
    ...
    ValueError: Cannot mask with a boolean indexer containing NA values

    A numpy boolean mask will get passed through (if the length is correct):

    >>> mask = np.array([True, False])
    >>> pd.api.indexers.check_array_indexer(arr, mask)
    array([ True, False])

    Similarly for integer indexers, an integer ndarray is returned when it is
    a valid indexer, otherwise an error is raised (for integer indexers, a
    matching length is not required):

    >>> indexer = pd.array([0, 2], dtype="Int64")
    >>> arr = pd.array([1, 2, 3])
    >>> pd.api.indexers.check_array_indexer(arr, indexer)
    array([0, 2])

    >>> indexer = pd.array([0, pd.NA], dtype="Int64")
    >>> pd.api.indexers.check_array_indexer(arr, indexer)
    Traceback (most recent call last):
    ...
    ValueError: Cannot index with an integer indexer containing NA values

    For non-integer/boolean dtypes, an appropriate error is raised:

    >>> indexer = np.array([0., 2.], dtype="float64")
    >>> pd.api.indexers.check_array_indexer(arr, indexer)
    Traceback (most recent call last):
    ...
    IndexError: arrays used as indices must be of integer or boolean type
    """
    # Imported locally, as in the original block.
    from pandas.core.construction import array as pd_array

    # Whatever is not an array-like is returned as-is (possible valid array
    # indexers that are not array-like: integer, slice, Ellipsis, None).
    # In this context, tuples are not considered as array-like, as they have
    # a specific meaning in indexing (multi-dimensional indexing).
    if not is_list_like(indexer) or isinstance(indexer, tuple):
        return indexer

    # convert list-likes to array
    if not is_array_like(indexer):
        indexer = pd_array(indexer)
        if len(indexer) == 0:
            # empty list is converted to float array by pd.array
            indexer = np.array([], dtype=np.intp)

    dtype = indexer.dtype
    if is_bool_dtype(dtype):
        try:
            indexer = np.asarray(indexer, dtype=bool)
        except ValueError as err:
            # Chain explicitly so the underlying conversion error stays
            # attached as the cause of the user-facing message.
            raise ValueError(
                "Cannot mask with a boolean indexer containing NA values"
            ) from err

        # GH26658
        if len(indexer) != len(array):
            raise IndexError(
                f"Boolean index has wrong length: "
                f"{len(indexer)} instead of {len(array)}"
            )
    elif is_integer_dtype(dtype):
        try:
            indexer = np.asarray(indexer, dtype=np.intp)
        except ValueError as err:
            raise ValueError(
                "Cannot index with an integer indexer containing NA values"
            ) from err
    else:
        raise IndexError("arrays used as indices must be of integer or boolean type")

    return indexer
0 commit comments