@@ -462,22 +462,36 @@ def factorize(self, na_sentinel=-1):
462
462
# ------------------------------------------------------------------------
463
463
# Indexing methods
464
464
# ------------------------------------------------------------------------
465
- def take (self , indexer , allow_fill = True , fill_value = None ):
465
+
466
+ def take (self , indices , allow_fill = False , fill_value = None ):
466
467
# type: (Sequence[int], bool, Optional[Any]) -> ExtensionArray
467
468
"""Take elements from an array.
468
469
469
470
Parameters
470
471
----------
471
- indexer : sequence of integers
472
- indices to be taken. -1 is used to indicate values
473
- that are missing.
474
- allow_fill : bool, default True
475
- If False, indexer is assumed to contain no -1 values so no filling
476
- will be done. This short-circuits computation of a mask. Result is
477
- undefined if allow_fill == False and -1 is present in indexer.
478
- fill_value : any, default None
479
- Fill value to replace -1 values with. If applicable, this should
480
- use the sentinel missing value for this type.
472
+ indices : sequence of integers
473
+ Indices to be taken.
474
+ allow_fill : bool, default False
475
+ How to handle negative values in `indices`.
476
+
477
+ * False: negative values in `indices` indicate positional indices
478
+ from the right (the default). This is similar to
479
+ :func:`numpy.take`.
480
+
481
+ * True: ``-1`` in `indices` indicates
482
+ missing values. These values are set to `fill_value`. Any
483
+ other negative values raise a ``ValueError``.
484
+
485
+ fill_value : any, optional
486
+ Fill value to use for NA-indices when `allow_fill` is True.
487
+ This may be ``None``, in which case the default NA value for
488
+ the type, ``self.dtype.na_value``, is used.
489
+
490
+ For many ExtensionArrays, there will be two representations of
491
+ `fill_value`: a user-facing "boxed" scalar, and a low-level
492
+ physical NA value. `fill_value` should be the user-facing version,
493
+ and the implementation should handle translating that to the
494
+ physical version for processing the take if necessary.
481
495
482
496
Returns
483
497
-------
@@ -486,44 +500,56 @@ def take(self, indexer, allow_fill=True, fill_value=None):
486
500
Raises
487
501
------
488
502
IndexError
489
- When the indexer is out of bounds for the array.
503
+ When the indices are out of bounds for the array.
504
+ ValueError
505
+ When `indices` contains negative values other than ``-1``
506
+ and `allow_fill` is True.
490
507
491
508
Notes
492
509
-----
493
- This should follow pandas' semantics where -1 indicates missing values.
494
- Positions where indexer is ``-1`` should be filled with the missing
495
- value for this type.
496
- This gives rise to the special case of a take on an empty
497
- ExtensionArray that does not raises an IndexError straight away
498
- when the `indexer` is all ``-1``.
510
+ ExtensionArray.take is called by ``Series.__getitem__``, ``.loc``,
511
+ ``.iloc``, when `indices` is a sequence of values. Additionally,
512
+ it's called by :meth:`Series.reindex`, or any other method
513
+ that causes realignment, with a `fill_value`.
499
514
500
- This is called by ``Series.__getitem__``, ``.loc``, ``iloc``, when the
501
- indexer is a sequence of values.
515
+ See Also
516
+ --------
517
+ numpy.take
518
+ pandas.api.extensions.take
502
519
503
520
Examples
504
521
--------
505
- Suppose the extension array is backed by a NumPy array stored as
506
- ``self.data``. Then ``take`` may be written as
522
+ Here's an example implementation, which relies on casting the
523
+ extension array to object dtype. This uses the helper method
524
+ :func:`pandas.api.extensions.take`.
507
525
508
526
.. code-block:: python
509
527
510
- def take(self, indexer, allow_fill=True, fill_value=None):
511
- indexer = np.asarray(indexer)
512
- mask = indexer == -1
528
+ def take(self, indices, allow_fill=False, fill_value=None):
529
+ from pandas.core.algorithms import take
513
530
514
- # take on empty array not handled as desired by numpy
515
- # in case of -1 (all missing take)
516
- if not len(self) and mask.all():
517
- return type(self)([np.nan] * len(indexer))
531
+ # If the ExtensionArray is backed by an ndarray, then
532
+ # just pass that here instead of coercing to object.
533
+ data = self.astype(object)
518
534
519
- result = self.data.take(indexer)
520
- result[mask] = np.nan # NA for this type
521
- return type(self)(result)
535
+ if allow_fill and fill_value is None:
536
+ fill_value = self.dtype.na_value
522
537
523
- See Also
524
- --------
525
- numpy.take
538
+ # fill value should always be translated from the scalar
539
+ # type for the array, to the physical storage type for
540
+ # the data, before passing to take.
541
+
542
+ result = take(data, indices, fill_value=fill_value,
543
+ allow_fill=allow_fill)
544
+ return self._from_sequence(result)
526
545
"""
546
+ # Implementer note: The `fill_value` parameter should be a user-facing
547
+ # value, an instance of self.dtype.type. When passed `fill_value=None`,
548
+ # the default of `self.dtype.na_value` should be used.
549
+ # This may differ from the physical storage type your ExtensionArray
550
+ # uses. In this case, your implementation is responsible for casting
551
+ # the user-facing type to the storage type, before using
552
+ # pandas.api.extensions.take
527
553
raise AbstractMethodError (self )
528
554
529
555
def copy (self , deep = False ):
0 commit comments