8
8
import numpy as np
9
9
10
10
from pandas .errors import AbstractMethodError
11
+ from pandas .compat import _default_fill_value
11
12
from pandas .compat .numpy import function as nv
12
13
13
14
_not_implemented_message = "{} does not implement {}."
@@ -53,6 +54,7 @@ class ExtensionArray(object):
53
54
* unique
54
55
* factorize / _values_for_factorize
55
56
* argsort / _values_for_argsort
57
+ * take / _values_for_take
56
58
57
59
This class does not inherit from 'abc.ABCMeta' for performance reasons.
58
60
Methods and properties required by the interface raise
@@ -462,22 +464,38 @@ def factorize(self, na_sentinel=-1):
462
464
# ------------------------------------------------------------------------
463
465
# Indexing methods
464
466
# ------------------------------------------------------------------------
465
- def take (self , indexer , allow_fill = True , fill_value = None ):
466
- # type: (Sequence[int], bool, Optional[Any]) -> ExtensionArray
467
def _values_for_take(self):
    """Return the values that `take` should index into.

    The default implementation converts the array to object dtype.

    Returns
    -------
    array-like
        Must satisfy NumPy's `take` semantics.
    """
    object_values = self.astype(object)
    return object_values
478
+
479
+ def take (self , indexer , fill_value = _default_fill_value ):
480
+ # type: (Sequence[int], Optional[Any]) -> ExtensionArray
467
481
"""Take elements from an array.
468
482
469
483
Parameters
470
484
----------
471
485
indexer : sequence of integers
472
- indices to be taken. -1 is used to indicate values
473
- that are missing.
474
- allow_fill : bool, default True
475
- If False, indexer is assumed to contain no -1 values so no filling
476
- will be done. This short-circuits computation of a mask. Result is
477
- undefined if allow_fill == False and -1 is present in indexer.
478
- fill_value : any, default None
479
- Fill value to replace -1 values with. If applicable, this should
480
- use the sentinel missing value for this type.
486
+ Indices to be taken. See Notes for how negative indices
487
+ are handled.
488
+ fill_value : any, optional
489
+ Fill value to use for NA-indices. This has a few behaviors.
490
+
491
+ * fill_value is not specified : triggers NumPy's semantics
492
+ where negative values in `indexer` mean slices from the end.
493
+ * fill_value is NA : Fill positions where `indexer` is ``-1``
494
+ with ``self.dtype.na_value``. Anything considered NA by
495
+ :func:`pandas.isna` will result in ``self.dtype.na_value``
496
+ being used to fill.
497
+ * fill_value is not NA : Fill positions where `indexer` is ``-1``
498
+ with `fill_value`.
481
499
482
500
Returns
483
501
-------
@@ -487,44 +505,39 @@ def take(self, indexer, allow_fill=True, fill_value=None):
487
505
------
488
506
IndexError
489
507
When the indexer is out of bounds for the array.
508
+ ValueError
509
+ When the indexer contains negative values other than ``-1``
510
+ and `fill_value` is specified.
490
511
491
512
Notes
492
513
-----
493
- This should follow pandas' semantics where -1 indicates missing values.
494
- Positions where indexer is ``-1`` should be filled with the missing
495
- value for this type.
496
- This gives rise to the special case of a take on an empty
497
- ExtensionArray that does not raises an IndexError straight away
498
- when the `indexer` is all ``-1``.
499
-
500
- This is called by ``Series.__getitem__``, ``.loc``, ``iloc``, when the
501
- indexer is a sequence of values.
514
+ The meaning of negative values in `indexer` depends on the
515
+ `fill_value` argument. By default, we follow the behavior
516
+ of :meth:`numpy.take`, where negative indices indicate slices
517
+ from the end.
502
518
503
- Examples
504
- --------
505
- Suppose the extension array is backed by a NumPy array stored as
506
- ``self.data``. Then ``take`` may be written as
507
-
508
- .. code-block:: python
509
-
510
- def take(self, indexer, allow_fill=True, fill_value=None):
511
- indexer = np.asarray(indexer)
512
- mask = indexer == -1
513
-
514
- # take on empty array not handled as desired by numpy
515
- # in case of -1 (all missing take)
516
- if not len(self) and mask.all():
517
- return type(self)([np.nan] * len(indexer))
519
+ When `fill_value` is specified, we follow pandas semantics of ``-1``
520
+ indicating a missing value. In this case, positions where `indexer`
521
+ is ``-1`` will be filled with `fill_value` or the default NA value
522
+ for this type.
518
523
519
- result = self.data.take(indexer)
520
- result[mask] = np.nan # NA for this type
521
- return type(self)(result)
524
+ ExtensionArray.take is called by ``Series.__getitem__``, ``.loc``,
525
+ ``.iloc``, when the indexer is a sequence of values. Additionally,
526
+ it's called by :meth:`Series.reindex` with a `fill_value`.
522
527
523
528
See Also
524
529
--------
525
530
numpy.take
526
531
"""
527
- raise AbstractMethodError (self )
532
+ from pandas .core .algorithms import take_ea
533
+ from pandas .core .missing import isna
534
+
535
+ if isna (fill_value ):
536
+ fill_value = self .dtype .na_value
537
+
538
+ data = self ._values_for_take ()
539
+ result = take_ea (data , indexer , fill_value = fill_value )
540
+ return self ._from_sequence (result )
528
541
529
542
def copy (self , deep = False ):
530
543
# type: (bool) -> ExtensionArray
0 commit comments