Simplify

TomAugspurger · TomAugspurger · commit 125ca0b7cf57 · 2018-04-25T08:46:23.000-05:00
Upcasting is still broken
diff --git a/pandas/core/algorithms.py b/pandas/core/algorithms.py
@@ -1510,7 +1510,7 @@ def take_nd(arr, indexer, axis=0, out=None, fill_value=np.nan, mask_info=None,
     # TODO(EA): Remove these if / elifs as datetimeTZ, interval, become EAs
     # dispatch to internal type takes
     if is_extension_array_dtype(arr):
-        return arr.take(indexer, fill_value=fill_value)
+        return arr.take(indexer, fill_value=fill_value, allow_fill=allow_fill)
     elif is_datetimetz(arr):
         return arr.take(indexer, fill_value=fill_value, allow_fill=allow_fill)
     elif is_interval_dtype(arr):
diff --git a/pandas/core/arrays/base.py b/pandas/core/arrays/base.py
@@ -5,71 +5,14 @@
    This is an experimental API and subject to breaking changes
    without warning.
 """
-import textwrap
-
 import numpy as np
 
 from pandas.errors import AbstractMethodError
 from pandas.compat.numpy import function as nv
-from pandas.util._decorators import Appender, Substitution
 
 _not_implemented_message = "{} does not implement {}."
 
 
-_take_docstring = textwrap.dedent("""\
-Take elements from an array.
-
-Parameters
-----------
-%(arr)s\
-indexer : sequence of integers
-    Indices to be taken. See Notes for how negative indicies
-    are handled.
-fill_value : any, optional
-    Fill value to use for NA-indicies. This has a few behaviors.
-
-    * fill_value is not specified : triggers NumPy's semantics
-      where negative values in `indexer` mean slices from the end.
-    * fill_value is NA : Fill positions where `indexer` is ``-1``
-      with ``self.dtype.na_value``. Anything considered NA by
-      :func:`pandas.isna` will result in ``self.dtype.na_value``
-      being used to fill.
-    * fill_value is not NA : Fill positions where `indexer` is ``-1``
-      with `fill_value`.
-
-Returns
--------
-ExtensionArray
-
-Raises
-------
-IndexError
-    When the indexer is out of bounds for the array.
-ValueError
-    When the indexer contains negative values other than ``-1``
-    and `fill_value` is specified.
-
-Notes
------
-The meaning of negative values in `indexer` depends on the
-`fill_value` argument. By default, we follow the behavior
-:meth:`numpy.take` of where negative indices indicate slices
-from the end.
-
-When `fill_value` is specified, we follow pandas semantics of ``-1``
-indicating a missing value. In this case, positions where `indexer`
-is ``-1`` will be filled with `fill_value` or the default NA value
-for this type.
-
-ExtensionArray.take is called by ``Series.__getitem__``, ``.loc``,
-``iloc``, when the indexer is a sequence of values. Additionally,
-it's called by :meth:`Series.reindex` with a `fill_value`.
-
-See Also
---------
-numpy.take""")
-
-
 class ExtensionArray(object):
     """Abstract base class for custom 1-D array types.
 
@@ -532,15 +475,66 @@ def _values_for_take(self):
         """
         return self.astype(object)
 
-    @Substitution(arr='')
-    @Appender(_take_docstring)
     def take(self, indexer, fill_value=None, allow_fill=None):
-        # type: (Sequence[int], Optional[Any]) -> ExtensionArray
-        # assert fill_value is not np.nan
+        # type: (Sequence[int], Optional[Any], Optional[bool]) -> ExtensionArray
+        """Take elements from an array.
+
+        Parameters
+        ----------
+        indexer : sequence of integers
+            Indices to be taken. See Notes for how negative indicies
+            are handled.
+        fill_value : any, optional
+            Fill value to use for NA-indicies. This has a few behaviors.
+
+            * fill_value is not specified : triggers NumPy's semantics
+              where negative values in `indexer` mean slices from the end.
+            * fill_value is NA : Fill positions where `indexer` is ``-1``
+              with ``self.dtype.na_value``. Anything considered NA by
+              :func:`pandas.isna` will result in ``self.dtype.na_value``
+              being used to fill.
+            * fill_value is not NA : Fill positions where `indexer` is ``-1``
+              with `fill_value`.
+
+        Returns
+        -------
+        ExtensionArray
+
+        Raises
+        ------
+        IndexError
+            When the indexer is out of bounds for the array.
+        ValueError
+            When the indexer contains negative values other than ``-1``
+            and `fill_value` is specified.
+
+        Notes
+        -----
+        The meaning of negative values in `indexer` depends on the
+        `fill_value` argument. By default, we follow the behavior
+        :meth:`numpy.take` of where negative indices indicate slices
+        from the end.
+
+        When `fill_value` is specified, we follow pandas semantics of ``-1``
+        indicating a missing value. In this case, positions where `indexer`
+        is ``-1`` will be filled with `fill_value` or the default NA value
+        for this type.
+
+        ExtensionArray.take is called by ``Series.__getitem__``, ``.loc``,
+        ``iloc``, when the indexer is a sequence of values. Additionally,
+        it's called by :meth:`Series.reindex` with a `fill_value`.
+
+        See Also
+        --------
+        numpy.take
+        """
         from pandas.core.algorithms import take
 
         data = self._values_for_take()
-        result = take(data, indexer, fill_value=fill_value)
+        if allow_fill and fill_value is None:
+            fill_value = self.dtype.na_value
+
+        result = take(data, indexer, fill_value=fill_value, allow_fill=allow_fill)
         return self._from_sequence(result)
 
     def copy(self, deep=False):
@@ -605,4 +599,3 @@ def _ndarray_values(self):
         used for interacting with our indexers.
         """
         return np.array(self)
-
diff --git a/pandas/core/dtypes/cast.py b/pandas/core/dtypes/cast.py
@@ -256,7 +256,11 @@ def changeit():
 
 def maybe_promote(dtype, fill_value=np.nan):
     # if we passed an array here, determine the fill value by dtype
-    if isinstance(fill_value, np.ndarray):
+    if is_extension_array_dtype(dtype):
+        # XXX: verify this change
+        fill_value = dtype.na_value
+
+    elif isinstance(fill_value, np.ndarray):
         if issubclass(fill_value.dtype.type, (np.datetime64, np.timedelta64)):
             fill_value = iNaT
         else:
diff --git a/pandas/core/frame.py b/pandas/core/frame.py
@@ -3476,7 +3476,7 @@ def _reindex_index(self, new_index, method, copy, level, fill_value=np.nan,
                                            allow_dups=False)
 
     def _reindex_columns(self, new_columns, method, copy, level,
-                         fill_value=np.nan, limit=None, tolerance=None):
+                         fill_value=None, limit=None, tolerance=None):
         new_columns, indexer = self.columns.reindex(new_columns, method=method,
                                                     level=level, limit=limit,
                                                     tolerance=tolerance)
diff --git a/pandas/core/generic.py b/pandas/core/generic.py
@@ -3660,7 +3660,7 @@ def reindex(self, *args, **kwargs):
         copy = kwargs.pop('copy', True)
         limit = kwargs.pop('limit', None)
         tolerance = kwargs.pop('tolerance', None)
-        fill_value = kwargs.pop('fill_value', np.nan)
+        fill_value = kwargs.pop('fill_value', None)
 
         # Series.reindex doesn't use / need the axis kwarg
         # We pop and ignore it here, to make writing Series/Frame generic code
@@ -3776,7 +3776,7 @@ def _reindex_multi(self, axes, copy, fill_value):
 
     @Appender(_shared_docs['reindex_axis'] % _shared_doc_kwargs)
     def reindex_axis(self, labels, axis=0, method=None, level=None, copy=True,
-                     limit=None, fill_value=np.nan):
+                     limit=None, fill_value=None):
         msg = ("'.reindex_axis' is deprecated and will be removed in a future "
                "version. Use '.reindex' instead.")
         self._consolidate_inplace()
@@ -3790,7 +3790,7 @@ def reindex_axis(self, labels, axis=0, method=None, level=None, copy=True,
         return self._reindex_with_indexers({axis: [new_index, indexer]},
                                            fill_value=fill_value, copy=copy)
 
-    def _reindex_with_indexers(self, reindexers, fill_value=np.nan, copy=False,
+    def _reindex_with_indexers(self, reindexers, fill_value=None, copy=False,
                                allow_dups=False):
         """allow_dups indicates an internal call here """
 
@@ -7252,7 +7252,7 @@ def align(self, other, join='outer', axis=None, level=None, copy=True,
             raise TypeError('unsupported type: %s' % type(other))
 
     def _align_frame(self, other, join='outer', axis=None, level=None,
-                     copy=True, fill_value=np.nan, method=None, limit=None,
+                     copy=True, fill_value=None, method=None, limit=None,
                      fill_axis=0):
         # defaults
         join_index, join_columns = None, None
@@ -7420,6 +7420,7 @@ def _where(self, cond, other=np.nan, inplace=False, axis=None, level=None,
             if other.ndim <= self.ndim:
 
                 _, other = self.align(other, join='left', axis=axis,
+                                      # XXX
                                       level=level, fill_value=np.nan)
 
                 # if we are NOT aligned, raise as we cannot where index
diff --git a/pandas/core/internals.py b/pandas/core/internals.py
@@ -1888,6 +1888,11 @@ def _holder(self):
         # For extension blocks, the holder is values-dependent.
         return type(self.values)
 
+    @property
+    def fill_value(self):
+        # Used in reindex_indexer
+        return self.values.dtype.na_value
+
     @property
     def _can_hold_na(self):
         # The default ExtensionArray._can_hold_na is True
@@ -1951,7 +1956,8 @@ def take_nd(self, indexer, axis=0, new_mgr_locs=None, fill_tuple=None):
         # axis doesn't matter; we are really a single-dim object
         # but are passed the axis depending on the calling routing
         # if its REALLY axis 0, then this will be a reindex and not a take
-        new_values = self.values.take(indexer, fill_value=fill_value)
+        new_values = self.values.take(indexer, fill_value=fill_value,
+                                      allow_fill=True)
 
         # if we are a 1-dim object, then always place at 0
         if self.ndim == 1:
diff --git a/pandas/core/series.py b/pandas/core/series.py
@@ -2185,7 +2185,7 @@ def _binop(self, other, func, level=None, fill_value=None):
             result.name = None
         return result
 
-    def combine(self, other, func, fill_value=np.nan):
+    def combine(self, other, func, fill_value=None):
         """
         Perform elementwise binary operation on two Series using given function
         with optional fill value when an index is missing from one Series or
@@ -3216,7 +3216,10 @@ def _reindex_indexer(self, new_index, indexer, copy):
                 return self.copy()
             return self
 
-        new_values = algorithms.take_1d(self._values, indexer)
+        # TODO: determine if we want EA to handle fill_value=None
+        # if not, then we have to determine this here.
+        new_values = algorithms.take_1d(self._values, indexer,
+                                        fill_value=None, allow_fill=True)
         return self._constructor(new_values, index=new_index)
 
     def _needs_reindex_multi(self, axes, method, level):
diff --git a/pandas/tests/extension/base/getitem.py b/pandas/tests/extension/base/getitem.py
@@ -155,7 +155,7 @@ def test_take_non_na_fill_value(self, data_missing):
 
     def test_take_pandas_style_negative_raises(self, data, na_value):
         with pytest.raises(ValueError):
-            data.take([0, -2], fill_value=na_value)
+            data.take([0, -2], fill_value=na_value, allow_fill=True)
 
     @pytest.mark.xfail(reason="Series.take with extension array buggy for -1")
     def test_take_series(self, data):
diff --git a/pandas/tests/extension/decimal/array.py b/pandas/tests/extension/decimal/array.py
@@ -28,6 +28,7 @@ class DecimalArray(ExtensionArray):
     dtype = DecimalDtype()
 
     def __init__(self, values):
+        assert all(isinstance(v, decimal.Decimal) for v in values)
         values = np.asarray(values, dtype=object)
 
         self._data = values
diff --git a/pandas/tests/extension/decimal/test_decimal.py b/pandas/tests/extension/decimal/test_decimal.py
@@ -119,7 +119,8 @@ def test_take_basic(self):
                                  decimal.Decimal('3')])
         self.assert_extension_array_equal(result, expected)
 
-        result = ea.take([1, 2, -1], fill_value=ea.dtype.na_value)
+        result = ea.take([1, 2, -1], fill_value=ea.dtype.na_value,
+                         allow_fill=True)
         expected = DecimalArray([decimal.Decimal('2'),
                                  decimal.Decimal('3'),
                                  decimal.Decimal('NaN')])
diff --git a/pandas/tests/extension/json/array.py b/pandas/tests/extension/json/array.py
@@ -94,20 +94,22 @@ def nbytes(self):
     def isna(self):
         return np.array([x == self.dtype.na_value for x in self.data])
 
-    def take(self, indexer, fill_value=None):
+    def take(self, indexer, fill_value=None, allow_fill=None):
         # re-implement here, since NumPy has trouble setting
         # sized objects like UserDicts into scalar slots of
         # an ndarary.
         indexer = np.asarray(indexer)
         msg = ("Index is out of bounds or cannot do a "
                "non-empty take from an empty array.")
 
-        if fill_value is None:
+        if allow_fill is None:
             try:
                 output = [self.data[loc] for loc in indexer]
             except IndexError:
                 raise IndexError(msg)
         else:
+            if fill_value is None:
+                fill_value = self.dtype.na_value
             # bounds check
             if (indexer < -1).any():
                 raise ValueError