pandas-dev · TomAugspurger · Jan 29, 2020 · Jan 20, 2020 · Jan 20, 2020 · Jan 20, 2020
diff --git a/doc/source/reference/extensions.rst b/doc/source/reference/extensions.rst
@@ -66,7 +66,7 @@ behaves correctly.
 .. autosummary::
   :toctree: api/
 
-  api.indexers.check_bool_array_indexer
+  api.indexers.check_array_indexer
 
 
 The sentinel ``pandas.api.extensions.no_default`` is used as the default

diff --git a/pandas/api/indexers/__init__.py b/pandas/api/indexers/__init__.py
@@ -2,7 +2,7 @@
 Public API for Rolling Window Indexers.
 """
 
-from pandas.core.indexers import check_bool_array_indexer
+from pandas.core.indexers import check_array_indexer
 from pandas.core.window.indexers import BaseIndexer
 
-__all__ = ["check_bool_array_indexer", "BaseIndexer"]
+__all__ = ["check_array_indexer", "BaseIndexer"]
diff --git a/pandas/core/arrays/categorical.py b/pandas/core/arrays/categorical.py
@@ -39,7 +39,7 @@
 )
 from pandas.core.dtypes.dtypes import CategoricalDtype
 from pandas.core.dtypes.generic import ABCIndexClass, ABCSeries
-from pandas.core.dtypes.inference import is_array_like, is_hashable
+from pandas.core.dtypes.inference import is_hashable
 from pandas.core.dtypes.missing import isna, notna
 
 from pandas.core import ops
@@ -54,7 +54,7 @@
 from pandas.core.base import NoNewAttributesMixin, PandasObject, _shared_docs
 import pandas.core.common as com
 from pandas.core.construction import array, extract_array, sanitize_array
-from pandas.core.indexers import check_bool_array_indexer
+from pandas.core.indexers import check_array_indexer, deprecate_ndim_indexing
 from pandas.core.missing import interpolate_2d
 from pandas.core.ops.common import unpack_zerodim_and_defer
 from pandas.core.sorting import nargsort
@@ -2001,14 +2001,12 @@ def __getitem__(self, key):
             else:
                 return self.categories[i]
 
-        if is_list_like(key) and not is_array_like(key):
-            key = np.asarray(key)
-
-        if com.is_bool_indexer(key):
-            key = check_bool_array_indexer(self, key)
+        if is_list_like(key) and not isinstance(key, tuple):
+            key = check_array_indexer(self, key)
 
         result = self._codes[key]
         if result.ndim > 1:
+            deprecate_ndim_indexing(result)
             return result
         return self._constructor(result, dtype=self.dtype, fastpath=True)
 

diff --git a/pandas/core/arrays/datetimelike.py b/pandas/core/arrays/datetimelike.py
@@ -42,7 +42,7 @@
 from pandas.core.algorithms import checked_add_with_arr, take, unique1d, value_counts
 from pandas.core.arrays.base import ExtensionArray, ExtensionOpsMixin
 import pandas.core.common as com
-from pandas.core.indexers import check_bool_array_indexer
+from pandas.core.indexers import check_array_indexer
 from pandas.core.ops.common import unpack_zerodim_and_defer
 from pandas.core.ops.invalid import invalid_comparison, make_invalid_op
 
@@ -517,8 +517,12 @@ def __getitem__(self, key):
                 return self._box_func(val)
             return type(self)(val, dtype=self.dtype)
 
+        if is_list_like(key) and not isinstance(key, tuple):
+            key = check_array_indexer(self, key)
+
         if com.is_bool_indexer(key):
-            key = check_bool_array_indexer(self, key)
+            # can still have object dtype
+            key = np.asarray(key, dtype=bool)
             if key.all():
                 key = slice(0, None, None)
             else:

diff --git a/pandas/core/arrays/interval.py b/pandas/core/arrays/interval.py
@@ -40,6 +40,7 @@
 from pandas.core.arrays.categorical import Categorical
 import pandas.core.common as com
 from pandas.core.construction import array
+from pandas.core.indexers import check_array_indexer
 from pandas.core.indexes.base import ensure_index
 
 _VALID_CLOSED = {"left", "right", "both", "neither"}
@@ -495,6 +496,8 @@ def __len__(self) -> int:
         return len(self.left)
 
     def __getitem__(self, value):
+        if is_list_like(value):
+            value = check_array_indexer(self, value)
         left = self.left[value]
         right = self.right[value]
 

diff --git a/pandas/core/arrays/masked.py b/pandas/core/arrays/masked.py
@@ -4,13 +4,17 @@
 
 from pandas._libs import lib, missing as libmissing
 
-from pandas.core.dtypes.common import is_integer, is_object_dtype, is_string_dtype
+from pandas.core.dtypes.common import (
+    is_integer,
+    is_list_like,
+    is_object_dtype,
+    is_string_dtype,
+)
 from pandas.core.dtypes.missing import isna, notna
 
 from pandas.core.algorithms import take
 from pandas.core.arrays import ExtensionArray, ExtensionOpsMixin
-import pandas.core.common as com
-from pandas.core.indexers import check_bool_array_indexer
+from pandas.core.indexers import check_array_indexer
 
 if TYPE_CHECKING:
     from pandas._typing import Scalar
@@ -35,8 +39,8 @@ def __getitem__(self, item):
                 return self.dtype.na_value
             return self._data[item]
 
-        elif com.is_bool_indexer(item):
-            item = check_bool_array_indexer(self, item)
+        elif is_list_like(item):
+            item = check_array_indexer(self, item)
 
         return type(self)(self._data[item], self._mask[item])
 

diff --git a/pandas/core/arrays/numpy_.py b/pandas/core/arrays/numpy_.py
@@ -9,6 +9,7 @@
 from pandas.util._decorators import Appender
 from pandas.util._validators import validate_fillna_kwargs
 
+from pandas.core.dtypes.common import is_list_like
 from pandas.core.dtypes.dtypes import ExtensionDtype
 from pandas.core.dtypes.generic import ABCIndexClass, ABCSeries
 from pandas.core.dtypes.inference import is_array_like
@@ -18,9 +19,8 @@
 from pandas.core import nanops
 from pandas.core.algorithms import searchsorted, take, unique
 from pandas.core.arrays.base import ExtensionArray, ExtensionOpsMixin
-import pandas.core.common as com
 from pandas.core.construction import extract_array
-from pandas.core.indexers import check_bool_array_indexer
+from pandas.core.indexers import check_array_indexer
 from pandas.core.missing import backfill_1d, pad_1d
 
 
@@ -234,8 +234,8 @@ def __getitem__(self, item):
         if isinstance(item, type(self)):
             item = item._ndarray
 
-        elif com.is_bool_indexer(item):
-            item = check_bool_array_indexer(self, item)
+        elif is_list_like(item):
+            item = check_array_indexer(self, item)
 
         result = self._ndarray[item]
         if not lib.is_scalar(item):

diff --git a/pandas/core/arrays/sparse/array.py b/pandas/core/arrays/sparse/array.py
@@ -29,6 +29,7 @@
     is_datetime64_any_dtype,
     is_dtype_equal,
     is_integer,
+    is_list_like,
     is_object_dtype,
     is_scalar,
     is_string_dtype,
@@ -43,6 +44,7 @@
 from pandas.core.base import PandasObject
 import pandas.core.common as com
 from pandas.core.construction import sanitize_array
+from pandas.core.indexers import check_array_indexer
 from pandas.core.missing import interpolate_2d
 import pandas.core.ops as ops
 from pandas.core.ops.common import unpack_zerodim_and_defer
@@ -768,6 +770,9 @@ def __getitem__(self, key):
                 else:
                     key = np.asarray(key)
 
+            if is_list_like(key):
+                key = check_array_indexer(self, key)
+
             if com.is_bool_indexer(key):
                 key = check_bool_indexer(self, key)
 

diff --git a/pandas/core/common.py b/pandas/core/common.py
@@ -121,8 +121,8 @@ def is_bool_indexer(key: Any) -> bool:
 
     See Also
     --------
-    check_bool_array_indexer : Check that `key`
-        is a valid mask for an array, and convert to an ndarray.
+    check_array_indexer : Check that `key` is a valid array to index,
+        and convert to an ndarray.
     """
     na_msg = "cannot mask with array containing NA / NaN values"
     if isinstance(key, (ABCSeries, np.ndarray, ABCIndex)) or (

diff --git a/pandas/core/indexers.py b/pandas/core/indexers.py
@@ -1,11 +1,18 @@
 """
 Low-dependency indexing utilities.
 """
+import warnings
+
 import numpy as np
 
 from pandas._typing import AnyArrayLike
 
-from pandas.core.dtypes.common import is_list_like
+from pandas.core.dtypes.common import (
+    is_array_like,
+    is_bool_dtype,
+    is_integer_dtype,
+    is_list_like,
+)
 from pandas.core.dtypes.generic import ABCIndexClass, ABCSeries
 
 # -----------------------------------------------------------
@@ -244,66 +251,147 @@ def length_of_indexer(indexer, target=None) -> int:
     raise AssertionError("cannot find the length of the indexer")
 
 
-def check_bool_array_indexer(array: AnyArrayLike, mask: AnyArrayLike) -> np.ndarray:
+def deprecate_ndim_indexing(result):
+    """
+    Helper function to raise the deprecation warning for multi-dimensional
+    indexing on 1D Series/Index.
+
+    GH#27125 indexer like idx[:, None] expands dim, but we cannot do that
+    and keep an index, so we currently return ndarray, which is deprecated
+    (Deprecation GH#30588).
     """
-    Check if `mask` is a valid boolean indexer for `array`.
+    if np.ndim(result) > 1:
+        warnings.warn(
+            "Support for multi-dimensional indexing (e.g. `index[:, None]`) "
+            "on an Index is deprecated and will be removed in a future "
+            "version.  Convert to a numpy array before indexing instead.",
+            DeprecationWarning,
+            stacklevel=3,
+        )
+
+
+# -----------------------------------------------------------
+# Public indexer validation
 
-    `array` and `mask` are checked to have the same length, and the
-    dtype is validated.
+
+def check_array_indexer(array: AnyArrayLike, indexer) -> np.ndarray:
+    """
+    Check if `indexer` is a valid array indexer for `array`.
+
+    For a boolean mask, `array` and `indexer` are checked to have the same
+    length. The dtype is validated, and if it is an integer or boolean
+    ExtensionArray, it is checked if there are missing values present, and
+    it is converted to the appropriate numpy array.
 
     .. versionadded:: 1.0.0
 
     Parameters
     ----------
-    array : array
-        The array that's being masked.
-    mask : array
-        The boolean array that's masking.
+    array : array-like
+        The array that is being indexed (only used for the length).
+    indexer : array-like or list-like
+        The array-like that's used to index. The function assumes this is an
+        array-like, and input that is not yet a numpy array or an ExtensionArray
+        is converted to one.
 
     Returns
     -------
     numpy.ndarray
-        The validated boolean mask.
+        The validated indexer as a numpy array that can be used to index.
 
     Raises
     ------
     IndexError
         When the lengths don't match.
     ValueError
-        When `mask` cannot be converted to a bool-dtype ndarray.
+        When `indexer` cannot be converted to a numpy ndarray to index
+        (e.g. presence of missing values).
 
     See Also
     --------
     api.types.is_bool_dtype : Check if `key` is of boolean dtype.
 
     Examples
     --------
-    A boolean ndarray is returned when the arguments are all valid.
+    When checking a boolean mask, a boolean ndarray is returned when the
+    arguments are all valid.
 
     >>> mask = pd.array([True, False])
     >>> arr = pd.array([1, 2])
-    >>> pd.api.extensions.check_bool_array_indexer(arr, mask)
+    >>> pd.api.indexers.check_array_indexer(arr, mask)
     array([ True, False])
 
     An IndexError is raised when the lengths don't match.
 
     >>> mask = pd.array([True, False, True])
-    >>> pd.api.extensions.check_bool_array_indexer(arr, mask)
+    >>> pd.api.indexers.check_array_indexer(arr, mask)
     Traceback (most recent call last):
     ...
-    IndexError: Item wrong length 3 instead of 2.
+    IndexError: Boolean index has wrong length: 3 instead of 2.
 
     A ValueError is raised when the mask cannot be converted to
     a bool-dtype ndarray.
 
     >>> mask = pd.array([True, pd.NA])
-    >>> pd.api.extensions.check_bool_array_indexer(arr, mask)
+    >>> pd.api.indexers.check_array_indexer(arr, mask)
+    Traceback (most recent call last):
+    ...
+    ValueError: Cannot mask with a boolean indexer containing NA values
+
+    A numpy boolean mask will get passed through (if the length is correct):
+
+    >>> mask = np.array([True, False])
+    >>> pd.api.indexers.check_array_indexer(arr, mask)
+    array([ True, False])
+
+    Similarly for integer indexers, an integer ndarray is returned when it is
+    a valid indexer, otherwise an error is raised (for integer indexers, a matching
+    length is not required):
+
+    >>> indexer = pd.array([0, 2], dtype="Int64")
+    >>> arr = pd.array([1, 2, 3])
+    >>> pd.api.indexers.check_array_indexer(arr, indexer)
+    array([0, 2])
+
+    >>> indexer = pd.array([0, pd.NA], dtype="Int64")
+    >>> pd.api.indexers.check_array_indexer(arr, indexer)
+    Traceback (most recent call last):
+    ...
+    ValueError: Cannot index with an integer indexer containing NA values
+
+    For non-integer/boolean dtypes, an appropriate error is raised:
+
+    >>> indexer = np.array([0., 2.], dtype="float64")
+    >>> pd.api.indexers.check_array_indexer(arr, indexer)
     Traceback (most recent call last):
     ...
-    ValueError: cannot convert to bool numpy array in presence of missing values
+    IndexError: arrays used as indices must be of integer or boolean type
     """
-    result = np.asarray(mask, dtype=bool)
-    # GH26658
-    if len(result) != len(array):
-        raise IndexError(f"Item wrong length {len(result)} instead of {len(array)}.")
-    return result
+    from pandas.core.construction import array as pd_array
+
+    if not is_array_like(indexer):
+        indexer = pd_array(indexer)
+    dtype = indexer.dtype
+    if is_bool_dtype(dtype):
+        try:
+            indexer = np.asarray(indexer, dtype=bool)
+        except ValueError:
+            raise ValueError("Cannot mask with a boolean indexer containing NA values")
+
+        # GH26658
+        if len(indexer) != len(array):
+            raise IndexError(
+                f"Boolean index has wrong length: "
+                f"{len(indexer)} instead of {len(array)}"
+            )
+    elif is_integer_dtype(dtype):
+        try:
+            indexer = np.asarray(indexer, dtype=np.intp)
+        except ValueError:
+            raise ValueError(
+                "Cannot index with an integer indexer containing NA values"
+            )
+    else:
+        raise IndexError("arrays used as indices must be of integer or boolean type")
+
+    return indexer