pandas-dev · TomAugspurger · Jan 29, 2020 · Jan 20, 2020 · Jan 20, 2020 · Jan 20, 2020
diff --git a/pandas/api/indexers/__init__.py b/pandas/api/indexers/__init__.py
@@ -2,7 +2,7 @@
 Public API for Rolling Window Indexers.
 """
 
-from pandas.core.indexers import check_bool_array_indexer
+from pandas.core.indexers import check_array_indexer, check_bool_array_indexer
 from pandas.core.window.indexers import BaseIndexer
 
-__all__ = ["check_bool_array_indexer", "BaseIndexer"]
+__all__ = ["check_array_indexer", "check_bool_array_indexer", "BaseIndexer"]
diff --git a/pandas/core/arrays/categorical.py b/pandas/core/arrays/categorical.py
@@ -39,7 +39,7 @@
 )
 from pandas.core.dtypes.dtypes import CategoricalDtype
 from pandas.core.dtypes.generic import ABCIndexClass, ABCSeries
-from pandas.core.dtypes.inference import is_array_like, is_hashable
+from pandas.core.dtypes.inference import is_hashable
 from pandas.core.dtypes.missing import isna, notna
 
 from pandas.core import ops
@@ -54,7 +54,7 @@
 from pandas.core.base import NoNewAttributesMixin, PandasObject, _shared_docs
 import pandas.core.common as com
 from pandas.core.construction import array, extract_array, sanitize_array
-from pandas.core.indexers import check_bool_array_indexer
+from pandas.core.indexers import check_array_indexer
 from pandas.core.missing import interpolate_2d
 from pandas.core.ops.common import unpack_zerodim_and_defer
 from pandas.core.sorting import nargsort
@@ -2001,15 +2001,12 @@ def __getitem__(self, key):
             else:
                 return self.categories[i]
 
-        if is_list_like(key) and not is_array_like(key):
-            key = np.asarray(key)
-
-        if com.is_bool_indexer(key):
-            key = check_bool_array_indexer(self, key)
+        if is_list_like(key) and not isinstance(key, tuple):
+            key = check_array_indexer(self, key)
 
         result = self._codes[key]
         if result.ndim > 1:
-            return result
+            raise IndexError("Cannot user indexer with multiple dimensions")
         return self._constructor(result, dtype=self.dtype, fastpath=True)
 
     def __setitem__(self, key, value):

diff --git a/pandas/core/arrays/datetimelike.py b/pandas/core/arrays/datetimelike.py
@@ -42,7 +42,7 @@
 from pandas.core.algorithms import checked_add_with_arr, take, unique1d, value_counts
 from pandas.core.arrays.base import ExtensionArray, ExtensionOpsMixin
 import pandas.core.common as com
-from pandas.core.indexers import check_bool_array_indexer
+from pandas.core.indexers import check_array_indexer
 from pandas.core.ops.common import unpack_zerodim_and_defer
 from pandas.core.ops.invalid import invalid_comparison, make_invalid_op
 
@@ -517,8 +517,12 @@ def __getitem__(self, key):
                 return self._box_func(val)
             return type(self)(val, dtype=self.dtype)
 
+        if is_list_like(key):
+            key = check_array_indexer(self, key)
+
         if com.is_bool_indexer(key):
-            key = check_bool_array_indexer(self, key)
+            # can still have object dtype
+            key = np.asarray(key, dtype=bool)
             if key.all():
                 key = slice(0, None, None)
             else:

diff --git a/pandas/core/arrays/interval.py b/pandas/core/arrays/interval.py
@@ -40,6 +40,7 @@
 from pandas.core.arrays.categorical import Categorical
 import pandas.core.common as com
 from pandas.core.construction import array
+from pandas.core.indexers import check_array_indexer
 from pandas.core.indexes.base import ensure_index
 
 _VALID_CLOSED = {"left", "right", "both", "neither"}
@@ -495,6 +496,8 @@ def __len__(self) -> int:
         return len(self.left)
 
     def __getitem__(self, value):
+        if is_list_like(value):
+            value = check_array_indexer(self, value)
         left = self.left[value]
         right = self.right[value]
 

diff --git a/pandas/core/arrays/masked.py b/pandas/core/arrays/masked.py
@@ -4,13 +4,17 @@
 
 from pandas._libs import lib, missing as libmissing
 
-from pandas.core.dtypes.common import is_integer, is_object_dtype, is_string_dtype
+from pandas.core.dtypes.common import (
+    is_integer,
+    is_list_like,
+    is_object_dtype,
+    is_string_dtype,
+)
 from pandas.core.dtypes.missing import isna, notna
 
 from pandas.core.algorithms import take
 from pandas.core.arrays import ExtensionArray, ExtensionOpsMixin
-import pandas.core.common as com
-from pandas.core.indexers import check_bool_array_indexer
+from pandas.core.indexers import check_array_indexer
 
 if TYPE_CHECKING:
     from pandas._typing import Scalar
@@ -35,8 +39,8 @@ def __getitem__(self, item):
                 return self.dtype.na_value
             return self._data[item]
 
-        elif com.is_bool_indexer(item):
-            item = check_bool_array_indexer(self, item)
+        elif is_list_like(item):
+            item = check_array_indexer(self, item)
 
         return type(self)(self._data[item], self._mask[item])
 

diff --git a/pandas/core/arrays/numpy_.py b/pandas/core/arrays/numpy_.py
@@ -9,6 +9,7 @@
 from pandas.util._decorators import Appender
 from pandas.util._validators import validate_fillna_kwargs
 
+from pandas.core.dtypes.common import is_list_like
 from pandas.core.dtypes.dtypes import ExtensionDtype
 from pandas.core.dtypes.generic import ABCIndexClass, ABCSeries
 from pandas.core.dtypes.inference import is_array_like
@@ -18,9 +19,8 @@
 from pandas.core import nanops
 from pandas.core.algorithms import searchsorted, take, unique
 from pandas.core.arrays.base import ExtensionArray, ExtensionOpsMixin
-import pandas.core.common as com
 from pandas.core.construction import extract_array
-from pandas.core.indexers import check_bool_array_indexer
+from pandas.core.indexers import check_array_indexer
 from pandas.core.missing import backfill_1d, pad_1d
 
 
@@ -235,8 +235,8 @@ def __getitem__(self, item):
         if isinstance(item, type(self)):
             item = item._ndarray
 
-        elif com.is_bool_indexer(item):
-            item = check_bool_array_indexer(self, item)
+        elif is_list_like(item):
+            item = check_array_indexer(self, item)
 
         result = self._ndarray[item]
         if not lib.is_scalar(item):

diff --git a/pandas/core/arrays/sparse/array.py b/pandas/core/arrays/sparse/array.py
@@ -29,6 +29,7 @@
     is_datetime64_any_dtype,
     is_dtype_equal,
     is_integer,
+    is_list_like,
     is_object_dtype,
     is_scalar,
     is_string_dtype,
@@ -43,6 +44,7 @@
 from pandas.core.base import PandasObject
 import pandas.core.common as com
 from pandas.core.construction import sanitize_array
+from pandas.core.indexers import check_array_indexer
 from pandas.core.missing import interpolate_2d
 import pandas.core.ops as ops
 from pandas.core.ops.common import unpack_zerodim_and_defer
@@ -768,6 +770,9 @@ def __getitem__(self, key):
                 else:
                     key = np.asarray(key)
 
+            if is_list_like(key):
+                key = check_array_indexer(self, key)
+
             if com.is_bool_indexer(key):
                 key = check_bool_indexer(self, key)
 

diff --git a/pandas/core/indexers.py b/pandas/core/indexers.py
@@ -5,7 +5,12 @@
 
 from pandas._typing import AnyArrayLike
 
-from pandas.core.dtypes.common import is_list_like
+from pandas.core.dtypes.common import (
+    is_array_like,
+    is_bool_dtype,
+    is_integer_dtype,
+    is_list_like,
+)
 from pandas.core.dtypes.generic import ABCIndexClass, ABCSeries
 
 # -----------------------------------------------------------
@@ -307,3 +312,62 @@ def check_bool_array_indexer(array: AnyArrayLike, mask: AnyArrayLike) -> np.ndar
     if len(result) != len(array):
         raise IndexError(f"Item wrong length {len(result)} instead of {len(array)}.")
     return result
+
+
+def check_array_indexer(array, indexer) -> np.ndarray:
+    """
+    Check if `indexer` is a valid array indexer for `array`.
+
+    The dtype of `indexer` is inspected. A boolean or integer indexer
+    (including an ExtensionArray) is checked for missing values and
+    converted to a numpy array; a boolean indexer must also have the same
+    length as `array`. Any other dtype is returned without conversion.
+
+    .. versionadded:: 1.0.0
+
+    Parameters
+    ----------
+    array : array
+        The array that's being indexed (only used for the length).
+    indexer : array-like
+        The array-like that's used to index.
+
+    Returns
+    -------
+    numpy.ndarray
+        The validated indexer.
+
+    Raises
+    ------
+    IndexError
+        When a boolean `indexer` and `array` differ in length.
+    ValueError
+        When a boolean or integer `indexer` contains missing values.
+    """
+    import pandas as pd
+
+    if not is_array_like(indexer):
+        indexer = pd.array(indexer)
+        if len(indexer) == 0:
+            # pd.array does not infer an integer dtype for empty input
+            indexer = np.array([], dtype=int)
+    dtype = indexer.dtype
+    if is_bool_dtype(dtype):
+        try:
+            indexer = np.asarray(indexer, dtype=bool)
+        except ValueError as err:
+            msg = "Cannot mask with a boolean indexer containing NA values"
+            raise ValueError(msg) from err
+
+        # GH26658
+        if len(indexer) != len(array):
+            raise IndexError(
+                f"Item wrong length {len(indexer)} instead of {len(array)}."
+            )
+    elif is_integer_dtype(dtype):
+        try:
+            indexer = np.asarray(indexer, dtype=int)
+        except ValueError as err:
+            msg = "Cannot index with an integer indexer containing NA values"
+            raise ValueError(msg) from err
+    return indexer
diff --git a/pandas/tests/extension/base/getitem.py b/pandas/tests/extension/base/getitem.py
@@ -152,14 +152,48 @@ def test_getitem_boolean_array_mask(self, data):
     def test_getitem_boolean_array_mask_raises(self, data):
         mask = pd.array(np.zeros(data.shape, dtype="bool"), dtype="boolean")
         mask[:2] = pd.NA
-        with pytest.raises(ValueError):
+
+        msg = "Cannot mask with a boolean indexer containing NA values"
+        with pytest.raises(ValueError, match=msg):
             data[mask]
 
         s = pd.Series(data)
 
         with pytest.raises(ValueError):
             s[mask]
 
+    @pytest.mark.parametrize(
+        "idx",
+        [[0, 1, 2], pd.array([0, 1, 2], dtype="Int64"), np.array([0, 1, 2])],
+        ids=["list", "integer-array", "numpy-array"],
+    )
+    def test_getitem_integer_array(self, data, idx):
+        # Integer indexers must select like take(), on the array and via a Series.
+        result = data[idx]
+        assert len(result) == 3
+        assert isinstance(result, type(data))
+        expected = data.take([0, 1, 2])
+        self.assert_extension_array_equal(result, expected)
+        expected = pd.Series(expected)
+        result = pd.Series(data)[idx]
+        self.assert_series_equal(result, expected)
+
+    @pytest.mark.parametrize(
+        "idx",
+        [[0, 1, 2, pd.NA], pd.array([0, 1, 2, pd.NA], dtype="Int64")],
+        ids=["list", "integer-array"],
+    )
+    def test_getitem_integer_with_missing_raises(self, data, idx):
+        # Indexing with an integer indexer that contains pd.NA must raise ValueError.
+        msg = "Cannot index with an integer indexer containing NA values"
+        with pytest.raises(ValueError, match=msg):
+            data[idx]
+
+        # TODO: Series.__getitem__ raises KeyError about labels not found here
+        # (it tries label-based lookup); intended check, with tm = pandas._testing:
+        #   s = pd.Series(data, index=[tm.rands(4) for _ in range(len(data))])
+        #   with pytest.raises(ValueError, match=msg): s[idx]
+
     def test_getitem_slice(self, data):
         # getitem[slice] should return an array
         result = data[slice(0)]  # empty

diff --git a/pandas/tests/extension/decimal/array.py b/pandas/tests/extension/decimal/array.py
@@ -117,13 +117,7 @@ def __getitem__(self, item):
         else:
             # array, slice.
             if pd.api.types.is_list_like(item):
-                if not pd.api.types.is_array_like(item):
-                    item = pd.array(item)
-                dtype = item.dtype
-                if pd.api.types.is_bool_dtype(dtype):
-                    item = pd.api.indexers.check_bool_array_indexer(self, item)
-                elif pd.api.types.is_integer_dtype(dtype):
-                    item = np.asarray(item, dtype="int")
+                item = pd.api.indexers.check_array_indexer(self, item)
             return type(self)(self._data[item])
 
     def take(self, indexer, allow_fill=False, fill_value=None):

diff --git a/pandas/tests/extension/json/array.py b/pandas/tests/extension/json/array.py
@@ -82,11 +82,8 @@ def __getitem__(self, item):
             # slice
             return type(self)(self.data[item])
         else:
-            if not pd.api.types.is_array_like(item):
-                item = pd.array(item)
-            dtype = item.dtype
-            if pd.api.types.is_bool_dtype(dtype):
-                item = pd.api.indexers.check_bool_array_indexer(self, item)
+            item = pd.api.indexers.check_array_indexer(self, item)
+            if pd.api.types.is_bool_dtype(item.dtype):
                 return self._from_sequence([x for x, m in zip(self, item) if m])
             # integer
             return type(self)([self.data[i] for i in item])

diff --git a/pandas/tests/indexes/categorical/test_category.py b/pandas/tests/indexes/categorical/test_category.py
@@ -976,8 +976,9 @@ def test_engine_type(self, dtype, engine_type):
         assert np.issubdtype(ci.codes.dtype, dtype)
         assert isinstance(ci._engine, engine_type)
 
-    def test_getitem_2d_deprecated(self):
+    def test_getitem_raise_2d(self):
         # GH#30588 multi-dim indexing is deprecated, but raising is also acceptable
         idx = self.create_index()
-        with pytest.raises(ValueError, match="cannot mask with array containing NA"):
+        msg = "Cannot user indexer with multiple dimensions"
+        with pytest.raises(IndexError, match=msg):
             idx[:, None]