diff --git a/doc/source/reference/extensions.rst b/doc/source/reference/extensions.rst index c072237850d82..78fdfbfd28144 100644 --- a/doc/source/reference/extensions.rst +++ b/doc/source/reference/extensions.rst @@ -66,7 +66,7 @@ behaves correctly. .. autosummary:: :toctree: api/ - api.indexers.check_bool_array_indexer + api.indexers.check_array_indexer The sentinel ``pandas.api.extensions.no_default`` is used as the default diff --git a/pandas/api/indexers/__init__.py b/pandas/api/indexers/__init__.py index 10654eb0888ee..826297e6b498f 100644 --- a/pandas/api/indexers/__init__.py +++ b/pandas/api/indexers/__init__.py @@ -2,7 +2,7 @@ Public API for Rolling Window Indexers. """ -from pandas.core.indexers import check_bool_array_indexer +from pandas.core.indexers import check_array_indexer from pandas.core.window.indexers import BaseIndexer -__all__ = ["check_bool_array_indexer", "BaseIndexer"] +__all__ = ["check_array_indexer", "BaseIndexer"] diff --git a/pandas/core/arrays/categorical.py b/pandas/core/arrays/categorical.py index 9d7359dd9c614..412422397af06 100644 --- a/pandas/core/arrays/categorical.py +++ b/pandas/core/arrays/categorical.py @@ -39,7 +39,7 @@ ) from pandas.core.dtypes.dtypes import CategoricalDtype from pandas.core.dtypes.generic import ABCIndexClass, ABCSeries -from pandas.core.dtypes.inference import is_array_like, is_hashable +from pandas.core.dtypes.inference import is_hashable from pandas.core.dtypes.missing import isna, notna from pandas.core import ops @@ -54,7 +54,7 @@ from pandas.core.base import NoNewAttributesMixin, PandasObject, _shared_docs import pandas.core.common as com from pandas.core.construction import array, extract_array, sanitize_array -from pandas.core.indexers import check_bool_array_indexer +from pandas.core.indexers import check_array_indexer, deprecate_ndim_indexing from pandas.core.missing import interpolate_2d from pandas.core.ops.common import unpack_zerodim_and_defer from pandas.core.sorting import nargsort @@ 
-2001,14 +2001,11 @@ def __getitem__(self, key): else: return self.categories[i] - if is_list_like(key) and not is_array_like(key): - key = np.asarray(key) - - if com.is_bool_indexer(key): - key = check_bool_array_indexer(self, key) + key = check_array_indexer(self, key) result = self._codes[key] if result.ndim > 1: + deprecate_ndim_indexing(result) return result return self._constructor(result, dtype=self.dtype, fastpath=True) diff --git a/pandas/core/arrays/datetimelike.py b/pandas/core/arrays/datetimelike.py index 70637026c278d..0ea707e1ae69d 100644 --- a/pandas/core/arrays/datetimelike.py +++ b/pandas/core/arrays/datetimelike.py @@ -42,7 +42,7 @@ from pandas.core.algorithms import checked_add_with_arr, take, unique1d, value_counts from pandas.core.arrays.base import ExtensionArray, ExtensionOpsMixin import pandas.core.common as com -from pandas.core.indexers import check_bool_array_indexer +from pandas.core.indexers import check_array_indexer from pandas.core.ops.common import unpack_zerodim_and_defer from pandas.core.ops.invalid import invalid_comparison, make_invalid_op @@ -518,11 +518,20 @@ def __getitem__(self, key): return type(self)(val, dtype=self.dtype) if com.is_bool_indexer(key): - key = check_bool_array_indexer(self, key) + # first convert to boolean, because check_array_indexer doesn't + # allow object dtype + key = np.asarray(key, dtype=bool) + key = check_array_indexer(self, key) if key.all(): key = slice(0, None, None) else: key = lib.maybe_booleans_to_slice(key.view(np.uint8)) + elif isinstance(key, list) and len(key) == 1 and isinstance(key[0], slice): + # see https://github.com/pandas-dev/pandas/issues/31299, need to allow + # this for now (would otherwise raise in check_array_indexer) + pass + else: + key = check_array_indexer(self, key) is_period = is_period_dtype(self) if is_period: diff --git a/pandas/core/arrays/interval.py b/pandas/core/arrays/interval.py index 37d2baed2c09e..d890c0c16aecc 100644 --- a/pandas/core/arrays/interval.py +++ 
b/pandas/core/arrays/interval.py @@ -40,6 +40,7 @@ from pandas.core.arrays.categorical import Categorical import pandas.core.common as com from pandas.core.construction import array +from pandas.core.indexers import check_array_indexer from pandas.core.indexes.base import ensure_index _VALID_CLOSED = {"left", "right", "both", "neither"} @@ -495,6 +496,7 @@ def __len__(self) -> int: return len(self.left) def __getitem__(self, value): + value = check_array_indexer(self, value) left = self.left[value] right = self.right[value] diff --git a/pandas/core/arrays/masked.py b/pandas/core/arrays/masked.py index 5eaed70721592..80e317123126a 100644 --- a/pandas/core/arrays/masked.py +++ b/pandas/core/arrays/masked.py @@ -9,8 +9,7 @@ from pandas.core.algorithms import take from pandas.core.arrays import ExtensionArray, ExtensionOpsMixin -import pandas.core.common as com -from pandas.core.indexers import check_bool_array_indexer +from pandas.core.indexers import check_array_indexer if TYPE_CHECKING: from pandas._typing import Scalar @@ -35,8 +34,7 @@ def __getitem__(self, item): return self.dtype.na_value return self._data[item] - elif com.is_bool_indexer(item): - item = check_bool_array_indexer(self, item) + item = check_array_indexer(self, item) return type(self)(self._data[item], self._mask[item]) diff --git a/pandas/core/arrays/numpy_.py b/pandas/core/arrays/numpy_.py index 075096f6cfb54..8b1d1e58dc36c 100644 --- a/pandas/core/arrays/numpy_.py +++ b/pandas/core/arrays/numpy_.py @@ -18,9 +18,8 @@ from pandas.core import nanops from pandas.core.algorithms import searchsorted, take, unique from pandas.core.arrays.base import ExtensionArray, ExtensionOpsMixin -import pandas.core.common as com from pandas.core.construction import extract_array -from pandas.core.indexers import check_bool_array_indexer +from pandas.core.indexers import check_array_indexer from pandas.core.missing import backfill_1d, pad_1d @@ -234,8 +233,7 @@ def __getitem__(self, item): if isinstance(item, 
type(self)): item = item._ndarray - elif com.is_bool_indexer(item): - item = check_bool_array_indexer(self, item) + item = check_array_indexer(self, item) result = self._ndarray[item] if not lib.is_scalar(item): diff --git a/pandas/core/arrays/sparse/array.py b/pandas/core/arrays/sparse/array.py index 75dd603aa6c7b..b476a019c66cc 100644 --- a/pandas/core/arrays/sparse/array.py +++ b/pandas/core/arrays/sparse/array.py @@ -43,6 +43,7 @@ from pandas.core.base import PandasObject import pandas.core.common as com from pandas.core.construction import sanitize_array +from pandas.core.indexers import check_array_indexer from pandas.core.missing import interpolate_2d import pandas.core.ops as ops from pandas.core.ops.common import unpack_zerodim_and_defer @@ -768,6 +769,8 @@ def __getitem__(self, key): else: key = np.asarray(key) + key = check_array_indexer(self, key) + if com.is_bool_indexer(key): key = check_bool_indexer(self, key) diff --git a/pandas/core/common.py b/pandas/core/common.py index c883ec9fa49b7..8c52999c4a79e 100644 --- a/pandas/core/common.py +++ b/pandas/core/common.py @@ -121,8 +121,8 @@ def is_bool_indexer(key: Any) -> bool: See Also -------- - check_bool_array_indexer : Check that `key` - is a valid mask for an array, and convert to an ndarray. + check_array_indexer : Check that `key` is a valid array to index, + and convert to an ndarray. """ na_msg = "cannot mask with array containing NA / NaN values" if isinstance(key, (ABCSeries, np.ndarray, ABCIndex)) or ( diff --git a/pandas/core/indexers.py b/pandas/core/indexers.py index 4d45769d2fea9..fe475527f4596 100644 --- a/pandas/core/indexers.py +++ b/pandas/core/indexers.py @@ -1,11 +1,18 @@ """ Low-dependency indexing utilities. 
""" +import warnings + import numpy as np -from pandas._typing import AnyArrayLike +from pandas._typing import Any, AnyArrayLike -from pandas.core.dtypes.common import is_list_like +from pandas.core.dtypes.common import ( + is_array_like, + is_bool_dtype, + is_integer_dtype, + is_list_like, +) from pandas.core.dtypes.generic import ABCIndexClass, ABCSeries # ----------------------------------------------------------- @@ -244,33 +251,65 @@ def length_of_indexer(indexer, target=None) -> int: raise AssertionError("cannot find the length of the indexer") -def check_bool_array_indexer(array: AnyArrayLike, mask: AnyArrayLike) -> np.ndarray: +def deprecate_ndim_indexing(result): + """ + Helper function to raise the deprecation warning for multi-dimensional + indexing on 1D Series/Index. + + GH#27125 indexer like idx[:, None] expands dim, but we cannot do that + and keep an index, so we currently return ndarray, which is deprecated + (Deprecation GH#30588). """ - Check if `mask` is a valid boolean indexer for `array`. + if np.ndim(result) > 1: + warnings.warn( + "Support for multi-dimensional indexing (e.g. `index[:, None]`) " + "on an Index is deprecated and will be removed in a future " + "version. Convert to a numpy array before indexing instead.", + DeprecationWarning, + stacklevel=3, + ) + + +# ----------------------------------------------------------- +# Public indexer validation - `array` and `mask` are checked to have the same length, and the - dtype is validated. + +def check_array_indexer(array: AnyArrayLike, indexer: Any) -> Any: + """ + Check if `indexer` is a valid array indexer for `array`. + + For a boolean mask, `array` and `indexer` are checked to have the same + length. The dtype is validated, and if it is an integer or boolean + ExtensionArray, it is checked if there are missing values present, and + it is converted to the appropriate numpy array. Other dtypes will raise + an error. + + Non-array indexers (integer, slice, Ellipsis, tuples, ..) 
are passed + through as is. .. versionadded:: 1.0.0 Parameters ---------- - array : array - The array that's being masked. - mask : array - The boolean array that's masking. + array : array-like + The array that is being indexed (only used for the length). + indexer : array-like or list-like + The array-like that's used to index. List-like input that is not yet + a numpy array or an ExtensionArray is converted to one. Other input + types are passed through as is. Returns ------- numpy.ndarray - The validated boolean mask. + The validated indexer as a numpy array that can be used to index. Raises ------ IndexError When the lengths don't match. ValueError - When `mask` cannot be converted to a bool-dtype ndarray. + When `indexer` cannot be converted to a numpy ndarray to index + (e.g. presence of missing values). See Also -------- @@ -278,32 +317,100 @@ def check_bool_array_indexer(array: AnyArrayLike, mask: AnyArrayLike) -> np.ndar Examples -------- - A boolean ndarray is returned when the arguments are all valid. + When checking a boolean mask, a boolean ndarray is returned when the + arguments are all valid. >>> mask = pd.array([True, False]) >>> arr = pd.array([1, 2]) - >>> pd.api.extensions.check_bool_array_indexer(arr, mask) + >>> pd.api.indexers.check_array_indexer(arr, mask) array([ True, False]) An IndexError is raised when the lengths don't match. >>> mask = pd.array([True, False, True]) - >>> pd.api.extensions.check_bool_array_indexer(arr, mask) + >>> pd.api.indexers.check_array_indexer(arr, mask) Traceback (most recent call last): ... 
+ ValueError: Cannot mask with a boolean indexer containing NA values + + A numpy boolean mask will get passed through (if the length is correct): + + >>> mask = np.array([True, False]) + >>> pd.api.indexers.check_array_indexer(arr, mask) + array([ True, False]) + + Similarly for integer indexers, an integer ndarray is returned when it is + a valid indexer, otherwise an error is raised (for integer indexers, a matching + length is not required): + + >>> indexer = pd.array([0, 2], dtype="Int64") + >>> arr = pd.array([1, 2, 3]) + >>> pd.api.indexers.check_array_indexer(arr, indexer) + array([0, 2]) + + >>> indexer = pd.array([0, pd.NA], dtype="Int64") + >>> pd.api.indexers.check_array_indexer(arr, indexer) + Traceback (most recent call last): + ... + ValueError: Cannot index with an integer indexer containing NA values + + For non-integer/boolean dtypes, an appropriate error is raised: + + >>> indexer = np.array([0., 2.], dtype="float64") + >>> pd.api.indexers.check_array_indexer(arr, indexer) Traceback (most recent call last): ... 
- ValueError: cannot convert to bool numpy array in presence of missing values + IndexError: arrays used as indices must be of integer or boolean type """ - result = np.asarray(mask, dtype=bool) - # GH26658 - if len(result) != len(array): - raise IndexError(f"Item wrong length {len(result)} instead of {len(array)}.") - return result + from pandas.core.construction import array as pd_array + + # whatever is not an array-like is returned as-is (possible valid array + # indexers that are not array-like: integer, slice, Ellipsis, None) + # In this context, tuples are not considered as array-like, as they have + # a specific meaning in indexing (multi-dimensional indexing) + if is_list_like(indexer): + if isinstance(indexer, tuple): + return indexer + else: + return indexer + + # convert list-likes to array + if not is_array_like(indexer): + indexer = pd_array(indexer) + if len(indexer) == 0: + # empty list is converted to float array by pd.array + indexer = np.array([], dtype=np.intp) + + dtype = indexer.dtype + if is_bool_dtype(dtype): + try: + indexer = np.asarray(indexer, dtype=bool) + except ValueError: + raise ValueError("Cannot mask with a boolean indexer containing NA values") + + # GH26658 + if len(indexer) != len(array): + raise IndexError( + f"Boolean index has wrong length: " + f"{len(indexer)} instead of {len(array)}" + ) + elif is_integer_dtype(dtype): + try: + indexer = np.asarray(indexer, dtype=np.intp) + except ValueError: + raise ValueError( + "Cannot index with an integer indexer containing NA values" + ) + else: + raise IndexError("arrays used as indices must be of integer or boolean type") + + return indexer diff --git a/pandas/core/indexes/base.py b/pandas/core/indexes/base.py index 8ac8e51795ddc..1738c03e754b2 100644 --- a/pandas/core/indexes/base.py +++ b/pandas/core/indexes/base.py @@ -67,7 +67,7 @@ from pandas.core.arrays import ExtensionArray from pandas.core.base import IndexOpsMixin, PandasObject import pandas.core.common as com -from 
pandas.core.indexers import maybe_convert_indices +from pandas.core.indexers import deprecate_ndim_indexing, maybe_convert_indices from pandas.core.indexes.frozen import FrozenList import pandas.core.missing as missing from pandas.core.ops import get_op_result_name @@ -5825,19 +5825,3 @@ def _try_convert_to_int_array( pass raise ValueError - - -def deprecate_ndim_indexing(result): - if np.ndim(result) > 1: - # GH#27125 indexer like idx[:, None] expands dim, but we - # cannot do that and keep an index, so return ndarray - # Deprecation GH#30588 - # Note: update SingleBlockManager.get_slice when the DeprecationWarning - # is elevated to a FutureWarning - warnings.warn( - "Support for multi-dimensional indexing (e.g. `index[:, None]`) " - "on an Index is deprecated and will be removed in a future " - "version. Convert to a numpy array before indexing instead.", - DeprecationWarning, - stacklevel=3, - ) diff --git a/pandas/core/indexes/extension.py b/pandas/core/indexes/extension.py index 830f7c14a5493..d5664d760114e 100644 --- a/pandas/core/indexes/extension.py +++ b/pandas/core/indexes/extension.py @@ -16,7 +16,8 @@ from pandas.core.dtypes.generic import ABCSeries from pandas.core.arrays import ExtensionArray -from pandas.core.indexes.base import Index, deprecate_ndim_indexing +from pandas.core.indexers import deprecate_ndim_indexing +from pandas.core.indexes.base import Index from pandas.core.ops import get_op_result_name diff --git a/pandas/core/indexing.py b/pandas/core/indexing.py index d79f0f484c5fe..6a679708206fc 100755 --- a/pandas/core/indexing.py +++ b/pandas/core/indexing.py @@ -15,7 +15,6 @@ is_numeric_dtype, is_scalar, is_sequence, - is_sparse, ) from pandas.core.dtypes.concat import concat_compat from pandas.core.dtypes.generic import ABCDataFrame, ABCMultiIndex, ABCSeries @@ -23,7 +22,7 @@ import pandas.core.common as com from pandas.core.indexers import ( - check_bool_array_indexer, + check_array_indexer, is_list_like_indexer, length_of_indexer, ) @@ 
-2231,9 +2230,9 @@ def check_bool_indexer(index: Index, key) -> np.ndarray: ) result = result.astype(bool)._values else: - if is_sparse(result): - result = np.asarray(result) - result = check_bool_array_indexer(index, result) + # key might be sparse / object-dtype bool, check_array_indexer needs bool array + result = np.asarray(result, dtype=bool) + result = check_array_indexer(index, result) return result diff --git a/pandas/tests/extension/base/getitem.py b/pandas/tests/extension/base/getitem.py index e0f3a4754221f..8615a8df22dcc 100644 --- a/pandas/tests/extension/base/getitem.py +++ b/pandas/tests/extension/base/getitem.py @@ -97,6 +97,15 @@ def test_getitem_scalar_na(self, data_missing, na_cmp, na_value): result = data_missing[0] assert na_cmp(result, na_value) + def test_getitem_empty(self, data): + # Indexing with empty list + result = data[[]] + assert len(result) == 0 + assert isinstance(result, type(data)) + + expected = data[np.array([], dtype="int64")] + self.assert_extension_array_equal(result, expected) + def test_getitem_mask(self, data): # Empty mask, raw array mask = np.zeros(len(data), dtype=bool) @@ -152,7 +161,12 @@ def test_getitem_boolean_array_mask(self, data): def test_getitem_boolean_array_mask_raises(self, data): mask = pd.array(np.zeros(data.shape, dtype="bool"), dtype="boolean") mask[:2] = pd.NA - with pytest.raises(ValueError): + + msg = ( + "Cannot mask with a boolean indexer containing NA values|" + "cannot mask with array containing NA / NaN values" + ) + with pytest.raises(ValueError, match=msg): data[mask] s = pd.Series(data) @@ -160,6 +174,38 @@ def test_getitem_boolean_array_mask_raises(self, data): with pytest.raises(ValueError): s[mask] + @pytest.mark.parametrize( + "idx", + [[0, 1, 2], pd.array([0, 1, 2], dtype="Int64"), np.array([0, 1, 2])], + ids=["list", "integer-array", "numpy-array"], + ) + def test_getitem_integer_array(self, data, idx): + result = data[idx] + assert len(result) == 3 + assert isinstance(result, 
type(data)) + expected = data.take([0, 1, 2]) + self.assert_extension_array_equal(result, expected) + + expected = pd.Series(expected) + result = pd.Series(data)[idx] + self.assert_series_equal(result, expected) + + @pytest.mark.parametrize( + "idx", + [[0, 1, 2, pd.NA], pd.array([0, 1, 2, pd.NA], dtype="Int64")], + ids=["list", "integer-array"], + ) + def test_getitem_integer_with_missing_raises(self, data, idx): + msg = "Cannot index with an integer indexer containing NA values" + with pytest.raises(ValueError, match=msg): + data[idx] + + # TODO this raises KeyError about labels not found (it tries label-based) + # import pandas._testing as tm + # s = pd.Series(data, index=[tm.rands(4) for _ in range(len(data))]) + # with pytest.raises(ValueError, match=msg): + # s[idx] + def test_getitem_slice(self, data): # getitem[slice] should return an array result = data[slice(0)] # empty diff --git a/pandas/tests/extension/decimal/array.py b/pandas/tests/extension/decimal/array.py index 46ab8fe9a62ed..02153ade46610 100644 --- a/pandas/tests/extension/decimal/array.py +++ b/pandas/tests/extension/decimal/array.py @@ -110,14 +110,7 @@ def __getitem__(self, item): return self._data[item] else: # array, slice. 
- if pd.api.types.is_list_like(item): - if not pd.api.types.is_array_like(item): - item = pd.array(item) - dtype = item.dtype - if pd.api.types.is_bool_dtype(dtype): - item = pd.api.indexers.check_bool_array_indexer(self, item) - elif pd.api.types.is_integer_dtype(dtype): - item = np.asarray(item, dtype="int") + item = pd.api.indexers.check_array_indexer(self, item) return type(self)(self._data[item]) def take(self, indexer, allow_fill=False, fill_value=None): diff --git a/pandas/tests/extension/json/array.py b/pandas/tests/extension/json/array.py index dc4653bd01dea..9e741bb7f267c 100644 --- a/pandas/tests/extension/json/array.py +++ b/pandas/tests/extension/json/array.py @@ -76,11 +76,8 @@ def __getitem__(self, item): # slice return type(self)(self.data[item]) else: - if not pd.api.types.is_array_like(item): - item = pd.array(item) - dtype = item.dtype - if pd.api.types.is_bool_dtype(dtype): - item = pd.api.indexers.check_bool_array_indexer(self, item) + item = pd.api.indexers.check_array_indexer(self, item) + if pd.api.types.is_bool_dtype(item.dtype): return self._from_sequence([x for x, m in zip(self, item) if m]) # integer return type(self)([self.data[i] for i in item]) diff --git a/pandas/tests/indexes/categorical/test_category.py b/pandas/tests/indexes/categorical/test_category.py index c69c1f3f386a9..d870259c2539b 100644 --- a/pandas/tests/indexes/categorical/test_category.py +++ b/pandas/tests/indexes/categorical/test_category.py @@ -976,12 +976,6 @@ def test_engine_type(self, dtype, engine_type): assert np.issubdtype(ci.codes.dtype, dtype) assert isinstance(ci._engine, engine_type) - def test_getitem_2d_deprecated(self): - # GH#30588 multi-dim indexing is deprecated, but raising is also acceptable - idx = self.create_index() - with pytest.raises(ValueError, match="cannot mask with array containing NA"): - idx[:, None] - @pytest.mark.parametrize( "data, categories", [ diff --git a/pandas/tests/indexing/test_check_indexer.py 
b/pandas/tests/indexing/test_check_indexer.py new file mode 100644 index 0000000000000..82f8c12229824 --- /dev/null +++ b/pandas/tests/indexing/test_check_indexer.py @@ -0,0 +1,97 @@ +import numpy as np +import pytest + +import pandas as pd +import pandas._testing as tm +from pandas.api.indexers import check_array_indexer + + +@pytest.mark.parametrize( + "indexer, expected", + [ + # integer + ([1, 2], np.array([1, 2], dtype=np.intp)), + (np.array([1, 2], dtype="int64"), np.array([1, 2], dtype=np.intp)), + (pd.array([1, 2], dtype="Int32"), np.array([1, 2], dtype=np.intp)), + (pd.Index([1, 2]), np.array([1, 2], dtype=np.intp)), + # boolean + ([True, False, True], np.array([True, False, True], dtype=np.bool_)), + (np.array([True, False, True]), np.array([True, False, True], dtype=np.bool_)), + ( + pd.array([True, False, True], dtype="boolean"), + np.array([True, False, True], dtype=np.bool_), + ), + # other + ([], np.array([], dtype=np.intp)), + ], +) +def test_valid_input(indexer, expected): + array = np.array([1, 2, 3]) + result = check_array_indexer(array, indexer) + tm.assert_numpy_array_equal(result, expected) + + +@pytest.mark.parametrize( + "indexer", [[True, False, None], pd.array([True, False, None], dtype="boolean")], +) +def test_bool_raise_missing_values(indexer): + array = np.array([1, 2, 3]) + + msg = "Cannot mask with a boolean indexer containing NA values" + with pytest.raises(ValueError, match=msg): + check_array_indexer(array, indexer) + + +@pytest.mark.parametrize( + "indexer", + [ + [True, False], + pd.array([True, False], dtype="boolean"), + np.array([True, False], dtype=np.bool_), + ], +) +def test_bool_raise_length(indexer): + array = np.array([1, 2, 3]) + + msg = "Boolean index has wrong length" + with pytest.raises(IndexError, match=msg): + check_array_indexer(array, indexer) + + +@pytest.mark.parametrize( + "indexer", [[0, 1, None], pd.array([0, 1, pd.NA], dtype="Int64")], +) +def test_int_raise_missing_values(indexer): + array = np.array([1, 
2, 3]) + + msg = "Cannot index with an integer indexer containing NA values" + with pytest.raises(ValueError, match=msg): + check_array_indexer(array, indexer) + + +@pytest.mark.parametrize( + "indexer", + [ + [0.0, 1.0], + np.array([1.0, 2.0], dtype="float64"), + np.array([True, False], dtype=object), + pd.Index([True, False], dtype=object), + pd.array(["a", "b"], dtype="string"), + ], +) +def test_raise_invalid_array_dtypes(indexer): + array = np.array([1, 2, 3]) + + msg = "arrays used as indices must be of integer or boolean type" + with pytest.raises(IndexError, match=msg): + check_array_indexer(array, indexer) + + +@pytest.mark.parametrize( + "indexer", [None, Ellipsis, slice(0, 3), (None,)], +) +def test_pass_through_non_array_likes(indexer): + array = np.array([1, 2, 3]) + + result = check_array_indexer(array, indexer) + assert result == indexer diff --git a/pandas/tests/indexing/test_iloc.py b/pandas/tests/indexing/test_iloc.py index 48c25ec034653..d67259e8b7d40 100644 --- a/pandas/tests/indexing/test_iloc.py +++ b/pandas/tests/indexing/test_iloc.py @@ -249,10 +249,10 @@ def test_iloc_getitem_bool(self): def test_iloc_getitem_bool_diff_len(self, index): # GH26658 s = Series([1, 2, 3]) - with pytest.raises( - IndexError, - match=("Item wrong length {} instead of {}.".format(len(index), len(s))), - ): + msg = "Boolean index has wrong length: {} instead of {}".format( + len(index), len(s) + ) + with pytest.raises(IndexError, match=msg): _ = s.iloc[index] def test_iloc_getitem_slice(self): diff --git a/pandas/tests/indexing/test_loc.py b/pandas/tests/indexing/test_loc.py index 4c1436b800fc3..b9dc96adfa738 100644 --- a/pandas/tests/indexing/test_loc.py +++ b/pandas/tests/indexing/test_loc.py @@ -200,10 +200,10 @@ def test_loc_getitem_bool(self): def test_loc_getitem_bool_diff_len(self, index): # GH26658 s = Series([1, 2, 3]) - with pytest.raises( - IndexError, - match=("Item wrong length {} instead of {}.".format(len(index), len(s))), - ): + msg = "Boolean index 
has wrong length: {} instead of {}".format( + len(index), len(s) + ) + with pytest.raises(IndexError, match=msg): _ = s.loc[index] def test_loc_getitem_int_slice(self):