diff --git a/doc/source/whatsnew/v1.4.0.rst b/doc/source/whatsnew/v1.4.0.rst index b7efec8fd2e89..bd92c9890c0b8 100644 --- a/doc/source/whatsnew/v1.4.0.rst +++ b/doc/source/whatsnew/v1.4.0.rst @@ -148,11 +148,13 @@ Other enhancements - Methods that relied on hashmap based algos such as :meth:`DataFrameGroupBy.value_counts`, :meth:`DataFrameGroupBy.count` and :func:`factorize` ignored imaginary component for complex numbers (:issue:`17927`) - Add :meth:`Series.str.removeprefix` and :meth:`Series.str.removesuffix` introduced in Python 3.9 to remove pre-/suffixes from string-type :class:`Series` (:issue:`36944`) - Attempting to write into a file in missing parent directory with :meth:`DataFrame.to_csv`, :meth:`DataFrame.to_html`, :meth:`DataFrame.to_excel`, :meth:`DataFrame.to_feather`, :meth:`DataFrame.to_parquet`, :meth:`DataFrame.to_stata`, :meth:`DataFrame.to_json`, :meth:`DataFrame.to_pickle`, and :meth:`DataFrame.to_xml` now explicitly mentions missing parent directory, the same is true for :class:`Series` counterparts (:issue:`24306`) +- Indexing with ``.loc`` and ``.iloc`` now supports ``Ellipsis`` (:issue:`37750`) - :meth:`IntegerArray.all` , :meth:`IntegerArray.any`, :meth:`FloatingArray.any`, and :meth:`FloatingArray.all` use Kleene logic (:issue:`41967`) - Added support for nullable boolean and integer types in :meth:`DataFrame.to_stata`, :class:`~pandas.io.stata.StataWriter`, :class:`~pandas.io.stata.StataWriter117`, and :class:`~pandas.io.stata.StataWriterUTF8` (:issue:`40855`) - :meth:`DataFrame.__pos__`, :meth:`DataFrame.__neg__` now retain ``ExtensionDtype`` dtypes (:issue:`43883`) - The error raised when an optional dependency can't be imported now includes the original exception, for easier investigation (:issue:`43882`) - Added :meth:`.ExponentialMovingWindow.sum` (:issue:`13297`) +- .. 
--------------------------------------------------------------------------- diff --git a/pandas/core/indexes/multi.py b/pandas/core/indexes/multi.py index 7b0d163c659a5..e2f1a2d6a1e23 100644 --- a/pandas/core/indexes/multi.py +++ b/pandas/core/indexes/multi.py @@ -3254,6 +3254,11 @@ def get_locs(self, seq): # entry in `seq` indexer = Index(np.arange(n)) + if any(x is Ellipsis for x in seq): + raise NotImplementedError( + "MultiIndex does not support indexing with Ellipsis" + ) + def _convert_to_indexer(r) -> Int64Index: # return an indexer if isinstance(r, slice): diff --git a/pandas/core/indexing.py b/pandas/core/indexing.py index bbb3cb3391dfa..918a9ea1b8030 100644 --- a/pandas/core/indexing.py +++ b/pandas/core/indexing.py @@ -3,9 +3,7 @@ from contextlib import suppress from typing import ( TYPE_CHECKING, - Any, Hashable, - Sequence, ) import warnings @@ -67,6 +65,7 @@ # "null slice" _NS = slice(None, None) +_one_ellipsis_message = "indexer may only contain one '...' entry" # the public IndexSlicerMaker @@ -731,11 +730,33 @@ def _validate_key(self, key, axis: int): """ raise AbstractMethodError(self) - def _has_valid_tuple(self, key: tuple): + def _expand_ellipsis(self, tup: tuple) -> tuple: + """ + If a tuple key includes an Ellipsis, replace it with an appropriate + number of null slices. + """ + if any(x is Ellipsis for x in tup): + if tup.count(Ellipsis) > 1: + raise IndexingError(_one_ellipsis_message) + + if len(tup) == self.ndim: + # It is unambiguous what axis this Ellipsis is indexing, + # treat as a single null slice. + i = tup.index(Ellipsis) + # FIXME: this assumes only one Ellipsis + new_key = tup[:i] + (_NS,) + tup[i + 1 :] + return new_key + + # TODO: other cases? only one test gets here, and that is covered + # by _validate_key_length + return tup + + def _validate_tuple_indexer(self, key: tuple) -> tuple: """ Check the key for valid keys across my indexer. 
""" - self._validate_key_length(key) + key = self._validate_key_length(key) + key = self._expand_ellipsis(key) for i, k in enumerate(key): try: self._validate_key(k, i) @@ -744,6 +765,7 @@ def _has_valid_tuple(self, key: tuple): "Location based indexing can only have " f"[{self._valid_types}] types" ) from err + return key def _is_nested_tuple_indexer(self, tup: tuple) -> bool: """ @@ -772,9 +794,16 @@ def _convert_tuple(self, key): return tuple(keyidx) - def _validate_key_length(self, key: Sequence[Any]) -> None: + def _validate_key_length(self, key: tuple) -> tuple: if len(key) > self.ndim: + if key[0] is Ellipsis: + # e.g. Series.iloc[..., 3] reduces to just Series.iloc[3] + key = key[1:] + if Ellipsis in key: + raise IndexingError(_one_ellipsis_message) + return self._validate_key_length(key) raise IndexingError("Too many indexers") + return key def _getitem_tuple_same_dim(self, tup: tuple): """ @@ -822,7 +851,7 @@ def _getitem_lowerdim(self, tup: tuple): with suppress(IndexingError): return self._handle_lowerdim_multi_index_axis0(tup) - self._validate_key_length(tup) + tup = self._validate_key_length(tup) for i, key in enumerate(tup): if is_label_like(key): @@ -1093,10 +1122,11 @@ def _getitem_iterable(self, key, axis: int): def _getitem_tuple(self, tup: tuple): with suppress(IndexingError): + tup = self._expand_ellipsis(tup) return self._getitem_lowerdim(tup) # no multi-index, so validate all of the indexers - self._has_valid_tuple(tup) + tup = self._validate_tuple_indexer(tup) # ugly hack for GH #836 if self._multi_take_opportunity(tup): @@ -1126,6 +1156,8 @@ def _getitem_axis(self, key, axis: int): key = item_from_zerodim(key) if is_iterator(key): key = list(key) + if key is Ellipsis: + key = slice(None) labels = self.obj._get_axis(axis) @@ -1409,7 +1441,7 @@ def _validate_integer(self, key: int, axis: int) -> None: def _getitem_tuple(self, tup: tuple): - self._has_valid_tuple(tup) + tup = self._validate_tuple_indexer(tup) with suppress(IndexingError): 
return self._getitem_lowerdim(tup) @@ -1439,7 +1471,9 @@ def _get_list_axis(self, key, axis: int): raise IndexError("positional indexers are out-of-bounds") from err def _getitem_axis(self, key, axis: int): - if isinstance(key, ABCDataFrame): + if key is Ellipsis: + key = slice(None) + elif isinstance(key, ABCDataFrame): raise IndexError( "DataFrame indexer is not allowed for .iloc\n" "Consider using .loc for automatic alignment." @@ -2381,7 +2415,11 @@ def is_label_like(key) -> bool: bool """ # select a label or row - return not isinstance(key, slice) and not is_list_like_indexer(key) + return ( + not isinstance(key, slice) + and not is_list_like_indexer(key) + and key is not Ellipsis + ) def need_slice(obj: slice) -> bool: diff --git a/pandas/tests/indexing/test_loc.py b/pandas/tests/indexing/test_loc.py index 25c0625d1d790..94ecfe81abd45 100644 --- a/pandas/tests/indexing/test_loc.py +++ b/pandas/tests/indexing/test_loc.py @@ -37,6 +37,10 @@ import pandas._testing as tm from pandas.api.types import is_scalar from pandas.core.api import Float64Index +from pandas.core.indexing import ( + IndexingError, + _one_ellipsis_message, +) from pandas.tests.indexing.common import Base @@ -1524,6 +1528,66 @@ def test_loc_setitem_cast3(self): assert df.dtypes.one == np.dtype(np.int8) +class TestLocWithEllipsis: + @pytest.fixture(params=[tm.loc, tm.iloc]) + def indexer(self, request): + # Test iloc while we're here + return request.param + + @pytest.fixture + def obj(self, series_with_simple_index, frame_or_series): + obj = series_with_simple_index + if frame_or_series is not Series: + obj = obj.to_frame() + return obj + + def test_loc_iloc_getitem_ellipsis(self, obj, indexer): + result = indexer(obj)[...] 
+ tm.assert_equal(result, obj) + + def test_loc_iloc_getitem_leading_ellipses(self, series_with_simple_index, indexer): + obj = series_with_simple_index + key = 0 if (indexer is tm.iloc or len(obj) == 0) else obj.index[0] + + if indexer is tm.loc and obj.index.is_boolean(): + # passing [False] will get interpreted as a boolean mask + # TODO: should it? unambiguous when lengths don't match? + return + if indexer is tm.loc and isinstance(obj.index, MultiIndex): + msg = "MultiIndex does not support indexing with Ellipsis" + with pytest.raises(NotImplementedError, match=msg): + result = indexer(obj)[..., [key]] + + elif len(obj) != 0: + result = indexer(obj)[..., [key]] + expected = indexer(obj)[[key]] + tm.assert_series_equal(result, expected) + + key2 = 0 if indexer is tm.iloc else obj.name + df = obj.to_frame() + result = indexer(df)[..., [key2]] + expected = indexer(df)[:, [key2]] + tm.assert_frame_equal(result, expected) + + def test_loc_iloc_getitem_ellipses_only_one_ellipsis(self, obj, indexer): + # GH37750 + key = 0 if (indexer is tm.iloc or len(obj) == 0) else obj.index[0] + + with pytest.raises(IndexingError, match=_one_ellipsis_message): + indexer(obj)[..., ...] + + with pytest.raises(IndexingError, match=_one_ellipsis_message): + indexer(obj)[..., [key], ...] + + with pytest.raises(IndexingError, match=_one_ellipsis_message): + indexer(obj)[..., ..., key] + + # one_ellipsis_message takes precedence over "Too many indexers" + # only when the first key is Ellipsis + with pytest.raises(IndexingError, match="Too many indexers"): + indexer(obj)[key, ..., ...] + + class TestLocWithMultiIndex: @pytest.mark.parametrize( "keys, expected",