Skip to content

ENH: support Ellipsis in loc/iloc #37750

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 26 commits into from
Oct 16, 2021
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
26 commits
Select commit Hold shift + click to select a range
9573948
ENH: support Ellipsis in loc/iloc
jbrockmendel Nov 10, 2020
3033365
mypy fixup
jbrockmendel Nov 11, 2020
10e8048
Merge branch 'master' of https://github.com/pandas-dev/pandas into en…
jbrockmendel Nov 11, 2020
9ed4838
Merge branch 'master' of https://github.com/pandas-dev/pandas into en…
jbrockmendel Nov 24, 2020
7827c8d
Merge branch 'master' of https://github.com/pandas-dev/pandas into en…
jbrockmendel Dec 24, 2020
d0888df
Merge branch 'master' of https://github.com/pandas-dev/pandas into en…
jbrockmendel Jan 12, 2021
a703e49
Merge branch 'master' of https://github.com/pandas-dev/pandas into en…
jbrockmendel Jan 18, 2021
d109346
Merge branch 'master' of https://github.com/pandas-dev/pandas into en…
jbrockmendel Jan 19, 2021
9445cf4
rename per request
jbrockmendel Jan 19, 2021
6d33e00
Merge branch 'master' of https://github.com/pandas-dev/pandas into en…
jbrockmendel Jan 20, 2021
a53f9a2
TST: flesh out tests
jbrockmendel Jan 20, 2021
843a5da
Merge branch 'master' into enh-indexing-ellipses
jbrockmendel Feb 2, 2021
fbec549
Merge branch 'master' into enh-indexing-ellipses
jbrockmendel Feb 3, 2021
8b637ee
Merge branch 'master' of https://github.com/pandas-dev/pandas into en…
jbrockmendel Feb 4, 2021
ac76a33
Merge branch 'master' into enh-indexing-ellipses
jbrockmendel Feb 16, 2021
351b65b
Merge branch 'master' into enh-indexing-ellipses
jbrockmendel Feb 17, 2021
3797b42
Merge branch 'master' into enh-indexing-ellipses
jbrockmendel Mar 4, 2021
59a0e58
Merge branch 'master' into enh-indexing-ellipses
jbrockmendel Mar 6, 2021
8c656a2
Merge branch 'master' into enh-indexing-ellipses
jbrockmendel Apr 8, 2021
a4e9fc7
Merge branch 'master' into enh-indexing-ellipses
jbrockmendel Jun 1, 2021
a5632f3
whatsnew
jbrockmendel Jun 1, 2021
1953d49
Merge branch 'master' into enh-indexing-ellipses
jbrockmendel Jun 3, 2021
8515d04
Merge branch 'master' into enh-indexing-ellipses
jbrockmendel Sep 28, 2021
c41d0fc
Merge branch 'master' into enh-indexing-ellipses
jbrockmendel Oct 5, 2021
a2eb766
Merge branch 'master' into enh-indexing-ellipses
jbrockmendel Oct 6, 2021
d186fa3
Merge branch 'master' into enh-indexing-ellipses
jbrockmendel Oct 16, 2021
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions doc/source/whatsnew/v1.4.0.rst
Original file line number Diff line number Diff line change
Expand Up @@ -148,11 +148,13 @@ Other enhancements
- Methods that relied on hashmap based algos such as :meth:`DataFrameGroupBy.value_counts`, :meth:`DataFrameGroupBy.count` and :func:`factorize` ignored imaginary component for complex numbers (:issue:`17927`)
- Add :meth:`Series.str.removeprefix` and :meth:`Series.str.removesuffix` introduced in Python 3.9 to remove pre-/suffixes from string-type :class:`Series` (:issue:`36944`)
- Attempting to write into a file in a missing parent directory with :meth:`DataFrame.to_csv`, :meth:`DataFrame.to_html`, :meth:`DataFrame.to_excel`, :meth:`DataFrame.to_feather`, :meth:`DataFrame.to_parquet`, :meth:`DataFrame.to_stata`, :meth:`DataFrame.to_json`, :meth:`DataFrame.to_pickle`, and :meth:`DataFrame.to_xml` now explicitly mentions the missing parent directory; the same is true for the :class:`Series` counterparts (:issue:`24306`)
- Indexing with ``.loc`` and ``.iloc`` now supports ``Ellipsis`` (:issue:`37750`)
- :meth:`IntegerArray.all`, :meth:`IntegerArray.any`, :meth:`FloatingArray.any`, and :meth:`FloatingArray.all` use Kleene logic (:issue:`41967`)
- Added support for nullable boolean and integer types in :meth:`DataFrame.to_stata`, :class:`~pandas.io.stata.StataWriter`, :class:`~pandas.io.stata.StataWriter117`, and :class:`~pandas.io.stata.StataWriterUTF8` (:issue:`40855`)
- :meth:`DataFrame.__pos__`, :meth:`DataFrame.__neg__` now retain ``ExtensionDtype`` dtypes (:issue:`43883`)
- The error raised when an optional dependency can't be imported now includes the original exception, for easier investigation (:issue:`43882`)
- Added :meth:`.ExponentialMovingWindow.sum` (:issue:`13297`)
-

.. ---------------------------------------------------------------------------

Expand Down
5 changes: 5 additions & 0 deletions pandas/core/indexes/multi.py
Original file line number Diff line number Diff line change
Expand Up @@ -3254,6 +3254,11 @@ def get_locs(self, seq):
# entry in `seq`
indexer = Index(np.arange(n))

if any(x is Ellipsis for x in seq):
raise NotImplementedError(
"MultiIndex does not support indexing with Ellipsis"
)

def _convert_to_indexer(r) -> Int64Index:
# return an indexer
if isinstance(r, slice):
Expand Down
58 changes: 48 additions & 10 deletions pandas/core/indexing.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,9 +3,7 @@
from contextlib import suppress
from typing import (
TYPE_CHECKING,
Any,
Hashable,
Sequence,
)
import warnings

Expand Down Expand Up @@ -67,6 +65,7 @@

# "null slice"
_NS = slice(None, None)
_one_ellipsis_message = "indexer may only contain one '...' entry"


# the public IndexSlicerMaker
Expand Down Expand Up @@ -731,11 +730,33 @@ def _validate_key(self, key, axis: int):
"""
raise AbstractMethodError(self)

def _has_valid_tuple(self, key: tuple):
def _expand_ellipsis(self, tup: tuple) -> tuple:
    """
    If a tuple key includes an Ellipsis, replace it with a null slice.

    Parameters
    ----------
    tup : tuple
        The tuple indexer to inspect.

    Returns
    -------
    tuple
        ``tup`` with its Ellipsis swapped for a null slice when
        ``len(tup) == self.ndim``; otherwise ``tup`` unchanged.

    Raises
    ------
    IndexingError
        If ``tup`` contains more than one Ellipsis.
    """
    # Locate Ellipsis entries by identity.  ``tuple.count`` / ``tuple.index``
    # compare with ``==``, which raises for entries whose truth value is
    # ambiguous (e.g. a boolean-mask array or Series sitting next to ``...``).
    ellipsis_positions = [i for i, x in enumerate(tup) if x is Ellipsis]
    if not ellipsis_positions:
        return tup
    if len(ellipsis_positions) > 1:
        raise IndexingError(_one_ellipsis_message)

    if len(tup) == self.ndim:
        # It is unambiguous what axis this Ellipsis is indexing,
        # treat as a single null slice.
        i = ellipsis_positions[0]
        return tup[:i] + (_NS,) + tup[i + 1 :]

    # TODO: other cases? only one test gets here, and that is covered
    # by _validate_key_length
    return tup

def _validate_tuple_indexer(self, key: tuple) -> tuple:
"""
Check the key for valid keys across my indexer.
"""
self._validate_key_length(key)
key = self._validate_key_length(key)
key = self._expand_ellipsis(key)
for i, k in enumerate(key):
try:
self._validate_key(k, i)
Expand All @@ -744,6 +765,7 @@ def _has_valid_tuple(self, key: tuple):
"Location based indexing can only have "
f"[{self._valid_types}] types"
) from err
return key

def _is_nested_tuple_indexer(self, tup: tuple) -> bool:
"""
Expand Down Expand Up @@ -772,9 +794,16 @@ def _convert_tuple(self, key):

return tuple(keyidx)

def _validate_key_length(self, key: Sequence[Any]) -> None:
def _validate_key_length(self, key: tuple) -> tuple:
    """
    Check that a tuple key does not have more entries than ``self.ndim``.

    A leading Ellipsis on an over-long key is dropped (e.g.
    ``Series.iloc[..., 3]`` reduces to ``Series.iloc[3]``) and the
    remainder is validated again.

    Parameters
    ----------
    key : tuple
        The tuple indexer to validate.

    Returns
    -------
    tuple
        The key, with a leading Ellipsis removed if that was needed to
        make it fit.

    Raises
    ------
    IndexingError
        If, after dropping a leading Ellipsis, another Ellipsis remains,
        or if the key still has more entries than ``self.ndim``.
    """
    if len(key) > self.ndim:
        if key[0] is Ellipsis:
            # e.g. Series.iloc[..., 3] reduces to just Series.iloc[3]
            key = key[1:]
            # Identity check rather than ``Ellipsis in key``: membership
            # tests use ``==``, which raises for array-like entries whose
            # truth value is ambiguous (e.g. a boolean mask).
            if any(k is Ellipsis for k in key):
                raise IndexingError(_one_ellipsis_message)
            return self._validate_key_length(key)
        raise IndexingError("Too many indexers")
    return key

def _getitem_tuple_same_dim(self, tup: tuple):
"""
Expand Down Expand Up @@ -822,7 +851,7 @@ def _getitem_lowerdim(self, tup: tuple):
with suppress(IndexingError):
return self._handle_lowerdim_multi_index_axis0(tup)

self._validate_key_length(tup)
tup = self._validate_key_length(tup)

for i, key in enumerate(tup):
if is_label_like(key):
Expand Down Expand Up @@ -1093,10 +1122,11 @@ def _getitem_iterable(self, key, axis: int):

def _getitem_tuple(self, tup: tuple):
with suppress(IndexingError):
tup = self._expand_ellipsis(tup)
return self._getitem_lowerdim(tup)

# no multi-index, so validate all of the indexers
self._has_valid_tuple(tup)
tup = self._validate_tuple_indexer(tup)

# ugly hack for GH #836
if self._multi_take_opportunity(tup):
Expand Down Expand Up @@ -1126,6 +1156,8 @@ def _getitem_axis(self, key, axis: int):
key = item_from_zerodim(key)
if is_iterator(key):
key = list(key)
if key is Ellipsis:
key = slice(None)

labels = self.obj._get_axis(axis)

Expand Down Expand Up @@ -1409,7 +1441,7 @@ def _validate_integer(self, key: int, axis: int) -> None:

def _getitem_tuple(self, tup: tuple):

self._has_valid_tuple(tup)
tup = self._validate_tuple_indexer(tup)
with suppress(IndexingError):
return self._getitem_lowerdim(tup)

Expand Down Expand Up @@ -1439,7 +1471,9 @@ def _get_list_axis(self, key, axis: int):
raise IndexError("positional indexers are out-of-bounds") from err

def _getitem_axis(self, key, axis: int):
if isinstance(key, ABCDataFrame):
if key is Ellipsis:
key = slice(None)
elif isinstance(key, ABCDataFrame):
raise IndexError(
"DataFrame indexer is not allowed for .iloc\n"
"Consider using .loc for automatic alignment."
Expand Down Expand Up @@ -2381,7 +2415,11 @@ def is_label_like(key) -> bool:
bool
"""
# select a label or row
return not isinstance(key, slice) and not is_list_like_indexer(key)
return (
not isinstance(key, slice)
and not is_list_like_indexer(key)
and key is not Ellipsis
)


def need_slice(obj: slice) -> bool:
Expand Down
64 changes: 64 additions & 0 deletions pandas/tests/indexing/test_loc.py
Original file line number Diff line number Diff line change
Expand Up @@ -37,6 +37,10 @@
import pandas._testing as tm
from pandas.api.types import is_scalar
from pandas.core.api import Float64Index
from pandas.core.indexing import (
IndexingError,
_one_ellipsis_message,
)
from pandas.tests.indexing.common import Base


Expand Down Expand Up @@ -1524,6 +1528,66 @@ def test_loc_setitem_cast3(self):
assert df.dtypes.one == np.dtype(np.int8)


class TestLocWithEllipsis:
    """Tests for Ellipsis (``...``) keys passed to ``.loc`` and ``.iloc``."""

    @pytest.fixture(params=[tm.loc, tm.iloc])
    def indexer(self, request):
        # Run every test against .iloc as well as .loc
        return request.param

    @pytest.fixture
    def obj(self, series_with_simple_index, frame_or_series):
        ser = series_with_simple_index
        return ser if frame_or_series is Series else ser.to_frame()

    def test_loc_iloc_getitem_ellipsis(self, obj, indexer):
        # A bare Ellipsis selects everything
        tm.assert_equal(indexer(obj)[...], obj)

    def test_loc_iloc_getitem_leading_ellipses(self, series_with_simple_index, indexer):
        ser = series_with_simple_index
        is_loc = indexer is tm.loc
        is_iloc = indexer is tm.iloc

        if is_iloc or len(ser) == 0:
            key = 0
        else:
            key = ser.index[0]

        if is_loc and ser.index.is_boolean():
            # passing [False] will get interpreted as a boolean mask
            # TODO: should it? unambiguous when lengths don't match?
            return

        if is_loc and isinstance(ser.index, MultiIndex):
            msg = "MultiIndex does not support indexing with Ellipsis"
            with pytest.raises(NotImplementedError, match=msg):
                indexer(ser)[..., [key]]
        elif len(ser) != 0:
            # Leading Ellipsis on a Series is dropped
            tm.assert_series_equal(indexer(ser)[..., [key]], indexer(ser)[[key]])

        # On a frame, a leading Ellipsis behaves like a null slice on the rows
        col_key = 0 if is_iloc else ser.name
        frame = ser.to_frame()
        tm.assert_frame_equal(
            indexer(frame)[..., [col_key]], indexer(frame)[:, [col_key]]
        )

    def test_loc_iloc_getitem_ellipses_only_one_ellipsis(self, obj, indexer):
        # GH37750
        if indexer is tm.iloc or len(obj) == 0:
            key = 0
        else:
            key = obj.index[0]

        # Each of these keys carries more than one Ellipsis
        for bad_key in [(..., ...), (..., [key], ...), (..., ..., key)]:
            with pytest.raises(IndexingError, match=_one_ellipsis_message):
                indexer(obj)[bad_key]

        # one_ellipsis_message takes precedence over "Too many indexers"
        # only when the first key is Ellipsis
        with pytest.raises(IndexingError, match="Too many indexers"):
            indexer(obj)[key, ..., ...]


class TestLocWithMultiIndex:
@pytest.mark.parametrize(
"keys, expected",
Expand Down