Skip to content

Commit d2bc846

Browse files
authored
PERF: Return a RangeIndex from __getitem__ with a bool mask when possible (#57588)
* PERF: Return a RangeIndex from __getitem__ with a bool mask when possible
* Add whatsnew
* Handle EA types
* force to Index
* Type ignore
1 parent ab2980b commit d2bc846

File tree

4 files changed

+57
-2
lines changed

4 files changed

+57
-2
lines changed

doc/source/whatsnew/v3.0.0.rst

+3-2
Original file line numberDiff line numberDiff line change
@@ -244,6 +244,7 @@ Removal of prior version deprecations/changes
244244

245245
Performance improvements
246246
~~~~~~~~~~~~~~~~~~~~~~~~
247+
- :meth:`Series.str.extract` returns :class:`RangeIndex` columns instead of :class:`Index` columns when possible (:issue:`57542`)
247248
- Performance improvement in :class:`DataFrame` when ``data`` is a ``dict`` and ``columns`` is specified (:issue:`24368`)
248249
- Performance improvement in :meth:`DataFrame.join` for sorted but non-unique indexes (:issue:`56941`)
249250
- Performance improvement in :meth:`DataFrame.join` when left and/or right are non-unique and ``how`` is ``"left"``, ``"right"``, or ``"inner"`` (:issue:`56817`)
@@ -252,11 +253,11 @@ Performance improvements
252253
- Performance improvement in :meth:`Index.join` by propagating cached attributes in cases where the result matches one of the inputs (:issue:`57023`)
253254
- Performance improvement in :meth:`Index.take` when ``indices`` is a full range indexer from zero to length of index (:issue:`56806`)
254255
- Performance improvement in :meth:`MultiIndex.equals` for equal length indexes (:issue:`56990`)
256+
- Performance improvement in :meth:`RangeIndex.__getitem__` with a boolean mask, returning a :class:`RangeIndex` instead of an :class:`Index` when possible. (:issue:`57588`)
255257
- Performance improvement in :meth:`RangeIndex.append` when appending the same index (:issue:`57252`)
256258
- Performance improvement in :meth:`RangeIndex.take`, returning a :class:`RangeIndex` instead of an :class:`Index` when possible. (:issue:`57445`)
257-
- Performance improvement in indexing operations for string dtypes (:issue:`56997`)
258-
- :meth:`Series.str.extract` returns a :class:`RangeIndex` columns instead of an :class:`Index` column when possible (:issue:`?``)
259259
- Performance improvement in ``DataFrameGroupBy.__len__`` and ``SeriesGroupBy.__len__`` (:issue:`57595`)
260+
- Performance improvement in indexing operations for string dtypes (:issue:`56997`)
260261

261262
.. ---------------------------------------------------------------------------
262263
.. _whatsnew_300.bug_fixes:

pandas/core/indexes/range.py

+14
Original file line numberDiff line numberDiff line change
@@ -29,6 +29,7 @@
2929
doc,
3030
)
3131

32+
from pandas.core.dtypes.base import ExtensionDtype
3233
from pandas.core.dtypes.common import (
3334
ensure_platform_int,
3435
ensure_python_int,
@@ -42,6 +43,7 @@
4243
from pandas.core import ops
4344
import pandas.core.common as com
4445
from pandas.core.construction import extract_array
46+
from pandas.core.indexers import check_array_indexer
4547
import pandas.core.indexes.base as ibase
4648
from pandas.core.indexes.base import (
4749
Index,
@@ -1048,6 +1050,18 @@ def __getitem__(self, key):
10481050
"and integer or boolean "
10491051
"arrays are valid indices"
10501052
)
1053+
elif com.is_bool_indexer(key):
1054+
if isinstance(getattr(key, "dtype", None), ExtensionDtype):
1055+
np_key = key.to_numpy(dtype=bool, na_value=False)
1056+
else:
1057+
np_key = np.asarray(key, dtype=bool)
1058+
check_array_indexer(self._range, np_key) # type: ignore[arg-type]
1059+
# Short circuit potential _shallow_copy check
1060+
if np_key.all():
1061+
return self._simple_new(self._range, name=self.name)
1062+
elif not np_key.any():
1063+
return self._simple_new(_empty_range, name=self.name)
1064+
return self.take(np.flatnonzero(np_key))
10511065
return super().__getitem__(key)
10521066

10531067
def _getitem_slice(self, slobj: slice) -> Self:

pandas/tests/indexes/ranges/test_range.py

+38
Original file line numberDiff line numberDiff line change
@@ -623,3 +623,41 @@ def test_append_one_nonempty_preserve_step():
623623
expected = RangeIndex(0, -1, -1)
624624
result = RangeIndex(0).append([expected])
625625
tm.assert_index_equal(result, expected, exact=True)
626+
627+
628+
def test_getitem_boolmask_all_true():
629+
ri = RangeIndex(3, name="foo")
630+
expected = ri.copy()
631+
result = ri[[True] * 3]
632+
tm.assert_index_equal(result, expected, exact=True)
633+
634+
635+
def test_getitem_boolmask_all_false():
636+
ri = RangeIndex(3, name="foo")
637+
result = ri[[False] * 3]
638+
expected = RangeIndex(0, name="foo")
639+
tm.assert_index_equal(result, expected, exact=True)
640+
641+
642+
def test_getitem_boolmask_returns_rangeindex():
643+
ri = RangeIndex(3, name="foo")
644+
result = ri[[False, True, True]]
645+
expected = RangeIndex(1, 3, name="foo")
646+
tm.assert_index_equal(result, expected, exact=True)
647+
648+
result = ri[[True, False, True]]
649+
expected = RangeIndex(0, 3, 2, name="foo")
650+
tm.assert_index_equal(result, expected, exact=True)
651+
652+
653+
def test_getitem_boolmask_returns_index():
654+
ri = RangeIndex(4, name="foo")
655+
result = ri[[True, True, False, True]]
656+
expected = Index([0, 1, 3], name="foo")
657+
tm.assert_index_equal(result, expected)
658+
659+
660+
def test_getitem_boolmask_wrong_length():
661+
ri = RangeIndex(4, name="foo")
662+
with pytest.raises(IndexError, match="Boolean index has wrong length"):
663+
ri[[True]]

pandas/tests/io/pytables/test_append.py

+2
Original file line numberDiff line numberDiff line change
@@ -132,6 +132,8 @@ def test_append_series(setup_path):
132132

133133
# select on the index and values
134134
expected = ns[(ns > 70) & (ns.index < 90)]
135+
# Reading/writing RangeIndex info is not supported yet
136+
expected.index = Index(expected.index._data)
135137
result = store.select("ns", "foo>70 and index<90")
136138
tm.assert_series_equal(result, expected, check_index_type=True)
137139

0 commit comments

Comments
 (0)