diff --git a/doc/source/whatsnew/v3.0.0.rst b/doc/source/whatsnew/v3.0.0.rst index d6e3ac1ea48cc..d07f4f9f05708 100644 --- a/doc/source/whatsnew/v3.0.0.rst +++ b/doc/source/whatsnew/v3.0.0.rst @@ -167,6 +167,7 @@ Removal of prior version deprecations/changes Performance improvements ~~~~~~~~~~~~~~~~~~~~~~~~ +- :meth:`Series.str.extract` returns a :class:`RangeIndex` columns instead of an :class:`Index` column when possible (:issue:`57542`) - Performance improvement in :class:`DataFrame` when ``data`` is a ``dict`` and ``columns`` is specified (:issue:`24368`) - Performance improvement in :meth:`DataFrame.join` for sorted but non-unique indexes (:issue:`56941`) - Performance improvement in :meth:`DataFrame.join` when left and/or right are non-unique and ``how`` is ``"left"``, ``"right"``, or ``"inner"`` (:issue:`56817`) @@ -175,11 +176,11 @@ Performance improvements - Performance improvement in :meth:`Index.join` by propagating cached attributes in cases where the result matches one of the inputs (:issue:`57023`) - Performance improvement in :meth:`Index.take` when ``indices`` is a full range indexer from zero to length of index (:issue:`56806`) - Performance improvement in :meth:`MultiIndex.equals` for equal length indexes (:issue:`56990`) +- Performance improvement in :meth:`RangeIndex.__getitem__` with a boolean mask returning a :class:`RangeIndex` instead of a :class:`Index` when possible. (:issue:`57588`) - Performance improvement in :meth:`RangeIndex.append` when appending the same index (:issue:`57252`) - Performance improvement in :meth:`RangeIndex.take` returning a :class:`RangeIndex` instead of a :class:`Index` when possible. (:issue:`57445`) -- Performance improvement in indexing operations for string dtypes (:issue:`56997`) -- :meth:`Series.str.extract` returns a :class:`RangeIndex` columns instead of an :class:`Index` column when possible (:issue:`?``) - Performance improvement in ``DataFrameGroupBy.__len__`` and ``SeriesGroupBy.__len__`` (:issue:`57595`) +- Performance improvement in indexing operations for string dtypes (:issue:`56997`) .. --------------------------------------------------------------------------- .. _whatsnew_300.bug_fixes: diff --git a/pandas/core/indexes/range.py b/pandas/core/indexes/range.py index 8a2e3fbf500a4..c5036a2b32967 100644 --- a/pandas/core/indexes/range.py +++ b/pandas/core/indexes/range.py @@ -29,6 +29,7 @@ doc, ) +from pandas.core.dtypes.base import ExtensionDtype from pandas.core.dtypes.common import ( ensure_platform_int, ensure_python_int, @@ -42,6 +43,7 @@ from pandas.core import ops import pandas.core.common as com from pandas.core.construction import extract_array +from pandas.core.indexers import check_array_indexer import pandas.core.indexes.base as ibase from pandas.core.indexes.base import ( Index, @@ -1048,6 +1050,18 @@ def __getitem__(self, key): "and integer or boolean " "arrays are valid indices" ) + elif com.is_bool_indexer(key): + if isinstance(getattr(key, "dtype", None), ExtensionDtype): + np_key = key.to_numpy(dtype=bool, na_value=False) + else: + np_key = np.asarray(key, dtype=bool) + check_array_indexer(self._range, np_key) # type: ignore[arg-type] + # Short circuit potential _shallow_copy check + if np_key.all(): + return self._simple_new(self._range, name=self.name) + elif not np_key.any(): + return self._simple_new(_empty_range, name=self.name) + return self.take(np.flatnonzero(np_key)) return super().__getitem__(key) def _getitem_slice(self, slobj: slice) -> Self: diff --git a/pandas/tests/indexes/ranges/test_range.py b/pandas/tests/indexes/ranges/test_range.py index d500687763a1e..8b4b7a5d70ee4 100644 --- a/pandas/tests/indexes/ranges/test_range.py +++ b/pandas/tests/indexes/ranges/test_range.py @@ -623,3 +623,41 @@ def test_append_one_nonempty_preserve_step(): expected = RangeIndex(0, -1, -1) result = RangeIndex(0).append([expected]) tm.assert_index_equal(result, expected, exact=True) + + +def test_getitem_boolmask_all_true(): + ri = RangeIndex(3, name="foo") + expected = ri.copy() + result = ri[[True] * 3] + tm.assert_index_equal(result, expected, exact=True) + + +def test_getitem_boolmask_all_false(): + ri = RangeIndex(3, name="foo") + result = ri[[False] * 3] + expected = RangeIndex(0, name="foo") + tm.assert_index_equal(result, expected, exact=True) + + +def test_getitem_boolmask_returns_rangeindex(): + ri = RangeIndex(3, name="foo") + result = ri[[False, True, True]] + expected = RangeIndex(1, 3, name="foo") + tm.assert_index_equal(result, expected, exact=True) + + result = ri[[True, False, True]] + expected = RangeIndex(0, 3, 2, name="foo") + tm.assert_index_equal(result, expected, exact=True) + + +def test_getitem_boolmask_returns_index(): + ri = RangeIndex(4, name="foo") + result = ri[[True, True, False, True]] + expected = Index([0, 1, 3], name="foo") + tm.assert_index_equal(result, expected) + + +def test_getitem_boolmask_wrong_length(): + ri = RangeIndex(4, name="foo") + with pytest.raises(IndexError, match="Boolean index has wrong length"): + ri[[True]] diff --git a/pandas/tests/io/pytables/test_append.py b/pandas/tests/io/pytables/test_append.py index aff08e5ad3882..529d6d789596f 100644 --- a/pandas/tests/io/pytables/test_append.py +++ b/pandas/tests/io/pytables/test_append.py @@ -132,6 +132,8 @@ def test_append_series(setup_path): # select on the index and values expected = ns[(ns > 70) & (ns.index < 90)] + # Reading/writing RangeIndex info is not supported yet + expected.index = Index(expected.index._data) result = store.select("ns", "foo>70 and index<90") tm.assert_series_equal(result, expected, check_index_type=True)