Skip to content

Commit 04487b3

Browse files
authored
PERF: RangeIndex.__getitem__ with integers return RangeIndex (#57770)
* PERF: RangeIndex.take with 1 value return RangeIndex
* add issue number
* Move to _shallow_copy, support empty join as well
* Fix self.name
* Fix error message
* Fix hdf test
* PERF: RangeIndex.__getitem__ with integers return RangeIndex
* PERF: RangeIndex.__getitem__ with integers return RangeIndex
* Handle Ellipsis
* Catch ValueError
1 parent 10f31f6 commit 04487b3

File tree

7 files changed

+110
-21
lines changed

7 files changed

+110
-21
lines changed

doc/source/whatsnew/v3.0.0.rst

+4-4
Original file line numberDiff line numberDiff line change
@@ -272,11 +272,11 @@ Performance improvements
272272
- Performance improvement in :meth:`Index.join` by propagating cached attributes in cases where the result matches one of the inputs (:issue:`57023`)
273273
- Performance improvement in :meth:`Index.take` when ``indices`` is a full range indexer from zero to length of index (:issue:`56806`)
274274
- Performance improvement in :meth:`MultiIndex.equals` for equal length indexes (:issue:`56990`)
275-
- Performance improvement in :meth:`RangeIndex.__getitem__` with a boolean mask returning a :class:`RangeIndex` instead of a :class:`Index` when possible. (:issue:`57588`)
275+
- Performance improvement in :meth:`RangeIndex.__getitem__` with a boolean mask or integers returning a :class:`RangeIndex` instead of a :class:`Index` when possible. (:issue:`57588`)
276276
- Performance improvement in :meth:`RangeIndex.append` when appending the same index (:issue:`57252`)
277-
- Performance improvement in :meth:`RangeIndex.join` returning a :class:`RangeIndex` instead of a :class:`Index` when possible. (:issue:`57651`)
278-
- Performance improvement in :meth:`RangeIndex.reindex` returning a :class:`RangeIndex` instead of a :class:`Index` when possible. (:issue:`57647`)
279-
- Performance improvement in :meth:`RangeIndex.take` returning a :class:`RangeIndex` instead of a :class:`Index` when possible. (:issue:`57445`)
277+
- Performance improvement in :meth:`RangeIndex.join` returning a :class:`RangeIndex` instead of a :class:`Index` when possible. (:issue:`57651`, :issue:`57752`)
278+
- Performance improvement in :meth:`RangeIndex.reindex` returning a :class:`RangeIndex` instead of a :class:`Index` when possible. (:issue:`57647`, :issue:`57752`)
279+
- Performance improvement in :meth:`RangeIndex.take` returning a :class:`RangeIndex` instead of a :class:`Index` when possible. (:issue:`57445`, :issue:`57752`)
280280
- Performance improvement in ``DataFrameGroupBy.__len__`` and ``SeriesGroupBy.__len__`` (:issue:`57595`)
281281
- Performance improvement in indexing operations for string dtypes (:issue:`56997`)
282282

pandas/core/indexes/base.py

-1
Original file line numberDiff line numberDiff line change
@@ -4235,7 +4235,6 @@ def join(
42354235

42364236
return self._join_via_get_indexer(other, how, sort)
42374237

4238-
@final
42394238
def _join_empty(
42404239
self, other: Index, how: JoinHow, sort: bool
42414240
) -> tuple[Index, npt.NDArray[np.intp] | None, npt.NDArray[np.intp] | None]:

pandas/core/indexes/range.py

+39-14
Original file line numberDiff line numberDiff line change
@@ -472,18 +472,31 @@ def _shallow_copy(self, values, name: Hashable = no_default):
472472

473473
if values.dtype.kind == "f":
474474
return Index(values, name=name, dtype=np.float64)
475-
if values.dtype.kind == "i" and values.ndim == 1 and len(values) > 1:
475+
if values.dtype.kind == "i" and values.ndim == 1:
476476
# GH 46675 & 43885: If values is equally spaced, return a
477477
# more memory-compact RangeIndex instead of Index with 64-bit dtype
478+
if len(values) == 0:
479+
return type(self)._simple_new(_empty_range, name=name)
480+
elif len(values) == 1:
481+
start = values[0]
482+
new_range = range(start, start + self.step, self.step)
483+
return type(self)._simple_new(new_range, name=name)
478484
diff = values[1] - values[0]
479485
if not missing.isna(diff) and diff != 0:
480-
maybe_range_indexer, remainder = np.divmod(values - values[0], diff)
481-
if (
482-
lib.is_range_indexer(maybe_range_indexer, len(maybe_range_indexer))
483-
and not remainder.any()
484-
):
486+
if len(values) == 2:
487+
# Can skip is_range_indexer check
485488
new_range = range(values[0], values[-1] + diff, diff)
486489
return type(self)._simple_new(new_range, name=name)
490+
else:
491+
maybe_range_indexer, remainder = np.divmod(values - values[0], diff)
492+
if (
493+
lib.is_range_indexer(
494+
maybe_range_indexer, len(maybe_range_indexer)
495+
)
496+
and not remainder.any()
497+
):
498+
new_range = range(values[0], values[-1] + diff, diff)
499+
return type(self)._simple_new(new_range, name=name)
487500
return self._constructor._simple_new(values, name=name)
488501

489502
def _view(self) -> Self:
@@ -894,12 +907,19 @@ def symmetric_difference(
894907
result = result.rename(result_name)
895908
return result
896909

910+
def _join_empty(
911+
self, other: Index, how: JoinHow, sort: bool
912+
) -> tuple[Index, npt.NDArray[np.intp] | None, npt.NDArray[np.intp] | None]:
913+
if other.dtype.kind == "i":
914+
other = self._shallow_copy(other._values, name=other.name)
915+
return super()._join_empty(other, how=how, sort=sort)
916+
897917
def _join_monotonic(
898918
self, other: Index, how: JoinHow = "left"
899919
) -> tuple[Index, npt.NDArray[np.intp] | None, npt.NDArray[np.intp] | None]:
900920
# This currently only gets called for the monotonic increasing case
901921
if not isinstance(other, type(self)):
902-
maybe_ri = self._shallow_copy(other._values)
922+
maybe_ri = self._shallow_copy(other._values, name=other.name)
903923
if not isinstance(maybe_ri, type(self)):
904924
return super()._join_monotonic(other, how=how)
905925
other = maybe_ri
@@ -1075,6 +1095,8 @@ def __getitem__(self, key):
10751095
"""
10761096
Conserve RangeIndex type for scalar and slice keys.
10771097
"""
1098+
if key is Ellipsis:
1099+
key = slice(None)
10781100
if isinstance(key, slice):
10791101
return self._getitem_slice(key)
10801102
elif is_integer(key):
@@ -1094,17 +1116,20 @@ def __getitem__(self, key):
10941116
)
10951117
elif com.is_bool_indexer(key):
10961118
if isinstance(getattr(key, "dtype", None), ExtensionDtype):
1097-
np_key = key.to_numpy(dtype=bool, na_value=False)
1119+
key = key.to_numpy(dtype=bool, na_value=False)
10981120
else:
1099-
np_key = np.asarray(key, dtype=bool)
1100-
check_array_indexer(self._range, np_key) # type: ignore[arg-type]
1121+
key = np.asarray(key, dtype=bool)
1122+
check_array_indexer(self._range, key) # type: ignore[arg-type]
11011123
# Short circuit potential _shallow_copy check
1102-
if np_key.all():
1124+
if key.all():
11031125
return self._simple_new(self._range, name=self.name)
1104-
elif not np_key.any():
1126+
elif not key.any():
11051127
return self._simple_new(_empty_range, name=self.name)
1106-
return self.take(np.flatnonzero(np_key))
1107-
return super().__getitem__(key)
1128+
key = np.flatnonzero(key)
1129+
try:
1130+
return self.take(key)
1131+
except (TypeError, ValueError):
1132+
return super().__getitem__(key)
11081133

11091134
def _getitem_slice(self, slobj: slice) -> Self:
11101135
"""

pandas/tests/indexes/ranges/test_join.py

+7-1
Original file line numberDiff line numberDiff line change
@@ -207,9 +207,15 @@ def test_join_self(self, join_type):
207207
[-1, -1, 0, 1],
208208
"outer",
209209
],
210+
[RangeIndex(2), RangeIndex(0), RangeIndex(2), None, [-1, -1], "left"],
211+
[RangeIndex(2), RangeIndex(0), RangeIndex(0), [], None, "right"],
212+
[RangeIndex(2), RangeIndex(0), RangeIndex(0), [], None, "inner"],
213+
[RangeIndex(2), RangeIndex(0), RangeIndex(2), None, [-1, -1], "outer"],
210214
],
211215
)
212-
@pytest.mark.parametrize("right_type", [RangeIndex, lambda x: Index(list(x))])
216+
@pytest.mark.parametrize(
217+
"right_type", [RangeIndex, lambda x: Index(list(x), dtype=x.dtype)]
218+
)
213219
def test_join_preserves_rangeindex(
214220
left, right, expected, expected_lidx, expected_ridx, how, right_type
215221
):

pandas/tests/indexes/ranges/test_range.py

+57
Original file line numberDiff line numberDiff line change
@@ -608,6 +608,26 @@ def test_range_index_rsub_by_const(self):
608608
tm.assert_index_equal(result, expected)
609609

610610

611+
def test_reindex_1_value_returns_rangeindex():
612+
ri = RangeIndex(0, 10, 2, name="foo")
613+
result, result_indexer = ri.reindex([2])
614+
expected = RangeIndex(2, 4, 2, name="foo")
615+
tm.assert_index_equal(result, expected, exact=True)
616+
617+
expected_indexer = np.array([1], dtype=np.intp)
618+
tm.assert_numpy_array_equal(result_indexer, expected_indexer)
619+
620+
621+
def test_reindex_empty_returns_rangeindex():
622+
ri = RangeIndex(0, 10, 2, name="foo")
623+
result, result_indexer = ri.reindex([])
624+
expected = RangeIndex(0, 0, 2, name="foo")
625+
tm.assert_index_equal(result, expected, exact=True)
626+
627+
expected_indexer = np.array([], dtype=np.intp)
628+
tm.assert_numpy_array_equal(result_indexer, expected_indexer)
629+
630+
611631
def test_append_non_rangeindex_return_rangeindex():
612632
ri = RangeIndex(1)
613633
result = ri.append(Index([1]))
@@ -653,6 +673,21 @@ def test_take_return_rangeindex():
653673
tm.assert_index_equal(result, expected, exact=True)
654674

655675

676+
@pytest.mark.parametrize(
677+
"rng, exp_rng",
678+
[
679+
[range(5), range(3, 4)],
680+
[range(0, -10, -2), range(-6, -8, -2)],
681+
[range(0, 10, 2), range(6, 8, 2)],
682+
],
683+
)
684+
def test_take_1_value_returns_rangeindex(rng, exp_rng):
685+
ri = RangeIndex(rng, name="foo")
686+
result = ri.take([3])
687+
expected = RangeIndex(exp_rng, name="foo")
688+
tm.assert_index_equal(result, expected, exact=True)
689+
690+
656691
def test_append_one_nonempty_preserve_step():
657692
expected = RangeIndex(0, -1, -1)
658693
result = RangeIndex(0).append([expected])
@@ -695,3 +730,25 @@ def test_getitem_boolmask_wrong_length():
695730
ri = RangeIndex(4, name="foo")
696731
with pytest.raises(IndexError, match="Boolean index has wrong length"):
697732
ri[[True]]
733+
734+
735+
def test_getitem_integers_return_rangeindex():
736+
result = RangeIndex(0, 10, 2, name="foo")[[0, -1]]
737+
expected = RangeIndex(start=0, stop=16, step=8, name="foo")
738+
tm.assert_index_equal(result, expected, exact=True)
739+
740+
result = RangeIndex(0, 10, 2, name="foo")[[3]]
741+
expected = RangeIndex(start=6, stop=8, step=2, name="foo")
742+
tm.assert_index_equal(result, expected, exact=True)
743+
744+
745+
def test_getitem_empty_return_rangeindex():
746+
result = RangeIndex(0, 10, 2, name="foo")[[]]
747+
expected = RangeIndex(start=0, stop=0, step=1, name="foo")
748+
tm.assert_index_equal(result, expected, exact=True)
749+
750+
751+
def test_getitem_integers_return_index():
752+
result = RangeIndex(0, 10, 2, name="foo")[[0, 1, -1]]
753+
expected = Index([0, 2, 8], dtype="int64", name="foo")
754+
tm.assert_index_equal(result, expected)

pandas/tests/indexing/test_loc.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -509,7 +509,7 @@ def test_loc_getitem_list_with_fail(self):
509509

510510
s.loc[[2]]
511511

512-
msg = f"\"None of [Index([3], dtype='{np.dtype(int)}')] are in the [index]"
512+
msg = "None of [RangeIndex(start=3, stop=4, step=1)] are in the [index]"
513513
with pytest.raises(KeyError, match=re.escape(msg)):
514514
s.loc[[3]]
515515

pandas/tests/io/pytables/test_append.py

+2
Original file line numberDiff line numberDiff line change
@@ -968,6 +968,8 @@ def test_append_to_multiple_min_itemsize(setup_path):
968968
}
969969
)
970970
expected = df.iloc[[0]]
971+
# Reading/writing RangeIndex info is not supported yet
972+
expected.index = Index(list(range(len(expected.index))))
971973

972974
with ensure_clean_store(setup_path) as store:
973975
store.append_to_multiple(

0 commit comments

Comments
 (0)