Skip to content

Commit 58e63ec

Browse files
authored
PERF: Return RangeIndex from RangeIndex.join when possible (#57651)
* PERF: Return RangeIndex from RangeIndex.join when possible * whatsnew number * Fix indexer
1 parent 59e5d93 commit 58e63ec

File tree

4 files changed

+89
-1
lines changed

4 files changed

+89
-1
lines changed

doc/source/whatsnew/v3.0.0.rst

+1
Original file line numberDiff line numberDiff line change
@@ -255,6 +255,7 @@ Performance improvements
255255
- Performance improvement in :meth:`MultiIndex.equals` for equal length indexes (:issue:`56990`)
256256
- Performance improvement in :meth:`RangeIndex.__getitem__` with a boolean mask returning a :class:`RangeIndex` instead of a :class:`Index` when possible. (:issue:`57588`)
257257
- Performance improvement in :meth:`RangeIndex.append` when appending the same index (:issue:`57252`)
258+
- Performance improvement in :meth:`RangeIndex.join` returning a :class:`RangeIndex` instead of a :class:`Index` when possible. (:issue:`57651`)
258259
- Performance improvement in :meth:`RangeIndex.take` returning a :class:`RangeIndex` instead of a :class:`Index` when possible. (:issue:`57445`)
259260
- Performance improvement in ``DataFrameGroupBy.__len__`` and ``SeriesGroupBy.__len__`` (:issue:`57595`)
260261
- Performance improvement in indexing operations for string dtypes (:issue:`56997`)

pandas/core/indexes/base.py

-1
Original file line numberDiff line numberDiff line change
@@ -4588,7 +4588,6 @@ def _get_leaf_sorter(
45884588
)
45894589
return join_index, left_indexer, right_indexer
45904590

4591-
@final
45924591
def _join_monotonic(
45934592
self, other: Index, how: JoinHow = "left"
45944593
) -> tuple[Index, npt.NDArray[np.intp] | None, npt.NDArray[np.intp] | None]:

pandas/core/indexes/range.py

+36
Original file line numberDiff line numberDiff line change
@@ -55,6 +55,7 @@
5555
from pandas._typing import (
5656
Axis,
5757
Dtype,
58+
JoinHow,
5859
NaPosition,
5960
Self,
6061
npt,
@@ -890,6 +891,41 @@ def symmetric_difference(
890891
result = result.rename(result_name)
891892
return result
892893

894+
def _join_monotonic(
    self, other: Index, how: JoinHow = "left"
) -> tuple[Index, npt.NDArray[np.intp] | None, npt.NDArray[np.intp] | None]:
    """
    Join ``self`` with ``other``, staying in ``type(self)`` when possible.

    Parameters
    ----------
    other : Index
        Index to join with. When it is not already an instance of
        ``type(self)``, ``self._shallow_copy(other._values)`` is tried; if
        that does not produce an instance of ``type(self)`` the parent
        class implementation handles the join instead.
    how : {"left", "right", "inner", "outer"}, default "left"
        Kind of join to perform.

    Returns
    -------
    join_index : Index
        ``self`` for "left", ``other`` for "right", the intersection for
        "inner" and the union for "outer".
    lidx : np.ndarray[np.intp] or None
        Positions of ``join_index`` entries in ``self``; None for a "left"
        join or when both indexes are equal.
    ridx : np.ndarray[np.intp] or None
        Positions of ``join_index`` entries in ``other``; None for a
        "right" join or when both indexes are equal.
    """
    # This currently only gets called for the monotonic increasing case
    own_type = type(self)
    if not isinstance(other, own_type):
        candidate = self._shallow_copy(other._values)
        if not isinstance(candidate, own_type):
            return super()._join_monotonic(other, how=how)
        other = candidate

    # Equal indexes need no indexers at all.
    if self.equals(other):
        return (other if how == "right" else self), None, None

    # NOTE(review): there is no fallback branch; ``how`` is presumably
    # validated by the caller to be one of the four ``JoinHow`` values.
    if how == "outer":
        join_index = self.union(other)
    elif how == "inner":
        join_index = self.intersection(other)
    elif how == "right":
        join_index = other
    elif how == "left":
        join_index = self

    # The side that *is* the join index needs no indexer.
    left_raw = None if how == "left" else self.get_indexer(join_index)
    right_raw = None if how == "right" else other.get_indexer(join_index)

    lidx = None if left_raw is None else ensure_platform_int(left_raw)
    ridx = None if right_raw is None else ensure_platform_int(right_raw)
    return join_index, lidx, ridx
928+
893929
# --------------------------------------------------------------------
894930

895931
# error: Return type "Index" of "delete" incompatible with return type

pandas/tests/indexes/ranges/test_join.py

+52
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,5 @@
11
import numpy as np
2+
import pytest
23

34
from pandas import (
45
Index,
@@ -175,3 +176,54 @@ def test_join_self(self, join_type):
175176
index = RangeIndex(start=0, stop=20, step=2)
176177
joined = index.join(index, how=join_type)
177178
assert index is joined
179+
180+
181+
@pytest.mark.parametrize(
    "left, right, expected, expected_lidx, expected_ridx, how",
    [
        # how="left": the left index is returned unchanged, so lidx is None
        (RangeIndex(2), RangeIndex(3), RangeIndex(2), None, [0, 1], "left"),
        (RangeIndex(2), RangeIndex(2), RangeIndex(2), None, None, "left"),
        (RangeIndex(2), RangeIndex(20, 22), RangeIndex(2), None, [-1, -1], "left"),
        # how="right": the right index is returned unchanged, so ridx is None
        (RangeIndex(2), RangeIndex(3), RangeIndex(3), [0, 1, -1], None, "right"),
        (RangeIndex(2), RangeIndex(2), RangeIndex(2), None, None, "right"),
        (
            RangeIndex(2),
            RangeIndex(20, 22),
            RangeIndex(20, 22),
            [-1, -1],
            None,
            "right",
        ),
        # how="inner"
        (RangeIndex(2), RangeIndex(3), RangeIndex(2), [0, 1], [0, 1], "inner"),
        (RangeIndex(2), RangeIndex(2), RangeIndex(2), None, None, "inner"),
        (RangeIndex(2), RangeIndex(1, 3), RangeIndex(1, 2), [1], [0], "inner"),
        # how="outer"
        (RangeIndex(2), RangeIndex(3), RangeIndex(3), [0, 1, -1], [0, 1, 2], "outer"),
        (RangeIndex(2), RangeIndex(2), RangeIndex(2), None, None, "outer"),
        (
            RangeIndex(2),
            RangeIndex(2, 4),
            RangeIndex(4),
            [0, 1, -1, -1],
            [-1, -1, 0, 1],
            "outer",
        ),
    ],
)
@pytest.mark.parametrize("right_type", [RangeIndex, lambda x: Index(list(x))])
def test_join_preserves_rangeindex(
    left, right, expected, expected_lidx, expected_ridx, how, right_type
):
    """
    Check the joined index and both indexers for every join kind.

    The joined index must equal ``expected`` under ``exact=True``, whether
    the right operand is passed as a RangeIndex or as an Index built from a
    list of the same values. Each indexer must be None when None is
    expected, otherwise an ``np.intp`` array with the expected values.
    """
    other = right_type(right)
    result, lidx, ridx = left.join(other, how=how, return_indexers=True)
    tm.assert_index_equal(result, expected, exact=True)

    # Same check for both sides: None must stay None, anything else must
    # match as an intp array.
    for actual, wanted in ((lidx, expected_lidx), (ridx, expected_ridx)):
        if wanted is not None:
            tm.assert_numpy_array_equal(actual, np.array(wanted, dtype=np.intp))
        else:
            assert actual is wanted

0 commit comments

Comments
 (0)