Skip to content

PERF: Return RangeIndex from RangeIndex.join when possible #57651

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 4 commits into from
Mar 5, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions doc/source/whatsnew/v3.0.0.rst
Original file line number Diff line number Diff line change
Expand Up @@ -253,6 +253,7 @@ Performance improvements
- Performance improvement in :meth:`Index.take` when ``indices`` is a full range indexer from zero to length of index (:issue:`56806`)
- Performance improvement in :meth:`MultiIndex.equals` for equal length indexes (:issue:`56990`)
- Performance improvement in :meth:`RangeIndex.append` when appending the same index (:issue:`57252`)
- Performance improvement in :meth:`RangeIndex.join` returning a :class:`RangeIndex` instead of an :class:`Index` when possible (:issue:`57651`)
- Performance improvement in :meth:`RangeIndex.take` returning a :class:`RangeIndex` instead of an :class:`Index` when possible (:issue:`57445`)
- Performance improvement in indexing operations for string dtypes (:issue:`56997`)
- :meth:`Series.str.extract` returns :class:`RangeIndex` columns instead of :class:`Index` columns when possible (:issue:`?`)
Expand Down
1 change: 0 additions & 1 deletion pandas/core/indexes/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -4588,7 +4588,6 @@ def _get_leaf_sorter(
)
return join_index, left_indexer, right_indexer

@final
def _join_monotonic(
self, other: Index, how: JoinHow = "left"
) -> tuple[Index, npt.NDArray[np.intp] | None, npt.NDArray[np.intp] | None]:
Expand Down
36 changes: 36 additions & 0 deletions pandas/core/indexes/range.py
Original file line number Diff line number Diff line change
Expand Up @@ -53,6 +53,7 @@
from pandas._typing import (
Axis,
Dtype,
JoinHow,
NaPosition,
Self,
npt,
Expand Down Expand Up @@ -888,6 +889,41 @@ def symmetric_difference(
result = result.rename(result_name)
return result

def _join_monotonic(
self, other: Index, how: JoinHow = "left"
) -> tuple[Index, npt.NDArray[np.intp] | None, npt.NDArray[np.intp] | None]:
# This currently only gets called for the monotonic increasing case
if not isinstance(other, type(self)):
maybe_ri = self._shallow_copy(other._values)
if not isinstance(maybe_ri, type(self)):
return super()._join_monotonic(other, how=how)
other = maybe_ri

if self.equals(other):
ret_index = other if how == "right" else self
return ret_index, None, None

if how == "left":
join_index = self
lidx = None
ridx = other.get_indexer(join_index)
elif how == "right":
join_index = other
lidx = self.get_indexer(join_index)
ridx = None
elif how == "inner":
join_index = self.intersection(other)
lidx = self.get_indexer(join_index)
ridx = other.get_indexer(join_index)
elif how == "outer":
join_index = self.union(other)
lidx = self.get_indexer(join_index)
ridx = other.get_indexer(join_index)

lidx = None if lidx is None else ensure_platform_int(lidx)
ridx = None if ridx is None else ensure_platform_int(ridx)
return join_index, lidx, ridx

# --------------------------------------------------------------------

# error: Return type "Index" of "delete" incompatible with return type
Expand Down
52 changes: 52 additions & 0 deletions pandas/tests/indexes/ranges/test_join.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
import numpy as np
import pytest

from pandas import (
Index,
Expand Down Expand Up @@ -175,3 +176,54 @@ def test_join_self(self, join_type):
index = RangeIndex(start=0, stop=20, step=2)
joined = index.join(index, how=join_type)
assert index is joined


@pytest.mark.parametrize(
    "left, right, expected, expected_lidx, expected_ridx, how",
    [
        # how="left": the left index is returned as-is, so lidx is None
        (RangeIndex(2), RangeIndex(3), RangeIndex(2), None, [0, 1], "left"),
        (RangeIndex(2), RangeIndex(2), RangeIndex(2), None, None, "left"),
        (RangeIndex(2), RangeIndex(20, 22), RangeIndex(2), None, [-1, -1], "left"),
        # how="right": the right index is returned as-is, so ridx is None
        (RangeIndex(2), RangeIndex(3), RangeIndex(3), [0, 1, -1], None, "right"),
        (RangeIndex(2), RangeIndex(2), RangeIndex(2), None, None, "right"),
        (
            RangeIndex(2),
            RangeIndex(20, 22),
            RangeIndex(20, 22),
            [-1, -1],
            None,
            "right",
        ),
        # how="inner"
        (RangeIndex(2), RangeIndex(3), RangeIndex(2), [0, 1], [0, 1], "inner"),
        (RangeIndex(2), RangeIndex(2), RangeIndex(2), None, None, "inner"),
        (RangeIndex(2), RangeIndex(1, 3), RangeIndex(1, 2), [1], [0], "inner"),
        # how="outer"
        (RangeIndex(2), RangeIndex(3), RangeIndex(3), [0, 1, -1], [0, 1, 2], "outer"),
        (RangeIndex(2), RangeIndex(2), RangeIndex(2), None, None, "outer"),
        (
            RangeIndex(2),
            RangeIndex(2, 4),
            RangeIndex(4),
            [0, 1, -1, -1],
            [-1, -1, 0, 1],
            "outer",
        ),
    ],
)
@pytest.mark.parametrize("right_type", [RangeIndex, lambda x: Index(list(x))])
def test_join_preserves_rangeindex(
    left, right, expected, expected_lidx, expected_ridx, how, right_type
):
    """
    Check the joined index and both indexers for each ``how``.

    ``right_type`` passes the right operand either as a RangeIndex or as an
    Index built from a list of the same values; the joined index is compared
    with ``exact=True`` in both cases.
    """
    other = right_type(right)
    result, lidx, ridx = left.join(other, how=how, return_indexers=True)
    tm.assert_index_equal(result, expected, exact=True)

    # Left indexer first, then right; None means "no indexer returned".
    for actual, wanted in ((lidx, expected_lidx), (ridx, expected_ridx)):
        if wanted is None:
            assert actual is None
        else:
            tm.assert_numpy_array_equal(actual, np.array(wanted, dtype=np.intp))