From 371334a8ac575769b1246f3e9966d0850e4a9d3a Mon Sep 17 00:00:00 2001 From: Matthew Roeschke <10647082+mroeschke@users.noreply.github.com> Date: Tue, 20 Feb 2024 16:10:16 -0800 Subject: [PATCH 1/4] PERF: Return RangeIndex from RangeIndex.join when possible --- pandas/core/indexes/range.py | 17 +++++++++++++++++ pandas/tests/extension/base/methods.py | 2 +- 2 files changed, 18 insertions(+), 1 deletion(-) diff --git a/pandas/core/indexes/range.py b/pandas/core/indexes/range.py index 2dd6f672eca45..a82208fb49f66 100644 --- a/pandas/core/indexes/range.py +++ b/pandas/core/indexes/range.py @@ -53,6 +53,7 @@ from pandas._typing import ( Axis, Dtype, + JoinHow, NaPosition, Self, npt, @@ -1016,6 +1017,22 @@ def _concat(self, indexes: list[Index], name: Hashable) -> Index: # In this case return an empty range index. return RangeIndex(_empty_range, name=name) + def _wrap_join_result( + self, + joined, + other, + lidx: npt.NDArray[np.intp] | None, + ridx: npt.NDArray[np.intp] | None, + how: JoinHow, + ) -> tuple[Self, npt.NDArray[np.intp] | None, npt.NDArray[np.intp] | None]: + join_index, lidx, ridx = super()._wrap_join_result( + joined, other, lidx, ridx, how + ) + maybe_ri = self._shallow_copy(join_index._values) + if isinstance(maybe_ri, type(self)): + return maybe_ri, lidx, ridx + return join_index, lidx, ridx + def __len__(self) -> int: """ return the length of the RangeIndex diff --git a/pandas/tests/extension/base/methods.py b/pandas/tests/extension/base/methods.py index c803a8113b4a4..20f564d8f32a2 100644 --- a/pandas/tests/extension/base/methods.py +++ b/pandas/tests/extension/base/methods.py @@ -397,7 +397,7 @@ def test_combine_first(self, data): b = pd.Series(data[2:5], index=[2, 3, 4]) result = a.combine_first(b) expected = pd.Series(data[:5]) - tm.assert_series_equal(result, expected) + tm.assert_series_equal(result, expected, check_index_type=True) @pytest.mark.parametrize("frame", [True, False]) @pytest.mark.parametrize( From 
f5115ac4e039c88df1ef03e45d1cf2ffaf4e3be0 Mon Sep 17 00:00:00 2001 From: Matthew Roeschke <10647082+mroeschke@users.noreply.github.com> Date: Tue, 20 Feb 2024 16:18:23 -0800 Subject: [PATCH 2/4] Add test --- pandas/tests/indexes/ranges/test_join.py | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/pandas/tests/indexes/ranges/test_join.py b/pandas/tests/indexes/ranges/test_join.py index 682b5c8def9ff..729b4d4aaeeb6 100644 --- a/pandas/tests/indexes/ranges/test_join.py +++ b/pandas/tests/indexes/ranges/test_join.py @@ -175,3 +175,9 @@ def test_join_self(self, join_type): index = RangeIndex(start=0, stop=20, step=2) joined = index.join(index, how=join_type) assert index is joined + + +def test_join_overlap_returns_rangeindex(): + result = RangeIndex(3).join(RangeIndex(2, 5), how="outer") + expected = RangeIndex(5) + tm.assert_index_equal(result, expected, exact=True) From 1a19260e6fd1bd9bb5e6ff9caebeeab86017d86f Mon Sep 17 00:00:00 2001 From: Matthew Roeschke <10647082+mroeschke@users.noreply.github.com> Date: Tue, 20 Feb 2024 16:19:19 -0800 Subject: [PATCH 3/4] Add whatsnew --- doc/source/whatsnew/v3.0.0.rst | 1 + 1 file changed, 1 insertion(+) diff --git a/doc/source/whatsnew/v3.0.0.rst b/doc/source/whatsnew/v3.0.0.rst index a77e93c4ab4be..441859a764c27 100644 --- a/doc/source/whatsnew/v3.0.0.rst +++ b/doc/source/whatsnew/v3.0.0.rst @@ -165,6 +165,7 @@ Removal of prior version deprecations/changes Performance improvements ~~~~~~~~~~~~~~~~~~~~~~~~ +- :meth:`RangeIndex.join` returns a :class:`RangeIndex` instead of a :class:`Index` when appending values that could continue the :class:`RangeIndex` (:issue:`57467`) - Performance improvement in :meth:`DataFrame.join` for sorted but non-unique indexes (:issue:`56941`) - Performance improvement in :meth:`DataFrame.join` when left and/or right are non-unique and ``how`` is ``"left"``, ``"right"``, or ``"inner"`` (:issue:`56817`) - Performance improvement in :meth:`DataFrame.join` with ``how="left"`` or ``how="right"`` 
and ``sort=True`` (:issue:`56919`) From 3aa72f9d4e3ab9d8425712d6f03ebd12d97904a5 Mon Sep 17 00:00:00 2001 From: Matthew Roeschke <10647082+mroeschke@users.noreply.github.com> Date: Tue, 20 Feb 2024 16:19:42 -0800 Subject: [PATCH 4/4] Change number --- doc/source/whatsnew/v3.0.0.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/source/whatsnew/v3.0.0.rst b/doc/source/whatsnew/v3.0.0.rst index 441859a764c27..63b0442a293ff 100644 --- a/doc/source/whatsnew/v3.0.0.rst +++ b/doc/source/whatsnew/v3.0.0.rst @@ -165,7 +165,7 @@ Removal of prior version deprecations/changes Performance improvements ~~~~~~~~~~~~~~~~~~~~~~~~ -- :meth:`RangeIndex.join` returns a :class:`RangeIndex` instead of a :class:`Index` when appending values that could continue the :class:`RangeIndex` (:issue:`57467`) +- :meth:`RangeIndex.join` returns a :class:`RangeIndex` instead of a :class:`Index` when possible (:issue:`57543`) - Performance improvement in :meth:`DataFrame.join` for sorted but non-unique indexes (:issue:`56941`) - Performance improvement in :meth:`DataFrame.join` when left and/or right are non-unique and ``how`` is ``"left"``, ``"right"``, or ``"inner"`` (:issue:`56817`) - Performance improvement in :meth:`DataFrame.join` with ``how="left"`` or ``how="right"`` and ``sort=True`` (:issue:`56919`)