From 907cda6c85f995d96395650442af0b47c3141e47 Mon Sep 17 00:00:00 2001 From: Matthew Roeschke <10647082+mroeschke@users.noreply.github.com> Date: Fri, 16 Feb 2024 17:15:57 -0800 Subject: [PATCH 1/6] PERF: RangeIndex.append returns a RangeIndex when possible --- doc/source/whatsnew/v3.0.0.rst | 1 + pandas/core/indexes/range.py | 4 ++-- pandas/tests/indexes/ranges/test_range.py | 7 +++++++ 3 files changed, 10 insertions(+), 2 deletions(-) diff --git a/doc/source/whatsnew/v3.0.0.rst b/doc/source/whatsnew/v3.0.0.rst index 483cf659080ea..9a8655392e370 100644 --- a/doc/source/whatsnew/v3.0.0.rst +++ b/doc/source/whatsnew/v3.0.0.rst @@ -158,6 +158,7 @@ Removal of prior version deprecations/changes Performance improvements ~~~~~~~~~~~~~~~~~~~~~~~~ +- :meth:`RangeIndex.append` returns a :class:`RangeIndex` instead of a :class:`Index` when appending values that could continue the :class:`RangeIndex` (:issue:`57252`) - Performance improvement in :meth:`DataFrame.join` for sorted but non-unique indexes (:issue:`56941`) - Performance improvement in :meth:`DataFrame.join` when left and/or right are non-unique and ``how`` is ``"left"``, ``"right"``, or ``"inner"`` (:issue:`56817`) - Performance improvement in :meth:`DataFrame.join` with ``how="left"`` or ``how="right"`` and ``sort=True`` (:issue:`56919`) diff --git a/pandas/core/indexes/range.py b/pandas/core/indexes/range.py index a8e8ae140041a..43fa50a38a0ad 100644 --- a/pandas/core/indexes/range.py +++ b/pandas/core/indexes/range.py @@ -466,7 +466,6 @@ def __iter__(self) -> Iterator[int]: @doc(Index._shallow_copy) def _shallow_copy(self, values, name: Hashable = no_default): name = self._name if name is no_default else name - if values.dtype.kind == "f": return Index(values, name=name, dtype=np.float64) # GH 46675 & 43885: If values is equally spaced, return a @@ -942,7 +941,8 @@ def _concat(self, indexes: list[Index], name: Hashable) -> Index: indexes = [RangeIndex(3), RangeIndex(4, 6)] -> Index([0,1,2,4,5], dtype='int64') """ if not all(isinstance(x, RangeIndex) for x in indexes): - return super()._concat(indexes, name) + result = super()._concat(indexes, name) + return self._shallow_copy(result._values) elif len(indexes) == 1: return indexes[0] diff --git a/pandas/tests/indexes/ranges/test_range.py b/pandas/tests/indexes/ranges/test_range.py index 29d6916cc7f08..db51fdd9aa8f1 100644 --- a/pandas/tests/indexes/ranges/test_range.py +++ b/pandas/tests/indexes/ranges/test_range.py @@ -606,3 +606,10 @@ def test_range_index_rsub_by_const(self): result = 3 - RangeIndex(0, 4, 1) expected = RangeIndex(3, -1, -1) tm.assert_index_equal(result, expected) + + +def test_append_non_rangeindex_return_rangeindex(): + ri = RangeIndex(1) + result = ri.append([1]) + expected = RangeIndex(2) + tm.assert_index_equal(result, expected, exact=True) From 91dd97c142d91fccdc3785bd2b1dc21e6d319438 Mon Sep 17 00:00:00 2001 From: Matthew Roeschke <10647082+mroeschke@users.noreply.github.com> Date: Fri, 16 Feb 2024 17:17:28 -0800 Subject: [PATCH 2/6] add correct issue number --- doc/source/whatsnew/v3.0.0.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/source/whatsnew/v3.0.0.rst b/doc/source/whatsnew/v3.0.0.rst index 9a8655392e370..3fd0fef1dab0b 100644 --- a/doc/source/whatsnew/v3.0.0.rst +++ b/doc/source/whatsnew/v3.0.0.rst @@ -158,7 +158,7 @@ Removal of prior version deprecations/changes Performance improvements ~~~~~~~~~~~~~~~~~~~~~~~~ -- :meth:`RangeIndex.append` returns a :class:`RangeIndex` instead of a :class:`Index` when appending values that could continue the :class:`RangeIndex` (:issue:`57252`) +- :meth:`RangeIndex.append` returns a :class:`RangeIndex` instead of a :class:`Index` when appending values that could continue the :class:`RangeIndex` (:issue:`57467`) - Performance improvement in :meth:`DataFrame.join` for sorted but non-unique indexes (:issue:`56941`) - Performance improvement in :meth:`DataFrame.join` when left and/or right are non-unique and ``how`` is ``"left"``, ``"right"``, or ``"inner"`` (:issue:`56817`) - Performance improvement in :meth:`DataFrame.join` with ``how="left"`` or ``how="right"`` and ``sort=True`` (:issue:`56919`) From 06880f30ea896b6943f8dfe9614a5083f9a530b3 Mon Sep 17 00:00:00 2001 From: Matthew Roeschke <10647082+mroeschke@users.noreply.github.com> Date: Fri, 16 Feb 2024 17:17:34 -0800 Subject: [PATCH 3/6] add correct issue number --- pandas/core/indexes/range.py | 1 + 1 file changed, 1 insertion(+) diff --git a/pandas/core/indexes/range.py b/pandas/core/indexes/range.py index 43fa50a38a0ad..2108386057113 100644 --- a/pandas/core/indexes/range.py +++ b/pandas/core/indexes/range.py @@ -466,6 +466,7 @@ def __iter__(self) -> Iterator[int]: @doc(Index._shallow_copy) def _shallow_copy(self, values, name: Hashable = no_default): name = self._name if name is no_default else name + if values.dtype.kind == "f": return Index(values, name=name, dtype=np.float64) # GH 46675 & 43885: If values is equally spaced, return a From bfb101d73809ef598ae978a6708939e393fb1d26 Mon Sep 17 00:00:00 2001 From: Matthew Roeschke <10647082+mroeschke@users.noreply.github.com> Date: Mon, 19 Feb 2024 16:04:05 -0800 Subject: [PATCH 4/6] Only if int --- pandas/core/indexes/range.py | 18 +++++++++--------- pandas/tests/indexes/ranges/test_range.py | 2 +- 2 files changed, 10 insertions(+), 10 deletions(-) diff --git a/pandas/core/indexes/range.py b/pandas/core/indexes/range.py index 20c1e249ed257..fae484ef9e627 100644 --- a/pandas/core/indexes/range.py +++ b/pandas/core/indexes/range.py @@ -469,15 +469,15 @@ def _shallow_copy(self, values, name: Hashable = no_default): if values.dtype.kind == "f": return Index(values, name=name, dtype=np.float64) - # GH 46675 & 43885: If values is equally spaced, return a - # more memory-compact RangeIndex instead of Index with 64-bit dtype - unique_diffs = unique_deltas(values) - if len(unique_diffs) == 1 and unique_diffs[0] != 0: - diff = unique_diffs[0] - new_range = range(values[0], values[-1] + diff, diff) - return type(self)._simple_new(new_range, name=name) - else: - return self._constructor._simple_new(values, name=name) + if values.dtype.kind == "i": + # GH 46675 & 43885: If values is equally spaced, return a + # more memory-compact RangeIndex instead of Index with 64-bit dtype + unique_diffs = unique_deltas(values) + if len(unique_diffs) == 1 and unique_diffs[0] != 0: + diff = unique_diffs[0] + new_range = range(values[0], values[-1] + diff, diff) + return type(self)._simple_new(new_range, name=name) + return self._constructor._simple_new(values, name=name) def _view(self) -> Self: result = type(self)._simple_new(self._range, name=self._name) diff --git a/pandas/tests/indexes/ranges/test_range.py b/pandas/tests/indexes/ranges/test_range.py index eaac174237aee..b4bbfb75db92e 100644 --- a/pandas/tests/indexes/ranges/test_range.py +++ b/pandas/tests/indexes/ranges/test_range.py @@ -610,7 +610,7 @@ def test_range_index_rsub_by_const(self): def test_append_non_rangeindex_return_rangeindex(): ri = RangeIndex(1) - result = ri.append([1]) + result = ri.append(Index([1])) expected = RangeIndex(2) tm.assert_index_equal(result, expected, exact=True) From b12e87762f84cfda470a569a0edc200711fe3474 Mon Sep 17 00:00:00 2001 From: Matthew Roeschke <10647082+mroeschke@users.noreply.github.com> Date: Wed, 6 Mar 2024 09:52:00 -0800 Subject: [PATCH 5/6] Guard on integer dtype kind --- pandas/core/indexes/range.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/pandas/core/indexes/range.py b/pandas/core/indexes/range.py index ffa8cfeec9d21..0781a86e5d57e 100644 --- a/pandas/core/indexes/range.py +++ b/pandas/core/indexes/range.py @@ -990,7 +990,9 @@ def _concat(self, indexes: list[Index], name: Hashable) -> Index: """ if not all(isinstance(x, RangeIndex) for x in indexes): result = super()._concat(indexes, name) - return self._shallow_copy(result._values) + if result.dtype.kind == "i": + return self._shallow_copy(result._values) + return result elif len(indexes) == 1: return indexes[0] From 176a95c59243ebd009fe5f70fd2def98e7e28ea7 Mon Sep 17 00:00:00 2001 From: Matthew Roeschke <10647082+mroeschke@users.noreply.github.com> Date: Wed, 6 Mar 2024 11:51:19 -0800 Subject: [PATCH 6/6] Add test for return Index case --- pandas/tests/indexes/ranges/test_range.py | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/pandas/tests/indexes/ranges/test_range.py b/pandas/tests/indexes/ranges/test_range.py index 585331e48217f..8c24ce5d699d5 100644 --- a/pandas/tests/indexes/ranges/test_range.py +++ b/pandas/tests/indexes/ranges/test_range.py @@ -615,6 +615,13 @@ def test_append_non_rangeindex_return_rangeindex(): tm.assert_index_equal(result, expected, exact=True) +def test_append_non_rangeindex_return_index(): + ri = RangeIndex(1) + result = ri.append(Index([1, 3, 4])) + expected = Index([0, 1, 3, 4]) + tm.assert_index_equal(result, expected, exact=True) + + def test_reindex_returns_rangeindex(): ri = RangeIndex(2, name="foo") result, result_indexer = ri.reindex([1, 2, 3])