From f719252678917a038c1bbd1abc9538d3390a4b86 Mon Sep 17 00:00:00 2001 From: Matthew Roeschke <10647082+mroeschke@users.noreply.github.com> Date: Thu, 15 Feb 2024 15:44:07 -0800 Subject: [PATCH 1/6] PERF: Allow RangeIndex.take to return a RangeIndex when possible --- doc/source/whatsnew/v3.0.0.rst | 1 + pandas/core/indexes/range.py | 7 +++---- pandas/tests/indexes/ranges/test_range.py | 11 +++++++++++ 3 files changed, 15 insertions(+), 4 deletions(-) diff --git a/doc/source/whatsnew/v3.0.0.rst b/doc/source/whatsnew/v3.0.0.rst index 483cf659080ea..09adb49f66d2a 100644 --- a/doc/source/whatsnew/v3.0.0.rst +++ b/doc/source/whatsnew/v3.0.0.rst @@ -166,6 +166,7 @@ Performance improvements - Performance improvement in :meth:`Index.take` when ``indices`` is a full range indexer from zero to length of index (:issue:`56806`) - Performance improvement in :meth:`MultiIndex.equals` for equal length indexes (:issue:`56990`) - Performance improvement in :meth:`RangeIndex.append` when appending the same index (:issue:`57252`) +- Performance improvement in :meth:`RangeIndex.take` returning a :class:`RangeIndex` instead of a :class:`Index` when possible. 
(:issue:`?`) - Performance improvement in indexing operations for string dtypes (:issue:`56997`) - diff --git a/pandas/core/indexes/range.py b/pandas/core/indexes/range.py index a8e8ae140041a..fde3846d65228 100644 --- a/pandas/core/indexes/range.py +++ b/pandas/core/indexes/range.py @@ -1168,7 +1168,7 @@ def take( # type: ignore[override] allow_fill: bool = True, fill_value=None, **kwargs, - ) -> Index: + ) -> Self | Index: if kwargs: nv.validate_take((), kwargs) if is_scalar(indices): @@ -1179,7 +1179,7 @@ def take( # type: ignore[override] self._maybe_disallow_fill(allow_fill, fill_value, indices) if len(indices) == 0: - taken = np.array([], dtype=self.dtype) + return type(self)(range(0), name=self.name) else: ind_max = indices.max() if ind_max >= len(self): @@ -1199,5 +1199,4 @@ def take( # type: ignore[override] if self.start != 0: taken += self.start - # _constructor so RangeIndex-> Index with an int64 dtype - return self._constructor._simple_new(taken, name=self.name) + return self._shallow_copy(taken, name=self.name) diff --git a/pandas/tests/indexes/ranges/test_range.py b/pandas/tests/indexes/ranges/test_range.py index 29d6916cc7f08..694f0b5516ead 100644 --- a/pandas/tests/indexes/ranges/test_range.py +++ b/pandas/tests/indexes/ranges/test_range.py @@ -606,3 +606,14 @@ def test_range_index_rsub_by_const(self): result = 3 - RangeIndex(0, 4, 1) expected = RangeIndex(3, -1, -1) tm.assert_index_equal(result, expected) + + +def test_take_return_rangeindex(): + ri = RangeIndex(5, name="foo") + result = ri.take([]) + expected = RangeIndex(0, name="foo") + tm.assert_index_equal(result, expected, exact=True) + + result = ri.take([3, 4]) + expected = RangeIndex(3, 5, name="foo") + tm.assert_index_equal(result, expected, exact=True) From c3ddf9880e803a92defb594dd94cfbb13aa3da69 Mon Sep 17 00:00:00 2001 From: Matthew Roeschke <10647082+mroeschke@users.noreply.github.com> Date: Thu, 15 Feb 2024 15:45:24 -0800 Subject: [PATCH 2/6] Add number --- 
doc/source/whatsnew/v3.0.0.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/source/whatsnew/v3.0.0.rst b/doc/source/whatsnew/v3.0.0.rst index 09adb49f66d2a..e74967695a2ff 100644 --- a/doc/source/whatsnew/v3.0.0.rst +++ b/doc/source/whatsnew/v3.0.0.rst @@ -166,7 +166,7 @@ Performance improvements - Performance improvement in :meth:`Index.take` when ``indices`` is a full range indexer from zero to length of index (:issue:`56806`) - Performance improvement in :meth:`MultiIndex.equals` for equal length indexes (:issue:`56990`) - Performance improvement in :meth:`RangeIndex.append` when appending the same index (:issue:`57252`) -- Performance improvement in :meth:`RangeIndex.take` returning a :class:`RangeIndex` instead of a :class:`Index` when possible. (:issue:`?`) +- Performance improvement in :meth:`RangeIndex.take` returning a :class:`RangeIndex` instead of an :class:`Index` when possible (:issue:`57445`) - Performance improvement in indexing operations for string dtypes (:issue:`56997`) - From 29fb7254f6d5d684a98677cf73d862f495f7c835 Mon Sep 17 00:00:00 2001 From: Matthew Roeschke <10647082+mroeschke@users.noreply.github.com> Date: Thu, 15 Feb 2024 16:00:28 -0800 Subject: [PATCH 3/6] Trigger CI From 327766ada742d9fa81bf8e19eb8cac94990228e7 Mon Sep 17 00:00:00 2001 From: Matthew Roeschke <10647082+mroeschke@users.noreply.github.com> Date: Thu, 15 Feb 2024 17:50:04 -0800 Subject: [PATCH 4/6] Use empty --- pandas/core/indexes/range.py | 2 +- pandas/tests/indexes/ranges/test_range.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/pandas/core/indexes/range.py b/pandas/core/indexes/range.py index fde3846d65228..ea3b203375159 100644 --- a/pandas/core/indexes/range.py +++ b/pandas/core/indexes/range.py @@ -1179,7 +1179,7 @@ def take( # type: ignore[override] self._maybe_disallow_fill(allow_fill, fill_value, indices) if len(indices) == 0: - return type(self)(range(0), name=self.name) + return type(self)(name=self.name) else: 
ind_max = indices.max() if ind_max >= len(self): diff --git a/pandas/tests/indexes/ranges/test_range.py b/pandas/tests/indexes/ranges/test_range.py index 694f0b5516ead..eb037faa45dd6 100644 --- a/pandas/tests/indexes/ranges/test_range.py +++ b/pandas/tests/indexes/ranges/test_range.py @@ -611,7 +611,7 @@ def test_range_index_rsub_by_const(self): def test_take_return_rangeindex(): ri = RangeIndex(5, name="foo") result = ri.take([]) - expected = RangeIndex(0, name="foo") + expected = RangeIndex(name="foo") tm.assert_index_equal(result, expected, exact=True) result = ri.take([3, 4]) From 60a3a141190da05239a93b07f6e8f0f0106a40ee Mon Sep 17 00:00:00 2001 From: Matthew Roeschke <10647082+mroeschke@users.noreply.github.com> Date: Fri, 16 Feb 2024 10:38:01 -0800 Subject: [PATCH 5/6] Use range(0) again --- pandas/core/indexes/range.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/core/indexes/range.py b/pandas/core/indexes/range.py index ea3b203375159..fde3846d65228 100644 --- a/pandas/core/indexes/range.py +++ b/pandas/core/indexes/range.py @@ -1179,7 +1179,7 @@ def take( # type: ignore[override] self._maybe_disallow_fill(allow_fill, fill_value, indices) if len(indices) == 0: - return type(self)(name=self.name) + return type(self)(range(0), name=self.name) else: ind_max = indices.max() if ind_max >= len(self): From d1128e23f94e77d01c6b12b1c0cb148f520aa1b5 Mon Sep 17 00:00:00 2001 From: Matthew Roeschke <10647082+mroeschke@users.noreply.github.com> Date: Fri, 16 Feb 2024 12:12:25 -0800 Subject: [PATCH 6/6] Fix append bug --- pandas/core/indexes/range.py | 8 +++++--- pandas/tests/indexes/ranges/test_range.py | 8 +++++++- 2 files changed, 12 insertions(+), 4 deletions(-) diff --git a/pandas/core/indexes/range.py b/pandas/core/indexes/range.py index fde3846d65228..20286cad58df9 100644 --- a/pandas/core/indexes/range.py +++ b/pandas/core/indexes/range.py @@ -1006,11 +1006,13 @@ def _concat(self, indexes: list[Index], name: Hashable) -> Index: # Get 
the stop value from "next" or alternatively # from the last non-empty index stop = non_empty_indexes[-1].stop if next_ is None else next_ - return RangeIndex(start, stop, step).rename(name) + if len(non_empty_indexes) == 1: + step = non_empty_indexes[0].step + return RangeIndex(start, stop, step, name=name) # Here all "indexes" had 0 length, i.e. were empty. # In this case return an empty range index. - return RangeIndex(0, 0).rename(name) + return RangeIndex(_empty_range, name=name) def __len__(self) -> int: """ @@ -1179,7 +1181,7 @@ def take( # type: ignore[override] self._maybe_disallow_fill(allow_fill, fill_value, indices) if len(indices) == 0: - return type(self)(range(0), name=self.name) + return type(self)(_empty_range, name=self.name) else: ind_max = indices.max() if ind_max >= len(self): diff --git a/pandas/tests/indexes/ranges/test_range.py b/pandas/tests/indexes/ranges/test_range.py index eb037faa45dd6..d500687763a1e 100644 --- a/pandas/tests/indexes/ranges/test_range.py +++ b/pandas/tests/indexes/ranges/test_range.py @@ -611,9 +611,15 @@ def test_range_index_rsub_by_const(self): def test_take_return_rangeindex(): ri = RangeIndex(5, name="foo") result = ri.take([]) - expected = RangeIndex(name="foo") + expected = RangeIndex(0, name="foo") tm.assert_index_equal(result, expected, exact=True) result = ri.take([3, 4]) expected = RangeIndex(3, 5, name="foo") tm.assert_index_equal(result, expected, exact=True) + + +def test_append_one_nonempty_preserve_step(): + expected = RangeIndex(0, -1, -1) + result = RangeIndex(0).append([expected]) + tm.assert_index_equal(result, expected, exact=True)