Skip to content

Commit fff62b1

Browse files
committed
PERF: accessing sliced indexes with populated indexing engines
1 parent 6bb8f73 commit fff62b1

File tree

3 files changed

+20
-4
lines changed

3 files changed

+20
-4
lines changed

doc/source/whatsnew/v2.1.0.rst

+1
Original file line numberDiff line numberDiff line change
@@ -105,6 +105,7 @@ Performance improvements
105105
- Performance improvement in :meth:`DataFrame.first_valid_index` and :meth:`DataFrame.last_valid_index` for extension array dtypes (:issue:`51549`)
106106
- Performance improvement in :meth:`DataFrame.clip` and :meth:`Series.clip` (:issue:`51472`)
107107
- Performance improvement in :func:`read_parquet` on string columns when using ``use_nullable_dtypes=True`` (:issue:`47345`)
108+
- Performance improvement when searching an :class:`Index` sliced from other indexes (:issue:`xxxxx`)
108109
- Performance improvement in :meth:`read_orc` when reading a remote URI file path. (:issue:`51609`)
109110

110111
.. ---------------------------------------------------------------------------

pandas/_libs/index.pyx

+12
Original file line numberDiff line numberDiff line change
@@ -230,6 +230,18 @@ cdef class IndexEngine:
230230
def __sizeof__(self) -> int:
231231
return self.sizeof()
232232

233+
cpdef _update_from_other(self, IndexEngine other, reverse: bool):
    """
    Seed this engine's cached flags from the engine of the index it was
    sliced from, so they need not be recomputed.

    Only facts that are guaranteed to survive slicing are transferred:

    * a slice of a unique index is unique;
    * a slice of a monotonic index is monotonic (with the direction
      swapped when the slice reverses the order).

    "Known non-unique" and "known non-monotonic" are *not* transferred,
    because a slice of such an index may well be unique / monotonic
    (e.g. ``[1, 1, 2][1:]`` or ``[1, 3, 2][:2]``). In those cases this
    engine's flags are left untouched so that the corresponding
    properties compute the answer themselves.

    Parameters
    ----------
    other : IndexEngine
        Engine of the index that was sliced.
    reverse : bool
        True when the slice has a negative step, i.e. the element order
        of this engine is the reverse of ``other``'s.
    """
    # Read the raw cached attributes (not the ``is_unique`` /
    # ``is_monotonic_*`` properties) so that nothing is computed on
    # ``other`` as a side effect.
    if not other.need_unique_check and other.unique:
        self.unique = other.unique
        self.need_unique_check = other.need_unique_check

    if not other.need_monotonic_check and (
        other.monotonic_inc or other.monotonic_dec
    ):
        self.need_monotonic_check = other.need_monotonic_check
        if reverse:
            # the slice reverses the index, so the directions swap
            self.monotonic_inc = other.monotonic_dec
            self.monotonic_dec = other.monotonic_inc
        else:
            self.monotonic_inc = other.monotonic_inc
            self.monotonic_dec = other.monotonic_dec
        # NOTE(review): when ``other`` is monotonic in only one direction,
        # a slice of length <= 1 (or of all-equal values) is monotonic in
        # both; the single ``need_monotonic_check`` flag cannot express
        # "one direction known, the other unknown" -- confirm whether
        # callers rely on the opposite-direction flag for such slices.
244+
233245
@property
234246
def is_unique(self) -> bool:
235247
if self.need_unique_check:

pandas/core/indexes/base.py

+7-4
Original file line numberDiff line numberDiff line change
@@ -5153,9 +5153,7 @@ def __getitem__(self, key):
51535153
if isinstance(key, slice):
51545154
# This case is separated from the conditional above to avoid
51555155
# pessimization com.is_bool_indexer and ndim checks.
5156-
result = getitem(key)
5157-
# Going through simple_new for performance.
5158-
return type(self)._simple_new(result, name=self._name)
5156+
return self._getitem_slice(key)
51595157

51605158
if com.is_bool_indexer(key):
51615159
# if we have list[bools, length=1e5] then doing this check+convert
@@ -5181,7 +5179,12 @@ def _getitem_slice(self: _IndexT, slobj: slice) -> _IndexT:
51815179
Fastpath for __getitem__ when we know we have a slice.
51825180
"""
51835181
res = self._data[slobj]
5184-
return type(self)._simple_new(res, name=self._name)
5182+
result = type(self)._simple_new(res, name=self._name)
5183+
if isinstance(result._engine, libindex.IndexEngine): # may also be IntervalTree
5184+
reverse = slobj.step is not None and slobj.step < 0
5185+
result._engine._update_from_other(self._engine, reverse=reverse)
5186+
5187+
return result
51855188

51865189
@final
51875190
def _can_hold_identifiers_and_holds_name(self, name) -> bool:

0 commit comments

Comments
 (0)