Skip to content

Commit 9b4cffc

Browse files
authored
PERF: accessing sliced indexes with populated indexing engines (#51738)
* PERF: accessing sliced indexes with populated indexing engines
* update
* also use for RangeIndex
* pre-commit issues
* pre-commit issues
* add back pylint
1 parent a07cb65 commit 9b4cffc

File tree

5 files changed

+36
-6
lines changed

5 files changed

+36
-6
lines changed

doc/source/whatsnew/v2.1.0.rst

+1
Original file line numberDiff line numberDiff line change
@@ -113,6 +113,7 @@ Performance improvements
113113
- Performance improvement in :meth:`MultiIndex.sortlevel` when ``ascending`` is a list (:issue:`51612`)
114114
- Performance improvement in :meth:`~arrays.ArrowExtensionArray.isna` when array has zero nulls or is all nulls (:issue:`51630`)
115115
- Performance improvement when parsing strings to ``boolean[pyarrow]`` dtype (:issue:`51730`)
116+
- Performance improvement when searching an :class:`Index` sliced from other indexes (:issue:`51738`)
116117

117118
.. ---------------------------------------------------------------------------
118119
.. _whatsnew_210.bug_fixes:

pandas/_libs/index.pyx

+20
Original file line numberDiff line numberDiff line change
@@ -230,6 +230,16 @@ cdef class IndexEngine:
230230
def __sizeof__(self) -> int:
231231
return self.sizeof()
232232

233+
cpdef _update_from_sliced(self, IndexEngine other, reverse: bool):
    """
    Reuse already-computed flags from the engine of the index this one
    was sliced from, so they do not have to be recomputed.

    Parameters
    ----------
    other : IndexEngine
        Engine of the index the slice was taken from.
    reverse : bool
        True when the slice reversed the order of the elements.

    Notes
    -----
    Only results that are guaranteed to hold for every slice are copied.
    When nothing is copied, the corresponding flags on this engine keep
    the values they already had.
    """
    # A slice of a unique index is always unique, but a slice of a
    # non-unique index may have dropped the duplicates.  Copying a
    # "checked, not unique" state would make is_unique skip its own check
    # and report the wrong answer, so only a positive result is reused.
    if not other.need_unique_check and other.unique:
        self.unique = other.unique
        self.need_unique_check = other.need_unique_check

    # Same reasoning for ordering: only a known-monotonic parent tells us
    # anything about the slice.
    if not other.need_monotonic_check and (
            other.is_monotonic_increasing or other.is_monotonic_decreasing):
        self.need_monotonic_check = other.need_monotonic_check
        # reverse=True means the index has been reversed, so the
        # increasing/decreasing flags swap roles.
        self.monotonic_inc = other.monotonic_dec if reverse else other.monotonic_inc
        self.monotonic_dec = other.monotonic_inc if reverse else other.monotonic_dec
242+
233243
@property
234244
def is_unique(self) -> bool:
235245
if self.need_unique_check:
@@ -892,6 +902,16 @@ cdef class SharedEngine:
892902
# for compat with IndexEngine
893903
pass
894904

905+
cpdef _update_from_sliced(self, ExtensionEngine other, reverse: bool):
    """
    Reuse already-computed flags from the engine of the index this one
    was sliced from, so they do not have to be recomputed.

    Parameters
    ----------
    other : ExtensionEngine
        Engine of the index the slice was taken from.
    reverse : bool
        True when the slice reversed the order of the elements.

    Notes
    -----
    Only results that are guaranteed to hold for every slice are copied.
    When nothing is copied, the corresponding flags on this engine keep
    the values they already had.
    """
    # A slice of a unique index is always unique, but a slice of a
    # non-unique index may have dropped the duplicates.  Copying a
    # "checked, not unique" state would make is_unique skip its own check
    # and report the wrong answer, so only a positive result is reused.
    if not other.need_unique_check and other.unique:
        self.unique = other.unique
        self.need_unique_check = other.need_unique_check

    # Same reasoning for ordering: only a known-monotonic parent tells us
    # anything about the slice.
    if not other.need_monotonic_check and (
            other.is_monotonic_increasing or other.is_monotonic_decreasing):
        self.need_monotonic_check = other.need_monotonic_check
        # reverse=True means the index has been reversed, so the
        # increasing/decreasing flags swap roles.
        self.monotonic_inc = other.monotonic_dec if reverse else other.monotonic_inc
        self.monotonic_dec = other.monotonic_inc if reverse else other.monotonic_dec
914+
895915
@property
896916
def is_unique(self) -> bool:
897917
if self.need_unique_check:

pandas/core/indexes/base.py

+7-4
Original file line numberDiff line numberDiff line change
@@ -5164,9 +5164,7 @@ def __getitem__(self, key):
51645164
if isinstance(key, slice):
51655165
# This case is separated from the conditional above to avoid
51665166
# pessimization com.is_bool_indexer and ndim checks.
5167-
result = getitem(key)
5168-
# Going through simple_new for performance.
5169-
return type(self)._simple_new(result, name=self._name)
5167+
return self._getitem_slice(key)
51705168

51715169
if com.is_bool_indexer(key):
51725170
# if we have list[bools, length=1e5] then doing this check+convert
@@ -5192,7 +5190,12 @@ def _getitem_slice(self: _IndexT, slobj: slice) -> _IndexT:
51925190
Fastpath for __getitem__ when we know we have a slice.
51935191
"""
51945192
res = self._data[slobj]
5195-
return type(self)._simple_new(res, name=self._name)
5193+
result = type(self)._simple_new(res, name=self._name)
5194+
if "_engine" in self._cache:
5195+
reverse = slobj.step is not None and slobj.step < 0
5196+
result._engine._update_from_sliced(self._engine, reverse=reverse) # type: ignore[union-attr] # noqa: E501
5197+
5198+
return result
51965199

51975200
@final
51985201
def _can_hold_identifiers_and_holds_name(self, name) -> bool:

pandas/core/indexes/interval.py

+7
Original file line numberDiff line numberDiff line change
@@ -374,6 +374,13 @@ def __contains__(self, key: Any) -> bool:
374374
except KeyError:
375375
return False
376376

377+
def _getitem_slice(self, slobj: slice) -> IntervalIndex:
    """
    Return the sub-index selected by ``slobj``, keeping this index's name.

    Slice-only fastpath for ``__getitem__``: the backing data is sliced
    directly and re-wrapped through ``_simple_new``.
    """
    sliced = self._data[slobj]
    cls = type(self)
    return cls._simple_new(sliced, name=self._name)
383+
377384
@cache_readonly
378385
def _multiindex(self) -> MultiIndex:
379386
return MultiIndex.from_arrays([self.left, self.right], names=["left", "right"])

pandas/core/indexes/range.py

+1-2
Original file line numberDiff line numberDiff line change
@@ -903,8 +903,7 @@ def __getitem__(self, key):
903903
Conserve RangeIndex type for scalar and slice keys.
904904
"""
905905
if isinstance(key, slice):
906-
new_range = self._range[key]
907-
return self._simple_new(new_range, name=self._name)
906+
return self._getitem_slice(key)
908907
elif is_integer(key):
909908
new_key = int(key)
910909
try:

0 commit comments

Comments
 (0)