Skip to content

Commit a173cac

Browse files
toobaz authored and jreback committed
PERF: do not instantiate IndexEngine for standard lookup over RangeIndex (#27119)
* TST: actually test #16877 on numeric index (not just RangeIndex)
* PERF: do not instantiate IndexEngine for standard lookup over RangeIndex

closes #16685
1 parent 65ec968 commit a173cac

File tree

4 files changed

+60
-4
lines changed

4 files changed

+60
-4
lines changed

doc/source/whatsnew/v0.25.0.rst

+1
Original file line numberDiff line numberDiff line change
@@ -645,6 +645,7 @@ Performance improvements
645645
int8/int16/int32 and the searched key is within the integer bounds for the dtype (:issue:`22034`)
646646
- Improved performance of :meth:`pandas.core.groupby.GroupBy.quantile` (:issue:`20405`)
647647
- Improved performance of slicing and other selected operations on a :class:`RangeIndex` (:issue:`26565`, :issue:`26617`, :issue:`26722`)
648+
- :class:`RangeIndex` now performs standard lookup without instantiating an actual hashtable, hence saving memory (:issue:`16685`)
648649
- Improved performance of :meth:`read_csv` by faster tokenizing and faster parsing of small float numbers (:issue:`25784`)
649650
- Improved performance of :meth:`read_csv` by faster parsing of N/A and boolean values (:issue:`25804`)
650651
- Improved performance of :attr:`IntervalIndex.is_monotonic`, :attr:`IntervalIndex.is_monotonic_increasing` and :attr:`IntervalIndex.is_monotonic_decreasing` by removing conversion to :class:`MultiIndex` (:issue:`24813`)

pandas/core/indexes/range.py

+32-2
Original file line numberDiff line numberDiff line change
@@ -13,8 +13,8 @@
1313

1414
from pandas.core.dtypes import concat as _concat
1515
from pandas.core.dtypes.common import (
16-
ensure_python_int, is_int64_dtype, is_integer, is_scalar,
17-
is_timedelta64_dtype)
16+
ensure_platform_int, ensure_python_int, is_int64_dtype, is_integer,
17+
is_integer_dtype, is_list_like, is_scalar, is_timedelta64_dtype)
1818
from pandas.core.dtypes.generic import (
1919
ABCDataFrame, ABCSeries, ABCTimedeltaIndex)
2020

@@ -348,6 +348,36 @@ def get_loc(self, key, method=None, tolerance=None):
348348
raise KeyError(key)
349349
return super().get_loc(key, method=method, tolerance=tolerance)
350350

351+
@Appender(_index_shared_docs['get_indexer'])
def get_indexer(self, target, method=None, limit=None, tolerance=None):
    # Engine-free fast path: for an exact-match lookup of a 1-D integer
    # target, positions are computed arithmetically from start/stop/step,
    # so no hashtable-backed engine is instantiated. Every other case is
    # delegated to the parent implementation.
    #
    # No docstring is written here on purpose: the user-facing one is
    # attached by the @Appender decorator above.
    if not (method is None and tolerance is None and limit is None
            and is_list_like(target)):
        # Forward *all* arguments (including ``limit``) so that the parent
        # applies its own validation and fill logic to them.
        return super().get_indexer(target, method=method, limit=limit,
                                   tolerance=tolerance)

    if self.step > 0:
        start, stop, step = self.start, self.stop, self.step
    else:
        # Work on the equivalent increasing range for simplicity. Slicing a
        # builtin range gives the exact bounds even when ``stop`` is not
        # aligned with ``step``: range(10, 0, -3)[::-1] == range(1, 13, 3).
        reverse = range(self.start, self.stop, self.step)[::-1]
        start, stop, step = reverse.start, reverse.stop, reverse.step

    target_array = np.asarray(target)
    if not (is_integer_dtype(target_array) and target_array.ndim == 1):
        # checks/conversions/roundings are delegated to general method
        return super().get_indexer(target, method=method, limit=limit,
                                   tolerance=tolerance)

    # Offset of each target from the first element of the increasing range.
    locs = target_array - start
    # A target is present iff it lies on the step grid and within bounds.
    valid = (locs % step == 0) & (locs >= 0) & (target_array < stop)
    locs[~valid] = -1
    # Floor division is exact here (valid offsets are multiples of step)
    # and avoids a lossy round-trip through float.
    locs[valid] = locs[valid] // step

    if step != self.step:
        # We reversed this range: transform to original locs
        locs[valid] = len(self) - 1 - locs[valid]
    return ensure_platform_int(locs)
380+
351381
def tolist(self):
352382
return list(self._range)
353383

pandas/tests/indexes/test_base.py

+5-2
Original file line numberDiff line numberDiff line change
@@ -1439,9 +1439,12 @@ def test_get_indexer_strings_raises(self):
14391439
index.get_indexer(['a', 'b', 'c', 'd'], method='pad',
14401440
tolerance=[2, 2, 2, 2])
14411441

1442-
def test_get_indexer_numeric_index_boolean_target(self):
1442+
@pytest.mark.parametrize("idx_class", [Int64Index, RangeIndex,
1443+
Float64Index])
1444+
def test_get_indexer_numeric_index_boolean_target(self, idx_class):
14431445
# GH 16877
1444-
numeric_index = pd.Index(range(4))
1446+
1447+
numeric_index = idx_class(RangeIndex((4)))
14451448
result = numeric_index.get_indexer([True, False, True])
14461449
expected = np.array([-1, -1, -1], dtype=np.intp)
14471450
tm.assert_numpy_array_equal(result, expected)

pandas/tests/indexes/test_range.py

+22
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,8 @@
33
import numpy as np
44
import pytest
55

6+
from pandas.core.dtypes.common import ensure_platform_int
7+
68
import pandas as pd
79
from pandas import Float64Index, Index, Int64Index, RangeIndex, Series
810
import pandas.util.testing as tm
@@ -965,3 +967,23 @@ def test_append(self, appends):
965967
# Append single item rather than list
966968
result2 = indices[0].append(indices[1])
967969
tm.assert_index_equal(result2, expected, exact=True)
970+
971+
def test_engineless_lookup(self):
    # GH 16685
    # Standard lookup on RangeIndex should not require the engine to be
    # created
    idx = RangeIndex(2, 10, 3)

    # Exact-match scalar and list-like lookups of integers.
    assert idx.get_loc(5) == 1
    tm.assert_numpy_array_equal(idx.get_indexer([2, 8]),
                                ensure_platform_int(np.array([0, 2])))
    # A missing integer key raises rather than returning a sentinel.
    with pytest.raises(KeyError):
        idx.get_loc(3)

    # None of the lookups above should have populated the cached engine.
    assert '_engine' not in idx._cache

    # The engine is still required for lookup of a different dtype scalar.
    # The call itself raises, so there is no return value to compare.
    with pytest.raises(KeyError):
        idx.get_loc('a')

    assert '_engine' in idx._cache

0 commit comments

Comments
 (0)