Skip to content

Commit bc404ba

Browse files
committed
PERF: do not instantiate IndexEngine for standard lookup over RangeIndex
closes #16685
1 parent 53a36a7 commit bc404ba

File tree

3 files changed

+57
-1
lines changed

3 files changed

+57
-1
lines changed

doc/source/whatsnew/v0.25.0.rst

+1
Original file line numberDiff line numberDiff line change
@@ -641,6 +641,7 @@ Performance improvements
641641
- Restored performance of :meth:`DatetimeIndex.__iter__` by re-enabling specialized code path (:issue:`26702`)
642642
- Improved performance when building :class:`MultiIndex` with at least one :class:`CategoricalIndex` level (:issue:`22044`)
643643
- Improved performance by removing the need for a garbage collect when checking for ``SettingWithCopyWarning`` (:issue:`27031`)
644+
- :class:`RangeIndex` now performs standard lookup without instantiating an actual hashtable, hence saving memory (:issue:`16685`)
644645

645646
.. _whatsnew_0250.bug_fixes:
646647

pandas/core/indexes/range.py

+29-1
Original file line numberDiff line numberDiff line change
@@ -14,7 +14,7 @@
1414
from pandas.core.dtypes import concat as _concat
1515
from pandas.core.dtypes.common import (
1616
ensure_python_int, is_int64_dtype, is_integer, is_scalar,
17-
is_timedelta64_dtype)
17+
is_timedelta64_dtype, is_list_like, ensure_platform_int)
1818
from pandas.core.dtypes.generic import (
1919
ABCDataFrame, ABCSeries, ABCTimedeltaIndex)
2020

@@ -348,6 +348,34 @@ def get_loc(self, key, method=None, tolerance=None):
348348
raise KeyError(key)
349349
return super().get_loc(key, method=method, tolerance=tolerance)
350350

351+
@Appender(_index_shared_docs['get_indexer'])
def get_indexer(self, target, method=None, limit=None, tolerance=None):
    # Engine-free lookup: positions of integer labels are derived
    # arithmetically from start/stop/step, so no hashtable is built.
    # Anything else (fill methods, tolerance, limit, scalar or non-integer
    # targets) is delegated unchanged to the general implementation.
    if (method is not None or tolerance is not None or limit is not None
            or not is_list_like(target)):
        # ``limit`` must be forwarded too, otherwise it is silently lost.
        return super().get_indexer(target, method=method, limit=limit,
                                   tolerance=tolerance)

    if self.step > 0:
        start, stop, step = self.start, self.stop, self.step
    else:
        # Work on the reversed (increasing) range for simplicity. Let the
        # range object compute its own reversal: the last element is not
        # ``stop - step`` unless ``stop`` is aligned with the step, e.g.
        # range(10, 0, -3) ends at 1, not at 3.
        reverse = self._range[::-1]
        start, stop, step = reverse.start, reverse.stop, reverse.step

    target_array = np.asarray(target)
    if target_array.dtype != int:
        # checks/conversions/roundings are delegated to general method
        return super().get_indexer(target, method=method, limit=limit,
                                   tolerance=tolerance)

    # The subtraction allocates a new array, so ``target`` is never mutated.
    locs = ensure_platform_int(target_array) - start
    valid = (locs % step == 0) & (locs >= 0) & (target_array < stop)
    locs[~valid] = -1
    # Floor division keeps the computation in integers (valid offsets are
    # exact multiples of ``step``), avoiding a lossy float round trip.
    locs[valid] = locs[valid] // step

    if step != self.step:
        # We reversed this range: transform to original locs
        locs[valid] = len(self) - locs[valid] - 1
    return np.array(locs, dtype=int)
378+
351379
def tolist(self):
    # Expand the underlying range object into a plain Python list.
    return [*self._range]
353381

pandas/tests/indexes/test_range.py

+27
Original file line numberDiff line numberDiff line change
@@ -965,3 +965,30 @@ def test_append(self, appends):
965965
# Append single item rather than list
966966
result2 = indices[0].append(indices[1])
967967
tm.assert_index_equal(result2, expected, exact=True)
968+
969+
def test_engineless_lookup(self):
    # GH 16685
    # A standard lookup on a RangeIndex must succeed without the engine
    # ever being created, so the engine factory is replaced by a stub
    # returning a plain string: any code path that does touch the engine
    # fails with an AttributeError on that string.
    index = RangeIndex(2, 10, 3)
    index._engine_type = lambda *args, **kwargs: 'dummy_engine'

    assert index.get_loc(5) == 1
    found = index.get_indexer([2, 8])
    tm.assert_numpy_array_equal(found, np.array([0, 2]))
    with pytest.raises(KeyError):
        index.get_loc(3)

    msg = "'str' object has no attribute "
    # A scalar of a different dtype still needs the engine ...
    with pytest.raises(AttributeError, match=msg):
        index.get_loc('a')
    # ... and so do the non-standard lookup methods.
    with pytest.raises(AttributeError, match=msg):
        index.get_indexer([4, 8], method='backfill')
    with pytest.raises(AttributeError, match=msg):
        index.get_indexer([4, 8], method='nearest')

    # Arrays of a different dtype follow the object codepath, and work.
    mixed_cases = (
        (['a', 8], [-1, 2]),
        ([5., 8], [1, 2]),
    )
    for target, expected in mixed_cases:
        tm.assert_numpy_array_equal(index.get_indexer(target),
                                    np.array(expected))

0 commit comments

Comments
 (0)