Skip to content

Commit 649ad5c

Browse files
authored
PERF: custom ops for RangeIndex.[all|any|__contains__] (#26617)
* PERF: custom ops for RangeIndex.[all|any|__contains__] * changes * Changes to .all() * Rebased and added typing * add issue number to whatsnew
1 parent c9c6c22 commit 649ad5c

File tree

4 files changed

+38
-11
lines changed

4 files changed

+38
-11
lines changed

doc/source/whatsnew/v0.25.0.rst

+1-1
Original file line numberDiff line numberDiff line change
@@ -502,7 +502,7 @@ Performance Improvements
502502
- Improved performance of :meth:`Series.searchsorted`. The speedup is especially large when the dtype is
503503
int8/int16/int32 and the searched key is within the integer bounds for the dtype (:issue:`22034`)
504504
- Improved performance of :meth:`pandas.core.groupby.GroupBy.quantile` (:issue:`20405`)
505-
- Improved performance when slicing :class:`RangeIndex` (:issue:`26565`)
505+
- Improved performance of slicing and other selected operations on a :class:`RangeIndex` (:issue:`26565`, :issue:`26617`)
506506
- Improved performance of :meth:`read_csv` by faster tokenizing and faster parsing of small float numbers (:issue:`25784`)
507507
- Improved performance of :meth:`read_csv` by faster parsing of N/A and boolean values (:issue:`25804`)
508508
- Improved performance of :attr:`IntervalIndex.is_monotonic`, :attr:`IntervalIndex.is_monotonic_increasing` and :attr:`IntervalIndex.is_monotonic_decreasing` by removing conversion to :class:`MultiIndex` (:issue:`24813`)

pandas/core/indexes/base.py

+1-5
Original file line numberDiff line numberDiff line change
@@ -4015,11 +4015,7 @@ def __contains__(self, key):
40154015

40164016
@Appender(_index_shared_docs['contains'] % _index_doc_kwargs)
40174017
def contains(self, key):
4018-
hash(key)
4019-
try:
4020-
return key in self._engine
4021-
except (TypeError, ValueError):
4022-
return False
4018+
return key in self
40234019

40244020
def __hash__(self):
40254021
raise TypeError("unhashable type: %r" % type(self).__name__)

pandas/core/indexes/range.py

+15-1
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,7 @@
11
from datetime import timedelta
22
import operator
33
from sys import getsizeof
4+
from typing import Union
45
import warnings
56

67
import numpy as np
@@ -334,6 +335,14 @@ def is_monotonic_decreasing(self):
334335
def has_duplicates(self):
335336
return False
336337

338+
def __contains__(self, key: Union[int, np.integer]) -> bool:
339+
hash(key)
340+
try:
341+
key = ensure_python_int(key)
342+
except TypeError:
343+
return False
344+
return key in self._range
345+
337346
@Appender(_index_shared_docs['get_loc'])
338347
def get_loc(self, key, method=None, tolerance=None):
339348
if is_integer(key) and method is None and tolerance is None:
@@ -640,6 +649,12 @@ def __floordiv__(self, other):
640649
return self._simple_new(start, start + 1, 1, name=self.name)
641650
return self._int64index // other
642651

652+
def all(self) -> bool:
653+
return 0 not in self._range
654+
655+
def any(self) -> bool:
656+
return any(self._range)
657+
643658
@classmethod
644659
def _add_numeric_methods_binary(cls):
645660
""" add in numeric methods, specialized to RangeIndex """
@@ -725,4 +740,3 @@ def _evaluate_numeric_binop(self, other):
725740

726741

727742
RangeIndex._add_numeric_methods()
728-
RangeIndex._add_logical_methods()

pandas/tests/indexes/test_range.py

+21-4
Original file line numberDiff line numberDiff line change
@@ -245,10 +245,9 @@ def test_dtype(self):
245245
assert self.index.dtype == np.int64
246246

247247
def test_cached_data(self):
248-
# GH 26565
249-
# Calling RangeIndex._data caches an int64 array of the same length as
250-
# self at self._cached_data.
251-
# This tests whether _cached_data is being set by various operations.
248+
# GH 26565, GH 26617
249+
# Calling RangeIndex._data caches an int64 array of the same length at
250+
# self._cached_data. This test checks whether _cached_data has been set by the operations below.
252251
idx = RangeIndex(0, 100, 10)
253252

254253
assert idx._cached_data is None
@@ -262,6 +261,24 @@ def test_cached_data(self):
262261
idx.get_loc(20)
263262
assert idx._cached_data is None
264263

264+
90 in idx
265+
assert idx._cached_data is None
266+
267+
91 in idx
268+
assert idx._cached_data is None
269+
270+
idx.contains(90)
271+
assert idx._cached_data is None
272+
273+
idx.contains(91)
274+
assert idx._cached_data is None
275+
276+
idx.all()
277+
assert idx._cached_data is None
278+
279+
idx.any()
280+
assert idx._cached_data is None
281+
265282
df = pd.DataFrame({'a': range(10)}, index=idx)
266283

267284
df.loc[50]

0 commit comments

Comments
 (0)