Skip to content

Commit 3e29889

Browse files
committed
PERF: don't call RangeIndex._data unnecessarily
1 parent d2beaf3 commit 3e29889

File tree

4 files changed

+55
-0
lines changed

4 files changed

+55
-0
lines changed

asv_bench/benchmarks/index_object.py

+6
Original file line numberDiff line numberDiff line change
@@ -95,6 +95,12 @@ def time_min(self):
9595
def time_min_trivial(self):
9696
self.idx_inc.min()
9797

98+
def time_get_loc_inc(self):
99+
self.idx_inc.get_loc(900000)
100+
101+
def time_get_loc_dec(self):
102+
self.idx_dec.get_loc(900000)
103+
98104

99105
class IndexAppend:
100106

doc/source/whatsnew/v0.25.0.rst

+1
Original file line numberDiff line numberDiff line change
@@ -326,6 +326,7 @@ Performance Improvements
326326
- Improved performance of :meth:`Series.searchsorted`. The speedup is especially large when the dtype is
327327
int8/int16/int32 and the searched key is within the integer bounds for the dtype (:issue:`22034`)
328328
- Improved performance of :meth:`pandas.core.groupby.GroupBy.quantile` (:issue:`20405`)
329+
- Improved performance when slicing :class:`RangeIndex` (:issue:`xxxxx`)
329330
- Improved performance of :meth:`read_csv` by faster tokenizing and faster parsing of small float numbers (:issue:`25784`)
330331
- Improved performance of :meth:`read_csv` by faster parsing of N/A and boolean values (:issue:`25804`)
331332
- Improved performance of :meth:`IntervalIndex.is_monotonic`, :meth:`IntervalIndex.is_monotonic_increasing` and :meth:`IntervalIndex.is_monotonic_decreasing` by removing conversion to :class:`MultiIndex` (:issue:`24813`)

pandas/core/indexes/range.py

+18
Original file line numberDiff line numberDiff line change
@@ -21,6 +21,7 @@
2121
import pandas.core.indexes.base as ibase
2222
from pandas.core.indexes.base import Index, _index_shared_docs
2323
from pandas.core.indexes.numeric import Int64Index
24+
from pandas.io.formats.printing import pprint_thing
2425

2526

2627
class RangeIndex(Int64Index):
@@ -64,6 +65,8 @@ class RangeIndex(Int64Index):
6465
_typ = 'rangeindex'
6566
_engine_type = libindex.Int64Engine
6667

68+
# check whether self._data has been called
69+
_has_called_data = False # type: bool
6770
# --------------------------------------------------------------------
6871
# Constructors
6972

@@ -164,6 +167,8 @@ def _simple_new(cls, start, stop=None, step=None, name=None,
164167
for k, v in kwargs.items():
165168
setattr(result, k, v)
166169

170+
result._range = range(result._start, result._stop, result._step)
171+
167172
result._reset_identity()
168173
return result
169174

@@ -182,6 +187,7 @@ def _constructor(self):
182187

183188
@cache_readonly
184189
def _data(self):
190+
self._has_called_data = True
185191
return np.arange(self._start, self._stop, self._step, dtype=np.int64)
186192

187193
@cache_readonly
@@ -215,6 +221,9 @@ def _format_data(self, name=None):
215221
# we are formatting thru the attributes
216222
return None
217223

224+
def _format_with_header(self, header, na_rep='NaN', **kwargs):
225+
return header + [pprint_thing(x) for x in self._range]
226+
218227
# --------------------------------------------------------------------
219228
@property
220229
def start(self):
@@ -296,6 +305,15 @@ def is_monotonic_decreasing(self):
296305
def has_duplicates(self):
297306
return False
298307

308+
@Appender(_index_shared_docs['get_loc'])
def get_loc(self, key, method=None, tolerance=None):
    # NOTE: no docstring here on purpose -- the Appender decorator above
    # supplies the shared 'get_loc' documentation.
    #
    # Fast path: for an exact lookup, ask the underlying ``range`` object
    # (``self._range``) for the position of ``key``. This avoids touching
    # ``self._data`` and therefore avoids materializing the int64 array.
    if method is None and tolerance is None:
        try:
            return self._range.index(key)
        except ValueError:
            # ``range.index`` signals a missing value with ValueError;
            # index lookups are expected to raise KeyError instead.
            raise KeyError(key)
    # Inexact lookups (``method`` / ``tolerance`` given) are delegated to
    # the parent implementation. This must be spelled ``get_loc``: writing
    # ``__get_loc`` inside a class body is name-mangled to
    # ``_RangeIndex__get_loc``, which no parent defines, so the call would
    # raise AttributeError.
    return super().get_loc(key, method=method, tolerance=tolerance)
316+
299317
def tolist(self):
300318
return list(range(self._start, self._stop, self._step))
301319

pandas/tests/indexes/test_range.py

+30
Original file line numberDiff line numberDiff line change
@@ -241,6 +241,36 @@ def test_view(self):
241241
def test_dtype(self):
242242
assert self.index.dtype == np.int64
243243

244+
def test_has_called_data(self):
245+
# Calling RangeIndex._data caches an array of the same length.
246+
# This test checks that the operations below do not trigger RangeIndex._data.
247+
idx = RangeIndex(0, 100, 10)
248+
assert idx._has_called_data is False
249+
250+
repr(idx)
251+
assert idx._has_called_data is False
252+
253+
str(idx)
254+
assert idx._has_called_data is False
255+
256+
idx.get_loc(20)
257+
assert idx._has_called_data is False
258+
259+
df = pd.DataFrame({'a': range(10)}, index=idx)
260+
261+
df.loc[50]
262+
assert idx._has_called_data is False
263+
264+
with pytest.raises(KeyError):
265+
df.loc[51]
266+
assert idx._has_called_data is False
267+
268+
df.loc[10:50]
269+
assert idx._has_called_data is False
270+
271+
df.iloc[5:10]
272+
assert idx._has_called_data is False
273+
244274
def test_is_monotonic(self):
245275
assert self.index.is_monotonic is True
246276
assert self.index.is_monotonic_increasing is True

0 commit comments

Comments
 (0)