Skip to content

Commit d9d34ae

Browse files
authored
CLN/PERF: move RangeIndex._cached_data to RangeIndex._cache (#35432)
* CLN: move cached_data to _cache['_data']
* add GH number
* flake8 cleanup
1 parent cda8284 commit d9d34ae

File tree

2 files changed

+29
-32
lines changed

2 files changed

+29
-32
lines changed

pandas/core/indexes/range.py

+4-12
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
from datetime import timedelta
22
import operator
33
from sys import getsizeof
4-
from typing import Any, Optional
4+
from typing import Any
55
import warnings
66

77
import numpy as np
@@ -78,8 +78,6 @@ class RangeIndex(Int64Index):
7878
_engine_type = libindex.Int64Engine
7979
_range: range
8080

81-
# check whether self._data has been called
82-
_cached_data: Optional[np.ndarray] = None
8381
# --------------------------------------------------------------------
8482
# Constructors
8583

@@ -150,20 +148,14 @@ def _constructor(self):
150148
""" return the class to use for construction """
151149
return Int64Index
152150

153-
@property
151+
@cache_readonly
154152
def _data(self):
155153
"""
156154
An int array that for performance reasons is created only when needed.
157155
158-
The constructed array is saved in ``_cached_data``. This allows us to
159-
check if the array has been created without accessing ``_data`` and
160-
triggering the construction.
156+
The constructed array is saved in ``_cache``.
161157
"""
162-
if self._cached_data is None:
163-
self._cached_data = np.arange(
164-
self.start, self.stop, self.step, dtype=np.int64
165-
)
166-
return self._cached_data
158+
return np.arange(self.start, self.stop, self.step, dtype=np.int64)
167159

168160
@cache_readonly
169161
def _int64index(self) -> Int64Index:

pandas/tests/indexes/ranges/test_range.py

+25-20
Original file line numberDiff line numberDiff line change
@@ -137,53 +137,58 @@ def test_dtype(self):
137137
index = self.create_index()
138138
assert index.dtype == np.int64
139139

140-
def test_cached_data(self):
141-
# GH 26565, GH26617
142-
# Calling RangeIndex._data caches an int64 array of the same length at
143-
# self._cached_data. This test checks whether _cached_data has been set
140+
def test_cache(self):
141+
# GH 26565, GH26617, GH35432
142+
# This test checks whether _cache has been set.
143+
# Accessing RangeIndex._data creates an int64 array of the same length
144+
# as the RangeIndex and stores it in _cache.
144145
idx = RangeIndex(0, 100, 10)
145146

146-
assert idx._cached_data is None
147+
assert idx._cache == {}
147148

148149
repr(idx)
149-
assert idx._cached_data is None
150+
assert idx._cache == {}
150151

151152
str(idx)
152-
assert idx._cached_data is None
153+
assert idx._cache == {}
153154

154155
idx.get_loc(20)
155-
assert idx._cached_data is None
156+
assert idx._cache == {}
156157

157-
90 in idx
158-
assert idx._cached_data is None
158+
90 in idx # True
159+
assert idx._cache == {}
159160

160-
91 in idx
161-
assert idx._cached_data is None
161+
91 in idx # False
162+
assert idx._cache == {}
162163

163164
idx.all()
164-
assert idx._cached_data is None
165+
assert idx._cache == {}
165166

166167
idx.any()
167-
assert idx._cached_data is None
168+
assert idx._cache == {}
168169

169170
df = pd.DataFrame({"a": range(10)}, index=idx)
170171

171172
df.loc[50]
172-
assert idx._cached_data is None
173+
assert idx._cache == {}
173174

174175
with pytest.raises(KeyError, match="51"):
175176
df.loc[51]
176-
assert idx._cached_data is None
177+
assert idx._cache == {}
177178

178179
df.loc[10:50]
179-
assert idx._cached_data is None
180+
assert idx._cache == {}
180181

181182
df.iloc[5:10]
182-
assert idx._cached_data is None
183+
assert idx._cache == {}
183184

184-
# actually calling idx._data
185+
# idx._cache should contain a _data entry after call to idx._data
186+
idx._data
185187
assert isinstance(idx._data, np.ndarray)
186-
assert isinstance(idx._cached_data, np.ndarray)
188+
assert idx._data is idx._data # check cached value is reused
189+
assert len(idx._cache) == 4
190+
expected = np.arange(0, 100, 10, dtype="int64")
191+
tm.assert_numpy_array_equal(idx._cache["_data"], expected)
187192

188193
def test_is_monotonic(self):
189194
index = RangeIndex(0, 20, 2)

0 commit comments

Comments
 (0)