Skip to content

Commit 48ff187

Browse files
committed
Merge remote-tracking branch 'upstream/master' into str_cat_err
2 parents b910df7 + 437efa6 commit 48ff187

File tree

5 files changed

+73
-62
lines changed

5 files changed

+73
-62
lines changed

asv_bench/benchmarks/index_object.py

+6
Original file line numberDiff line numberDiff line change
@@ -94,6 +94,12 @@ def time_min(self):
9494
def time_min_trivial(self):
9595
self.idx_inc.min()
9696

97+
def time_get_loc_inc(self):
98+
self.idx_inc.get_loc(900000)
99+
100+
def time_get_loc_dec(self):
101+
self.idx_dec.get_loc(100000)
102+
97103

98104
class IndexAppend:
99105

azure-pipelines.yml

-60
Original file line numberDiff line numberDiff line change
@@ -116,63 +116,3 @@ jobs:
116116
fi
117117
displayName: 'Running benchmarks'
118118
condition: true
119-
120-
- job: 'Docs'
121-
pool:
122-
vmImage: ubuntu-16.04
123-
timeoutInMinutes: 90
124-
steps:
125-
- script: |
126-
echo '##vso[task.setvariable variable=CONDA_ENV]pandas-dev'
127-
echo '##vso[task.setvariable variable=ENV_FILE]environment.yml'
128-
displayName: 'Setting environment variables'
129-
130-
- script: |
131-
export PATH=$HOME/miniconda3/bin:$PATH
132-
sudo apt-get install -y libc6-dev-i386
133-
ci/setup_env.sh
134-
displayName: 'Setup environment and build pandas'
135-
136-
- script: |
137-
export PATH=$HOME/miniconda3/bin:$PATH
138-
source activate pandas-dev
139-
doc/make.py
140-
displayName: 'Build documentation'
141-
142-
- script: |
143-
cd doc/build/html
144-
git init
145-
touch .nojekyll
146-
git add --all .
147-
git config user.email "[email protected]"
148-
git config user.name "pandas-docs-bot"
149-
git commit -m "pandas documentation in master"
150-
displayName: 'Create git repo for docs build'
151-
condition : |
152-
and(not(eq(variables['Build.Reason'], 'PullRequest')),
153-
eq(variables['Build.SourceBranch'], 'refs/heads/master'))
154-
155-
# For this task to work, the following steps are required:
156-
# 1. Go to "Library > Secure files" in the azure-pipelines dashboard: https://dev.azure.com/pandas-dev/pandas/_library?itemType=SecureFiles
157-
# 2. Click on "+ Secure file"
158-
# 3. Upload the private key (the name of the file must match the one specified in the "sshKeySecureFile" input below, "pandas_docs_key")
159-
# 4. Click on file name after it is created, tick the box "Authorize for use in all pipelines" and save
160-
# 5. The public key specified in "sshPublicKey" is the counterpart of the uploaded private key, and needs to be added as a deploy key of the repo where the docs will be pushed: https://github.com/pandas-dev/pandas-dev.github.io/settings/keys
161-
- task: InstallSSHKey@0
162-
inputs:
163-
hostName: 'github.com'
164-
sshPublicKey: 'ssh-rsa AAAAB3NzaC1yc2EAAAADAQABAAACAQDHmz3l/EdqrgNxEUKkwDUuUcLv91unig03pYFGO/DMIgCmPdMG96zAgfnESd837Rm0wSSqylwSzkRJt5MV/TpFlcVifDLDQmUhqCeO8Z6dLl/oe35UKmyYICVwcvQTAaHNnYRpKC5IUlTh0JEtw9fGlnp1Ta7U1ENBLbKdpywczElhZu+hOQ892zqOj3CwA+U2329/d6cd7YnqIKoFN9DWT3kS5K6JE4IoBfQEVekIOs23bKjNLvPoOmi6CroAhu/K8j+NCWQjge5eJf2x/yTnIIP1PlEcXoHIr8io517posIx3TBup+CN8bNS1PpDW3jyD3ttl1uoBudjOQrobNnJeR6Rn67DRkG6IhSwr3BWj8alwUG5mTdZzwV5Pa9KZFdIiqX7NoDGg+itsR39QCn0thK8lGRNSR8KrWC1PSjecwelKBO7uQ7rnk/rkrZdBWR4oEA8YgNH8tirUw5WfOr5a0AIaJicKxGKNdMxZt+zmC+bS7F4YCOGIm9KHa43RrKhoGRhRf9fHHHKUPwFGqtWG4ykcUgoamDOURJyepesBAO3FiRE9rLU6ILbB3yEqqoekborHmAJD5vf7PWItW3Q/YQKuk3kkqRcKnexPyzyyq5lUgTi8CxxZdaASIOu294wjBhhdyHlXEkVTNJ9JKkj/obF+XiIIp0cBDsOXY9hDQ== [email protected]'
165-
sshKeySecureFile: 'pandas_docs_key'
166-
displayName: 'Install GitHub ssh deployment key'
167-
condition : |
168-
and(not(eq(variables['Build.Reason'], 'PullRequest')),
169-
eq(variables['Build.SourceBranch'], 'refs/heads/master'))
170-
171-
- script: |
172-
cd doc/build/html
173-
git remote add origin [email protected]:pandas-dev/pandas-dev.github.io.git
174-
git push origin master -f
175-
displayName: 'Publish docs to GitHub pages'
176-
condition : |
177-
and(not(eq(variables['Build.Reason'], 'PullRequest')),
178-
eq(variables['Build.SourceBranch'], 'refs/heads/master'))

doc/source/whatsnew/v0.25.0.rst

+1
Original file line numberDiff line numberDiff line change
@@ -493,6 +493,7 @@ Performance Improvements
493493
- Improved performance of :meth:`Series.searchsorted`. The speedup is especially large when the dtype is
494494
int8/int16/int32 and the searched key is within the integer bounds for the dtype (:issue:`22034`)
495495
- Improved performance of :meth:`pandas.core.groupby.GroupBy.quantile` (:issue:`20405`)
496+
- Improved performance when slicing :class:`RangeIndex` (:issue:`26565`)
496497
- Improved performance of :meth:`read_csv` by faster tokenizing and faster parsing of small float numbers (:issue:`25784`)
497498
- Improved performance of :meth:`read_csv` by faster parsing of N/A and boolean values (:issue:`25804`)
498499
- Improved performance of :meth:`IntervalIndex.is_monotonic`, :meth:`IntervalIndex.is_monotonic_increasing` and :meth:`IntervalIndex.is_monotonic_decreasing` by removing conversion to :class:`MultiIndex` (:issue:`24813`)

pandas/core/indexes/range.py

+30-2
Original file line numberDiff line numberDiff line change
@@ -22,6 +22,8 @@
2222
from pandas.core.indexes.base import Index, _index_shared_docs
2323
from pandas.core.indexes.numeric import Int64Index
2424

25+
from pandas.io.formats.printing import pprint_thing
26+
2527

2628
class RangeIndex(Int64Index):
2729
"""
@@ -64,6 +66,8 @@ class RangeIndex(Int64Index):
6466
_typ = 'rangeindex'
6567
_engine_type = libindex.Int64Engine
6668

69+
# check whether self._data has been called
70+
_cached_data = None # type: np.ndarray
6771
# --------------------------------------------------------------------
6872
# Constructors
6973

@@ -164,6 +168,8 @@ def _simple_new(cls, start, stop=None, step=None, name=None,
164168
for k, v in kwargs.items():
165169
setattr(result, k, v)
166170

171+
result._range = range(result._start, result._stop, result._step)
172+
167173
result._reset_identity()
168174
return result
169175

@@ -180,9 +186,19 @@ def _constructor(self):
180186
""" return the class to use for construction """
181187
return Int64Index
182188

183-
@cache_readonly
189+
@property
184190
def _data(self):
185-
return np.arange(self._start, self._stop, self._step, dtype=np.int64)
191+
"""
192+
An int array that for performance reasons is created only when needed.
193+
194+
The constructed array is saved in ``_cached_data``. This allows us to
195+
check if the array has been created without accessing ``_data`` and
196+
triggering the construction.
197+
"""
198+
if self._cached_data is None:
199+
self._cached_data = np.arange(self._start, self._stop, self._step,
200+
dtype=np.int64)
201+
return self._cached_data
186202

187203
@cache_readonly
188204
def _int64index(self):
@@ -215,6 +231,9 @@ def _format_data(self, name=None):
215231
# we are formatting thru the attributes
216232
return None
217233

234+
def _format_with_header(self, header, na_rep='NaN', **kwargs):
235+
return header + list(map(pprint_thing, self._range))
236+
218237
# --------------------------------------------------------------------
219238
@property
220239
def start(self):
@@ -296,6 +315,15 @@ def is_monotonic_decreasing(self):
296315
def has_duplicates(self):
297316
return False
298317

318+
@Appender(_index_shared_docs['get_loc'])
319+
def get_loc(self, key, method=None, tolerance=None):
320+
if is_integer(key) and method is None and tolerance is None:
321+
try:
322+
return self._range.index(key)
323+
except ValueError:
324+
raise KeyError(key)
325+
return super().get_loc(key, method=method, tolerance=tolerance)
326+
299327
def tolist(self):
300328
return list(range(self._start, self._stop, self._step))
301329

pandas/tests/indexes/test_range.py

+36
Original file line numberDiff line numberDiff line change
@@ -241,6 +241,42 @@ def test_view(self):
241241
def test_dtype(self):
242242
assert self.index.dtype == np.int64
243243

244+
def test_cached_data(self):
245+
# GH 26565
246+
# Calling RangeIndex._data caches an int64 array of the same length at
247+
# self._cached_data. This tests whether _cached_data has been set.
248+
idx = RangeIndex(0, 100, 10)
249+
250+
assert idx._cached_data is None
251+
252+
repr(idx)
253+
assert idx._cached_data is None
254+
255+
str(idx)
256+
assert idx._cached_data is None
257+
258+
idx.get_loc(20)
259+
assert idx._cached_data is None
260+
261+
df = pd.DataFrame({'a': range(10)}, index=idx)
262+
263+
df.loc[50]
264+
assert idx._cached_data is None
265+
266+
with pytest.raises(KeyError):
267+
df.loc[51]
268+
assert idx._cached_data is None
269+
270+
df.loc[10:50]
271+
assert idx._cached_data is None
272+
273+
df.iloc[5:10]
274+
assert idx._cached_data is None
275+
276+
# actually calling idx._data
277+
assert isinstance(idx._data, np.ndarray)
278+
assert isinstance(idx._cached_data, np.ndarray)
279+
244280
def test_is_monotonic(self):
245281
assert self.index.is_monotonic is True
246282
assert self.index.is_monotonic_increasing is True

0 commit comments

Comments
 (0)