Skip to content

Commit a57e681

Browse files
jorisvandenbossche authored and jreback committed
PERF: improve iloc list indexing
Author: Joris Van den Bossche <[email protected]>

Closes #15504 from jorisvandenbossche/perf-iloc-list and squashes the following commits:

bf54a0b [Joris Van den Bossche] TST: edit test_take to preserve original dtype
74d45ae [Joris Van den Bossche] add whatsnew
3e537b6 [Joris Van den Bossche] small clean-up
6d2705c [Joris Van den Bossche] take method: only validate kwargs if there are kwargs
aacbaa8 [Joris Van den Bossche] PERF: improve iloc list indexing
1 parent 57c7c87 commit a57e681

File tree

5 files changed

+22
-16
lines changed

5 files changed

+22
-16
lines changed

doc/source/whatsnew/v0.20.0.txt

+1-1
Original file line numberDiff line numberDiff line change
@@ -905,7 +905,7 @@ Performance Improvements
905905
- Improved performance of ``drop_duplicates()`` on ``bool`` columns (:issue:`12963`)
906906
- Improve performance of ``pd.core.groupby.GroupBy.apply`` when the applied
907907
function used the ``.name`` attribute of the group DataFrame (:issue:`15062`).
908-
908+
- Improved performance of ``iloc`` indexing with a list or array (:issue:`15504`).
909909

910910

911911
.. _whatsnew_0200.bug_fixes:

pandas/core/indexing.py

+14-10
Original file line numberDiff line numberDiff line change
@@ -1697,34 +1697,38 @@ def _get_slice_axis(self, slice_obj, axis=0):
16971697
else:
16981698
return self.obj.take(slice_obj, axis=axis, convert=False)
16991699

1700-
def _get_list_axis(self, key_list, axis=0):
1700+
def _get_list_axis(self, key, axis=0):
17011701
"""
17021702
Return Series values by list or array of integers
17031703
17041704
Parameters
17051705
----------
1706-
key_list : list-like positional indexer
1706+
key : list-like positional indexer
17071707
axis : int (can only be zero)
17081708
17091709
Returns
17101710
-------
17111711
Series object
17121712
"""
1713-
1714-
# validate list bounds
1715-
self._is_valid_list_like(key_list, axis)
1716-
1717-
# force an actual list
1718-
key_list = list(key_list)
1719-
return self.obj.take(key_list, axis=axis, convert=False)
1713+
try:
1714+
return self.obj.take(key, axis=axis, convert=False)
1715+
except IndexError:
1716+
# re-raise with different error message
1717+
raise IndexError("positional indexers are out-of-bounds")
17201718

17211719
def _getitem_axis(self, key, axis=0):
17221720

17231721
if isinstance(key, slice):
17241722
self._has_valid_type(key, axis)
17251723
return self._get_slice_axis(key, axis=axis)
17261724

1727-
elif is_bool_indexer(key):
1725+
if isinstance(key, list):
1726+
try:
1727+
key = np.asarray(key)
1728+
except TypeError: # pragma: no cover
1729+
pass
1730+
1731+
if is_bool_indexer(key):
17281732
self._has_valid_type(key, axis)
17291733
return self._getbool_axis(key, axis=axis)
17301734

pandas/core/series.py

+4-3
Original file line numberDiff line numberDiff line change
@@ -2378,7 +2378,8 @@ def take(self, indices, axis=0, convert=True, is_copy=False, **kwargs):
23782378
--------
23792379
numpy.ndarray.take
23802380
"""
2381-
nv.validate_take(tuple(), kwargs)
2381+
if kwargs:
2382+
nv.validate_take(tuple(), kwargs)
23822383

23832384
# check/convert indices here
23842385
if convert:
@@ -2387,8 +2388,8 @@ def take(self, indices, axis=0, convert=True, is_copy=False, **kwargs):
23872388
indices = _ensure_platform_int(indices)
23882389
new_index = self.index.take(indices)
23892390
new_values = self._values.take(indices)
2390-
return self._constructor(new_values,
2391-
index=new_index).__finalize__(self)
2391+
return (self._constructor(new_values, index=new_index, fastpath=True)
2392+
.__finalize__(self))
23922393

23932394
def isin(self, values):
23942395
"""

pandas/indexes/base.py

+2-1
Original file line numberDiff line numberDiff line change
@@ -1668,7 +1668,8 @@ def _append_same_dtype(self, to_concat, name):
16681668
@Appender(_index_shared_docs['take'] % _index_doc_kwargs)
16691669
def take(self, indices, axis=0, allow_fill=True,
16701670
fill_value=None, **kwargs):
1671-
nv.validate_take(tuple(), kwargs)
1671+
if kwargs:
1672+
nv.validate_take(tuple(), kwargs)
16721673
indices = _ensure_platform_int(indices)
16731674
if self._can_hold_na:
16741675
taken = self._assert_take_fillable(self.values, indices,

pandas/tests/test_generic.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -1870,7 +1870,7 @@ def test_take(self):
18701870
tm.makeObjectSeries()]:
18711871
out = s.take(indices)
18721872
expected = Series(data=s.values.take(indices),
1873-
index=s.index.take(indices))
1873+
index=s.index.take(indices), dtype=s.dtype)
18741874
tm.assert_series_equal(out, expected)
18751875
for df in [tm.makeTimeDataFrame()]:
18761876
out = df.take(indices)

0 commit comments

Comments
 (0)