Skip to content

Commit 441e442

Browse files
committed
Merge pull request #3093 from jreback/perf_indexing
PERF: added convert=boolean to take to enable negative index conversion
2 parents 99c74cb + 9363377 commit 441e442

File tree

7 files changed

+38
-23
lines changed

7 files changed

+38
-23
lines changed

RELEASE.rst

+2
Original file line numberDiff line numberDiff line change
@@ -44,6 +44,8 @@ pandas 0.11.0
4444
- Moved functionality from ``irow,icol,iget_value/iset_value`` to ``.iloc`` indexer
4545
(via ``_ixs`` methods in each object)
4646
- Added support for expression evaluation using the ``numexpr`` library
47+
- Added ``convert=boolean`` to ``take`` routines to translate negative indices to positive,
48+
defaults to True
4749

4850
**Improvements to existing features**
4951

pandas/core/frame.py

+13-11
Original file line numberDiff line numberDiff line change
@@ -1854,8 +1854,7 @@ def _ixs(self, i, axis=0, copy=False):
18541854
else:
18551855
label = self.columns[i]
18561856
if isinstance(label, Index):
1857-
1858-
return self.take(i, axis=1)
1857+
return self.take(i, axis=1, convert=True)
18591858

18601859
values = self._data.iget(i)
18611860
return self._col_klass.from_array(values, index=self.index,
@@ -1907,10 +1906,10 @@ def _getitem_array(self, key):
19071906
# be reindexed to match DataFrame rows
19081907
key = _check_bool_indexer(self.index, key)
19091908
indexer = key.nonzero()[0]
1910-
return self.take(indexer, axis=0)
1909+
return self.take(indexer, axis=0, convert=False)
19111910
else:
19121911
indexer = self.ix._convert_to_indexer(key, axis=1)
1913-
return self.take(indexer, axis=1)
1912+
return self.take(indexer, axis=1, convert=True)
19141913

19151914
def _getitem_multilevel(self, key):
19161915
loc = self.columns.get_loc(key)
@@ -2242,9 +2241,9 @@ def xs(self, key, axis=0, level=None, copy=True):
22422241
if isinstance(loc, np.ndarray):
22432242
if loc.dtype == np.bool_:
22442243
inds, = loc.nonzero()
2245-
return self.take(inds, axis=axis)
2244+
return self.take(inds, axis=axis, convert=False)
22462245
else:
2247-
return self.take(loc, axis=axis)
2246+
return self.take(loc, axis=axis, convert=True)
22482247

22492248
if not np.isscalar(loc):
22502249
new_index = self.index[loc]
@@ -2820,7 +2819,7 @@ def _maybe_cast(values):
28202819

28212820
delevel = deprecate('delevel', reset_index)
28222821

2823-
def take(self, indices, axis=0):
2822+
def take(self, indices, axis=0, convert=True):
28242823
"""
28252824
Analogous to ndarray.take, return DataFrame corresponding to requested
28262825
indices along an axis
@@ -2829,14 +2828,17 @@ def take(self, indices, axis=0):
28292828
----------
28302829
indices : list / array of ints
28312830
axis : {0, 1}
2831+
convert : convert indices for negative values, check bounds, default True
2832+
mainly useful when ``take`` is called from a user routine
28322833
28332834
Returns
28342835
-------
28352836
taken : DataFrame
28362837
"""
28372838

28382839
# check/convert indices here
2839-
indices = _maybe_convert_indices(indices, len(self._get_axis(axis)))
2840+
if convert:
2841+
indices = _maybe_convert_indices(indices, len(self._get_axis(axis)))
28402842

28412843
if self._is_mixed_type:
28422844
if axis == 0:
@@ -2950,7 +2952,7 @@ def dropna(self, axis=0, how='any', thresh=None, subset=None):
29502952
else:
29512953
raise ValueError('must specify how or thresh')
29522954

2953-
return self.take(mask.nonzero()[0], axis=axis)
2955+
return self.take(mask.nonzero()[0], axis=axis, convert=False)
29542956

29552957
def drop_duplicates(self, cols=None, take_last=False, inplace=False):
29562958
"""
@@ -3141,7 +3143,7 @@ def sort_index(self, axis=0, by=None, ascending=True, inplace=False):
31413143
" from pandas 0.11 onward", FutureWarning)
31423144
return self
31433145
else:
3144-
return self.take(indexer, axis=axis)
3146+
return self.take(indexer, axis=axis, convert=False)
31453147

31463148
def sortlevel(self, level=0, axis=0, ascending=True, inplace=False):
31473149
"""
@@ -3187,7 +3189,7 @@ def sortlevel(self, level=0, axis=0, ascending=True, inplace=False):
31873189
" from pandas 0.11 onward", FutureWarning)
31883190
return self
31893191
else:
3190-
return self.take(indexer, axis=axis)
3192+
return self.take(indexer, axis=axis, convert=False)
31913193

31923194
def swaplevel(self, i, j, axis=0):
31933195
"""

pandas/core/generic.py

+6-4
Original file line numberDiff line numberDiff line change
@@ -189,7 +189,7 @@ def at_time(self, time, asof=False):
189189
"""
190190
try:
191191
indexer = self.index.indexer_at_time(time, asof=asof)
192-
return self.take(indexer)
192+
return self.take(indexer, convert=False)
193193
except AttributeError:
194194
raise TypeError('Index must be DatetimeIndex')
195195

@@ -213,7 +213,7 @@ def between_time(self, start_time, end_time, include_start=True,
213213
indexer = self.index.indexer_between_time(
214214
start_time, end_time, include_start=include_start,
215215
include_end=include_end)
216-
return self.take(indexer)
216+
return self.take(indexer, convert=False)
217217
except AttributeError:
218218
raise TypeError('Index must be DatetimeIndex')
219219

@@ -934,22 +934,24 @@ def rename_axis(self, mapper, axis=0, copy=True):
934934

935935
return self._constructor(new_data)
936936

937-
def take(self, indices, axis=0):
937+
def take(self, indices, axis=0, convert=True):
938938
"""
939939
Analogous to ndarray.take
940940
941941
Parameters
942942
----------
943943
indices : list / array of ints
944944
axis : int, default 0
945+
convert : translate negative to positive indices (default True)
945946
946947
Returns
947948
-------
948949
taken : type of caller
949950
"""
950951

951952
# check/convert indices here
952-
indices = _maybe_convert_indices(indices, len(self._get_axis(axis)))
953+
if convert:
954+
indices = _maybe_convert_indices(indices, len(self._get_axis(axis)))
953955

954956
if axis == 0:
955957
labels = self._get_axis(axis)

pandas/core/indexing.py

+4-4
Original file line numberDiff line numberDiff line change
@@ -391,7 +391,7 @@ def _reindex(keys, level=None):
391391
if com._is_bool_indexer(key):
392392
key = _check_bool_indexer(labels, key)
393393
inds, = key.nonzero()
394-
return self.obj.take(inds, axis=axis)
394+
return self.obj.take(inds, axis=axis, convert=False)
395395
else:
396396
if isinstance(key, Index):
397397
# want Index objects to pass through untouched
@@ -408,7 +408,7 @@ def _reindex(keys, level=None):
408408
if labels.inferred_type == 'mixed-integer':
409409
indexer = labels.get_indexer(keyarr)
410410
if (indexer >= 0).all():
411-
self.obj.take(indexer, axis=axis)
411+
self.obj.take(indexer, axis=axis, convert=True)
412412
else:
413413
return self.obj.take(keyarr, axis=axis)
414414
elif not labels.inferred_type == 'integer':
@@ -426,7 +426,7 @@ def _reindex(keys, level=None):
426426
return _reindex(keyarr, level=level)
427427
else:
428428
mask = labels.isin(keyarr)
429-
return self.obj.take(mask.nonzero()[0], axis=axis)
429+
return self.obj.take(mask.nonzero()[0], axis=axis, convert=False)
430430

431431
def _convert_to_indexer(self, obj, axis=0):
432432
"""
@@ -644,7 +644,7 @@ def _getbool_axis(self, key, axis=0):
644644
key = _check_bool_indexer(labels, key)
645645
inds, = key.nonzero()
646646
try:
647-
return self.obj.take(inds, axis=axis)
647+
return self.obj.take(inds, axis=axis, convert=False)
648648
except (Exception), detail:
649649
raise self._exception(detail)
650650
def _get_slice_axis(self, slice_obj, axis=0):

pandas/core/series.py

+2-1
Original file line numberDiff line numberDiff line change
@@ -2550,14 +2550,15 @@ def reindex_like(self, other, method=None, limit=None, fill_value=pa.NA):
25502550
return self.reindex(other.index, method=method, limit=limit,
25512551
fill_value=fill_value)
25522552

2553-
def take(self, indices, axis=0):
2553+
def take(self, indices, axis=0, convert=True):
25542554
"""
25552555
Analogous to ndarray.take, return Series corresponding to requested
25562556
indices
25572557
25582558
Parameters
25592559
----------
25602560
indices : list / array of ints
2561+
convert : translate negative to positive indices (default)
25612562
25622563
Returns
25632564
-------

pandas/sparse/frame.py

+10-2
Original file line numberDiff line numberDiff line change
@@ -10,7 +10,7 @@
1010

1111
from pandas.core.common import _pickle_array, _unpickle_array, _try_sort
1212
from pandas.core.index import Index, MultiIndex, _ensure_index
13-
from pandas.core.indexing import _check_slice_bounds
13+
from pandas.core.indexing import _check_slice_bounds, _maybe_convert_indices
1414
from pandas.core.series import Series
1515
from pandas.core.frame import (DataFrame, extract_index, _prep_ndarray,
1616
_default_index)
@@ -634,7 +634,7 @@ def _rename_columns_inplace(self, mapper):
634634
self.columns = new_columns
635635
self._series = new_series
636636

637-
def take(self, indices, axis=0):
637+
def take(self, indices, axis=0, convert=True):
638638
"""
639639
Analogous to ndarray.take, return SparseDataFrame corresponding to
640640
requested indices along an axis
@@ -643,12 +643,20 @@ def take(self, indices, axis=0):
643643
----------
644644
indices : list / array of ints
645645
axis : {0, 1}
646+
convert : convert indices for negative values, check bounds, default True
647+
mainly useful when ``take`` is called from a user routine
646648
647649
Returns
648650
-------
649651
taken : SparseDataFrame
650652
"""
653+
651654
indices = com._ensure_platform_int(indices)
655+
656+
# check/convert indices here
657+
if convert:
658+
indices = _maybe_convert_indices(indices, len(self._get_axis(axis)))
659+
652660
new_values = self.values.take(indices, axis=axis)
653661
if axis == 0:
654662
new_columns = self.columns

pandas/sparse/series.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -468,7 +468,7 @@ def fillna(self, value=None, method=None, inplace=False, limit=None):
468468
else:
469469
return result
470470

471-
def take(self, indices, axis=0):
471+
def take(self, indices, axis=0, convert=True):
472472
"""
473473
Sparse-compatible version of ndarray.take
474474

0 commit comments

Comments
 (0)