Merge pull request #3093 from jreback/perf_indexing

jreback · jreback · commit 441e4427d0f5 · 2013-03-19T11:51:32.000-07:00
PERF: added convert=boolean to take to enable negative index conversion
diff --git a/RELEASE.rst b/RELEASE.rst
@@ -44,6 +44,8 @@ pandas 0.11.0
   - Moved functionaility from ``irow,icol,iget_value/iset_value`` to ``.iloc`` indexer
     (via ``_ixs`` methods in each object)
   - Added support for expression evaluation using the ``numexpr`` library
+  - Added a ``convert`` boolean keyword to the ``take`` routines that controls translation
+    of negative indices to positive ones; defaults to ``True``
 
 **Improvements to existing features**
 
diff --git a/pandas/core/frame.py b/pandas/core/frame.py
@@ -1854,8 +1854,7 @@ def _ixs(self, i, axis=0, copy=False):
             else:
                 label = self.columns[i]
                 if isinstance(label, Index):
-
-                    return self.take(i, axis=1)
+                    return self.take(i, axis=1, convert=True)
 
                 values = self._data.iget(i)
                 return self._col_klass.from_array(values, index=self.index,
@@ -1907,10 +1906,10 @@ def _getitem_array(self, key):
             # be reindexed to match DataFrame rows
             key = _check_bool_indexer(self.index, key)
             indexer = key.nonzero()[0]
-            return self.take(indexer, axis=0)
+            return self.take(indexer, axis=0, convert=False)
         else:
             indexer = self.ix._convert_to_indexer(key, axis=1)
-            return self.take(indexer, axis=1)
+            return self.take(indexer, axis=1, convert=True)
 
     def _getitem_multilevel(self, key):
         loc = self.columns.get_loc(key)
@@ -2242,9 +2241,9 @@ def xs(self, key, axis=0, level=None, copy=True):
             if isinstance(loc, np.ndarray):
                 if loc.dtype == np.bool_:
                     inds, = loc.nonzero()
-                    return self.take(inds, axis=axis)
+                    return self.take(inds, axis=axis, convert=False)
                 else:
-                    return self.take(loc, axis=axis)
+                    return self.take(loc, axis=axis, convert=True)
 
             if not np.isscalar(loc):
                 new_index = self.index[loc]
@@ -2820,7 +2819,7 @@ def _maybe_cast(values):
 
     delevel = deprecate('delevel', reset_index)
 
-    def take(self, indices, axis=0):
+    def take(self, indices, axis=0, convert=True):
         """
         Analogous to ndarray.take, return DataFrame corresponding to requested
         indices along an axis
@@ -2829,14 +2828,17 @@ def take(self, indices, axis=0):
         ----------
         indices : list / array of ints
         axis : {0, 1}
+        convert : boolean, default True; convert negative indices and check bounds
+                  (mainly useful when take is called directly from user code)
 
         Returns
         -------
         taken : DataFrame
         """
 
         # check/convert indicies here
-        indices = _maybe_convert_indices(indices, len(self._get_axis(axis)))
+        if convert:
+            indices = _maybe_convert_indices(indices, len(self._get_axis(axis)))
 
         if self._is_mixed_type:
             if axis == 0:
@@ -2950,7 +2952,7 @@ def dropna(self, axis=0, how='any', thresh=None, subset=None):
             else:
                 raise ValueError('must specify how or thresh')
 
-        return self.take(mask.nonzero()[0], axis=axis)
+        return self.take(mask.nonzero()[0], axis=axis, convert=False)
 
     def drop_duplicates(self, cols=None, take_last=False, inplace=False):
         """
@@ -3141,7 +3143,7 @@ def sort_index(self, axis=0, by=None, ascending=True, inplace=False):
                           " from pandas 0.11 onward", FutureWarning)
             return self
         else:
-            return self.take(indexer, axis=axis)
+            return self.take(indexer, axis=axis, convert=False)
 
     def sortlevel(self, level=0, axis=0, ascending=True, inplace=False):
         """
@@ -3187,7 +3189,7 @@ def sortlevel(self, level=0, axis=0, ascending=True, inplace=False):
                           " from pandas 0.11 onward", FutureWarning)
             return self
         else:
-            return self.take(indexer, axis=axis)
+            return self.take(indexer, axis=axis, convert=False)
 
     def swaplevel(self, i, j, axis=0):
         """
diff --git a/pandas/core/generic.py b/pandas/core/generic.py
@@ -189,7 +189,7 @@ def at_time(self, time, asof=False):
         """
         try:
             indexer = self.index.indexer_at_time(time, asof=asof)
-            return self.take(indexer)
+            return self.take(indexer, convert=False)
         except AttributeError:
             raise TypeError('Index must be DatetimeIndex')
 
@@ -213,7 +213,7 @@ def between_time(self, start_time, end_time, include_start=True,
             indexer = self.index.indexer_between_time(
                 start_time, end_time, include_start=include_start,
                 include_end=include_end)
-            return self.take(indexer)
+            return self.take(indexer, convert=False)
         except AttributeError:
             raise TypeError('Index must be DatetimeIndex')
 
@@ -934,22 +934,24 @@ def rename_axis(self, mapper, axis=0, copy=True):
 
         return self._constructor(new_data)
 
-    def take(self, indices, axis=0):
+    def take(self, indices, axis=0, convert=True):
         """
         Analogous to ndarray.take
 
         Parameters
         ----------
         indices : list / array of ints
         axis : int, default 0
+        convert : translate negative to positive indices (default True)
 
         Returns
         -------
         taken : type of caller
         """
 
         # check/convert indicies here
-        indices = _maybe_convert_indices(indices, len(self._get_axis(axis)))
+        if convert:
+            indices = _maybe_convert_indices(indices, len(self._get_axis(axis)))
 
         if axis == 0:
             labels = self._get_axis(axis)
diff --git a/pandas/core/indexing.py b/pandas/core/indexing.py
@@ -391,7 +391,7 @@ def _reindex(keys, level=None):
         if com._is_bool_indexer(key):
             key = _check_bool_indexer(labels, key)
             inds, = key.nonzero()
-            return self.obj.take(inds, axis=axis)
+            return self.obj.take(inds, axis=axis, convert=False)
         else:
             if isinstance(key, Index):
                 # want Index objects to pass through untouched
@@ -408,7 +408,7 @@ def _reindex(keys, level=None):
                 if labels.inferred_type == 'mixed-integer':
                     indexer = labels.get_indexer(keyarr)
                     if (indexer >= 0).all():
-                        self.obj.take(indexer, axis=axis)
+                        self.obj.take(indexer, axis=axis, convert=True)
                     else:
                         return self.obj.take(keyarr, axis=axis)
                 elif not labels.inferred_type == 'integer':
@@ -426,7 +426,7 @@ def _reindex(keys, level=None):
                 return _reindex(keyarr, level=level)
             else:
                 mask = labels.isin(keyarr)
-                return self.obj.take(mask.nonzero()[0], axis=axis)
+                return self.obj.take(mask.nonzero()[0], axis=axis, convert=False)
 
     def _convert_to_indexer(self, obj, axis=0):
         """
@@ -644,7 +644,7 @@ def _getbool_axis(self, key, axis=0):
             key = _check_bool_indexer(labels, key)
             inds, = key.nonzero()
             try:
-                return self.obj.take(inds, axis=axis)
+                return self.obj.take(inds, axis=axis, convert=False)
             except (Exception), detail:
                 raise self._exception(detail)
     def _get_slice_axis(self, slice_obj, axis=0):
diff --git a/pandas/core/series.py b/pandas/core/series.py
@@ -2550,14 +2550,15 @@ def reindex_like(self, other, method=None, limit=None, fill_value=pa.NA):
         return self.reindex(other.index, method=method, limit=limit,
                             fill_value=fill_value)
 
-    def take(self, indices, axis=0):
+    def take(self, indices, axis=0, convert=True):
         """
         Analogous to ndarray.take, return Series corresponding to requested
         indices
 
         Parameters
         ----------
         indices : list / array of ints
+        convert : translate negative to positive indices (default True)
 
         Returns
         -------
diff --git a/pandas/sparse/frame.py b/pandas/sparse/frame.py
@@ -10,7 +10,7 @@
 
 from pandas.core.common import _pickle_array, _unpickle_array, _try_sort
 from pandas.core.index import Index, MultiIndex, _ensure_index
-from pandas.core.indexing import _check_slice_bounds
+from pandas.core.indexing import _check_slice_bounds, _maybe_convert_indices
 from pandas.core.series import Series
 from pandas.core.frame import (DataFrame, extract_index, _prep_ndarray,
                                _default_index)
@@ -634,7 +634,7 @@ def _rename_columns_inplace(self, mapper):
         self.columns = new_columns
         self._series = new_series
 
-    def take(self, indices, axis=0):
+    def take(self, indices, axis=0, convert=True):
         """
         Analogous to ndarray.take, return SparseDataFrame corresponding to
         requested indices along an axis
@@ -643,12 +643,20 @@ def take(self, indices, axis=0):
         ----------
         indices : list / array of ints
         axis : {0, 1}
+        convert : boolean, default True; convert negative indices and check bounds
+                  (mainly useful when take is called directly from user code)
 
         Returns
         -------
         taken : SparseDataFrame
         """
+
         indices = com._ensure_platform_int(indices)
+
+        # check/convert indices here
+        if convert:
+            indices = _maybe_convert_indices(indices, len(self._get_axis(axis)))
+
         new_values = self.values.take(indices, axis=axis)
         if axis == 0:
             new_columns = self.columns
diff --git a/pandas/sparse/series.py b/pandas/sparse/series.py
@@ -468,7 +468,7 @@ def fillna(self, value=None, method=None, inplace=False, limit=None):
         else:
             return result
 
-    def take(self, indices, axis=0):
+    def take(self, indices, axis=0, convert=True):
         """
         Sparse-compatible version of ndarray.take