
CLN/PERF: remove ndarray.take and platform int conversions #13924

Closed - wants to merge 6 commits
18 changes: 7 additions & 11 deletions pandas/core/algorithms.py
@@ -221,8 +221,8 @@ def sort_mixed(values):
         ordered = sort_mixed(values)
     else:
         try:
-            sorter = values.argsort()
-            ordered = values.take(sorter)
+            sorter = _ensure_int64(values.argsort())
+            ordered = take_nd(values, sorter, allow_fill=False)
         except TypeError:
             # try this anyway
             ordered = sort_mixed(values)

Review comment (on the take_nd call) - Contributor: I would do as many ensures inside take_nd as you can, so we can just call it with anything and it will work.

Reply - Contributor Author: Agree - take_nd already does this check, so I didn't need this.
@@ -235,7 +235,7 @@ def sort_mixed(values):
     if not is_list_like(labels):
         raise TypeError("Only list-like objects or None are allowed to be"
                         "passed to safe_sort as labels")
-    labels = _ensure_platform_int(np.asarray(labels))
+    labels = np.asarray(labels)

     from pandas import Index
     if not assume_unique and not Index(values).is_unique:
@@ -246,18 +246,16 @@ def sort_mixed(values):
     (hash_klass, _), values = _get_data_algo(values, _hashtables)
     t = hash_klass(len(values))
     t.map_locations(values)
-    sorter = _ensure_platform_int(t.lookup(ordered))
+    sorter = t.lookup(ordered)

     reverse_indexer = np.empty(len(sorter), dtype=np.int_)
     reverse_indexer.put(sorter, np.arange(len(sorter)))

     mask = (labels < -len(values)) | (labels >= len(values)) | \
         (labels == na_sentinel)
+    np.putmask(labels, mask, -1)

-    # (Out of bound indices will be masked with `na_sentinel` next, so we may
-    # deal with them here without performance loss using `mode='wrap'`.)
-    new_labels = reverse_indexer.take(labels, mode='wrap')
-    np.putmask(new_labels, mask, na_sentinel)
+    new_labels = take_nd(reverse_indexer, labels, fill_value=na_sentinel)

     return ordered, new_labels
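For reference, the old and new label-remapping paths in safe_sort give the same result; a minimal standalone NumPy sketch of both (take_fill is a hypothetical stand-in for take_nd's fill behavior, not pandas code):

```python
import numpy as np

def take_fill(arr, indexer, fill_value):
    # hypothetical stand-in for take_nd(arr, indexer, fill_value=...): -1 marks "missing"
    out = arr.take(indexer).astype(np.result_type(arr, fill_value))
    out[indexer == -1] = fill_value
    return out

n = 3                                    # len(values)
reverse_indexer = np.array([2, 0, 1])
labels = np.array([0, 1, -1, 5, -7])     # -1 == na_sentinel; 5 and -7 are out of bounds
na_sentinel = -1

mask = (labels < -n) | (labels >= n) | (labels == na_sentinel)

# old path: wraparound take, then overwrite the bad slots afterwards
old = reverse_indexer.take(labels, mode='wrap')
np.putmask(old, mask, na_sentinel)

# new path (this PR): clamp bad labels to -1 up front, then fill on take
new_labels = labels.copy()
np.putmask(new_labels, mask, -1)
new = take_fill(reverse_indexer, new_labels, na_sentinel)

assert (old == new).all()
```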

@@ -304,8 +302,6 @@ def factorize(values, sort=False, order=None, na_sentinel=-1, size_hint=None):
     uniques = vec_klass()
     labels = table.get_labels(vals, uniques, 0, na_sentinel, True)

-    labels = _ensure_platform_int(labels)
-
     uniques = uniques.to_array()

     if sort and len(uniques) > 0:
@@ -825,6 +821,7 @@ def _take_2d_multi_generic(arr, indexer, out, fill_value, mask_info):
                 out[i, j] = arr[u_, v]


+# is this used ?
 def _take_nd_generic(arr, indexer, out, axis, fill_value, mask_info):
     if mask_info is not None:
         mask, needs_masking = mask_info
@@ -1076,7 +1073,6 @@ def take_nd(arr, indexer, axis=0, out=None, fill_value=np.nan, mask_info=None,

     func = _get_take_nd_function(arr.ndim, arr.dtype, out.dtype, axis=axis,
                                  mask_info=mask_info)
-    indexer = _ensure_int64(indexer)
     func(arr, indexer, out, fill_value)

     if flip_order:
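The point of routing everything through take_nd instead of ndarray.take is that -1 becomes a true missing-value marker (with dtype promotion) rather than "last element". A small sketch; pandas.api.extensions.take is the public wrapper later pandas versions expose for this behavior (at the time of this PR the equivalent was the internal take_nd):

```python
import numpy as np
from pandas.api.extensions import take  # public fill-aware take in later pandas

arr = np.array([10, 20, 30])

# plain ndarray.take has no notion of a missing slot: -1 just means "last element"
print(arr.take([0, 2, -1]))                                         # [10 30 30]

# the fill-aware take treats -1 as missing, promoting int -> float to hold NaN
print(take(arr, [0, 2, -1], allow_fill=True, fill_value=np.nan))    # [10. 30. nan]
```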
2 changes: 0 additions & 2 deletions pandas/core/frame.py
@@ -48,7 +48,6 @@
     _ensure_float,
     _ensure_float64,
     _ensure_int64,
-    _ensure_platform_int,
     is_list_like,
     is_iterator,
     is_sequence,
@@ -3195,7 +3194,6 @@ def trans(v):
                 keys.append(trans(k))
             indexer = _lexsort_indexer(keys, orders=ascending,
                                        na_position=na_position)
-            indexer = _ensure_platform_int(indexer)
         else:
             from pandas.core.groupby import _nargsort

3 changes: 1 addition & 2 deletions pandas/core/groupby.py
@@ -1706,7 +1706,6 @@ def get_group_levels(self):

        name_list = []
        for ping, labels in zip(self.groupings, self.recons_labels):
-            labels = _ensure_platform_int(labels)
            levels = ping.group_index.take(labels)

            name_list.append(levels)
@@ -4368,7 +4367,7 @@ def _get_group_index_sorter(group_index, ngroups):
     if alpha + beta * ngroups < count * np.log(count):
         sorter, _ = _algos.groupsort_indexer(_ensure_int64(group_index),
                                              ngroups)
-        return _ensure_platform_int(sorter)
+        return sorter
     else:
         return group_index.argsort(kind='mergesort')

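The heuristic kept above chooses between the Cython counting sort (_algos.groupsort_indexer, roughly O(count + ngroups)) and a stable mergesort argsort (O(count log count)). A rough pure-Python/NumPy sketch of what the counting-sort path computes (it ignores the -1/NA group that the real Cython routine also handles):

```python
import numpy as np

def groupsort_indexer_py(group_index, ngroups):
    # stable counting sort over group ids; sketch only, no -1/NA handling
    counts = np.bincount(group_index, minlength=ngroups)
    start = np.zeros(ngroups, dtype=np.intp)
    start[1:] = np.cumsum(counts)[:-1]
    indexer = np.empty(len(group_index), dtype=np.intp)
    pos = start.copy()
    for i, g in enumerate(group_index):
        indexer[pos[g]] = i
        pos[g] += 1
    return indexer

gi = np.array([2, 0, 1, 0, 2, 1])
assert (groupsort_indexer_py(gi, 3) == gi.argsort(kind='mergesort')).all()
```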
23 changes: 7 additions & 16 deletions pandas/core/indexing.py
@@ -10,7 +10,7 @@
     is_list_like,
     is_sequence,
     is_scalar,
-    _ensure_platform_int)
+    _ensure_int64)
 from pandas.types.missing import isnull, _infer_fill_value

 from pandas.core.index import Index, MultiIndex
@@ -864,7 +864,6 @@ def _convert_for_reindex(self, key, axis=0):
            keyarr = _asarray_tuplesafe(key)

            if is_integer_dtype(keyarr) and not labels.is_integer():
-                keyarr = _ensure_platform_int(keyarr)
                return labels.take(keyarr)

            return keyarr
@@ -1853,20 +1852,12 @@ def maybe_convert_indices(indices, n):
     """ if we have negative indicies, translate to postive here
     if have indicies that are out-of-bounds, raise an IndexError
     """
-    if isinstance(indices, list):
-        indices = np.array(indices)
-        if len(indices) == 0:
-            # If list is empty, np.array will return float and cause indexing
-            # errors.
-            return np.empty(0, dtype=np.int_)
-
-    mask = indices < 0
-    if mask.any():
-        indices[mask] += n
-    mask = (indices >= n) | (indices < 0)
-    if mask.any():
-        raise IndexError("indices are out-of-bounds")
-    return indices
+    # return indices
+    from pandas.algos import take_bounds_check
+    indices = _ensure_int64(indices)
+    out = np.empty(len(indices), dtype='int64')
+    take_bounds_check(indices, out, n)
+    return out


 def maybe_convert_ix(*args):
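The replacement routes the negative-index translation and bounds check through a Cython helper (pandas.algos.take_bounds_check, added elsewhere in this PR and not shown here). For reference, a pure-NumPy sketch of the semantics the old Python code implemented (maybe_convert_indices_ref is an illustrative name, not pandas API):

```python
import numpy as np

def maybe_convert_indices_ref(indices, n):
    """Reference semantics: translate negative indices, raise if out of bounds."""
    indices = np.asarray(indices, dtype=np.intp)
    if len(indices) == 0:
        return indices
    indices = np.where(indices < 0, indices + n, indices)
    if ((indices < 0) | (indices >= n)).any():
        raise IndexError("indices are out-of-bounds")
    return indices

print(maybe_convert_indices_ref([0, -1, 2], 5))   # [0 4 2]
```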
7 changes: 3 additions & 4 deletions pandas/core/reshape.py
@@ -6,7 +6,7 @@

 import numpy as np

-from pandas.types.common import _ensure_platform_int, is_list_like
+from pandas.types.common import is_list_like
 from pandas.types.cast import _maybe_promote
 from pandas.types.missing import notnull
 import pandas.types.concat as _concat
@@ -114,10 +114,10 @@ def _make_sorted_values_labels(self):
         ngroups = len(obs_ids)

         indexer = _algos.groupsort_indexer(comp_index, ngroups)[0]
-        indexer = _ensure_platform_int(indexer)

         self.sorted_values = algos.take_nd(self.values, indexer, axis=0)
-        self.sorted_labels = [l.take(indexer) for l in to_sort]
+        self.sorted_labels = [algos.take_nd(l, indexer, allow_fill=False)
+                              for l in to_sort]

     def _make_selectors(self):
         new_levels = self.new_index_levels
@@ -129,7 +129,6 @@ def _make_selectors(self):
         comp_index, obs_ids = get_compressed_ids(remaining_labels, level_sizes)
         ngroups = len(obs_ids)

-        comp_index = _ensure_platform_int(comp_index)
         stride = self.index.levshape[self.level] + self.lift
         self.full_shape = ngroups, stride

7 changes: 3 additions & 4 deletions pandas/core/series.py
@@ -1768,7 +1768,6 @@ def sort_index(self, axis=0, level=None, ascending=True, inplace=False,
         elif isinstance(index, MultiIndex):
             from pandas.core.groupby import _lexsort_indexer
             indexer = _lexsort_indexer(index.labels, orders=ascending)
-            indexer = _ensure_platform_int(indexer)
             new_index = index.take(indexer)
         else:
             new_index, indexer = index.sort_values(return_indexer=True,
@@ -2381,14 +2380,14 @@ def take(self, indices, axis=0, convert=True, is_copy=False, **kwargs):
        numpy.ndarray.take
        """
        nv.validate_take(tuple(), kwargs)
+        indices = np.asarray(indices)

        # check/convert indicies here
        if convert:
            indices = maybe_convert_indices(indices, len(self._get_axis(axis)))

-        indices = _ensure_platform_int(indices)
-        new_index = self.index.take(indices)
-        new_values = self._values.take(indices)
+        new_index = self.index.take(indices, convert=False)
+        new_values = algos.take_nd(self._values, indices, allow_fill=False)
        return self._constructor(new_values,
                                 index=new_index).__finalize__(self)

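Behavior of the public API is unchanged here; with convert=True (the default) negative positions are still translated before the take. A quick sanity check that works the same before and after this change:

```python
import pandas as pd

s = pd.Series([10, 20, 30], index=list("abc"))
print(s.take([0, -1]))
# a    10
# c    30
# dtype: int64
```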
50 changes: 24 additions & 26 deletions pandas/indexes/base.py
@@ -1463,38 +1463,39 @@ def _ensure_compat_concat(indexes):

     @Appender(_index_shared_docs['take'])
     def take(self, indices, axis=0, allow_fill=True,
-             fill_value=None, **kwargs):
+             fill_value=None, convert=True, **kwargs):
         nv.validate_take(tuple(), kwargs)
-        indices = _ensure_platform_int(indices)
-        if self._can_hold_na:
+
+        if not self._can_hold_na and allow_fill and fill_value is not None:
+            msg = 'Unable to fill values because {0} cannot contain NA'
+            raise ValueError(msg.format(self.__class__.__name__))
+        else:
             taken = self._assert_take_fillable(self.values, indices,
                                                allow_fill=allow_fill,
                                                fill_value=fill_value,
-                                               na_value=self._na_value)
-        else:
-            if allow_fill and fill_value is not None:
-                msg = 'Unable to fill values because {0} cannot contain NA'
-                raise ValueError(msg.format(self.__class__.__name__))
-            taken = self.values.take(indices)
+                                               na_value=self._na_value,
+                                               convert=convert)
         return self._shallow_copy(taken)

     def _assert_take_fillable(self, values, indices, allow_fill=True,
-                              fill_value=None, na_value=np.nan):
-        """ Internal method to handle NA filling of take """
-        indices = _ensure_platform_int(indices)
-
-        # only fill if we are passing a non-None fill_value
+                              fill_value=None, convert=True, na_value=np.nan):
+        """ internal method to handle NA filling of take """
+        indices = np.asarray(indices)
+
         if allow_fill and fill_value is not None:
             if (indices < -1).any():
                 msg = ('When allow_fill=True and fill_value is not None, '
                        'all indices must be >= -1')
                 raise ValueError(msg)
-            taken = values.take(indices)
-            mask = indices == -1
-            if mask.any():
-                taken[mask] = na_value
-        else:
-            taken = values.take(indices)
+            taken = algos.take_nd(values, indices, allow_fill=allow_fill,
+                                  fill_value=na_value)
+        else:
+            # provide wraparound semantics if fill_value not specified
+            if convert:
+                from pandas.core.indexing import maybe_convert_indices
+                n = values.shape[0]
+                indices = maybe_convert_indices(indices, n)
+            taken = algos.take_nd(values, indices, allow_fill=False)
         return taken

     @cache_readonly

Review comment (on `indices = np.asarray(indices)`) - Contributor: document the new arg.
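Seen from the outside, the two branches of _assert_take_fillable correspond to the two meanings of -1 in Index.take, which this hunk does not change. For example (output shown for a recent pandas; older versions print Float64Index):

```python
import numpy as np
import pandas as pd

idx = pd.Index([1.5, 2.5, 3.5])

# fill branch: -1 marks a missing slot, only when allow_fill=True and a fill_value is given
print(idx.take([0, -1], allow_fill=True, fill_value=np.nan))   # Index([1.5, nan], dtype='float64')

# wraparound branch: without a fill_value, -1 keeps its positional meaning
print(idx.take([0, -1]))                                       # Index([1.5, 3.5], dtype='float64')
```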
@@ -2529,7 +2530,7 @@ def _reindex_non_unique(self, target):
         if len(missing):
             l = np.arange(len(indexer))

-            missing = _ensure_platform_int(missing)
+            missing = missing
             missing_labels = target.take(missing)
             missing_indexer = _ensure_int64(l[~check])
             cur_labels = self.take(indexer[check])._values
@@ -2723,12 +2724,9 @@ def _join_non_unique(self, other, how='left', return_indexers=False):
                                                 [other._values], how=how,
                                                 sort=True)

-        left_idx = _ensure_platform_int(left_idx)
-        right_idx = _ensure_platform_int(right_idx)
-
-        join_index = self.values.take(left_idx)
-        mask = left_idx == -1
-        np.putmask(join_index, mask, other._values.take(right_idx))
+        lvals = algos.take_nd(self.values, left_idx, fill_value=-1)
+        rvals = algos.take_nd(other._values, right_idx, fill_value=-1)
+        join_index = np.where(left_idx == -1, rvals, lvals)

         join_index = self._wrap_joined_index(join_index, other)

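A standalone NumPy sketch of the combine step the new code performs (left_idx/right_idx are the joined positional indexers, with -1 meaning no match on that side):

```python
import numpy as np

left_vals = np.array(["a", "b", "c"], dtype=object)
right_vals = np.array(["x", "y"], dtype=object)

# positional indexers produced by the join; -1 means no match on that side
left_idx = np.array([0, 2, -1])
right_idx = np.array([1, -1, 0])

lvals = left_vals.take(left_idx)    # -1 wraps here, but those slots get overwritten below
rvals = right_vals.take(right_idx)
join_values = np.where(left_idx == -1, rvals, lvals)
print(join_values)                  # ['a' 'c' 'x']
```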
4 changes: 1 addition & 3 deletions pandas/indexes/category.py
@@ -5,7 +5,6 @@
 from pandas.compat.numpy import function as nv
 from pandas.types.generic import ABCCategorical, ABCSeries
 from pandas.types.common import (is_categorical_dtype,
-                                 _ensure_platform_int,
                                  is_list_like,
                                  is_scalar)
 from pandas.types.missing import array_equivalent
@@ -466,7 +465,7 @@ def get_indexer(self, target, method=None, limit=None, tolerance=None):
         codes = self.categories.get_indexer(target)
         indexer, _ = self._engine.get_indexer_non_unique(codes)

-        return _ensure_platform_int(indexer)
+        return indexer

     def get_indexer_non_unique(self, target):
         """ this is the same for a CategoricalIndex for get_indexer; the API
@@ -497,7 +496,6 @@ def _convert_list_indexer(self, keyarr, kind=None):
     def take(self, indices, axis=0, allow_fill=True,
              fill_value=None, **kwargs):
         nv.validate_take(tuple(), kwargs)
-        indices = _ensure_platform_int(indices)
         taken = self._assert_take_fillable(self.codes, indices,
                                            allow_fill=allow_fill,
                                            fill_value=fill_value,
23 changes: 10 additions & 13 deletions pandas/indexes/multi.py
@@ -1038,7 +1038,6 @@ def __getitem__(self, key):
     def take(self, indices, axis=0, allow_fill=True,
              fill_value=None, **kwargs):
         nv.validate_take(tuple(), kwargs)
-        indices = _ensure_platform_int(indices)
         taken = self._assert_take_fillable(self.labels, indices,
                                            allow_fill=allow_fill,
                                            fill_value=fill_value,
@@ -1055,17 +1054,16 @@ def _assert_take_fillable(self, values, indices, allow_fill=True,
                 msg = ('When allow_fill=True and fill_value is not None, '
                        'all indices must be >= -1')
                 raise ValueError(msg)
-            taken = [lab.take(indices) for lab in self.labels]
-            mask = indices == -1
-            if mask.any():
-                masked = []
-                for new_label in taken:
-                    label_values = new_label.values()
-                    label_values[mask] = na_value
-                    masked.append(base.FrozenNDArray(label_values))
-                taken = masked
+            taken = [algos.take_nd(lab, indices, fill_value=na_value)
+                     for lab in values]
         else:
-            taken = [lab.take(indices) for lab in self.labels]
+            # provide wraparound semantics
+            from pandas.core.indexing import maybe_convert_indices
+            taken = []
+            for i, lab in enumerate(values):
+                lab = maybe_convert_indices(lab, len(self.levels[i]))
+                taken.append(algos.take_nd(lab, indices, allow_fill=False))

         return taken

     def append(self, other):
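The same two meanings of -1 apply at the MultiIndex level, just per label array. A quick usage example with the public API (the filled entry shows up as NaN in every level):

```python
import numpy as np
import pandas as pd

mi = pd.MultiIndex.from_tuples([("a", 1), ("b", 2), ("c", 3)])

print(mi.take([0, -1]))                                        # [('a', 1), ('c', 3)]
print(mi.take([0, -1], allow_fill=True, fill_value=np.nan))    # [('a', 1), (nan, nan)]
```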
@@ -1340,7 +1338,6 @@ def sortlevel(self, level=0, ascending=True, sort_remaining=True):
             if not ascending:
                 indexer = indexer[::-1]

-        indexer = _ensure_platform_int(indexer)
         new_labels = [lab.take(indexer) for lab in self.labels]

         new_index = MultiIndex(labels=new_labels, levels=self.levels,
@@ -1786,7 +1783,7 @@ def convert_indexer(start, stop, step, indexer=indexer, labels=labels):
                 # selected
                 from pandas import Series
                 mapper = Series(indexer)
-                indexer = labels.take(_ensure_platform_int(indexer))
+                indexer = labels.take(indexer)
                 result = Series(Index(indexer).isin(r).nonzero()[0])
                 m = result.map(mapper)._values

7 changes: 3 additions & 4 deletions pandas/src/algos_common_helper.pxi
@@ -2847,17 +2847,16 @@ def put2d_int64_float64(ndarray[int64_t, ndim=2, cast=True] values,
 #----------------------------------------------------------------------
 # ensure_dtype
 #----------------------------------------------------------------------
-
-cdef int PLATFORM_INT = (<ndarray> np.arange(0, dtype=np.int_)).descr.type_num
+cdef int PLATFORM_INT = (<ndarray> np.arange(0, dtype=np.intp)).descr.type_num

 cpdef ensure_platform_int(object arr):
     if util.is_array(arr):
         if (<ndarray> arr).descr.type_num == PLATFORM_INT:
             return arr
         else:
-            return arr.astype(np.int_)
+            return arr.astype(np.intp)
     else:
-        return np.array(arr, dtype=np.int_)
+        return np.array(arr, dtype=np.intp)

 cpdef ensure_object(object arr):
     if util.is_array(arr):
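Background on the np.int_ to np.intp switch: np.intp always matches the pointer size used for array indexing, while np.int_ is the C long, which is only 32 bits on 64-bit Windows under NumPy 1.x; that mismatch is what forced the scattered platform-int conversions this PR removes.

```python
import numpy as np

# on a 64-bit platform:
print(np.dtype(np.intp).itemsize * 8)   # 64 everywhere
print(np.dtype(np.int_).itemsize * 8)   # 32 on 64-bit Windows (NumPy 1.x), 64 on Linux/macOS
```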