remove unnecessary get_value_at calls (#28977)

jbrockmendel · jreback · commit c65cfb6ac0fa · 2019-10-15T08:08:41.000-04:00
diff --git a/pandas/_libs/index.pyx b/pandas/_libs/index.pyx
@@ -41,11 +41,13 @@ cdef inline bint is_definitely_invalid_key(object val):
 
 
 cpdef get_value_at(ndarray arr, object loc, object tz=None):
+    obj = util.get_value_at(arr, loc)
+
     if arr.descr.type_num == NPY_DATETIME:
-        return Timestamp(util.get_value_at(arr, loc), tz=tz)
+        return Timestamp(obj, tz=tz)
     elif arr.descr.type_num == NPY_TIMEDELTA:
-        return Timedelta(util.get_value_at(arr, loc))
-    return util.get_value_at(arr, loc)
+        return Timedelta(obj)
+    return obj
 
 
 # Don't populate hash tables in monotonic indexes larger than this
@@ -102,6 +104,9 @@ cdef class IndexEngine:
         arr[loc] = value
 
     cpdef get_loc(self, object val):
+        cdef:
+            Py_ssize_t loc
+
         if is_definitely_invalid_key(val):
             raise TypeError("'{val}' is an invalid key".format(val=val))
 
@@ -114,7 +119,7 @@ cdef class IndexEngine:
             loc = _bin_search(values, val)  # .searchsorted(val, side='left')
             if loc >= len(values):
                 raise KeyError(val)
-            if util.get_value_at(values, loc) != val:
+            if values[loc] != val:
                 raise KeyError(val)
             return loc
 
@@ -352,22 +357,22 @@ cdef Py_ssize_t _bin_search(ndarray values, object val) except -1:
         Py_ssize_t mid = 0, lo = 0, hi = len(values) - 1
         object pval
 
-    if hi == 0 or (hi > 0 and val > util.get_value_at(values, hi)):
+    if hi == 0 or (hi > 0 and val > values[hi]):
         return len(values)
 
     while lo < hi:
         mid = (lo + hi) // 2
-        pval = util.get_value_at(values, mid)
+        pval = values[mid]
         if val < pval:
             hi = mid
         elif val > pval:
             lo = mid + 1
         else:
-            while mid > 0 and val == util.get_value_at(values, mid - 1):
+            while mid > 0 and val == values[mid - 1]:
                 mid -= 1
             return mid
 
-    if val <= util.get_value_at(values, mid):
+    if val <= values[mid]:
         return mid
     else:
         return mid + 1
@@ -387,13 +392,16 @@ cdef class DatetimeEngine(Int64Engine):
         return 'M8[ns]'
 
     def __contains__(self, object val):
+        cdef:
+            int64_t loc
+
         if self.over_size_threshold and self.is_monotonic_increasing:
             if not self.is_unique:
                 return self._get_loc_duplicates(val)
             values = self._get_index_values()
             conv = maybe_datetimelike_to_i8(val)
             loc = values.searchsorted(conv, side='left')
-            return util.get_value_at(values, loc) == conv
+            return values[loc] == conv
 
         self._ensure_mapping_populated()
         return maybe_datetimelike_to_i8(val) in self.mapping
@@ -405,6 +413,8 @@ cdef class DatetimeEngine(Int64Engine):
         return algos.is_monotonic(values, timelike=True)
 
     cpdef get_loc(self, object val):
+        cdef:
+            int64_t loc
         if is_definitely_invalid_key(val):
             raise TypeError
 
@@ -422,7 +432,7 @@ cdef class DatetimeEngine(Int64Engine):
                 self._date_check_type(val)
                 raise KeyError(val)
 
-            if loc == len(values) or util.get_value_at(values, loc) != conv:
+            if loc == len(values) or values[loc] != conv:
                 raise KeyError(val)
             return loc
 
diff --git a/pandas/_libs/lib.pyx b/pandas/_libs/lib.pyx
@@ -782,8 +782,16 @@ def generate_slices(const int64_t[:] labels, Py_ssize_t ngroups):
     return starts, ends
 
 
-def indices_fast(object index, const int64_t[:] labels, list keys,
+def indices_fast(ndarray index, const int64_t[:] labels, list keys,
                  list sorted_labels):
+    """
+    Parameters
+    ----------
+    index : ndarray
+    labels : ndarray[int64]
+    keys : list
+    sorted_labels : list[ndarray[int64]]
+    """
     cdef:
         Py_ssize_t i, j, k, lab, cur, start, n = len(labels)
         dict result = {}
@@ -803,8 +811,7 @@ def indices_fast(object index, const int64_t[:] labels, list keys,
             if lab != -1:
                 tup = PyTuple_New(k)
                 for j in range(k):
-                    val = util.get_value_at(keys[j],
-                                            sorted_labels[j][i - 1])
+                    val = keys[j][sorted_labels[j][i - 1]]
                     PyTuple_SET_ITEM(tup, j, val)
                     Py_INCREF(val)
 
@@ -814,8 +821,7 @@ def indices_fast(object index, const int64_t[:] labels, list keys,
 
     tup = PyTuple_New(k)
     for j in range(k):
-        val = util.get_value_at(keys[j],
-                                sorted_labels[j][n - 1])
+        val = keys[j][sorted_labels[j][n - 1]]
         PyTuple_SET_ITEM(tup, j, val)
         Py_INCREF(val)
     result[tup] = index[start:]
diff --git a/pandas/_libs/reduction.pyx b/pandas/_libs/reduction.pyx
@@ -121,7 +121,7 @@ cdef class Reducer:
             for i in range(self.nresults):
 
                 if has_ndarray_labels:
-                    name = util.get_value_at(labels, i)
+                    name = labels[i]
                 elif has_labels:
                     # labels is an ExtensionArray
                     name = labels[i]
diff --git a/pandas/core/sorting.py b/pandas/core/sorting.py
@@ -303,8 +303,8 @@ def get_flattened_iterator(comp_ids, ngroups, levels, labels):
 
 
 def get_indexer_dict(label_list, keys):
-    """ return a diction of {labels} -> {indexers} """
-    shape = list(map(len, keys))
+    """ return a dict of {labels} -> {indexers} """
+    shape = [len(x) for x in keys]
 
     group_index = get_group_index(label_list, shape, sort=True, xnull=True)
     ngroups = (