REF: avoid getattr pattern for rank_1d, rank_2d (pandas-dev#29137)

jbrockmendel · Nico Cernek · commit 5b486925ebc8 · 2019-12-31T19:27:51.000-06:00
diff --git a/pandas/_libs/algos.pyx b/pandas/_libs/algos.pyx
@@ -311,7 +311,7 @@ def nancorr_spearman(const float64_t[:, :] mat, Py_ssize_t minp=1):
     ranked_mat = np.empty((N, K), dtype=np.float64)
 
     for i in range(K):
-        ranked_mat[:, i] = rank_1d_float64(mat[:, i])
+        ranked_mat[:, i] = rank_1d(mat[:, i])
 
     for xi in range(K):
         for yi in range(xi + 1):
@@ -337,8 +337,8 @@ def nancorr_spearman(const float64_t[:, :] mat, Py_ssize_t minp=1):
                         j += 1
 
                 if not all_ranks:
-                    maskedx = rank_1d_float64(maskedx)
-                    maskedy = rank_1d_float64(maskedy)
+                    maskedx = rank_1d(maskedx)
+                    maskedy = rank_1d(maskedy)
 
                 mean = (nobs + 1) / 2.
 
@@ -1005,12 +1005,6 @@ def rank_1d(rank_t[:] in_arr, ties_method='average',
         return ranks
 
 
-rank_1d_object = rank_1d["object"]
-rank_1d_float64 = rank_1d["float64_t"]
-rank_1d_uint64 = rank_1d["uint64_t"]
-rank_1d_int64 = rank_1d["int64_t"]
-
-
 def rank_2d(rank_t[:, :] in_arr, axis=0, ties_method='average',
             ascending=True, na_option='keep', pct=False):
     """
@@ -1083,8 +1077,8 @@ def rank_2d(rank_t[:, :] in_arr, axis=0, ties_method='average',
         except TypeError:
             values = in_arr
             for i in range(len(values)):
-                ranks[i] = rank_1d_object(in_arr[i], ties_method=ties_method,
-                                          ascending=ascending, pct=pct)
+                ranks[i] = rank_1d(in_arr[i], ties_method=ties_method,
+                                   ascending=ascending, pct=pct)
             if axis == 0:
                 return ranks.T
             else:
@@ -1179,12 +1173,6 @@ def rank_2d(rank_t[:, :] in_arr, axis=0, ties_method='average',
         return ranks
 
 
-rank_2d_object = rank_2d["object"]
-rank_2d_float64 = rank_2d["float64_t"]
-rank_2d_uint64 = rank_2d["uint64_t"]
-rank_2d_int64 = rank_2d["int64_t"]
-
-
 # generated from template
 include "algos_common_helper.pxi"
 include "algos_take_helper.pxi"
diff --git a/pandas/_libs/groupby.pyx b/pandas/_libs/groupby.pyx
@@ -455,7 +455,7 @@ def _group_add(complexfloating_t[:, :] out,
     if len(values) != len(labels):
         raise ValueError("len(index) != len(labels)")
 
-    nobs = np.zeros((len(out), out.shape[1]), dtype=np.int64)
+    nobs = np.zeros((<object>out).shape, dtype=np.int64)
     sumx = np.zeros_like(out)
 
     N, K = (<object>values).shape
@@ -507,12 +507,13 @@ def _group_prod(floating[:, :] out,
     cdef:
         Py_ssize_t i, j, N, K, lab, ncounts = len(counts)
         floating val, count
-        floating[:, :] prodx, nobs
+        floating[:, :] prodx
+        int64_t[:, :] nobs
 
     if not len(values) == len(labels):
         raise ValueError("len(index) != len(labels)")
 
-    nobs = np.zeros_like(out)
+    nobs = np.zeros((<object>out).shape, dtype=np.int64)
     prodx = np.ones_like(out)
 
     N, K = (<object>values).shape
@@ -555,14 +556,15 @@ def _group_var(floating[:, :] out,
     cdef:
         Py_ssize_t i, j, N, K, lab, ncounts = len(counts)
         floating val, ct, oldmean
-        floating[:, :] nobs, mean
+        floating[:, :] mean
+        int64_t[:, :] nobs
 
     assert min_count == -1, "'min_count' only used in add and prod"
 
     if not len(values) == len(labels):
         raise ValueError("len(index) != len(labels)")
 
-    nobs = np.zeros_like(out)
+    nobs = np.zeros((<object>out).shape, dtype=np.int64)
     mean = np.zeros_like(out)
 
     N, K = (<object>values).shape
@@ -610,14 +612,15 @@ def _group_mean(floating[:, :] out,
     cdef:
         Py_ssize_t i, j, N, K, lab, ncounts = len(counts)
         floating val, count
-        floating[:, :] sumx, nobs
+        floating[:, :] sumx
+        int64_t[:, :] nobs
 
     assert min_count == -1, "'min_count' only used in add and prod"
 
     if not len(values) == len(labels):
         raise ValueError("len(index) != len(labels)")
 
-    nobs = np.zeros_like(out)
+    nobs = np.zeros((<object>out).shape, dtype=np.int64)
     sumx = np.zeros_like(out)
 
     N, K = (<object>values).shape
@@ -1243,15 +1246,16 @@ def group_max(groupby_t[:, :] out,
     cdef:
         Py_ssize_t i, j, N, K, lab, ncounts = len(counts)
         groupby_t val, count, nan_val
-        ndarray[groupby_t, ndim=2] maxx, nobs
+        ndarray[groupby_t, ndim=2] maxx
         bint runtime_error = False
+        int64_t[:, :] nobs
 
     assert min_count == -1, "'min_count' only used in add and prod"
 
     if not len(values) == len(labels):
         raise AssertionError("len(index) != len(labels)")
 
-    nobs = np.zeros_like(out)
+    nobs = np.zeros((<object>out).shape, dtype=np.int64)
 
     maxx = np.empty_like(out)
     if groupby_t is int64_t:
@@ -1314,15 +1318,16 @@ def group_min(groupby_t[:, :] out,
     cdef:
         Py_ssize_t i, j, N, K, lab, ncounts = len(counts)
         groupby_t val, count, nan_val
-        ndarray[groupby_t, ndim=2] minx, nobs
+        ndarray[groupby_t, ndim=2] minx
         bint runtime_error = False
+        int64_t[:, :] nobs
 
     assert min_count == -1, "'min_count' only used in add and prod"
 
     if not len(values) == len(labels):
         raise AssertionError("len(index) != len(labels)")
 
-    nobs = np.zeros_like(out)
+    nobs = np.zeros((<object>out).shape, dtype=np.int64)
 
     minx = np.empty_like(out)
     if groupby_t is int64_t:
diff --git a/pandas/core/algorithms.py b/pandas/core/algorithms.py
@@ -245,11 +245,17 @@ def _get_hashtable_algo(values):
     return (htable, table, values, dtype, ndtype)
 
 
-def _get_data_algo(values, func_map):
+def _get_values_for_rank(values):
     if is_categorical_dtype(values):
         values = values._values_for_rank()
 
     values, dtype, ndtype = _ensure_data(values)
+    return values, dtype, ndtype
+
+
+def _get_data_algo(values, func_map):
+    values, dtype, ndtype = _get_values_for_rank(values)
+
     if ndtype == "object":
 
         # it's cheaper to use a String Hash Table than Object; we infer
@@ -900,17 +906,17 @@ def rank(values, axis=0, method="average", na_option="keep", ascending=True, pct
         (e.g. 1, 2, 3) or in percentile form (e.g. 0.333..., 0.666..., 1).
     """
     if values.ndim == 1:
-        f, values = _get_data_algo(values, _rank1d_functions)
-        ranks = f(
+        values, _, _ = _get_values_for_rank(values)
+        ranks = algos.rank_1d(
             values,
             ties_method=method,
             ascending=ascending,
             na_option=na_option,
             pct=pct,
         )
     elif values.ndim == 2:
-        f, values = _get_data_algo(values, _rank2d_functions)
-        ranks = f(
+        values, _, _ = _get_values_for_rank(values)
+        ranks = algos.rank_2d(
             values,
             axis=axis,
             ties_method=method,
@@ -1000,21 +1006,6 @@ def checked_add_with_arr(arr, b, arr_mask=None, b_mask=None):
     return arr + b
 
 
-_rank1d_functions = {
-    "float64": algos.rank_1d_float64,
-    "int64": algos.rank_1d_int64,
-    "uint64": algos.rank_1d_uint64,
-    "object": algos.rank_1d_object,
-}
-
-_rank2d_functions = {
-    "float64": algos.rank_2d_float64,
-    "int64": algos.rank_2d_int64,
-    "uint64": algos.rank_2d_uint64,
-    "object": algos.rank_2d_object,
-}
-
-
 def quantile(x, q, interpolation_method="fraction"):
     """
     Compute sample quantile or quantiles of the input array. For example, q=0.5
diff --git a/pandas/tests/test_algos.py b/pandas/tests/test_algos.py
@@ -1619,7 +1619,7 @@ def test_scipy_compat(self):
         def _check(arr):
             mask = ~np.isfinite(arr)
             arr = arr.copy()
-            result = libalgos.rank_1d_float64(arr)
+            result = libalgos.rank_1d(arr)
             arr[mask] = np.inf
             exp = rankdata(arr)
             exp[mask] = np.nan