Combined build_count_table_int64 and build_count_table_float64 into a
single function using fused types.

insertinterestingnamehere · insertinterestingnamehere · commit f28a443f7ead · 2015-08-18T11:41:38.000-06:00
diff --git a/pandas/hashtable.pyx b/pandas/hashtable.pyx
@@ -867,25 +867,38 @@ cdef class Int64Factorizer:
         return labels
 
 @cython.boundscheck(False)
-cdef build_count_table_float64(float64_t[:] values, kh_float64_t *table, bint dropna):
+cdef build_count_table_scalar64(sixty_four_bit_scalar[:] values, void *table, bint dropna):
     cdef:
         khiter_t k
         Py_ssize_t i, n = len(values)
-        float64_t val
+        sixty_four_bit_scalar val
         int ret = 0
 
     with nogil:
-        kh_resize_float64(table, n)
+        if sixty_four_bit_scalar is float64_t:
+            kh_resize_float64(<kh_float64_t*>table, n)
 
-        for i in range(n):
-            val = values[i]
-            if val == val or not dropna:
-                k = kh_get_float64(table, val)
-                if k != table.n_buckets:
-                    table.vals[k] += 1
+            for i in range(n):
+                val = values[i]
+                if val == val or not dropna:
+                    k = kh_get_float64(<kh_float64_t*>table, val)
+                    if k != (<kh_float64_t*>table).n_buckets:
+                        (<kh_float64_t*>table).vals[k] += 1
+                    else:
+                        k = kh_put_float64(<kh_float64_t*>table, val, &ret)
+                        (<kh_float64_t*>table).vals[k] = 1
+        elif sixty_four_bit_scalar is int64_t:
+            kh_resize_int64(<kh_int64_t*>table, n)
+
+            for i in range(n):
+                val = values[i]
+                k = kh_get_int64(<kh_int64_t*>table, val)
+                if k != (<kh_int64_t*>table).n_buckets:
+                    (<kh_int64_t*>table).vals[k] += 1
                 else:
-                    k = kh_put_float64(table, val, &ret)
-                    table.vals[k] = 1
+                    k = kh_put_int64(<kh_int64_t*>table, val, &ret)
+                    (<kh_int64_t*>table).vals[k] = 1
+
 
 
 @cython.boundscheck(False)
@@ -902,7 +915,7 @@ cpdef value_count_scalar64(sixty_four_bit_scalar[:] values, bint dropna):
 
     if sixty_four_bit_scalar is float64_t:
         ftable = kh_init_float64()
-        build_count_table_float64(values, ftable, dropna)
+        build_count_table_scalar64(values, ftable, dropna)
 
         result_keys = np.empty(ftable.n_occupied, dtype=np.float64)
         result_counts = np.zeros(ftable.n_occupied, dtype=np.int64)
@@ -917,7 +930,7 @@ cpdef value_count_scalar64(sixty_four_bit_scalar[:] values, bint dropna):
 
     elif sixty_four_bit_scalar is int64_t:
         itable = kh_init_int64()
-        build_count_table_int64(values, itable)
+        build_count_table_scalar64(values, itable, dropna)
 
         result_keys = np.empty(itable.n_occupied, dtype=np.int64)
         result_counts = np.zeros(itable.n_occupied, dtype=np.int64)
@@ -932,26 +945,6 @@ cpdef value_count_scalar64(sixty_four_bit_scalar[:] values, bint dropna):
 
     return np.asarray(result_keys), np.asarray(result_counts)
 
-@cython.boundscheck(False)
-cdef build_count_table_int64(int64_t[:] values, kh_int64_t *table):
-    cdef:
-        khiter_t k
-        Py_ssize_t i, n = len(values)
-        int64_t val
-        int ret = 0
-
-    with nogil:
-        kh_resize_int64(table, n)
-
-        for i in range(n):
-            val = values[i]
-            k = kh_get_int64(table, val)
-            if k != table.n_buckets:
-                table.vals[k] += 1
-            else:
-                k = kh_put_int64(table, val, &ret)
-                table.vals[k] = 1
-
 
 cdef build_count_table_object(ndarray[object] values,
                               ndarray[uint8_t, cast=True] mask,
@@ -1040,7 +1033,7 @@ def mode_int64(int64_t[:] values):
 
     table = kh_init_int64()
 
-    build_count_table_int64(values, table)
+    build_count_table_scalar64(values, table, 0)
 
     modes = np.empty(table.n_buckets, dtype=np.int64)