@@ -65,6 +65,18 @@ cdef Py_ssize_t _INIT_VEC_CAP = 128
65
65
include " hashtable_class_helper.pxi"
66
66
include " hashtable_func_helper.pxi"
67
67
68
+
69
# Bind the platform-dependent "intp" names to the concrete fixed-width
# implementation whose dtype matches np.intp on this build.
if np.dtype(np.int64) == np.dtype(np.intp):
    # intp has the same dtype as int64 here
    IntpHashTable, unique_label_indices = (
        Int64HashTable,
        _unique_label_indices_int64,
    )
elif np.dtype(np.int32) == np.dtype(np.intp):
    # intp has the same dtype as int32 here
    IntpHashTable, unique_label_indices = (
        Int32HashTable,
        _unique_label_indices_int32,
    )
else:
    # Neither fixed-width type matches: there is nothing to alias to.
    raise ValueError(np.dtype(np.intp))
78
+
79
+
68
80
cdef class Factorizer:
69
81
cdef readonly:
70
82
Py_ssize_t count
@@ -168,38 +180,3 @@ cdef class Int64Factorizer(Factorizer):
168
180
169
181
self.count = len (self .uniques)
170
182
return labels
171
-
172
-
173
@cython.wraparound(False)
@cython.boundscheck(False)
def unique_label_indices(const int64_t[:] labels) -> ndarray:
    """
    Indices of the first occurrences of the unique labels
    *excluding* -1. equivalent to:
        np.unique(labels, return_index=True)[1]

    Parameters
    ----------
    labels : const int64_t[:]
        One-dimensional buffer of label values to scan.

    Returns
    -------
    ndarray[int64_t, ndim=1]
        Positions of the first occurrence of each distinct label, ordered
        by label value, with a leading position that points at a -1 label
        removed.
    """
    cdef:
        int ret = 0
        Py_ssize_t i, n = len(labels)
        kh_int64_t *table = kh_init_int64()
        Int64Vector idx = Int64Vector()
        ndarray[int64_t, ndim=1] arr
        Int64VectorData *ud = idx.data

    # The khash table is a manually managed C allocation, so it must be
    # released on every exit path -- including when growing the index
    # vector raises while the GIL is temporarily re-acquired below.
    try:
        kh_resize_int64(table, min(kh_needed_n_buckets(n), SIZE_HINT_LIMIT))

        with nogil:
            for i in range(n):
                kh_put_int64(table, labels[i], &ret)
                # NOTE(review): per khash convention a non-zero ret means
                # the key was not already in the table, i.e. position i is
                # the first occurrence of labels[i].
                if ret != 0:
                    if needs_resize(ud):
                        # resizing goes through a Python-level method
                        with gil:
                            idx.resize()
                    append_data_int64(ud, i)
    finally:
        kh_destroy_int64(table)

    arr = idx.to_array()
    # reorder the recorded positions by the label value they point at
    arr = arr[np.asarray(labels)[arr].argsort()]

    # drop the leading position when it refers to a -1 label
    return arr[1:] if arr.size != 0 and labels[arr[0]] == -1 else arr
0 commit comments