@@ -28,52 +28,6 @@ dtypes = [('Complex128', 'complex128', 'complex128',
28
28
{{for name, dtype, ttype, c_type, to_c_type in dtypes}}
29
29
30
30
31
- @cython.wraparound(False)
32
- @cython.boundscheck(False)
33
- {{if dtype == 'object'}}
34
- cdef build_count_table_{{dtype}}(ndarray[{{dtype}}] values,
35
- kh_{{ttype}}_t *table, bint dropna):
36
- {{else}}
37
- cdef build_count_table_{{dtype}}(const {{dtype}}_t[:] values,
38
- kh_{{ttype}}_t *table, bint dropna):
39
- {{endif}}
40
- cdef:
41
- khiter_t k
42
- Py_ssize_t i, n = len(values)
43
-
44
- {{c_type}} val
45
-
46
- int ret = 0
47
-
48
- {{if dtype == 'object'}}
49
- kh_resize_{{ttype}}(table, n // 10)
50
-
51
- for i in range(n):
52
- val = values[i]
53
- if not checknull(val) or not dropna:
54
- k = kh_get_{{ttype}}(table, <PyObject*>val)
55
- if k != table.n_buckets:
56
- table.vals[k] += 1
57
- else:
58
- k = kh_put_{{ttype}}(table, <PyObject*>val, &ret)
59
- table.vals[k] = 1
60
- {{else}}
61
- with nogil:
62
- kh_resize_{{ttype}}(table, n)
63
-
64
- for i in range(n):
65
- val = {{to_c_type}}(values[i])
66
-
67
- if not is_nan_{{c_type}}(val) or not dropna:
68
- k = kh_get_{{ttype}}(table, val)
69
- if k != table.n_buckets:
70
- table.vals[k] += 1
71
- else:
72
- k = kh_put_{{ttype}}(table, val, &ret)
73
- table.vals[k] = 1
74
- {{endif}}
75
-
76
-
77
31
@cython.wraparound(False)
78
32
@cython.boundscheck(False)
79
33
{{if dtype == 'object'}}
@@ -84,8 +38,6 @@ cpdef value_count_{{dtype}}(const {{dtype}}_t[:] values, bint dropna):
84
38
cdef:
85
39
Py_ssize_t i = 0
86
40
Py_ssize_t n = len(values)
87
- size_t unique_key_index = 0
88
- size_t unique_key_count = 0
89
41
kh_{{ttype}}_t *table
90
42
91
43
# Don't use Py_ssize_t, since table.n_buckets is unsigned
@@ -98,12 +50,10 @@ cpdef value_count_{{dtype}}(const {{dtype}}_t[:] values, bint dropna):
98
50
99
51
# we track the order in which keys are first seen (GH39009),
100
52
# khash-map isn't insertion-ordered, thus:
101
- # table maps key to index_of_appearence
102
- # result_keys maps index_of_appearence to key
103
- # result_counts maps index_of_appearence to number of elements
53
+ # table maps keys to counts
54
+ # result_keys remembers the original order of keys
104
55
105
56
result_keys = {{name}}Vector()
106
- result_counts = Int64Vector()
107
57
table = kh_init_{{ttype}}()
108
58
109
59
{{if dtype == 'object'}}
@@ -118,14 +68,11 @@ cpdef value_count_{{dtype}}(const {{dtype}}_t[:] values, bint dropna):
118
68
val = navalue
119
69
k = kh_get_{{ttype}}(table, <PyObject*>val)
120
70
if k != table.n_buckets:
121
- unique_key_index = table.vals[k]
122
- result_counts.data.data[unique_key_index] += 1
71
+ table.vals[k] += 1
123
72
else:
124
73
k = kh_put_{{ttype}}(table, <PyObject*>val, &ret)
125
- table.vals[k] = unique_key_count
74
+ table.vals[k] = 1
126
75
result_keys.append(val)
127
- result_counts.append(1)
128
- unique_key_count+=1
129
76
{{else}}
130
77
kh_resize_{{ttype}}(table, n)
131
78
@@ -135,19 +82,26 @@ cpdef value_count_{{dtype}}(const {{dtype}}_t[:] values, bint dropna):
135
82
if not is_nan_{{c_type}}(val) or not dropna:
136
83
k = kh_get_{{ttype}}(table, val)
137
84
if k != table.n_buckets:
138
- unique_key_index = table.vals[k]
139
- result_counts.data.data[unique_key_index] += 1
85
+ table.vals[k] += 1
140
86
else:
141
87
k = kh_put_{{ttype}}(table, val, &ret)
142
- table.vals[k] = unique_key_count
88
+ table.vals[k] = 1
143
89
result_keys.append(val)
144
- result_counts.append(1)
145
- unique_key_count+=1
146
90
{{endif}}
147
91
92
+ # collect counts in the order corresponding to result_keys:
93
+ cdef int64_t[:] result_counts = np.empty(table.size, dtype=np.int64)
94
+ for i in range(table.size):
95
+ {{if dtype == 'object'}}
96
+ k = kh_get_{{ttype}}(table, result_keys.data[i])
97
+ {{else}}
98
+ k = kh_get_{{ttype}}(table, result_keys.data.data[i])
99
+ {{endif}}
100
+ result_counts[i] = table.vals[k]
101
+
148
102
kh_destroy_{{ttype}}(table)
149
103
150
- return result_keys.to_array(), result_counts.to_array()
104
+ return result_keys.to_array(), result_counts.base
151
105
152
106
153
107
@cython.wraparound(False)
@@ -294,78 +248,42 @@ def ismember_{{dtype}}(const {{dtype}}_t[:] arr, const {{dtype}}_t[:] values):
294
248
kh_destroy_{{ttype}}(table)
295
249
return result.view(np.bool_)
296
250
297
- {{endfor}}
298
-
299
-
300
251
# ----------------------------------------------------------------------
301
252
# Mode Computations
302
253
# ----------------------------------------------------------------------
303
254
304
- {{py:
305
-
306
- # dtype, ctype, table_type, npy_dtype
307
- dtypes = [('complex128', 'khcomplex128_t', 'complex128', 'complex128'),
308
- ('complex64', 'khcomplex64_t', 'complex64', 'complex64'),
309
- ('float64', 'float64_t', 'float64', 'float64'),
310
- ('float32', 'float32_t', 'float32', 'float32'),
311
- ('int64', 'int64_t', 'int64', 'int64'),
312
- ('int32', 'int32_t', 'int32', 'int32'),
313
- ('int16', 'int16_t', 'int16', 'int16'),
314
- ('int8', 'int8_t', 'int8', 'int8'),
315
- ('uint64', 'uint64_t', 'uint64', 'uint64'),
316
- ('uint32', 'uint32_t', 'uint32', 'uint32'),
317
- ('uint16', 'uint16_t', 'uint16', 'uint16'),
318
- ('uint8', 'uint8_t', 'uint8', 'uint8'),
319
- ('object', 'object', 'pymap', 'object_')]
320
- }}
321
-
322
- {{for dtype, ctype, table_type, npy_dtype in dtypes}}
323
-
324
255
325
256
@cython.wraparound(False)
326
257
@cython.boundscheck(False)
327
-
328
258
{{if dtype == 'object'}}
329
-
330
-
331
- def mode_{{dtype}}(ndarray[{{ctype}}] values, bint dropna):
259
+ def mode_{{dtype}}(ndarray[{{dtype}}] values, bint dropna):
332
260
{{else}}
333
-
334
-
335
261
def mode_{{dtype}}(const {{dtype}}_t[:] values, bint dropna):
336
262
{{endif}}
337
263
cdef:
338
- int count, max_count = 1
339
- int j = -1 # so you can do +=
340
- # Don't use Py_ssize_t, since table.n_buckets is unsigned
341
- khiter_t k
342
- kh_{{table_type}}_t *table
343
- ndarray[{{ctype}}] modes
264
+ {{if dtype == 'object'}}
265
+ ndarray[{{dtype}}] keys
266
+ ndarray[{{dtype}}] modes
267
+ {{else}}
268
+ {{dtype}}_t[:] keys
269
+ ndarray[{{dtype}}_t] modes
270
+ {{endif}}
271
+ int64_t[:] counts
272
+ int64_t count, max_count = -1
273
+ Py_ssize_t k, j = 0
344
274
345
- table = kh_init_{{table_type}}()
346
- build_count_table_{{dtype}}(values, table, dropna)
275
+ keys, counts = value_count_{{dtype}}(values, dropna)
347
276
348
- modes = np.empty(table.n_buckets, dtype=np.{{npy_dtype}})
277
+ {{if dtype == 'object'}}
278
+ modes = np.empty(len(keys), dtype=np.object_)
279
+ {{else}}
280
+ modes = np.empty(len(keys), dtype=np.{{dtype}})
281
+ {{endif}}
349
282
350
283
{{if dtype != 'object'}}
351
284
with nogil:
352
- for k in range(table.n_buckets):
353
- if kh_exist_{{table_type}}(table, k):
354
- count = table.vals[k]
355
- if count == max_count:
356
- j += 1
357
- elif count > max_count:
358
- max_count = count
359
- j = 0
360
- else:
361
- continue
362
-
363
- modes[j] = table.keys[k]
364
- {{else}}
365
- for k in range(table.n_buckets):
366
- if kh_exist_{{table_type}}(table, k):
367
- count = table.vals[k]
368
-
285
+ for k in range(len(keys)):
286
+ count = counts[k]
369
287
if count == max_count:
370
288
j += 1
371
289
elif count > max_count:
@@ -374,11 +292,21 @@ def mode_{{dtype}}(const {{dtype}}_t[:] values, bint dropna):
374
292
else:
375
293
continue
376
294
377
- modes[j] = <object>table.keys[k]
295
+ modes[j] = keys[k]
296
+ {{else}}
297
+ for k in range(len(keys)):
298
+ count = counts[k]
299
+ if count == max_count:
300
+ j += 1
301
+ elif count > max_count:
302
+ max_count = count
303
+ j = 0
304
+ else:
305
+ continue
306
+
307
+ modes[j] = keys[k]
378
308
{{endif}}
379
309
380
- kh_destroy_{{table_type}}(table)
381
-
382
310
return modes[:j + 1]
383
311
384
312
{{endfor}}
0 commit comments