@@ -211,7 +211,6 @@ cdef class StringHashTable(HashTable):
211
211
def unique (self , ndarray[object] values ):
212
212
cdef:
213
213
Py_ssize_t i, n = len (values)
214
- Py_ssize_t idx, count = 0
215
214
int ret = 0
216
215
object val
217
216
char * buf
@@ -223,12 +222,9 @@ cdef class StringHashTable(HashTable):
223
222
buf = util.get_c_string(val)
224
223
k = kh_get_str(self .table, buf)
225
224
if k == self .table.n_buckets:
226
- k = kh_put_str(self .table, buf, & ret)
227
- # print 'putting %s, %s' % (val, count)
228
- count += 1
225
+ kh_put_str(self .table, buf, & ret)
229
226
uniques.append(val)
230
227
231
- # return None
232
228
return uniques.to_array()
233
229
234
230
def factorize (self , ndarray[object] values ):
@@ -258,7 +254,6 @@ cdef class StringHashTable(HashTable):
258
254
labels[i] = count
259
255
count += 1
260
256
261
- # return None
262
257
return reverse, labels
263
258
264
259
cdef class Int32HashTable(HashTable):
@@ -319,7 +314,6 @@ cdef class Int32HashTable(HashTable):
319
314
def lookup (self , ndarray[int32_t] values ):
320
315
cdef:
321
316
Py_ssize_t i, n = len (values)
322
- int ret = 0
323
317
int32_t val
324
318
khiter_t k
325
319
ndarray[int32_t] locs = np.empty(n, dtype = np.int64)
@@ -357,7 +351,6 @@ cdef class Int32HashTable(HashTable):
357
351
labels[i] = count
358
352
count += 1
359
353
360
- # return None
361
354
return reverse, labels
362
355
363
356
cdef class Int64HashTable: # (HashTable):
@@ -518,7 +511,6 @@ cdef class Int64HashTable: #(HashTable):
518
511
def unique (self , ndarray[int64_t] values ):
519
512
cdef:
520
513
Py_ssize_t i, n = len (values)
521
- Py_ssize_t idx, count = 0
522
514
int ret = 0
523
515
ndarray result
524
516
int64_t val
@@ -529,9 +521,8 @@ cdef class Int64HashTable: #(HashTable):
529
521
val = values[i]
530
522
k = kh_get_int64(self .table, val)
531
523
if k == self .table.n_buckets:
532
- k = kh_put_int64(self .table, val, & ret)
524
+ kh_put_int64(self .table, val, & ret)
533
525
uniques.append(val)
534
- count += 1
535
526
536
527
result = uniques.to_array()
537
528
@@ -644,7 +635,6 @@ cdef class Float64HashTable(HashTable):
644
635
def unique (self , ndarray[float64_t] values ):
645
636
cdef:
646
637
Py_ssize_t i, n = len (values)
647
- Py_ssize_t idx, count = 0
648
638
int ret = 0
649
639
float64_t val
650
640
khiter_t k
@@ -657,9 +647,8 @@ cdef class Float64HashTable(HashTable):
657
647
if val == val:
658
648
k = kh_get_float64(self .table, val)
659
649
if k == self .table.n_buckets:
660
- k = kh_put_float64(self .table, val, & ret)
650
+ kh_put_float64(self .table, val, & ret)
661
651
uniques.append(val)
662
- count += 1
663
652
elif not seen_na:
664
653
seen_na = 1
665
654
uniques.append(ONAN)
@@ -786,7 +775,6 @@ cdef class PyObjectHashTable(HashTable):
786
775
def unique (self , ndarray[object] values ):
787
776
cdef:
788
777
Py_ssize_t i, n = len (values)
789
- Py_ssize_t idx, count = 0
790
778
int ret = 0
791
779
object val
792
780
ndarray result
@@ -800,7 +788,7 @@ cdef class PyObjectHashTable(HashTable):
800
788
if not _checknan(val):
801
789
k = kh_get_pymap(self .table, < PyObject* > val)
802
790
if k == self .table.n_buckets:
803
- k = kh_put_pymap(self .table, < PyObject* > val, & ret)
791
+ kh_put_pymap(self .table, < PyObject* > val, & ret)
804
792
uniques.append(val)
805
793
elif not seen_na:
806
794
seen_na = 1
@@ -918,7 +906,7 @@ cdef class Int64Factorizer:
918
906
919
907
cdef build_count_table_int64(ndarray[int64_t] values, kh_int64_t * table):
920
908
cdef:
921
- int k
909
+ khiter_t k
922
910
Py_ssize_t i, n = len (values)
923
911
int ret = 0
924
912
@@ -938,7 +926,6 @@ cpdef value_count_int64(ndarray[int64_t] values):
938
926
cdef:
939
927
Py_ssize_t i
940
928
kh_int64_t * table
941
- int ret = 0
942
929
int k
943
930
944
931
table = kh_init_int64()
@@ -961,7 +948,7 @@ cdef build_count_table_object(ndarray[object] values,
961
948
ndarray[uint8_t, cast= True ] mask,
962
949
kh_pymap_t * table):
963
950
cdef:
964
- int k
951
+ khiter_t k
965
952
Py_ssize_t i, n = len (values)
966
953
int ret = 0
967
954
@@ -983,7 +970,7 @@ cdef build_count_table_object(ndarray[object] values,
983
970
cpdef value_count_object(ndarray[object ] values,
984
971
ndarray[uint8_t, cast= True ] mask):
985
972
cdef:
986
- Py_ssize_t i = len (values)
973
+ Py_ssize_t i
987
974
kh_pymap_t * table
988
975
int k
989
976
@@ -1008,9 +995,7 @@ def mode_object(ndarray[object] values, ndarray[uint8_t, cast=True] mask):
1008
995
int count, max_count = 2
1009
996
int j = - 1 # so you can do +=
1010
997
int k
1011
- Py_ssize_t i, n = len (values)
1012
998
kh_pymap_t * table
1013
- int ret = 0
1014
999
1015
1000
table = kh_init_pymap()
1016
1001
build_count_table_object(values, mask, table)
@@ -1036,11 +1021,10 @@ def mode_object(ndarray[object] values, ndarray[uint8_t, cast=True] mask):
1036
1021
1037
1022
def mode_int64 (ndarray[int64_t] values ):
1038
1023
cdef:
1039
- int val, max_val = 2
1024
+ int count, max_count = 2
1040
1025
int j = - 1 # so you can do +=
1041
1026
int k
1042
1027
kh_int64_t * table
1043
- list uniques = []
1044
1028
1045
1029
table = kh_init_int64()
1046
1030
@@ -1049,12 +1033,12 @@ def mode_int64(ndarray[int64_t] values):
1049
1033
modes = np.empty(table.n_buckets, dtype = np.int64)
1050
1034
for k in range (table.n_buckets):
1051
1035
if kh_exist_int64(table, k):
1052
- val = table.vals[k]
1036
+ count = table.vals[k]
1053
1037
1054
- if val == max_val :
1038
+ if count == max_count :
1055
1039
j += 1
1056
- elif val > max_val :
1057
- max_val = val
1040
+ elif count > max_count :
1041
+ max_count = count
1058
1042
j = 0
1059
1043
else :
1060
1044
continue
0 commit comments