@@ -890,15 +890,12 @@ cdef class Int64Factorizer:
890
890
return labels
891
891
892
892
893
-
894
- def value_count_int64 (ndarray[int64_t] values ):
893
+ cdef build_count_table_int64(ndarray[int64_t] values, kh_int64_t * table):
895
894
cdef:
895
+ int k
896
896
Py_ssize_t i, n = len (values)
897
- kh_int64_t * table
898
897
int ret = 0
899
- list uniques = []
900
898
901
- table = kh_init_int64()
902
899
kh_resize_int64(table, n)
903
900
904
901
for i in range (n):
@@ -910,8 +907,17 @@ def value_count_int64(ndarray[int64_t] values):
910
907
k = kh_put_int64(table, val, & ret)
911
908
table.vals[k] = 1
912
909
913
- # for (k = kh_begin(h); k != kh_end(h); ++k)
914
- # if (kh_exist(h, k)) kh_value(h, k) = 1;
910
+
911
+ cpdef value_count_int64(ndarray[int64_t] values):
912
+ cdef:
913
+ Py_ssize_t i
914
+ kh_int64_t * table
915
+ int ret = 0
916
+ int k
917
+
918
+ table = kh_init_int64()
919
+ build_count_table_int64(values, table)
920
+
915
921
i = 0
916
922
result_keys = np.empty(table.n_occupied, dtype = np.int64)
917
923
result_counts = np.zeros(table.n_occupied, dtype = np.int64)
@@ -924,15 +930,15 @@ def value_count_int64(ndarray[int64_t] values):
924
930
925
931
return result_keys, result_counts
926
932
927
- def value_count_object (ndarray[object] values ,
928
- ndarray[uint8_t , cast = True ] mask):
933
+
934
+ cdef build_count_table_object(ndarray[object ] values,
935
+ ndarray[uint8_t, cast= True ] mask,
936
+ kh_pymap_t * table):
929
937
cdef:
938
+ int k
930
939
Py_ssize_t i, n = len (values)
931
- kh_pymap_t * table
932
940
int ret = 0
933
- list uniques = []
934
941
935
- table = kh_init_pymap()
936
942
kh_resize_pymap(table, n // 10 )
937
943
938
944
for i in range (n):
@@ -947,6 +953,17 @@ def value_count_object(ndarray[object] values,
947
953
k = kh_put_pymap(table, < PyObject* > val, & ret)
948
954
table.vals[k] = 1
949
955
956
+
957
+ cpdef value_count_object(ndarray[object ] values,
958
+ ndarray[uint8_t, cast= True ] mask):
959
+ cdef:
960
+ Py_ssize_t i = len (values)
961
+ kh_pymap_t * table
962
+ int k
963
+
964
+ table = kh_init_pymap()
965
+ build_count_table_object(values, mask, table)
966
+
950
967
i = 0
951
968
result_keys = np.empty(table.n_occupied, dtype = object )
952
969
result_counts = np.zeros(table.n_occupied, dtype = np.int64)
@@ -959,3 +976,64 @@ def value_count_object(ndarray[object] values,
959
976
960
977
return result_keys, result_counts
961
978
979
+
980
def mode_object(ndarray[object] values, ndarray[uint8_t, cast=True] mask):
    """
    Return the keys that share the highest tally in the count table.

    The table is filled by ``build_count_table_object(values, mask, table)``;
    ``mask`` is only forwarded to that helper. Keys whose tally is below 2
    are never reported, so the result is empty when nothing repeats.

    Parameters
    ----------
    values : ndarray[object]
    mask : ndarray[uint8_t]

    Returns
    -------
    ndarray of dtype object
        The tied top keys, in hash-bucket iteration order.
    """
    cdef:
        # A tally must reach this threshold before a key can be a mode.
        int count, max_count = 2
        # Index of the last mode written; -1 means "none yet", so the
        # first hit can simply increment it.
        int j = -1
        int k
        kh_pymap_t *table

    table = kh_init_pymap()
    try:
        build_count_table_object(values, mask, table)

        # One slot per bucket is an upper bound on the number of modes.
        modes = np.empty(table.n_buckets, dtype=np.object_)
        for k in range(table.n_buckets):
            if not kh_exist_pymap(table, k):
                continue

            count = table.vals[k]
            if count == max_count:
                # Tie with the current best: append.
                j += 1
            elif count > max_count:
                # New best: discard earlier candidates by restarting at 0.
                max_count = count
                j = 0
            else:
                continue
            modes[j] = <object> table.keys[k]
    finally:
        # Release the C hash table even if building or allocating raised.
        kh_destroy_pymap(table)

    return modes[:j + 1]
1009
+
1010
+
1011
def mode_int64(ndarray[int64_t] values):
    """
    Return the keys that share the highest tally in the count table.

    The table is filled by ``build_count_table_int64(values, table)``.
    Keys whose tally is below 2 are never reported, so the result is empty
    when nothing repeats.

    Parameters
    ----------
    values : ndarray[int64_t]

    Returns
    -------
    ndarray of dtype int64
        The tied top keys, in hash-bucket iteration order.
    """
    cdef:
        # A tally must reach this threshold before a key can be a mode.
        int count, max_count = 2
        # Index of the last mode written; -1 means "none yet", so the
        # first hit can simply increment it.
        int j = -1
        int k
        kh_int64_t *table

    table = kh_init_int64()
    try:
        build_count_table_int64(values, table)

        # One slot per bucket is an upper bound on the number of modes.
        modes = np.empty(table.n_buckets, dtype=np.int64)
        for k in range(table.n_buckets):
            if not kh_exist_int64(table, k):
                continue

            count = table.vals[k]
            if count == max_count:
                # Tie with the current best: append.
                j += 1
            elif count > max_count:
                # New best: discard earlier candidates by restarting at 0.
                max_count = count
                j = 0
            else:
                continue
            modes[j] = table.keys[k]
    finally:
        # Release the C hash table even if building or allocating raised.
        kh_destroy_int64(table)

    return modes[:j + 1]
0 commit comments