@@ -77,54 +77,79 @@ cdef build_count_table_{{dtype}}(const {{dtype}}_t[:] values,
77
77
@cython.wraparound(False)
@cython.boundscheck(False)
{{if dtype == 'object'}}
cpdef stable_value_count_{{dtype}}(ndarray[{{dtype}}] values, bint dropna):
{{else}}
cpdef stable_value_count_{{dtype}}(const {{dtype}}_t[:] values, bint dropna):
{{endif}}
    """
    Count occurrences of each distinct value, keeping first-seen order.

    Parameters
    ----------
    values : 1-d array of {{dtype}}
        Input to be counted.
    dropna : bint
        When true, elements flagged by the null check used below
        (``checknull`` for object, ``is_nan_{{c_type}}`` otherwise)
        are skipped and never counted.

    Returns
    -------
    tuple
        ``(keys, counts)`` as produced by ``to_array()`` on the two result
        vectors; ``keys[j]`` is the j-th distinct value encountered while
        scanning ``values`` from the start and ``counts[j]`` is the number
        of times it occurred.
    """
    cdef:
        Py_ssize_t i = 0
        Py_ssize_t n = len(values)
        size_t unique_key_index = 0
        size_t unique_key_count = 0
        kh_{{ttype}}_t *table

        # Don't use Py_ssize_t, since table.n_buckets is unsigned
        khiter_t k

        {{c_type}} val

        # Status flag written by kh_put; it is not inspected afterwards.
        int ret = 0

    # The vector class name is derived from the dtype name; unsigned dtypes
    # need the leading "U" capitalised as well (e.g. uint64 -> UInt64Vector).
    {{if dtype[0] != 'u'}}
    result_keys = {{dtype.title()}}Vector()
    {{else}}
    result_keys = {{'U' + dtype[1:].title()}}Vector()
    {{endif}}
    result_counts = Int64Vector()

    # The hash table does not store counts. It maps each key to the position
    # that key occupies in result_keys / result_counts, so the output order is
    # the order of first appearance rather than the table's bucket order.
    table = kh_init_{{ttype}}()
    try:
        {{if dtype == 'object'}}
        kh_resize_{{ttype}}(table, n // 10)

        for i in range(n):
            val = values[i]
            if not checknull(val) or not dropna:
                k = kh_get_{{ttype}}(table, <PyObject*>val)
                if k != table.n_buckets:
                    # Key seen before: bump the count at its recorded slot.
                    unique_key_index = table.vals[k]
                    result_counts.data.data[unique_key_index] += 1
                else:
                    # New key: record its slot, then append key and count.
                    k = kh_put_{{ttype}}(table, <PyObject*>val, &ret)
                    table.vals[k] = unique_key_count
                    result_keys.append(val)
                    result_counts.append(1)
                    unique_key_count += 1
        {{else}}
        kh_resize_{{ttype}}(table, n)

        for i in range(n):
            val = {{to_c_type}}(values[i])

            if not is_nan_{{c_type}}(val) or not dropna:
                k = kh_get_{{ttype}}(table, val)
                if k != table.n_buckets:
                    # Key seen before: bump the count at its recorded slot.
                    unique_key_index = table.vals[k]
                    result_counts.data.data[unique_key_index] += 1
                else:
                    # New key: record its slot, then append key and count.
                    k = kh_put_{{ttype}}(table, val, &ret)
                    table.vals[k] = unique_key_count
                    result_keys.append(val)
                    result_counts.append(1)
                    unique_key_count += 1
        {{endif}}
    finally:
        # Release the table even if an append above raised.
        kh_destroy_{{ttype}}(table)

    return result_keys.to_array(), result_counts.to_array()
144
+
145
+
146
{{if dtype == 'object'}}
cpdef value_count_{{dtype}}(ndarray[{{dtype}}] values, bint dropna):
    # Forwarder to the order-preserving counter. The caller's `dropna` is
    # accepted but not passed on: the object specialisation always requests
    # that nulls be dropped.
    return stable_value_count_{{dtype}}(values, True)
{{else}}
cpdef value_count_{{dtype}}(const {{dtype}}_t[:] values, bint dropna):
    # Forwarder to the order-preserving counter; `dropna` is passed through
    # unchanged.
    return stable_value_count_{{dtype}}(values, dropna)
{{endif}}
128
153
129
154
130
155
@cython.wraparound(False)
0 commit comments