Skip to content

Commit 8ae9b9a

Browse files
CLN: Combined value_count_int64 and value_count_float64 into a single
routine using fused types.
1 parent 874c317 commit 8ae9b9a

File tree

3 files changed

+38
-45
lines changed

3 files changed

+38
-45
lines changed

pandas/core/algorithms.py

+3-3
Original file line numberDiff line numberDiff line change
@@ -232,7 +232,7 @@ def value_counts(values, sort=True, ascending=False, normalize=False,
232232
values = PeriodIndex(values, name=name)
233233

234234
values = values.view(np.int64)
235-
keys, counts = htable.value_count_int64(values)
235+
keys, counts = htable.value_count_scalar64(values, dropna)
236236

237237
if dropna:
238238
from pandas.tslib import iNaT
@@ -244,10 +244,10 @@ def value_counts(values, sort=True, ascending=False, normalize=False,
244244

245245
elif com.is_integer_dtype(dtype):
246246
values = com._ensure_int64(values)
247-
keys, counts = htable.value_count_int64(values)
247+
keys, counts = htable.value_count_scalar64(values, dropna)
248248
elif com.is_float_dtype(dtype):
249249
values = com._ensure_float64(values)
250-
keys, counts = htable.value_count_float64(values, dropna)
250+
keys, counts = htable.value_count_scalar64(values, dropna)
251251

252252
else:
253253
values = com._ensure_object(values)

pandas/core/categorical.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -1030,7 +1030,7 @@ def value_counts(self, dropna=True):
10301030
from pandas.core.index import CategoricalIndex
10311031

10321032
cat = self.dropna() if dropna else self
1033-
keys, counts = htable.value_count_int64(com._ensure_int64(cat._codes))
1033+
keys, counts = htable.value_count_scalar64(com._ensure_int64(cat._codes), dropna)
10341034
result = Series(counts, index=keys)
10351035

10361036
ix = np.arange(len(cat.categories), dtype='int64')

pandas/hashtable.pyx

+34-41
Original file line numberDiff line numberDiff line change
@@ -887,29 +887,48 @@ cdef build_count_table_float64(float64_t[:] values, kh_float64_t *table, bint dr
887887
k = kh_put_float64(table, val, &ret)
888888
table.vals[k] = 1
889889

890+
890891
@cython.boundscheck(False)
891-
cpdef value_count_float64(float64_t[:] values, bint dropna):
892+
cpdef value_count_scalar64(sixty_four_bit_scalar[:] values, bint dropna):
892893
cdef:
893894
Py_ssize_t i
894-
kh_float64_t * table
895-
float64_t[:] result_keys
895+
kh_float64_t *ftable
896+
kh_int64_t *itable
897+
sixty_four_bit_scalar[:] result_keys
896898
int64_t[:] result_counts
897899
int k
898900

899-
table = kh_init_float64()
900-
build_count_table_float64(values, table, dropna)
901-
902901
i = 0
903-
result_keys = np.empty(table.n_occupied, dtype=np.float64)
904-
result_counts = np.zeros(table.n_occupied, dtype=np.int64)
905902

906-
with nogil:
907-
for k in range(table.n_buckets):
908-
if kh_exist_float64(table, k):
909-
result_keys[i] = table.keys[k]
910-
result_counts[i] = table.vals[k]
911-
i += 1
912-
kh_destroy_float64(table)
903+
if sixty_four_bit_scalar is float64_t:
904+
ftable = kh_init_float64()
905+
build_count_table_float64(values, ftable, dropna)
906+
907+
result_keys = np.empty(ftable.n_occupied, dtype=np.float64)
908+
result_counts = np.zeros(ftable.n_occupied, dtype=np.int64)
909+
910+
with nogil:
911+
for k in range(ftable.n_buckets):
912+
if kh_exist_float64(ftable, k):
913+
result_keys[i] = ftable.keys[k]
914+
result_counts[i] = ftable.vals[k]
915+
i += 1
916+
kh_destroy_float64(ftable)
917+
918+
elif sixty_four_bit_scalar is int64_t:
919+
itable = kh_init_int64()
920+
build_count_table_int64(values, itable)
921+
922+
result_keys = np.empty(itable.n_occupied, dtype=np.int64)
923+
result_counts = np.zeros(itable.n_occupied, dtype=np.int64)
924+
925+
with nogil:
926+
for k in range(itable.n_buckets):
927+
if kh_exist_int64(itable, k):
928+
result_keys[i] = itable.keys[k]
929+
result_counts[i] = itable.vals[k]
930+
i += 1
931+
kh_destroy_int64(itable)
913932

914933
return np.asarray(result_keys), np.asarray(result_counts)
915934

@@ -934,32 +953,6 @@ cdef build_count_table_int64(int64_t[:] values, kh_int64_t *table):
934953
table.vals[k] = 1
935954

936955

937-
@cython.boundscheck(False)
938-
cpdef value_count_int64(int64_t[:] values):
939-
cdef:
940-
Py_ssize_t i
941-
kh_int64_t *table
942-
int64_t[:] result_keys, result_counts
943-
int k
944-
945-
table = kh_init_int64()
946-
build_count_table_int64(values, table)
947-
948-
i = 0
949-
result_keys = np.empty(table.n_occupied, dtype=np.int64)
950-
result_counts = np.zeros(table.n_occupied, dtype=np.int64)
951-
952-
with nogil:
953-
for k in range(table.n_buckets):
954-
if kh_exist_int64(table, k):
955-
result_keys[i] = table.keys[k]
956-
result_counts[i] = table.vals[k]
957-
i += 1
958-
kh_destroy_int64(table)
959-
960-
return np.asarray(result_keys), np.asarray(result_counts)
961-
962-
963956
cdef build_count_table_object(ndarray[object] values,
964957
ndarray[uint8_t, cast=True] mask,
965958
kh_pymap_t *table):

0 commit comments

Comments
 (0)