Skip to content

Commit b32f218

Browse files
committed
Merge pull request #10111 from gdementen/hashtablecleanup
CLN: cleanup hashtable.pyx
2 parents f6c7d89 + c2b58cb commit b32f218

File tree

1 file changed

+12
-28
lines changed

1 file changed

+12
-28
lines changed

pandas/hashtable.pyx

+12-28
Original file line number | Diff line number | Diff line change
@@ -211,7 +211,6 @@ cdef class StringHashTable(HashTable):
211211
def unique(self, ndarray[object] values):
212212
cdef:
213213
Py_ssize_t i, n = len(values)
214-
Py_ssize_t idx, count = 0
215214
int ret = 0
216215
object val
217216
char *buf
@@ -223,12 +222,9 @@ cdef class StringHashTable(HashTable):
223222
buf = util.get_c_string(val)
224223
k = kh_get_str(self.table, buf)
225224
if k == self.table.n_buckets:
226-
k = kh_put_str(self.table, buf, &ret)
227-
# print 'putting %s, %s' % (val, count)
228-
count += 1
225+
kh_put_str(self.table, buf, &ret)
229226
uniques.append(val)
230227

231-
# return None
232228
return uniques.to_array()
233229

234230
def factorize(self, ndarray[object] values):
@@ -258,7 +254,6 @@ cdef class StringHashTable(HashTable):
258254
labels[i] = count
259255
count += 1
260256

261-
# return None
262257
return reverse, labels
263258

264259
cdef class Int32HashTable(HashTable):
@@ -319,7 +314,6 @@ cdef class Int32HashTable(HashTable):
319314
def lookup(self, ndarray[int32_t] values):
320315
cdef:
321316
Py_ssize_t i, n = len(values)
322-
int ret = 0
323317
int32_t val
324318
khiter_t k
325319
ndarray[int32_t] locs = np.empty(n, dtype=np.int64)
@@ -357,7 +351,6 @@ cdef class Int32HashTable(HashTable):
357351
labels[i] = count
358352
count += 1
359353

360-
# return None
361354
return reverse, labels
362355

363356
cdef class Int64HashTable: #(HashTable):
@@ -518,7 +511,6 @@ cdef class Int64HashTable: #(HashTable):
518511
def unique(self, ndarray[int64_t] values):
519512
cdef:
520513
Py_ssize_t i, n = len(values)
521-
Py_ssize_t idx, count = 0
522514
int ret = 0
523515
ndarray result
524516
int64_t val
@@ -529,9 +521,8 @@ cdef class Int64HashTable: #(HashTable):
529521
val = values[i]
530522
k = kh_get_int64(self.table, val)
531523
if k == self.table.n_buckets:
532-
k = kh_put_int64(self.table, val, &ret)
524+
kh_put_int64(self.table, val, &ret)
533525
uniques.append(val)
534-
count += 1
535526

536527
result = uniques.to_array()
537528

@@ -644,7 +635,6 @@ cdef class Float64HashTable(HashTable):
644635
def unique(self, ndarray[float64_t] values):
645636
cdef:
646637
Py_ssize_t i, n = len(values)
647-
Py_ssize_t idx, count = 0
648638
int ret = 0
649639
float64_t val
650640
khiter_t k
@@ -657,9 +647,8 @@ cdef class Float64HashTable(HashTable):
657647
if val == val:
658648
k = kh_get_float64(self.table, val)
659649
if k == self.table.n_buckets:
660-
k = kh_put_float64(self.table, val, &ret)
650+
kh_put_float64(self.table, val, &ret)
661651
uniques.append(val)
662-
count += 1
663652
elif not seen_na:
664653
seen_na = 1
665654
uniques.append(ONAN)
@@ -786,7 +775,6 @@ cdef class PyObjectHashTable(HashTable):
786775
def unique(self, ndarray[object] values):
787776
cdef:
788777
Py_ssize_t i, n = len(values)
789-
Py_ssize_t idx, count = 0
790778
int ret = 0
791779
object val
792780
ndarray result
@@ -800,7 +788,7 @@ cdef class PyObjectHashTable(HashTable):
800788
if not _checknan(val):
801789
k = kh_get_pymap(self.table, <PyObject*>val)
802790
if k == self.table.n_buckets:
803-
k = kh_put_pymap(self.table, <PyObject*>val, &ret)
791+
kh_put_pymap(self.table, <PyObject*>val, &ret)
804792
uniques.append(val)
805793
elif not seen_na:
806794
seen_na = 1
@@ -918,7 +906,7 @@ cdef class Int64Factorizer:
918906

919907
cdef build_count_table_int64(ndarray[int64_t] values, kh_int64_t *table):
920908
cdef:
921-
int k
909+
khiter_t k
922910
Py_ssize_t i, n = len(values)
923911
int ret = 0
924912

@@ -938,7 +926,6 @@ cpdef value_count_int64(ndarray[int64_t] values):
938926
cdef:
939927
Py_ssize_t i
940928
kh_int64_t *table
941-
int ret = 0
942929
int k
943930

944931
table = kh_init_int64()
@@ -961,7 +948,7 @@ cdef build_count_table_object(ndarray[object] values,
961948
ndarray[uint8_t, cast=True] mask,
962949
kh_pymap_t *table):
963950
cdef:
964-
int k
951+
khiter_t k
965952
Py_ssize_t i, n = len(values)
966953
int ret = 0
967954

@@ -983,7 +970,7 @@ cdef build_count_table_object(ndarray[object] values,
983970
cpdef value_count_object(ndarray[object] values,
984971
ndarray[uint8_t, cast=True] mask):
985972
cdef:
986-
Py_ssize_t i = len(values)
973+
Py_ssize_t i
987974
kh_pymap_t *table
988975
int k
989976

@@ -1008,9 +995,7 @@ def mode_object(ndarray[object] values, ndarray[uint8_t, cast=True] mask):
1008995
int count, max_count = 2
1009996
int j = -1 # so you can do +=
1010997
int k
1011-
Py_ssize_t i, n = len(values)
1012998
kh_pymap_t *table
1013-
int ret = 0
1014999

10151000
table = kh_init_pymap()
10161001
build_count_table_object(values, mask, table)
@@ -1036,11 +1021,10 @@ def mode_object(ndarray[object] values, ndarray[uint8_t, cast=True] mask):
10361021

10371022
def mode_int64(ndarray[int64_t] values):
10381023
cdef:
1039-
int val, max_val = 2
1024+
int count, max_count = 2
10401025
int j = -1 # so you can do +=
10411026
int k
10421027
kh_int64_t *table
1043-
list uniques = []
10441028

10451029
table = kh_init_int64()
10461030

@@ -1049,12 +1033,12 @@ def mode_int64(ndarray[int64_t] values):
10491033
modes = np.empty(table.n_buckets, dtype=np.int64)
10501034
for k in range(table.n_buckets):
10511035
if kh_exist_int64(table, k):
1052-
val = table.vals[k]
1036+
count = table.vals[k]
10531037

1054-
if val == max_val:
1038+
if count == max_count:
10551039
j += 1
1056-
elif val > max_val:
1057-
max_val = val
1040+
elif count > max_count:
1041+
max_count = count
10581042
j = 0
10591043
else:
10601044
continue

0 commit comments

Comments
 (0)