Skip to content

Commit aca4453

Browse files
PERF: improve hash collision check for single MI labels
1 parent 4bdbcb6 commit aca4453

File tree

1 file changed

+13
-2
lines changed

1 file changed

+13
-2
lines changed

pandas/_libs/hashtable_class_helper.pxi.in

+13-2
Original file line number | Diff line number | Diff line change
@@ -4,6 +4,8 @@ Template for each `dtype` helper function for hashtable
44
WARNING: DO NOT edit .pxi FILE directly, .pxi is generated from .pxi.in
55
"""
66

7+
from pandas.core.dtypes.missing import array_equivalent
8+
79
#----------------------------------------------------------------------
810
# VectorData
911
#----------------------------------------------------------------------
@@ -921,6 +923,16 @@ cdef class MultiIndexHashTable(HashTable):
921923
"hash collision\nlocs:\n{}\n"
922924
"result:\n{}\nmi:\n{}".format(alocs, result, mi))
923925

926+
def _check_for_collision(self, Py_ssize_t loc, object label):
    """
    Validate that ``loc`` maps to the actual value of ``label``.

    Single-label (tuple) counterpart of ``_check_for_collisions``: it
    compares the one entry stored at ``self.mi[loc]`` against ``label``.

    Parameters
    ----------
    loc : Py_ssize_t
        Position in ``self.mi`` that the hash table returned for ``label``.
    label : object
        The key that was looked up.

    Raises
    ------
    AssertionError
        If ``self.mi[loc]`` is not ``array_equivalent`` to ``label``,
        i.e. the hash matched but the stored value differs.
    """
    result = self.mi[loc]

    if not array_equivalent(result, label):
        # The third field is the looked-up label, not the MultiIndex,
        # so it is reported under "label:" rather than "mi:".
        raise AssertionError(
            "hash collision\nloc:\n{}\n"
            "result:\n{}\nlabel:\n{}".format(loc, result, label))
935+
924936
def __contains__(self, object key):
925937
try:
926938
self.get_item(key)
@@ -939,8 +951,7 @@ cdef class MultiIndexHashTable(HashTable):
939951
k = kh_get_uint64(self.table, value)
940952
if k != self.table.n_buckets:
941953
loc = self.table.vals[k]
942-
locs = np.array([loc], dtype=np.int64)
943-
self._check_for_collisions(locs, key)
954+
self._check_for_collision(loc, key)
944955
return loc
945956
else:
946957
raise KeyError(key)

0 commit comments

Comments
 (0)