@@ -4,6 +4,8 @@ Template for each `dtype` helper function for hashtable
4
4
WARNING: DO NOT edit .pxi FILE directly, .pxi is generated from .pxi.in
5
5
"""
6
6
7
+ from pandas.core.dtypes.missing import array_equivalent
8
+
7
9
#----------------------------------------------------------------------
8
10
# VectorData
9
11
#----------------------------------------------------------------------
@@ -921,6 +923,16 @@ cdef class MultiIndexHashTable(HashTable):
921
923
"hash collision\nlocs:\n{}\n"
922
924
"result:\n{}\nmi:\n{}".format(alocs, result, mi))
923
925
926
def _check_for_collision(self, Py_ssize_t loc, object label):
    """
    Verify that position ``loc`` really holds ``label``.

    Single-label (tuple) counterpart of ``_check_for_collisions``: the
    entry stored at ``self.mi[loc]`` is compared against ``label`` with
    ``array_equivalent``.

    Parameters
    ----------
    loc : Py_ssize_t
        position used to index ``self.mi``
    label : object
        the key that was looked up

    Raises
    ------
    AssertionError
        if the entry found at ``loc`` is not equivalent to ``label``
    """
    found = self.mi[loc]

    # happy path: the stored entry matches the requested label
    if array_equivalent(found, label):
        return

    # the hashed position points at a different entry than the one requested
    raise AssertionError(
        "hash collision\nloc:\n{}\n"
        "result:\n{}\nmi:\n{}".format(loc, found, label))
935
+
924
936
def __contains__(self, object key):
925
937
try:
926
938
self.get_item(key)
@@ -939,8 +951,7 @@ cdef class MultiIndexHashTable(HashTable):
939
951
k = kh_get_uint64(self.table, value)
940
952
if k != self.table.n_buckets:
941
953
loc = self.table.vals[k]
942
- locs = np.array([loc], dtype=np.int64)
943
- self._check_for_collisions(locs, key)
954
+ self._check_for_collision(loc, key)
944
955
return loc
945
956
else:
946
957
raise KeyError(key)
0 commit comments