@@ -899,139 +899,3 @@ cdef class PyObjectHashTable(HashTable):
899
899
count += 1
900
900
901
901
return np.asarray(labels)
902
-
903
-
904
cdef class MultiIndexHashTable(HashTable):
    """
    Hash table that maps the uint64 hash of each MultiIndex entry to the
    integer position of that entry.

    The table is filled by ``map_locations``, which also keeps a reference
    to the MultiIndex in ``self.mi`` so that later lookups can be verified
    against the real values (two different labels may share a hash).
    """

    def __init__(self, size_hint=1):
        # Allocate the underlying khash table and reserve room for
        # ``size_hint`` entries; no MultiIndex is attached yet.
        self.table = kh_init_uint64()
        self.mi = None
        kh_resize_uint64(self.table, size_hint)

    def __dealloc__(self):
        # Free the khash table exactly once and clear the pointer.
        if self.table is not NULL:
            kh_destroy_uint64(self.table)
            self.table = NULL

    def __len__(self):
        # Size as reported by the khash table itself.
        return self.table.size

    def sizeof(self, deep=False):
        """ return the size of my table in bytes """
        # ``deep`` is accepted but not used by this implementation.
        return self.table.n_buckets * (sizeof(uint64_t) +  # keys
                                       sizeof(size_t) +  # vals
                                       sizeof(uint32_t))  # flags

    def _check_for_collisions(self, int64_t[:] locs, object mi):
        """
        Verify that the positions in ``locs`` point at entries of
        ``self.mi`` that equal the labels in ``mi``.

        The check only runs when no position is -1 (i.e. nothing is
        missing).  A single tuple passed as ``mi`` is wrapped in an Index
        before the comparison.

        Raises
        ------
        AssertionError
            If the entries taken from ``self.mi`` do not equal ``mi``.
        """
        cdef:
            ndarray[int64_t] alocs

        alocs = np.asarray(locs)
        if (alocs != -1).all():

            result = self.mi.take(locs)
            if isinstance(mi, tuple):
                from pandas import Index
                mi = Index([mi])
            if not result.equals(mi):
                raise AssertionError(
                    "hash collision\nlocs:\n{}\n"
                    "result:\n{}\nmi:\n{}".format(alocs, result, mi))

    cdef inline void _check_for_collision(self, Py_ssize_t loc, object label):
        # Single-label (tuple) counterpart of _check_for_collisions: confirm
        # that the entry stored at ``loc`` matches ``label`` element by
        # element, treating a pair of null datetimelike values as equal.
        #
        # NOTE(review): this is a ``cdef ... void`` method with no ``except``
        # clause; on Cython releases before 3.0 such a function does not
        # propagate Python exceptions, so the AssertionError below may be
        # reported and then discarded instead of reaching the caller --
        # confirm against the Cython version in use.  The signature is left
        # untouched because it presumably has to match a .pxd declaration.

        result = self.mi[loc]

        if not all(left == right or (is_null_datetimelike(left)
                                     and is_null_datetimelike(right))
                   for left, right in zip(result, label)):
            raise AssertionError(
                "hash collision\nloc:\n{}\n"
                "result:\n{}\nmi:\n{}".format(loc, result, label))

    def __contains__(self, object key):
        # Membership is defined by whether get_item succeeds; keys that
        # cannot be looked up (KeyError, ValueError, TypeError) are treated
        # as absent.
        try:
            self.get_item(key)
            return True
        except (KeyError, ValueError, TypeError):
            return False

    cpdef get_item(self, object key):
        """
        Return the stored position for ``key``.

        The key is hashed with ``self.mi._hashed_indexing_key`` and the
        hash is looked up in the table; a hit is then passed to
        ``_check_for_collision``.

        Raises
        ------
        KeyError
            If the hash of ``key`` is not present in the table.
        """
        cdef:
            khiter_t k
            uint64_t value
            Py_ssize_t loc

        value = self.mi._hashed_indexing_key(key)
        k = kh_get_uint64(self.table, value)
        if k != self.table.n_buckets:
            loc = self.table.vals[k]
            self._check_for_collision(loc, key)
            return loc
        else:
            raise KeyError(key)

    cpdef set_item(self, object key, Py_ssize_t val):
        # Not supported by this table.
        raise NotImplementedError

    @cython.boundscheck(False)
    def map_locations(self, object mi):
        """
        Attach ``mi`` to the table and record, for every hash in
        ``mi._hashed_values``, the position at which it occurs.
        """
        cdef:
            Py_ssize_t i, n
            ndarray[uint64_t] values
            uint64_t val
            int ret = 0
            khiter_t k

        self.mi = mi
        n = len(mi)
        values = mi._hashed_values

        # Only C-level data is touched in the loop, so the GIL is released.
        with nogil:
            for i in range(n):
                val = values[i]
                k = kh_put_uint64(self.table, val, &ret)
                self.table.vals[k] = i

    @cython.boundscheck(False)
    def lookup(self, object mi):
        """
        Look up every entry of the target ``mi`` by its hash.

        Returns
        -------
        ndarray of int64
            One stored position per entry of ``mi``; -1 where the hash is
            not present in the table.

        Notes
        -----
        The positions are passed through ``_check_for_collisions`` before
        being returned.
        """
        cdef:
            Py_ssize_t i, n
            ndarray[uint64_t] values
            uint64_t val
            khiter_t k
            int64_t[:] locs

        n = len(mi)
        values = mi._hashed_values

        locs = np.empty(n, dtype=np.int64)

        # Only C-level data is touched in the loop, so the GIL is released.
        with nogil:
            for i in range(n):
                val = values[i]
                k = kh_get_uint64(self.table, val)
                if k != self.table.n_buckets:
                    locs[i] = self.table.vals[k]
                else:
                    locs[i] = -1

        self._check_for_collisions(locs, mi)
        return np.asarray(locs)

    def unique(self, object mi):
        # Not supported by this table.
        raise NotImplementedError

    def get_labels(self, object mi, ObjectVector uniques,
                   Py_ssize_t count_prior, int64_t na_sentinel,
                   bint check_null=True):
        # Not supported by this table.
        raise NotImplementedError
0 commit comments