CLN: remove obsolete MultiIndex engines

toobaz · toobaz · commit 79b4a0c924ea · 2018-01-11T19:00:36.000+01:00
diff --git a/pandas/_libs/hashtable.pxd b/pandas/_libs/hashtable.pxd
@@ -31,15 +31,6 @@ cdef class PyObjectHashTable(HashTable):
     cpdef get_item(self, object val)
     cpdef set_item(self, object key, Py_ssize_t val)
 
-cdef class MultiIndexHashTable(HashTable):
-    cdef:
-        kh_uint64_t *table
-        object mi
-
-    cpdef get_item(self, object val)
-    cpdef set_item(self, object key, Py_ssize_t val)
-    cdef inline void _check_for_collision(self, Py_ssize_t loc, object label)
-
 
 cdef class StringHashTable(HashTable):
     cdef kh_str_t *table
diff --git a/pandas/_libs/hashtable_class_helper.pxi.in b/pandas/_libs/hashtable_class_helper.pxi.in
@@ -899,139 +899,3 @@ cdef class PyObjectHashTable(HashTable):
                 count += 1
 
         return np.asarray(labels)
-
-
-cdef class MultiIndexHashTable(HashTable):
-
-    def __init__(self, size_hint=1):
-        self.table = kh_init_uint64()
-        self.mi = None
-        kh_resize_uint64(self.table, size_hint)
-
-    def __dealloc__(self):
-        if self.table is not NULL:
-            kh_destroy_uint64(self.table)
-            self.table = NULL
-
-    def __len__(self):
-        return self.table.size
-
-    def sizeof(self, deep=False):
-        """ return the size of my table in bytes """
-        return self.table.n_buckets * (sizeof(uint64_t) + # keys
-                                       sizeof(size_t) + # vals
-                                       sizeof(uint32_t)) # flags
-
-    def _check_for_collisions(self, int64_t[:] locs, object mi):
-        # validate that the locs map to the actual values
-        # provided in the mi
-        # we can only check if we *don't* have any missing values
-        # :<
-        cdef:
-            ndarray[int64_t] alocs
-
-        alocs = np.asarray(locs)
-        if (alocs != -1).all():
-
-            result = self.mi.take(locs)
-            if isinstance(mi, tuple):
-                from pandas import Index
-                mi = Index([mi])
-            if not result.equals(mi):
-                raise AssertionError(
-                    "hash collision\nlocs:\n{}\n"
-                    "result:\n{}\nmi:\n{}".format(alocs, result, mi))
-
-    cdef inline void _check_for_collision(self, Py_ssize_t loc, object label):
-        # validate that the loc maps to the actual value
-        # version of _check_for_collisions above for single label (tuple)
-
-        result = self.mi[loc]
-
-        if not all(l == r or (is_null_datetimelike(l)
-                              and is_null_datetimelike(r))
-                   for l, r in zip(result, label)):
-            raise AssertionError(
-                "hash collision\nloc:\n{}\n"
-                "result:\n{}\nmi:\n{}".format(loc, result, label))
-
-    def __contains__(self, object key):
-        try:
-            self.get_item(key)
-            return True
-        except (KeyError, ValueError, TypeError):
-            return False
-
-    cpdef get_item(self, object key):
-        cdef:
-            khiter_t k
-            uint64_t value
-            int64_t[:] locs
-            Py_ssize_t loc
-
-        value = self.mi._hashed_indexing_key(key)
-        k = kh_get_uint64(self.table, value)
-        if k != self.table.n_buckets:
-            loc = self.table.vals[k]
-            self._check_for_collision(loc, key)
-            return loc
-        else:
-            raise KeyError(key)
-
-    cpdef set_item(self, object key, Py_ssize_t val):
-        raise NotImplementedError
-
-    @cython.boundscheck(False)
-    def map_locations(self, object mi):
-        cdef:
-            Py_ssize_t i, n
-            ndarray[uint64_t] values
-            uint64_t val
-            int ret = 0
-            khiter_t k
-
-        self.mi = mi
-        n = len(mi)
-        values = mi._hashed_values
-
-        with nogil:
-            for i in range(n):
-                val = values[i]
-                k = kh_put_uint64(self.table, val, &ret)
-                self.table.vals[k] = i
-
-    @cython.boundscheck(False)
-    def lookup(self, object mi):
-        # look up with a target mi
-        cdef:
-            Py_ssize_t i, n
-            ndarray[uint64_t] values
-            int ret = 0
-            uint64_t val
-            khiter_t k
-            int64_t[:] locs
-
-        n = len(mi)
-        values = mi._hashed_values
-
-        locs = np.empty(n, dtype=np.int64)
-
-        with nogil:
-            for i in range(n):
-                val = values[i]
-                k = kh_get_uint64(self.table, val)
-                if k != self.table.n_buckets:
-                    locs[i] = self.table.vals[k]
-                else:
-                    locs[i] = -1
-
-        self._check_for_collisions(locs, mi)
-        return np.asarray(locs)
-
-    def unique(self, object mi):
-        raise NotImplementedError
-
-    def get_labels(self, object mi, ObjectVector uniques,
-                   Py_ssize_t count_prior, int64_t na_sentinel,
-                   bint check_null=True):
-        raise NotImplementedError
diff --git a/pandas/_libs/index.pyx b/pandas/_libs/index.pyx
@@ -686,71 +686,5 @@ cdef class BaseMultiIndexCodesEngine(object):
             return False
 
 
-
-cdef class MultiIndexObjectEngine(ObjectEngine):
-    """
-    provide the same interface as the MultiIndexEngine
-    but use the IndexEngine for computation
-
-    This provides good performance with samller MI's
-    """
-    def get_indexer(self, values):
-        # convert a MI to an ndarray
-        if hasattr(values, 'values'):
-            values = values.values
-        return super(MultiIndexObjectEngine, self).get_indexer(values)
-
-    cpdef get_loc(self, object val):
-
-        # convert a MI to an ndarray
-        if hasattr(val, 'values'):
-            val = val.values
-        return super(MultiIndexObjectEngine, self).get_loc(val)
-
-
-cdef class MultiIndexHashEngine(ObjectEngine):
-    """
-    Use a hashing based MultiIndex impl
-    but use the IndexEngine for computation
-
-    This provides good performance with larger MI's
-    """
-
-    def _call_monotonic(self, object mi):
-        # defer these back to the mi iteself
-        return (mi.is_monotonic_increasing,
-                mi.is_monotonic_decreasing,
-                mi.is_unique)
-
-    def get_backfill_indexer(self, other, limit=None):
-        # we coerce to ndarray-of-tuples
-        values = np.array(self._get_index_values())
-        return algos.backfill_object(values, other, limit=limit)
-
-    def get_pad_indexer(self, other, limit=None):
-        # we coerce to ndarray-of-tuples
-        values = np.array(self._get_index_values())
-        return algos.pad_object(values, other, limit=limit)
-
-    cpdef get_loc(self, object val):
-        if is_definitely_invalid_key(val):
-            raise TypeError("'{val}' is an invalid key".format(val=val))
-
-        self._ensure_mapping_populated()
-        if not self.unique:
-            return self._get_loc_duplicates(val)
-
-        try:
-            return self.mapping.get_item(val)
-        except TypeError:
-            raise KeyError(val)
-
-    def get_indexer(self, values):
-        self._ensure_mapping_populated()
-        return self.mapping.lookup(values)
-
-    cdef _make_hash_table(self, n):
-        return _hash.MultiIndexHashTable(n)
-
 # Generated from template.
 include "index_class_helper.pxi"