is_lexsorted

simonjayhawkins · simonjayhawkins · commit b10722f51780 · 2021-02-24T19:22:40.000Z
diff --git a/pandas/_libs_numba/algos.py b/pandas/_libs_numba/algos.py
@@ -141,40 +141,32 @@
 #     return result
 
 
-# @cython.wraparound(False)
-# @cython.boundscheck(False)
-# def is_lexsorted(list_of_arrays: list) -> bint:
-#     cdef:
-#         Py_ssize_t i
-#         Py_ssize_t n, nlevels
-#         int64_t k, cur, pre
-#         ndarray arr
-#         bint result = True
-
-#     nlevels = len(list_of_arrays)
-#     n = len(list_of_arrays[0])
-
-#     cdef int64_t **vecs = <int64_t**>malloc(nlevels * sizeof(int64_t*))
-#     for i in range(nlevels):
-#         arr = list_of_arrays[i]
-#         assert arr.dtype.name == 'int64'
-#         vecs[i] = <int64_t*>cnp.PyArray_DATA(arr)
-
-#     # Assume uniqueness??
-#     with nogil:
-#         for i in range(1, n):
-#             for k in range(nlevels):
-#                 cur = vecs[k][i]
-#                 pre = vecs[k][i -1]
-#                 if cur == pre:
-#                     continue
-#                 elif cur > pre:
-#                     break
-#                 else:
-#                     result = False
-#                     break
-#     free(vecs)
-#     return result
+def is_lexsorted(list_of_arrays: list[np.ndarray]) -> bool:
+    """
+    Check whether the positions described by the arrays are lexsorted.
+
+    Parameters
+    ----------
+    list_of_arrays : list of np.ndarray
+        One 1-D array per level, most significant level first. All arrays
+        must have the same length.
+
+    Returns
+    -------
+    bool
+        False if some position compares lexicographically smaller than the
+        position before it, True otherwise.
+
+    Raises
+    ------
+    ValueError
+        If the list is empty or the arrays do not all have the same shape.
+    """
+    # np.stack yields the same (nlevels, n) matrix as concatenate + reshape for
+    # equal-length inputs, but rejects unequal lengths instead of silently
+    # re-slicing the flattened data into rows that no longer match the levels.
+    return _is_lexsorted(np.stack(list_of_arrays))
+
+
+@numba.njit
+def _is_lexsorted(vecs: np.ndarray) -> bool:
+    """
+    Return True if the columns of ``vecs`` are in lexicographic order.
+
+    ``vecs`` is 2-D with shape ``(nlevels, n)``. Column ``i`` is compared with
+    column ``i - 1`` one level at a time, row 0 being the most significant
+    key. Identical adjacent columns are accepted.
+    """
+    nlevels, n = vecs.shape
+
+    for i in range(1, n):
+        for k in range(nlevels):
+            cur = vecs[k, i]
+            pre = vecs[k, i - 1]
+            if cur == pre:
+                # Tie on this level: the next level decides.
+                continue
+            if cur > pre:
+                # This level already orders the pair; skip the remaining levels.
+                break
+            # One out-of-order pair settles the answer, so stop here instead
+            # of scanning the remaining columns.
+            return False
+
+    return True
 
 
 # @cython.boundscheck(False)
diff --git a/pandas/core/indexes/multi.py b/pandas/core/indexes/multi.py
@@ -22,11 +22,11 @@
 from pandas._config import get_option
 
 from pandas._libs import (
-    algos as libalgos,
     index as libindex,
     lib,
 )
 from pandas._libs.hashtable import duplicated_int64
+from pandas._libs_numba import algos as libalgos
 from pandas._typing import (
     AnyArrayLike,
     DtypeObj,
@@ -1586,9 +1586,7 @@ def is_monotonic_increasing(self) -> bool:
 
         if all(level.is_monotonic for level in self.levels):
             # If each level is sorted, we can operate on the codes directly. GH27495
-            return libalgos.is_lexsorted(
-                [x.astype("int64", copy=False) for x in self.codes]
-            )
+            return libalgos.is_lexsorted(self.codes)
 
         # reversed() because lexsort() wants the most significant key last.
         values = [
@@ -3794,9 +3792,8 @@ def isin(self, values, level=None) -> np.ndarray:
 
 def _lexsort_depth(codes: List[np.ndarray], nlevels: int) -> int:
     """Count depth (up to a maximum of `nlevels`) with which codes are lexsorted."""
-    int64_codes = [ensure_int64(level_codes) for level_codes in codes]
+    # Try the longest prefix of levels first and shrink it one level at a time;
+    # the first prefix that is lexsorted gives the depth.
     for k in range(nlevels, 0, -1):
-        if libalgos.is_lexsorted(int64_codes[:k]):
+        # The codes are handed to is_lexsorted as they are, without an int64 cast.
+        if libalgos.is_lexsorted(codes[:k]):
             return k
+    # Not even the first level on its own is lexsorted.
     return 0