Skip to content

Commit b10722f

Browse files
is_lexsorted
1 parent 698d3dd commit b10722f

File tree

2 files changed

+29
-40
lines changed

2 files changed

+29
-40
lines changed

pandas/_libs_numba/algos.py

+26-34
Original file line numberDiff line numberDiff line change
@@ -141,40 +141,32 @@
141141
# return result
142142

143143

144-
# @cython.wraparound(False)
145-
# @cython.boundscheck(False)
146-
# def is_lexsorted(list_of_arrays: list) -> bint:
147-
# cdef:
148-
# Py_ssize_t i
149-
# Py_ssize_t n, nlevels
150-
# int64_t k, cur, pre
151-
# ndarray arr
152-
# bint result = True
153-
154-
# nlevels = len(list_of_arrays)
155-
# n = len(list_of_arrays[0])
156-
157-
# cdef int64_t **vecs = <int64_t**>malloc(nlevels * sizeof(int64_t*))
158-
# for i in range(nlevels):
159-
# arr = list_of_arrays[i]
160-
# assert arr.dtype.name == 'int64'
161-
# vecs[i] = <int64_t*>cnp.PyArray_DATA(arr)
162-
163-
# # Assume uniqueness??
164-
# with nogil:
165-
# for i in range(1, n):
166-
# for k in range(nlevels):
167-
# cur = vecs[k][i]
168-
# pre = vecs[k][i -1]
169-
# if cur == pre:
170-
# continue
171-
# elif cur > pre:
172-
# break
173-
# else:
174-
# result = False
175-
# break
176-
# free(vecs)
177-
# return result
144+
def is_lexsorted(list_of_arrays: list[np.ndarray]) -> bool:
    """
    Check whether the rows described by a list of key arrays are lexsorted.

    Parameters
    ----------
    list_of_arrays : list of np.ndarray
        One 1-D array per level, most significant level first. All arrays
        must have the same length.

    Returns
    -------
    bool
        True if no row compares lexicographically smaller than the row
        before it (ties are allowed), False otherwise.

    Raises
    ------
    IndexError
        If ``list_of_arrays`` is empty.
    ValueError
        If the arrays do not all have the same length.
    """
    # Indexing the first level keeps the IndexError on empty input.
    n = len(list_of_arrays[0])

    # concatenate + reshape would silently mis-align the levels whenever the
    # total size happened to equal nlevels * n (e.g. lengths 2, 1, 3), so
    # validate the lengths up front instead.
    if any(len(level) != n for level in list_of_arrays):
        raise ValueError(
            "all arrays passed to is_lexsorted must have the same length"
        )

    # Shape (nlevels, n): row k holds level k.
    vecs = np.stack(list_of_arrays)
    return _is_lexsorted(vecs)
150+
151+
152+
@numba.njit
def _is_lexsorted(vecs: np.ndarray) -> bool:
    """
    Return whether the columns of ``vecs`` are in lexicographic order.

    Parameters
    ----------
    vecs : np.ndarray
        2-D array of shape (nlevels, n). Row ``k`` holds the values of level
        ``k``; level 0 is compared first.

    Returns
    -------
    bool
        False as soon as some column ``i`` sorts before column ``i - 1``,
        True if no such column exists.
    """
    nlevels, n = vecs.shape

    for i in range(1, n):
        for k in range(nlevels):
            cur = vecs[k, i]
            pre = vecs[k, i - 1]
            if cur == pre:
                # Tie at this level: the next level decides.
                continue
            if cur > pre:
                # Strictly increasing here, so deeper levels are irrelevant.
                break
            # Out of order: no later column can change the answer, so stop
            # scanning instead of only leaving the inner loop.
            return False

    return True
178170

179171

180172
# @cython.boundscheck(False)

pandas/core/indexes/multi.py

+3-6
Original file line numberDiff line numberDiff line change
@@ -22,11 +22,11 @@
2222
from pandas._config import get_option
2323

2424
from pandas._libs import (
25-
algos as libalgos,
2625
index as libindex,
2726
lib,
2827
)
2928
from pandas._libs.hashtable import duplicated_int64
29+
from pandas._libs_numba import algos as libalgos
3030
from pandas._typing import (
3131
AnyArrayLike,
3232
DtypeObj,
@@ -1586,9 +1586,7 @@ def is_monotonic_increasing(self) -> bool:
15861586

15871587
if all(level.is_monotonic for level in self.levels):
15881588
# If each level is sorted, we can operate on the codes directly. GH27495
1589-
return libalgos.is_lexsorted(
1590-
[x.astype("int64", copy=False) for x in self.codes]
1591-
)
1589+
return libalgos.is_lexsorted(self.codes)
15921590

15931591
# reversed() because lexsort() wants the most significant key last.
15941592
values = [
@@ -3794,9 +3792,8 @@ def isin(self, values, level=None) -> np.ndarray:
37943792

37953793
def _lexsort_depth(codes: List[np.ndarray], nlevels: int) -> int:
37963794
"""Count depth (up to a maximum of `nlevels`) with which codes are lexsorted."""
3797-
int64_codes = [ensure_int64(level_codes) for level_codes in codes]
37983795
for k in range(nlevels, 0, -1):
3799-
if libalgos.is_lexsorted(int64_codes[:k]):
3796+
if libalgos.is_lexsorted(codes[:k]):
38003797
return k
38013798
return 0
38023799

0 commit comments

Comments
 (0)