Skip to content

Commit 49675be

Browse files
committed
ENH: implemented lexsort_depth and is_lexsorted
1 parent 92052ca commit 49675be

File tree

3 files changed

+71
-8
lines changed

3 files changed

+71
-8
lines changed

pandas/core/index.py

+23-8
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,7 @@
88
from pandas.core.common import (_format, adjoin as _adjoin, _stringify,
99
_ensure_index, _is_bool_indexer,
1010
_asarray_tuplesafe)
11-
from pandas.util.decorators import deprecate
11+
from pandas.util.decorators import deprecate, cache_readonly
1212
import pandas.core.common as common
1313
import pandas._tseries as _tseries
1414

@@ -555,8 +555,25 @@ def format(self, space=2, sparsify=True, vertical=False, adjoin=True):
555555
def is_all_dates(self):
556556
return False
557557

558-
def is_sorted(self):
559-
raise NotImplementedError
558+
def is_lexsorted(self):
    """
    Report whether the index labels are lexicographically sorted.

    Returns
    -------
    bool
        True when ``lexsort_depth`` covers every level, i.e. it equals
        ``nlevels``; False otherwise.
    """
    depth = self.lexsort_depth
    return depth == self.nlevels
563+
564+
@cache_readonly
def lexsort_depth(self):
    """
    Number of leading levels whose labels are lexicographically sorted.

    If ``sortorder`` has been set it is trusted instead of inspecting the
    labels: a ``sortorder`` of 0 reports ``nlevels``, any other value
    reports 0. Otherwise the largest ``k`` for which
    ``_tseries.is_lexsorted(self.labels[:k])`` holds is returned, or 0 if
    no prefix of the levels is sorted.
    """
    declared = self.sortorder
    if declared is not None:
        return self.nlevels if declared == 0 else 0

    # Try the longest prefix of levels first and shrink until one passes.
    depth = self.nlevels
    while depth > 0:
        if _tseries.is_lexsorted(self.labels[:depth]):
            return depth
        depth -= 1

    return 0
560577

561578
@classmethod
562579
def from_arrays(cls, arrays, sortorder=None):
@@ -864,9 +881,6 @@ def slice_locs(self, start=None, end=None):
864881
-----
865882
This function assumes that the data is sorted by the first level
866883
"""
867-
# relax for now
868-
# assert(self.sortorder == 0)
869-
870884
if start is None:
871885
start_slice = 0
872886
else:
@@ -884,8 +898,9 @@ def slice_locs(self, start=None, end=None):
884898
return start_slice, end_slice
885899

886900
def _partial_tup_index(self, tup, side='left'):
887-
# relax for now
888-
# assert(self.sortorder == 0)
901+
if len(tup) > self.lexsort_depth:
902+
raise Exception('MultiIndex lexsort depth %d, key was %d long' %
903+
(self.lexsort_depth, len(tup)))
889904

890905
n = len(tup)
891906
start, end = 0, len(self)

pandas/src/groupby.pyx

+30
Original file line numberDiff line numberDiff line change
@@ -133,6 +133,36 @@ def groupby_indices(ndarray values):
133133
return result
134134

135135

136+
@cython.wraparound(False)
@cython.boundscheck(False)
def is_lexsorted(list list_of_arrays):
    """
    Check whether the rows described by a list of label arrays are in
    non-decreasing lexicographic order.

    Row ``i`` is the tuple ``(list_of_arrays[0][i], list_of_arrays[1][i],
    ...)``; each row is compared with the one before it, key by key.

    Parameters
    ----------
    list_of_arrays : list of ndarray
        One array per key, most significant key first. Each array's data
        buffer is reinterpreted as ``int32_t`` and indexed up to the length
        of the first array.
        NOTE(review): presumably callers pass contiguous int32 arrays of
        equal length -- this is not verified here; confirm against callers.

    Returns
    -------
    bool
        False as soon as a row compares smaller than its predecessor,
        True otherwise (including for an empty list or fewer than two rows).

    Raises
    ------
    MemoryError
        If the temporary pointer table cannot be allocated.
    """
    cdef:
        # Py_ssize_t so that row indices cannot overflow a C int
        Py_ssize_t i, k, n, nlevels
        int32_t cur, pre
        int32_t **vecs

    nlevels = len(list_of_arrays)
    if nlevels == 0:
        # No keys to compare; also avoids indexing an empty list with
        # bounds checking switched off.
        return True
    n = len(list_of_arrays[0])

    vecs = <int32_t **> malloc(nlevels * sizeof(int32_t*))
    if vecs == NULL:
        raise MemoryError()

    # try/finally guarantees the pointer table is released on every exit
    # path, including the early ``return False`` below.
    try:
        for i from 0 <= i < nlevels:
            vecs[i] = <int32_t *> (<ndarray> list_of_arrays[i]).data

        # assume uniqueness??

        for i from 1 <= i < n:
            for k from 0 <= k < nlevels:
                cur = vecs[k][i]
                pre = vecs[k][i-1]
                if cur == pre:
                    # tie on this key: defer to the next, less significant key
                    continue
                elif cur > pre:
                    # strictly greater on this key: row i is in order
                    break
                else:
                    return False
    finally:
        free(vecs)
    return True
165+
136166
@cython.wraparound(False)
137167
@cython.boundscheck(False)
138168
def group_labels(ndarray[object] values):

pandas/tests/test_multilevel.py

+18
Original file line numberDiff line numberDiff line change
@@ -231,8 +231,26 @@ def test_insert_index(self):
231231
def test_alignment(self):
232232
pass
233233

234+
def test_is_lexsorted(self):
    """is_lexsorted is True only when the label rows are fully sorted."""
    levels = [[0, 1], [0, 1, 2]]

    # (labels, expected is_lexsorted() outcome), checked in order
    cases = [
        # both levels sorted
        ([[0, 0, 0, 1, 1, 1],
          [0, 1, 2, 0, 1, 2]], True),
        # first level sorted, second level out of order within a group
        ([[0, 0, 0, 1, 1, 1],
          [0, 1, 2, 0, 2, 1]], False),
        # first level itself out of order
        ([[0, 0, 1, 0, 1, 1],
          [0, 1, 0, 2, 2, 1]], False),
    ]

    for labels, expected in cases:
        index = MultiIndex(levels=levels, labels=labels)
        if expected:
            self.assert_(index.is_lexsorted())
        else:
            self.assert_(not index.is_lexsorted())
234251

235252
if __name__ == '__main__':
253+
236254
# unittest.main()
237255
import nose
238256
# nose.runmodule(argv=[__file__,'-vvs','-x', '--pdb-failure'],

0 commit comments

Comments
 (0)