Skip to content

Commit 586b490

Browse files
authored
DEPR MultiIndex.is_lexsorted and MultiIndex.lexsort_depth (#38701)
1 parent 9952626 commit 586b490

File tree

16 files changed

+114
-85
lines changed

16 files changed

+114
-85
lines changed

doc/source/reference/indexing.rst

-1
Original file line numberDiff line numberDiff line change
@@ -301,7 +301,6 @@ MultiIndex components
301301
MultiIndex.set_codes
302302
MultiIndex.to_flat_index
303303
MultiIndex.to_frame
304-
MultiIndex.is_lexsorted
305304
MultiIndex.sortlevel
306305
MultiIndex.droplevel
307306
MultiIndex.swaplevel

doc/source/user_guide/advanced.rst

+4-6
Original file line numberDiff line numberDiff line change
@@ -658,20 +658,18 @@ Furthermore, if you try to index something that is not fully lexsorted, this can
658658
In [5]: dfm.loc[(0, 'y'):(1, 'z')]
659659
UnsortedIndexError: 'Key length (2) was greater than MultiIndex lexsort depth (1)'
660660
661-
The :meth:`~MultiIndex.is_lexsorted` method on a ``MultiIndex`` shows if the
662-
index is sorted, and the ``lexsort_depth`` property returns the sort depth:
661+
The :attr:`~MultiIndex.is_monotonic_increasing` property on a ``MultiIndex`` shows if the
662+
index is sorted:
663663

664664
.. ipython:: python
665665
666-
dfm.index.is_lexsorted()
667-
dfm.index.lexsort_depth
666+
dfm.index.is_monotonic_increasing
668667
669668
.. ipython:: python
670669
671670
dfm = dfm.sort_index()
672671
dfm
673-
dfm.index.is_lexsorted()
674-
dfm.index.lexsort_depth
672+
dfm.index.is_monotonic_increasing
675673
676674
And now selection works as expected.
677675

doc/source/whatsnew/v0.20.0.rst

+28-10
Original file line numberDiff line numberDiff line change
@@ -873,21 +873,27 @@ This is *unchanged* from prior versions, but shown for illustration purposes:
873873
index=pd.MultiIndex.from_product([list('BA'), range(3)]))
874874
df
875875
876-
.. ipython:: python
876+
.. code-block:: python
877877
878-
df.index.is_lexsorted()
879-
df.index.is_monotonic
878+
In [87]: df.index.is_lexsorted()
879+
Out[87]: False
880+
881+
In [88]: df.index.is_monotonic
882+
Out[88]: False
880883
881884
Sorting works as expected
882885

883886
.. ipython:: python
884887
885888
df.sort_index()
886889
887-
.. ipython:: python
890+
.. code-block:: python
891+
892+
In [90]: df.sort_index().index.is_lexsorted()
893+
Out[90]: True
888894
889-
df.sort_index().index.is_lexsorted()
890-
df.sort_index().index.is_monotonic
895+
In [91]: df.sort_index().index.is_monotonic
896+
Out[91]: True
891897
892898
However, this example, which has a non-monotonic 2nd level,
893899
doesn't behave as desired.
@@ -919,11 +925,23 @@ Previous behavior:
919925
920926
New behavior:
921927

922-
.. ipython:: python
928+
.. code-block:: python
923929
924-
df.sort_index()
925-
df.sort_index().index.is_lexsorted()
926-
df.sort_index().index.is_monotonic
930+
In [94]: df.sort_index()
931+
Out[94]:
932+
value
933+
a aa 2
934+
bb 1
935+
b aa 4
936+
bb 3
937+
938+
[4 rows x 1 columns]
939+
940+
In [95]: df.sort_index().index.is_lexsorted()
941+
Out[95]: True
942+
943+
In [96]: df.sort_index().index.is_monotonic
944+
Out[96]: True
927945
928946
929947
.. _whatsnew_0200.api_breaking.groupby_describe:

doc/source/whatsnew/v1.3.0.rst

+1
Original file line numberDiff line numberDiff line change
@@ -156,6 +156,7 @@ Deprecations
156156
- Deprecating allowing scalars passed to the :class:`Categorical` constructor (:issue:`38433`)
157157
- Deprecated allowing subclass-specific keyword arguments in the :class:`Index` constructor, use the specific subclass directly instead (:issue:`14093`,:issue:`21311`,:issue:`22315`,:issue:`26974`)
158158
- Deprecated ``astype`` of datetimelike (``timedelta64[ns]``, ``datetime64[ns]``, ``Datetime64TZDtype``, ``PeriodDtype``) to integer dtypes, use ``values.view(...)`` instead (:issue:`38544`)
159+
- Deprecated :meth:`MultiIndex.is_lexsorted` and :meth:`MultiIndex.lexsort_depth` as public methods, users should use :meth:`MultiIndex.is_monotonic_increasing` instead (:issue:`32259`)
159160
- Deprecated keyword ``try_cast`` in :meth:`Series.where`, :meth:`Series.mask`, :meth:`DataFrame.where`, :meth:`DataFrame.mask`; cast results manually if desired (:issue:`38836`)
160161
- Deprecated comparison of :class:`Timestamp` object with ``datetime.date`` objects. Instead of e.g. ``ts <= mydate`` use ``ts <= pd.Timestamp(mydate)`` or ``ts.date() <= mydate`` (:issue:`36131`)
161162
-

pandas/core/indexes/multi.py

+43-23
Original file line numberDiff line numberDiff line change
@@ -391,11 +391,11 @@ def _verify_integrity(
391391
f"Level values must be unique: {list(level)} on level {i}"
392392
)
393393
if self.sortorder is not None:
394-
if self.sortorder > self._lexsort_depth():
394+
if self.sortorder > _lexsort_depth(self.codes, self.nlevels):
395395
raise ValueError(
396396
"Value for sortorder must be inferior or equal to actual "
397397
f"lexsort_depth: sortorder {self.sortorder} "
398-
f"with lexsort_depth {self._lexsort_depth()}"
398+
f"with lexsort_depth {_lexsort_depth(self.codes, self.nlevels)}"
399399
)
400400

401401
codes = [
@@ -1809,6 +1809,15 @@ def _is_all_dates(self) -> bool:
18091809
return False
18101810

18111811
def is_lexsorted(self) -> bool:
1812+
warnings.warn(
1813+
"MultiIndex.is_lexsorted is deprecated as a public function, "
1814+
"users should use MultiIndex.is_monotonic_increasing instead.",
1815+
FutureWarning,
1816+
stacklevel=2,
1817+
)
1818+
return self._is_lexsorted()
1819+
1820+
def _is_lexsorted(self) -> bool:
18121821
"""
18131822
Return True if the codes are lexicographically sorted.
18141823
@@ -1840,15 +1849,19 @@ def is_lexsorted(self) -> bool:
18401849
... ['bb', 'aa', 'aa', 'bb']]).is_lexsorted()
18411850
False
18421851
"""
1843-
return self.lexsort_depth == self.nlevels
1852+
return self._lexsort_depth == self.nlevels
18441853

1845-
@cache_readonly
1854+
@property
18461855
def lexsort_depth(self):
1847-
if self.sortorder is not None:
1848-
return self.sortorder
1849-
1850-
return self._lexsort_depth()
1856+
warnings.warn(
1857+
"MultiIndex.lexsort_depth is deprecated as a public function, "
1858+
"users should use MultiIndex.is_monotonic_increasing instead.",
1859+
FutureWarning,
1860+
stacklevel=2,
1861+
)
1862+
return self._lexsort_depth
18511863

1864+
@cache_readonly
18521865
def _lexsort_depth(self) -> int:
18531866
"""
18541867
Compute and return the lexsort_depth, the number of levels of the
@@ -1858,11 +1871,9 @@ def _lexsort_depth(self) -> int:
18581871
-------
18591872
int
18601873
"""
1861-
int64_codes = [ensure_int64(level_codes) for level_codes in self.codes]
1862-
for k in range(self.nlevels, 0, -1):
1863-
if libalgos.is_lexsorted(int64_codes[:k]):
1864-
return k
1865-
return 0
1874+
if self.sortorder is not None:
1875+
return self.sortorder
1876+
return _lexsort_depth(self.codes, self.nlevels)
18661877

18671878
def _sort_levels_monotonic(self):
18681879
"""
@@ -1898,7 +1909,7 @@ def _sort_levels_monotonic(self):
18981909
('b', 'bb')],
18991910
)
19001911
"""
1901-
if self.is_lexsorted() and self.is_monotonic:
1912+
if self._is_lexsorted() and self.is_monotonic:
19021913
return self
19031914

19041915
new_levels = []
@@ -2184,7 +2195,7 @@ def drop(self, codes, level=None, errors="raise"):
21842195
step = loc.step if loc.step is not None else 1
21852196
inds.extend(range(loc.start, loc.stop, step))
21862197
elif com.is_bool_indexer(loc):
2187-
if self.lexsort_depth == 0:
2198+
if self._lexsort_depth == 0:
21882199
warnings.warn(
21892200
"dropping on a non-lexsorted multi-index "
21902201
"without a level parameter may impact performance.",
@@ -2755,10 +2766,10 @@ def slice_locs(self, start=None, end=None, step=None, kind=None):
27552766
return super().slice_locs(start, end, step, kind=kind)
27562767

27572768
def _partial_tup_index(self, tup, side="left"):
2758-
if len(tup) > self.lexsort_depth:
2769+
if len(tup) > self._lexsort_depth:
27592770
raise UnsortedIndexError(
27602771
f"Key length ({len(tup)}) was greater than MultiIndex lexsort depth "
2761-
f"({self.lexsort_depth})"
2772+
f"({self._lexsort_depth})"
27622773
)
27632774

27642775
n = len(tup)
@@ -2897,7 +2908,7 @@ def _maybe_to_slice(loc):
28972908
# break the key into 2 parts based on the lexsort_depth of the index;
28982909
# the first part returns a continuous slice of the index; the 2nd part
28992910
# needs linear search within the slice
2900-
i = self.lexsort_depth
2911+
i = self._lexsort_depth
29012912
lead_key, follow_key = key[:i], key[i:]
29022913
start, stop = (
29032914
self.slice_locs(lead_key, lead_key) if lead_key else (0, len(self))
@@ -3150,7 +3161,7 @@ def convert_indexer(start, stop, step, indexer=indexer, codes=level_codes):
31503161
stop = getattr(stop, "stop", stop)
31513162
return convert_indexer(start, stop, step)
31523163

3153-
elif level > 0 or self.lexsort_depth == 0 or step is not None:
3164+
elif level > 0 or self._lexsort_depth == 0 or step is not None:
31543165
# need to have like semantics here to right
31553166
# searching as when we are using a slice
31563167
# so include the stop+1 (so we include stop)
@@ -3165,7 +3176,7 @@ def convert_indexer(start, stop, step, indexer=indexer, codes=level_codes):
31653176

31663177
idx = self._get_loc_single_level_index(level_index, key)
31673178

3168-
if level > 0 or self.lexsort_depth == 0:
3179+
if level > 0 or self._lexsort_depth == 0:
31693180
# Desired level is not sorted
31703181
locs = np.array(level_codes == idx, dtype=bool, copy=False)
31713182
if not locs.any():
@@ -3222,10 +3233,10 @@ def get_locs(self, seq):
32223233

32233234
# must be lexsorted to at least as many levels
32243235
true_slices = [i for (i, s) in enumerate(com.is_true_slices(seq)) if s]
3225-
if true_slices and true_slices[-1] >= self.lexsort_depth:
3236+
if true_slices and true_slices[-1] >= self._lexsort_depth:
32263237
raise UnsortedIndexError(
32273238
"MultiIndex slicing requires the index to be lexsorted: slicing "
3228-
f"on levels {true_slices}, lexsort depth {self.lexsort_depth}"
3239+
f"on levels {true_slices}, lexsort depth {self._lexsort_depth}"
32293240
)
32303241
# indexer
32313242
# this is the list of all values that we want to select
@@ -3347,7 +3358,7 @@ def _reorder_indexer(
33473358
"""
33483359
# If the index is lexsorted and the list_like label in seq are sorted
33493360
# then we do not need to sort
3350-
if self.is_lexsorted():
3361+
if self._is_lexsorted():
33513362
need_sort = False
33523363
for i, k in enumerate(seq):
33533364
if is_list_like(k):
@@ -3768,6 +3779,15 @@ def isin(self, values, level=None):
37683779
__inv__ = make_invalid_op("__inv__")
37693780

37703781

3782+
def _lexsort_depth(codes: List[np.ndarray], nlevels: int) -> int:
3783+
"""Count depth (up to a maximum of `nlevels`) with which codes are lexsorted."""
3784+
int64_codes = [ensure_int64(level_codes) for level_codes in codes]
3785+
for k in range(nlevels, 0, -1):
3786+
if libalgos.is_lexsorted(int64_codes[:k]):
3787+
return k
3788+
return 0
3789+
3790+
37713791
def sparsify_labels(label_list, start: int = 0, sentinel=""):
37723792
pivoted = list(zip(*label_list))
37733793
k = len(label_list)

pandas/core/reshape/reshape.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -617,7 +617,7 @@ def _convert_level_number(level_num, columns):
617617
roll_columns = roll_columns.swaplevel(lev1, lev2)
618618
this.columns = roll_columns
619619

620-
if not this.columns.is_lexsorted():
620+
if not this.columns._is_lexsorted():
621621
# Workaround the edge case where 0 is one of the column names,
622622
# which interferes with trying to sort based on the first
623623
# level

pandas/tests/frame/methods/test_drop.py

+2-2
Original file line numberDiff line numberDiff line change
@@ -162,7 +162,7 @@ def test_drop_multiindex_not_lexsorted(self):
162162
[("a", ""), ("b1", "c1"), ("b2", "c2")], names=["b", "c"]
163163
)
164164
lexsorted_df = DataFrame([[1, 3, 4]], columns=lexsorted_mi)
165-
assert lexsorted_df.columns.is_lexsorted()
165+
assert lexsorted_df.columns._is_lexsorted()
166166

167167
# define the non-lexsorted version
168168
not_lexsorted_df = DataFrame(
@@ -172,7 +172,7 @@ def test_drop_multiindex_not_lexsorted(self):
172172
index="a", columns=["b", "c"], values="d"
173173
)
174174
not_lexsorted_df = not_lexsorted_df.reset_index()
175-
assert not not_lexsorted_df.columns.is_lexsorted()
175+
assert not not_lexsorted_df.columns._is_lexsorted()
176176

177177
# compare the results
178178
tm.assert_frame_equal(lexsorted_df, not_lexsorted_df)

pandas/tests/frame/methods/test_sort_index.py

+5-12
Original file line numberDiff line numberDiff line change
@@ -24,7 +24,7 @@ def test_sort_index_and_reconstruction_doc_example(self):
2424
levels=[["a", "b"], ["bb", "aa"]], codes=[[0, 0, 1, 1], [0, 1, 0, 1]]
2525
),
2626
)
27-
assert df.index.is_lexsorted()
27+
assert df.index._is_lexsorted()
2828
assert not df.index.is_monotonic
2929

3030
# sort it
@@ -35,15 +35,13 @@ def test_sort_index_and_reconstruction_doc_example(self):
3535
),
3636
)
3737
result = df.sort_index()
38-
assert result.index.is_lexsorted()
3938
assert result.index.is_monotonic
4039

4140
tm.assert_frame_equal(result, expected)
4241

4342
# reconstruct
4443
result = df.sort_index().copy()
4544
result.index = result.index._sort_levels_monotonic()
46-
assert result.index.is_lexsorted()
4745
assert result.index.is_monotonic
4846

4947
tm.assert_frame_equal(result, expected)
@@ -524,14 +522,13 @@ def test_sort_index_and_reconstruction(self):
524522
[(0.5, "a"), (0.5, "b"), (0.8, "a"), (0.8, "b")]
525523
),
526524
)
527-
assert expected.index.is_lexsorted()
525+
assert expected.index._is_lexsorted()
528526

529527
result = DataFrame(
530528
[[1, 1], [2, 2], [1, 1], [2, 2]],
531529
index=MultiIndex.from_product([[0.5, 0.8], list("ab")]),
532530
)
533531
result = result.sort_index()
534-
assert result.index.is_lexsorted()
535532
assert result.index.is_monotonic
536533

537534
tm.assert_frame_equal(result, expected)
@@ -543,14 +540,13 @@ def test_sort_index_and_reconstruction(self):
543540
),
544541
)
545542
result = result.sort_index()
546-
assert result.index.is_lexsorted()
543+
assert result.index._is_lexsorted()
547544

548545
tm.assert_frame_equal(result, expected)
549546

550547
concatted = pd.concat([df, df], keys=[0.8, 0.5])
551548
result = concatted.sort_index()
552549

553-
assert result.index.is_lexsorted()
554550
assert result.index.is_monotonic
555551

556552
tm.assert_frame_equal(result, expected)
@@ -567,13 +563,10 @@ def test_sort_index_and_reconstruction(self):
567563
df.columns = df.columns.set_levels(
568564
pd.to_datetime(df.columns.levels[1]), level=1
569565
)
570-
assert not df.columns.is_lexsorted()
571566
assert not df.columns.is_monotonic
572567
result = df.sort_index(axis=1)
573-
assert result.columns.is_lexsorted()
574568
assert result.columns.is_monotonic
575569
result = df.sort_index(axis=1, level=1)
576-
assert result.columns.is_lexsorted()
577570
assert result.columns.is_monotonic
578571

579572
# TODO: better name, de-duplicate with test_sort_index_level above
@@ -614,7 +607,7 @@ def test_sort_index_level_large_cardinality(self):
614607

615608
# it works!
616609
result = df.sort_index(level=0)
617-
assert result.index.lexsort_depth == 3
610+
assert result.index._lexsort_depth == 3
618611

619612
# GH#2684 (int32)
620613
index = MultiIndex.from_arrays([np.arange(4000)] * 3)
@@ -623,7 +616,7 @@ def test_sort_index_level_large_cardinality(self):
623616
# it works!
624617
result = df.sort_index(level=0)
625618
assert (result.dtypes.values == df.dtypes.values).all()
626-
assert result.index.lexsort_depth == 3
619+
assert result.index._lexsort_depth == 3
627620

628621
def test_sort_index_level_by_name(self):
629622
mi = MultiIndex(

0 commit comments

Comments
 (0)