-
-
Notifications
You must be signed in to change notification settings - Fork 18.5k
DEPR MultiIndex.is_lexsorted and MultiIndex.lexsort_depth #38701
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from 3 commits
b5d157a
8ca3b3d
109c1ce
238b568
893ab7c
b2b1c36
b8bcc5e
4ab8981
72b0abc
a0a709c
be2a692
7d1d279
b3c5144
39d163f
6b9c698
74d8c7e
3798cf5
dfc779d
fdbb366
fff7297
fe4aaa6
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -1809,6 +1809,15 @@ def _is_all_dates(self) -> bool: | |
return False | ||
|
||
def is_lexsorted(self) -> bool: | ||
warnings.warn( | ||
"MultiIndex.is_lexsorted is deprecated as a public function, " | ||
"users should use MultiIndex.is_monotonic_increasing instead.", | ||
FutureWarning, | ||
stacklevel=2, | ||
) | ||
return self._is_lexsorted() | ||
|
||
def _is_lexsorted(self) -> bool: | ||
""" | ||
Return True if the codes are lexicographically sorted. | ||
|
||
|
@@ -1821,29 +1830,39 @@ def is_lexsorted(self) -> bool: | |
In the below examples, the first level of the MultiIndex is sorted because | ||
a<b<c, so there is no need to look at the next level. | ||
|
||
>>> pd.MultiIndex.from_arrays([['a', 'b', 'c'], ['d', 'e', 'f']]).is_lexsorted() | ||
>>> pd.MultiIndex.from_arrays([['a', 'b'], ['d', 'e']])._is_lexsorted() | ||
True | ||
>>> pd.MultiIndex.from_arrays([['a', 'b', 'c'], ['d', 'f', 'e']]).is_lexsorted() | ||
>>> pd.MultiIndex.from_arrays([['a', 'b'], ['d', 'f']])._is_lexsorted() | ||
True | ||
|
||
In case there is a tie, the lexicographical sorting looks | ||
at the next level of the MultiIndex. | ||
|
||
>>> pd.MultiIndex.from_arrays([[0, 1, 1], ['a', 'b', 'c']]).is_lexsorted() | ||
>>> pd.MultiIndex.from_arrays([[0, 1, 1], ['a', 'b', 'c']])._is_lexsorted() | ||
True | ||
>>> pd.MultiIndex.from_arrays([[0, 1, 1], ['a', 'c', 'b']]).is_lexsorted() | ||
>>> pd.MultiIndex.from_arrays([[0, 1, 1], ['a', 'c', 'b']])._is_lexsorted() | ||
False | ||
>>> pd.MultiIndex.from_arrays([['a', 'a', 'b', 'b'], | ||
... ['aa', 'bb', 'aa', 'bb']]).is_lexsorted() | ||
... ['aa', 'bb', 'aa', 'bb']])._is_lexsorted() | ||
True | ||
>>> pd.MultiIndex.from_arrays([['a', 'a', 'b', 'b'], | ||
... ['bb', 'aa', 'aa', 'bb']]).is_lexsorted() | ||
... ['bb', 'aa', 'aa', 'bb']])._is_lexsorted() | ||
False | ||
""" | ||
return self.lexsort_depth == self.nlevels | ||
return self._get_lexsort_depth == self.nlevels | ||
|
||
@cache_readonly | ||
def lexsort_depth(self): | ||
warnings.warn( | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. mention in the whatsnew as well There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. you don't need the cache on this one (as it's already on _lexsort_depth) and this is now user facing |
||
"MultiIndex.lexsort_depth is deprecated as a public function, " | ||
"users should use MultiIndex.is_monotonic_increasing to check " | ||
"is a MultiIndex is sorted.", | ||
FutureWarning, | ||
stacklevel=2, | ||
) | ||
return self._get_lexsort_depth() | ||
|
||
@cache_readonly | ||
def _get_lexsort_depth(self): | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more.
there's already a
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more.
umm that is extra confusing. pls don't do this
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more.
you can just move the .sortorder stuff to _lexsort_depth
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more.
Thanks for your review - the difficulty I'm running into is that in
```python
if self.sortorder is not None:
    if self.sortorder > self._lexsort_depth():
        raise ValueError(
            "Value for sortorder must be inferior or equal to actual "
            f"lexsort_depth: sortorder {self.sortorder} "
            f"with lexsort_depth {self._lexsort_depth()}"
        )
```
so I can't just move
For now I've factored out the part which is used in the above check into
```python
def _codes_lexsort_depth(self) -> int:
    int64_codes = [ensure_int64(level_codes) for level_codes in self.codes]
    for k in range(self.nlevels, 0, -1):
        if libalgos.is_lexsorted(int64_codes[:k]):
            return k
    return 0
```
but I'd imagine this will be considered equally confusing. Any suggestions? |
||
if self.sortorder is not None: | ||
return self.sortorder | ||
|
||
|
@@ -1898,7 +1917,7 @@ def _sort_levels_monotonic(self): | |
('b', 'bb')], | ||
) | ||
""" | ||
if self.is_lexsorted() and self.is_monotonic: | ||
if self._is_lexsorted() and self.is_monotonic: | ||
return self | ||
|
||
new_levels = [] | ||
|
@@ -2181,7 +2200,7 @@ def drop(self, codes, level=None, errors="raise"): | |
step = loc.step if loc.step is not None else 1 | ||
inds.extend(range(loc.start, loc.stop, step)) | ||
elif com.is_bool_indexer(loc): | ||
if self.lexsort_depth == 0: | ||
if self._get_lexsort_depth == 0: | ||
warnings.warn( | ||
"dropping on a non-lexsorted multi-index " | ||
"without a level parameter may impact performance.", | ||
|
@@ -2752,10 +2771,10 @@ def slice_locs(self, start=None, end=None, step=None, kind=None): | |
return super().slice_locs(start, end, step, kind=kind) | ||
|
||
def _partial_tup_index(self, tup, side="left"): | ||
if len(tup) > self.lexsort_depth: | ||
if len(tup) > self._get_lexsort_depth: | ||
raise UnsortedIndexError( | ||
f"Key length ({len(tup)}) was greater than MultiIndex lexsort depth " | ||
f"({self.lexsort_depth})" | ||
f"({self._get_lexsort_depth})" | ||
) | ||
|
||
n = len(tup) | ||
|
@@ -2894,7 +2913,7 @@ def _maybe_to_slice(loc): | |
# break the key into 2 parts based on the lexsort_depth of the index; | ||
# the first part returns a continuous slice of the index; the 2nd part | ||
# needs linear search within the slice | ||
i = self.lexsort_depth | ||
i = self._get_lexsort_depth | ||
lead_key, follow_key = key[:i], key[i:] | ||
start, stop = ( | ||
self.slice_locs(lead_key, lead_key) if lead_key else (0, len(self)) | ||
|
@@ -3147,7 +3166,7 @@ def convert_indexer(start, stop, step, indexer=indexer, codes=level_codes): | |
stop = getattr(stop, "stop", stop) | ||
return convert_indexer(start, stop, step) | ||
|
||
elif level > 0 or self.lexsort_depth == 0 or step is not None: | ||
elif level > 0 or self._get_lexsort_depth == 0 or step is not None: | ||
# need to have like semantics here to right | ||
# searching as when we are using a slice | ||
# so include the stop+1 (so we include stop) | ||
|
@@ -3162,7 +3181,7 @@ def convert_indexer(start, stop, step, indexer=indexer, codes=level_codes): | |
|
||
idx = self._get_loc_single_level_index(level_index, key) | ||
|
||
if level > 0 or self.lexsort_depth == 0: | ||
if level > 0 or self._get_lexsort_depth == 0: | ||
# Desired level is not sorted | ||
locs = np.array(level_codes == idx, dtype=bool, copy=False) | ||
if not locs.any(): | ||
|
@@ -3219,10 +3238,10 @@ def get_locs(self, seq): | |
|
||
# must be lexsorted to at least as many levels | ||
true_slices = [i for (i, s) in enumerate(com.is_true_slices(seq)) if s] | ||
if true_slices and true_slices[-1] >= self.lexsort_depth: | ||
if true_slices and true_slices[-1] >= self._get_lexsort_depth: | ||
raise UnsortedIndexError( | ||
"MultiIndex slicing requires the index to be lexsorted: slicing " | ||
f"on levels {true_slices}, lexsort depth {self.lexsort_depth}" | ||
f"on levels {true_slices}, lexsort depth {self._get_lexsort_depth}" | ||
) | ||
# indexer | ||
# this is the list of all values that we want to select | ||
|
@@ -3344,7 +3363,7 @@ def _reorder_indexer( | |
""" | ||
# If the index is lexsorted and the list_like label in seq are sorted | ||
# then we do not need to sort | ||
if self.is_lexsorted(): | ||
if self._is_lexsorted(): | ||
need_sort = False | ||
for i, k in enumerate(seq): | ||
if is_list_like(k): | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -24,7 +24,7 @@ def test_sort_index_and_reconstruction_doc_example(self): | |
levels=[["a", "b"], ["bb", "aa"]], codes=[[0, 0, 1, 1], [0, 1, 0, 1]] | ||
), | ||
) | ||
assert df.index.is_lexsorted() | ||
assert df.index._is_lexsorted() | ||
assert not df.index.is_monotonic | ||
|
||
# sort it | ||
|
@@ -35,15 +35,15 @@ def test_sort_index_and_reconstruction_doc_example(self): | |
), | ||
) | ||
result = df.sort_index() | ||
assert result.index.is_lexsorted() | ||
assert result.index._is_lexsorted() | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. for the cases where these are the same as is_monotonic (vast majority), ok with removing the _is_lexsorted assert |
||
assert result.index.is_monotonic | ||
|
||
tm.assert_frame_equal(result, expected) | ||
|
||
# reconstruct | ||
result = df.sort_index().copy() | ||
result.index = result.index._sort_levels_monotonic() | ||
assert result.index.is_lexsorted() | ||
assert result.index._is_lexsorted() | ||
assert result.index.is_monotonic | ||
|
||
tm.assert_frame_equal(result, expected) | ||
|
@@ -524,14 +524,14 @@ def test_sort_index_and_reconstruction(self): | |
[(0.5, "a"), (0.5, "b"), (0.8, "a"), (0.8, "b")] | ||
), | ||
) | ||
assert expected.index.is_lexsorted() | ||
assert expected.index._is_lexsorted() | ||
|
||
result = DataFrame( | ||
[[1, 1], [2, 2], [1, 1], [2, 2]], | ||
index=MultiIndex.from_product([[0.5, 0.8], list("ab")]), | ||
) | ||
result = result.sort_index() | ||
assert result.index.is_lexsorted() | ||
assert result.index._is_lexsorted() | ||
assert result.index.is_monotonic | ||
|
||
tm.assert_frame_equal(result, expected) | ||
|
@@ -543,14 +543,14 @@ def test_sort_index_and_reconstruction(self): | |
), | ||
) | ||
result = result.sort_index() | ||
assert result.index.is_lexsorted() | ||
assert result.index._is_lexsorted() | ||
|
||
tm.assert_frame_equal(result, expected) | ||
|
||
concatted = pd.concat([df, df], keys=[0.8, 0.5]) | ||
result = concatted.sort_index() | ||
|
||
assert result.index.is_lexsorted() | ||
assert result.index._is_lexsorted() | ||
assert result.index.is_monotonic | ||
|
||
tm.assert_frame_equal(result, expected) | ||
|
@@ -567,13 +567,13 @@ def test_sort_index_and_reconstruction(self): | |
df.columns = df.columns.set_levels( | ||
pd.to_datetime(df.columns.levels[1]), level=1 | ||
) | ||
assert not df.columns.is_lexsorted() | ||
assert not df.columns._is_lexsorted() | ||
assert not df.columns.is_monotonic | ||
result = df.sort_index(axis=1) | ||
assert result.columns.is_lexsorted() | ||
assert result.columns._is_lexsorted() | ||
assert result.columns.is_monotonic | ||
result = df.sort_index(axis=1, level=1) | ||
assert result.columns.is_lexsorted() | ||
assert result.columns._is_lexsorted() | ||
assert result.columns.is_monotonic | ||
|
||
# TODO: better name, de-duplicate with test_sort_index_level above | ||
|
@@ -614,7 +614,7 @@ def test_sort_index_level_large_cardinality(self): | |
|
||
# it works! | ||
result = df.sort_index(level=0) | ||
assert result.index.lexsort_depth == 3 | ||
assert result.index._get_lexsort_depth == 3 | ||
|
||
# GH#2684 (int32) | ||
index = MultiIndex.from_arrays([np.arange(4000)] * 3) | ||
|
@@ -623,7 +623,7 @@ def test_sort_index_level_large_cardinality(self): | |
# it works! | ||
result = df.sort_index(level=0) | ||
assert (result.dtypes.values == df.dtypes.values).all() | ||
assert result.index.lexsort_depth == 3 | ||
assert result.index._get_lexsort_depth == 3 | ||
|
||
def test_sort_index_level_by_name(self): | ||
mi = MultiIndex( | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
making these a bit shorter so they fit on a single line