From b5d157ac7cf8b8c803a3655bd5ec62a5320694f9 Mon Sep 17 00:00:00 2001 From: Marco Gorelli Date: Sat, 26 Dec 2020 09:37:28 +0000 Subject: [PATCH 01/20] deprecate multiindex.is_lexsorted --- doc/source/whatsnew/v1.3.0.rst | 2 +- pandas/core/indexes/multi.py | 25 +++++++++++++------ pandas/core/reshape/reshape.py | 2 +- pandas/tests/frame/methods/test_drop.py | 4 +-- pandas/tests/frame/methods/test_sort_index.py | 20 +++++++-------- pandas/tests/groupby/test_groupby.py | 6 ++--- pandas/tests/indexes/multi/test_drop.py | 4 +-- pandas/tests/indexes/multi/test_lexsort.py | 6 ++--- pandas/tests/indexes/multi/test_sorting.py | 16 ++++++------ .../tests/indexing/multiindex/test_sorted.py | 6 ++--- pandas/tests/test_multilevel.py | 10 ++++++-- 11 files changed, 58 insertions(+), 43 deletions(-) diff --git a/doc/source/whatsnew/v1.3.0.rst b/doc/source/whatsnew/v1.3.0.rst index c7573ee860744..f8d89a343778e 100644 --- a/doc/source/whatsnew/v1.3.0.rst +++ b/doc/source/whatsnew/v1.3.0.rst @@ -149,7 +149,7 @@ Deprecations - Deprecating allowing scalars passed to the :class:`Categorical` constructor (:issue:`38433`) - Deprecated allowing subclass-specific keyword arguments in the :class:`Index` constructor, use the specific subclass directly instead (:issue:`14093`,:issue:`21311`,:issue:`22315`,:issue:`26974`) - Deprecated ``astype`` of datetimelike (``timedelta64[ns]``, ``datetime64[ns]``, ``Datetime64TZDtype``, ``PeriodDtype``) to integer dtypes, use ``values.view(...)`` instead (:issue:`38544`) -- +- Deprecated :meth:`MultiIndex.is_lexsorted` as a public method, users should use :meth:`MultiIndex.is_monotonic_increasing` instead (:issue:`32259`) - .. 
--------------------------------------------------------------------------- diff --git a/pandas/core/indexes/multi.py b/pandas/core/indexes/multi.py index 06a04e5a9b9eb..ddbf9086758fa 100644 --- a/pandas/core/indexes/multi.py +++ b/pandas/core/indexes/multi.py @@ -1809,6 +1809,15 @@ def _is_all_dates(self) -> bool: return False def is_lexsorted(self) -> bool: + warnings.warn( + "MultiIndex.is_lexsorted is deprecated as a public function, " + "users should use MultiIndex.is_monotonic_increasing instead.", + FutureWarning, + stacklevel=2, + ) + return self._is_lexsorted() + + def _is_lexsorted(self) -> bool: """ Return True if the codes are lexicographically sorted. @@ -1821,23 +1830,23 @@ def is_lexsorted(self) -> bool: In the below examples, the first level of the MultiIndex is sorted because a>> pd.MultiIndex.from_arrays([['a', 'b', 'c'], ['d', 'e', 'f']]).is_lexsorted() + >>> pd.MultiIndex.from_arrays([['a', 'b'], ['d', 'e']])._is_lexsorted() True - >>> pd.MultiIndex.from_arrays([['a', 'b', 'c'], ['d', 'f', 'e']]).is_lexsorted() + >>> pd.MultiIndex.from_arrays([['a', 'b'], ['d', 'f']])._is_lexsorted() True In case there is a tie, the lexicographical sorting looks at the next level of the MultiIndex. - >>> pd.MultiIndex.from_arrays([[0, 1, 1], ['a', 'b', 'c']]).is_lexsorted() + >>> pd.MultiIndex.from_arrays([[0, 1, 1], ['a', 'b', 'c']])._is_lexsorted() True - >>> pd.MultiIndex.from_arrays([[0, 1, 1], ['a', 'c', 'b']]).is_lexsorted() + >>> pd.MultiIndex.from_arrays([[0, 1, 1], ['a', 'c', 'b']])._is_lexsorted() False >>> pd.MultiIndex.from_arrays([['a', 'a', 'b', 'b'], - ... ['aa', 'bb', 'aa', 'bb']]).is_lexsorted() + ... ['aa', 'bb', 'aa', 'bb']])._is_lexsorted() True >>> pd.MultiIndex.from_arrays([['a', 'a', 'b', 'b'], - ... ['bb', 'aa', 'aa', 'bb']]).is_lexsorted() + ... 
['bb', 'aa', 'aa', 'bb']])._is_lexsorted() False """ return self.lexsort_depth == self.nlevels @@ -1898,7 +1907,7 @@ def _sort_levels_monotonic(self): ('b', 'bb')], ) """ - if self.is_lexsorted() and self.is_monotonic: + if self._is_lexsorted() and self.is_monotonic: return self new_levels = [] @@ -3344,7 +3353,7 @@ def _reorder_indexer( """ # If the index is lexsorted and the list_like label in seq are sorted # then we do not need to sort - if self.is_lexsorted(): + if self._is_lexsorted(): need_sort = False for i, k in enumerate(seq): if is_list_like(k): diff --git a/pandas/core/reshape/reshape.py b/pandas/core/reshape/reshape.py index c197e142fecbc..d855886fb725f 100644 --- a/pandas/core/reshape/reshape.py +++ b/pandas/core/reshape/reshape.py @@ -617,7 +617,7 @@ def _convert_level_number(level_num, columns): roll_columns = roll_columns.swaplevel(lev1, lev2) this.columns = roll_columns - if not this.columns.is_lexsorted(): + if not this.columns._is_lexsorted(): # Workaround the edge case where 0 is one of the column names, # which interferes with trying to sort based on the first # level diff --git a/pandas/tests/frame/methods/test_drop.py b/pandas/tests/frame/methods/test_drop.py index dd93eac8b7110..747ee25e97331 100644 --- a/pandas/tests/frame/methods/test_drop.py +++ b/pandas/tests/frame/methods/test_drop.py @@ -162,7 +162,7 @@ def test_drop_multiindex_not_lexsorted(self): [("a", ""), ("b1", "c1"), ("b2", "c2")], names=["b", "c"] ) lexsorted_df = DataFrame([[1, 3, 4]], columns=lexsorted_mi) - assert lexsorted_df.columns.is_lexsorted() + assert lexsorted_df.columns._is_lexsorted() # define the non-lexsorted version not_lexsorted_df = DataFrame( @@ -172,7 +172,7 @@ def test_drop_multiindex_not_lexsorted(self): index="a", columns=["b", "c"], values="d" ) not_lexsorted_df = not_lexsorted_df.reset_index() - assert not not_lexsorted_df.columns.is_lexsorted() + assert not not_lexsorted_df.columns._is_lexsorted() # compare the results 
tm.assert_frame_equal(lexsorted_df, not_lexsorted_df) diff --git a/pandas/tests/frame/methods/test_sort_index.py b/pandas/tests/frame/methods/test_sort_index.py index de847c12723b2..4e15c01578352 100644 --- a/pandas/tests/frame/methods/test_sort_index.py +++ b/pandas/tests/frame/methods/test_sort_index.py @@ -24,7 +24,7 @@ def test_sort_index_and_reconstruction_doc_example(self): levels=[["a", "b"], ["bb", "aa"]], codes=[[0, 0, 1, 1], [0, 1, 0, 1]] ), ) - assert df.index.is_lexsorted() + assert df.index._is_lexsorted() assert not df.index.is_monotonic # sort it @@ -35,7 +35,7 @@ def test_sort_index_and_reconstruction_doc_example(self): ), ) result = df.sort_index() - assert result.index.is_lexsorted() + assert result.index._is_lexsorted() assert result.index.is_monotonic tm.assert_frame_equal(result, expected) @@ -43,7 +43,7 @@ def test_sort_index_and_reconstruction_doc_example(self): # reconstruct result = df.sort_index().copy() result.index = result.index._sort_levels_monotonic() - assert result.index.is_lexsorted() + assert result.index._is_lexsorted() assert result.index.is_monotonic tm.assert_frame_equal(result, expected) @@ -524,14 +524,14 @@ def test_sort_index_and_reconstruction(self): [(0.5, "a"), (0.5, "b"), (0.8, "a"), (0.8, "b")] ), ) - assert expected.index.is_lexsorted() + assert expected.index._is_lexsorted() result = DataFrame( [[1, 1], [2, 2], [1, 1], [2, 2]], index=MultiIndex.from_product([[0.5, 0.8], list("ab")]), ) result = result.sort_index() - assert result.index.is_lexsorted() + assert result.index._is_lexsorted() assert result.index.is_monotonic tm.assert_frame_equal(result, expected) @@ -543,14 +543,14 @@ def test_sort_index_and_reconstruction(self): ), ) result = result.sort_index() - assert result.index.is_lexsorted() + assert result.index._is_lexsorted() tm.assert_frame_equal(result, expected) concatted = pd.concat([df, df], keys=[0.8, 0.5]) result = concatted.sort_index() - assert result.index.is_lexsorted() + assert 
result.index._is_lexsorted() assert result.index.is_monotonic tm.assert_frame_equal(result, expected) @@ -567,13 +567,13 @@ def test_sort_index_and_reconstruction(self): df.columns = df.columns.set_levels( pd.to_datetime(df.columns.levels[1]), level=1 ) - assert not df.columns.is_lexsorted() + assert not df.columns._is_lexsorted() assert not df.columns.is_monotonic result = df.sort_index(axis=1) - assert result.columns.is_lexsorted() + assert result.columns._is_lexsorted() assert result.columns.is_monotonic result = df.sort_index(axis=1, level=1) - assert result.columns.is_lexsorted() + assert result.columns._is_lexsorted() assert result.columns.is_monotonic # TODO: better name, de-duplicate with test_sort_index_level above diff --git a/pandas/tests/groupby/test_groupby.py b/pandas/tests/groupby/test_groupby.py index 7c179a79513fa..cb48aa272cf37 100644 --- a/pandas/tests/groupby/test_groupby.py +++ b/pandas/tests/groupby/test_groupby.py @@ -1571,7 +1571,7 @@ def test_groupby_multiindex_not_lexsorted(): [("a", ""), ("b1", "c1"), ("b2", "c2")], names=["b", "c"] ) lexsorted_df = DataFrame([[1, 3, 4]], columns=lexsorted_mi) - assert lexsorted_df.columns.is_lexsorted() + assert lexsorted_df.columns._is_lexsorted() # define the non-lexsorted version not_lexsorted_df = DataFrame( @@ -1581,7 +1581,7 @@ def test_groupby_multiindex_not_lexsorted(): index="a", columns=["b", "c"], values="d" ) not_lexsorted_df = not_lexsorted_df.reset_index() - assert not not_lexsorted_df.columns.is_lexsorted() + assert not not_lexsorted_df.columns._is_lexsorted() # compare the results tm.assert_frame_equal(lexsorted_df, not_lexsorted_df) @@ -1596,7 +1596,7 @@ def test_groupby_multiindex_not_lexsorted(): df = DataFrame( {"x": ["a", "a", "b", "a"], "y": [1, 1, 2, 2], "z": [1, 2, 3, 4]} ).set_index(["x", "y"]) - assert not df.index.is_lexsorted() + assert not df.index._is_lexsorted() for level in [0, 1, [0, 1]]: for sort in [False, True]: diff --git a/pandas/tests/indexes/multi/test_drop.py 
b/pandas/tests/indexes/multi/test_drop.py index f7b1bc4729428..f43f1beba1cea 100644 --- a/pandas/tests/indexes/multi/test_drop.py +++ b/pandas/tests/indexes/multi/test_drop.py @@ -126,7 +126,7 @@ def test_drop_not_lexsorted(): # define the lexsorted version of the multi-index tuples = [("a", ""), ("b1", "c1"), ("b2", "c2")] lexsorted_mi = MultiIndex.from_tuples(tuples, names=["b", "c"]) - assert lexsorted_mi.is_lexsorted() + assert lexsorted_mi._is_lexsorted() # and the not-lexsorted version df = pd.DataFrame( @@ -135,7 +135,7 @@ def test_drop_not_lexsorted(): df = df.pivot_table(index="a", columns=["b", "c"], values="d") df = df.reset_index() not_lexsorted_mi = df.columns - assert not not_lexsorted_mi.is_lexsorted() + assert not not_lexsorted_mi._is_lexsorted() # compare the results tm.assert_index_equal(lexsorted_mi, not_lexsorted_mi) diff --git a/pandas/tests/indexes/multi/test_lexsort.py b/pandas/tests/indexes/multi/test_lexsort.py index 1d2ad8e02697e..5ffb1abc52c3a 100644 --- a/pandas/tests/indexes/multi/test_lexsort.py +++ b/pandas/tests/indexes/multi/test_lexsort.py @@ -8,17 +8,17 @@ def test_is_lexsorted(self): index = MultiIndex( levels=levels, codes=[[0, 0, 0, 1, 1, 1], [0, 1, 2, 0, 1, 2]] ) - assert index.is_lexsorted() + assert index._is_lexsorted() index = MultiIndex( levels=levels, codes=[[0, 0, 0, 1, 1, 1], [0, 1, 2, 0, 2, 1]] ) - assert not index.is_lexsorted() + assert not index._is_lexsorted() index = MultiIndex( levels=levels, codes=[[0, 0, 1, 0, 1, 1], [0, 1, 0, 2, 2, 1]] ) - assert not index.is_lexsorted() + assert not index._is_lexsorted() assert index.lexsort_depth == 0 diff --git a/pandas/tests/indexes/multi/test_sorting.py b/pandas/tests/indexes/multi/test_sorting.py index e5d178581136b..b78ed9ccf7d8c 100644 --- a/pandas/tests/indexes/multi/test_sorting.py +++ b/pandas/tests/indexes/multi/test_sorting.py @@ -131,7 +131,7 @@ def test_unsortedindex_doc_examples(): with pytest.raises(UnsortedIndexError, match=msg): dfm.loc[(0, "y"):(1, "z")] - 
assert not dfm.index.is_lexsorted() + assert not dfm.index._is_lexsorted() assert dfm.index.lexsort_depth == 1 # sort it @@ -139,7 +139,7 @@ def test_unsortedindex_doc_examples(): dfm.loc[(1, "z")] dfm.loc[(0, "y"):(1, "z")] - assert dfm.index.is_lexsorted() + assert dfm.index._is_lexsorted() assert dfm.index.lexsort_depth == 2 @@ -147,11 +147,11 @@ def test_reconstruct_sort(): # starts off lexsorted & monotonic mi = MultiIndex.from_arrays([["A", "A", "B", "B", "B"], [1, 2, 1, 2, 3]]) - assert mi.is_lexsorted() + assert mi._is_lexsorted() assert mi.is_monotonic recons = mi._sort_levels_monotonic() - assert recons.is_lexsorted() + assert recons._is_lexsorted() assert recons.is_monotonic assert mi is recons @@ -163,11 +163,11 @@ def test_reconstruct_sort(): [("z", "a"), ("x", "a"), ("y", "b"), ("x", "b"), ("y", "a"), ("z", "b")], names=["one", "two"], ) - assert not mi.is_lexsorted() + assert not mi._is_lexsorted() assert not mi.is_monotonic recons = mi._sort_levels_monotonic() - assert not recons.is_lexsorted() + assert not recons._is_lexsorted() assert not recons.is_monotonic assert mi.equals(recons) @@ -179,11 +179,11 @@ def test_reconstruct_sort(): codes=[[0, 1, 0, 2], [2, 0, 0, 1]], names=["col1", "col2"], ) - assert not mi.is_lexsorted() + assert not mi._is_lexsorted() assert not mi.is_monotonic recons = mi._sort_levels_monotonic() - assert not recons.is_lexsorted() + assert not recons._is_lexsorted() assert not recons.is_monotonic assert mi.equals(recons) diff --git a/pandas/tests/indexing/multiindex/test_sorted.py b/pandas/tests/indexing/multiindex/test_sorted.py index 8a013c769f2cc..934f40d0b5aff 100644 --- a/pandas/tests/indexing/multiindex/test_sorted.py +++ b/pandas/tests/indexing/multiindex/test_sorted.py @@ -50,16 +50,16 @@ def test_frame_getitem_not_sorted2(self, key): with tm.assert_produces_warning(FutureWarning): return_value = df2.index.set_codes([0, 1, 0, 2], level="col1", inplace=True) assert return_value is None - assert not 
df2.index.is_lexsorted() + assert not df2.index._is_lexsorted() assert not df2.index.is_monotonic assert df2_original.index.equals(df2.index) expected = df2.sort_index(key=key) - assert expected.index.is_lexsorted() + assert expected.index._is_lexsorted() assert expected.index.is_monotonic result = df2.sort_index(level=0, key=key) - assert result.index.is_lexsorted() + assert result.index._is_lexsorted() assert result.index.is_monotonic tm.assert_frame_equal(result, expected) diff --git a/pandas/tests/test_multilevel.py b/pandas/tests/test_multilevel.py index 84aa8ec6f970f..d2f5e078eaecc 100644 --- a/pandas/tests/test_multilevel.py +++ b/pandas/tests/test_multilevel.py @@ -401,11 +401,11 @@ def test_sort_non_lexsorted(self): ) df = DataFrame({"col": range(len(idx))}, index=idx, dtype="int64") - assert df.index.is_lexsorted() is False + assert df.index._is_lexsorted() is False assert df.index.is_monotonic is False sorted = df.sort_index() - assert sorted.index.is_lexsorted() is True + assert sorted.index._is_lexsorted() is True assert sorted.index.is_monotonic is True expected = DataFrame( @@ -417,3 +417,9 @@ def test_sort_non_lexsorted(self): ) result = sorted.loc[pd.IndexSlice["B":"C", "a":"c"], :] tm.assert_frame_equal(result, expected) + + +def test_is_lexsorted_deprecation(): + # GH 32259 + with tm.assert_produces_warning(): + MultiIndex.from_arrays([["a", "b", "c"], ["d", "f", "e"]]).is_lexsorted() From 8ca3b3d093534561bd08768ddc42d99845e080c5 Mon Sep 17 00:00:00 2001 From: Marco Gorelli Date: Sat, 26 Dec 2020 13:04:14 +0000 Subject: [PATCH 02/20] deprecate public usage of MultiIndex.lexsort_depth too --- doc/source/user_guide/advanced.rst | 10 +++---- pandas/core/indexes/multi.py | 30 ++++++++++++------- pandas/tests/frame/methods/test_sort_index.py | 4 +-- pandas/tests/indexes/multi/test_sorting.py | 4 +-- .../indexing/multiindex/test_indexing_slow.py | 2 +- .../tests/indexing/multiindex/test_slice.py | 4 +-- 6 files changed, 31 insertions(+), 23 
deletions(-) diff --git a/doc/source/user_guide/advanced.rst b/doc/source/user_guide/advanced.rst index 2cd48ac7adb0e..2c948f67c14c6 100644 --- a/doc/source/user_guide/advanced.rst +++ b/doc/source/user_guide/advanced.rst @@ -658,20 +658,18 @@ Furthermore, if you try to index something that is not fully lexsorted, this can In [5]: dfm.loc[(0, 'y'):(1, 'z')] UnsortedIndexError: 'Key length (2) was greater than MultiIndex lexsort depth (1)' -The :meth:`~MultiIndex.is_lexsorted` method on a ``MultiIndex`` shows if the -index is sorted, and the ``lexsort_depth`` property returns the sort depth: +The :attr:`~MultiIndex.is_monotonic_increasing` property on a ``MultiIndex`` shows if the +index is sorted: .. ipython:: python - dfm.index.is_lexsorted() - dfm.index.lexsort_depth + dfm.index.is_monotonic_increasing .. ipython:: python dfm = dfm.sort_index() dfm - dfm.index.is_lexsorted() - dfm.index.lexsort_depth + dfm.index.is_monotonic_increasing And now selection works as expected. diff --git a/pandas/core/indexes/multi.py b/pandas/core/indexes/multi.py index ddbf9086758fa..6d2498203498c 100644 --- a/pandas/core/indexes/multi.py +++ b/pandas/core/indexes/multi.py @@ -1849,10 +1849,20 @@ def _is_lexsorted(self) -> bool: ... 
['bb', 'aa', 'aa', 'bb']])._is_lexsorted() False """ - return self.lexsort_depth == self.nlevels + return self.get_lexsort_depth == self.nlevels - @cache_readonly def lexsort_depth(self): + warnings.warn( + "MultiIndex.lexsort_depth is deprecated as a public function, " + "users should use MultiIndex.is_monotonic_increasing to check " + "is a MultiIndex is sorted.", + FutureWarning, + stacklevel=2, + ) + return self._get_lexsort_depth() + + @cache_readonly + def _get_lexsort_depth(self): if self.sortorder is not None: return self.sortorder @@ -2190,7 +2200,7 @@ def drop(self, codes, level=None, errors="raise"): step = loc.step if loc.step is not None else 1 inds.extend(range(loc.start, loc.stop, step)) elif com.is_bool_indexer(loc): - if self.lexsort_depth == 0: + if self.get_lexsort_depth == 0: warnings.warn( "dropping on a non-lexsorted multi-index " "without a level parameter may impact performance.", @@ -2761,10 +2771,10 @@ def slice_locs(self, start=None, end=None, step=None, kind=None): return super().slice_locs(start, end, step, kind=kind) def _partial_tup_index(self, tup, side="left"): - if len(tup) > self.lexsort_depth: + if len(tup) > self.get_lexsort_depth: raise UnsortedIndexError( f"Key length ({len(tup)}) was greater than MultiIndex lexsort depth " - f"({self.lexsort_depth})" + f"({self.get_lexsort_depth})" ) n = len(tup) @@ -2903,7 +2913,7 @@ def _maybe_to_slice(loc): # break the key into 2 parts based on the lexsort_depth of the index; # the first part returns a continuous slice of the index; the 2nd part # needs linear search within the slice - i = self.lexsort_depth + i = self.get_lexsort_depth lead_key, follow_key = key[:i], key[i:] start, stop = ( self.slice_locs(lead_key, lead_key) if lead_key else (0, len(self)) @@ -3156,7 +3166,7 @@ def convert_indexer(start, stop, step, indexer=indexer, codes=level_codes): stop = getattr(stop, "stop", stop) return convert_indexer(start, stop, step) - elif level > 0 or self.lexsort_depth == 0 or step is not 
None: + elif level > 0 or self.get_lexsort_depth == 0 or step is not None: # need to have like semantics here to right # searching as when we are using a slice # so include the stop+1 (so we include stop) @@ -3171,7 +3181,7 @@ def convert_indexer(start, stop, step, indexer=indexer, codes=level_codes): idx = self._get_loc_single_level_index(level_index, key) - if level > 0 or self.lexsort_depth == 0: + if level > 0 or self.get_lexsort_depth == 0: # Desired level is not sorted locs = np.array(level_codes == idx, dtype=bool, copy=False) if not locs.any(): @@ -3228,10 +3238,10 @@ def get_locs(self, seq): # must be lexsorted to at least as many levels true_slices = [i for (i, s) in enumerate(com.is_true_slices(seq)) if s] - if true_slices and true_slices[-1] >= self.lexsort_depth: + if true_slices and true_slices[-1] >= self.get_lexsort_depth: raise UnsortedIndexError( "MultiIndex slicing requires the index to be lexsorted: slicing " - f"on levels {true_slices}, lexsort depth {self.lexsort_depth}" + f"on levels {true_slices}, lexsort depth {self.get_lexsort_depth}" ) # indexer # this is the list of all values that we want to select diff --git a/pandas/tests/frame/methods/test_sort_index.py b/pandas/tests/frame/methods/test_sort_index.py index 4e15c01578352..06a58e4054f7d 100644 --- a/pandas/tests/frame/methods/test_sort_index.py +++ b/pandas/tests/frame/methods/test_sort_index.py @@ -614,7 +614,7 @@ def test_sort_index_level_large_cardinality(self): # it works! result = df.sort_index(level=0) - assert result.index.lexsort_depth == 3 + assert result.index._get_lexsort_depth == 3 # GH#2684 (int32) index = MultiIndex.from_arrays([np.arange(4000)] * 3) @@ -623,7 +623,7 @@ def test_sort_index_level_large_cardinality(self): # it works! 
result = df.sort_index(level=0) assert (result.dtypes.values == df.dtypes.values).all() - assert result.index.lexsort_depth == 3 + assert result.index._get_lexsort_depth == 3 def test_sort_index_level_by_name(self): mi = MultiIndex( diff --git a/pandas/tests/indexes/multi/test_sorting.py b/pandas/tests/indexes/multi/test_sorting.py index b78ed9ccf7d8c..77043da87f3d2 100644 --- a/pandas/tests/indexes/multi/test_sorting.py +++ b/pandas/tests/indexes/multi/test_sorting.py @@ -132,7 +132,7 @@ def test_unsortedindex_doc_examples(): dfm.loc[(0, "y"):(1, "z")] assert not dfm.index._is_lexsorted() - assert dfm.index.lexsort_depth == 1 + assert dfm.index._get_lexsort_depth == 1 # sort it dfm = dfm.sort_index() @@ -140,7 +140,7 @@ def test_unsortedindex_doc_examples(): dfm.loc[(0, "y"):(1, "z")] assert dfm.index._is_lexsorted() - assert dfm.index.lexsort_depth == 2 + assert dfm.index._get_lexsort_depth == 2 def test_reconstruct_sort(): diff --git a/pandas/tests/indexing/multiindex/test_indexing_slow.py b/pandas/tests/indexing/multiindex/test_indexing_slow.py index efe1e0f0d75b5..1f96b56f49d0e 100644 --- a/pandas/tests/indexing/multiindex/test_indexing_slow.py +++ b/pandas/tests/indexing/multiindex/test_indexing_slow.py @@ -86,5 +86,5 @@ def validate(mi, df, key): df = frame.sort_values(by=cols[:lexsort_depth]) mi = df.set_index(cols[:-1]) - assert not mi.index.lexsort_depth < lexsort_depth + assert not mi.index._get_lexsort_depth < lexsort_depth validate(mi, df, key) diff --git a/pandas/tests/indexing/multiindex/test_slice.py b/pandas/tests/indexing/multiindex/test_slice.py index 51684f092aefd..75aad4b13de3e 100644 --- a/pandas/tests/indexing/multiindex/test_slice.py +++ b/pandas/tests/indexing/multiindex/test_slice.py @@ -144,9 +144,9 @@ def test_per_axis_per_level_getitem(self): tm.assert_frame_equal(result, expected) # not lexsorted - assert df.index.lexsort_depth == 2 + assert df.index._get_lexsort_depth == 2 df = df.sort_index(level=1, axis=0) - assert 
df.index.lexsort_depth == 0 + assert df.index._get_lexsort_depth == 0 msg = ( "MultiIndex slicing requires the index to be " From 109c1ce8a94c62399318cad673d393d608f79030 Mon Sep 17 00:00:00 2001 From: Marco Gorelli Date: Sat, 26 Dec 2020 13:10:05 +0000 Subject: [PATCH 03/20] make get_lexsort_depth private --- pandas/core/indexes/multi.py | 18 +++++++++--------- pandas/tests/indexes/multi/test_lexsort.py | 8 ++++---- 2 files changed, 13 insertions(+), 13 deletions(-) diff --git a/pandas/core/indexes/multi.py b/pandas/core/indexes/multi.py index 6d2498203498c..85aa4473f811f 100644 --- a/pandas/core/indexes/multi.py +++ b/pandas/core/indexes/multi.py @@ -1849,7 +1849,7 @@ def _is_lexsorted(self) -> bool: ... ['bb', 'aa', 'aa', 'bb']])._is_lexsorted() False """ - return self.get_lexsort_depth == self.nlevels + return self._get_lexsort_depth == self.nlevels def lexsort_depth(self): warnings.warn( @@ -2200,7 +2200,7 @@ def drop(self, codes, level=None, errors="raise"): step = loc.step if loc.step is not None else 1 inds.extend(range(loc.start, loc.stop, step)) elif com.is_bool_indexer(loc): - if self.get_lexsort_depth == 0: + if self._get_lexsort_depth == 0: warnings.warn( "dropping on a non-lexsorted multi-index " "without a level parameter may impact performance.", @@ -2771,10 +2771,10 @@ def slice_locs(self, start=None, end=None, step=None, kind=None): return super().slice_locs(start, end, step, kind=kind) def _partial_tup_index(self, tup, side="left"): - if len(tup) > self.get_lexsort_depth: + if len(tup) > self._get_lexsort_depth: raise UnsortedIndexError( f"Key length ({len(tup)}) was greater than MultiIndex lexsort depth " - f"({self.get_lexsort_depth})" + f"({self._get_lexsort_depth})" ) n = len(tup) @@ -2913,7 +2913,7 @@ def _maybe_to_slice(loc): # break the key into 2 parts based on the lexsort_depth of the index; # the first part returns a continuous slice of the index; the 2nd part # needs linear search within the slice - i = self.get_lexsort_depth + i = 
self._get_lexsort_depth lead_key, follow_key = key[:i], key[i:] start, stop = ( self.slice_locs(lead_key, lead_key) if lead_key else (0, len(self)) @@ -3166,7 +3166,7 @@ def convert_indexer(start, stop, step, indexer=indexer, codes=level_codes): stop = getattr(stop, "stop", stop) return convert_indexer(start, stop, step) - elif level > 0 or self.get_lexsort_depth == 0 or step is not None: + elif level > 0 or self._get_lexsort_depth == 0 or step is not None: # need to have like semantics here to right # searching as when we are using a slice # so include the stop+1 (so we include stop) @@ -3181,7 +3181,7 @@ def convert_indexer(start, stop, step, indexer=indexer, codes=level_codes): idx = self._get_loc_single_level_index(level_index, key) - if level > 0 or self.get_lexsort_depth == 0: + if level > 0 or self._get_lexsort_depth == 0: # Desired level is not sorted locs = np.array(level_codes == idx, dtype=bool, copy=False) if not locs.any(): @@ -3238,10 +3238,10 @@ def get_locs(self, seq): # must be lexsorted to at least as many levels true_slices = [i for (i, s) in enumerate(com.is_true_slices(seq)) if s] - if true_slices and true_slices[-1] >= self.get_lexsort_depth: + if true_slices and true_slices[-1] >= self._get_lexsort_depth: raise UnsortedIndexError( "MultiIndex slicing requires the index to be lexsorted: slicing " - f"on levels {true_slices}, lexsort depth {self.get_lexsort_depth}" + f"on levels {true_slices}, lexsort depth {self._get_lexsort_depth}" ) # indexer # this is the list of all values that we want to select diff --git a/pandas/tests/indexes/multi/test_lexsort.py b/pandas/tests/indexes/multi/test_lexsort.py index 5ffb1abc52c3a..07dc8d83bf838 100644 --- a/pandas/tests/indexes/multi/test_lexsort.py +++ b/pandas/tests/indexes/multi/test_lexsort.py @@ -19,7 +19,7 @@ def test_is_lexsorted(self): levels=levels, codes=[[0, 0, 1, 0, 1, 1], [0, 1, 0, 2, 2, 1]] ) assert not index._is_lexsorted() - assert index.lexsort_depth == 0 + assert index._get_lexsort_depth 
== 0 class TestLexsortDepth: @@ -33,14 +33,14 @@ def test_lexsort_depth(self): index = MultiIndex( levels=levels, codes=[[0, 0, 0, 1, 1, 1], [0, 1, 2, 0, 1, 2]], sortorder=2 ) - assert index.lexsort_depth == 2 + assert index._get_lexsort_depth == 2 index = MultiIndex( levels=levels, codes=[[0, 0, 0, 1, 1, 1], [0, 1, 2, 0, 2, 1]], sortorder=1 ) - assert index.lexsort_depth == 1 + assert index._get_lexsort_depth == 1 index = MultiIndex( levels=levels, codes=[[0, 0, 1, 0, 1, 1], [0, 1, 0, 2, 2, 1]], sortorder=0 ) - assert index.lexsort_depth == 0 + assert index._get_lexsort_depth == 0 From 238b568b9242712b43b752118ab6d71c5ea68b08 Mon Sep 17 00:00:00 2001 From: Marco Gorelli Date: Sat, 26 Dec 2020 13:14:28 +0000 Subject: [PATCH 04/20] test lexsort_depth deprecation --- pandas/core/indexes/multi.py | 3 ++- pandas/tests/test_multilevel.py | 6 ++++++ 2 files changed, 8 insertions(+), 1 deletion(-) diff --git a/pandas/core/indexes/multi.py b/pandas/core/indexes/multi.py index 85aa4473f811f..7c8b316ed1dec 100644 --- a/pandas/core/indexes/multi.py +++ b/pandas/core/indexes/multi.py @@ -1851,6 +1851,7 @@ def _is_lexsorted(self) -> bool: """ return self._get_lexsort_depth == self.nlevels + @cache_readonly def lexsort_depth(self): warnings.warn( "MultiIndex.lexsort_depth is deprecated as a public function, " @@ -1859,7 +1860,7 @@ def lexsort_depth(self): FutureWarning, stacklevel=2, ) - return self._get_lexsort_depth() + return self._get_lexsort_depth @cache_readonly def _get_lexsort_depth(self): diff --git a/pandas/tests/test_multilevel.py b/pandas/tests/test_multilevel.py index d2f5e078eaecc..fb1dc719dcca8 100644 --- a/pandas/tests/test_multilevel.py +++ b/pandas/tests/test_multilevel.py @@ -423,3 +423,9 @@ def test_is_lexsorted_deprecation(): # GH 32259 with tm.assert_produces_warning(): MultiIndex.from_arrays([["a", "b", "c"], ["d", "f", "e"]]).is_lexsorted() + + +def test_lexsort_depth_deprecation(): + # GH 32259 + with tm.assert_produces_warning(): + 
MultiIndex.from_arrays([["a", "b", "c"], ["d", "f", "e"]]).lexsort_depth From 893ab7cd43b0acbabf5d79b81acf4a4485d38bde Mon Sep 17 00:00:00 2001 From: Marco Gorelli Date: Sat, 26 Dec 2020 13:18:49 +0000 Subject: [PATCH 05/20] typo --- pandas/core/indexes/multi.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/core/indexes/multi.py b/pandas/core/indexes/multi.py index 7c8b316ed1dec..d40dcf4227cd0 100644 --- a/pandas/core/indexes/multi.py +++ b/pandas/core/indexes/multi.py @@ -1856,7 +1856,7 @@ def lexsort_depth(self): warnings.warn( "MultiIndex.lexsort_depth is deprecated as a public function, " "users should use MultiIndex.is_monotonic_increasing to check " - "is a MultiIndex is sorted.", + "if a MultiIndex is sorted.", FutureWarning, stacklevel=2, ) From b2b1c369038f96136290b1a8a84df17cc4797804 Mon Sep 17 00:00:00 2001 From: Marco Gorelli Date: Sat, 26 Dec 2020 13:22:12 +0000 Subject: [PATCH 06/20] move tests --- pandas/tests/indexes/multi/test_lexsort.py | 11 +++++++++++ pandas/tests/test_multilevel.py | 12 ------------ 2 files changed, 11 insertions(+), 12 deletions(-) diff --git a/pandas/tests/indexes/multi/test_lexsort.py b/pandas/tests/indexes/multi/test_lexsort.py index 07dc8d83bf838..feb9acce8c879 100644 --- a/pandas/tests/indexes/multi/test_lexsort.py +++ b/pandas/tests/indexes/multi/test_lexsort.py @@ -1,4 +1,5 @@ from pandas import MultiIndex +import pandas._testing as tm class TestIsLexsorted: @@ -21,6 +22,11 @@ def test_is_lexsorted(self): assert not index._is_lexsorted() assert index._get_lexsort_depth == 0 + def test_is_lexsorted_deprecation(self): + # GH 32259 + with tm.assert_produces_warning(): + MultiIndex.from_arrays([["a", "b", "c"], ["d", "f", "e"]]).is_lexsorted() + class TestLexsortDepth: def test_lexsort_depth(self): @@ -44,3 +50,8 @@ def test_lexsort_depth(self): levels=levels, codes=[[0, 0, 1, 0, 1, 1], [0, 1, 0, 2, 2, 1]], sortorder=0 ) assert index._get_lexsort_depth == 0 + + def 
test_lexsort_depth_deprecation(self): + # GH 32259 + with tm.assert_produces_warning(): + MultiIndex.from_arrays([["a", "b", "c"], ["d", "f", "e"]]).lexsort_depth diff --git a/pandas/tests/test_multilevel.py b/pandas/tests/test_multilevel.py index fb1dc719dcca8..5d0212dc54e88 100644 --- a/pandas/tests/test_multilevel.py +++ b/pandas/tests/test_multilevel.py @@ -417,15 +417,3 @@ def test_sort_non_lexsorted(self): ) result = sorted.loc[pd.IndexSlice["B":"C", "a":"c"], :] tm.assert_frame_equal(result, expected) - - -def test_is_lexsorted_deprecation(): - # GH 32259 - with tm.assert_produces_warning(): - MultiIndex.from_arrays([["a", "b", "c"], ["d", "f", "e"]]).is_lexsorted() - - -def test_lexsort_depth_deprecation(): - # GH 32259 - with tm.assert_produces_warning(): - MultiIndex.from_arrays([["a", "b", "c"], ["d", "f", "e"]]).lexsort_depth From b8bcc5ee00634a0a13aeeb6229363d8e114b7500 Mon Sep 17 00:00:00 2001 From: Marco Gorelli Date: Thu, 31 Dec 2020 11:18:24 +0000 Subject: [PATCH 07/20] move sortorder check to _lexsort_depth --- pandas/core/indexes/multi.py | 29 +++++++++---------- pandas/tests/frame/methods/test_sort_index.py | 4 +-- pandas/tests/indexes/multi/test_sorting.py | 4 +-- .../indexing/multiindex/test_indexing_slow.py | 2 +- .../tests/indexing/multiindex/test_slice.py | 4 +-- 5 files changed, 20 insertions(+), 23 deletions(-) diff --git a/pandas/core/indexes/multi.py b/pandas/core/indexes/multi.py index d40dcf4227cd0..f5004c438c3bb 100644 --- a/pandas/core/indexes/multi.py +++ b/pandas/core/indexes/multi.py @@ -1849,7 +1849,7 @@ def _is_lexsorted(self) -> bool: ... 
['bb', 'aa', 'aa', 'bb']])._is_lexsorted() False """ - return self._get_lexsort_depth == self.nlevels + return self._lexsort_depth == self.nlevels @cache_readonly def lexsort_depth(self): @@ -1860,15 +1860,9 @@ def lexsort_depth(self): FutureWarning, stacklevel=2, ) - return self._get_lexsort_depth + return self._lexsort_depth @cache_readonly - def _get_lexsort_depth(self): - if self.sortorder is not None: - return self.sortorder - - return self._lexsort_depth() - def _lexsort_depth(self) -> int: """ Compute and return the lexsort_depth, the number of levels of the @@ -1878,6 +1872,9 @@ def _lexsort_depth(self) -> int: ------- int """ + if self.sortorder is not None: + return self.sortorder + int64_codes = [ensure_int64(level_codes) for level_codes in self.codes] for k in range(self.nlevels, 0, -1): if libalgos.is_lexsorted(int64_codes[:k]): @@ -2201,7 +2198,7 @@ def drop(self, codes, level=None, errors="raise"): step = loc.step if loc.step is not None else 1 inds.extend(range(loc.start, loc.stop, step)) elif com.is_bool_indexer(loc): - if self._get_lexsort_depth == 0: + if self._lexsort_depth == 0: warnings.warn( "dropping on a non-lexsorted multi-index " "without a level parameter may impact performance.", @@ -2772,10 +2769,10 @@ def slice_locs(self, start=None, end=None, step=None, kind=None): return super().slice_locs(start, end, step, kind=kind) def _partial_tup_index(self, tup, side="left"): - if len(tup) > self._get_lexsort_depth: + if len(tup) > self._lexsort_depth: raise UnsortedIndexError( f"Key length ({len(tup)}) was greater than MultiIndex lexsort depth " - f"({self._get_lexsort_depth})" + f"({self._lexsort_depth})" ) n = len(tup) @@ -2914,7 +2911,7 @@ def _maybe_to_slice(loc): # break the key into 2 parts based on the lexsort_depth of the index; # the first part returns a continuous slice of the index; the 2nd part # needs linear search within the slice - i = self._get_lexsort_depth + i = self._lexsort_depth lead_key, follow_key = key[:i], key[i:] 
start, stop = ( self.slice_locs(lead_key, lead_key) if lead_key else (0, len(self)) @@ -3167,7 +3164,7 @@ def convert_indexer(start, stop, step, indexer=indexer, codes=level_codes): stop = getattr(stop, "stop", stop) return convert_indexer(start, stop, step) - elif level > 0 or self._get_lexsort_depth == 0 or step is not None: + elif level > 0 or self._lexsort_depth == 0 or step is not None: # need to have like semantics here to right # searching as when we are using a slice # so include the stop+1 (so we include stop) @@ -3182,7 +3179,7 @@ def convert_indexer(start, stop, step, indexer=indexer, codes=level_codes): idx = self._get_loc_single_level_index(level_index, key) - if level > 0 or self._get_lexsort_depth == 0: + if level > 0 or self._lexsort_depth == 0: # Desired level is not sorted locs = np.array(level_codes == idx, dtype=bool, copy=False) if not locs.any(): @@ -3239,10 +3236,10 @@ def get_locs(self, seq): # must be lexsorted to at least as many levels true_slices = [i for (i, s) in enumerate(com.is_true_slices(seq)) if s] - if true_slices and true_slices[-1] >= self._get_lexsort_depth: + if true_slices and true_slices[-1] >= self._lexsort_depth: raise UnsortedIndexError( "MultiIndex slicing requires the index to be lexsorted: slicing " - f"on levels {true_slices}, lexsort depth {self._get_lexsort_depth}" + f"on levels {true_slices}, lexsort depth {self._lexsort_depth}" ) # indexer # this is the list of all values that we want to select diff --git a/pandas/tests/frame/methods/test_sort_index.py b/pandas/tests/frame/methods/test_sort_index.py index 06a58e4054f7d..da0d5caff52bb 100644 --- a/pandas/tests/frame/methods/test_sort_index.py +++ b/pandas/tests/frame/methods/test_sort_index.py @@ -614,7 +614,7 @@ def test_sort_index_level_large_cardinality(self): # it works! 
result = df.sort_index(level=0) - assert result.index._get_lexsort_depth == 3 + assert result.index._lexsort_depth == 3 # GH#2684 (int32) index = MultiIndex.from_arrays([np.arange(4000)] * 3) @@ -623,7 +623,7 @@ def test_sort_index_level_large_cardinality(self): # it works! result = df.sort_index(level=0) assert (result.dtypes.values == df.dtypes.values).all() - assert result.index._get_lexsort_depth == 3 + assert result.index._lexsort_depth == 3 def test_sort_index_level_by_name(self): mi = MultiIndex( diff --git a/pandas/tests/indexes/multi/test_sorting.py b/pandas/tests/indexes/multi/test_sorting.py index 77043da87f3d2..837c0a08fe8f4 100644 --- a/pandas/tests/indexes/multi/test_sorting.py +++ b/pandas/tests/indexes/multi/test_sorting.py @@ -132,7 +132,7 @@ def test_unsortedindex_doc_examples(): dfm.loc[(0, "y"):(1, "z")] assert not dfm.index._is_lexsorted() - assert dfm.index._get_lexsort_depth == 1 + assert dfm.index._lexsort_depth == 1 # sort it dfm = dfm.sort_index() @@ -140,7 +140,7 @@ def test_unsortedindex_doc_examples(): dfm.loc[(0, "y"):(1, "z")] assert dfm.index._is_lexsorted() - assert dfm.index._get_lexsort_depth == 2 + assert dfm.index._lexsort_depth == 2 def test_reconstruct_sort(): diff --git a/pandas/tests/indexing/multiindex/test_indexing_slow.py b/pandas/tests/indexing/multiindex/test_indexing_slow.py index 1f96b56f49d0e..eaa7029b118b1 100644 --- a/pandas/tests/indexing/multiindex/test_indexing_slow.py +++ b/pandas/tests/indexing/multiindex/test_indexing_slow.py @@ -86,5 +86,5 @@ def validate(mi, df, key): df = frame.sort_values(by=cols[:lexsort_depth]) mi = df.set_index(cols[:-1]) - assert not mi.index._get_lexsort_depth < lexsort_depth + assert not mi.index._lexsort_depth < lexsort_depth validate(mi, df, key) diff --git a/pandas/tests/indexing/multiindex/test_slice.py b/pandas/tests/indexing/multiindex/test_slice.py index 75aad4b13de3e..b60135a802b8e 100644 --- a/pandas/tests/indexing/multiindex/test_slice.py +++ 
b/pandas/tests/indexing/multiindex/test_slice.py @@ -144,9 +144,9 @@ def test_per_axis_per_level_getitem(self): tm.assert_frame_equal(result, expected) # not lexsorted - assert df.index._get_lexsort_depth == 2 + assert df.index._lexsort_depth == 2 df = df.sort_index(level=1, axis=0) - assert df.index._get_lexsort_depth == 0 + assert df.index._lexsort_depth == 0 msg = ( "MultiIndex slicing requires the index to be " From 4ab8981d5588d4c3a98f9d4fb8f4c0dc3a6182e6 Mon Sep 17 00:00:00 2001 From: Marco Gorelli Date: Thu, 31 Dec 2020 11:19:21 +0000 Subject: [PATCH 08/20] note lexsort_depth deprecation in whatsnew note --- doc/source/whatsnew/v1.3.0.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/source/whatsnew/v1.3.0.rst b/doc/source/whatsnew/v1.3.0.rst index f8d89a343778e..bc9c3832613e6 100644 --- a/doc/source/whatsnew/v1.3.0.rst +++ b/doc/source/whatsnew/v1.3.0.rst @@ -149,7 +149,7 @@ Deprecations - Deprecating allowing scalars passed to the :class:`Categorical` constructor (:issue:`38433`) - Deprecated allowing subclass-specific keyword arguments in the :class:`Index` constructor, use the specific subclass directly instead (:issue:`14093`,:issue:`21311`,:issue:`22315`,:issue:`26974`) - Deprecated ``astype`` of datetimelike (``timedelta64[ns]``, ``datetime64[ns]``, ``Datetime64TZDtype``, ``PeriodDtype``) to integer dtypes, use ``values.view(...)`` instead (:issue:`38544`) -- Deprecated :meth:`MultiIndex.is_lexsorted` as a public method, users should use :meth:`MultiIndex.is_monotonic_increasing` instead (:issue:`32259`) +- Deprecated :meth:`MultiIndex.is_lexsorted` and :meth:`MultiIndex.lexsort_depth` as public methods, users should use :meth:`MultiIndex.is_monotonic_increasing` instead (:issue:`32259`) - ..
--------------------------------------------------------------------------- From 72b0abc9732503026b7213213dfa9fe73d55fa74 Mon Sep 17 00:00:00 2001 From: Marco Gorelli Date: Thu, 31 Dec 2020 11:26:14 +0000 Subject: [PATCH 09/20] remove is_lexsorted assertion when it's the same as is_monotonic --- pandas/tests/frame/methods/test_sort_index.py | 7 ------- pandas/tests/indexes/multi/test_sorting.py | 6 ------ pandas/tests/indexing/multiindex/test_sorted.py | 3 --- pandas/tests/test_multilevel.py | 2 -- 4 files changed, 18 deletions(-) diff --git a/pandas/tests/frame/methods/test_sort_index.py b/pandas/tests/frame/methods/test_sort_index.py index da0d5caff52bb..3be6a8453420e 100644 --- a/pandas/tests/frame/methods/test_sort_index.py +++ b/pandas/tests/frame/methods/test_sort_index.py @@ -35,7 +35,6 @@ def test_sort_index_and_reconstruction_doc_example(self): ), ) result = df.sort_index() - assert result.index._is_lexsorted() assert result.index.is_monotonic tm.assert_frame_equal(result, expected) @@ -43,7 +42,6 @@ def test_sort_index_and_reconstruction_doc_example(self): # reconstruct result = df.sort_index().copy() result.index = result.index._sort_levels_monotonic() - assert result.index._is_lexsorted() assert result.index.is_monotonic tm.assert_frame_equal(result, expected) @@ -531,7 +529,6 @@ def test_sort_index_and_reconstruction(self): index=MultiIndex.from_product([[0.5, 0.8], list("ab")]), ) result = result.sort_index() - assert result.index._is_lexsorted() assert result.index.is_monotonic tm.assert_frame_equal(result, expected) @@ -550,7 +547,6 @@ def test_sort_index_and_reconstruction(self): concatted = pd.concat([df, df], keys=[0.8, 0.5]) result = concatted.sort_index() - assert result.index._is_lexsorted() assert result.index.is_monotonic tm.assert_frame_equal(result, expected) @@ -567,13 +563,10 @@ def test_sort_index_and_reconstruction(self): df.columns = df.columns.set_levels( pd.to_datetime(df.columns.levels[1]), level=1 ) - assert not 
df.columns._is_lexsorted() assert not df.columns.is_monotonic result = df.sort_index(axis=1) - assert result.columns._is_lexsorted() assert result.columns.is_monotonic result = df.sort_index(axis=1, level=1) - assert result.columns._is_lexsorted() assert result.columns.is_monotonic # TODO: better name, de-duplicate with test_sort_index_level above diff --git a/pandas/tests/indexes/multi/test_sorting.py b/pandas/tests/indexes/multi/test_sorting.py index 837c0a08fe8f4..6cc4bb153f5b0 100644 --- a/pandas/tests/indexes/multi/test_sorting.py +++ b/pandas/tests/indexes/multi/test_sorting.py @@ -147,11 +147,9 @@ def test_reconstruct_sort(): # starts off lexsorted & monotonic mi = MultiIndex.from_arrays([["A", "A", "B", "B", "B"], [1, 2, 1, 2, 3]]) - assert mi._is_lexsorted() assert mi.is_monotonic recons = mi._sort_levels_monotonic() - assert recons._is_lexsorted() assert recons.is_monotonic assert mi is recons @@ -163,11 +161,9 @@ def test_reconstruct_sort(): [("z", "a"), ("x", "a"), ("y", "b"), ("x", "b"), ("y", "a"), ("z", "b")], names=["one", "two"], ) - assert not mi._is_lexsorted() assert not mi.is_monotonic recons = mi._sort_levels_monotonic() - assert not recons._is_lexsorted() assert not recons.is_monotonic assert mi.equals(recons) @@ -179,11 +175,9 @@ def test_reconstruct_sort(): codes=[[0, 1, 0, 2], [2, 0, 0, 1]], names=["col1", "col2"], ) - assert not mi._is_lexsorted() assert not mi.is_monotonic recons = mi._sort_levels_monotonic() - assert not recons._is_lexsorted() assert not recons.is_monotonic assert mi.equals(recons) diff --git a/pandas/tests/indexing/multiindex/test_sorted.py b/pandas/tests/indexing/multiindex/test_sorted.py index 934f40d0b5aff..b3e8c4a83b9fc 100644 --- a/pandas/tests/indexing/multiindex/test_sorted.py +++ b/pandas/tests/indexing/multiindex/test_sorted.py @@ -50,16 +50,13 @@ def test_frame_getitem_not_sorted2(self, key): with tm.assert_produces_warning(FutureWarning): return_value = df2.index.set_codes([0, 1, 0, 2], level="col1", 
inplace=True) assert return_value is None - assert not df2.index._is_lexsorted() assert not df2.index.is_monotonic assert df2_original.index.equals(df2.index) expected = df2.sort_index(key=key) - assert expected.index._is_lexsorted() assert expected.index.is_monotonic result = df2.sort_index(level=0, key=key) - assert result.index._is_lexsorted() assert result.index.is_monotonic tm.assert_frame_equal(result, expected) diff --git a/pandas/tests/test_multilevel.py b/pandas/tests/test_multilevel.py index 5d0212dc54e88..88fecc7635475 100644 --- a/pandas/tests/test_multilevel.py +++ b/pandas/tests/test_multilevel.py @@ -401,11 +401,9 @@ def test_sort_non_lexsorted(self): ) df = DataFrame({"col": range(len(idx))}, index=idx, dtype="int64") - assert df.index._is_lexsorted() is False assert df.index.is_monotonic is False sorted = df.sort_index() - assert sorted.index._is_lexsorted() is True assert sorted.index.is_monotonic is True expected = DataFrame( From a0a709c3830fefb82edb027b1b3a4756f874b70c Mon Sep 17 00:00:00 2001 From: Marco Gorelli Date: Thu, 31 Dec 2020 11:56:03 +0000 Subject: [PATCH 10/20] fixup property usage in docs --- doc/source/user_guide/advanced.rst | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/doc/source/user_guide/advanced.rst b/doc/source/user_guide/advanced.rst index 2c948f67c14c6..6377aeb0e7c74 100644 --- a/doc/source/user_guide/advanced.rst +++ b/doc/source/user_guide/advanced.rst @@ -663,13 +663,13 @@ index is sorted: .. ipython:: python - dfm.index.is_monotonic_increasing() + dfm.index.is_monotonic_increasing .. ipython:: python dfm = dfm.sort_index() dfm - dfm.index.is_monotonic_increasing() + dfm.index.is_monotonic_increasing And now selection works as expected. 
From be2a692cfccf2a63378733f4a69b5f3421f20610 Mon Sep 17 00:00:00 2001 From: Marco Gorelli Date: Thu, 31 Dec 2020 11:58:06 +0000 Subject: [PATCH 11/20] stage unstaged file --- pandas/tests/indexes/multi/test_lexsort.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/pandas/tests/indexes/multi/test_lexsort.py b/pandas/tests/indexes/multi/test_lexsort.py index feb9acce8c879..69e1dcb944efe 100644 --- a/pandas/tests/indexes/multi/test_lexsort.py +++ b/pandas/tests/indexes/multi/test_lexsort.py @@ -20,7 +20,7 @@ def test_is_lexsorted(self): levels=levels, codes=[[0, 0, 1, 0, 1, 1], [0, 1, 0, 2, 2, 1]] ) assert not index._is_lexsorted() - assert index._get_lexsort_depth == 0 + assert index.lexsort_depth == 0 def test_is_lexsorted_deprecation(self): # GH 32259 @@ -39,17 +39,17 @@ def test_lexsort_depth(self): index = MultiIndex( levels=levels, codes=[[0, 0, 0, 1, 1, 1], [0, 1, 2, 0, 1, 2]], sortorder=2 ) - assert index._get_lexsort_depth == 2 + assert index.lexsort_depth == 2 index = MultiIndex( levels=levels, codes=[[0, 0, 0, 1, 1, 1], [0, 1, 2, 0, 2, 1]], sortorder=1 ) - assert index._get_lexsort_depth == 1 + assert index.lexsort_depth == 1 index = MultiIndex( levels=levels, codes=[[0, 0, 1, 0, 1, 1], [0, 1, 0, 2, 2, 1]], sortorder=0 ) - assert index._get_lexsort_depth == 0 + assert index.lexsort_depth == 0 def test_lexsort_depth_deprecation(self): # GH 32259 From 7d1d279936e22db9b3cb2d7d406b17b4a8a434cd Mon Sep 17 00:00:00 2001 From: Marco Gorelli Date: Fri, 1 Jan 2021 14:32:54 +0000 Subject: [PATCH 12/20] use as property --- pandas/core/indexes/multi.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/pandas/core/indexes/multi.py b/pandas/core/indexes/multi.py index f5004c438c3bb..2f89b12e3c367 100644 --- a/pandas/core/indexes/multi.py +++ b/pandas/core/indexes/multi.py @@ -391,11 +391,11 @@ def _verify_integrity( f"Level values must be unique: {list(level)} on level {i}" ) if self.sortorder is not None: - if 
self.sortorder > self._lexsort_depth(): + if self.sortorder > self._lexsort_depth: raise ValueError( "Value for sortorder must be inferior or equal to actual " f"lexsort_depth: sortorder {self.sortorder} " - f"with lexsort_depth {self._lexsort_depth()}" + f"with lexsort_depth {self._lexsort_depth}" ) codes = [ From 39d163fbf511f53ab7af6af502893140efa28c5f Mon Sep 17 00:00:00 2001 From: Marco Gorelli Date: Fri, 1 Jan 2021 15:39:49 +0000 Subject: [PATCH 13/20] skip sortorder --- pandas/core/indexes/multi.py | 3 --- pandas/tests/indexes/multi/test_lexsort.py | 8 ++++---- 2 files changed, 4 insertions(+), 7 deletions(-) diff --git a/pandas/core/indexes/multi.py b/pandas/core/indexes/multi.py index 68529c190126f..554843508ae89 100644 --- a/pandas/core/indexes/multi.py +++ b/pandas/core/indexes/multi.py @@ -1872,9 +1872,6 @@ def _lexsort_depth(self) -> int: ------- int """ - if self.sortorder is not None: - return self.sortorder - int64_codes = [ensure_int64(level_codes) for level_codes in self.codes] for k in range(self.nlevels, 0, -1): if libalgos.is_lexsorted(int64_codes[:k]): diff --git a/pandas/tests/indexes/multi/test_lexsort.py b/pandas/tests/indexes/multi/test_lexsort.py index 69e1dcb944efe..c37172ad7a980 100644 --- a/pandas/tests/indexes/multi/test_lexsort.py +++ b/pandas/tests/indexes/multi/test_lexsort.py @@ -20,7 +20,7 @@ def test_is_lexsorted(self): levels=levels, codes=[[0, 0, 1, 0, 1, 1], [0, 1, 0, 2, 2, 1]] ) assert not index._is_lexsorted() - assert index.lexsort_depth == 0 + assert index._lexsort_depth == 0 def test_is_lexsorted_deprecation(self): # GH 32259 @@ -39,17 +39,17 @@ def test_lexsort_depth(self): index = MultiIndex( levels=levels, codes=[[0, 0, 0, 1, 1, 1], [0, 1, 2, 0, 1, 2]], sortorder=2 ) - assert index.lexsort_depth == 2 + assert index._lexsort_depth == 2 index = MultiIndex( levels=levels, codes=[[0, 0, 0, 1, 1, 1], [0, 1, 2, 0, 2, 1]], sortorder=1 ) - assert index.lexsort_depth == 1 + assert index._lexsort_depth == 1 index = MultiIndex( 
levels=levels, codes=[[0, 0, 1, 0, 1, 1], [0, 1, 0, 2, 2, 1]], sortorder=0 ) - assert index.lexsort_depth == 0 + assert index._lexsort_depth == 0 def test_lexsort_depth_deprecation(self): # GH 32259 From 6b9c698ff0a3bd4baed878e5692679ae7db11967 Mon Sep 17 00:00:00 2001 From: Marco Gorelli Date: Fri, 1 Jan 2021 15:54:36 +0000 Subject: [PATCH 14/20] factor out _codes_lexsort_depth --- pandas/core/indexes/multi.py | 26 +++++++++++++++----------- 1 file changed, 15 insertions(+), 11 deletions(-) diff --git a/pandas/core/indexes/multi.py b/pandas/core/indexes/multi.py index 554843508ae89..6c414ef4f0d69 100644 --- a/pandas/core/indexes/multi.py +++ b/pandas/core/indexes/multi.py @@ -391,11 +391,11 @@ def _verify_integrity( f"Level values must be unique: {list(level)} on level {i}" ) if self.sortorder is not None: - if self.sortorder > self._lexsort_depth: + if self.sortorder > self._codes_lexsort_depth(): raise ValueError( "Value for sortorder must be inferior or equal to actual " f"lexsort_depth: sortorder {self.sortorder} " - f"with lexsort_depth {self._lexsort_depth}" + f"with lexsort_depth {self._codes_lexsort_depth()}" ) codes = [ @@ -1830,23 +1830,23 @@ def _is_lexsorted(self) -> bool: In the below examples, the first level of the MultiIndex is sorted because a<b<c, so there is no need to look at the next level. - >>> pd.MultiIndex.from_arrays([['a', 'b'], ['d', 'e']])._is_lexsorted() + >>> pd.MultiIndex.from_arrays([['a', 'b', 'c'], ['d', 'e', 'f']]).is_lexsorted() True - >>> pd.MultiIndex.from_arrays([['a', 'b'], ['d', 'f']])._is_lexsorted() + >>> pd.MultiIndex.from_arrays([['a', 'b', 'c'], ['d', 'f', 'e']]).is_lexsorted() True In case there is a tie, the lexicographical sorting looks at the next level of the MultiIndex.
- >>> pd.MultiIndex.from_arrays([[0, 1, 1], ['a', 'b', 'c']])._is_lexsorted() + >>> pd.MultiIndex.from_arrays([[0, 1, 1], ['a', 'b', 'c']]).is_lexsorted() True - >>> pd.MultiIndex.from_arrays([[0, 1, 1], ['a', 'c', 'b']])._is_lexsorted() + >>> pd.MultiIndex.from_arrays([[0, 1, 1], ['a', 'c', 'b']]).is_lexsorted() False >>> pd.MultiIndex.from_arrays([['a', 'a', 'b', 'b'], - ... ['aa', 'bb', 'aa', 'bb']])._is_lexsorted() + ... ['aa', 'bb', 'aa', 'bb']]).is_lexsorted() True >>> pd.MultiIndex.from_arrays([['a', 'a', 'b', 'b'], - ... ['bb', 'aa', 'aa', 'bb']])._is_lexsorted() + ... ['bb', 'aa', 'aa', 'bb']]).is_lexsorted() False """ return self._lexsort_depth == self.nlevels @@ -1854,9 +1854,8 @@ def _is_lexsorted(self) -> bool: @cache_readonly def lexsort_depth(self): warnings.warn( - "MultiIndex.lexsort_depth is deprecated as a public function, " - "users should use MultiIndex.is_monotonic_increasing to check " - "if a MultiIndex is sorted.", + "MultiIndex.is_lexsorted is deprecated as a public function, " + "users should use MultiIndex.is_monotonic_increasing instead.", FutureWarning, stacklevel=2, ) @@ -1872,6 +1871,11 @@ def _lexsort_depth(self) -> int: ------- int """ + if self.sortorder is not None: + return self.sortorder + return self._codes_lexsort_depth() + + def _codes_lexsort_depth(self) -> int: int64_codes = [ensure_int64(level_codes) for level_codes in self.codes] for k in range(self.nlevels, 0, -1): if libalgos.is_lexsorted(int64_codes[:k]): From 74d8c7eafd2f5aa682edd4974052e246fb721227 Mon Sep 17 00:00:00 2001 From: Marco Gorelli Date: Fri, 1 Jan 2021 17:26:32 +0000 Subject: [PATCH 15/20] update old release notes --- doc/source/reference/indexing.rst | 1 - doc/source/whatsnew/v0.20.0.rst | 38 +++++++++++++++++++++++-------- 2 files changed, 28 insertions(+), 11 deletions(-) diff --git a/doc/source/reference/indexing.rst b/doc/source/reference/indexing.rst index 8446b384b8fb8..1a8c21a2c1a74 100644 --- a/doc/source/reference/indexing.rst +++ 
b/doc/source/reference/indexing.rst @@ -301,7 +301,6 @@ MultiIndex components MultiIndex.set_codes MultiIndex.to_flat_index MultiIndex.to_frame - MultiIndex.is_lexsorted MultiIndex.sortlevel MultiIndex.droplevel MultiIndex.swaplevel diff --git a/doc/source/whatsnew/v0.20.0.rst b/doc/source/whatsnew/v0.20.0.rst index 2cb8e13e9a18a..01966c9f50fb1 100644 --- a/doc/source/whatsnew/v0.20.0.rst +++ b/doc/source/whatsnew/v0.20.0.rst @@ -873,10 +873,13 @@ This is *unchanged* from prior versions, but shown for illustration purposes: index=pd.MultiIndex.from_product([list('BA'), range(3)])) df -.. ipython:: python +.. code-block:: python - df.index.is_lexsorted() - df.index.is_monotonic + In [87]: df.index.is_lexsorted() + Out[87]: False + + In [88]: df.index.is_monotonic + Out[88]: False Sorting works as expected @@ -884,10 +887,13 @@ Sorting works as expected df.sort_index() -.. ipython:: python +.. code-block:: python + + In [90]: df.sort_index().index.is_lexsorted() + Out[90]: True - df.sort_index().index.is_lexsorted() - df.sort_index().index.is_monotonic + In [91]: df.sort_index().index.is_monotonic + Out[91]: True However, this example, which has a non-monotonic 2nd level, doesn't behave as desired. @@ -919,11 +925,23 @@ Previous behavior: New behavior: -.. ipython:: python +.. code-block:: python - df.sort_index() - df.sort_index().index.is_lexsorted() - df.sort_index().index.is_monotonic + In [94]: df.sort_index() + Out[94]: + value + a aa 2 + bb 1 + b aa 4 + bb 3 + + [4 rows x 1 columns] + + In [95]: df.sort_index().index.is_lexsorted() + Out[95]: True + + In [96]: df.sort_index().index.is_monotonic + Out[96]: True .. 
_whatsnew_0200.api_breaking.groupby_describe: From 3798cf5c9772fbd86fdf0df65b98aeeaa7b8dc05 Mon Sep 17 00:00:00 2001 From: Marco Gorelli Date: Sat, 2 Jan 2021 12:15:09 +0000 Subject: [PATCH 16/20] use module-level _lexsort_depth --- pandas/core/indexes/multi.py | 23 ++++++++++++----------- 1 file changed, 12 insertions(+), 11 deletions(-) diff --git a/pandas/core/indexes/multi.py b/pandas/core/indexes/multi.py index 6c414ef4f0d69..a1758577d2be8 100644 --- a/pandas/core/indexes/multi.py +++ b/pandas/core/indexes/multi.py @@ -176,6 +176,14 @@ def new_meth(self_or_cls, *args, **kwargs): return new_meth +def _lexsort_depth(codes, nlevels) -> int: + int64_codes = [ensure_int64(level_codes) for level_codes in codes] + for k in range(nlevels, 0, -1): + if libalgos.is_lexsorted(int64_codes[:k]): + return k + return 0 + + class MultiIndex(Index): """ A multi-level, or hierarchical, index object for pandas objects. @@ -391,11 +399,11 @@ def _verify_integrity( f"Level values must be unique: {list(level)} on level {i}" ) if self.sortorder is not None: - if self.sortorder > self._codes_lexsort_depth(): + if self.sortorder > _lexsort_depth(self.codes, self.nlevels): raise ValueError( "Value for sortorder must be inferior or equal to actual " f"lexsort_depth: sortorder {self.sortorder} " - f"with lexsort_depth {self._codes_lexsort_depth()}" + f"with lexsort_depth {_lexsort_depth(self.codes, self.nlevels)}" ) codes = [ @@ -1851,7 +1859,7 @@ def _is_lexsorted(self) -> bool: """ return self._lexsort_depth == self.nlevels - @cache_readonly + @property def lexsort_depth(self): warnings.warn( "MultiIndex.is_lexsorted is deprecated as a public function, " @@ -1873,14 +1881,7 @@ def _lexsort_depth(self) -> int: """ if self.sortorder is not None: return self.sortorder - return self._codes_lexsort_depth() - - def _codes_lexsort_depth(self) -> int: - int64_codes = [ensure_int64(level_codes) for level_codes in self.codes] - for k in range(self.nlevels, 0, -1): - if 
libalgos.is_lexsorted(int64_codes[:k]): - return k - return 0 + return _lexsort_depth(self.codes, self.nlevels) def _sort_levels_monotonic(self): """ From dfc779d7429a15fba303bc4e3433c290cc6f13a4 Mon Sep 17 00:00:00 2001 From: Marco Gorelli Date: Sat, 2 Jan 2021 12:32:56 +0000 Subject: [PATCH 17/20] add type hints --- pandas/core/indexes/multi.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/pandas/core/indexes/multi.py b/pandas/core/indexes/multi.py index a1758577d2be8..069f0f2fd3b4c 100644 --- a/pandas/core/indexes/multi.py +++ b/pandas/core/indexes/multi.py @@ -176,7 +176,8 @@ def new_meth(self_or_cls, *args, **kwargs): return new_meth -def _lexsort_depth(codes, nlevels) -> int: +def _lexsort_depth(codes: List[np.ndarray], nlevels: int) -> int: + """Count depth with which codes are lexsorted, up to nlevels.""" int64_codes = [ensure_int64(level_codes) for level_codes in codes] for k in range(nlevels, 0, -1): if libalgos.is_lexsorted(int64_codes[:k]): From fdbb3664b8d9234227c367d00f3f1834ee878f10 Mon Sep 17 00:00:00 2001 From: Marco Gorelli Date: Sat, 2 Jan 2021 12:35:22 +0000 Subject: [PATCH 18/20] fixup indents in old whatsnew note --- doc/source/whatsnew/v0.20.0.rst | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/doc/source/whatsnew/v0.20.0.rst b/doc/source/whatsnew/v0.20.0.rst index 01966c9f50fb1..5dac3a26424a8 100644 --- a/doc/source/whatsnew/v0.20.0.rst +++ b/doc/source/whatsnew/v0.20.0.rst @@ -931,9 +931,9 @@ New behavior: Out[94]: value a aa 2 - bb 1 + bb 1 b aa 4 - bb 3 + bb 3 [4 rows x 1 columns] From fff7297806572af4e806979d52ab94e60afad1be Mon Sep 17 00:00:00 2001 From: Marco Gorelli Date: Sat, 2 Jan 2021 12:36:36 +0000 Subject: [PATCH 19/20] clarify docstring --- pandas/core/indexes/multi.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/core/indexes/multi.py b/pandas/core/indexes/multi.py index 069f0f2fd3b4c..17cc2e7208e4c 100644 --- a/pandas/core/indexes/multi.py +++ 
b/pandas/core/indexes/multi.py @@ -177,7 +177,7 @@ def new_meth(self_or_cls, *args, **kwargs): def _lexsort_depth(codes: List[np.ndarray], nlevels: int) -> int: - """Count depth with which codes are lexsorted, up to nlevels.""" + """Count depth (up to a maximum of `nlevels`) with which codes are lexsorted.""" int64_codes = [ensure_int64(level_codes) for level_codes in codes] for k in range(nlevels, 0, -1): if libalgos.is_lexsorted(int64_codes[:k]): From fe4aaa6a1e568c1eb812f91ae9e62bf95d7cd6e9 Mon Sep 17 00:00:00 2001 From: Marco Gorelli Date: Sun, 3 Jan 2021 17:59:43 +0000 Subject: [PATCH 20/20] move _lexsort_depth to after class definition --- pandas/core/indexes/multi.py | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/pandas/core/indexes/multi.py b/pandas/core/indexes/multi.py index 17cc2e7208e4c..85d4b8eebedf7 100644 --- a/pandas/core/indexes/multi.py +++ b/pandas/core/indexes/multi.py @@ -176,15 +176,6 @@ def new_meth(self_or_cls, *args, **kwargs): return new_meth -def _lexsort_depth(codes: List[np.ndarray], nlevels: int) -> int: - """Count depth (up to a maximum of `nlevels`) with which codes are lexsorted.""" - int64_codes = [ensure_int64(level_codes) for level_codes in codes] - for k in range(nlevels, 0, -1): - if libalgos.is_lexsorted(int64_codes[:k]): - return k - return 0 - - class MultiIndex(Index): """ A multi-level, or hierarchical, index object for pandas objects. @@ -3788,6 +3779,15 @@ def isin(self, values, level=None): __inv__ = make_invalid_op("__inv__") +def _lexsort_depth(codes: List[np.ndarray], nlevels: int) -> int: + """Count depth (up to a maximum of `nlevels`) with which codes are lexsorted.""" + int64_codes = [ensure_int64(level_codes) for level_codes in codes] + for k in range(nlevels, 0, -1): + if libalgos.is_lexsorted(int64_codes[:k]): + return k + return 0 + + def sparsify_labels(label_list, start: int = 0, sentinel=""): pivoted = list(zip(*label_list)) k = len(label_list)