diff --git a/asv_bench/benchmarks/multiindex_object.py b/asv_bench/benchmarks/multiindex_object.py index ca1d71f50bd5f..89d74e784b64c 100644 --- a/asv_bench/benchmarks/multiindex_object.py +++ b/asv_bench/benchmarks/multiindex_object.py @@ -8,6 +8,7 @@ MultiIndex, RangeIndex, Series, + array, date_range, ) @@ -176,6 +177,20 @@ def time_sortlevel_one(self): self.mi.sortlevel(1) +class SortValues: + + params = ["int64", "Int64"] + param_names = ["dtype"] + + def setup(self, dtype): + a = array(np.tile(np.arange(100), 1000), dtype=dtype) + b = array(np.tile(np.arange(1000), 100), dtype=dtype) + self.mi = MultiIndex.from_arrays([a, b]) + + def time_sort_values(self, dtype): + self.mi.sort_values() + + class Values: def setup_cache(self): diff --git a/doc/source/whatsnew/v1.6.0.rst b/doc/source/whatsnew/v1.6.0.rst index c393b8a57f805..59c27560c4e1e 100644 --- a/doc/source/whatsnew/v1.6.0.rst +++ b/doc/source/whatsnew/v1.6.0.rst @@ -101,6 +101,7 @@ Deprecations Performance improvements ~~~~~~~~~~~~~~~~~~~~~~~~ - Performance improvement in :meth:`.GroupBy.median` for nullable dtypes (:issue:`37493`) +- Performance improvement in :meth:`MultiIndex.argsort` and :meth:`MultiIndex.sort_values` (:issue:`48406`) - Performance improvement in :meth:`.GroupBy.mean` and :meth:`.GroupBy.var` for extension array dtypes (:issue:`37493`) - Performance improvement for :meth:`MultiIndex.unique` (:issue:`48335`) - diff --git a/pandas/core/indexes/multi.py b/pandas/core/indexes/multi.py index 6bf00bc5bc366..0ebd1a3d718d8 100644 --- a/pandas/core/indexes/multi.py +++ b/pandas/core/indexes/multi.py @@ -2225,6 +2225,10 @@ def append(self, other): return Index._with_infer(new_tuples) def argsort(self, *args, **kwargs) -> npt.NDArray[np.intp]: + if len(args) == 0 and len(kwargs) == 0: + # np.lexsort is significantly faster than self._values.argsort() + values = [self._get_level_values(i) for i in reversed(range(self.nlevels))] + return np.lexsort(values) return self._values.argsort(*args, **kwargs) @Appender(_index_shared_docs["repeat"] % _index_doc_kwargs) diff --git a/pandas/tests/indexes/multi/test_setops.py b/pandas/tests/indexes/multi/test_setops.py index d29abc1bc6a9f..e15809a751b73 100644 --- a/pandas/tests/indexes/multi/test_setops.py +++ b/pandas/tests/indexes/multi/test_setops.py @@ -530,11 +530,6 @@ def test_union_duplicates(index, request): if index.empty or isinstance(index, (IntervalIndex, CategoricalIndex)): # No duplicates in empty indexes return - if index.dtype.kind == "c": - mark = pytest.mark.xfail( - reason="sort_values() call raises bc complex objects are not comparable" - ) - request.node.add_marker(mark) values = index.unique().values.tolist() mi1 = MultiIndex.from_arrays([values, [1] * len(values)])