Skip to content

Commit 769c242

Browse files
authored
PERF: MultiIndex.argsort / MultiIndex.sort_values (#48406)
* perf improvement in MultiIndex.argsort and MultiIndex.sort_values
* whatsnew
* add asv for Int64
1 parent 8fef018 commit 769c242

File tree

4 files changed

+20
-5
lines changed

4 files changed

+20
-5
lines changed

asv_bench/benchmarks/multiindex_object.py

+15
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,7 @@
88
MultiIndex,
99
RangeIndex,
1010
Series,
11+
array,
1112
date_range,
1213
)
1314

@@ -176,6 +177,20 @@ def time_sortlevel_one(self):
176177
self.mi.sortlevel(1)
177178

178179

180+
class SortValues:
181+
182+
params = ["int64", "Int64"]
183+
param_names = ["dtype"]
184+
185+
def setup(self, dtype):
186+
a = array(np.tile(np.arange(100), 1000), dtype=dtype)
187+
b = array(np.tile(np.arange(1000), 100), dtype=dtype)
188+
self.mi = MultiIndex.from_arrays([a, b])
189+
190+
def time_sort_values(self, dtype):
191+
self.mi.sort_values()
192+
193+
179194
class Values:
180195
def setup_cache(self):
181196

doc/source/whatsnew/v1.6.0.rst

+1
Original file line numberDiff line numberDiff line change
@@ -101,6 +101,7 @@ Deprecations
101101
Performance improvements
102102
~~~~~~~~~~~~~~~~~~~~~~~~
103103
- Performance improvement in :meth:`.GroupBy.median` for nullable dtypes (:issue:`37493`)
104+
- Performance improvement in :meth:`MultiIndex.argsort` and :meth:`MultiIndex.sort_values` (:issue:`48406`)
104105
- Performance improvement in :meth:`.GroupBy.mean` and :meth:`.GroupBy.var` for extension array dtypes (:issue:`37493`)
105106
- Performance improvement for :meth:`MultiIndex.unique` (:issue:`48335`)
106107
-

pandas/core/indexes/multi.py

+4
Original file line numberDiff line numberDiff line change
@@ -2225,6 +2225,10 @@ def append(self, other):
22252225
return Index._with_infer(new_tuples)
22262226

22272227
def argsort(self, *args, **kwargs) -> npt.NDArray[np.intp]:
2228+
if len(args) == 0 and len(kwargs) == 0:
2229+
# np.lexsort is significantly faster than self._values.argsort()
2230+
values = [self._get_level_values(i) for i in reversed(range(self.nlevels))]
2231+
return np.lexsort(values)
22282232
return self._values.argsort(*args, **kwargs)
22292233

22302234
@Appender(_index_shared_docs["repeat"] % _index_doc_kwargs)

pandas/tests/indexes/multi/test_setops.py

-5
Original file line numberDiff line numberDiff line change
@@ -530,11 +530,6 @@ def test_union_duplicates(index, request):
530530
if index.empty or isinstance(index, (IntervalIndex, CategoricalIndex)):
531531
# No duplicates in empty indexes
532532
return
533-
if index.dtype.kind == "c":
534-
mark = pytest.mark.xfail(
535-
reason="sort_values() call raises bc complex objects are not comparable"
536-
)
537-
request.node.add_marker(mark)
538533

539534
values = index.unique().values.tolist()
540535
mi1 = MultiIndex.from_arrays([values, [1] * len(values)])

0 commit comments

Comments
 (0)