Skip to content

Commit 3ad64b1

Browse files
committed
ENH partial sorting for mi in sortlevel
1 parent c70b4ae commit 3ad64b1

File tree

6 files changed

+62
-13
lines changed

6 files changed

+62
-13
lines changed

pandas/core/frame.py

+6-2
Original file line numberDiff line numberDiff line change
@@ -2634,7 +2634,8 @@ def trans(v):
26342634
else:
26352635
return self.take(indexer, axis=axis, convert=False, is_copy=False)
26362636

2637-
def sortlevel(self, level=0, axis=0, ascending=True, inplace=False):
2637+
def sortlevel(self, level=0, axis=0, ascending=True,
2638+
inplace=False, sort_remaining=True):
26382639
"""
26392640
Sort multilevel index by chosen axis and primary level. Data will be
26402641
lexicographically sorted by the chosen level followed by the other
@@ -2647,6 +2648,8 @@ def sortlevel(self, level=0, axis=0, ascending=True, inplace=False):
26472648
ascending : boolean, default True
26482649
inplace : boolean, default False
26492650
Sort the DataFrame without creating a new instance
2651+
sort_remaining : boolean, default True
2652+
Sort by the other levels too.
26502653
26512654
Returns
26522655
-------
@@ -2657,7 +2660,8 @@ def sortlevel(self, level=0, axis=0, ascending=True, inplace=False):
26572660
if not isinstance(the_axis, MultiIndex):
26582661
raise TypeError('can only sort by level with a hierarchical index')
26592662

2660-
new_axis, indexer = the_axis.sortlevel(level, ascending=ascending)
2663+
new_axis, indexer = the_axis.sortlevel(level, ascending=ascending,
2664+
sort_remaining=sort_remaining)
26612665

26622666
if self._is_mixed_type and not inplace:
26632667
ax = 'index' if axis == 0 else 'columns'

pandas/core/index.py

+22-9
Original file line numberDiff line numberDiff line change
@@ -3014,17 +3014,19 @@ def reorder_levels(self, order):
30143014
def __getslice__(self, i, j):
30153015
return self.__getitem__(slice(i, j))
30163016

3017-
def sortlevel(self, level=0, ascending=True):
3017+
def sortlevel(self, level=0, ascending=True, sort_remaining=True):
30183018
"""
30193019
Sort MultiIndex at the requested level. The result will respect the
30203020
original ordering of the associated factor at that level.
30213021
30223022
Parameters
30233023
----------
3024-
level : int or str, default 0
3024+
level : list-like, int or str, default 0
30253025
If a string is given, must be a name of the level
3026+
If list-like, must be names or ints of levels.
30263027
ascending : boolean, default True
30273028
False to sort in descending order
3029+
sort_remaining : boolean, default True. If True, also sort by the remaining levels after level.
30283030
30293031
Returns
30303032
-------
@@ -3033,24 +3035,35 @@ def sortlevel(self, level=0, ascending=True):
30333035
from pandas.core.groupby import _indexer_from_factorized
30343036

30353037
labels = list(self.labels)
3038+
shape = list(self.levshape)
30363039

3037-
level = self._get_level_number(level)
3038-
primary = labels.pop(level)
3040+
if isinstance(level, (str, int)):
3041+
level = [level]
3042+
level = [self._get_level_number(lev) for lev in level]
30393043

3040-
shape = list(self.levshape)
3041-
primshp = shape.pop(level)
3044+
# partition labels and shape
3045+
primary = tuple(labels.pop(lev - i) for i, lev in enumerate(level))
3046+
primshp = tuple(shape.pop(lev - i) for i, lev in enumerate(level))
30423047

3043-
indexer = _indexer_from_factorized((primary,) + tuple(labels),
3044-
(primshp,) + tuple(shape),
3048+
if sort_remaining:
3049+
primary += primary + tuple(labels)
3050+
primshp += primshp + tuple(shape)
3051+
sortorder = None
3052+
else:
3053+
sortorder = level[0]
3054+
3055+
indexer = _indexer_from_factorized(primary,
3056+
primshp,
30453057
compress=False)
3058+
30463059
if not ascending:
30473060
indexer = indexer[::-1]
30483061

30493062
indexer = com._ensure_platform_int(indexer)
30503063
new_labels = [lab.take(indexer) for lab in self.labels]
30513064

30523065
new_index = MultiIndex(labels=new_labels, levels=self.levels,
3053-
names=self.names, sortorder=level,
3066+
names=self.names, sortorder=sortorder,
30543067
verify_integrity=False)
30553068

30563069
return new_index, indexer

pandas/core/series.py

+3-2
Original file line numberDiff line numberDiff line change
@@ -1758,7 +1758,7 @@ def _try_kind_sort(arr):
17581758
return self._constructor(arr[sortedIdx], index=self.index[sortedIdx])\
17591759
.__finalize__(self)
17601760

1761-
def sortlevel(self, level=0, ascending=True):
1761+
def sortlevel(self, level=0, ascending=True, sort_remaining=True):
17621762
"""
17631763
Sort Series with MultiIndex by chosen level. Data will be
17641764
lexicographically sorted by the chosen level followed by the other
@@ -1776,7 +1776,8 @@ def sortlevel(self, level=0, ascending=True):
17761776
if not isinstance(self.index, MultiIndex):
17771777
raise TypeError('can only sort by level with a hierarchical index')
17781778

1779-
new_index, indexer = self.index.sortlevel(level, ascending=ascending)
1779+
new_index, indexer = self.index.sortlevel(level, ascending=ascending,
1780+
sort_remaining=sort_remaining)
17801781
new_values = self.values.take(indexer)
17811782
return self._constructor(new_values,
17821783
index=new_index).__finalize__(self)

pandas/tests/test_frame.py

+9
Original file line numberDiff line numberDiff line change
@@ -10083,6 +10083,15 @@ def test_sort_index_duplicates(self):
1008310083
result = df.sort_index(by=('a',1))
1008410084
assert_frame_equal(result, expected)
1008510085

10086+
def test_sortlevel(self):
10087+
mi = MultiIndex.from_tuples([[1, 1, 3], [1, 1, 1]], names=list('ABC'))
10088+
df = DataFrame([[1, 2], [3, 4]], mi)
10089+
res = df.sortlevel('A', sort_remaining=False)
10090+
assert_frame_equal(df, res)
10091+
10092+
res = df.sortlevel(['A', 'B'], sort_remaining=False)
10093+
assert_frame_equal(df, res)
10094+
1008610095
def test_sort_datetimes(self):
1008710096

1008810097
# GH 3461, argsort / lexsort differences for a datetime column

pandas/tests/test_index.py

+5
Original file line numberDiff line numberDiff line change
@@ -2410,6 +2410,11 @@ def test_sortlevel(self):
24102410
sorted_idx, _ = index.sortlevel(1, ascending=False)
24112411
self.assert_(sorted_idx.equals(expected[::-1]))
24122412

2413+
def test_sortlevel_not_sort_remaining(self):
2414+
mi = MultiIndex.from_tuples([[1, 1, 3], [1, 1, 1]], names=list('ABC'))
2415+
sorted_idx, _ = mi.sortlevel('A', sort_remaining=False)
2416+
self.assert_(sorted_idx.equals(mi))
2417+
24132418
def test_sortlevel_deterministic(self):
24142419
tuples = [('bar', 'one'), ('foo', 'two'), ('qux', 'two'),
24152420
('foo', 'one'), ('baz', 'two'), ('qux', 'one')]

pandas/tests/test_series.py

+17
Original file line numberDiff line numberDiff line change
@@ -5189,6 +5189,23 @@ def test_unstack(self):
51895189
unstacked = s.unstack(0)
51905190
assert_frame_equal(unstacked, expected)
51915191

5192+
def test_sortlevel(self):
5193+
mi = MultiIndex.from_tuples([[1, 1, 3], [1, 1, 1]], names=list('ABC'))
5194+
s = Series([1, 2], mi)
5195+
backwards = s.iloc[[1, 0]]
5196+
5197+
res = s.sortlevel('A')
5198+
assert_series_equal(backwards, res)
5199+
5200+
res = s.sortlevel(['A', 'B'])
5201+
assert_series_equal(backwards, res)
5202+
5203+
res = s.sortlevel('A', sort_remaining=False)
5204+
assert_series_equal(s, res)
5205+
5206+
res = s.sortlevel(['A', 'B'], sort_remaining=False)
5207+
assert_series_equal(s, res)
5208+
51925209
def test_head_tail(self):
51935210
assert_series_equal(self.series.head(), self.series[:5])
51945211
assert_series_equal(self.series.tail(), self.series[-5:])

0 commit comments

Comments
 (0)