Skip to content

Commit d5f9493

Browse files
committed
Merge pull request #6135 from hayd/mi_partial_sorting
ENH partial sorting for mi in sortlevel
2 parents 525e835 + 8e774b8 commit d5f9493

File tree

7 files changed

+64
-13
lines changed

7 files changed

+64
-13
lines changed

doc/source/v0.14.0.txt

+2
Original file line numberDiff line numberDiff line change
@@ -510,6 +510,8 @@ Enhancements
510510

511511
- ``quotechar``, ``doublequote``, and ``escapechar`` can now be specified when
512512
using ``DataFrame.to_csv`` (:issue:`5414`, :issue:`4528`)
513+
- ``sortlevel`` on a MultiIndex, Series or DataFrame can now sort by only the specified
514+
levels: pass ``sort_remaining=False`` to leave the other levels unsorted. (:issue:`3984`)
513515
- Added a ``to_julian_date`` function to ``TimeStamp`` and ``DatetimeIndex``
514516
to convert to the Julian Date used primarily in astronomy. (:issue:`4041`)
515517
- ``DataFrame.to_stata`` will now check data for compatibility with Stata data types

pandas/core/frame.py

+6-2
Original file line numberDiff line numberDiff line change
@@ -2648,7 +2648,8 @@ def trans(v):
26482648
else:
26492649
return self._constructor(new_data).__finalize__(self)
26502650

2651-
def sortlevel(self, level=0, axis=0, ascending=True, inplace=False):
2651+
def sortlevel(self, level=0, axis=0, ascending=True,
2652+
inplace=False, sort_remaining=True):
26522653
"""
26532654
Sort multilevel index by chosen axis and primary level. Data will be
26542655
lexicographically sorted by the chosen level followed by the other
@@ -2661,6 +2662,8 @@ def sortlevel(self, level=0, axis=0, ascending=True, inplace=False):
26612662
ascending : boolean, default True
26622663
inplace : boolean, default False
26632664
Sort the DataFrame without creating a new instance
2665+
sort_remaining : boolean, default True
2666+
Sort by the other levels too.
26642667
26652668
Returns
26662669
-------
@@ -2671,7 +2674,8 @@ def sortlevel(self, level=0, axis=0, ascending=True, inplace=False):
26712674
if not isinstance(the_axis, MultiIndex):
26722675
raise TypeError('can only sort by level with a hierarchical index')
26732676

2674-
new_axis, indexer = the_axis.sortlevel(level, ascending=ascending)
2677+
new_axis, indexer = the_axis.sortlevel(level, ascending=ascending,
2678+
sort_remaining=sort_remaining)
26752679

26762680
if self._is_mixed_type and not inplace:
26772681
ax = 'index' if axis == 0 else 'columns'

pandas/core/index.py

+22-9
Original file line numberDiff line numberDiff line change
@@ -3096,17 +3096,19 @@ def reorder_levels(self, order):
30963096
def __getslice__(self, i, j):
30973097
return self.__getitem__(slice(i, j))
30983098

3099-
def sortlevel(self, level=0, ascending=True):
3099+
def sortlevel(self, level=0, ascending=True, sort_remaining=True):
31003100
"""
31013101
Sort MultiIndex at the requested level. The result will respect the
31023102
original ordering of the associated factor at that level.
31033103
31043104
Parameters
31053105
----------
3106-
level : int or str, default 0
3106+
level : list-like, int or str, default 0
31073107
If a string is given, must be a name of the level
3108+
If list-like must be names or ints of levels.
31083109
ascending : boolean, default True
31093110
False to sort in descending order
3111+
sort_remaining : boolean, default True; if True, also sort by the remaining levels after ``level``.
31103112
31113113
Returns
31123114
-------
@@ -3115,24 +3117,35 @@ def sortlevel(self, level=0, ascending=True):
31153117
from pandas.core.groupby import _indexer_from_factorized
31163118

31173119
labels = list(self.labels)
3120+
shape = list(self.levshape)
31183121

3119-
level = self._get_level_number(level)
3120-
primary = labels.pop(level)
3122+
if isinstance(level, (str, int)):
3123+
level = [level]
3124+
level = [self._get_level_number(lev) for lev in level]
31213125

3122-
shape = list(self.levshape)
3123-
primshp = shape.pop(level)
3126+
# partition labels and shape
3127+
primary = tuple(labels.pop(lev - i) for i, lev in enumerate(level))
3128+
primshp = tuple(shape.pop(lev - i) for i, lev in enumerate(level))
31243129

3125-
indexer = _indexer_from_factorized((primary,) + tuple(labels),
3126-
(primshp,) + tuple(shape),
3130+
if sort_remaining:
3131+
primary += primary + tuple(labels)
3132+
primshp += primshp + tuple(shape)
3133+
sortorder = None
3134+
else:
3135+
sortorder = level[0]
3136+
3137+
indexer = _indexer_from_factorized(primary,
3138+
primshp,
31273139
compress=False)
3140+
31283141
if not ascending:
31293142
indexer = indexer[::-1]
31303143

31313144
indexer = com._ensure_platform_int(indexer)
31323145
new_labels = [lab.take(indexer) for lab in self.labels]
31333146

31343147
new_index = MultiIndex(labels=new_labels, levels=self.levels,
3135-
names=self.names, sortorder=level,
3148+
names=self.names, sortorder=sortorder,
31363149
verify_integrity=False)
31373150

31383151
return new_index, indexer

pandas/core/series.py

+3-2
Original file line numberDiff line numberDiff line change
@@ -1784,7 +1784,7 @@ def _try_kind_sort(arr):
17841784
else:
17851785
return result.__finalize__(self)
17861786

1787-
def sortlevel(self, level=0, ascending=True):
1787+
def sortlevel(self, level=0, ascending=True, sort_remaining=True):
17881788
"""
17891789
Sort Series with MultiIndex by chosen level. Data will be
17901790
lexicographically sorted by the chosen level followed by the other
@@ -1802,7 +1802,8 @@ def sortlevel(self, level=0, ascending=True):
18021802
if not isinstance(self.index, MultiIndex):
18031803
raise TypeError('can only sort by level with a hierarchical index')
18041804

1805-
new_index, indexer = self.index.sortlevel(level, ascending=ascending)
1805+
new_index, indexer = self.index.sortlevel(level, ascending=ascending,
1806+
sort_remaining=sort_remaining)
18061807
new_values = self.values.take(indexer)
18071808
return self._constructor(new_values,
18081809
index=new_index).__finalize__(self)

pandas/tests/test_frame.py

+9
Original file line numberDiff line numberDiff line change
@@ -10116,6 +10116,15 @@ def test_sort_index_duplicates(self):
1011610116
result = df.sort_index(by=('a',1))
1011710117
assert_frame_equal(result, expected)
1011810118

10119+
def test_sortlevel(self):
10120+
mi = MultiIndex.from_tuples([[1, 1, 3], [1, 1, 1]], names=list('ABC'))
10121+
df = DataFrame([[1, 2], [3, 4]], mi)
10122+
res = df.sortlevel('A', sort_remaining=False)
10123+
assert_frame_equal(df, res)
10124+
10125+
res = df.sortlevel(['A', 'B'], sort_remaining=False)
10126+
assert_frame_equal(df, res)
10127+
1011910128
def test_sort_datetimes(self):
1012010129

1012110130
# GH 3461, argsort / lexsort differences for a datetime column

pandas/tests/test_index.py

+5
Original file line numberDiff line numberDiff line change
@@ -2420,6 +2420,11 @@ def test_sortlevel(self):
24202420
sorted_idx, _ = index.sortlevel(1, ascending=False)
24212421
self.assert_(sorted_idx.equals(expected[::-1]))
24222422

2423+
def test_sortlevel_not_sort_remaining(self):
2424+
mi = MultiIndex.from_tuples([[1, 1, 3], [1, 1, 1]], names=list('ABC'))
2425+
sorted_idx, _ = mi.sortlevel('A', sort_remaining=False)
2426+
self.assert_(sorted_idx.equals(mi))
2427+
24232428
def test_sortlevel_deterministic(self):
24242429
tuples = [('bar', 'one'), ('foo', 'two'), ('qux', 'two'),
24252430
('foo', 'one'), ('baz', 'two'), ('qux', 'one')]

pandas/tests/test_series.py

+17
Original file line numberDiff line numberDiff line change
@@ -5302,6 +5302,23 @@ def test_unstack(self):
53025302
unstacked = s.unstack(0)
53035303
assert_frame_equal(unstacked, expected)
53045304

5305+
def test_sortlevel(self):
5306+
mi = MultiIndex.from_tuples([[1, 1, 3], [1, 1, 1]], names=list('ABC'))
5307+
s = Series([1, 2], mi)
5308+
backwards = s.iloc[[1, 0]]
5309+
5310+
res = s.sortlevel('A')
5311+
assert_series_equal(backwards, res)
5312+
5313+
res = s.sortlevel(['A', 'B'])
5314+
assert_series_equal(backwards, res)
5315+
5316+
res = s.sortlevel('A', sort_remaining=False)
5317+
assert_series_equal(s, res)
5318+
5319+
res = s.sortlevel(['A', 'B'], sort_remaining=False)
5320+
assert_series_equal(s, res)
5321+
53055322
def test_head_tail(self):
53065323
assert_series_equal(self.series.head(), self.series[:5])
53075324
assert_series_equal(self.series.tail(), self.series[-5:])

0 commit comments

Comments
 (0)