Skip to content

ENH partial sorting for mi in sortlevel #6135

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 2 commits into from
May 2, 2014
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions doc/source/v0.14.0.txt
Original file line number Diff line number Diff line change
Expand Up @@ -436,6 +436,8 @@ Enhancements

- ``quotechar``, ``doublequote``, and ``escapechar`` can now be specified when
using ``DataFrame.to_csv`` (:issue:`5414`, :issue:`4528`)
- Partially sort by only the specified levels of a MultiIndex with the
``sort_remaining`` boolean kwarg. (:issue:`3984`)
- Added a ``to_julian_date`` function to ``Timestamp`` and ``DatetimeIndex``
to convert to the Julian Date used primarily in astronomy. (:issue:`4041`)
- ``DataFrame.to_stata`` will now check data for compatibility with Stata data types
Expand Down
8 changes: 6 additions & 2 deletions pandas/core/frame.py
Original file line number Diff line number Diff line change
Expand Up @@ -2634,7 +2634,8 @@ def trans(v):
else:
return self.take(indexer, axis=axis, convert=False, is_copy=False)

def sortlevel(self, level=0, axis=0, ascending=True, inplace=False):
def sortlevel(self, level=0, axis=0, ascending=True,
inplace=False, sort_remaining=True):
"""
Sort multilevel index by chosen axis and primary level. Data will be
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Would this be more clear as:

Lexicographically sort index of dataframe on specified axis, starting with the specified level and then sorting by other levels in the order they're defined on the multilevel index (sort_remaining can optionally disable sorting on other levels).

It's not perfect, but maybe clearer?

lexicographically sorted by the chosen level followed by the other
Expand All @@ -2647,6 +2648,8 @@ def sortlevel(self, level=0, axis=0, ascending=True, inplace=False):
ascending : boolean, default True
inplace : boolean, default False
Sort the DataFrame without creating a new instance
sort_remaining : boolean, default True
Sort by the other levels too.

Returns
-------
Expand All @@ -2657,7 +2660,8 @@ def sortlevel(self, level=0, axis=0, ascending=True, inplace=False):
if not isinstance(the_axis, MultiIndex):
raise TypeError('can only sort by level with a hierarchical index')

new_axis, indexer = the_axis.sortlevel(level, ascending=ascending)
new_axis, indexer = the_axis.sortlevel(level, ascending=ascending,
sort_remaining=sort_remaining)

if self._is_mixed_type and not inplace:
ax = 'index' if axis == 0 else 'columns'
Expand Down
31 changes: 22 additions & 9 deletions pandas/core/index.py
Original file line number Diff line number Diff line change
Expand Up @@ -3014,17 +3014,19 @@ def reorder_levels(self, order):
def __getslice__(self, i, j):
return self.__getitem__(slice(i, j))

def sortlevel(self, level=0, ascending=True):
def sortlevel(self, level=0, ascending=True, sort_remaining=True):
"""
Sort MultiIndex at the requested level. The result will respect the
original ordering of the associated factor at that level.

Parameters
----------
level : int or str, default 0
level : list-like, int or str, default 0
If a string is given, must be a name of the level
If list-like must be names or ints of levels.
ascending : boolean, default True
False to sort in descending order
sort_remaining : boolean, default True
If True, also sort by the remaining levels after ``level``.

Returns
-------
Expand All @@ -3033,24 +3035,35 @@ def sortlevel(self, level=0, ascending=True):
from pandas.core.groupby import _indexer_from_factorized

labels = list(self.labels)
shape = list(self.levshape)

level = self._get_level_number(level)
primary = labels.pop(level)
if isinstance(level, (str, int)):
level = [level]
level = [self._get_level_number(lev) for lev in level]

shape = list(self.levshape)
primshp = shape.pop(level)
# partition labels and shape
primary = tuple(labels.pop(lev - i) for i, lev in enumerate(level))
primshp = tuple(shape.pop(lev - i) for i, lev in enumerate(level))

indexer = _indexer_from_factorized((primary,) + tuple(labels),
(primshp,) + tuple(shape),
if sort_remaining:
primary += primary + tuple(labels)
primshp += primshp + tuple(shape)
sortorder = None
else:
sortorder = level[0]

indexer = _indexer_from_factorized(primary,
primshp,
compress=False)

if not ascending:
indexer = indexer[::-1]

indexer = com._ensure_platform_int(indexer)
new_labels = [lab.take(indexer) for lab in self.labels]

new_index = MultiIndex(labels=new_labels, levels=self.levels,
names=self.names, sortorder=level,
names=self.names, sortorder=sortorder,
verify_integrity=False)

return new_index, indexer
Expand Down
5 changes: 3 additions & 2 deletions pandas/core/series.py
Original file line number Diff line number Diff line change
Expand Up @@ -1758,7 +1758,7 @@ def _try_kind_sort(arr):
return self._constructor(arr[sortedIdx], index=self.index[sortedIdx])\
.__finalize__(self)

def sortlevel(self, level=0, ascending=True):
def sortlevel(self, level=0, ascending=True, sort_remaining=True):
"""
Sort Series with MultiIndex by chosen level. Data will be
lexicographically sorted by the chosen level followed by the other
Expand All @@ -1776,7 +1776,8 @@ def sortlevel(self, level=0, ascending=True):
if not isinstance(self.index, MultiIndex):
raise TypeError('can only sort by level with a hierarchical index')

new_index, indexer = self.index.sortlevel(level, ascending=ascending)
new_index, indexer = self.index.sortlevel(level, ascending=ascending,
sort_remaining=sort_remaining)
new_values = self.values.take(indexer)
return self._constructor(new_values,
index=new_index).__finalize__(self)
Expand Down
9 changes: 9 additions & 0 deletions pandas/tests/test_frame.py
Original file line number Diff line number Diff line change
Expand Up @@ -10083,6 +10083,15 @@ def test_sort_index_duplicates(self):
result = df.sort_index(by=('a',1))
assert_frame_equal(result, expected)

def test_sortlevel(self):
mi = MultiIndex.from_tuples([[1, 1, 3], [1, 1, 1]], names=list('ABC'))
df = DataFrame([[1, 2], [3, 4]], mi)
res = df.sortlevel('A', sort_remaining=False)
assert_frame_equal(df, res)

res = df.sortlevel(['A', 'B'], sort_remaining=False)
assert_frame_equal(df, res)

def test_sort_datetimes(self):

# GH 3461, argsort / lexsort differences for a datetime column
Expand Down
5 changes: 5 additions & 0 deletions pandas/tests/test_index.py
Original file line number Diff line number Diff line change
Expand Up @@ -2410,6 +2410,11 @@ def test_sortlevel(self):
sorted_idx, _ = index.sortlevel(1, ascending=False)
self.assert_(sorted_idx.equals(expected[::-1]))

def test_sortlevel_not_sort_remaining(self):
mi = MultiIndex.from_tuples([[1, 1, 3], [1, 1, 1]], names=list('ABC'))
sorted_idx, _ = mi.sortlevel('A', sort_remaining=False)
self.assert_(sorted_idx.equals(mi))

def test_sortlevel_deterministic(self):
tuples = [('bar', 'one'), ('foo', 'two'), ('qux', 'two'),
('foo', 'one'), ('baz', 'two'), ('qux', 'one')]
Expand Down
17 changes: 17 additions & 0 deletions pandas/tests/test_series.py
Original file line number Diff line number Diff line change
Expand Up @@ -5189,6 +5189,23 @@ def test_unstack(self):
unstacked = s.unstack(0)
assert_frame_equal(unstacked, expected)

def test_sortlevel(self):
mi = MultiIndex.from_tuples([[1, 1, 3], [1, 1, 1]], names=list('ABC'))
s = Series([1, 2], mi)
backwards = s.iloc[[1, 0]]

res = s.sortlevel('A')
assert_series_equal(backwards, res)

res = s.sortlevel(['A', 'B'])
assert_series_equal(backwards, res)

res = s.sortlevel('A', sort_remaining=False)
assert_series_equal(s, res)

res = s.sortlevel(['A', 'B'], sort_remaining=False)
assert_series_equal(s, res)

def test_head_tail(self):
assert_series_equal(self.series.head(), self.series[:5])
assert_series_equal(self.series.tail(), self.series[-5:])
Expand Down