diff --git a/doc/source/v0.14.0.txt b/doc/source/v0.14.0.txt index 05569dbdba702..9654fe4a2e45d 100644 --- a/doc/source/v0.14.0.txt +++ b/doc/source/v0.14.0.txt @@ -436,6 +436,8 @@ Enhancements - ``quotechar``, ``doublequote``, and ``escapechar`` can now be specified when using ``DataFrame.to_csv`` (:issue:`5414`, :issue:`4528`) +- ``sortlevel`` can now sort by only the specified levels of a MultiIndex, leaving + the remaining levels unsorted, by passing ``sort_remaining=False``. (:issue:`3984`) - Added a ``to_julian_date`` function to ``TimeStamp`` and ``DatetimeIndex`` to convert to the Julian Date used primarily in astronomy. (:issue:`4041`) - ``DataFrame.to_stata`` will now check data for compatibility with Stata data types diff --git a/pandas/core/frame.py b/pandas/core/frame.py index d330d4309b13e..f072d0a37cedc 100755 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -2634,7 +2634,8 @@ def trans(v): else: return self.take(indexer, axis=axis, convert=False, is_copy=False) - def sortlevel(self, level=0, axis=0, ascending=True, inplace=False): + def sortlevel(self, level=0, axis=0, ascending=True, + inplace=False, sort_remaining=True): """ Sort multilevel index by chosen axis and primary level. Data will be lexicographically sorted by the chosen level followed by the other @@ -2647,6 +2648,8 @@ def sortlevel(self, level=0, axis=0, ascending=True, inplace=False): ascending : boolean, default True inplace : boolean, default False Sort the DataFrame without creating a new instance + sort_remaining : boolean, default True + Sort by the other levels too. 
Returns ------- @@ -2657,7 +2660,8 @@ def sortlevel(self, level=0, axis=0, ascending=True, inplace=False): if not isinstance(the_axis, MultiIndex): raise TypeError('can only sort by level with a hierarchical index') - new_axis, indexer = the_axis.sortlevel(level, ascending=ascending) + new_axis, indexer = the_axis.sortlevel(level, ascending=ascending, + sort_remaining=sort_remaining) if self._is_mixed_type and not inplace: ax = 'index' if axis == 0 else 'columns' diff --git a/pandas/core/index.py b/pandas/core/index.py index e8403bfe8b4f8..640f65e0f374c 100644 --- a/pandas/core/index.py +++ b/pandas/core/index.py @@ -3014,17 +3014,19 @@ def reorder_levels(self, order): def __getslice__(self, i, j): return self.__getitem__(slice(i, j)) - def sortlevel(self, level=0, ascending=True): + def sortlevel(self, level=0, ascending=True, sort_remaining=True): """ Sort MultiIndex at the requested level. The result will respect the original ordering of the associated factor at that level. Parameters ---------- - level : int or str, default 0 + level : list-like, int or str, default 0 If a string is given, must be a name of the level + If list-like, must be names or ints of levels. ascending : boolean, default True False to sort in descending order + sort_remaining : boolean, default True; if True, also sort by the remaining levels after ``level``. 
Returns ------- @@ -3033,16 +3035,30 @@ def sortlevel(self, level=0, ascending=True): from pandas.core.groupby import _indexer_from_factorized labels = list(self.labels) + shape = list(self.levshape) - level = self._get_level_number(level) - primary = labels.pop(level) + if isinstance(level, (str, int)): + level = [level] + level = [self._get_level_number(lev) for lev in level] - shape = list(self.levshape) - primshp = shape.pop(level) + # partition labels and shape; index instead of popping so the + # requested levels may be listed in any order + primary = tuple(labels[lev] for lev in level) + primshp = tuple(shape[lev] for lev in level) - indexer = _indexer_from_factorized((primary,) + tuple(labels), - (primshp,) + tuple(shape), + if sort_remaining: + # append every level not already requested, once, as tie-breakers + rest = [i for i in range(len(labels)) if i not in level] + primary += tuple(labels[i] for i in rest) + primshp += tuple(shape[i] for i in rest) + sortorder = None + else: + sortorder = level[0] + + indexer = _indexer_from_factorized(primary, + primshp, compress=False) + if not ascending: indexer = indexer[::-1] @@ -3050,7 +3066,7 @@ def sortlevel(self, level=0, ascending=True): new_labels = [lab.take(indexer) for lab in self.labels] new_index = MultiIndex(labels=new_labels, levels=self.levels, - names=self.names, sortorder=level, + names=self.names, sortorder=sortorder, verify_integrity=False) return new_index, indexer diff --git a/pandas/core/series.py b/pandas/core/series.py index 70b73c56772aa..cfd69ac0c577b 100644 --- a/pandas/core/series.py +++ b/pandas/core/series.py @@ -1758,7 +1758,7 @@ def _try_kind_sort(arr): return self._constructor(arr[sortedIdx], index=self.index[sortedIdx])\ .__finalize__(self) - def sortlevel(self, level=0, ascending=True): + def sortlevel(self, level=0, ascending=True, sort_remaining=True): """ Sort Series with MultiIndex by chosen level. 
Data will be lexicographically sorted by the chosen level followed by the other @@ -1776,7 +1776,8 @@ def sortlevel(self, level=0, ascending=True): if not isinstance(self.index, MultiIndex): raise TypeError('can only sort by level with a hierarchical index') - new_index, indexer = self.index.sortlevel(level, ascending=ascending) + new_index, indexer = self.index.sortlevel(level, ascending=ascending, + sort_remaining=sort_remaining) new_values = self.values.take(indexer) return self._constructor(new_values, index=new_index).__finalize__(self) diff --git a/pandas/tests/test_frame.py b/pandas/tests/test_frame.py index f273c794a7f05..d52579f8235e6 100644 --- a/pandas/tests/test_frame.py +++ b/pandas/tests/test_frame.py @@ -10083,6 +10083,15 @@ def test_sort_index_duplicates(self): result = df.sort_index(by=('a',1)) assert_frame_equal(result, expected) + def test_sortlevel(self): + mi = MultiIndex.from_tuples([[1, 1, 3], [1, 1, 1]], names=list('ABC')) + df = DataFrame([[1, 2], [3, 4]], mi) + res = df.sortlevel('A', sort_remaining=False) + assert_frame_equal(df, res) + + res = df.sortlevel(['A', 'B'], sort_remaining=False) + assert_frame_equal(df, res) + def test_sort_datetimes(self): # GH 3461, argsort / lexsort differences for a datetime column diff --git a/pandas/tests/test_index.py b/pandas/tests/test_index.py index f4d90b533a0f7..e134560231431 100644 --- a/pandas/tests/test_index.py +++ b/pandas/tests/test_index.py @@ -2410,6 +2410,11 @@ def test_sortlevel(self): sorted_idx, _ = index.sortlevel(1, ascending=False) self.assert_(sorted_idx.equals(expected[::-1])) + def test_sortlevel_not_sort_remaining(self): + mi = MultiIndex.from_tuples([[1, 1, 3], [1, 1, 1]], names=list('ABC')) + sorted_idx, _ = mi.sortlevel('A', sort_remaining=False) + self.assert_(sorted_idx.equals(mi)) + def test_sortlevel_deterministic(self): tuples = [('bar', 'one'), ('foo', 'two'), ('qux', 'two'), ('foo', 'one'), ('baz', 'two'), ('qux', 'one')] diff --git a/pandas/tests/test_series.py 
b/pandas/tests/test_series.py index 5b088598dfcec..562fb34fd9a3c 100644 --- a/pandas/tests/test_series.py +++ b/pandas/tests/test_series.py @@ -5189,6 +5189,23 @@ def test_unstack(self): unstacked = s.unstack(0) assert_frame_equal(unstacked, expected) + def test_sortlevel(self): + mi = MultiIndex.from_tuples([[1, 1, 3], [1, 1, 1]], names=list('ABC')) + s = Series([1, 2], mi) + backwards = s.iloc[[1, 0]] + + res = s.sortlevel('A') + assert_series_equal(backwards, res) + + res = s.sortlevel(['A', 'B']) + assert_series_equal(backwards, res) + + res = s.sortlevel('A', sort_remaining=False) + assert_series_equal(s, res) + + res = s.sortlevel(['A', 'B'], sort_remaining=False) + assert_series_equal(s, res) + def test_head_tail(self): assert_series_equal(self.series.head(), self.series[:5]) assert_series_equal(self.series.tail(), self.series[-5:])