pandas-dev · jreback · Jan 5, 2018 · Oct 15, 2016 · Aug 29, 2017 · Sep 5, 2017
diff --git a/doc/source/basics.rst b/doc/source/basics.rst
@@ -1738,19 +1738,26 @@ description.
 Sorting
 -------
 
-There are two obvious kinds of sorting that you may be interested in: sorting
-by label and sorting by actual values.
+Pandas supports three kinds of sorting: sorting by index levels,
+sorting by column values, and sorting by a combination of both.
+
+.. _basics.sort_index:
 
 By Index
 ~~~~~~~~
 
-The primary method for sorting axis
-labels (indexes) are the ``Series.sort_index()`` and the ``DataFrame.sort_index()`` methods.
+The :meth:`Series.sort_index` and :meth:`DataFrame.sort_index` methods are
+used to sort a pandas object by its index levels.
 
 .. ipython:: python
 
+   df = pd.DataFrame({'one' : pd.Series(np.random.randn(3), index=['a', 'b', 'c']),
+                      'two' : pd.Series(np.random.randn(4), index=['a', 'b', 'c', 'd']),
+                      'three' : pd.Series(np.random.randn(3), index=['b', 'c', 'd'])})
+
    unsorted_df = df.reindex(index=['a', 'd', 'c', 'b'],
                             columns=['three', 'two', 'one'])
+   unsorted_df
 
    # DataFrame
    unsorted_df.sort_index()
@@ -1760,20 +1767,22 @@ labels (indexes) are the ``Series.sort_index()`` and the ``DataFrame.sort_index(
    # Series
    unsorted_df['three'].sort_index()
 
+.. _basics.sort_values:
+
 By Values
 ~~~~~~~~~
 
-The :meth:`Series.sort_values` and :meth:`DataFrame.sort_values` are the entry points for **value** sorting (that is the values in a column or row).
-:meth:`DataFrame.sort_values` can accept an optional ``by`` argument for ``axis=0``
-which will use an arbitrary vector or a column name of the DataFrame to
-determine the sort order:
+The :meth:`Series.sort_values` and :meth:`DataFrame.sort_values` methods are
+used to sort a pandas object by its values.  The optional ``by`` parameter to
+:meth:`DataFrame.sort_values` may be used to specify one or more columns to
+use to determine the sorted order.
 
 .. ipython:: python
 
    df1 = pd.DataFrame({'one':[2,1,1,1],'two':[1,3,2,4],'three':[5,4,3,2]})
    df1.sort_values(by='two')
 
-The ``by`` argument can take a list of column names, e.g.:
+The ``by`` parameter can take a list of column names, e.g.:
 
 .. ipython:: python
 
@@ -1788,6 +1797,36 @@ argument:
    s.sort_values()
    s.sort_values(na_position='first')
 
+.. _basics.sort_indexes_and_values:
+
+By Indexes and Values
+~~~~~~~~~~~~~~~~~~~~~
+
+.. versionadded:: 0.22.0
+
+Strings passed as the ``by`` parameter to :meth:`DataFrame.sort_values` may
+refer to either columns or index levels.
+
+.. ipython:: python
+
+   # Build MultiIndex
+   idx = pd.MultiIndex.from_tuples([('a', 1), ('a', 2), ('a', 2),
+                                   ('b', 2), ('b', 1), ('b', 1)])
+   idx.names = ['first', 'second']
+
+   # Build DataFrame
+   df_multi = pd.DataFrame({'A': np.arange(6, 0, -1)},
+                           index=idx)
+   df_multi
+
+   # Sort by 'second' (index) and 'A' (column)
+   df_multi.sort_values(by=['second', 'A'])
+
+.. note::
+
+   If a string matches both a column name and an index level name then a
+   warning is issued and the column takes precedence. This will result in an
+   ambiguity error in a future version.
 
 .. _basics.searchsorted:
 

diff --git a/doc/source/whatsnew/v0.22.0.txt b/doc/source/whatsnew/v0.22.0.txt
@@ -65,6 +65,31 @@ levels <merging.merge_on_columns_and_levels>` documentation section.
 
 .. _whatsnew_0220.enhancements.other:
 
+Sorting by a combination of columns and index levels
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+Strings passed to :meth:`DataFrame.sort_values` as the ``by`` parameter may
+now refer to either column names or index level names.  This enables sorting
+``DataFrame`` instances by a combination of index levels and columns without
+resetting indexes. See the :ref:`Sorting by Indexes and Values
+<basics.sort_indexes_and_values>` documentation section.
+(:issue:`14353`)
+
+.. ipython:: python
+
+   # Build MultiIndex
+   idx = pd.MultiIndex.from_tuples([('a', 1), ('a', 2), ('a', 2),
+                                    ('b', 2), ('b', 1), ('b', 1)])
+   idx.names = ['first', 'second']
+
+   # Build DataFrame
+   df_multi = pd.DataFrame({'A': np.arange(6, 0, -1)},
+                           index=idx)
+   df_multi
+
+   # Sort by 'second' (index) and 'A' (column)
+   df_multi.sort_values(by=['second', 'A'])
+
 Other Enhancements
 ^^^^^^^^^^^^^^^^^^
 

diff --git a/pandas/core/frame.py b/pandas/core/frame.py
@@ -113,7 +113,15 @@
     axes_single_arg="{0 or 'index', 1 or 'columns'}",
     optional_by="""
         by : str or list of str
-            Name or list of names which refer to the axis items.""",
+            Name or list of names matching axis levels or off-axis labels.
+
+            - if `axis` is 0 or `'index'` then `by` may contain index
+              levels and/or column labels
+            - if `axis` is 1 or `'columns'` then `by` may contain column
+              levels and/or index labels
+
+            Support for specifying index/column levels was added in
+            version 0.22.0""",
     versionadded_to_excel='',
     optional_labels="""labels : array-like, optional
             New labels / index to conform the axis specified by 'axis' to.""",
@@ -3612,7 +3620,6 @@ def sort_values(self, by, axis=0, ascending=True, inplace=False,
                     kind='quicksort', na_position='last'):
         inplace = validate_bool_kwarg(inplace, 'inplace')
         axis = self._get_axis_number(axis)
-        other_axis = 0 if axis == 1 else 1
 
         if not isinstance(by, list):
             by = [by]
@@ -3624,10 +3631,7 @@ def sort_values(self, by, axis=0, ascending=True, inplace=False,
 
             keys = []
             for x in by:
-                k = self.xs(x, axis=other_axis).values
-                if k.ndim == 2:
-                    raise ValueError('Cannot sort by duplicate column %s' %
-                                     str(x))
+                k = self._get_label_or_level_values(x, axis=axis)
                 keys.append(k)
             indexer = lexsort_indexer(keys, orders=ascending,
                                       na_position=na_position)
@@ -3636,17 +3640,8 @@ def sort_values(self, by, axis=0, ascending=True, inplace=False,
             from pandas.core.sorting import nargsort
 
             by = by[0]
-            k = self.xs(by, axis=other_axis).values
-            if k.ndim == 2:
-
-                # try to be helpful
-                if isinstance(self.columns, MultiIndex):
-                    raise ValueError('Cannot sort by column %s in a '
-                                     'multi-index you need to explicitly '
-                                     'provide all the levels' % str(by))
+            k = self._get_label_or_level_values(by, axis=axis)
 
-                raise ValueError('Cannot sort by duplicate column %s' %
-                                 str(by))
             if isinstance(ascending, (tuple, list)):
                 ascending = ascending[0]
 

diff --git a/pandas/core/generic.py b/pandas/core/generic.py
@@ -69,7 +69,7 @@
     args_transpose='axes to permute (int or label for object)',
     optional_by="""
         by : str or list of str
-            Name or list of names which refer to the axis items.""")
+            Name or list of names matching axis levels or off-axis labels.""")
 
 
 def _single_replace(self, to_replace, method, inplace, limit):
@@ -2932,7 +2932,7 @@ def add_suffix(self, suffix):
         Parameters
         ----------%(optional_by)s
         axis : %(axes_single_arg)s, default 0
-            Axis to direct sorting
+             Axis to be sorted
         ascending : bool or list of bool, default True
              Sort ascending vs. descending. Specify list for multiple sort
              orders.  If this is a list of bools, must match the length of

diff --git a/pandas/tests/frame/test_sort_values_level_as_str.py b/pandas/tests/frame/test_sort_values_level_as_str.py
@@ -0,0 +1,122 @@
+import numpy as np
+import pytest
+
+from pandas import DataFrame, Index
+from pandas.errors import PerformanceWarning
+from pandas.util import testing as tm
+from pandas.util.testing import assert_frame_equal
+
+
+@pytest.fixture
+def df_none():
+    return DataFrame({
+        'outer': ['a', 'a', 'a', 'b', 'b', 'b'],
+        'inner': [1, 2, 2, 2, 1, 1],
+        'A': np.arange(6, 0, -1),
+        ('B', 5): ['one', 'one', 'two', 'two', 'one', 'one']})
+
+
+@pytest.fixture(params=[
+    ['outer'],
+    ['outer', 'inner']
+])
+def df_idx(request, df_none):
+    levels = request.param
+    return df_none.set_index(levels)
+
+
+@pytest.fixture(params=[
+    'inner',     # index level
+    ['outer'],   # list of index level
+    'A',         # column
+    [('B', 5)],  # list of column
+    ['inner', 'outer'],   # two index levels
+    [('B', 5), 'outer'],  # index level and column
+    ['A', ('B', 5)],      # Two columns
+    ['inner', 'A', 'outer']    # two index levels and column
+])
+def sort_names(request):
+    return request.param
+
+
+@pytest.fixture(params=[True, False])
+def ascending(request):
+    return request.param
+
+
+def test_sort_index_level_and_column_label(
+        df_none, df_idx, sort_names, ascending):
+
+    # Get index levels from df_idx
+    levels = df_idx.index.names
+
+    # Compute expected by sorting on columns and then setting the index
+    expected = df_none.sort_values(by=sort_names,
+                                   ascending=ascending,
+                                   axis=0).set_index(levels)
+
+    # Compute result sorting on a mix of columns and index levels
+    result = df_idx.sort_values(by=sort_names,
+                                ascending=ascending,
+                                axis=0)
+
+    assert_frame_equal(result, expected)
+
+
+def test_sort_column_level_and_index_label(
+        df_none, df_idx, sort_names, ascending):
+
+    # Get levels from df_idx
+    levels = df_idx.index.names
+
+    # Compute expected by sorting on axis=0, setting index levels, and then
+    # transposing. For some cases this will result in a frame with
+    # multiple column levels
+    expected = df_none.sort_values(by=sort_names,
+                                   ascending=ascending,
+                                   axis=0).set_index(levels).T
+
+    # Compute result by transposing and sorting on axis=1.
+    result = df_idx.T.sort_values(by=sort_names,
+                                  ascending=ascending,
+                                  axis=1)
+
+    if len(levels) > 1:
+        # Accessing multi-level columns that are not lexsorted raises a
+        # performance warning
+        with tm.assert_produces_warning(PerformanceWarning,
+                                        check_stacklevel=False):
+            assert_frame_equal(result, expected)
+    else:
+        assert_frame_equal(result, expected)
+
+
+def test_sort_values_column_index_level_precedence():
+    # GH 14353, when a string passed as the `by` parameter
+    # matches a column and an index level the column takes
+    # precedence
+
+    # Construct DataFrame with index and column named 'idx'
+    idx = Index(np.arange(1, 7), name='idx')
+    df = DataFrame({'A': np.arange(11, 17),
+                    'idx': np.arange(6, 0, -1)},
+                   index=idx)
+
+    # Sorting by 'idx' should sort by the idx column and raise a
+    # FutureWarning
+    with tm.assert_produces_warning(FutureWarning, check_stacklevel=False):
+        result = df.sort_values(by='idx')
+
+    # This should be equivalent to sorting by the 'idx' index level in
+    # descending order
+    expected = df.sort_index(level='idx', ascending=False)
+    assert_frame_equal(result, expected)
+
+    # Perform same test with MultiIndex
+    df_multi = df.set_index('A', append=True)
+
+    with tm.assert_produces_warning(FutureWarning, check_stacklevel=False):
+        result = df_multi.sort_values(by='idx')
+
+    expected = df_multi.sort_index(level='idx', ascending=False)
+    assert_frame_equal(result, expected)
diff --git a/pandas/tests/frame/test_sorting.py b/pandas/tests/frame/test_sorting.py
@@ -455,38 +455,38 @@ def test_sort_index_duplicates(self):
         df = DataFrame([lrange(5, 9), lrange(4)],
                        columns=['a', 'a', 'b', 'b'])
 
-        with tm.assert_raises_regex(ValueError, 'duplicate'):
+        with tm.assert_raises_regex(ValueError, 'not unique'):
             # use .sort_values #9816
             with tm.assert_produces_warning(FutureWarning):
                 df.sort_index(by='a')
-        with tm.assert_raises_regex(ValueError, 'duplicate'):
+        with tm.assert_raises_regex(ValueError, 'not unique'):
             df.sort_values(by='a')
 
-        with tm.assert_raises_regex(ValueError, 'duplicate'):
+        with tm.assert_raises_regex(ValueError, 'not unique'):
             # use .sort_values #9816
             with tm.assert_produces_warning(FutureWarning):
                 df.sort_index(by=['a'])
-        with tm.assert_raises_regex(ValueError, 'duplicate'):
+        with tm.assert_raises_regex(ValueError, 'not unique'):
             df.sort_values(by=['a'])
 
-        with tm.assert_raises_regex(ValueError, 'duplicate'):
+        with tm.assert_raises_regex(ValueError, 'not unique'):
             # use .sort_values #9816
             with tm.assert_produces_warning(FutureWarning):
                 # multi-column 'by' is separate codepath
                 df.sort_index(by=['a', 'b'])
-        with tm.assert_raises_regex(ValueError, 'duplicate'):
+        with tm.assert_raises_regex(ValueError, 'not unique'):
             # multi-column 'by' is separate codepath
             df.sort_values(by=['a', 'b'])
 
         # with multi-index
         # GH4370
         df = DataFrame(np.random.randn(4, 2),
                        columns=MultiIndex.from_tuples([('a', 0), ('a', 1)]))
-        with tm.assert_raises_regex(ValueError, 'levels'):
+        with tm.assert_raises_regex(ValueError, 'not unique'):
             # use .sort_values #9816
             with tm.assert_produces_warning(FutureWarning):
                 df.sort_index(by='a')
-        with tm.assert_raises_regex(ValueError, 'levels'):
+        with tm.assert_raises_regex(ValueError, 'not unique'):
             df.sort_values(by='a')
 
         # convert tuples to a list of tuples