ENH: DataFrame sort columns by rows: sort_values(axis=1)

IamJeffG · jorisvandenbossche · commit bb6b5e54edaf · 2016-07-21T17:04:00.000+02:00
closes pandas-dev#10806 Author: Jeffrey Gerard <jeffreygerard+github@gmail.com> Closes pandas-dev#13622 from IamJeffG/GH10806 and squashes the following commits: ea2d89e [Jeffrey Gerard] More test cases. Clarify whatnew w/ example. f43ab2e [Jeffrey Gerard] Tweak whatsnew entry, once more 2773cdf [Jeffrey Gerard] Tweak whatsnew entry 0f23615 [Jeffrey Gerard] Whatsnew entry for DataFrame.sort_values by index (10806) 970e25b [Jeffrey Gerard] DataFrame sort columns by rows: sort_values(axis=1) Joris Van den Bossche: updated axis kwarg in docstring
diff --git a/doc/source/whatsnew/v0.19.0.txt b/doc/source/whatsnew/v0.19.0.txt
@@ -313,6 +313,15 @@ Other enhancements
 - ``Series.append`` now supports the ``ignore_index`` option (:issue:`13677`)
 - ``.to_stata()`` and ``StataWriter`` can now write variable labels to Stata dta files using a dictionary to map column names to labels (:issue:`13535`, :issue:`13536`)
 - ``.to_stata()`` and ``StataWriter`` will automatically convert ``datetime64[ns]`` columns to Stata format ``%tc``, rather than raising a ``ValueError`` (:issue:`12259`)
+- ``DataFrame`` has gained support for re-ordering its columns based on the
+  values in a row using ``df.sort_values(by='...', axis=1)`` (:issue:`10806`)
+
+  .. ipython:: python
+
+     df = pd.DataFrame({'A': [2, 7], 'B': [3, 5], 'C': [4, 8]},
+                       index=['row1', 'row2'])
+     df.sort_values(by='row2', axis=1)
+
 
 .. _whatsnew_0190.api:
 
diff --git a/pandas/core/frame.py b/pandas/core/frame.py
@@ -101,7 +101,7 @@
 
 _shared_doc_kwargs = dict(
     axes='index, columns', klass='DataFrame',
-    axes_single_arg="{0, 1, 'index', 'columns'}",
+    axes_single_arg="{0 or 'index', 1 or 'columns'}",
     optional_by="""
         by : str or list of str
             Name or list of names which refer to the axis items.""")
@@ -3184,9 +3184,8 @@ def sort_values(self, by, axis=0, ascending=True, inplace=False,
                     kind='quicksort', na_position='last'):
 
         axis = self._get_axis_number(axis)
+        other_axis = 0 if axis == 1 else 1
 
-        if axis != 0:
-            raise ValueError('When sorting by column, axis must be 0 (rows)')
         if not isinstance(by, list):
             by = [by]
         if is_sequence(ascending) and len(by) != len(ascending):
@@ -3202,7 +3201,7 @@ def trans(v):
 
             keys = []
             for x in by:
-                k = self[x].values
+                k = self.xs(x, axis=other_axis).values
                 if k.ndim == 2:
                     raise ValueError('Cannot sort by duplicate column %s' %
                                      str(x))
@@ -3214,7 +3213,7 @@ def trans(v):
             from pandas.core.groupby import _nargsort
 
             by = by[0]
-            k = self[by].values
+            k = self.xs(by, axis=other_axis).values
             if k.ndim == 2:
 
                 # try to be helpful
diff --git a/pandas/core/generic.py b/pandas/core/generic.py
@@ -1979,7 +1979,8 @@ def add_suffix(self, suffix):
 
         Parameters
         ----------%(optional_by)s
-        axis : %(axes)s to direct sorting, default 0
+        axis : %(axes_single_arg)s, default 0
+            Axis to direct sorting
         ascending : bool or list of bool, default True
              Sort ascending vs. descending. Specify list for multiple sort
              orders.  If this is a list of bools, must match the length of
diff --git a/pandas/tests/frame/test_sorting.py b/pandas/tests/frame/test_sorting.py
@@ -84,7 +84,7 @@ def test_sort_values(self):
         frame = DataFrame([[1, 1, 2], [3, 1, 0], [4, 5, 6]],
                           index=[1, 2, 3], columns=list('ABC'))
 
-        # by column
+        # by column (axis=0)
         sorted_df = frame.sort_values(by='A')
         indexer = frame['A'].argsort().values
         expected = frame.ix[frame.index[indexer]]
@@ -116,9 +116,26 @@ def test_sort_values(self):
         self.assertRaises(ValueError, lambda: frame.sort_values(
             by=['A', 'B'], axis=2, inplace=True))
 
-        msg = 'When sorting by column, axis must be 0'
-        with assertRaisesRegexp(ValueError, msg):
-            frame.sort_values(by='A', axis=1)
+        # by row (axis=1): GH 10806
+        sorted_df = frame.sort_values(by=3, axis=1)
+        expected = frame
+        assert_frame_equal(sorted_df, expected)
+
+        sorted_df = frame.sort_values(by=3, axis=1, ascending=False)
+        expected = frame.reindex(columns=['C', 'B', 'A'])
+        assert_frame_equal(sorted_df, expected)
+
+        sorted_df = frame.sort_values(by=[1, 2], axis='columns')
+        expected = frame.reindex(columns=['B', 'A', 'C'])
+        assert_frame_equal(sorted_df, expected)
+
+        sorted_df = frame.sort_values(by=[1, 3], axis=1,
+                                      ascending=[True, False])
+        assert_frame_equal(sorted_df, expected)
+
+        sorted_df = frame.sort_values(by=[1, 3], axis=1, ascending=False)
+        expected = frame.reindex(columns=['C', 'B', 'A'])
+        assert_frame_equal(sorted_df, expected)
 
         msg = r'Length of ascending \(5\) != length of by \(2\)'
         with assertRaisesRegexp(ValueError, msg):
@@ -133,6 +150,11 @@ def test_sort_values_inplace(self):
         expected = frame.sort_values(by='A')
         assert_frame_equal(sorted_df, expected)
 
+        sorted_df = frame.copy()
+        sorted_df.sort_values(by=1, axis=1, inplace=True)
+        expected = frame.sort_values(by=1, axis=1)
+        assert_frame_equal(sorted_df, expected)
+
         sorted_df = frame.copy()
         sorted_df.sort_values(by='A', ascending=False, inplace=True)
         expected = frame.sort_values(by='A', ascending=False)
@@ -179,6 +201,10 @@ def test_sort_nan(self):
         sorted_df = df.sort_values(['A'], na_position='first', ascending=False)
         assert_frame_equal(sorted_df, expected)
 
+        expected = df.reindex(columns=['B', 'A'])
+        sorted_df = df.sort_values(by=1, axis=1, na_position='first')
+        assert_frame_equal(sorted_df, expected)
+
         # na_position='last', order
         expected = DataFrame(
             {'A': [1, 1, 2, 4, 6, 8, nan],