Skip to content

Commit bb6b5e5

Browse files
IamJeffG authored and jorisvandenbossche committed
ENH: DataFrame sort columns by rows: sort_values(axis=1)
closes pandas-dev#10806

Author: Jeffrey Gerard <[email protected]>

Closes pandas-dev#13622 from IamJeffG/GH10806 and squashes the following commits:

ea2d89e [Jeffrey Gerard] More test cases. Clarify whatnew w/ example.
f43ab2e [Jeffrey Gerard] Tweak whatsnew entry, once more
2773cdf [Jeffrey Gerard] Tweak whatsnew entry
0f23615 [Jeffrey Gerard] Whatsnew entry for DataFrame.sort_values by index (10806)
970e25b [Jeffrey Gerard] DataFrame sort columns by rows: sort_values(axis=1)

Joris Van den Bossche: updated axis kwarg in docstring
1 parent 4caacdf commit bb6b5e5

File tree

4 files changed

+45
-10
lines changed

4 files changed

+45
-10
lines changed

doc/source/whatsnew/v0.19.0.txt

+9
Original file line number | Diff line number | Diff line change
@@ -313,6 +313,15 @@ Other enhancements
313313
- ``Series.append`` now supports the ``ignore_index`` option (:issue:`13677`)
314314
- ``.to_stata()`` and ``StataWriter`` can now write variable labels to Stata dta files using a dictionary to map column names to labels (:issue:`13535`, :issue:`13536`)
315315
- ``.to_stata()`` and ``StataWriter`` will automatically convert ``datetime64[ns]`` columns to Stata format ``%tc``, rather than raising a ``ValueError`` (:issue:`12259`)
316+
- ``DataFrame`` has gained support to re-order the columns based on the values
317+
in a row using ``df.sort_values(by='...', axis=1)`` (:issue:`10806`)
318+
319+
.. ipython:: python
320+
321+
df = pd.DataFrame({'A': [2, 7], 'B': [3, 5], 'C': [4, 8]},
322+
index=['row1', 'row2'])
323+
df.sort_values(by='row2', axis=1)
324+
316325

317326
.. _whatsnew_0190.api:
318327

pandas/core/frame.py

+4-5
Original file line number | Diff line number | Diff line change
@@ -101,7 +101,7 @@
101101

102102
_shared_doc_kwargs = dict(
103103
axes='index, columns', klass='DataFrame',
104-
axes_single_arg="{0, 1, 'index', 'columns'}",
104+
axes_single_arg="{0 or 'index', 1 or 'columns'}",
105105
optional_by="""
106106
by : str or list of str
107107
Name or list of names which refer to the axis items.""")
@@ -3184,9 +3184,8 @@ def sort_values(self, by, axis=0, ascending=True, inplace=False,
31843184
kind='quicksort', na_position='last'):
31853185

31863186
axis = self._get_axis_number(axis)
3187+
other_axis = 0 if axis == 1 else 1
31873188

3188-
if axis != 0:
3189-
raise ValueError('When sorting by column, axis must be 0 (rows)')
31903189
if not isinstance(by, list):
31913190
by = [by]
31923191
if is_sequence(ascending) and len(by) != len(ascending):
@@ -3202,7 +3201,7 @@ def trans(v):
32023201

32033202
keys = []
32043203
for x in by:
3205-
k = self[x].values
3204+
k = self.xs(x, axis=other_axis).values
32063205
if k.ndim == 2:
32073206
raise ValueError('Cannot sort by duplicate column %s' %
32083207
str(x))
@@ -3214,7 +3213,7 @@ def trans(v):
32143213
from pandas.core.groupby import _nargsort
32153214

32163215
by = by[0]
3217-
k = self[by].values
3216+
k = self.xs(by, axis=other_axis).values
32183217
if k.ndim == 2:
32193218

32203219
# try to be helpful

pandas/core/generic.py

+2-1
Original file line numberDiff line numberDiff line change
@@ -1979,7 +1979,8 @@ def add_suffix(self, suffix):
19791979
19801980
Parameters
19811981
----------%(optional_by)s
1982-
axis : %(axes)s to direct sorting, default 0
1982+
axis : %(axes_single_arg)s, default 0
1983+
Axis to direct sorting
19831984
ascending : bool or list of bool, default True
19841985
Sort ascending vs. descending. Specify list for multiple sort
19851986
orders. If this is a list of bools, must match the length of

pandas/tests/frame/test_sorting.py

+30-4
Original file line numberDiff line numberDiff line change
@@ -84,7 +84,7 @@ def test_sort_values(self):
8484
frame = DataFrame([[1, 1, 2], [3, 1, 0], [4, 5, 6]],
8585
index=[1, 2, 3], columns=list('ABC'))
8686

87-
# by column
87+
# by column (axis=0)
8888
sorted_df = frame.sort_values(by='A')
8989
indexer = frame['A'].argsort().values
9090
expected = frame.ix[frame.index[indexer]]
@@ -116,9 +116,26 @@ def test_sort_values(self):
116116
self.assertRaises(ValueError, lambda: frame.sort_values(
117117
by=['A', 'B'], axis=2, inplace=True))
118118

119-
msg = 'When sorting by column, axis must be 0'
120-
with assertRaisesRegexp(ValueError, msg):
121-
frame.sort_values(by='A', axis=1)
119+
# by row (axis=1): GH 10806
120+
sorted_df = frame.sort_values(by=3, axis=1)
121+
expected = frame
122+
assert_frame_equal(sorted_df, expected)
123+
124+
sorted_df = frame.sort_values(by=3, axis=1, ascending=False)
125+
expected = frame.reindex(columns=['C', 'B', 'A'])
126+
assert_frame_equal(sorted_df, expected)
127+
128+
sorted_df = frame.sort_values(by=[1, 2], axis='columns')
129+
expected = frame.reindex(columns=['B', 'A', 'C'])
130+
assert_frame_equal(sorted_df, expected)
131+
132+
sorted_df = frame.sort_values(by=[1, 3], axis=1,
133+
ascending=[True, False])
134+
assert_frame_equal(sorted_df, expected)
135+
136+
sorted_df = frame.sort_values(by=[1, 3], axis=1, ascending=False)
137+
expected = frame.reindex(columns=['C', 'B', 'A'])
138+
assert_frame_equal(sorted_df, expected)
122139

123140
msg = r'Length of ascending \(5\) != length of by \(2\)'
124141
with assertRaisesRegexp(ValueError, msg):
@@ -133,6 +150,11 @@ def test_sort_values_inplace(self):
133150
expected = frame.sort_values(by='A')
134151
assert_frame_equal(sorted_df, expected)
135152

153+
sorted_df = frame.copy()
154+
sorted_df.sort_values(by=1, axis=1, inplace=True)
155+
expected = frame.sort_values(by=1, axis=1)
156+
assert_frame_equal(sorted_df, expected)
157+
136158
sorted_df = frame.copy()
137159
sorted_df.sort_values(by='A', ascending=False, inplace=True)
138160
expected = frame.sort_values(by='A', ascending=False)
@@ -179,6 +201,10 @@ def test_sort_nan(self):
179201
sorted_df = df.sort_values(['A'], na_position='first', ascending=False)
180202
assert_frame_equal(sorted_df, expected)
181203

204+
expected = df.reindex(columns=['B', 'A'])
205+
sorted_df = df.sort_values(by=1, axis=1, na_position='first')
206+
assert_frame_equal(sorted_df, expected)
207+
182208
# na_position='last', order
183209
expected = DataFrame(
184210
{'A': [1, 1, 2, 4, 6, 8, nan],

0 commit comments

Comments
 (0)