Skip to content

Commit 6826609

Browse files
committed
BUG: more helpful error when sorting by duplicate column. close #2488
1 parent 287ddbf commit 6826609

File tree

3 files changed

+28
-1
lines changed

3 files changed

+28
-1
lines changed

RELEASE.rst

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -229,6 +229,8 @@ pandas 0.10.0
229229
- Fix exception when Timestamp.to_datetime is called on a Timestamp with tzoffset (#2471)
230230
- Fixed unintentional conversion of datetime64 to long in groupby.first() (#2133)
231231
- Union of empty DataFrames now returns an empty DataFrame with the concatenated index (#2307)
232+
- DataFrame.sort_index raises a more helpful exception if sorting by a column
233+
with duplicates (#2488)
232234

233235
pandas 0.9.1
234236
============

pandas/core/frame.py

Lines changed: 13 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -3101,11 +3101,23 @@ def sort_index(self, axis=0, by=None, ascending=True, inplace=False):
31013101
if axis != 0:
31023102
raise AssertionError('Axis must be 0')
31033103
if isinstance(by, (tuple, list)):
3104+
keys = []
3105+
for x in by:
3106+
k = self[x].values
3107+
if k.ndim == 2:
3108+
raise ValueError('Cannot sort by duplicate column %s'
3109+
% str(x))
3110+
keys.append(k)
3111+
31043112
keys = [self[x].values for x in by]
31053113
indexer = _lexsort_indexer(keys, orders=ascending)
31063114
indexer = com._ensure_platform_int(indexer)
31073115
else:
3108-
indexer = self[by].values.argsort()
3116+
k = self[by].values
3117+
if k.ndim == 2:
3118+
raise ValueError('Cannot sort by duplicate column %s'
3119+
% str(by))
3120+
indexer = k.argsort()
31093121
if not ascending:
31103122
indexer = indexer[::-1]
31113123
elif isinstance(labels, MultiIndex):

pandas/tests/test_frame.py

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -6112,6 +6112,19 @@ def test_sort_inplace(self):
61126112
expected = frame.sort_index(by=['A', 'B'], ascending=False)
61136113
assert_frame_equal(sorted_df, expected)
61146114

6115+
def test_sort_index_duplicates(self):
    """sort_index(by=...) must reject a column label that is duplicated.

    Exercises both the scalar and the list form of ``by``. Each call must
    raise a ValueError whose message mentions 'duplicate'.
    """
    df = DataFrame([[1, 2], [3, 4]], columns=['a', 'a'])

    for by in ('a', ['a']):
        try:
            df.sort_index(by=by)
        except ValueError as e:
            self.assertTrue('duplicate' in str(e))
        else:
            # Without this branch the test would pass silently when no
            # exception is raised at all.
            self.fail('sort_index(by=%r) did not raise' % (by,))
6127+
61156128
def test_frame_column_inplace_sort_exception(self):
61166129
s = self.frame['A']
61176130
self.assertRaises(Exception, s.sort)

0 commit comments

Comments
 (0)