Skip to content

Commit 3f76205

Browse files
committed
BUG: Bug with reindexing where a non-unique index will now raise ValueError (GH4746)
1 parent 725b195 commit 3f76205

File tree

6 files changed

+42
-26
lines changed

6 files changed

+42
-26
lines changed

doc/source/release.rst

+1
Original file line numberDiff line numberDiff line change
@@ -326,6 +326,7 @@ See :ref:`Internal Refactoring<whatsnew_0130.refactoring>`
326326
- Bug with using ``QUOTE_NONE`` with ``to_csv`` causing ``Exception``. (:issue:`4328`)
327327
- Bug with Series indexing not raising an error when the right-hand-side has an incorrect length (:issue:`2702`)
328328
- Bug in multi-indexing with a partial string selection as one part of a MultiIndex (:issue:`4758`)
329+
- Bug with reindexing on a non-unique index; this will now raise ``ValueError`` (:issue:`4746`)
329330

330331
pandas 0.12
331332
===========

pandas/core/frame.py

+3-4
Original file line numberDiff line numberDiff line change
@@ -2267,15 +2267,15 @@ def _reindex_index(self, new_index, method, copy, level, fill_value=NA,
22672267
limit=limit, copy_if_needed=True,
22682268
takeable=takeable)
22692269
return self._reindex_with_indexers({0: [new_index, indexer]},
2270-
copy=copy, fill_value=fill_value)
2270+
copy=copy, fill_value=fill_value, allow_dups=takeable)
22712271

22722272
def _reindex_columns(self, new_columns, copy, level, fill_value=NA,
22732273
limit=None, takeable=False):
22742274
new_columns, indexer = self.columns.reindex(new_columns, level=level,
22752275
limit=limit, copy_if_needed=True,
22762276
takeable=takeable)
22772277
return self._reindex_with_indexers({1: [new_columns, indexer]},
2278-
copy=copy, fill_value=fill_value)
2278+
copy=copy, fill_value=fill_value, allow_dups=takeable)
22792279

22802280
def _reindex_multi(self, axes, copy, fill_value):
22812281
""" we are guaranteed non-Nones in the axes! """
@@ -2541,8 +2541,7 @@ def take(self, indices, axis=0, convert=True):
25412541
new_data = self._data.take(indices, axis=1, verify=False)
25422542
return DataFrame(new_data)
25432543
else:
2544-
new_columns = self.columns.take(indices)
2545-
return self.reindex(columns=new_columns)
2544+
return self.reindex(columns=indices, takeable=True)
25462545
else:
25472546
new_values = com.take_nd(self.values,
25482547
com._ensure_int64(indices),

pandas/core/generic.py

+4-3
Original file line numberDiff line numberDiff line change
@@ -944,7 +944,7 @@ def drop(self, labels, axis=0, level=None):
944944
new_axis = axis.drop(labels, level=level)
945945
else:
946946
new_axis = axis.drop(labels)
947-
dropped = self.reindex(**{axis_name: new_axis})
947+
dropped = self.reindex(**{ axis_name: new_axis })
948948
try:
949949
dropped.axes[axis_].set_names(axis.names, inplace=True)
950950
except AttributeError:
@@ -1161,7 +1161,8 @@ def reindex_axis(self, labels, axis=0, method=None, level=None, copy=True,
11611161
return self._reindex_with_indexers({axis: [new_index, indexer]}, method=method, fill_value=fill_value,
11621162
limit=limit, copy=copy)._propogate_attributes(self)
11631163

1164-
def _reindex_with_indexers(self, reindexers, method=None, fill_value=np.nan, limit=None, copy=False):
1164+
def _reindex_with_indexers(self, reindexers, method=None, fill_value=np.nan, limit=None, copy=False, allow_dups=False):
1165+
""" allow_dups indicates an internal call here """
11651166

11661167
# reindex doing multiple operations on different axes if indicated
11671168
new_data = self._data
@@ -1183,7 +1184,7 @@ def _reindex_with_indexers(self, reindexers, method=None, fill_value=np.nan, lim
11831184
# TODO: speed up on homogeneous DataFrame objects
11841185
indexer = com._ensure_int64(indexer)
11851186
new_data = new_data.reindex_indexer(index, indexer, axis=baxis,
1186-
fill_value=fill_value)
1187+
fill_value=fill_value, allow_dups=allow_dups)
11871188

11881189
elif baxis == 0 and index is not None and index is not new_data.axes[baxis]:
11891190
new_data = new_data.reindex_items(index, copy=copy,

pandas/core/indexing.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -668,7 +668,7 @@ def _reindex(keys, level=None):
668668
if axis+1 > ndim:
669669
raise AssertionError("invalid indexing error with non-unique index")
670670

671-
result = result._reindex_with_indexers({ axis : [ new_labels, new_indexer ] }, copy=True)
671+
result = result._reindex_with_indexers({ axis : [ new_labels, new_indexer ] }, copy=True, allow_dups=True)
672672

673673
return result
674674

pandas/core/internals.py

+6-1
Original file line numberDiff line numberDiff line change
@@ -198,6 +198,7 @@ def reindex_axis(self, indexer, method=None, axis=1, fill_value=None, limit=None
198198
raise AssertionError('axis must be at least 1, got %d' % axis)
199199
if fill_value is None:
200200
fill_value = self.fill_value
201+
201202
new_values = com.take_nd(self.values, indexer, axis,
202203
fill_value=fill_value, mask_info=mask_info)
203204
return make_block(
@@ -2718,10 +2719,14 @@ def reindex_axis0_with_method(self, new_axis, indexer=None, method=None, fill_va
27182719
raise AssertionError('method argument not supported for '
27192720
'axis == 0')
27202721

2721-
def reindex_indexer(self, new_axis, indexer, axis=1, fill_value=None):
2722+
def reindex_indexer(self, new_axis, indexer, axis=1, fill_value=None, allow_dups=False):
27222723
"""
27232724
pandas-indexer with -1's only.
27242725
"""
2726+
# trying to reindex on an axis with duplicates
2727+
if not allow_dups and not self.axes[axis].is_unique:
2728+
raise ValueError("cannot reindex from a duplicate axis")
2729+
27252730
if axis == 0:
27262731
return self._reindex_indexer_items(new_axis, indexer, fill_value)
27272732

pandas/tests/test_frame.py

+27-17
Original file line numberDiff line numberDiff line change
@@ -2879,7 +2879,7 @@ def test_constructor_column_duplicates(self):
28792879
columns=['b', 'a', 'a'])
28802880

28812881

2882-
def test_column_duplicates_operations(self):
2882+
def test_column_dups_operations(self):
28832883

28842884
def check(result, expected=None):
28852885
if expected is not None:
@@ -2973,22 +2973,6 @@ def check(result, expected=None):
29732973
expected = DataFrame([[1,5,7.],[1,5,7.],[1,5,7.]],columns=['bar','hello','foo2'])
29742974
check(df,expected)
29752975

2976-
# reindex
2977-
df = DataFrame([[1,5,7.],[1,5,7.],[1,5,7.]],columns=['bar','a','a'])
2978-
expected = DataFrame([[1],[1],[1]],columns=['bar'])
2979-
result = df.reindex(columns=['bar'])
2980-
check(result,expected)
2981-
2982-
result1 = DataFrame([[1],[1],[1]],columns=['bar']).reindex(columns=['bar','foo'])
2983-
result2 = df.reindex(columns=['bar','foo'])
2984-
check(result2,result1)
2985-
2986-
# drop
2987-
df = DataFrame([[1,5,7.],[1,5,7.],[1,5,7.]],columns=['bar','a','a'])
2988-
df = df.drop(['a'],axis=1)
2989-
expected = DataFrame([[1],[1],[1]],columns=['bar'])
2990-
check(df,expected)
2991-
29922976
# values
29932977
df = DataFrame([[1,2.5],[3,4.5]], index=[1,2], columns=['x','x'])
29942978
result = df.values
@@ -3016,6 +3000,17 @@ def check(result, expected=None):
30163000
columns=['RT','TClose','TExg','RPT_Date','STK_ID','STK_Name','QT_Close']).set_index(['STK_ID','RPT_Date'],drop=False)
30173001
assert_frame_equal(result,expected)
30183002

3003+
# reindex is invalid!
3004+
df = DataFrame([[1,5,7.],[1,5,7.],[1,5,7.]],columns=['bar','a','a'])
3005+
self.assertRaises(ValueError, df.reindex, columns=['bar'])
3006+
self.assertRaises(ValueError, df.reindex, columns=['bar','foo'])
3007+
3008+
# drop
3009+
df = DataFrame([[1,5,7.],[1,5,7.],[1,5,7.]],columns=['bar','a','a'])
3010+
df = df.drop(['a'],axis=1)
3011+
expected = DataFrame([[1],[1],[1]],columns=['bar'])
3012+
check(df,expected)
3013+
30193014
def test_insert_benchmark(self):
30203015
# from the vb_suite/frame_methods/frame_insert_columns
30213016
N = 10
@@ -7573,6 +7568,21 @@ def test_reindex_fill_value(self):
75737568
expected = df.reindex(lrange(15)).fillna(0)
75747569
assert_frame_equal(result, expected)
75757570

7571+
def test_reindex_dups(self):
7572+
7573+
# GH4746, reindex on duplicate index error messages
7574+
arr = np.random.randn(10)
7575+
df = DataFrame(arr,index=[1,2,3,4,5,1,2,3,4,5])
7576+
7577+
# set index is ok
7578+
result = df.copy()
7579+
result.index = list(range(len(df)))
7580+
expected = DataFrame(arr,index=list(range(len(df))))
7581+
assert_frame_equal(result,expected)
7582+
7583+
# reindex fails
7584+
self.assertRaises(ValueError, df.reindex, index=list(range(len(df))))
7585+
75767586
def test_align(self):
75777587

75787588
af, bf = self.frame.align(self.frame)

0 commit comments

Comments
 (0)