Skip to content

Commit a6c708a

Browse files
committed
Rebased version of pandas-dev#22486
1 parent aadf50b commit a6c708a

File tree

3 files changed

+42
-13
lines changed

3 files changed

+42
-13
lines changed

pandas/core/frame.py

+17-1
Original file line number | Diff line number | Diff line change
@@ -61,7 +61,7 @@
6161
is_named_tuple)
6262
from pandas.core.dtypes.concat import _get_sliced_frame_result_type
6363
from pandas.core.dtypes.missing import isna, notna
64-
64+
from pandas.core.dtypes.generic import ABCIndexClass, ABCMultiIndex, ABCSeries
6565

6666
from pandas.core.generic import NDFrame, _shared_docs
6767
from pandas.core.index import (Index, MultiIndex, ensure_index,
@@ -3891,6 +3891,22 @@ def set_index(self, keys, drop=True, append=False, inplace=False,
38913891
if not isinstance(keys, list):
38923892
keys = [keys]
38933893

3894+
missing = []
3895+
for x in keys:
3896+
if not (is_scalar(x) or isinstance(x, tuple)):
3897+
if not isinstance(x, (ABCSeries, ABCIndexClass, ABCMultiIndex,
3898+
list, np.ndarray)):
3899+
raise TypeError('keys may only contain a combination of '
3900+
'the following: valid column keys, '
3901+
'Series, Index, MultiIndex, list or '
3902+
'np.ndarray')
3903+
else:
3904+
if x not in self:
3905+
missing.append(x)
3906+
3907+
if missing:
3908+
raise KeyError('{}'.format(missing))
3909+
38943910
vi = verify_integrity
38953911
return super(DataFrame, self).set_index(keys=keys, drop=drop,
38963912
append=append, inplace=inplace,

pandas/core/generic.py

+2-1
Original file line number | Diff line number | Diff line change
@@ -779,7 +779,8 @@ def set_index(self, keys, drop=True, append=False, inplace=False,
779779
raise ValueError('Index has duplicate keys: {dup}'.format(
780780
dup=duplicates))
781781

782-
for c in to_remove:
782+
# use set to handle duplicate column names gracefully in case of drop
783+
for c in set(to_remove):
783784
del obj[c]
784785

785786
# clear up memory usage

pandas/tests/frame/test_alter_axes.py

+23-11
Original file line number | Diff line number | Diff line change
@@ -186,18 +186,19 @@ def test_set_index_pass_arrays_duplicate(self, frame_of_index_cols, drop,
186186

187187
# == gives ambiguous Boolean for Series
188188
if drop and keys[0] is 'A' and keys[1] is 'A':
189-
with tm.assert_raises_regex(KeyError, '.*'):
190-
df.set_index(keys, drop=drop, append=append)
189+
# can't drop same column twice
190+
first_drop = False
191191
else:
192-
result = df.set_index(keys, drop=drop, append=append)
192+
first_drop = drop
193193

194-
# to test against already-tested behavior, we add sequentially,
195-
# hence second append always True; must wrap in list, otherwise
196-
# list-box will be illegal
197-
expected = df.set_index([keys[0]], drop=drop, append=append)
198-
expected = expected.set_index([keys[1]], drop=drop, append=True)
194+
# to test against already-tested behaviour, we add sequentially,
195+
# hence second append always True; must wrap in list, otherwise
196+
# list-box will be illegal
197+
expected = df.set_index([keys[0]], drop=first_drop, append=append)
198+
expected = expected.set_index([keys[1]], drop=drop, append=True)
199199

200-
tm.assert_frame_equal(result, expected)
200+
result = df.set_index(keys, drop=drop, append=append)
201+
tm.assert_frame_equal(result, expected)
201202

202203
@pytest.mark.parametrize('append', [True, False])
203204
@pytest.mark.parametrize('drop', [True, False])
@@ -229,13 +230,24 @@ def test_set_index_verify_integrity(self, frame_of_index_cols):
229230
def test_set_index_raise(self, frame_of_index_cols, drop, append):
230231
df = frame_of_index_cols
231232

232-
with tm.assert_raises_regex(KeyError, '.*'): # column names are A-E
233+
with tm.assert_raises_regex(KeyError, "['foo', 'bar', 'baz']"):
234+
# column names are A-E
233235
df.set_index(['foo', 'bar', 'baz'], drop=drop, append=append)
234236

235237
# non-existent key in list with arrays
236-
with tm.assert_raises_regex(KeyError, '.*'):
238+
with tm.assert_raises_regex(KeyError, 'X'):
237239
df.set_index([df['A'], df['B'], 'X'], drop=drop, append=append)
238240

241+
rgx = 'keys may only contain a combination of the following:.*'
242+
# forbidden type, e.g. set
243+
with tm.assert_raises_regex(TypeError, rgx):
244+
df.set_index(set(df['A']), drop=drop, append=append)
245+
246+
# forbidden type in list, e.g. set
247+
with tm.assert_raises_regex(TypeError, rgx):
248+
df.set_index(['A', df['A'], set(df['A'])],
249+
drop=drop, append=append)
250+
239251
def test_construction_with_categorical_index(self):
240252
ci = tm.makeCategoricalIndex(10)
241253
ci.name = 'B'

0 commit comments

Comments
 (0)