Skip to content

Commit e997faf

Browse files
committed
Rebased version of pandas-dev#22486
1 parent 3edbaea commit e997faf

File tree

4 files changed

+43
-13
lines changed

4 files changed

+43
-13
lines changed

doc/source/whatsnew/v0.24.0.txt

+1
Original file line number | Diff line number | Diff line change
@@ -546,6 +546,7 @@ Other API Changes
546546
- :class:`pandas.io.formats.style.Styler` supports a ``number-format`` property when using :meth:`~pandas.io.formats.style.Styler.to_excel` (:issue:`22015`)
547547
- :meth:`DataFrame.corr` and :meth:`Series.corr` now raise a ``ValueError`` along with a helpful error message instead of a ``KeyError`` when supplied with an invalid method (:issue:`22298`)
548548
- :meth:`shift` will now always return a copy, instead of the previous behaviour of returning self when shifting by 0 (:issue:`22397`)
549+
- :meth:`DataFrame.set_index` now raises a ``TypeError`` for incorrect types, has an improved ``KeyError`` message, and will not fail on duplicate column names with ``drop=True`` (:issue:`22484`)
549550

550551
.. _whatsnew_0240.deprecations:
551552

pandas/core/frame.py

+17-1
Original file line number | Diff line number | Diff line change
@@ -61,7 +61,7 @@
6161
is_named_tuple)
6262
from pandas.core.dtypes.concat import _get_sliced_frame_result_type
6363
from pandas.core.dtypes.missing import isna, notna
64-
64+
from pandas.core.dtypes.generic import ABCIndexClass, ABCMultiIndex, ABCSeries
6565

6666
from pandas.core.generic import NDFrame, _shared_docs
6767
from pandas.core.index import (Index, MultiIndex, ensure_index,
@@ -3898,6 +3898,22 @@ def set_index(self, keys, drop=True, append=False, inplace=False,
38983898
if not isinstance(keys, list):
38993899
keys = [keys]
39003900

3901+
missing = []
3902+
for x in keys:
3903+
if not (is_scalar(x) or isinstance(x, tuple)):
3904+
if not isinstance(x, (ABCSeries, ABCIndexClass, ABCMultiIndex,
3905+
list, np.ndarray)):
3906+
raise TypeError('keys may only contain a combination of '
3907+
'the following: valid column keys, '
3908+
'Series, Index, MultiIndex, list or '
3909+
'np.ndarray')
3910+
else:
3911+
if x not in self:
3912+
missing.append(x)
3913+
3914+
if missing:
3915+
raise KeyError('{}'.format(missing))
3916+
39013917
vi = verify_integrity
39023918
return super(DataFrame, self).set_index(keys=keys, drop=drop,
39033919
append=append, inplace=inplace,

pandas/core/generic.py

+2-1
Original file line number | Diff line number | Diff line change
@@ -786,7 +786,8 @@ def set_index(self, keys, drop=True, append=False, inplace=False,
786786
raise ValueError('Index has duplicate keys: {dup}'.format(
787787
dup=duplicates))
788788

789-
for c in to_remove:
789+
# iterate over a set so that a column listed more than once in to_remove is deleted only once (a repeated del would fail) when drop=True
790+
for c in set(to_remove):
790791
del obj[c]
791792

792793
# clear up memory usage

pandas/tests/frame/test_alter_axes.py

+23-11
Original file line number | Diff line number | Diff line change
@@ -186,18 +186,19 @@ def test_set_index_pass_arrays_duplicate(self, frame_of_index_cols, drop,
186186

187187
# == gives ambiguous Boolean for Series
188188
if drop and keys[0] is 'A' and keys[1] is 'A':
189-
with tm.assert_raises_regex(KeyError, '.*'):
190-
df.set_index(keys, drop=drop, append=append)
189+
# can't drop same column twice
190+
first_drop = False
191191
else:
192-
result = df.set_index(keys, drop=drop, append=append)
192+
first_drop = drop
193193

194-
# to test against already-tested behavior, we add sequentially,
195-
# hence second append always True; must wrap in list, otherwise
196-
# list-box will be illegal
197-
expected = df.set_index([keys[0]], drop=drop, append=append)
198-
expected = expected.set_index([keys[1]], drop=drop, append=True)
194+
# to test against already-tested behaviour, we add sequentially,
195+
# hence second append always True; must wrap in list, otherwise
196+
# list-box will be illegal
197+
expected = df.set_index([keys[0]], drop=first_drop, append=append)
198+
expected = expected.set_index([keys[1]], drop=drop, append=True)
199199

200-
tm.assert_frame_equal(result, expected)
200+
result = df.set_index(keys, drop=drop, append=append)
201+
tm.assert_frame_equal(result, expected)
201202

202203
@pytest.mark.parametrize('append', [True, False])
203204
@pytest.mark.parametrize('drop', [True, False])
@@ -229,13 +230,24 @@ def test_set_index_verify_integrity(self, frame_of_index_cols):
229230
def test_set_index_raise(self, frame_of_index_cols, drop, append):
230231
df = frame_of_index_cols
231232

232-
with tm.assert_raises_regex(KeyError, '.*'): # column names are A-E
233+
with tm.assert_raises_regex(KeyError, "['foo', 'bar', 'baz']"):
234+
# column names are A-E
233235
df.set_index(['foo', 'bar', 'baz'], drop=drop, append=append)
234236

235237
# non-existent key in list with arrays
236-
with tm.assert_raises_regex(KeyError, '.*'):
238+
with tm.assert_raises_regex(KeyError, 'X'):
237239
df.set_index([df['A'], df['B'], 'X'], drop=drop, append=append)
238240

241+
rgx = 'keys may only contain a combination of the following:.*'
242+
# forbidden type, e.g. set
243+
with tm.assert_raises_regex(TypeError, rgx):
244+
df.set_index(set(df['A']), drop=drop, append=append)
245+
246+
# forbidden type in list, e.g. set
247+
with tm.assert_raises_regex(TypeError, rgx):
248+
df.set_index(['A', df['A'], set(df['A'])],
249+
drop=drop, append=append)
250+
239251
def test_construction_with_categorical_index(self):
240252
ci = tm.makeCategoricalIndex(10)
241253
ci.name = 'B'

0 commit comments

Comments
 (0)