Skip to content

Commit 6f3e626

Browse files
h-vetinari authored and Pingviinituutti committed
Revert set_index inspection/error handling for 0.24.1 (pandas-dev#25085)
* DOC: Minor what's new fix (pandas-dev#24933)
* Backport PR pandas-dev#24916: BUG-24212 fix regression in pandas-dev#24897 (pandas-dev#24951)
* Revert "Backport PR pandas-dev#24916: BUG-24212 fix regression in pandas-dev#24897 (pandas-dev#24951)"
  This reverts commit 84056c5.
1 parent 307550d commit 6f3e626

File tree

3 files changed

+130
-42
lines changed

3 files changed

+130
-42
lines changed

doc/source/whatsnew/v0.24.1.rst

+1
Original file line number | Diff line number | Diff line change
@@ -58,6 +58,7 @@ Fixed Regressions
5858
- Fixed regression in :func:`merge` when merging an empty ``DataFrame`` with multiple timezone-aware columns on one of the timezone-aware columns (:issue:`25014`).
5959
- Fixed regression in :meth:`Series.rename_axis` and :meth:`DataFrame.rename_axis` where passing ``None`` failed to remove the axis name (:issue:`25034`)
6060
- Fixed regression in :func:`to_timedelta` with ``box=False`` incorrectly returning a ``datetime64`` object instead of a ``timedelta64`` object (:issue:`24961`)
61+
- Fixed regression where custom hashable types could not be used as column keys in :meth:`DataFrame.set_index` (:issue:`24969`)
6162

6263
.. _whatsnew_0241.bug_fixes:
6364

pandas/core/frame.py

+18-37
Original file line number | Diff line number | Diff line change
@@ -71,7 +71,7 @@
7171
is_iterator,
7272
is_sequence,
7373
is_named_tuple)
74-
from pandas.core.dtypes.generic import ABCSeries, ABCIndexClass, ABCMultiIndex
74+
from pandas.core.dtypes.generic import ABCSeries, ABCIndexClass
7575
from pandas.core.dtypes.missing import isna, notna
7676

7777
from pandas.core import algorithms
@@ -4154,33 +4154,8 @@ def set_index(self, keys, drop=True, append=False, inplace=False,
41544154
4 16 10 2014 31
41554155
"""
41564156
inplace = validate_bool_kwarg(inplace, 'inplace')
4157-
4158-
err_msg = ('The parameter "keys" may be a column key, one-dimensional '
4159-
'array, or a list containing only valid column keys and '
4160-
'one-dimensional arrays.')
4161-
4162-
if (is_scalar(keys) or isinstance(keys, tuple)
4163-
or isinstance(keys, (ABCIndexClass, ABCSeries, np.ndarray))):
4164-
# make sure we have a container of keys/arrays we can iterate over
4165-
# tuples can appear as valid column keys!
4157+
if not isinstance(keys, list):
41664158
keys = [keys]
4167-
elif not isinstance(keys, list):
4168-
raise ValueError(err_msg)
4169-
4170-
missing = []
4171-
for col in keys:
4172-
if (is_scalar(col) or isinstance(col, tuple)):
4173-
# if col is a valid column key, everything is fine
4174-
# tuples are always considered keys, never as list-likes
4175-
if col not in self:
4176-
missing.append(col)
4177-
elif (not isinstance(col, (ABCIndexClass, ABCSeries,
4178-
np.ndarray, list))
4179-
or getattr(col, 'ndim', 1) > 1):
4180-
raise ValueError(err_msg)
4181-
4182-
if missing:
4183-
raise KeyError('{}'.format(missing))
41844159

41854160
if inplace:
41864161
frame = self
@@ -4191,31 +4166,37 @@ def set_index(self, keys, drop=True, append=False, inplace=False,
41914166
names = []
41924167
if append:
41934168
names = [x for x in self.index.names]
4194-
if isinstance(self.index, ABCMultiIndex):
4169+
if isinstance(self.index, MultiIndex):
41954170
for i in range(self.index.nlevels):
41964171
arrays.append(self.index._get_level_values(i))
41974172
else:
41984173
arrays.append(self.index)
41994174

42004175
to_remove = []
42014176
for col in keys:
4202-
if isinstance(col, ABCMultiIndex):
4203-
for n in range(col.nlevels):
4177+
if isinstance(col, MultiIndex):
4178+
# append all but the last level so we don't have to modify
4179+
# the end of this loop
4180+
for n in range(col.nlevels - 1):
42044181
arrays.append(col._get_level_values(n))
4182+
4183+
level = col._get_level_values(col.nlevels - 1)
42054184
names.extend(col.names)
4206-
elif isinstance(col, (ABCIndexClass, ABCSeries)):
4207-
# if Index then not MultiIndex (treated above)
4208-
arrays.append(col)
4185+
elif isinstance(col, Series):
4186+
level = col._values
4187+
names.append(col.name)
4188+
elif isinstance(col, Index):
4189+
level = col
42094190
names.append(col.name)
4210-
elif isinstance(col, (list, np.ndarray)):
4211-
arrays.append(col)
4191+
elif isinstance(col, (list, np.ndarray, Index)):
4192+
level = col
42124193
names.append(None)
4213-
# from here, col can only be a column label
42144194
else:
4215-
arrays.append(frame[col]._values)
4195+
level = frame[col]._values
42164196
names.append(col)
42174197
if drop:
42184198
to_remove.append(col)
4199+
arrays.append(level)
42194200

42204201
index = ensure_index_from_sequences(arrays, names)
42214202

pandas/tests/frame/test_alter_axes.py

+111-5
Original file line number | Diff line number | Diff line change
@@ -253,23 +253,129 @@ def test_set_index_raise_keys(self, frame_of_index_cols, drop, append):
253253
df.set_index(['A', df['A'], tuple(df['A'])],
254254
drop=drop, append=append)
255255

256+
@pytest.mark.xfail(reason='broken due to revert, see GH 25085')
256257
@pytest.mark.parametrize('append', [True, False])
257258
@pytest.mark.parametrize('drop', [True, False])
258-
@pytest.mark.parametrize('box', [set, iter])
259+
@pytest.mark.parametrize('box', [set, iter, lambda x: (y for y in x)],
260+
ids=['set', 'iter', 'generator'])
259261
def test_set_index_raise_on_type(self, frame_of_index_cols, box,
260262
drop, append):
261263
df = frame_of_index_cols
262264

263265
msg = 'The parameter "keys" may be a column key, .*'
264-
# forbidden type, e.g. set/tuple/iter
265-
with pytest.raises(ValueError, match=msg):
266+
# forbidden type, e.g. set/iter/generator
267+
with pytest.raises(TypeError, match=msg):
266268
df.set_index(box(df['A']), drop=drop, append=append)
267269

268-
# forbidden type in list, e.g. set/tuple/iter
269-
with pytest.raises(ValueError, match=msg):
270+
# forbidden type in list, e.g. set/iter/generator
271+
with pytest.raises(TypeError, match=msg):
270272
df.set_index(['A', df['A'], box(df['A'])],
271273
drop=drop, append=append)
272274

275+
def test_set_index_custom_label_type(self):
276+
# GH 24969
277+
278+
class Thing(object):
279+
def __init__(self, name, color):
280+
self.name = name
281+
self.color = color
282+
283+
def __str__(self):
284+
return "<Thing %r>" % (self.name,)
285+
286+
# necessary for pretty KeyError
287+
__repr__ = __str__
288+
289+
thing1 = Thing('One', 'red')
290+
thing2 = Thing('Two', 'blue')
291+
df = DataFrame({thing1: [0, 1], thing2: [2, 3]})
292+
expected = DataFrame({thing1: [0, 1]},
293+
index=Index([2, 3], name=thing2))
294+
295+
# use custom label directly
296+
result = df.set_index(thing2)
297+
tm.assert_frame_equal(result, expected)
298+
299+
# custom label wrapped in list
300+
result = df.set_index([thing2])
301+
tm.assert_frame_equal(result, expected)
302+
303+
# missing key
304+
thing3 = Thing('Three', 'pink')
305+
msg = "<Thing 'Three'>"
306+
with pytest.raises(KeyError, match=msg):
307+
# missing label directly
308+
df.set_index(thing3)
309+
310+
with pytest.raises(KeyError, match=msg):
311+
# missing label in list
312+
df.set_index([thing3])
313+
314+
def test_set_index_custom_label_hashable_iterable(self):
315+
# GH 24969
316+
317+
# actual example discussed in GH 24984 was e.g. for shapely.geometry
318+
# objects (e.g. a collection of Points) that can be both hashable and
319+
# iterable; using frozenset as a stand-in for testing here
320+
321+
class Thing(frozenset):
322+
# need to stabilize repr for KeyError (due to random order in sets)
323+
def __repr__(self):
324+
tmp = sorted(list(self))
325+
# double curly brace prints one brace in format string
326+
return "frozenset({{{}}})".format(', '.join(map(repr, tmp)))
327+
328+
thing1 = Thing(['One', 'red'])
329+
thing2 = Thing(['Two', 'blue'])
330+
df = DataFrame({thing1: [0, 1], thing2: [2, 3]})
331+
expected = DataFrame({thing1: [0, 1]},
332+
index=Index([2, 3], name=thing2))
333+
334+
# use custom label directly
335+
result = df.set_index(thing2)
336+
tm.assert_frame_equal(result, expected)
337+
338+
# custom label wrapped in list
339+
result = df.set_index([thing2])
340+
tm.assert_frame_equal(result, expected)
341+
342+
# missing key
343+
thing3 = Thing(['Three', 'pink'])
344+
msg = '.*' # due to revert, see GH 25085
345+
with pytest.raises(KeyError, match=msg):
346+
# missing label directly
347+
df.set_index(thing3)
348+
349+
with pytest.raises(KeyError, match=msg):
350+
# missing label in list
351+
df.set_index([thing3])
352+
353+
def test_set_index_custom_label_type_raises(self):
354+
# GH 24969
355+
356+
# purposefully inherit from something unhashable
357+
class Thing(set):
358+
def __init__(self, name, color):
359+
self.name = name
360+
self.color = color
361+
362+
def __str__(self):
363+
return "<Thing %r>" % (self.name,)
364+
365+
thing1 = Thing('One', 'red')
366+
thing2 = Thing('Two', 'blue')
367+
df = DataFrame([[0, 2], [1, 3]], columns=[thing1, thing2])
368+
369+
msg = 'unhashable type.*'
370+
371+
with pytest.raises(TypeError, match=msg):
372+
# use custom label directly
373+
df.set_index(thing2)
374+
375+
with pytest.raises(TypeError, match=msg):
376+
# custom label wrapped in list
377+
df.set_index([thing2])
378+
273379
def test_construction_with_categorical_index(self):
274380
ci = tm.makeCategoricalIndex(10)
275381
ci.name = 'B'

0 commit comments

Comments
 (0)