Skip to content

Commit 65498c0

Browse files
h-vetinari authored and MeeseeksDev[bot] committed
Backport PR pandas-dev#25085: Revert set_index inspection/error handling for 0.24.1
1 parent 78f6cc5 commit 65498c0

File tree

3 files changed

+130
-42
lines changed

3 files changed

+130
-42
lines changed

doc/source/whatsnew/v0.24.1.rst

+1
Original file line numberDiff line numberDiff line change
@@ -58,6 +58,7 @@ Fixed Regressions
5858
- Fixed regression in :func:`merge` when merging an empty ``DataFrame`` with multiple timezone-aware columns on one of the timezone-aware columns (:issue:`25014`).
5959
- Fixed regression in :meth:`Series.rename_axis` and :meth:`DataFrame.rename_axis` where passing ``None`` failed to remove the axis name (:issue:`25034`)
6060
- Fixed regression in :func:`to_timedelta` with ``box=False`` incorrectly returning a ``datetime64`` object instead of a ``timedelta64`` object (:issue:`24961`)
61+
- Fixed regression where custom hashable types could not be used as column keys in :meth:`DataFrame.set_index` (:issue:`24969`)
6162

6263
.. _whatsnew_0241.bug_fixes:
6364

pandas/core/frame.py

+18-37
Original file line numberDiff line numberDiff line change
@@ -71,7 +71,7 @@
7171
is_iterator,
7272
is_sequence,
7373
is_named_tuple)
74-
from pandas.core.dtypes.generic import ABCSeries, ABCIndexClass, ABCMultiIndex
74+
from pandas.core.dtypes.generic import ABCSeries, ABCIndexClass
7575
from pandas.core.dtypes.missing import isna, notna
7676

7777
from pandas.core import algorithms
@@ -4137,33 +4137,8 @@ def set_index(self, keys, drop=True, append=False, inplace=False,
41374137
4 16 10 2014 31
41384138
"""
41394139
inplace = validate_bool_kwarg(inplace, 'inplace')
4140-
4141-
err_msg = ('The parameter "keys" may be a column key, one-dimensional '
4142-
'array, or a list containing only valid column keys and '
4143-
'one-dimensional arrays.')
4144-
4145-
if (is_scalar(keys) or isinstance(keys, tuple)
4146-
or isinstance(keys, (ABCIndexClass, ABCSeries, np.ndarray))):
4147-
# make sure we have a container of keys/arrays we can iterate over
4148-
# tuples can appear as valid column keys!
4140+
if not isinstance(keys, list):
41494141
keys = [keys]
4150-
elif not isinstance(keys, list):
4151-
raise ValueError(err_msg)
4152-
4153-
missing = []
4154-
for col in keys:
4155-
if (is_scalar(col) or isinstance(col, tuple)):
4156-
# if col is a valid column key, everything is fine
4157-
# tuples are always considered keys, never as list-likes
4158-
if col not in self:
4159-
missing.append(col)
4160-
elif (not isinstance(col, (ABCIndexClass, ABCSeries,
4161-
np.ndarray, list))
4162-
or getattr(col, 'ndim', 1) > 1):
4163-
raise ValueError(err_msg)
4164-
4165-
if missing:
4166-
raise KeyError('{}'.format(missing))
41674142

41684143
if inplace:
41694144
frame = self
@@ -4174,31 +4149,37 @@ def set_index(self, keys, drop=True, append=False, inplace=False,
41744149
names = []
41754150
if append:
41764151
names = [x for x in self.index.names]
4177-
if isinstance(self.index, ABCMultiIndex):
4152+
if isinstance(self.index, MultiIndex):
41784153
for i in range(self.index.nlevels):
41794154
arrays.append(self.index._get_level_values(i))
41804155
else:
41814156
arrays.append(self.index)
41824157

41834158
to_remove = []
41844159
for col in keys:
4185-
if isinstance(col, ABCMultiIndex):
4186-
for n in range(col.nlevels):
4160+
if isinstance(col, MultiIndex):
4161+
# append all but the last level here; the last level is assigned to
4162+
# ``level`` and appended by the shared append at the end of the loop body
4163+
for n in range(col.nlevels - 1):
41874164
arrays.append(col._get_level_values(n))
4165+
4166+
level = col._get_level_values(col.nlevels - 1)
41884167
names.extend(col.names)
4189-
elif isinstance(col, (ABCIndexClass, ABCSeries)):
4190-
# if Index then not MultiIndex (treated above)
4191-
arrays.append(col)
4168+
elif isinstance(col, Series):
4169+
level = col._values
4170+
names.append(col.name)
4171+
elif isinstance(col, Index):
4172+
level = col
41924173
names.append(col.name)
4193-
elif isinstance(col, (list, np.ndarray)):
4194-
arrays.append(col)
4174+
elif isinstance(col, (list, np.ndarray, Index)):
4175+
level = col
41954176
names.append(None)
4196-
# from here, col can only be a column label
41974177
else:
4198-
arrays.append(frame[col]._values)
4178+
level = frame[col]._values
41994179
names.append(col)
42004180
if drop:
42014181
to_remove.append(col)
4182+
arrays.append(level)
42024183

42034184
index = ensure_index_from_sequences(arrays, names)
42044185

pandas/tests/frame/test_alter_axes.py

+111-5
Original file line numberDiff line numberDiff line change
@@ -253,23 +253,129 @@ def test_set_index_raise_keys(self, frame_of_index_cols, drop, append):
253253
df.set_index(['A', df['A'], tuple(df['A'])],
254254
drop=drop, append=append)
255255

256+
@pytest.mark.xfail(reason='broken due to revert, see GH 25085')
256257
@pytest.mark.parametrize('append', [True, False])
257258
@pytest.mark.parametrize('drop', [True, False])
258-
@pytest.mark.parametrize('box', [set, iter])
259+
@pytest.mark.parametrize('box', [set, iter, lambda x: (y for y in x)],
260+
ids=['set', 'iter', 'generator'])
259261
def test_set_index_raise_on_type(self, frame_of_index_cols, box,
260262
drop, append):
261263
df = frame_of_index_cols
262264

263265
msg = 'The parameter "keys" may be a column key, .*'
264-
# forbidden type, e.g. set/tuple/iter
265-
with pytest.raises(ValueError, match=msg):
266+
# forbidden type, e.g. set/iter/generator
267+
with pytest.raises(TypeError, match=msg):
266268
df.set_index(box(df['A']), drop=drop, append=append)
267269

268-
# forbidden type in list, e.g. set/tuple/iter
269-
with pytest.raises(ValueError, match=msg):
270+
# forbidden type in list, e.g. set/iter/generator
271+
with pytest.raises(TypeError, match=msg):
270272
df.set_index(['A', df['A'], box(df['A'])],
271273
drop=drop, append=append)
272274

275+
def test_set_index_custom_label_type(self):
276+
# GH 24969
277+
278+
class Thing(object):
279+
def __init__(self, name, color):
280+
self.name = name
281+
self.color = color
282+
283+
def __str__(self):
284+
return "<Thing %r>" % (self.name,)
285+
286+
# necessary for pretty KeyError
287+
__repr__ = __str__
288+
289+
thing1 = Thing('One', 'red')
290+
thing2 = Thing('Two', 'blue')
291+
df = DataFrame({thing1: [0, 1], thing2: [2, 3]})
292+
expected = DataFrame({thing1: [0, 1]},
293+
index=Index([2, 3], name=thing2))
294+
295+
# use custom label directly
296+
result = df.set_index(thing2)
297+
tm.assert_frame_equal(result, expected)
298+
299+
# custom label wrapped in list
300+
result = df.set_index([thing2])
301+
tm.assert_frame_equal(result, expected)
302+
303+
# missing key
304+
thing3 = Thing('Three', 'pink')
305+
msg = "<Thing 'Three'>"
306+
with pytest.raises(KeyError, match=msg):
307+
# missing label directly
308+
df.set_index(thing3)
309+
310+
with pytest.raises(KeyError, match=msg):
311+
# missing label in list
312+
df.set_index([thing3])
313+
314+
def test_set_index_custom_label_hashable_iterable(self):
315+
# GH 24969
316+
317+
# actual example discussed in GH 24984 was e.g. for shapely.geometry
318+
# objects (e.g. a collection of Points) that can be both hashable and
319+
# iterable; using frozenset as a stand-in for testing here
320+
321+
class Thing(frozenset):
322+
# need to stabilize repr for KeyError (due to random order in sets)
323+
def __repr__(self):
324+
tmp = sorted(list(self))
325+
# double curly brace prints one brace in format string
326+
return "frozenset({{{}}})".format(', '.join(map(repr, tmp)))
327+
328+
thing1 = Thing(['One', 'red'])
329+
thing2 = Thing(['Two', 'blue'])
330+
df = DataFrame({thing1: [0, 1], thing2: [2, 3]})
331+
expected = DataFrame({thing1: [0, 1]},
332+
index=Index([2, 3], name=thing2))
333+
334+
# use custom label directly
335+
result = df.set_index(thing2)
336+
tm.assert_frame_equal(result, expected)
337+
338+
# custom label wrapped in list
339+
result = df.set_index([thing2])
340+
tm.assert_frame_equal(result, expected)
341+
342+
# missing key
343+
thing3 = Thing(['Three', 'pink'])
344+
msg = '.*' # due to revert, see GH 25085
345+
with pytest.raises(KeyError, match=msg):
346+
# missing label directly
347+
df.set_index(thing3)
348+
349+
with pytest.raises(KeyError, match=msg):
350+
# missing label in list
351+
df.set_index([thing3])
352+
353+
def test_set_index_custom_label_type_raises(self):
354+
# GH 24969
355+
356+
# purposefully inherit from something unhashable
357+
class Thing(set):
358+
def __init__(self, name, color):
359+
self.name = name
360+
self.color = color
361+
362+
def __str__(self):
363+
return "<Thing %r>" % (self.name,)
364+
365+
thing1 = Thing('One', 'red')
366+
thing2 = Thing('Two', 'blue')
367+
df = DataFrame([[0, 2], [1, 3]], columns=[thing1, thing2])
368+
369+
msg = 'unhashable type.*'
370+
371+
with pytest.raises(TypeError, match=msg):
372+
# use custom label directly
373+
df.set_index(thing2)
374+
375+
with pytest.raises(TypeError, match=msg):
376+
# custom label wrapped in list
377+
df.set_index([thing2])
378+
273379
def test_construction_with_categorical_index(self):
274380
ci = tm.makeCategoricalIndex(10)
275381
ci.name = 'B'

0 commit comments

Comments
 (0)