Skip to content

Commit add3fbf

Browse files
arminvTomAugspurger
authored andcommitted
ERR: disallow non-hashables in Index/MultiIndex construction & rename (#20548)
1 parent 31e77b0 commit add3fbf

File tree

6 files changed

+103
-11
lines changed

6 files changed

+103
-11
lines changed

doc/source/whatsnew/v0.23.0.txt

+1
Original file line numberDiff line numberDiff line change
@@ -827,6 +827,7 @@ Other API Changes
827827
- A :class:`Series` of ``dtype=category`` constructed from an empty ``dict`` will now have categories of ``dtype=object`` rather than ``dtype=float64``, consistently with the case in which an empty list is passed (:issue:`18515`)
828828
- All-NaN levels in a ``MultiIndex`` are now assigned ``float`` rather than ``object`` dtype, promoting consistency with ``Index`` (:issue:`17929`).
829829
- Level names of a ``MultiIndex`` (when not None) are now required to be unique: trying to create a ``MultiIndex`` with repeated names will raise a ``ValueError`` (:issue:`18872`)
830+
- Both construction and renaming of ``Index``/``MultiIndex`` with non-hashable ``name``/``names`` will now raise ``TypeError`` (:issue:`20527`)
830831
- :func:`Index.map` can now accept ``Series`` and dictionary input objects (:issue:`12756`, :issue:`18482`, :issue:`18509`).
831832
- :func:`DataFrame.unstack` will now default to filling with ``np.nan`` for ``object`` columns. (:issue:`12815`)
832833
- :class:`IntervalIndex` constructor will raise if the ``closed`` parameter conflicts with how the input data is inferred to be closed (:issue:`18421`)

pandas/core/indexes/base.py

+28-2
Original file line numberDiff line numberDiff line change
@@ -42,6 +42,7 @@
4242
is_datetime64_any_dtype,
4343
is_datetime64tz_dtype,
4444
is_timedelta64_dtype,
45+
is_hashable,
4546
needs_i8_conversion,
4647
is_iterator, is_list_like,
4748
is_scalar)
@@ -1312,9 +1313,33 @@ def _get_names(self):
13121313
return FrozenList((self.name, ))
13131314

13141315
def _set_names(self, values, level=None):
1316+
"""
1317+
Set new names on index. Each name has to be a hashable type.
1318+
1319+
Parameters
1320+
----------
1321+
values : list-like
1322+
name(s) to set
1323+
level : int, level name, or sequence of int/level names (default None)
1324+
If the index is a MultiIndex (hierarchical), level(s) to set (None
1325+
for all levels). Otherwise level must be None
1326+
1327+
Raises
1328+
------
1329+
TypeError if any name is not hashable.
1330+
"""
1331+
if not is_list_like(values):
1332+
raise ValueError('Names must be a list-like')
13151333
if len(values) != 1:
13161334
raise ValueError('Length of new names must be 1, got %d' %
13171335
len(values))
1336+
1337+
# GH 20527
1338+
# All items in 'name' need to be hashable:
1339+
for name in values:
1340+
if not is_hashable(name):
1341+
raise TypeError('{}.name must be a hashable type'
1342+
.format(self.__class__.__name__))
13181343
self.name = values[0]
13191344

13201345
names = property(fset=_set_names, fget=_get_names)
@@ -1340,9 +1365,9 @@ def set_names(self, names, level=None, inplace=False):
13401365
Examples
13411366
--------
13421367
>>> Index([1, 2, 3, 4]).set_names('foo')
1343-
Int64Index([1, 2, 3, 4], dtype='int64')
1368+
Int64Index([1, 2, 3, 4], dtype='int64', name='foo')
13441369
>>> Index([1, 2, 3, 4]).set_names(['foo'])
1345-
Int64Index([1, 2, 3, 4], dtype='int64')
1370+
Int64Index([1, 2, 3, 4], dtype='int64', name='foo')
13461371
>>> idx = MultiIndex.from_tuples([(1, u'one'), (1, u'two'),
13471372
(2, u'one'), (2, u'two')],
13481373
names=['foo', 'bar'])
@@ -1355,6 +1380,7 @@ def set_names(self, names, level=None, inplace=False):
13551380
labels=[[0, 0, 1, 1], [0, 1, 0, 1]],
13561381
names=[u'baz', u'bar'])
13571382
"""
1383+
13581384
if level is not None and self.nlevels == 1:
13591385
raise ValueError('Level must be None for non-MultiIndex')
13601386

pandas/core/indexes/multi.py

+33-5
Original file line numberDiff line numberDiff line change
@@ -16,6 +16,7 @@
1616
_ensure_platform_int,
1717
is_categorical_dtype,
1818
is_object_dtype,
19+
is_hashable,
1920
is_iterator,
2021
is_list_like,
2122
pandas_dtype,
@@ -634,12 +635,29 @@ def _get_names(self):
634635

635636
def _set_names(self, names, level=None, validate=True):
636637
"""
638+
Set new names on index. Each name has to be a hashable type.
639+
640+
Parameters
641+
----------
642+
names : sequence
643+
name(s) to set
644+
level : int, level name, or sequence of int/level names (default None)
645+
If the index is a MultiIndex (hierarchical), level(s) to set (None
646+
for all levels). Otherwise level must be None
647+
validate : boolean, default True
648+
validate that the names match level lengths
649+
650+
Raises
651+
------
652+
TypeError if any name is not hashable.
653+
654+
Notes
655+
-----
637656
sets names on levels. WARNING: mutates!
638657
639658
Note that you generally want to set this *after* changing levels, so
640659
that it only acts on copies
641660
"""
642-
643661
# GH 15110
644662
# Don't allow a single string for names in a MultiIndex
645663
if names is not None and not is_list_like(names):
@@ -662,10 +680,20 @@ def _set_names(self, names, level=None, validate=True):
662680

663681
# set the name
664682
for l, name in zip(level, names):
665-
if name is not None and name in used:
666-
raise ValueError('Duplicated level name: "{}", assigned to '
667-
'level {}, is already used for level '
668-
'{}.'.format(name, l, used[name]))
683+
if name is not None:
684+
685+
# GH 20527
686+
# All items in 'names' need to be hashable:
687+
if not is_hashable(name):
688+
raise TypeError('{}.name must be a hashable type'
689+
.format(self.__class__.__name__))
690+
691+
if name in used:
692+
raise ValueError(
693+
'Duplicated level name: "{}", assigned to '
694+
'level {}, is already used for level '
695+
'{}.'.format(name, l, used[name]))
696+
669697
self.levels[l].rename(name, inplace=True)
670698
used[name] = l
671699

pandas/tests/frame/test_indexing.py

+2-2
Original file line numberDiff line numberDiff line change
@@ -125,12 +125,12 @@ def test_getitem_list(self):
125125
# tuples
126126
df = DataFrame(randn(8, 3),
127127
columns=Index([('foo', 'bar'), ('baz', 'qux'),
128-
('peek', 'aboo')], name=['sth', 'sth2']))
128+
('peek', 'aboo')], name=('sth', 'sth2')))
129129

130130
result = df[[('foo', 'bar'), ('baz', 'qux')]]
131131
expected = df.iloc[:, :2]
132132
assert_frame_equal(result, expected)
133-
assert result.columns.names == ['sth', 'sth2']
133+
assert result.columns.names == ('sth', 'sth2')
134134

135135
def test_getitem_callable(self):
136136
# GH 12533

pandas/tests/indexes/test_base.py

+18
Original file line numberDiff line numberDiff line change
@@ -435,6 +435,24 @@ def test_constructor_empty(self):
435435
assert isinstance(empty, MultiIndex)
436436
assert not len(empty)
437437

438+
def test_constructor_nonhashable_name(self, indices):
439+
# GH 20527
440+
441+
if isinstance(indices, MultiIndex):
442+
pytest.skip("multiindex handled in test_multi.py")
443+
444+
name = ['0']
445+
message = "Index.name must be a hashable type"
446+
tm.assert_raises_regex(TypeError, message, name=name)
447+
448+
# With .rename()
449+
renamed = [['1']]
450+
tm.assert_raises_regex(TypeError, message,
451+
indices.rename, name=renamed)
452+
# With .set_names()
453+
tm.assert_raises_regex(TypeError, message,
454+
indices.set_names, names=renamed)
455+
438456
def test_view_with_args(self):
439457

440458
restricted = ['unicodeIndex', 'strIndex', 'catIndex', 'boolIndex',

pandas/tests/indexes/test_multi.py

+21-2
Original file line numberDiff line numberDiff line change
@@ -615,8 +615,27 @@ def test_constructor_mismatched_label_levels(self):
615615
with tm.assert_raises_regex(ValueError, label_error):
616616
self.index.copy().set_labels([[0, 0, 0, 0], [0, 0]])
617617

618-
@pytest.mark.parametrize('names', [['a', 'b', 'a'], [1, 1, 2],
619-
[1, 'a', 1]])
618+
def test_constructor_nonhashable_names(self):
619+
# GH 20527
620+
levels = [[1, 2], [u'one', u'two']]
621+
labels = [[0, 0, 1, 1], [0, 1, 0, 1]]
622+
names = ((['foo'], ['bar']))
623+
message = "MultiIndex.name must be a hashable type"
624+
tm.assert_raises_regex(TypeError, message,
625+
MultiIndex, levels=levels,
626+
labels=labels, names=names)
627+
628+
# With .rename()
629+
mi = MultiIndex(levels=[[1, 2], [u'one', u'two']],
630+
labels=[[0, 0, 1, 1], [0, 1, 0, 1]],
631+
names=('foo', 'bar'))
632+
renamed = [['foor'], ['barr']]
633+
tm.assert_raises_regex(TypeError, message, mi.rename, names=renamed)
634+
# With .set_names()
635+
tm.assert_raises_regex(TypeError, message, mi.set_names, names=renamed)
636+
637+
@pytest.mark.parametrize('names', [['a', 'b', 'a'], ['1', '1', '2'],
638+
['1', 'a', '1']])
620639
def test_duplicate_level_names(self, names):
621640
# GH18872
622641
pytest.raises(ValueError, pd.MultiIndex.from_product,

0 commit comments

Comments
 (0)