diff --git a/doc/source/whatsnew/v0.23.0.txt b/doc/source/whatsnew/v0.23.0.txt index e19aedac80213..3f7c4b3b0ccb7 100644 --- a/doc/source/whatsnew/v0.23.0.txt +++ b/doc/source/whatsnew/v0.23.0.txt @@ -827,6 +827,7 @@ Other API Changes - A :class:`Series` of ``dtype=category`` constructed from an empty ``dict`` will now have categories of ``dtype=object`` rather than ``dtype=float64``, consistently with the case in which an empty list is passed (:issue:`18515`) - All-NaN levels in a ``MultiIndex`` are now assigned ``float`` rather than ``object`` dtype, promoting consistency with ``Index`` (:issue:`17929`). - Levels names of a ``MultiIndex`` (when not None) are now required to be unique: trying to create a ``MultiIndex`` with repeated names will raise a ``ValueError`` (:issue:`18872`) +- Both construction and renaming of ``Index``/``MultiIndex`` with non-hashable ``name``/``names`` will now raise ``TypeError`` (:issue:`20527`) - :func:`Index.map` can now accept ``Series`` and dictionary input objects (:issue:`12756`, :issue:`18482`, :issue:`18509`). - :func:`DataFrame.unstack` will now default to filling with ``np.nan`` for ``object`` columns. (:issue:`12815`) - :class:`IntervalIndex` constructor will raise if the ``closed`` parameter conflicts with how the input data is inferred to be closed (:issue:`18421`) diff --git a/pandas/core/indexes/base.py b/pandas/core/indexes/base.py index 12bb09e8f8a8a..f392a716d9e5b 100644 --- a/pandas/core/indexes/base.py +++ b/pandas/core/indexes/base.py @@ -42,6 +42,7 @@ is_datetime64_any_dtype, is_datetime64tz_dtype, is_timedelta64_dtype, + is_hashable, needs_i8_conversion, is_iterator, is_list_like, is_scalar) @@ -1311,9 +1312,33 @@ def _get_names(self): return FrozenList((self.name, )) def _set_names(self, values, level=None): + """ + Set new names on index. Each name has to be a hashable type. + + Parameters + ---------- + values : str or sequence + name(s) to set + level : int, level name, or sequence of int/level names (default None) + If the index is a MultiIndex (hierarchical), level(s) to set (None + for all levels). Otherwise level must be None + + Raises + ------ + TypeError if each name is not hashable. + """ + if not is_list_like(values): + raise ValueError('Names must be a list-like') if len(values) != 1: raise ValueError('Length of new names must be 1, got %d' % len(values)) + + # GH 20527 + # All items in 'name' need to be hashable: + for name in values: + if not is_hashable(name): + raise TypeError('{}.name must be a hashable type' + .format(self.__class__.__name__)) self.name = values[0] names = property(fset=_set_names, fget=_get_names) @@ -1339,9 +1364,9 @@ def set_names(self, names, level=None, inplace=False): Examples -------- >>> Index([1, 2, 3, 4]).set_names('foo') - Int64Index([1, 2, 3, 4], dtype='int64') + Int64Index([1, 2, 3, 4], dtype='int64', name='foo') >>> Index([1, 2, 3, 4]).set_names(['foo']) - Int64Index([1, 2, 3, 4], dtype='int64') + Int64Index([1, 2, 3, 4], dtype='int64', name='foo') >>> idx = MultiIndex.from_tuples([(1, u'one'), (1, u'two'), (2, u'one'), (2, u'two')], names=['foo', 'bar']) @@ -1354,6 +1379,7 @@ def set_names(self, names, level=None, inplace=False): labels=[[0, 0, 1, 1], [0, 1, 0, 1]], names=[u'baz', u'bar']) """ + if level is not None and self.nlevels == 1: raise ValueError('Level must be None for non-MultiIndex') diff --git a/pandas/core/indexes/multi.py b/pandas/core/indexes/multi.py index 8098f7bb7d246..fbcf06a28c1e5 100644 --- a/pandas/core/indexes/multi.py +++ b/pandas/core/indexes/multi.py @@ -16,6 +16,7 @@ _ensure_platform_int, is_categorical_dtype, is_object_dtype, + is_hashable, is_iterator, is_list_like, pandas_dtype, @@ -634,12 +635,29 @@ def _get_names(self): def _set_names(self, names, level=None, validate=True): """ + Set new names on index. Each name has to be a hashable type. + + Parameters + ---------- + values : str or sequence + name(s) to set + level : int, level name, or sequence of int/level names (default None) + If the index is a MultiIndex (hierarchical), level(s) to set (None + for all levels). Otherwise level must be None + validate : boolean, default True + validate that the names match level lengths + + Raises + ------ + TypeError if each name is not hashable. + + Notes + ----- sets names on levels. WARNING: mutates! Note that you generally want to set this *after* changing levels, so that it only acts on copies """ - # GH 15110 # Don't allow a single string for names in a MultiIndex if names is not None and not is_list_like(names): @@ -662,10 +680,20 @@ def _set_names(self, names, level=None, validate=True): # set the name for l, name in zip(level, names): - if name is not None and name in used: - raise ValueError('Duplicated level name: "{}", assigned to ' - 'level {}, is already used for level ' - '{}.'.format(name, l, used[name])) + if name is not None: + + # GH 20527 + # All items in 'names' need to be hashable: + if not is_hashable(name): + raise TypeError('{}.name must be a hashable type' + .format(self.__class__.__name__)) + + if name in used: + raise ValueError( + 'Duplicated level name: "{}", assigned to ' + 'level {}, is already used for level ' + '{}.'.format(name, l, used[name])) + self.levels[l].rename(name, inplace=True) used[name] = l diff --git a/pandas/tests/frame/test_indexing.py b/pandas/tests/frame/test_indexing.py index a8b81b1b03552..8e10e4c4fbc65 100644 --- a/pandas/tests/frame/test_indexing.py +++ b/pandas/tests/frame/test_indexing.py @@ -125,12 +125,12 @@ def test_getitem_list(self): # tuples df = DataFrame(randn(8, 3), columns=Index([('foo', 'bar'), ('baz', 'qux'), - ('peek', 'aboo')], name=['sth', 'sth2'])) + ('peek', 'aboo')], name=('sth', 'sth2'))) result = df[[('foo', 'bar'), ('baz', 'qux')]] expected = df.iloc[:, :2] assert_frame_equal(result, expected) - assert result.columns.names == ['sth', 'sth2'] + assert result.columns.names == ('sth', 'sth2') def test_getitem_callable(self): # GH 12533 diff --git a/pandas/tests/indexes/test_base.py b/pandas/tests/indexes/test_base.py index 7e19de4cca292..682517f5a6fb1 100644 --- a/pandas/tests/indexes/test_base.py +++ b/pandas/tests/indexes/test_base.py @@ -435,6 +435,24 @@ def test_constructor_empty(self): assert isinstance(empty, MultiIndex) assert not len(empty) + def test_constructor_nonhashable_name(self, indices): + # GH 20527 + + if isinstance(indices, MultiIndex): + pytest.skip("multiindex handled in test_multi.py") + + name = ['0'] + message = "Index.name must be a hashable type" + tm.assert_raises_regex(TypeError, message, name=name) + + # With .rename() + renamed = [['1']] + tm.assert_raises_regex(TypeError, message, + indices.rename, name=renamed) + # With .set_names() + tm.assert_raises_regex(TypeError, message, + indices.set_names, names=renamed) + def test_view_with_args(self): restricted = ['unicodeIndex', 'strIndex', 'catIndex', 'boolIndex', diff --git a/pandas/tests/indexes/test_multi.py b/pandas/tests/indexes/test_multi.py index 984f37042d600..88dc4cbaf7bb3 100644 --- a/pandas/tests/indexes/test_multi.py +++ b/pandas/tests/indexes/test_multi.py @@ -615,8 +615,27 @@ def test_constructor_mismatched_label_levels(self): with tm.assert_raises_regex(ValueError, label_error): self.index.copy().set_labels([[0, 0, 0, 0], [0, 0]]) - @pytest.mark.parametrize('names', [['a', 'b', 'a'], [1, 1, 2], - [1, 'a', 1]]) + def test_constructor_nonhashable_names(self): + # GH 20527 + levels = [[1, 2], [u'one', u'two']] + labels = [[0, 0, 1, 1], [0, 1, 0, 1]] + names = ((['foo'], ['bar'])) + message = "MultiIndex.name must be a hashable type" + tm.assert_raises_regex(TypeError, message, + MultiIndex, levels=levels, + labels=labels, names=names) + + # With .rename() + mi = MultiIndex(levels=[[1, 2], [u'one', u'two']], + labels=[[0, 0, 1, 1], [0, 1, 0, 1]], + names=('foo', 'bar')) + renamed = [['foor'], ['barr']] + tm.assert_raises_regex(TypeError, message, mi.rename, names=renamed) + # With .set_names() + tm.assert_raises_regex(TypeError, message, mi.set_names, names=renamed) + + @pytest.mark.parametrize('names', [['a', 'b', 'a'], ['1', '1', '2'], + ['1', 'a', '1']]) def test_duplicate_level_names(self, names): # GH18872 pytest.raises(ValueError, pd.MultiIndex.from_product,