-
-
Notifications
You must be signed in to change notification settings - Fork 18.4k
ERR: disallow non-hashables in Index/MultiIndex construction & rename #20548
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from 41 commits
9047d60
df7650d
dd64219
89e92ab
cd3e53a
cd070e3
351691f
3a7b0b2
70933d5
56fd617
d4ed636
b554bb3
6efd6cc
4fb3a6b
786f43f
01b712e
6f13cd0
26433c3
91ef466
85e35ea
5c2e240
4ca2a52
840cd88
18bcf2a
d98014f
b8a1d7e
edfbd1d
2322346
fa52655
c0f6936
a9c14e6
30da596
667d495
c4c1011
bd75433
74a9b54
b1cb7fd
863f7d3
0723009
7092d49
1d8f67a
12488ff
4a500ba
9ec64b0
47903ae
04f2eed
1a68188
97a2b06
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -42,6 +42,7 @@ | |
is_datetime64_any_dtype, | ||
is_datetime64tz_dtype, | ||
is_timedelta64_dtype, | ||
is_hashable, | ||
needs_i8_conversion, | ||
is_iterator, is_list_like, | ||
is_scalar) | ||
|
@@ -473,7 +474,7 @@ def _simple_new(cls, values, name=None, dtype=None, **kwargs): | |
|
||
result = object.__new__(cls) | ||
result._data = values | ||
result.name = name | ||
result._set_names([name]) | ||
for k, v in compat.iteritems(kwargs): | ||
setattr(result, k, v) | ||
return result._reset_identity() | ||
|
@@ -1311,6 +1312,52 @@ def _get_names(self): | |
return FrozenList((self.name, )) | ||
|
||
def _set_names(self, values, level=None): | ||
""" | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. can you also add a mention on |
||
Set new names on index. | ||
|
||
Parameters | ||
---------- | ||
values : str or sequence | ||
name(s) to set | ||
level : int, level name, or sequence of int/level names (default None) | ||
If the index is a MultiIndex (hierarchical), level(s) to set (None | ||
for all levels). Otherwise level must be None | ||
|
||
Returns | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. this should be Raises (and its a TypeError) |
||
------- | ||
index : Index | ||
|
||
See Also | ||
-------- | ||
Index.set_names : Set new names on index. Defaults to returning | ||
new index. | ||
Index.rename : Set new names on index. Defaults to returning new index. | ||
|
||
Notes | ||
----- | ||
Both `set_names` and `rename` call this function to set name. | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I'm not sure if this is needed? |
||
|
||
Examples | ||
-------- | ||
on an index with no names: | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. you don't need the fulll doc string here (e.g. examples and such, leave Parameters and such), only on |
||
>>> I1 = Index([1, 2, 3, 4]) | ||
>>> I1._set_names([0]) | ||
>>> I1 | ||
Int64Index([1, 2, 3, 4], dtype='int64', name=0) | ||
|
||
set multiple names: | ||
>>> I2 = Index([1, 2, 3, 4], name="foo") | ||
>>> I2._set_names([(0, 1)]) | ||
>>> I2 | ||
Int64Index([1, 2, 3, 4], dtype='int64', name=(0, 1)) | ||
""" | ||
# GH 20527 | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. can you add a doc-string here |
||
# All items in 'name' need to be hashable: | ||
if values is not None: | ||
for name in values: | ||
if not is_hashable(name): | ||
raise TypeError('{}.name must be a hashable type' | ||
.format(self.__class__.__name__)) | ||
if len(values) != 1: | ||
raise ValueError('Length of new names must be 1, got %d' % | ||
len(values)) | ||
|
@@ -1339,9 +1386,9 @@ def set_names(self, names, level=None, inplace=False): | |
Examples | ||
-------- | ||
>>> Index([1, 2, 3, 4]).set_names('foo') | ||
Int64Index([1, 2, 3, 4], dtype='int64') | ||
Int64Index([1, 2, 3, 4], dtype='int64', name='foo') | ||
>>> Index([1, 2, 3, 4]).set_names(['foo']) | ||
Int64Index([1, 2, 3, 4], dtype='int64') | ||
Int64Index([1, 2, 3, 4], dtype='int64', name='foo') | ||
>>> idx = MultiIndex.from_tuples([(1, u'one'), (1, u'two'), | ||
(2, u'one'), (2, u'two')], | ||
names=['foo', 'bar']) | ||
|
@@ -1354,6 +1401,7 @@ def set_names(self, names, level=None, inplace=False): | |
labels=[[0, 0, 1, 1], [0, 1, 0, 1]], | ||
names=[u'baz', u'bar']) | ||
""" | ||
|
||
if level is not None and self.nlevels == 1: | ||
raise ValueError('Level must be None for non-MultiIndex') | ||
|
||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -16,6 +16,7 @@ | |
_ensure_platform_int, | ||
is_categorical_dtype, | ||
is_object_dtype, | ||
is_hashable, | ||
is_iterator, | ||
is_list_like, | ||
pandas_dtype, | ||
|
@@ -639,6 +640,12 @@ def _set_names(self, names, level=None, validate=True): | |
Note that you generally want to set this *after* changing levels, so | ||
that it only acts on copies | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. looks good here. can you update the doc-string with the comment below |
||
""" | ||
# GH 20527 | ||
# All items in 'names' need to be hashable: | ||
for levels, name in enumerate(names): | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. same |
||
if not is_hashable(name): | ||
raise TypeError('{}.name must be a hashable type' | ||
.format(self.__class__.__name__)) | ||
|
||
# GH 15110 | ||
# Don't allow a single string for names in a MultiIndex | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -435,6 +435,24 @@ def test_constructor_empty(self): | |
assert isinstance(empty, MultiIndex) | ||
assert not len(empty) | ||
|
||
def test_constructor_nonhashable_name(self, indices): | ||
# GH 20527 | ||
|
||
if isinstance(indices, MultiIndex): | ||
pytest.skip("multiindex handled in test_multi.py") | ||
|
||
name = ['0'] | ||
message = "Index.name must be a hashable type" | ||
tm.assert_raises_regex(TypeError, message, name=name) | ||
|
||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. add this - it will test with all of the indices types (this is confusing atm as we have multiple way of running index creation: fixture, a function, and as a class property); other's PR's are cleaning this up.
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I pushed this to your branch There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. @jreback Thanks. Can we do the same for MI (i.e. through fixture) ? |
||
# With .rename() | ||
renamed = [['1']] | ||
tm.assert_raises_regex(TypeError, message, | ||
indices.rename, name=renamed) | ||
# With .set_names() | ||
tm.assert_raises_regex(TypeError, message, | ||
indices.set_names, names=renamed) | ||
|
||
def test_view_with_args(self): | ||
|
||
restricted = ['unicodeIndex', 'strIndex', 'catIndex', 'boolIndex', | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -615,8 +615,27 @@ def test_constructor_mismatched_label_levels(self): | |
with tm.assert_raises_regex(ValueError, label_error): | ||
self.index.copy().set_labels([[0, 0, 0, 0], [0, 0]]) | ||
|
||
@pytest.mark.parametrize('names', [['a', 'b', 'a'], [1, 1, 2], | ||
[1, 'a', 1]]) | ||
def test_constructor_nonhashable_names(self): | ||
# GH 20527 | ||
levels = [[1, 2], [u'one', u'two']] | ||
labels = [[0, 0, 1, 1], [0, 1, 0, 1]] | ||
names = ((['foo'], ['bar'])) | ||
message = "MultiIndex.name must be a hashable type" | ||
tm.assert_raises_regex(TypeError, message, | ||
MultiIndex, levels=levels, | ||
labels=labels, names=names) | ||
|
||
# With .rename() | ||
mi = MultiIndex(levels=[[1, 2], [u'one', u'two']], | ||
labels=[[0, 0, 1, 1], [0, 1, 0, 1]], | ||
names=('foo', 'bar')) | ||
renamed = [['foor'], ['barr']] | ||
tm.assert_raises_regex(TypeError, message, mi.rename, names=renamed) | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. also check set_names There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Done |
||
# With .set_names() | ||
tm.assert_raises_regex(TypeError, message, mi.set_names, names=renamed) | ||
|
||
@pytest.mark.parametrize('names', [['a', 'b', 'a'], ['1', '1', '2'], | ||
['1', 'a', '1']]) | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. @jorisvandenbossche IIRC I changed it (in this commit) because the test was failing, but implementation changed a lot after that commit so I'm not sure if reverting this would cause a problem now There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Seems to be passing there! |
||
def test_duplicate_level_names(self, names): | ||
# GH18872 | ||
pytest.raises(ValueError, pd.MultiIndex.from_product, | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
@jreback I wasn't sure if
_set_names
was getting called from_simple_new
, so I made it explicit. Is this ok?Also, we are not checking in
__new__
anymore (as you suggested).There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
you shouldn't need to do this, and just leave the original code
setting
.name
name is a property that calls_set_names