-
-
Notifications
You must be signed in to change notification settings - Fork 18.4k
BUG: MultiIndex not dropping nan level and invalid code value #26408
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from 1 commit
e0d3bb4
af2ad91
248177b
b6d29bd
041b4cd
933615b
d4227c6
b66d3a4
81ebfda
7dfd698
ec265fa
af35fa3
6ec67b3
28c953f
055999d
d9db853
a141e40
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -243,10 +243,19 @@ def __new__(cls, levels=None, codes=None, sortorder=None, names=None, | |
|
||
if verify_integrity: | ||
result._verify_integrity() | ||
|
||
jreback marked this conversation as resolved.
Show resolved
Hide resolved
|
||
codes = [cls._reassign_na_codes(*it) for it in zip(levels, codes)] | ||
result._set_codes(codes, validate=False) | ||
|
||
if _set_identity: | ||
result._reset_identity() | ||
|
||
return result | ||
|
||
@classmethod | ||
def _reassign_na_codes(cls, level, code): | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. call this _validate_codes, pls add a doc-string. This should be an instance method (and not a classmethod) There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. method level changed and moved into validation method |
||
return [-1 if x == -1 or isna(level[x]) else x for x in code] | ||
|
||
def _verify_integrity(self, codes=None, levels=None): | ||
""" | ||
|
||
|
@@ -281,6 +290,9 @@ def _verify_integrity(self, codes=None, levels=None): | |
" level (%d). NOTE: this index is in an" | ||
" inconsistent state" % (i, level_codes.max(), | ||
len(level))) | ||
if len(level_codes) and level_codes.min() < -1: | ||
jreback marked this conversation as resolved.
Show resolved
Hide resolved
|
||
raise ValueError("On level %d, code value (%d) < -1" % | ||
(i, level_codes.min())) | ||
if not level.is_unique: | ||
raise ValueError("Level values must be unique: {values} on " | ||
"level {level}".format( | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -67,6 +67,7 @@ def test_constructor_mismatched_codes_levels(idx): | |
length_error = (r"On level 0, code max \(3\) >= length of level \(1\)\." | ||
" NOTE: this index is in an inconsistent state") | ||
label_error = r"Unequal code lengths: \[4, 2\]" | ||
code_value_error = (r"On level 0, code value \(-2\) < -1") | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Don't need the outer parentheses here. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. removed |
||
|
||
# important to check that it's looking at the right thing. | ||
with pytest.raises(ValueError, match=length_error): | ||
|
@@ -83,6 +84,23 @@ def test_constructor_mismatched_codes_levels(idx): | |
with pytest.raises(ValueError, match=label_error): | ||
idx.copy().set_codes([[0, 0, 0, 0], [0, 0]]) | ||
|
||
# code value smaller than -1 | ||
with pytest.raises(ValueError, match=code_value_error): | ||
MultiIndex(levels=[['a'], ['b']], codes=[[0, -2], [0, 0]]) | ||
|
||
|
||
def test_na_levels(): | ||
tm.assert_index_equal( | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. can you add a comment with the issue number There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. issue number added |
||
MultiIndex(levels=[[np.nan, None, pd.NaT, 128, 2]], | ||
codes=[[0, -1, 1, 2, 3, 4]]), | ||
MultiIndex(levels=[[np.nan, None, pd.NaT, 128, 2]], | ||
codes=[[-1, -1, -1, -1, 3, 4]])) | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. use There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. test cases refactored |
||
tm.assert_index_equal( | ||
MultiIndex(levels=[[np.nan, 's', pd.NaT, 128, None]], | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. this should also verify if .set_levels and/or .set_codes is called. Can you add tests for each. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. new test cases added |
||
codes=[[0, -1, 1, 2, 3, 4]]), | ||
MultiIndex(levels=[[np.nan, 's', pd.NaT, 128, None]], | ||
codes=[[-1, -1, 1, -1, 3, -1]])) | ||
|
||
|
||
def test_labels_deprecated(idx): | ||
# GH23752 | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
can you make something like
Bug in :class:`Multindex` construction from
levelsand
codesthat would incorrectly allows
NaNlevels