From 17204a68a264949b2a0ebd73e78942d0cc4f4880 Mon Sep 17 00:00:00 2001 From: Douglas Rudd Date: Tue, 27 Jun 2017 09:44:55 -0700 Subject: [PATCH 01/11] BUG: allow empty multiindex (fixes .isin regression, GH16777) --- pandas/core/indexes/multi.py | 11 +++++++---- pandas/tests/indexes/test_multi.py | 14 ++++++++++++++ 2 files changed, 21 insertions(+), 4 deletions(-) diff --git a/pandas/core/indexes/multi.py b/pandas/core/indexes/multi.py index ee18263cca6ab..87aae2f07d118 100644 --- a/pandas/core/indexes/multi.py +++ b/pandas/core/indexes/multi.py @@ -89,6 +89,9 @@ def __new__(cls, levels=None, labels=None, sortorder=None, names=None, raise TypeError("Must pass both levels and labels") if len(levels) != len(labels): raise ValueError('Length of levels and labels must be the same.') + if names is not None and len(names) > 0 and len(levels) == 0: + levels = [[] for _ in names] + labels = [[] for _ in names] if len(levels) == 0: raise ValueError('Must pass non-zero number of levels/labels') if len(levels) == 1: @@ -1128,8 +1131,7 @@ def from_tuples(cls, tuples, sortorder=None, names=None): MultiIndex.from_product : Make a MultiIndex from cartesian product of iterables """ - if len(tuples) == 0: - # I think this is right? Not quite sure... + if len(tuples) == 0 and names is None: raise TypeError('Cannot infer number of levels from empty list') if isinstance(tuples, (np.ndarray, Index)): @@ -2621,8 +2623,9 @@ def _wrap_joined_index(self, joined, other): @Appender(Index.isin.__doc__) def isin(self, values, level=None): if level is None: - return algos.isin(self.values, - MultiIndex.from_tuples(values).values) + values = MultiIndex.from_tuples(values, + names=self._levels).values + return algos.isin(self.values, values) else: num = self._get_level_number(level) levs = self.levels[num] diff --git a/pandas/tests/indexes/test_multi.py b/pandas/tests/indexes/test_multi.py index ef8806246c2c5..e6006d932cc35 100644 --- a/pandas/tests/indexes/test_multi.py +++ b/pandas/tests/indexes/test_multi.py @@ -766,6 +766,14 @@ def test_from_arrays_empty(self): ValueError, "Must pass non-zero number of levels/labels"): MultiIndex.from_arrays(arrays=[]) + # 0 levels, names defined + result = MultiIndex.from_arrays(arrays=[], + names=['A', 'B']) + expected = MultiIndex(levels=[[], []], + labels=[[], []], + names=['A', 'B']) + tm.assert_index_equal(result, expected) + # 1 level result = MultiIndex.from_arrays(arrays=[[]], names=['A']) expected = Index([], name='A') @@ -1717,6 +1725,12 @@ def test_from_tuples(self): 'from empty list', MultiIndex.from_tuples, []) + result = MultiIndex.from_tuples([], names=['a', 'b']) + expected = MultiIndex(levels=[[], []], + labels=[[], []], + names=['a', 'b']) + tm.assert_index_equal(result, expected) + idx = MultiIndex.from_tuples(((1, 2), (3, 4)), names=['a', 'b']) assert len(idx) == 2 From 474ff40655c8f85a3f762a8bf6141273033686b9 Mon Sep 17 00:00:00 2001 From: Douglas Rudd Date: Tue, 27 Jun 2017 09:44:55 -0700 Subject: [PATCH 02/11] BUG: allow empty multiindex (fixes .isin regression, GH16777) --- doc/source/whatsnew/v0.20.3.txt | 1 + 1 file changed, 1 insertion(+) diff --git a/doc/source/whatsnew/v0.20.3.txt b/doc/source/whatsnew/v0.20.3.txt index 77b3e3bd25740..ad0cc065c9523 100644 --- a/doc/source/whatsnew/v0.20.3.txt +++ b/doc/source/whatsnew/v0.20.3.txt @@ -55,6 +55,7 @@ Indexing ^^^^^^^^ - Bug in ``Float64Index`` causing an empty array instead of ``None`` to be returned from ``.get(np.nan)`` on a Series whose index did not contain any ``NaN`` s (:issue:`8569`) +- Bug in ``MultiIndex`` causing an error when passing an empty iterable to `.isin`. (:issue:`16777`) I/O ^^^ From 64908e82a4fcb305f03cdbd214b27c6b450291c1 Mon Sep 17 00:00:00 2001 From: Douglas Rudd Date: Tue, 27 Jun 2017 14:13:52 -0700 Subject: [PATCH 03/11] Replace incorrect use of self._levels with self._names --- pandas/core/indexes/multi.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/core/indexes/multi.py b/pandas/core/indexes/multi.py index 87aae2f07d118..8cf1726fee02d 100644 --- a/pandas/core/indexes/multi.py +++ b/pandas/core/indexes/multi.py @@ -2624,7 +2624,7 @@ def _wrap_joined_index(self, joined, other): def isin(self, values, level=None): if level is None: values = MultiIndex.from_tuples(values, - names=self._levels).values + names=self._names).values return algos.isin(self.values, values) else: num = self._get_level_number(level) From 76967dbcc7cdf9d9bdac5d2562e889eaeaba204a Mon Sep 17 00:00:00 2001 From: Douglas Rudd Date: Wed, 5 Jul 2017 21:21:59 -0700 Subject: [PATCH 04/11] Requested change to whats-new --- doc/source/whatsnew/v0.20.3.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/source/whatsnew/v0.20.3.txt b/doc/source/whatsnew/v0.20.3.txt index ad0cc065c9523..3d6aba98d4d57 100644 --- a/doc/source/whatsnew/v0.20.3.txt +++ b/doc/source/whatsnew/v0.20.3.txt @@ -55,7 +55,7 @@ Indexing ^^^^^^^^ - Bug in ``Float64Index`` causing an empty array instead of ``None`` to be returned from ``.get(np.nan)`` on a Series whose index did not contain any ``NaN`` s (:issue:`8569`) -- Bug in ``MultiIndex`` causing an error when passing an empty iterable to `.isin`. (:issue:`16777`) +- Bug in ``MultiIndex.isin`` causing an error when passing an empty iterable (:issue:`16777`) I/O ^^^ From c84f0bc1e5a1cda712b4f91045d761c6e92a8975 Mon Sep 17 00:00:00 2001 From: Douglas Rudd Date: Wed, 5 Jul 2017 21:22:52 -0700 Subject: [PATCH 05/11] Use names property rather than _names in MultiIndex.isin --- pandas/core/indexes/multi.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/core/indexes/multi.py b/pandas/core/indexes/multi.py index 8cf1726fee02d..200421d0314df 100644 --- a/pandas/core/indexes/multi.py +++ b/pandas/core/indexes/multi.py @@ -2624,7 +2624,7 @@ def _wrap_joined_index(self, joined, other): def isin(self, values, level=None): if level is None: values = MultiIndex.from_tuples(values, - names=self._names).values + names=self.names).values return algos.isin(self.values, values) else: num = self._get_level_number(level) From f94d2b4905d75c2a4e2328b7e90fd10d7a666fbf Mon Sep 17 00:00:00 2001 From: Douglas Rudd Date: Thu, 6 Jul 2017 14:23:03 -0700 Subject: [PATCH 06/11] Revert ability to construct MultiIndex from empty arrays, ensure all tests pass despite issue #16844 --- pandas/core/indexes/multi.py | 14 +++++++++----- pandas/tests/indexes/test_multi.py | 13 ++----------- 2 files changed, 11 insertions(+), 16 deletions(-) diff --git a/pandas/core/indexes/multi.py b/pandas/core/indexes/multi.py index 200421d0314df..7935b24940010 100644 --- a/pandas/core/indexes/multi.py +++ b/pandas/core/indexes/multi.py @@ -1083,7 +1083,9 @@ def from_arrays(cls, arrays, sortorder=None, names=None): MultiIndex.from_product : Make a MultiIndex from cartesian product of iterables """ - if len(arrays) == 1: + if len(arrays) == 0: + raise ValueError('Must pass non-zero number of levels/labels') + elif len(arrays) == 1: name = None if names is None else names[0] return Index(arrays[0], name=name) @@ -1131,10 +1133,12 @@ def from_tuples(cls, tuples, sortorder=None, names=None): MultiIndex.from_product : Make a MultiIndex from cartesian product of iterables """ - if len(tuples) == 0 and names is None: - raise TypeError('Cannot infer number of levels from empty list') - - if isinstance(tuples, (np.ndarray, Index)): + if len(tuples) == 0: + if names is None: + msg = 'Cannot infer number of levels from empty list' + raise TypeError(msg) + arrays = [[]]*len(names) + elif isinstance(tuples, (np.ndarray, Index)): if isinstance(tuples, Index): tuples = tuples._values diff --git a/pandas/tests/indexes/test_multi.py b/pandas/tests/indexes/test_multi.py index e6006d932cc35..1493ed1c33802 100644 --- a/pandas/tests/indexes/test_multi.py +++ b/pandas/tests/indexes/test_multi.py @@ -766,14 +766,6 @@ def test_from_arrays_empty(self): ValueError, "Must pass non-zero number of levels/labels"): MultiIndex.from_arrays(arrays=[]) - # 0 levels, names defined - result = MultiIndex.from_arrays(arrays=[], - names=['A', 'B']) - expected = MultiIndex(levels=[[], []], - labels=[[], []], - names=['A', 'B']) - tm.assert_index_equal(result, expected) - # 1 level result = MultiIndex.from_arrays(arrays=[[]], names=['A']) expected = Index([], name='A') @@ -1726,9 +1718,8 @@ def test_from_tuples(self): MultiIndex.from_tuples, []) result = MultiIndex.from_tuples([], names=['a', 'b']) - expected = MultiIndex(levels=[[], []], - labels=[[], []], - names=['a', 'b']) + expected = MultiIndex.from_arrays(arrays=[[], []], + names=['a', 'b']) tm.assert_index_equal(result, expected) idx = MultiIndex.from_tuples(((1, 2), (3, 4)), names=['a', 'b']) From fe1e043b710041ec8a5603a12b63c4e07bb308fc Mon Sep 17 00:00:00 2001 From: Douglas Rudd Date: Thu, 6 Jul 2017 14:30:21 -0700 Subject: [PATCH 07/11] Remove empty list checks in MultiIndex.__new__ --- pandas/core/indexes/multi.py | 3 --- 1 file changed, 3 deletions(-) diff --git a/pandas/core/indexes/multi.py b/pandas/core/indexes/multi.py index 7935b24940010..b450c4cf71637 100644 --- a/pandas/core/indexes/multi.py +++ b/pandas/core/indexes/multi.py @@ -89,9 +89,6 @@ def __new__(cls, levels=None, labels=None, sortorder=None, names=None, raise TypeError("Must pass both levels and labels") if len(levels) != len(labels): raise ValueError('Length of levels and labels must be the same.') - if names is not None and len(names) > 0 and len(levels) == 0: - levels = [[] for _ in names] - labels = [[] for _ in names] if len(levels) == 0: raise ValueError('Must pass non-zero number of levels/labels') if len(levels) == 1: From b0778f0e0f92fe9c841b37f3269e58cc3e9a9a82 Mon Sep 17 00:00:00 2001 From: Douglas Rudd Date: Thu, 6 Jul 2017 14:35:48 -0700 Subject: [PATCH 08/11] Remove empty array check in .from_arrays, falling back to check in __new__ --- pandas/core/indexes/multi.py | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/pandas/core/indexes/multi.py b/pandas/core/indexes/multi.py index b450c4cf71637..74112bb6701c1 100644 --- a/pandas/core/indexes/multi.py +++ b/pandas/core/indexes/multi.py @@ -1080,9 +1080,7 @@ def from_arrays(cls, arrays, sortorder=None, names=None): MultiIndex.from_product : Make a MultiIndex from cartesian product of iterables """ - if len(arrays) == 0: - raise ValueError('Must pass non-zero number of levels/labels') - elif len(arrays) == 1: + if len(arrays) == 1: name = None if names is None else names[0] return Index(arrays[0], name=name) From e3d670aa8089c2c4f8bd7ec883074d5c8838dc2d Mon Sep 17 00:00:00 2001 From: Douglas Rudd Date: Thu, 6 Jul 2017 16:03:04 -0700 Subject: [PATCH 09/11] Fix for pep8 --- pandas/core/indexes/multi.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/core/indexes/multi.py b/pandas/core/indexes/multi.py index 74112bb6701c1..81eac0ac0684f 100644 --- a/pandas/core/indexes/multi.py +++ b/pandas/core/indexes/multi.py @@ -1132,7 +1132,7 @@ def from_tuples(cls, tuples, sortorder=None, names=None): if names is None: msg = 'Cannot infer number of levels from empty list' raise TypeError(msg) - arrays = [[]]*len(names) + arrays = [[]] * len(names) elif isinstance(tuples, (np.ndarray, Index)): if isinstance(tuples, Index): tuples = tuples._values From 39b544a7aa0727dea62a64d56ca43b72676ae423 Mon Sep 17 00:00:00 2001 From: Douglas Rudd Date: Thu, 6 Jul 2017 16:03:19 -0700 Subject: [PATCH 10/11] Split empty from_tuples test --- pandas/tests/indexes/test_multi.py | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/pandas/tests/indexes/test_multi.py b/pandas/tests/indexes/test_multi.py index 1493ed1c33802..243cf3e122f5d 100644 --- a/pandas/tests/indexes/test_multi.py +++ b/pandas/tests/indexes/test_multi.py @@ -1717,14 +1717,15 @@ def test_from_tuples(self): 'from empty list', MultiIndex.from_tuples, []) + idx = MultiIndex.from_tuples(((1, 2), (3, 4)), names=['a', 'b']) + assert len(idx) == 2 + + def test_from_tuples_empty(self): result = MultiIndex.from_tuples([], names=['a', 'b']) expected = MultiIndex.from_arrays(arrays=[[], []], names=['a', 'b']) tm.assert_index_equal(result, expected) - idx = MultiIndex.from_tuples(((1, 2), (3, 4)), names=['a', 'b']) - assert len(idx) == 2 - def test_argsort(self): result = self.index.argsort() expected = self.index.values.argsort() From 94ac6b16b8b33809525a9240afd59834d51db94f Mon Sep 17 00:00:00 2001 From: Jeff Reback Date: Fri, 7 Jul 2017 06:24:45 -0400 Subject: [PATCH 11/11] add commit number to test --- pandas/tests/indexes/test_multi.py | 1 + 1 file changed, 1 insertion(+) diff --git a/pandas/tests/indexes/test_multi.py b/pandas/tests/indexes/test_multi.py index 243cf3e122f5d..719cd2f7e01a4 100644 --- a/pandas/tests/indexes/test_multi.py +++ b/pandas/tests/indexes/test_multi.py @@ -1721,6 +1721,7 @@ def test_from_tuples(self): assert len(idx) == 2 def test_from_tuples_empty(self): + # GH 16777 result = MultiIndex.from_tuples([], names=['a', 'b']) expected = MultiIndex.from_arrays(arrays=[[], []], names=['a', 'b'])