From 739bc4c945dc39e1644038283559d2b2cd773ba9 Mon Sep 17 00:00:00 2001 From: Pietro Battiston Date: Tue, 7 Aug 2018 07:04:55 +0200 Subject: [PATCH 1/3] BUG: raise KeyError if MultiIndex.get_loc_level is asked unused label closes #22221 --- doc/source/whatsnew/v0.24.0.txt | 4 ++-- pandas/core/indexes/multi.py | 26 +++++++++++---------- pandas/tests/indexes/multi/test_indexing.py | 4 ++++ 3 files changed, 20 insertions(+), 14 deletions(-) diff --git a/doc/source/whatsnew/v0.24.0.txt b/doc/source/whatsnew/v0.24.0.txt index ea0677a0edf28..997ab8f50e07c 100644 --- a/doc/source/whatsnew/v0.24.0.txt +++ b/doc/source/whatsnew/v0.24.0.txt @@ -622,8 +622,8 @@ Missing MultiIndex ^^^^^^^^^^ -- Removed compatibility for MultiIndex pickles prior to version 0.8.0; compatibility with MultiIndex pickles from version 0.13 forward is maintained (:issue:`21654`) -- +- Removed compatibility for :class:`MultiIndex` pickles prior to version 0.8.0; compatibility with :class:`MultiIndex` pickles from version 0.13 forward is maintained (:issue:`21654`) +- :meth:`MultiIndex.get_loc_level` (and as a consequence, ``.loc`` on a :class:`MultiIndex`-indexed object) will now raise a ``KeyError``, rather than returning an empty ``slice``, if asked for a label which is present in the ``levels`` but is unused (:issue:`22221`) - I/O diff --git a/pandas/core/indexes/multi.py b/pandas/core/indexes/multi.py index 2a97c37449e12..37f4415776b83 100644 --- a/pandas/core/indexes/multi.py +++ b/pandas/core/indexes/multi.py @@ -2185,11 +2185,6 @@ def _maybe_to_slice(loc): if not isinstance(key, tuple): loc = self._get_level_indexer(key, level=0) - - # _get_level_indexer returns an empty slice if the key has - # been dropped from the MultiIndex - if isinstance(loc, slice) and loc.start == loc.stop: - raise KeyError(key) return _maybe_to_slice(loc) keylen = len(key) @@ -2443,14 +2438,21 @@ def convert_indexer(start, stop, step, indexer=indexer, labels=labels): else: - loc = level_index.get_loc(key) - if isinstance(loc, 
slice): - return loc - elif level > 0 or self.lexsort_depth == 0: - return np.array(labels == loc, dtype=bool) + code = level_index.get_loc(key) + + if level > 0 or self.lexsort_depth == 0: + # Desired level is not sorted + locs = np.array(labels == code, dtype=bool, copy=False) + if not locs.any(): + # The label is present in self.levels[level] but unused: + raise KeyError(key) + return locs - i = labels.searchsorted(loc, side='left') - j = labels.searchsorted(loc, side='right') + i = labels.searchsorted(code, side='left') + j = labels.searchsorted(code, side='right') + if i == j: + # The label is present in self.levels[level] but unused: + raise KeyError(key) return slice(i, j) def get_locs(self, seq): diff --git a/pandas/tests/indexes/multi/test_indexing.py b/pandas/tests/indexes/multi/test_indexing.py index ebd50909bae98..9ec11f1f42b9a 100644 --- a/pandas/tests/indexes/multi/test_indexing.py +++ b/pandas/tests/indexes/multi/test_indexing.py @@ -271,6 +271,10 @@ def test_get_loc_level(): assert new_index is None pytest.raises(KeyError, index.get_loc_level, (2, 2)) + # GH 22221: unused label + pytest.raises(KeyError, index.drop(2).get_loc_level, 2) + # Unused label on unsorted level: + pytest.raises(KeyError, index.drop(1, level=2).get_loc_level, 2, 2) index = MultiIndex(levels=[[2000], lrange(4)], labels=[np.array( [0, 0, 0, 0]), np.array([0, 1, 2, 3])]) From 821508005c1a4863172a81048dc57be09a4ba9a1 Mon Sep 17 00:00:00 2001 From: Pietro Battiston Date: Wed, 8 Aug 2018 14:15:58 +0200 Subject: [PATCH 2/3] TST: test groupby.apply() with user-defined function returning an empty chunk --- pandas/tests/groupby/test_apply.py | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/pandas/tests/groupby/test_apply.py b/pandas/tests/groupby/test_apply.py index 7c90d359a4054..7e32d900f04bd 100644 --- a/pandas/tests/groupby/test_apply.py +++ b/pandas/tests/groupby/test_apply.py @@ -518,6 +518,19 @@ def test_func(x): tm.assert_frame_equal(result2, expected2) +def 
test_groupby_apply_return_empty_chunk(): + # GH 22221: apply filter which returns some empty groups + df = pd.DataFrame(dict(value=[0, 1], group=['filled', 'empty'])) + groups = df.groupby('group') + result = groups.apply(lambda group: group[group.value != 1]['value']) + expected = pd.Series([0], name='value', + index=MultiIndex.from_product([['empty', 'filled'], + [0]], + names=['group', None] + ).drop('empty')) + tm.assert_series_equal(result, expected) + + def test_apply_with_mixed_types(): # gh-20949 df = pd.DataFrame({'A': 'a a b'.split(), 'B': [1, 2, 3], 'C': [4, 6, 5]}) From 31b55c4066ed627c3a7e84497fabd81c661e4125 Mon Sep 17 00:00:00 2001 From: Pietro Battiston Date: Wed, 8 Aug 2018 14:16:10 +0200 Subject: [PATCH 3/3] CLN: remove named lambda --- pandas/tests/groupby/test_apply.py | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/pandas/tests/groupby/test_apply.py b/pandas/tests/groupby/test_apply.py index 7e32d900f04bd..3bc5e51ca046a 100644 --- a/pandas/tests/groupby/test_apply.py +++ b/pandas/tests/groupby/test_apply.py @@ -271,10 +271,7 @@ def test_apply_chunk_view(): df = DataFrame({'key': [1, 1, 1, 2, 2, 2, 3, 3, 3], 'value': compat.lrange(9)}) - # return view - f = lambda x: x[:2] - - result = df.groupby('key', group_keys=False).apply(f) + result = df.groupby('key', group_keys=False).apply(lambda x: x[:2]) expected = df.take([0, 1, 3, 4, 6, 7]) tm.assert_frame_equal(result, expected)