Skip to content

Commit f673af1

Browse files
nbonnotte authored and jreback committed
BUG in MultiIndex.drop for not-lexsorted multi-indexes, #12078
Closes #12078
1 parent bb94ebe commit f673af1

File tree

3 files changed

+39
-2
lines changed

3 files changed

+39
-2
lines changed

doc/source/whatsnew/v0.18.0.txt

+1-1
Original file line numberDiff line numberDiff line change
@@ -523,7 +523,7 @@ Bug Fixes
523523
- Bug in ``read_sql`` with ``pymysql`` connections failing to return chunked data (:issue:`11522`)
524524
- Bug in ``.to_csv`` ignoring formatting parameters ``decimal``, ``na_rep``, ``float_format`` for float indexes (:issue:`11553`)
525525
- Bug in ``Int64Index`` and ``Float64Index`` preventing the use of the modulo operator (:issue:`9244`)
526-
526+
- Bug in ``MultiIndex.drop`` for not lexsorted multi-indexes (:issue:`12078`)
527527

528528
- Bug in ``DataFrame`` when masking an empty ``DataFrame`` (:issue:`11859`)
529529

pandas/indexes/multi.py

+15-1
Original file line numberDiff line numberDiff line change
@@ -1083,10 +1083,24 @@ def drop(self, labels, level=None, errors='raise'):
10831083
for label in labels:
10841084
try:
10851085
loc = self.get_loc(label)
1086+
# get_loc returns either an integer, a slice, or a boolean
1087+
# mask
10861088
if isinstance(loc, int):
10871089
inds.append(loc)
1088-
else:
1090+
elif isinstance(loc, slice):
10891091
inds.extend(lrange(loc.start, loc.stop))
1092+
elif is_bool_indexer(loc):
1093+
if self.lexsort_depth == 0:
1094+
warnings.warn('dropping on a non-lexsorted multi-index'
1095+
'without a level parameter may impact '
1096+
'performance.',
1097+
PerformanceWarning,
1098+
stacklevel=2)
1099+
loc = loc.nonzero()[0]
1100+
inds.extend(loc)
1101+
else:
1102+
msg = 'unsupported indexer of type {}'.format(type(loc))
1103+
raise AssertionError(msg)
10901104
except KeyError:
10911105
if errors != 'ignore':
10921106
raise

pandas/tests/indexes/test_multi.py

+23
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,7 @@
88

99
from pandas import (date_range, MultiIndex, Index, CategoricalIndex,
1010
compat)
11+
from pandas.io.common import PerformanceWarning
1112
from pandas.indexes.base import InvalidIndexError
1213
from pandas.compat import range, lrange, u, PY3, long, lzip
1314

@@ -1419,6 +1420,28 @@ def test_droplevel_multiple(self):
14191420
expected = index[:2].droplevel(2).droplevel(0)
14201421
self.assertTrue(dropped.equals(expected))
14211422

1423+
def test_drop_not_lexsorted(self):
1424+
# GH 12078
1425+
1426+
# define the lexsorted version of the multi-index
1427+
tuples = [('a', ''), ('b1', 'c1'), ('b2', 'c2')]
1428+
lexsorted_mi = MultiIndex.from_tuples(tuples, names=['b', 'c'])
1429+
self.assertTrue(lexsorted_mi.is_lexsorted())
1430+
1431+
# and the not-lexsorted version
1432+
df = pd.DataFrame(columns=['a', 'b', 'c', 'd'],
1433+
data=[[1, 'b1', 'c1', 3], [1, 'b2', 'c2', 4]])
1434+
df = df.pivot_table(index='a', columns=['b', 'c'], values='d')
1435+
df = df.reset_index()
1436+
not_lexsorted_mi = df.columns
1437+
self.assertFalse(not_lexsorted_mi.is_lexsorted())
1438+
1439+
# compare the results
1440+
self.assert_index_equal(lexsorted_mi, not_lexsorted_mi)
1441+
with self.assert_produces_warning(PerformanceWarning):
1442+
self.assert_index_equal(lexsorted_mi.drop('a'),
1443+
not_lexsorted_mi.drop('a'))
1444+
14221445
def test_insert(self):
14231446
# key contained in all levels
14241447
new_index = self.index.insert(0, ('bar', 'two'))

0 commit comments

Comments
 (0)