Skip to content

Commit f33093b

Browse files
committed
ENH: Introduce UnsortedIndexError #11897
ERR: Change error message #14491 ENH: #11897 make lint work. ERR: #14491 change error message ERR: #14491 fix test for error message fixes based on jreback feedback fix indent issue Doc fixes Fixes per jreback comments
1 parent 2f43ac4 commit f33093b

File tree

7 files changed

+53
-12
lines changed

7 files changed

+53
-12
lines changed

doc/source/advanced.rst

+3-1
Original file line numberDiff line numberDiff line change
@@ -528,12 +528,14 @@ return a copy of the data rather than a view:
528528
jim joe
529529
1 z 0.64094
530530
531+
.. _advanced.unsorted:
532+
531533
Furthermore, if you try to index something that is not fully lexsorted, this can raise:
532534

533535
.. code-block:: ipython
534536
535537
In [5]: dfm.loc[(0,'y'):(1, 'z')]
536-
KeyError: 'Key length (2) was greater than MultiIndex lexsort depth (1)'
538+
UnsortedIndexError: 'Key length (2) was greater than MultiIndex lexsort depth (1)'
537539
538540
The ``is_lexsorted()`` method on an ``Index`` shows whether the index is sorted, and the ``lexsort_depth`` property returns the sort depth:
539541

doc/source/whatsnew/v0.20.0.txt

+7
Original file line numberDiff line numberDiff line change
@@ -41,6 +41,10 @@ Other enhancements
4141

4242
- ``pd.read_excel`` now preserves sheet order when using ``sheetname=None`` (:issue:`9930`)
4343

44+
- New ``UnsortedIndexError`` (subclass of ``KeyError``) raised when indexing/slicing into an
45+
unsorted MultiIndex (:issue:`11897`). This allows differentiation between errors due to lack
46+
of sorting or an incorrect key. See :ref:`here <advanced.unsorted>`.
47+
4448

4549
.. _whatsnew_0200.api_breaking:
4650

@@ -58,6 +62,9 @@ Backwards incompatible API changes
5862
Other API Changes
5963
^^^^^^^^^^^^^^^^^
6064

65+
- Change error message text when indexing via a
66+
boolean ``Series`` that has an incompatible index (:issue:`14491`)
67+
6168
.. _whatsnew_0200.deprecations:
6269

6370
Deprecations

pandas/core/common.py

+11
Original file line numberDiff line numberDiff line change
@@ -97,6 +97,17 @@ class UnsupportedFunctionCall(ValueError):
9797
pass
9898

9999

100+
class UnsortedIndexError(KeyError):
101+
""" Error raised when attempting to get a slice of a MultiIndex
102+
and the index has not been lexsorted. Subclass of `KeyError`.
103+
104+
.. versionadded:: 0.20.0
105+
106+
"""
107+
pass
108+
109+
110+
100111
class AbstractMethodError(NotImplementedError):
101112
"""Raise this error instead of NotImplementedError for abstract methods
102113
while keeping compatibility with Python 2 and Python 3.

pandas/core/indexing.py

+2-1
Original file line numberDiff line numberDiff line change
@@ -1814,7 +1814,8 @@ def check_bool_indexer(ax, key):
18141814
result = result.reindex(ax)
18151815
mask = isnull(result._values)
18161816
if mask.any():
1817-
raise IndexingError('Unalignable boolean Series key provided')
1817+
raise IndexingError('Unalignable labels in boolean Series index '
1818+
'{}'.format(key.index))
18181819
result = result.astype(bool)._values
18191820
elif is_sparse(result):
18201821
result = result.to_dense()

pandas/indexes/multi.py

+6-4
Original file line numberDiff line numberDiff line change
@@ -25,7 +25,8 @@
2525
from pandas.core.common import (_values_from_object,
2626
is_bool_indexer,
2727
is_null_slice,
28-
PerformanceWarning)
28+
PerformanceWarning,
29+
UnsortedIndexError)
2930

3031

3132
from pandas.core.base import FrozenList
@@ -1936,9 +1937,10 @@ def get_locs(self, tup):
19361937

19371938
# must be lexsorted to at least as many levels
19381939
if not self.is_lexsorted_for_tuple(tup):
1939-
raise KeyError('MultiIndex Slicing requires the index to be fully '
1940-
'lexsorted tuple len ({0}), lexsort depth '
1941-
'({1})'.format(len(tup), self.lexsort_depth))
1940+
raise UnsortedIndexError('MultiIndex Slicing requires the index '
1941+
'to be fully lexsorted tuple len ({0}), '
1942+
'lexsort depth ({1})'
1943+
.format(len(tup), self.lexsort_depth))
19421944

19431945
# indexer
19441946
# this is the list of all values that we want to select

pandas/tests/indexes/test_multi.py

+17-1
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,7 @@
88

99
from pandas import (DataFrame, date_range, period_range, MultiIndex, Index,
1010
CategoricalIndex, compat)
11-
from pandas.core.common import PerformanceWarning
11+
from pandas.core.common import PerformanceWarning, UnsortedIndexError
1212
from pandas.indexes.base import InvalidIndexError
1313
from pandas.compat import range, lrange, u, PY3, long, lzip
1414

@@ -2535,3 +2535,19 @@ def test_dropna(self):
25352535
msg = "invalid how option: xxx"
25362536
with tm.assertRaisesRegexp(ValueError, msg):
25372537
idx.dropna(how='xxx')
2538+
2539+
def test_unsortedindex(self):
2540+
# GH 11897
2541+
mi = pd.MultiIndex.from_tuples([('z', 'a'), ('x', 'a'), ('y', 'b'),
2542+
('x', 'b'), ('y', 'a'), ('z', 'b')],
2543+
names=['one', 'two'])
2544+
df = pd.DataFrame([[i, 10 * i] for i in lrange(6)], index=mi,
2545+
columns=['one', 'two'])
2546+
2547+
with assertRaises(UnsortedIndexError):
2548+
df.loc(axis=0)['z', :]
2549+
df.sort_index(inplace=True)
2550+
self.assertEqual(len(df.loc(axis=0)['z', :]), 2)
2551+
2552+
with assertRaises(KeyError):
2553+
df.loc(axis=0)['q', :]

pandas/tests/indexing/test_indexing.py

+7-5
Original file line numberDiff line numberDiff line change
@@ -23,7 +23,7 @@
2323
MultiIndex, Timestamp, Timedelta)
2424
from pandas.formats.printing import pprint_thing
2525
from pandas import concat
26-
from pandas.core.common import PerformanceWarning
26+
from pandas.core.common import PerformanceWarning, UnsortedIndexError
2727

2828
import pandas.util.testing as tm
2929
from pandas import date_range
@@ -2230,7 +2230,7 @@ def f():
22302230
df = df.sortlevel(level=1, axis=0)
22312231
self.assertEqual(df.index.lexsort_depth, 0)
22322232
with tm.assertRaisesRegexp(
2233-
KeyError,
2233+
UnsortedIndexError,
22342234
'MultiIndex Slicing requires the index to be fully '
22352235
r'lexsorted tuple len \(2\), lexsort depth \(0\)'):
22362236
df.loc[(slice(None), df.loc[:, ('a', 'bar')] > 5), :]
@@ -2417,7 +2417,7 @@ def test_per_axis_per_level_doc_examples(self):
24172417
def f():
24182418
df.loc['A1', (slice(None), 'foo')]
24192419

2420-
self.assertRaises(KeyError, f)
2420+
self.assertRaises(UnsortedIndexError, f)
24212421
df = df.sortlevel(axis=1)
24222422

24232423
# slicing
@@ -3480,8 +3480,10 @@ def test_iloc_mask(self):
34803480
('index', '.loc'): '0b11',
34813481
('index', '.iloc'): ('iLocation based boolean indexing '
34823482
'cannot use an indexable as a mask'),
3483-
('locs', ''): 'Unalignable boolean Series key provided',
3484-
('locs', '.loc'): 'Unalignable boolean Series key provided',
3483+
('locs', ''): 'Unalignable labels in boolean Series index '
3484+
"Int64Index([3, 2, 1, 0], dtype='int64')",
3485+
('locs', '.loc'): 'Unalignable labels in boolean Series index '
3486+
"Int64Index([3, 2, 1, 0], dtype='int64')",
34853487
('locs', '.iloc'): ('iLocation based boolean indexing on an '
34863488
'integer type is not available'),
34873489
}

0 commit comments

Comments
 (0)