diff --git a/doc/source/advanced.rst b/doc/source/advanced.rst index 0c843dd39b56f..7b6b2a09f6037 100644 --- a/doc/source/advanced.rst +++ b/doc/source/advanced.rst @@ -528,12 +528,14 @@ return a copy of the data rather than a view: jim joe 1 z 0.64094 +.. _advanced.unsorted: + Furthermore if you try to index something that is not fully lexsorted, this can raise: .. code-block:: ipython In [5]: dfm.loc[(0,'y'):(1, 'z')] - KeyError: 'Key length (2) was greater than MultiIndex lexsort depth (1)' + UnsortedIndexError: 'Key length (2) was greater than MultiIndex lexsort depth (1)' The ``is_lexsorted()`` method on an ``Index`` show if the index is sorted, and the ``lexsort_depth`` property returns the sort depth: diff --git a/doc/source/whatsnew/v0.20.0.txt b/doc/source/whatsnew/v0.20.0.txt index 0bfd755aae40c..25664fec313ae 100644 --- a/doc/source/whatsnew/v0.20.0.txt +++ b/doc/source/whatsnew/v0.20.0.txt @@ -50,6 +50,11 @@ Other enhancements - ``Series.sort_index`` accepts parameters ``kind`` and ``na_position`` (:issue:`13589`, :issue:`14444`) - ``pd.read_excel`` now preserves sheet order when using ``sheetname=None`` (:issue:`9930`) + +- New ``UnsortedIndexError`` (subclass of ``KeyError``) raised when indexing/slicing into an + unsorted MultiIndex (:issue:`11897`). This allows differentiation between errors due to lack + of sorting or an incorrect key. See :ref:`here <advanced.unsorted>` + - ``pd.cut`` and ``pd.qcut`` now support datetime64 and timedelta64 dtypes (issue:`14714`) - ``Series`` provides a ``to_excel`` method to output Excel files (:issue:`8825`) - The ``usecols`` argument in ``pd.read_csv`` now accepts a callable function as a value (:issue:`14154`) @@ -70,6 +75,9 @@ Backwards incompatible API changes Other API Changes ^^^^^^^^^^^^^^^^^ +- Change error message text when indexing via a + boolean ``Series`` that has an incompatible index (:issue:`14491`) + .. 
_whatsnew_0200.deprecations: Deprecations diff --git a/pandas/core/common.py b/pandas/core/common.py index 295947bbc1166..fddac1f29d454 100644 --- a/pandas/core/common.py +++ b/pandas/core/common.py @@ -97,6 +97,16 @@ class UnsupportedFunctionCall(ValueError): pass +class UnsortedIndexError(KeyError): + """ Error raised when attempting to get a slice of a MultiIndex + and the index has not been lexsorted. Subclass of `KeyError`. + + .. versionadded:: 0.20.0 + + """ + pass + + class AbstractMethodError(NotImplementedError): """Raise this error instead of NotImplementedError for abstract methods while keeping compatibility with Python 2 and Python 3. diff --git a/pandas/core/indexing.py b/pandas/core/indexing.py index 660e8c9446202..c4ae3dcca8367 100755 --- a/pandas/core/indexing.py +++ b/pandas/core/indexing.py @@ -1814,7 +1814,9 @@ def check_bool_indexer(ax, key): result = result.reindex(ax) mask = isnull(result._values) if mask.any(): - raise IndexingError('Unalignable boolean Series key provided') + raise IndexingError('Unalignable boolean Series provided as ' + 'indexer (index of the boolean Series and of ' + 'the indexed object do not match') result = result.astype(bool)._values elif is_sparse(result): result = result.to_dense() diff --git a/pandas/indexes/multi.py b/pandas/indexes/multi.py index 45b6cad89d020..132543e0e386c 100644 --- a/pandas/indexes/multi.py +++ b/pandas/indexes/multi.py @@ -25,7 +25,8 @@ from pandas.core.common import (_values_from_object, is_bool_indexer, is_null_slice, - PerformanceWarning) + PerformanceWarning, + UnsortedIndexError) from pandas.core.base import FrozenList @@ -1936,9 +1937,10 @@ def get_locs(self, tup): # must be lexsorted to at least as many levels if not self.is_lexsorted_for_tuple(tup): - raise KeyError('MultiIndex Slicing requires the index to be fully ' - 'lexsorted tuple len ({0}), lexsort depth ' - '({1})'.format(len(tup), self.lexsort_depth)) + raise UnsortedIndexError('MultiIndex Slicing requires the index ' + 
'to be fully lexsorted tuple len ({0}), ' + 'lexsort depth ({1})' + .format(len(tup), self.lexsort_depth)) # indexer # this is the list of all values that we want to select diff --git a/pandas/tests/indexes/test_multi.py b/pandas/tests/indexes/test_multi.py index e1e714719092a..ccbe65e58a1a5 100644 --- a/pandas/tests/indexes/test_multi.py +++ b/pandas/tests/indexes/test_multi.py @@ -8,7 +8,7 @@ from pandas import (DataFrame, date_range, period_range, MultiIndex, Index, CategoricalIndex, compat) -from pandas.core.common import PerformanceWarning +from pandas.core.common import PerformanceWarning, UnsortedIndexError from pandas.indexes.base import InvalidIndexError from pandas.compat import range, lrange, u, PY3, long, lzip @@ -2535,3 +2535,19 @@ def test_dropna(self): msg = "invalid how option: xxx" with tm.assertRaisesRegexp(ValueError, msg): idx.dropna(how='xxx') + + def test_unsortedindex(self): + # GH 11897 + mi = pd.MultiIndex.from_tuples([('z', 'a'), ('x', 'a'), ('y', 'b'), + ('x', 'b'), ('y', 'a'), ('z', 'b')], + names=['one', 'two']) + df = pd.DataFrame([[i, 10 * i] for i in lrange(6)], index=mi, + columns=['one', 'two']) + + with assertRaises(UnsortedIndexError): + df.loc(axis=0)['z', :] + df.sort_index(inplace=True) + self.assertEqual(len(df.loc(axis=0)['z', :]), 2) + + with assertRaises(KeyError): + df.loc(axis=0)['q', :] diff --git a/pandas/tests/indexing/test_indexing.py b/pandas/tests/indexing/test_indexing.py index 9ca1fd2a76817..bc95ff329d686 100644 --- a/pandas/tests/indexing/test_indexing.py +++ b/pandas/tests/indexing/test_indexing.py @@ -23,7 +23,7 @@ MultiIndex, Timestamp, Timedelta) from pandas.formats.printing import pprint_thing from pandas import concat -from pandas.core.common import PerformanceWarning +from pandas.core.common import PerformanceWarning, UnsortedIndexError import pandas.util.testing as tm from pandas import date_range @@ -2230,7 +2230,7 @@ def f(): df = df.sortlevel(level=1, axis=0) self.assertEqual(df.index.lexsort_depth, 
0) with tm.assertRaisesRegexp( - KeyError, + UnsortedIndexError, 'MultiIndex Slicing requires the index to be fully ' r'lexsorted tuple len \(2\), lexsort depth \(0\)'): df.loc[(slice(None), df.loc[:, ('a', 'bar')] > 5), :] @@ -2417,7 +2417,7 @@ def test_per_axis_per_level_doc_examples(self): def f(): df.loc['A1', (slice(None), 'foo')] - self.assertRaises(KeyError, f) + self.assertRaises(UnsortedIndexError, f) df = df.sortlevel(axis=1) # slicing @@ -3480,8 +3480,12 @@ def test_iloc_mask(self): ('index', '.loc'): '0b11', ('index', '.iloc'): ('iLocation based boolean indexing ' 'cannot use an indexable as a mask'), - ('locs', ''): 'Unalignable boolean Series key provided', - ('locs', '.loc'): 'Unalignable boolean Series key provided', + ('locs', ''): 'Unalignable boolean Series provided as indexer ' + '(index of the boolean Series and of the indexed ' + 'object do not match', + ('locs', '.loc'): 'Unalignable boolean Series provided as indexer ' + '(index of the boolean Series and of the ' + 'indexed object do not match', ('locs', '.iloc'): ('iLocation based boolean indexing on an ' 'integer type is not available'), }