BUG: do not raise UnsortedIndexError if sorting is not required

toobaz · toobaz · commit 7850436286c0 · 2017-06-20T12:10:04.000+02:00
closes pandas-dev#16734
diff --git a/doc/source/whatsnew/v0.21.0.txt b/doc/source/whatsnew/v0.21.0.txt
@@ -98,6 +98,7 @@ Indexing
 ^^^^^^^^
 
 - When called with a null slice (e.g. ``df.iloc[:]``), the``iloc`` and ``loc`` indexers return a shallow copy of the original object. Previously they returned the original object. (:issue:`13873`).
+- When called on an unsorted ``MultiIndex``, the ``loc`` indexer will now raise ``UnsortedIndexError`` only if a non-null slice is used on a level that is not lexsorted (:issue:`16734`).
 
 
 I/O
diff --git a/pandas/core/indexes/multi.py b/pandas/core/indexes/multi.py
@@ -1035,11 +1035,12 @@ def is_lexsorted(self):
         """
         return self.lexsort_depth == self.nlevels
 
-    def is_lexsorted_for_tuple(self, tup):
+    def _true_slices_indices(self, tup):
         """
-        Return True if we are correctly lexsorted given the passed tuple
+        Return a list holding i where tup[i] is a non-null slice, else -1
         """
-        return len(tup) <= self.lexsort_depth
+        slices = lambda k: isinstance(k, slice) and not is_null_slice(k)
+        return [(i if slices(k) else -1) for (i, k) in enumerate(tup)]
 
     @cache_readonly
     def lexsort_depth(self):
@@ -2262,12 +2263,12 @@ def get_locs(self, tup):
         """
 
         # must be lexsorted to at least as many levels
-        if not self.is_lexsorted_for_tuple(tup):
+        last_slice = max(self._true_slices_indices(tup))
+        if last_slice >= self.lexsort_depth:
             raise UnsortedIndexError('MultiIndex Slicing requires the index '
-                                     'to be fully lexsorted tuple len ({0}), '
-                                     'lexsort depth ({1})'
-                                     .format(len(tup), self.lexsort_depth))
-
+                                     'to be lexsorted: slicing on level '
+                                     '({0}), lexsort depth ({1})'
+                                     .format(last_slice, self.lexsort_depth))
         # indexer
         # this is the list of all values that we want to select
         n = len(self)
diff --git a/pandas/tests/indexes/test_multi.py b/pandas/tests/indexes/test_multi.py
@@ -2826,8 +2826,13 @@ def test_unsortedindex(self):
         df = pd.DataFrame([[i, 10 * i] for i in lrange(6)], index=mi,
                           columns=['one', 'two'])
 
+        # GH 16734: not sorted, but no real slicing
+        result = df.loc(axis=0)['z', 'a']
+        expected = df.iloc[0]
+        tm.assert_series_equal(result, expected)
+
         with pytest.raises(UnsortedIndexError):
-            df.loc(axis=0)['z', :]
+            df.loc(axis=0)['z', slice('a')]
         df.sort_index(inplace=True)
         assert len(df.loc(axis=0)['z', :]) == 2
 
diff --git a/pandas/tests/indexing/test_multiindex.py b/pandas/tests/indexing/test_multiindex.py
@@ -817,9 +817,13 @@ def f():
         assert df.index.lexsort_depth == 0
         with tm.assert_raises_regex(
                 UnsortedIndexError,
-                'MultiIndex Slicing requires the index to be fully '
-                r'lexsorted tuple len \(2\), lexsort depth \(0\)'):
-            df.loc[(slice(None), df.loc[:, ('a', 'bar')] > 5), :]
+                'MultiIndex Slicing requires the index to be '
+                r'lexsorted: slicing on level \(1\), lexsort depth \(0\)'):
+            df.loc[(slice(None), slice('bar')), :]
+
+        # GH 16734: not sorted, but no real slicing
+        result = df.loc[(slice(None), df.loc[:, ('a', 'bar')] > 5), :]
+        tm.assert_frame_equal(result, df.iloc[[1, 3], :])
 
     def test_multiindex_slicers_non_unique(self):
 
@@ -1001,9 +1005,14 @@ def test_per_axis_per_level_doc_examples(self):
 
         # not sorted
         def f():
-            df.loc['A1', (slice(None), 'foo')]
+            df.loc['A1', ('a', slice('foo'))]
 
         pytest.raises(UnsortedIndexError, f)
+
+        # GH 16734: not sorted, but no real slicing
+        tm.assert_frame_equal(df.loc['A1', (slice(None), 'foo')],
+                              df.loc['A1'].iloc[:, [0, 2]])
+
         df = df.sort_index(axis=1)
 
         # slicing