pandas-dev · jreback · Aug 14, 2014 · Aug 12, 2014
diff --git a/doc/source/indexing.rst b/doc/source/indexing.rst
@@ -282,7 +282,7 @@ Selection By Label
    See :ref:`Returning a View versus Copy <indexing.view_versus_copy>`
 
 pandas provides a suite of methods in order to have **purely label based indexing**. This is a strict inclusion based protocol.
-**ALL** of the labels for which you ask, must be in the index or a ``KeyError`` will be raised! When slicing, the start bound is *included*, **AND** the stop bound is *included*. Integers are valid labels, but they refer to the label **and not the position**.
+**At least 1** of the labels for which you ask must be in the index, or a ``KeyError`` will be raised! When slicing, the start bound is *included*, **AND** the stop bound is *included*. Integers are valid labels, but they refer to the label **and not the position**.
 
 The ``.loc`` attribute is the primary access method. The following are valid inputs:
 

diff --git a/doc/source/v0.15.0.txt b/doc/source/v0.15.0.txt
@@ -172,6 +172,50 @@ API changes
   as the ``left`` argument.  (:issue:`7737`)
 
 - Histogram from ``DataFrame.plot`` with ``kind='hist'`` (:issue:`7809`), See :ref:`the docs<visualization.hist>`.
+- Consistency when indexing with ``.loc`` and a list-like indexer when no values are found.
+
+  .. ipython:: python
+
+     df = DataFrame([['a'],['b']],index=[1,2])
+     df
+
+  In prior versions there was a difference in these two constructs:
+
+    - ``df.loc[[3]]`` would (prior to 0.15.0) return a frame reindexed by 3 (with all ``np.nan`` values)
+    - ``df.loc[[3],:]`` would raise ``KeyError``.
+
+  Both will now raise a ``KeyError``. The rule is that *at least 1* indexer must be found when using a list-like and ``.loc`` (:issue:`7999`)
+
+  There was also a difference between ``df.loc[[1,3]]`` (returns a frame reindexed by ``[1, 3]``) and ``df.loc[[1, 3],:]`` (would raise ``KeyError`` prior to 0.15.0). Both will now return a reindexed frame.
+
+  .. ipython:: python
+
+     df.loc[[1,3]]
+     df.loc[[1,3],:]
+
+  This can also be seen in multi-axis indexing with a ``Panel``.
+
+  .. ipython:: python
+
+     p = Panel(np.arange(2*3*4).reshape(2,3,4),
+               items=['ItemA','ItemB'],major_axis=[1,2,3],minor_axis=['A','B','C','D'])
+     p
+
+  The following would raise ``KeyError`` prior to 0.15.0:
+
+  .. ipython:: python
+
+     p.loc[['ItemA','ItemD'],:,'D']
+
+  Furthermore, ``.loc`` will raise a ``KeyError`` if no values are found in a multi-index with a list-like indexer:
+
+  .. ipython:: python
+     :okexcept:
+
+     s = Series(np.arange(3,dtype='int64'),index=MultiIndex.from_product([['A'],['foo','bar','baz']],
+                                                                         names=['one','two'])).sortlevel()
+     s
+     s.loc[['D']]
 
 .. _whatsnew_0150.dt:
 

diff --git a/pandas/core/indexing.py b/pandas/core/indexing.py
@@ -132,6 +132,16 @@ def _has_valid_tuple(self, key):
                 raise ValueError("Location based indexing can only have [%s] "
                                  "types" % self._valid_types)
 
+    def _should_validate_iterable(self, axis=0):
+        """ return a boolean indicating whether this axis needs validation for a passed iterable (False for a MultiIndex or floating axis) """
+        ax = self.obj._get_axis(axis)
+        if isinstance(ax, MultiIndex):
+            return False
+        elif ax.is_floating():
+            return False
+
+        return True
+
     def _is_nested_tuple_indexer(self, tup):
         if any([ isinstance(ax, MultiIndex) for ax in self.obj.axes ]):
             return any([ _is_nested_tuple(tup,ax) for ax in self.obj.axes ])
@@ -762,7 +772,7 @@ def _getitem_lowerdim(self, tup):
         # we can directly get the axis result since the axis is specified
         if self.axis is not None:
             axis = self.obj._get_axis_number(self.axis)
-            return self._getitem_axis(tup, axis=axis, validate_iterable=True)
+            return self._getitem_axis(tup, axis=axis)
 
         # we may have a nested tuples indexer here
         if self._is_nested_tuple_indexer(tup):
@@ -825,7 +835,7 @@ def _getitem_nested_tuple(self, tup):
                 return result
 
             # this is a series with a multi-index specified a tuple of selectors
-            return self._getitem_axis(tup, axis=0, validate_iterable=True)
+            return self._getitem_axis(tup, axis=0)
 
         # handle the multi-axis by taking sections and reducing
         # this is iterative
@@ -838,7 +848,7 @@ def _getitem_nested_tuple(self, tup):
                 continue
 
             current_ndim = obj.ndim
-            obj = getattr(obj, self.name)._getitem_axis(key, axis=axis, validate_iterable=True)
+            obj = getattr(obj, self.name)._getitem_axis(key, axis=axis)
             axis += 1
 
             # if we have a scalar, we are done
@@ -859,9 +869,11 @@ def _getitem_nested_tuple(self, tup):
 
         return obj
 
-    def _getitem_axis(self, key, axis=0, validate_iterable=False):
+    def _getitem_axis(self, key, axis=0):
+
+        if self._should_validate_iterable(axis):
+            self._has_valid_type(key, axis)
 
-        self._has_valid_type(key, axis)
         labels = self.obj._get_axis(axis)
         if isinstance(key, slice):
             return self._get_slice_axis(key, axis=axis)
@@ -888,17 +900,29 @@ def _getitem_axis(self, key, axis=0, validate_iterable=False):
             return self._get_label(key, axis=axis)
 
     def _getitem_iterable(self, key, axis=0):
+        if self._should_validate_iterable(axis):
+            self._has_valid_type(key, axis)
+
         labels = self.obj._get_axis(axis)
 
         def _reindex(keys, level=None):
+
             try:
-                return self.obj.reindex_axis(keys, axis=axis, level=level)
+                result = self.obj.reindex_axis(keys, axis=axis, level=level)
             except AttributeError:
                 # Series
                 if axis != 0:
                     raise AssertionError('axis must be 0')
                 return self.obj.reindex(keys, level=level)
 
+            # this is an error as we are trying to find
+            # keys in a multi-index that don't exist
+            if isinstance(labels, MultiIndex) and level is not None:
+                if hasattr(result,'ndim') and not np.prod(result.shape) and len(keys):
+                    raise KeyError("cannot index a multi-index axis with these keys")
+
+            return result
+
         if com._is_bool_indexer(key):
             key = _check_bool_indexer(labels, key)
             inds, = key.nonzero()
@@ -1149,7 +1173,7 @@ def __getitem__(self, key):
         else:
             return self._getitem_axis(key, axis=0)
 
-    def _getitem_axis(self, key, axis=0, validate_iterable=False):
+    def _getitem_axis(self, key, axis=0):
         raise NotImplementedError()
 
     def _getbool_axis(self, key, axis=0):
@@ -1223,11 +1247,11 @@ def _has_valid_type(self, key, axis):
             if isinstance(key, tuple) and isinstance(ax, MultiIndex):
                 return True
 
-            # require all elements in the index
+            # require at least 1 element in the index
             idx = _ensure_index(key)
-            if not idx.isin(ax).all():
+            if len(idx) and not idx.isin(ax).any():
 
-                raise KeyError("[%s] are not in ALL in the [%s]" %
+                raise KeyError("None of [%s] are in the [%s]" %
                                (key, self.obj._get_axis_name(axis)))
 
             return True
@@ -1256,7 +1280,7 @@ def error():
 
         return True
 
-    def _getitem_axis(self, key, axis=0, validate_iterable=False):
+    def _getitem_axis(self, key, axis=0):
         labels = self.obj._get_axis(axis)
 
         if isinstance(key, slice):
@@ -1280,9 +1304,6 @@ def _getitem_axis(self, key, axis=0, validate_iterable=False):
                 if hasattr(key, 'ndim') and key.ndim > 1:
                     raise ValueError('Cannot index with multidimensional key')
 
-                if validate_iterable:
-                    self._has_valid_type(key, axis)
-
                 return self._getitem_iterable(key, axis=axis)
 
             # nested tuple slicing
@@ -1389,7 +1410,7 @@ def _get_slice_axis(self, slice_obj, axis=0):
         else:
             return self.obj.take(slice_obj, axis=axis, convert=False)
 
-    def _getitem_axis(self, key, axis=0, validate_iterable=False):
+    def _getitem_axis(self, key, axis=0):
 
         if isinstance(key, slice):
             self._has_valid_type(key, axis)

diff --git a/pandas/tests/test_indexing.py b/pandas/tests/test_indexing.py
@@ -743,11 +743,14 @@ def test_loc_getitem_label_list(self):
         self.check_result('list lbl', 'loc', [Timestamp('20130102'),Timestamp('20130103')], 'ix',
                           [Timestamp('20130102'),Timestamp('20130103')], typs = ['ts'], axes=0)
 
-        # fails
         self.check_result('list lbl', 'loc', [0,1,2], 'indexer', [0,1,2], typs = ['empty'], fails = KeyError)
         self.check_result('list lbl', 'loc', [0,2,3], 'ix', [0,2,3], typs = ['ints'], axes=0, fails = KeyError)
-        self.check_result('list lbl', 'loc', [3,6,7], 'ix', [3,6,9], typs = ['ints'], axes=1, fails = KeyError)
-        self.check_result('list lbl', 'loc', [4,8,10], 'ix', [4,8,12], typs = ['ints'], axes=2, fails = KeyError)
+        self.check_result('list lbl', 'loc', [3,6,7], 'ix', [3,6,7], typs = ['ints'], axes=1, fails = KeyError)
+        self.check_result('list lbl', 'loc', [4,8,10], 'ix', [4,8,10], typs = ['ints'], axes=2, fails = KeyError)
+
+        # fails
+        self.check_result('list lbl', 'loc', [20,30,40], 'ix', [20,30,40], typs = ['ints'], axes=1, fails = KeyError)
+        self.check_result('list lbl', 'loc', [20,30,40], 'ix', [20,30,40], typs = ['ints'], axes=2, fails = KeyError)
 
         # array like
         self.check_result('array like', 'loc', Series(index=[0,2,4]).index, 'ix', [0,2,4], typs = ['ints'], axes=0)
@@ -815,30 +818,35 @@ def test_loc_to_fail(self):
         s.loc['a'] = 2
 
         self.assertRaises(KeyError, lambda : s.loc[-1])
+        self.assertRaises(KeyError, lambda : s.loc[[-1, -2]])
 
-        result = s.loc[[-1, -2]]
-        expected = Series(np.nan,index=[-1,-2])
-        assert_series_equal(result, expected)
-
-        result = s.loc[['4']]
-        expected = Series(np.nan,index=['4'])
-        assert_series_equal(result, expected)
+        self.assertRaises(KeyError, lambda : s.loc[['4']])
 
         s.loc[-1] = 3
         result = s.loc[[-1,-2]]
         expected = Series([3,np.nan],index=[-1,-2])
         assert_series_equal(result, expected)
 
         s['a'] = 2
-        result = s.loc[[-2]]
-        expected = Series([np.nan],index=[-2])
-        assert_series_equal(result, expected)
+        self.assertRaises(KeyError, lambda : s.loc[[-2]])
 
         del s['a']
         def f():
             s.loc[[-2]] = 0
         self.assertRaises(KeyError, f)
 
+        # inconsistency between .loc[values] and .loc[values,:]
+        # GH 7999
+        df = DataFrame([['a'],['b']],index=[1,2],columns=['value'])
+
+        def f():
+            df.loc[[3],:]
+        self.assertRaises(KeyError, f)
+
+        def f():
+            df.loc[[3]]
+        self.assertRaises(KeyError, f)
+
     def test_loc_getitem_label_slice(self):
 
         # label slices (with ints)
@@ -1575,11 +1583,13 @@ def f():
         self.assertRaises(ValueError, f)
 
         # ambiguous cases
-        # these can be multiply interpreted
-        # but we can catch this in some cases
-        def f():
-            df.loc[(slice(None),[1])]
-        self.assertRaises(KeyError, f)
+        # these can be multiply interpreted (e.g. in this case
+        # as df.loc[slice(None),[1]] as well)
+        self.assertRaises(KeyError, lambda : df.loc[slice(None),[1]])
+
+        result = df.loc[(slice(None),[1]),:]
+        expected = df.iloc[[0,3]]
+        assert_frame_equal(result, expected)
 
         # not lexsorted
         self.assertEqual(df.index.lexsort_depth,2)
@@ -1960,9 +1970,12 @@ def f():
         result = s.loc[['A','D']]
         assert_series_equal(result,expected)
 
-        # empty series
-        result = s.loc[['D']]
-        expected = s.loc[[]]
+        # not any values found
+        self.assertRaises(KeyError, lambda : s.loc[['D']])
+
+        # empty ok
+        result = s.loc[[]]
+        expected = s.iloc[[]]
         assert_series_equal(result,expected)
 
         idx = pd.IndexSlice
@@ -2788,9 +2801,8 @@ def test_series_partial_set(self):
         result = ser.loc[[3, 2, 3]]
         assert_series_equal(result, expected)
 
-        expected = Series([np.nan, np.nan, np.nan], index=[3, 3, 3])
-        result = ser.loc[[3, 3, 3]]
-        assert_series_equal(result, expected)
+        # raises as nothing is in the index
+        self.assertRaises(KeyError, lambda : ser.loc[[3, 3, 3]])
 
         expected = Series([0.2, 0.2, np.nan], index=[2, 2, 3])
         result = ser.loc[[2, 2, 3]]