ENH: make it possible to pass keyword argument to .loc

jreback · jreback · commit 7d707101c198 · 2014-02-13T08:39:07.000-05:00
ENH: allow the axis keyword to short-circuit indexing
diff --git a/doc/source/indexing.rst b/doc/source/indexing.rst
@@ -1771,41 +1771,55 @@ As usual, **both sides** of the slicers are included as this is label indexing.
                     columns=micolumns).sortlevel().sortlevel(axis=1)
    dfmi
 
+Basic multi-index slicing using slices, lists, and labels.
+
 .. ipython:: python
 
    dfmi.loc[(slice('A1','A3'),slice(None), ['C1','C3']),:]
-   dfmi.loc[(slice(None),slice(None), ['C1','C3']),:]
+
+You can use ``pd.IndexSlice`` as a shortcut for creating these slices.
+
+.. ipython:: python
+
+   idx = pd.IndexSlice
+   dfmi.loc[idx[:,:,['C1','C3']],idx[:,'foo']]
 
 It is possible to perform quite complicated selections using this method on multiple
 axes at the same time.
 
 .. ipython:: python
 
    dfmi.loc['A1',(slice(None),'foo')]
-   dfmi.loc[(slice(None),slice(None), ['C1','C3']),(slice(None),'foo')]
-   dfmi.loc[df[('a','foo')]>200,slice(None), ['C1','C3']),(slice(None),'foo')]
+   dfmi.loc[idx[:,:,['C1','C3']],idx[:,'foo']]
 
-You can use a ``pd.IndexSlice`` to shortcut the creation of these slices
+Using a boolean indexer, you can make selections based on the *values*.
 
 .. ipython:: python
 
-   idx = pd.IndexSlice
-   dfmi.loc[idx[:,:,['C1','C3']],idx[:,'foo']]
+   mask = dfmi[('a','foo')]>200
+   dfmi.loc[idx[mask,:,['C1','C3']],idx[:,'foo']]
+
+You can also specify the ``axis`` argument to ``.loc`` to interpret the passed
+slicers on a single axis.
+
+.. ipython:: python
+
+   dfmi.loc(axis=0)[:,:,['C1','C3']]
 
 Furthermore you can *set* the values using these methods
 
 .. ipython:: python
 
    df2 = dfmi.copy()
-   df2.loc[(slice(None),slice(None), ['C1','C3']),:] = -10
+   df2.loc(axis=0)[:,:,['C1','C3']] = -10
    df2
 
-You use a right-hand-side of an alignable object as well.
+You can use an alignable object as the right-hand side as well.
 
 .. ipython:: python
 
    df2 = dfmi.copy()
-   df2.loc[(slice(None),slice(None), ['C1','C3']),:] = df2*1000
+   df2.loc[idx[:,:,['C1','C3']],:] = df2*1000
    df2
 
 .. _indexing.xs:
diff --git a/doc/source/v0.14.0.txt b/doc/source/v0.14.0.txt
@@ -85,41 +85,55 @@ See also issues (:issue:`6134`, :issue:`4036`, :issue:`3057`, :issue:`2598`, :is
                   columns=columns).sortlevel().sortlevel(axis=1)
    df
 
+Basic multi-index slicing using slices, lists, and labels.
+
 .. ipython:: python
 
    df.loc[(slice('A1','A3'),slice(None), ['C1','C3']),:]
-   df.loc[(slice(None),slice(None), ['C1','C3']),:]
+
+You can use ``pd.IndexSlice`` as a shortcut for creating these slices.
+
+.. ipython:: python
+
+   idx = pd.IndexSlice
+   df.loc[idx[:,:,['C1','C3']],idx[:,'foo']]
 
 It is possible to perform quite complicated selections using this method on multiple
 axes at the same time.
 
 .. ipython:: python
 
    df.loc['A1',(slice(None),'foo')]
-   df.loc[(slice(None),slice(None), ['C1','C3']),(slice(None),'foo')]
-   df.loc[df[('a','foo')]>200,slice(None), ['C1','C3']),(slice(None),'foo')]
+   df.loc[idx[:,:,['C1','C3']],idx[:,'foo']]
 
-You can use a ``pd.IndexSlice`` to shortcut the creation of these slices
+Using a boolean indexer, you can make selections based on the *values*.
 
 .. ipython:: python
 
-   idx = pd.IndexSlice
-   df.loc[idx[:,:,['C1','C3']],idx[:,'foo']]
+   mask = df[('a','foo')]>200
+   df.loc[idx[mask,:,['C1','C3']],idx[:,'foo']]
+
+You can also specify the ``axis`` argument to ``.loc`` to interpret the passed
+slicers on a single axis.
+
+.. ipython:: python
+
+   df.loc(axis=0)[:,:,['C1','C3']]
 
 Furthermore you can *set* the values using these methods
 
 .. ipython:: python
 
    df2 = df.copy()
-   df2.loc[(slice(None),slice(None), ['C1','C3']),:] = -10
+   df2.loc(axis=0)[:,:,['C1','C3']] = -10
    df2
 
-You use a right-hand-side of an alignable object as well.
+You can use an alignable object as the right-hand side as well.
 
 .. ipython:: python
 
    df2 = df.copy()
-   df2.loc[(slice(None),slice(None), ['C1','C3']),:] = df2*1000
+   df2.loc[idx[:,:,['C1','C3']],:] = df2*1000
    df2
 
 Prior Version Deprecations/Changes
diff --git a/pandas/core/indexing.py b/pandas/core/indexing.py
@@ -44,6 +44,16 @@ def __init__(self, obj, name):
         self.obj = obj
         self.ndim = obj.ndim
         self.name = name
+        self.axis = None
+
+    def __call__(self, *args, **kwargs):
+        # we need to return a copy of ourselves
+        self = self.__class__(self.obj, self.name)
+
+        # set the passed in values
+        for k, v in compat.iteritems(kwargs):
+            setattr(self,k,v)
+        return self
 
     def __iter__(self):
         raise NotImplementedError('ix is not iterable')
@@ -104,23 +114,28 @@ def _slice(self, obj, axis=0, raise_on_error=False, typ=None):
 
     def __setitem__(self, key, value):
 
-        # kludgetastic
-        ax = self.obj._get_axis(0)
-        if isinstance(ax, MultiIndex):
-            try:
-                indexer = ax.get_loc(key)
-                self._setitem_with_indexer(indexer, value)
-                return
-            except Exception:
-                pass
-
-        if isinstance(key, tuple):
-            if len(key) > self.ndim:
-                raise IndexingError('only tuples of length <= %d supported' %
-                                    self.ndim)
+        if self.axis is not None:
             indexer = self._convert_tuple(key, is_setter=True)
+
         else:
-            indexer = self._convert_to_indexer(key, is_setter=True)
+
+            # kludgetastic
+            ax = self.obj._get_axis(0)
+            if isinstance(ax, MultiIndex):
+                try:
+                    indexer = ax.get_loc(key)
+                    self._setitem_with_indexer(indexer, value)
+                    return
+                except Exception:
+                    pass
+
+            if isinstance(key, tuple):
+                if len(key) > self.ndim:
+                    raise IndexingError('only tuples of length <= %d supported' %
+                                        self.ndim)
+                indexer = self._convert_tuple(key, is_setter=True)
+            else:
+                indexer = self._convert_to_indexer(key, is_setter=True)
 
         self._setitem_with_indexer(indexer, value)
 
@@ -143,9 +158,17 @@ def _is_nested_tuple_indexer(self, tup):
 
     def _convert_tuple(self, key, is_setter=False):
         keyidx = []
-        for i, k in enumerate(key):
-            idx = self._convert_to_indexer(k, axis=i, is_setter=is_setter)
-            keyidx.append(idx)
+        if self.axis is not None:
+            axis = self.obj._get_axis_number(self.axis)
+            for i in range(self.ndim):
+                if i == axis:
+                    keyidx.append(self._convert_to_indexer(key, axis=axis, is_setter=is_setter))
+                else:
+                    keyidx.append(slice(None))
+        else:
+            for i, k in enumerate(key):
+                idx = self._convert_to_indexer(k, axis=i, is_setter=is_setter)
+                keyidx.append(idx)
         return tuple(keyidx)
 
     def _convert_scalar_indexer(self, key, axis):
@@ -732,6 +755,11 @@ def _handle_lowerdim_multi_index_axis0(self, tup):
 
     def _getitem_lowerdim(self, tup):
 
+        # we can directly get the axis result since the axis is specified
+        if self.axis is not None:
+            axis = self.obj._get_axis_number(self.axis)
+            return self._getitem_axis(tup, axis=axis, validate_iterable=True)
+
         # we may have a nested tuples indexer here
         if self._is_nested_tuple_indexer(tup):
             return self._getitem_nested_tuple(tup)
diff --git a/pandas/tests/test_indexing.py b/pandas/tests/test_indexing.py
@@ -83,6 +83,9 @@ def _axify(obj, key, axis):
     return k
 
 
+def _mklbl(prefix,n):
+    return ["%s%s" % (prefix,i)  for i in range(n)]
+
 class TestIndexing(tm.TestCase):
 
     _multiprocess_can_split_ = True
@@ -1066,11 +1069,9 @@ def test_per_axis_per_level_getitem(self):
 
         # GH6134
         # example test case
-        def mklbl(prefix,n):
-            return ["%s%s" % (prefix,i)  for i in range(n)]
-
-        ix = MultiIndex.from_product([mklbl('A',5),mklbl('B',7),mklbl('C',4),mklbl('D',2)])
+        ix = MultiIndex.from_product([_mklbl('A',5),_mklbl('B',7),_mklbl('C',4),_mklbl('D',2)])
         df = DataFrame(np.arange(len(ix.get_values())),index=ix)
+
         result = df.loc[(slice('A1','A3'),slice(None), ['C1','C3']),:]
         expected = df.loc[[ tuple([a,b,c,d]) for a,b,c,d in df.index.values if (
             a == 'A1' or a == 'A2' or a == 'A3') and (c == 'C1' or c == 'C3')]]
@@ -1150,19 +1151,16 @@ def f():
             df.loc[(slice(None),[1])]
         self.assertRaises(KeyError, f)
 
-    def test_per_axis_per_level_getitem_doc_examples(self):
+    def test_per_axis_per_level_doc_examples(self):
 
         # test index maker
         idx = pd.IndexSlice
 
         # from indexing.rst / advanced
-        def mklbl(prefix,n):
-            return ["%s%s" % (prefix,i)  for i in range(n)]
-
-        index = MultiIndex.from_product([mklbl('A',4),
-                                         mklbl('B',2),
-                                         mklbl('C',4),
-                                         mklbl('D',2)])
+        index = MultiIndex.from_product([_mklbl('A',4),
+                                         _mklbl('B',2),
+                                         _mklbl('C',4),
+                                         _mklbl('D',2)])
         columns = MultiIndex.from_tuples([('a','foo'),('a','bar'),
                                           ('b','foo'),('b','bah')],
                                          names=['lvl0', 'lvl1'])
@@ -1189,9 +1187,60 @@ def f():
         self.assertRaises(KeyError, f)
         df = df.sortlevel(axis=1)
 
+        # slicing
         df.loc['A1',(slice(None),'foo')]
         df.loc[(slice(None),slice(None), ['C1','C3']),(slice(None),'foo')]
 
+        # setitem
+        df.loc(axis=0)[:,:,['C1','C3']] = -10
+
+    def test_loc_arguments(self):
+
+        index = MultiIndex.from_product([_mklbl('A',4),
+                                         _mklbl('B',2),
+                                         _mklbl('C',4),
+                                         _mklbl('D',2)])
+        columns = MultiIndex.from_tuples([('a','foo'),('a','bar'),
+                                          ('b','foo'),('b','bah')],
+                                         names=['lvl0', 'lvl1'])
+        df = DataFrame(np.arange(len(index)*len(columns)).reshape((len(index),len(columns))),
+                       index=index,
+                       columns=columns).sortlevel().sortlevel(axis=1)
+
+
+        # axis 0
+        result = df.loc(axis=0)['A1':'A3',:,['C1','C3']]
+        expected = df.loc[[ tuple([a,b,c,d]) for a,b,c,d in df.index.values if (
+            a == 'A1' or a == 'A2' or a == 'A3') and (c == 'C1' or c == 'C3')]]
+        assert_frame_equal(result, expected)
+
+        result = df.loc(axis='index')[:,:,['C1','C3']]
+        expected = df.loc[[ tuple([a,b,c,d]) for a,b,c,d in df.index.values if (
+            c == 'C1' or c == 'C3')]]
+        assert_frame_equal(result, expected)
+
+        # axis 1
+        result = df.loc(axis=1)[:,'foo']
+        expected = df.loc[:,(slice(None),'foo')]
+        assert_frame_equal(result, expected)
+
+        result = df.loc(axis='columns')[:,'foo']
+        expected = df.loc[:,(slice(None),'foo')]
+        assert_frame_equal(result, expected)
+
+        # invalid axis
+        def f():
+            df.loc(axis=-1)[:,:,['C1','C3']]
+        self.assertRaises(ValueError, f)
+
+        def f():
+            df.loc(axis=2)[:,:,['C1','C3']]
+        self.assertRaises(ValueError, f)
+
+        def f():
+            df.loc(axis='foo')[:,:,['C1','C3']]
+        self.assertRaises(ValueError, f)
+
     def test_per_axis_per_level_setitem(self):
 
         # test index maker
@@ -1213,6 +1262,12 @@ def test_per_axis_per_level_setitem(self):
         expected.iloc[:,:] = 100
         assert_frame_equal(df, expected)
 
+        df = df_orig.copy()
+        df.loc(axis=0)[:,:] = 100
+        expected = df_orig.copy()
+        expected.iloc[:,:] = 100
+        assert_frame_equal(df, expected)
+
         df = df_orig.copy()
         df.loc[(slice(None),slice(None)),(slice(None),slice(None))] = 100
         expected = df_orig.copy()
@@ -1238,6 +1293,12 @@ def test_per_axis_per_level_setitem(self):
         expected.iloc[[0,3]] = 100
         assert_frame_equal(df, expected)
 
+        df = df_orig.copy()
+        df.loc(axis=0)[:,1] = 100
+        expected = df_orig.copy()
+        expected.iloc[[0,3]] = 100
+        assert_frame_equal(df, expected)
+
         # columns
         df = df_orig.copy()
         df.loc[:,(slice(None),['foo'])] = 100