diff --git a/doc/source/groupby.rst b/doc/source/groupby.rst
index a88b7332d9b9e..76c4280d8b728 100644
--- a/doc/source/groupby.rst
+++ b/doc/source/groupby.rst
@@ -707,6 +707,38 @@ can be used as group keys. If so, the order of the levels will be preserved:
 
    data.groupby(factor).mean()
 
+
+Taking the first rows of each group
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+Just like for a DataFrame or Series, you can call ``head`` and ``tail`` on a groupby:
+
+.. ipython:: python
+
+   df = DataFrame([[1, 2], [1, 4], [5, 6]], columns=['A', 'B'])
+   df
+
+   g = df.groupby('A')
+   g.head(1)
+
+   g.tail(1)
+
+This shows the first or last n rows from each group.
+
+.. warning::
+
+   Before 0.14.0 this was implemented with a fall-through apply,
+   so the result would incorrectly respect the ``as_index`` flag:
+
+   .. code-block:: python
+
+       >>> g.head(1)  # was equivalent to g.apply(lambda x: x.head(1))
+            A  B
+       A
+       1 0  1  2
+       5 2  5  6
+
+
 Enumerate group items
 ~~~~~~~~~~~~~~~~~~~~~
 
diff --git a/doc/source/v0.14.0.txt b/doc/source/v0.14.0.txt
index 4432e9e891e7d..a3d6d255db9a9 100644
--- a/doc/source/v0.14.0.txt
+++ b/doc/source/v0.14.0.txt
@@ -61,6 +61,24 @@ These are out-of-bounds selections
      s.year
      s.index.year
 
+- More consistent behaviour for some groupby methods:
+
+  - groupby ``head`` and ``tail`` now act more like ``filter`` rather than an aggregation:
+
+    .. ipython:: python
+
+       df = pd.DataFrame([[1, 2], [1, 4], [5, 6]], columns=['A', 'B'])
+       g = df.groupby('A')
+       g.head(1)  # filters DataFrame
+
+       g.apply(lambda x: x.head(1))  # used to simply fall-through
+
+  - groupby ``head`` and ``tail`` respect column selection:
+
+    .. ipython:: python
+
+       g[['B']].head(1)
+
 - Local variable usage has changed in
   :func:`pandas.eval`/:meth:`DataFrame.eval`/:meth:`DataFrame.query`
   (:issue:`5987`). For the :class:`~pandas.DataFrame` methods, two things have
diff --git a/pandas/core/groupby.py b/pandas/core/groupby.py
index f0588524e16eb..598df5507fa69 100644
--- a/pandas/core/groupby.py
+++ b/pandas/core/groupby.py
@@ -587,7 +587,8 @@ def head(self, n=5):
         """
         Returns first n rows of each group.
 
-        Essentially equivalent to ``.apply(lambda x: x.head(n))``
+        Essentially equivalent to ``.apply(lambda x: x.head(n))``,
+        except ignores as_index flag.
 
         Example
         -------
@@ -599,24 +600,23 @@ def head(self, n=5):
         0  1  2
         2  5  6
         >>> df.groupby('A').head(1)
-             A  B
-        A
-        1 0  1  2
-        5 2  5  6
+           A  B
+        0  1  2
+        2  5  6
 
         """
+        obj = self._selected_obj
         rng = np.arange(self.grouper._max_groupsize, dtype='int64')
         in_head = self._cumcount_array(rng) < n
-        head = self.obj[in_head]
-        if self.as_index:
-            head.index = self._index_with_as_index(in_head)
+        head = obj[in_head]
         return head
 
     def tail(self, n=5):
         """
         Returns last n rows of each group
 
-        Essentially equivalent to ``.apply(lambda x: x.tail(n))``
+        Essentially equivalent to ``.apply(lambda x: x.tail(n))``,
+        except ignores as_index flag.
 
         Example
         -------
@@ -628,17 +628,15 @@ def tail(self, n=5):
         0  1  2
         2  5  6
         >>> df.groupby('A').head(1)
-             A  B
-        A
-        1 0  1  2
-        5 2  5  6
+           A  B
+        0  1  2
+        2  5  6
 
         """
+        obj = self._selected_obj
         rng = np.arange(0, -self.grouper._max_groupsize, -1, dtype='int64')
         in_tail = self._cumcount_array(rng, ascending=False) > -n
-        tail = self.obj[in_tail]
-        if self.as_index:
-            tail.index = self._index_with_as_index(in_tail)
+        tail = obj[in_tail]
         return tail
 
     def _cumcount_array(self, arr, **kwargs):
@@ -654,6 +652,13 @@ def _cumcount_array(self, arr, **kwargs):
                 cumcounts[v] = arr[len(v)-1::-1]
         return cumcounts
 
+    @cache_readonly
+    def _selected_obj(self):  # object that head()/tail() slice rows from
+        if self._selection is None or isinstance(self.obj, Series):
+            return self.obj  # no selection stored, or obj is a Series
+        else:
+            return self.obj[self._selection]  # obj narrowed to the selection
+        
     def _index_with_as_index(self, b):
         """
         Take boolean mask of index to be returned from apply, if as_index=True
diff --git a/pandas/tests/test_groupby.py b/pandas/tests/test_groupby.py
index 4eee1d3a212e0..8af11c8bf77e1 100644
--- a/pandas/tests/test_groupby.py
+++ b/pandas/tests/test_groupby.py
@@ -1315,12 +1315,10 @@ def test_groupby_as_index_apply(self):
         g_not_as = df.groupby('user_id', as_index=False)
 
         res_as = g_as.head(2).index
-        exp_as = MultiIndex.from_tuples([(1, 0), (2, 1), (1, 2), (3, 4)])
-        assert_index_equal(res_as, exp_as)
-
         res_not_as = g_not_as.head(2).index
-        exp_not_as = Index([0, 1, 2, 4])
-        assert_index_equal(res_not_as, exp_not_as)
+        exp = Index([0, 1, 2, 4])
+        assert_index_equal(res_as, exp)
+        assert_index_equal(res_not_as, exp)
 
         res_as_apply = g_as.apply(lambda x: x.head(2)).index
         res_not_as_apply = g_not_as.apply(lambda x: x.head(2)).index
@@ -1355,11 +1353,8 @@ def test_groupby_head_tail(self):
         assert_frame_equal(df, g_not_as.head(7)) # contains all
         assert_frame_equal(df, g_not_as.tail(7))
 
-        # as_index=True, yuck
-        # prepend the A column as an index, in a roundabout way
-        df_as = df.copy()
-        df_as.index = df.set_index('A', append=True,
-                                        drop=False).index.swaplevel(0, 1)
+        # as_index=True, (used to be different)
+        df_as = df
 
         assert_frame_equal(df_as.loc[[0, 2]], g_as.head(1))
         assert_frame_equal(df_as.loc[[1, 2]], g_as.tail(1))
@@ -1373,6 +1368,18 @@ def test_groupby_head_tail(self):
         assert_frame_equal(df_as, g_as.head(7)) # contains all
         assert_frame_equal(df_as, g_as.tail(7))
 
+        # test with selection
+        assert_frame_equal(g_as[[]].head(1), df_as.loc[[0,2], []])
+        assert_frame_equal(g_as[['A']].head(1), df_as.loc[[0,2], ['A']])
+        assert_frame_equal(g_as[['B']].head(1), df_as.loc[[0,2], ['B']])
+        assert_frame_equal(g_as[['A', 'B']].head(1), df_as.loc[[0,2]])
+
+        assert_frame_equal(g_not_as[[]].head(1), df_as.loc[[0,2], []])
+        assert_frame_equal(g_not_as[['A']].head(1), df_as.loc[[0,2], ['A']])
+        assert_frame_equal(g_not_as[['B']].head(1), df_as.loc[[0,2], ['B']])
+        assert_frame_equal(g_not_as[['A', 'B']].head(1), df_as.loc[[0,2]])
+
+
     def test_groupby_multiple_key(self):
         df = tm.makeTimeDataFrame()
         grouped = df.groupby([lambda x: x.year,