pandas-dev · adneu · May 29, 2016 · May 31, 2016 · Jul 5, 2016 · Jul 5, 2016
diff --git a/doc/source/whatsnew/v0.18.2.txt b/doc/source/whatsnew/v0.18.2.txt
@@ -313,7 +313,7 @@ Bug Fixes
 
 
 - Bug in ``groupby`` where ``apply`` returns different result depending on whether first result is ``None`` or not (:issue:`12824`)
-
+- Bug in ``groupby(..).nth()`` where the group key is included inconsistently (:issue:`12839`)
 
 
 

diff --git a/pandas/core/groupby.py b/pandas/core/groupby.py
@@ -95,7 +95,7 @@ def _groupby_function(name, alias, npfunc, numeric_only=True,
     @Appender(_doc_template)
     @Appender(_local_template)
     def f(self):
-        self._set_selection_from_grouper()
+        self._set_group_selection()
         try:
             return self._cython_agg_general(alias, numeric_only=numeric_only)
         except AssertionError as e:
@@ -457,8 +457,21 @@ def _selected_obj(self):
         else:
             return self.obj[self._selection]
 
-    def _set_selection_from_grouper(self):
-        """ we may need create a selection if we have non-level groupers """
+    def _clear_group_selection(self):
+        """
+        Clear group based selection. Used for methods needing to return info on
+        each group regardless of whether a group selection was previously set.
+        """
+        if self._group_selection is not None:
+            self._group_selection = None
+            # GH12839 clear cached selection too when changing group selection
+            self._reset_cache('_selected_obj')
+
+    def _set_group_selection(self):
+        """
+        Create group based selection. Used when selection is not passed
+        directly but instead via a grouper.
+        """
         grp = self.grouper
         if self.as_index and getattr(grp, 'groupings', None) is not None and \
            self.obj.ndim > 1:
@@ -468,6 +481,8 @@ def _set_selection_from_grouper(self):
 
             if len(groupers):
                 self._group_selection = ax.difference(Index(groupers)).tolist()
+                # GH12839 clear selected obj cache when group selection changes
+                self._reset_cache('_selected_obj')
 
     def _set_result_index_ordered(self, result):
         # set the result index on the passed values object and
@@ -511,7 +526,7 @@ def _make_wrapper(self, name):
 
         # need to setup the selection
         # as are not passed directly but in the grouper
-        self._set_selection_from_grouper()
+        self._set_group_selection()
 
         f = getattr(self._selected_obj, name)
         if not isinstance(f, types.MethodType):
@@ -979,7 +994,7 @@ def mean(self, *args, **kwargs):
         except GroupByError:
             raise
         except Exception:  # pragma: no cover
-            self._set_selection_from_grouper()
+            self._set_group_selection()
             f = lambda x: x.mean(axis=self.axis)
             return self._python_agg_general(f)
 
@@ -997,7 +1012,7 @@ def median(self):
             raise
         except Exception:  # pragma: no cover
 
-            self._set_selection_from_grouper()
+            self._set_group_selection()
 
             def f(x):
                 if isinstance(x, np.ndarray):
@@ -1040,7 +1055,7 @@ def var(self, ddof=1, *args, **kwargs):
         if ddof == 1:
             return self._cython_agg_general('var')
         else:
-            self._set_selection_from_grouper()
+            self._set_group_selection()
             f = lambda x: x.var(ddof=ddof)
             return self._python_agg_general(f)
 
@@ -1216,7 +1231,7 @@ def nth(self, n, dropna=None):
             raise TypeError("n needs to be an int or a list/set/tuple of ints")
 
         nth_values = np.array(nth_values, dtype=np.intp)
-        self._set_selection_from_grouper()
+        self._set_group_selection()
 
         if not dropna:
             mask = np.in1d(self._cumcount_array(), nth_values) | \
@@ -1324,7 +1339,7 @@ def cumcount(self, ascending=True):
         dtype: int64
         """
 
-        self._set_selection_from_grouper()
+        self._set_group_selection()
 
         index = self._selected_obj.index
         cumcounts = self._cumcount_array(ascending=ascending)
@@ -1402,6 +1417,7 @@ def head(self, n=5):
         0  1  2
         2  5  6
         """
+        self._clear_group_selection()
         mask = self._cumcount_array() < n
         return self._selected_obj[mask]
 
@@ -1428,6 +1444,7 @@ def tail(self, n=5):
         0  a  1
         2  b  1
         """
+        self._clear_group_selection()
         mask = self._cumcount_array(ascending=False) < n
         return self._selected_obj[mask]
 

diff --git a/pandas/tests/test_groupby.py b/pandas/tests/test_groupby.py
@@ -354,6 +354,35 @@ def test_nth_multi_index_as_expected(self):
                                          names=['A', 'B']))
         assert_frame_equal(result, expected)
 
+    def test_group_selection_cache(self):
+        # GH 12839 nth, head, and tail should return same result consistently
+        df = DataFrame([[1, 2], [1, 4], [5, 6]], columns=['A', 'B'])
+        expected = df.iloc[[0, 2]].set_index('A')
+
+        g = df.groupby('A')
+        result1 = g.head(n=2)
+        result2 = g.nth(0)
+        assert_frame_equal(result1, df)
+        assert_frame_equal(result2, expected)
+
+        g = df.groupby('A')
+        result1 = g.tail(n=2)
+        result2 = g.nth(0)
+        assert_frame_equal(result1, df)
+        assert_frame_equal(result2, expected)
+
+        g = df.groupby('A')
+        result1 = g.nth(0)
+        result2 = g.head(n=2)
+        assert_frame_equal(result1, expected)
+        assert_frame_equal(result2, df)
+
+        g = df.groupby('A')
+        result1 = g.nth(0)
+        result2 = g.tail(n=2)
+        assert_frame_equal(result1, expected)
+        assert_frame_equal(result2, df)
+
     def test_grouper_index_types(self):
         # related GH5375
         # groupby misbehaving when using a Floatlike index
@@ -6107,7 +6136,7 @@ def test_cython_transform(self):
                 # bit a of hack to make sure the cythonized shift
                 # is equivalent to pre 0.17.1 behavior
                 if op == 'shift':
-                    gb._set_selection_from_grouper()
+                    gb._set_group_selection()
 
                 for (op, args), targop in ops:
                     if op != 'shift' and 'int' not in gb_target:
Original file line number	Diff line number	Diff line change
Expand Up		@@ -313,7 +313,7 @@ Bug Fixes


		- Bug in ``groupby`` where ``apply`` returns different result depending on whether first result is ``None`` or not (:issue:`12824`)

		- Bug in ``groupby(..).nth()`` where the group key is included inconsistently (:issue:`12839`)



Expand Down