
BUG: Groupby selection context not being properly reset #28541


Closed · wants to merge 9 commits
1 change: 1 addition & 0 deletions doc/source/whatsnew/v1.0.0.rst
@@ -240,6 +240,7 @@ Groupby/resample/rolling
 - Bug in :meth:`DataFrame.rolling` not allowing for rolling over datetimes when ``axis=1`` (:issue: `28192`)
 - Bug in :meth:`DataFrame.groupby` not offering selection by column name when ``axis=1`` (:issue:`27614`)
 - Bug in :meth:`DataFrameGroupby.agg` not able to use lambda function with named aggregation (:issue:`27519`)
+- Bug in :meth:`DataFrameGroupby` causing unexpected mutations of the groupby object (:issue:`28523`)
Contributor

I think the ref needs to be :class:`pandas.core.groupby.DataFrameGroupBy`. And maybe briefly explain the user-visible behavior: "Multiple operations on a DataFrameGroupBy object not giving the same results in certain cases".


 Reshaping
 ^^^^^^^^^
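
To make the user-visible behaviour concrete (as the review comment above suggests), here is a minimal reproduction of GH28523 on an affected version. The exact symptom can vary slightly by version; the point is that the two `apply` calls should give the same result:

```python
import pandas as pd

df = pd.DataFrame({"A": ["foo", "bar", "foo", "bar"], "B": [1, 2, 3, 4]})
grouped = df.groupby("A")

before = grouped.apply(lambda x: x)  # grouping column "A" still visible
grouped.sum()                        # leaves the internal group selection set
after = grouped.apply(lambda x: x)   # on affected versions this differs from `before`

print(before.equals(after))          # True once the selection context is reset
```
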
66 changes: 34 additions & 32 deletions pandas/core/groupby/groupby.py
@@ -1364,27 +1364,27 @@ def f(self, **kwargs):
                 if "min_count" not in kwargs:
                     kwargs["min_count"] = min_count

-                self._set_group_selection()
+                with _group_selection_context(self):

-                # try a cython aggregation if we can
-                try:
-                    return self._cython_agg_general(alias, alt=npfunc, **kwargs)
-                except AssertionError as e:
-                    raise SpecificationError(str(e))
-                except Exception:
-                    pass
+                    # try a cython aggregation if we can
+                    try:
+                        return self._cython_agg_general(alias, alt=npfunc, **kwargs)
+                    except AssertionError as e:
+                        raise SpecificationError(str(e))
+                    except Exception:
+                        pass

-                # apply a non-cython aggregation
-                result = self.aggregate(lambda x: npfunc(x, axis=self.axis))
+                    # apply a non-cython aggregation
+                    result = self.aggregate(lambda x: npfunc(x, axis=self.axis))

-                # coerce the resulting columns if we can
-                if isinstance(result, DataFrame):
-                    for col in result.columns:
-                        result[col] = self._try_cast(result[col], self.obj[col])
-                else:
-                    result = self._try_cast(result, self.obj)
+                    # coerce the resulting columns if we can
+                    if isinstance(result, DataFrame):
+                        for col in result.columns:
+                            result[col] = self._try_cast(result[col], self.obj[col])
+                    else:
+                        result = self._try_cast(result, self.obj)

-                return result
+                    return result

set_function_name(f, name, cls)
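
The hunk above swaps the bare `self._set_group_selection()` call for the `_group_selection_context` helper, so the cached selection is cleared again when the block exits instead of leaking into later operations on the same groupby object. As a rough sketch (not the exact pandas implementation), such a helper can be written as:

```python
from contextlib import contextmanager

@contextmanager
def _group_selection_context(groupby):
    # Set the cached group selection on entry, and make sure it is reset on
    # exit so later calls on the same GroupBy object start from a clean state.
    groupby._set_group_selection()
    try:
        yield groupby
    finally:
        groupby._reset_group_selection()
```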

@@ -1757,28 +1757,30 @@ def nth(self, n: Union[int, List[int]], dropna: Optional[str] = None) -> DataFrame:
             nth_values = list(set(n))

             nth_array = np.array(nth_values, dtype=np.intp)
-            self._set_group_selection()

-            mask_left = np.in1d(self._cumcount_array(), nth_array)
-            mask_right = np.in1d(self._cumcount_array(ascending=False) + 1, -nth_array)
-            mask = mask_left | mask_right
+            with _group_selection_context(self):
+                mask_left = np.in1d(self._cumcount_array(), nth_array)
+                mask_right = np.in1d(
+                    self._cumcount_array(ascending=False) + 1, -nth_array
+                )
+                mask = mask_left | mask_right

-            ids, _, _ = self.grouper.group_info
+                ids, _, _ = self.grouper.group_info

-            # Drop NA values in grouping
-            mask = mask & (ids != -1)
+                # Drop NA values in grouping
+                mask = mask & (ids != -1)

-            out = self._selected_obj[mask]
-            if not self.as_index:
-                return out
+                out = self._selected_obj[mask]
+                if not self.as_index:
+                    return out

-            result_index = self.grouper.result_index
-            out.index = result_index[ids[mask]]
+                result_index = self.grouper.result_index
+                out.index = result_index[ids[mask]]

-            if not self.observed and isinstance(result_index, CategoricalIndex):
-                out = out.reindex(result_index)
+                if not self.observed and isinstance(result_index, CategoricalIndex):
+                    out = out.reindex(result_index)

-            return out.sort_index() if self.sort else out
+                return out.sort_index() if self.sort else out

         # dropna is truthy
         if isinstance(n, valid_containers):
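
For reference, the mask arithmetic that `nth` performs inside the selection context can be reproduced with plain NumPy. The arrays below stand in for the forward and backward cumcount of five rows split into groups of sizes 3 and 2 (illustrative values, not pandas internals):

```python
import numpy as np

cumcount_fwd = np.array([0, 1, 2, 0, 1])  # position within group, counting forwards
cumcount_bwd = np.array([2, 1, 0, 1, 0])  # position within group, counting backwards

nth_array = np.array([0, -1], dtype=np.intp)  # ask for the first and last row of each group

mask_left = np.in1d(cumcount_fwd, nth_array)        # matches non-negative positions
mask_right = np.in1d(cumcount_bwd + 1, -nth_array)  # matches negative positions from the end
mask = mask_left | mask_right

print(mask)  # [ True False  True  True  True]; the middle row of the 3-row group is excluded
```
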
27 changes: 27 additions & 0 deletions pandas/tests/groupby/test_groupby.py
@@ -433,6 +433,33 @@ def test_frame_groupby_columns(tsframe):
         assert len(v.columns) == 2


+@pytest.mark.parametrize(
+    "func, args",
+    [
+        ("sum", []),
Contributor

Do we need all these parametrizations? Or would just a few work? sum and nth?

Contributor Author

Probably not. It looks like one of [sum, prod, min, max, first, last] and then nth would suffice.

Member

We have a reduction_func fixture that I think you should use here; it would cover more functions, but it would also be more comprehensive and consistent with some other tests (a possible version using that fixture is sketched after this diff).

("prod", []),
("min", []),
("max", []),
("nth", [0]),
("last", []),
("first", []),
],
)
def test_frame_groupby_avoids_mutate(func, args):
# GH28523
df = pd.DataFrame({"A": ["foo", "bar", "foo", "bar"], "B": [1, 2, 3, 4]})
grouped = df.groupby("A")

expected = grouped.apply(lambda x: x)

fn = getattr(grouped, func)
fn(*args)

result = grouped.apply(lambda x: x)

tm.assert_frame_equal(expected, result)


 def test_frame_set_name_single(df):
     grouped = df.groupby("A")

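Following the reviewer's suggestion above, a version of the new test built on the reduction_func fixture might look like the sketch below; the special-casing of "nth" and "corrwith" arguments is an assumption based on how other tests in this module handle those kernels, and the imports already exist in the test module:

```python
import pandas as pd
import pandas._testing as tm  # pandas.util.testing as tm on 0.25.x


def test_frame_groupby_does_not_mutate(reduction_func):
    # GH28523 - running any reduction should not change later apply results.
    df = pd.DataFrame({"A": ["foo", "bar", "foo", "bar"], "B": [1, 2, 3, 4]})
    grouped = df.groupby("A")

    # Kernels that need positional arguments; everything else is called bare.
    args = {"nth": (0,), "corrwith": (df,)}.get(reduction_func, ())

    expected = grouped.apply(lambda x: x)
    getattr(grouped, reduction_func)(*args)
    result = grouped.apply(lambda x: x)

    tm.assert_frame_equal(expected, result)
```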