pandas-dev · llllllllll · Jan 5, 2017 · Jan 23, 2017 · Jan 23, 2017
diff --git a/asv_bench/benchmarks/groupby.py b/asv_bench/benchmarks/groupby.py
@@ -108,16 +108,40 @@ def setup(self):
         self.N = 10000
         self.labels = np.random.randint(0, 2000, size=self.N)
         self.labels2 = np.random.randint(0, 3, size=self.N)
-        self.df = DataFrame({'key': self.labels, 'key2': self.labels2, 'value1': randn(self.N), 'value2': (['foo', 'bar', 'baz', 'qux'] * (self.N / 4)), })
-
-    def f(self, g):
+        self.df = DataFrame({
+            'key': self.labels,
+            'key2': self.labels2,
+            'value1': randn(self.N),
+            # floor division: repeating a list needs an int on Python 3
+            'value2': (['foo', 'bar', 'baz', 'qux'] * (self.N // 4)),
+        })
+
+    @staticmethod
+    def scalar_function(g):
+        # every group is reduced to the same constant scalar
         return 1
 
-    def time_groupby_frame_apply(self):
-        self.df.groupby(['key', 'key2']).apply(self.f)
+    def time_groupby_frame_apply_scalar_function(self):
+        # scalar-returning function, two grouping columns
+        self.df.groupby(['key', 'key2']).apply(self.scalar_function)
+
+    def time_groupby_frame_apply_scalar_function_overhead(self):
+        # scalar-returning function, one grouping column
+        self.df.groupby('key').apply(self.scalar_function)
+
+    @staticmethod
+    def df_copy_function(g):
+        # ensure that the group name is available (see GH #15062)
+        g.name
+        return g.copy()
+
+    def time_groupby_frame_df_copy_function(self):
+        # DataFrame-returning function, two grouping columns
+        self.df.groupby(['key', 'key2']).apply(self.df_copy_function)
 
-    def time_groupby_frame_apply_overhead(self):
-        self.df.groupby('key').apply(self.f)
+    def time_groupby_frame_apply_df_copy_overhead(self):
+        # DataFrame-returning function, one grouping column
+        self.df.groupby('key').apply(self.df_copy_function)
 
 
 #----------------------------------------------------------------------

diff --git a/doc/source/whatsnew/v0.20.0.txt b/doc/source/whatsnew/v0.20.0.txt
@@ -370,6 +370,10 @@ Performance Improvements
 - When reading buffer object in ``read_sas()`` method without specified format, filepath string is inferred rather than buffer object.
 
 
+- Improved performance of ``pd.core.groupby.GroupBy.apply`` when the applied
+  function uses the ``.name`` attribute of the group DataFrame (:issue:`15062`).
+
+
 
 .. _whatsnew_0200.bug_fixes:
 

diff --git a/pandas/src/reduce.pyx b/pandas/src/reduce.pyx
@@ -497,7 +497,7 @@ def apply_frame_axis0(object frame, object f, object names,
     # Need to infer if our low-level mucking is going to cause a segfault
     if n > 0:
         chunk = frame.iloc[starts[0]:ends[0]]
-        shape_before = chunk.shape
+        object.__setattr__(chunk, 'name', names[0])
         try:
             result = f(chunk)
             if result is chunk:

diff --git a/pandas/tests/groupby/test_groupby.py b/pandas/tests/groupby/test_groupby.py
@@ -6022,6 +6022,21 @@ def test_cummin_cummax(self):
         result = base_df.groupby('A').B.apply(lambda x: x.cummax()).to_frame()
         tm.assert_frame_equal(expected, result)
 
+    def test_group_name_available_in_inference_pass(self):
+        df = pd.DataFrame({'a': [0, 0, 1, 1, 2, 2], 'b': np.arange(6)})
+
+        names = []
+
+        def f(group):
+            names.append(group.name)
+            return group.copy()
+
+        df.groupby('a', sort=False, group_keys=False).apply(f)
+        # the first group's name appears twice: ``f`` is called on it once up
+        # front to check whether a faster route can be used, then once per group.
+        expected_names = [0, 0, 1, 2]
+        tm.assert_equal(names, expected_names)
+
 
 def assert_fp_equal(a, b):
     assert (np.abs(a - b) < 1e-12).all()