BUG: handle as_index=False for pseudo multi-groupers (e.g. .describe())

jreback · jreback · commit 134dd1f7cc1a · 2014-04-29T14:46:47.000-04:00
diff --git a/doc/source/v0.14.0.txt b/doc/source/v0.14.0.txt
@@ -126,6 +126,13 @@ API changes
      g.count()
      g.describe()
 
+  passing ``as_index=False`` will leave the grouped column in-place (this is not a change in 0.14.0)
+
+     df = DataFrame([[1, np.nan], [1, 4], [5, 6], [5, 8]], columns=['A', 'B'])
+     g = df.groupby('A',as_index=False)
+     g.count()
+     g.describe()
+
 - Allow specification of a more complex groupby via ``pd.Grouper``, such as grouping
   by a Time and a string field simultaneously. See :ref:`the docs <groupby.specify>`. (:issue:`3794`)
 
diff --git a/pandas/core/groupby.py b/pandas/core/groupby.py
@@ -456,7 +456,7 @@ def _selected_obj(self):
     def _set_selection_from_grouper(self):
         """ we may need create a selection if we have non-level groupers """
         grp = self.grouper
-        if self._selection is None and getattr(grp,'groupings',None) is not None:
+        if self._selection is None and self.as_index and getattr(grp,'groupings',None) is not None:
             ax = self.obj._info_axis
             groupers = [ g.name for g in grp.groupings if g.level is None and g.name is not None and g.name in ax ]
             if len(groupers):
@@ -1029,12 +1029,23 @@ def _concat_objects(self, keys, values, not_indexed_same=False):
                 result = result.reindex(ax)
             else:
                 result = result.reindex_axis(ax, axis=self.axis)
-        elif self.group_keys and self.as_index:
-            group_keys = keys
-            group_levels = self.grouper.levels
-            group_names = self.grouper.names
-            result = concat(values, axis=self.axis, keys=group_keys,
-                            levels=group_levels, names=group_names)
+
+        elif self.group_keys:
+
+            if self.as_index:
+
+                # possible MI return case
+                group_keys = keys
+                group_levels = self.grouper.levels
+                group_names = self.grouper.names
+                result = concat(values, axis=self.axis, keys=group_keys,
+                                levels=group_levels, names=group_names)
+            else:
+
+                # GH5610, returns a MI, with the first level being a
+                # range index
+                keys = list(range(len(values)))
+                result = concat(values, axis=self.axis, keys=keys)
         else:
             result = concat(values, axis=self.axis)
 
@@ -2528,6 +2539,7 @@ def _wrap_applied_output(self, keys, values, not_indexed_same=False):
         elif hasattr(self.grouper, 'groupings'):
             if len(self.grouper.groupings) > 1:
                 key_index = MultiIndex.from_tuples(keys, names=key_names)
+
             else:
                 ping = self.grouper.groupings[0]
                 if len(keys) == ping.ngroups:
@@ -2540,8 +2552,13 @@ def _wrap_applied_output(self, keys, values, not_indexed_same=False):
                     # reorder the values
                     values = [values[i] for i in indexer]
                 else:
+
                     key_index = Index(keys, name=key_names[0])
 
+                # don't use the key indexer
+                if not self.as_index:
+                    key_index = None
+
             # make Nones an empty object
             if com._count_not_none(*values) != len(values):
                 v = next(v for v in values if v is not None)
@@ -2611,7 +2628,7 @@ def _wrap_applied_output(self, keys, values, not_indexed_same=False):
 
                         # normally use vstack as its faster than concat
                         # and if we have mi-columns
-                        if not _np_version_under1p7 or isinstance(v.index,MultiIndex):
+                        if not _np_version_under1p7 or isinstance(v.index,MultiIndex) or key_index is None:
                             stacked_values = np.vstack([np.asarray(x) for x in values])
                             result = DataFrame(stacked_values,index=key_index,columns=index)
                         else:
diff --git a/pandas/tests/test_groupby.py b/pandas/tests/test_groupby.py
@@ -1378,7 +1378,8 @@ def test_groupby_as_index_apply(self):
         res_not_as_apply = g_not_as.apply(lambda x: x.head(2)).index
 
         # apply doesn't maintain the original ordering
-        exp_not_as_apply = Index([0, 2, 1, 4])
+        # changed in GH5610: with as_index=False, apply now returns a MI here
+        exp_not_as_apply = MultiIndex.from_tuples([(0, 0), (0, 2), (1, 1), (2, 4)])
         exp_as_apply = MultiIndex.from_tuples([(1, 0), (1, 2), (2, 1), (3, 4)])
 
         assert_index_equal(res_as_apply, exp_as_apply)
@@ -1994,19 +1995,28 @@ def test_non_cython_api(self):
 
         df = DataFrame([[1, 2, 'foo'], [1, nan, 'bar',], [3, nan, 'baz']], columns=['A', 'B','C'])
         g = df.groupby('A')
+        gni = df.groupby('A',as_index=False)
 
         # mad
         expected = DataFrame([[0],[nan]],columns=['B'],index=[1,3])
         expected.index.name = 'A'
         result = g.mad()
         assert_frame_equal(result,expected)
 
+        expected = DataFrame([[0.,0.],[0,nan]],columns=['A','B'],index=[0,1])
+        result = gni.mad()
+        assert_frame_equal(result,expected)
+
         # describe
         expected = DataFrame(dict(B = concat([df.loc[[0,1],'B'].describe(),df.loc[[2],'B'].describe()],keys=[1,3])))
         expected.index.names = ['A',None]
         result = g.describe()
         assert_frame_equal(result,expected)
 
+        expected = concat([df.loc[[0,1],['A','B']].describe(),df.loc[[2],['A','B']].describe()],keys=[0,1])
+        result = gni.describe()
+        assert_frame_equal(result,expected)
+
         # any
         expected = DataFrame([[True, True],[False, True]],columns=['B','C'],index=[1,3])
         expected.index.name = 'A'