BUG: omit nuisance columns in multi-groupby with Python agger

wesm · wesm · commit a0c50905de12 · 2011-12-12T18:32:18.000-05:00
diff --git a/RELEASE.rst b/RELEASE.rst
@@ -106,6 +106,7 @@ pandas 0.6.1
     change / item deletion in DataFrame (GH #473)
   - Index.get_loc should always raise Exception when there are duplicates
   - Handle differently-indexed Series input to DataFrame constructor (GH #475)
+  - Omit nuisance columns in multi-groupby with a Python function
 
 Thanks
 ------
diff --git a/pandas/core/groupby.py b/pandas/core/groupby.py
@@ -349,13 +349,11 @@ def _cython_agg_general(self, how):
         # aggregate all the columns at once?)
 
         output = {}
-        cannot_agg = []
         for name, obj in self._iterate_slices():
             if issubclass(obj.dtype.type, (np.number, np.bool_)):
                 if obj.dtype != np.float64:
                     obj = obj.astype('f8')
             else:
-                cannot_agg.append(name)
                 continue
 
             result, counts =  lib.group_aggregate(obj, label_list,
@@ -416,10 +414,13 @@ def _doit(reschunk, ctchunk, gen, shape_axis=0):
             # iterate through "columns" ex exclusions to populate output dict
             output = {}
             for name, obj in self._iterate_slices():
-                _doit(result, counts, gen_factory(obj))
-                # TODO: same mask for every column...
-                output[name] = result.ravel().copy()
-                result.fill(np.nan)
+                try:
+                    _doit(result, counts, gen_factory(obj))
+                    # TODO: same mask for every column...
+                    output[name] = result.ravel().copy()
+                    result.fill(np.nan)
+                except TypeError:
+                    continue
 
             mask = counts.ravel() > 0
             for name, result in output.iteritems():
diff --git a/pandas/tests/test_groupby.py b/pandas/tests/test_groupby.py
@@ -666,6 +666,10 @@ def test_omit_nuisance(self):
         expected = self.df.ix[:, ['A', 'C', 'D']].groupby('A').mean()
         assert_frame_equal(result, expected)
 
+        agged = grouped.agg(np.mean)
+        exp = grouped.mean()
+        assert_frame_equal(agged, exp)
+
         df = self.df.ix[:, ['A', 'C', 'D']]
         df['E'] = datetime.now()
         grouped = df.groupby('A')
@@ -677,6 +681,26 @@ def test_omit_nuisance(self):
         grouped = df.groupby({'A' : 0, 'C' : 0, 'D' : 1, 'E' : 1}, axis=1)
         result = self.assertRaises(TypeError, grouped.agg, np.sum)
 
+    def test_omit_nuisance_python_multiple(self):
+        data = DataFrame({'A' : ['foo', 'foo', 'foo', 'foo',
+                                 'bar', 'bar', 'bar', 'bar',
+                                 'foo', 'foo', 'foo'],  # 1st group key
+                          'B' : ['one', 'one', 'one', 'two',
+                                 'one', 'one', 'one', 'two',
+                                 'two', 'two', 'one'],  # 2nd group key
+                          'C' : ['dull', 'dull', 'shiny', 'dull',
+                                 'dull', 'shiny', 'shiny', 'dull',
+                                 'shiny', 'shiny', 'shiny'],  # strings, not a key
+                          'D' : np.random.randn(11),  # D, E, F: random floats
+                          'E' : np.random.randn(11),
+                          'F' : np.random.randn(11)})
+
+        grouped = data.groupby(['A', 'B'])  # group on two keys at once
+
+        agged = grouped.agg(np.mean)  # aggregate via a Python callable
+        exp = grouped.mean()  # reference result from grouped.mean()
+        assert_frame_equal(agged, exp)  # the two results must be equal
+
     def test_nonsense_func(self):
         df = DataFrame([0])
         self.assertRaises(Exception, df.groupby, lambda x: x + 'foo')