Skip to content

Commit a0c5090

Browse files
committed
BUG: omit nuisance columns in multi-groupby with Python agger
1 parent f57770c commit a0c5090

File tree

3 files changed

+32
-6
lines changed

3 files changed

+32
-6
lines changed

RELEASE.rst

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -106,6 +106,7 @@ pandas 0.6.1
106106
change / item deletion in DataFrame (GH #473)
107107
- Index.get_loc should always raise Exception when there are duplicates
108108
- Handle differently-indexed Series input to DataFrame constructor (GH #475)
109+
- Omit nuisance columns in multi-key groupby when aggregating with a Python function
109110

110111
Thanks
111112
------

pandas/core/groupby.py

Lines changed: 7 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -349,13 +349,11 @@ def _cython_agg_general(self, how):
349349
# aggregate all the columns at once?)
350350

351351
output = {}
352-
cannot_agg = []
353352
for name, obj in self._iterate_slices():
354353
if issubclass(obj.dtype.type, (np.number, np.bool_)):
355354
if obj.dtype != np.float64:
356355
obj = obj.astype('f8')
357356
else:
358-
cannot_agg.append(name)
359357
continue
360358

361359
result, counts = lib.group_aggregate(obj, label_list,
@@ -416,10 +414,13 @@ def _doit(reschunk, ctchunk, gen, shape_axis=0):
416414
# iterate through "columns" (excluding exclusions) to populate output dict
417415
output = {}
418416
for name, obj in self._iterate_slices():
419-
_doit(result, counts, gen_factory(obj))
420-
# TODO: same mask for every column...
421-
output[name] = result.ravel().copy()
422-
result.fill(np.nan)
417+
try:
418+
_doit(result, counts, gen_factory(obj))
419+
# TODO: same mask for every column...
420+
output[name] = result.ravel().copy()
421+
result.fill(np.nan)
422+
except TypeError:
423+
continue
423424

424425
mask = counts.ravel() > 0
425426
for name, result in output.iteritems():

pandas/tests/test_groupby.py

Lines changed: 24 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -666,6 +666,10 @@ def test_omit_nuisance(self):
666666
expected = self.df.ix[:, ['A', 'C', 'D']].groupby('A').mean()
667667
assert_frame_equal(result, expected)
668668

669+
agged = grouped.agg(np.mean)
670+
exp = grouped.mean()
671+
assert_frame_equal(agged, exp)
672+
669673
df = self.df.ix[:, ['A', 'C', 'D']]
670674
df['E'] = datetime.now()
671675
grouped = df.groupby('A')
@@ -677,6 +681,26 @@ def test_omit_nuisance(self):
677681
grouped = df.groupby({'A' : 0, 'C' : 0, 'D' : 1, 'E' : 1}, axis=1)
678682
result = self.assertRaises(TypeError, grouped.agg, np.sum)
679683

684+
def test_omit_nuisance_python_multiple(self):
685+
data = DataFrame({'A' : ['foo', 'foo', 'foo', 'foo',
686+
'bar', 'bar', 'bar', 'bar',
687+
'foo', 'foo', 'foo'],
688+
'B' : ['one', 'one', 'one', 'two',
689+
'one', 'one', 'one', 'two',
690+
'two', 'two', 'one'],
691+
'C' : ['dull', 'dull', 'shiny', 'dull',
692+
'dull', 'shiny', 'shiny', 'dull',
693+
'shiny', 'shiny', 'shiny'],
694+
'D' : np.random.randn(11),
695+
'E' : np.random.randn(11),
696+
'F' : np.random.randn(11)})
697+
698+
grouped = data.groupby(['A', 'B'])
699+
700+
agged = grouped.agg(np.mean)
701+
exp = grouped.mean()
702+
assert_frame_equal(agged, exp)
703+
680704
def test_nonsense_func(self):
681705
df = DataFrame([0])
682706
self.assertRaises(Exception, df.groupby, lambda x: x + 'foo')

0 commit comments

Comments
 (0)