Skip to content

Commit aaea503

Browse files
committed
BUG: implement remaining as_index=False handling in DataFrameGroupBy.aggregate. address GH #181
1 parent 2561e1d commit aaea503

File tree

3 files changed

+59
-3
lines changed

3 files changed

+59
-3
lines changed

RELEASE.rst

+7
Original file line numberDiff line numberDiff line change
@@ -38,6 +38,11 @@ This is an incremental bug fix and performance enhancement release
3838
- Implemented `BlockManager.take` resulting in significantly faster `take`
3939
performance on mixed-type `DataFrame` objects (GH #104)
4040
- Improved performance of `Series.sort_index`
41+
- Significant groupby performance enhancement: removed unnecessary integrity
42+
checks in DataFrame internals that were slowing down slicing operations to
43+
retrieve groups
44+
- Added informative Exception when passing dict to DataFrame groupby
45+
aggregation with axis != 0
4146

4247
**Bug fixes**
4348

@@ -46,6 +51,8 @@ This is an incremental bug fix and performance enhancement release
4651
- Fixed bug in unstacking code manifesting with more than 3 hierarchical
4752
levels
4853
- Throw an exception when a step is specified in a label-based slice (GH #185)
54+
- Fixed isnull to work correctly with np.float32. Fixes upstream bug described in
55+
GH #182
4956

5057
pandas 0.4.1
5158
============

pandas/core/groupby.py

+24-2
Original file line numberDiff line numberDiff line change
@@ -88,8 +88,11 @@ def __init__(self, obj, grouper=None, axis=0, level=None,
8888
self.level = level
8989

9090
if not as_index:
91-
assert(isinstance(obj, DataFrame))
92-
assert(axis == 0)
91+
if not isinstance(obj, DataFrame):
92+
raise TypeError('as_index=False only valid with DataFrame')
93+
if axis != 0:
94+
raise ValueError('as_index=False only valid for axis=0')
95+
9396
self.as_index = as_index
9497

9598
if groupings is None:
@@ -686,6 +689,9 @@ def aggregate(self, func_or_funcs, *args, **kwargs):
686689
else:
687690
ret = Series({})
688691

692+
if not self.as_index: # pragma: no cover
693+
print 'Warning, ignoring as_index=True'
694+
689695
return ret
690696

691697
def _wrap_aggregated_output(self, output, mask):
@@ -858,6 +864,9 @@ def aggregate(self, arg, *args, **kwargs):
858864

859865
result = {}
860866
if isinstance(arg, dict):
867+
if self.axis != 0: # pragma: no cover
868+
raise ValueError('Can only pass dict with axis=0')
869+
861870
for col, func in arg.iteritems():
862871
result[col] = self[col].agg(func)
863872

@@ -870,6 +879,19 @@ def aggregate(self, arg, *args, **kwargs):
870879
return self._aggregate_item_by_item(arg, *args, **kwargs)
871880
result = self._aggregate_generic(arg, *args, **kwargs)
872881

882+
if not self.as_index:
883+
if isinstance(result.index, MultiIndex):
884+
zipped = zip(result.index.levels, result.index.labels,
885+
result.index.names)
886+
for i, (lev, lab, name) in enumerate(zipped):
887+
result.insert(i, name, lev.values.take(lab))
888+
result = result.consolidate()
889+
else:
890+
values = result.index.values
891+
name = self.groupings[0].name
892+
result.insert(0, name, values)
893+
result.index = np.arange(len(result))
894+
873895
return result
874896

875897
def _aggregate_generic(self, func, *args, **kwargs):

pandas/tests/test_groupby.py

+28-1
Original file line numberDiff line numberDiff line change
@@ -517,7 +517,34 @@ def _check_op(op):
517517

518518
assert_series_equal(result, expected)
519519

520-
def test_groupby_as_index(self):
520+
def test_groupby_as_index_agg(self):
521+
grouped = self.df.groupby('A', as_index=False)
522+
523+
# single-key
524+
525+
result = grouped.agg(np.mean)
526+
expected = grouped.mean()
527+
assert_frame_equal(result, expected)
528+
529+
result2 = grouped.agg({'C' : np.mean, 'D' : np.sum})
530+
expected2 = grouped.mean()
531+
expected2['D'] = grouped.sum()['D']
532+
assert_frame_equal(result2, expected2)
533+
534+
# multi-key
535+
536+
grouped = self.df.groupby(['A', 'B'], as_index=False)
537+
538+
result = grouped.agg(np.mean)
539+
expected = grouped.mean()
540+
assert_frame_equal(result, expected)
541+
542+
result2 = grouped.agg({'C' : np.mean, 'D' : np.sum})
543+
expected2 = grouped.mean()
544+
expected2['D'] = grouped.sum()['D']
545+
assert_frame_equal(result2, expected2)
546+
547+
def test_groupby_as_index_cython(self):
521548
data = self.df
522549

523550
# single-key

0 commit comments

Comments
 (0)