Skip to content

Commit 1ff776a

Browse files
committed
Merge pull request #6718 from jreback/groupby_agg
BUG: Bug in consistency of groupby aggregation when passing a custom function (GH6715)
2 parents 4c3b9e5 + 88829b4 commit 1ff776a

File tree

3 files changed

+40
-5
lines changed

3 files changed

+40
-5
lines changed

doc/source/release.rst

+4-3
Original file line numberDiff line numberDiff line change
@@ -139,11 +139,11 @@ API Changes
139139
- The following keywords are now accepted by :meth:`DataFrame.plot(kind='bar')` and :meth:`DataFrame.plot(kind='barh')`.
140140

141141
- `width`: Specify the bar width. In previous versions, a static value of 0.5 was passed to matplotlib and could not be overridden. (:issue:`6604`)
142-
142+
143143
- `align`: Specify the bar alignment. Default is `center` (different from matplotlib). In previous versions, pandas passed `align='edge'` to matplotlib and adjusted the location to `center` by itself, with the result that the `align` keyword was not applied as expected. (:issue:`4525`)
144-
144+
145145
- `position`: Specify the relative alignment for the bar plot layout. Ranges from 0 (left/bottom-end) to 1 (right/top-end). Default is 0.5 (center). (:issue:`6604`)
146-
146+
147147
- Define and document the order of column vs index names in query/eval
148148
(:issue:`6676`)
149149

@@ -289,6 +289,7 @@ Bug Fixes
289289
- Bug in binary operations with a rhs of a Series not aligning (:issue:`6681`)
290290
- Bug in ``DataFrame.to_stata`` which incorrectly handled NaN values and ignored the 'with_index' keyword argument (:issue:`6685`)
291291
- Bug in resample with extra bins when using an evenly divisible frequency (:issue:`4076`)
292+
- Bug in consistency of groupby aggregation when passing a custom function (:issue:`6715`)
292293

293294
pandas 0.13.1
294295
-------------

pandas/core/groupby.py

+13-2
Original file line numberDiff line numberDiff line change
@@ -1081,10 +1081,13 @@ def apply(self, f, data, axis=0):
10811081
try:
10821082
values, mutated = splitter.fast_apply(f, group_keys)
10831083
return group_keys, values, mutated
1084-
except Exception:
1084+
except (lib.InvalidApply):
10851085
# we detect a mutation of some kind
10861086
# so take slow path
10871087
pass
1088+
except (Exception) as e:
1089+
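# NOTE: the intent is to raise this error to the caller, but it is currently swallowed (``pass``) and execution falls through to the slow path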
1090+
pass
10881091

10891092
result_values = []
10901093
for key, (i, group) in zip(group_keys, splitter):
@@ -2295,7 +2298,15 @@ def aggregate(self, arg, *args, **kwargs):
22952298
if self.grouper.nkeys > 1:
22962299
return self._python_agg_general(arg, *args, **kwargs)
22972300
else:
2298-
result = self._aggregate_generic(arg, *args, **kwargs)
2301+
2302+
# try to treat as if we are passing a list
2303+
try:
2304+
assert not args and not kwargs
2305+
result = self._aggregate_multiple_funcs([arg])
2306+
result.columns = Index(result.columns.levels[0],
2307+
name=self._selected_obj.columns.name)
2308+
except:
2309+
result = self._aggregate_generic(arg, *args, **kwargs)
22992310

23002311
if not self.as_index:
23012312
if isinstance(result.index, MultiIndex):

pandas/tests/test_groupby.py

+23
Original file line numberDiff line numberDiff line change
@@ -1946,6 +1946,29 @@ def test_grouping_ndarray(self):
19461946
expected = self.df.groupby('A').sum()
19471947
assert_frame_equal(result, expected, check_names=False) # Note: no names when grouping by value
19481948

1949+
def test_agg_consistency(self):
1950+
# agg([func]) and agg(func) did not give consistent results
1951+
# GH 6715
1952+
1953+
def P1(a):
1954+
try:
1955+
return np.percentile(a.dropna(), q=1)
1956+
except:
1957+
return np.nan
1958+
1959+
import datetime as dt
1960+
df = DataFrame({'col1':[1,2,3,4],
1961+
'col2':[10,25,26,31],
1962+
'date':[dt.date(2013,2,10),dt.date(2013,2,10),dt.date(2013,2,11),dt.date(2013,2,11)]})
1963+
1964+
g = df.groupby('date')
1965+
1966+
expected = g.agg([P1])
1967+
expected.columns = expected.columns.levels[0]
1968+
1969+
result = g.agg(P1)
1970+
assert_frame_equal(result, expected)
1971+
19491972
def test_apply_typecast_fail(self):
19501973
df = DataFrame({'d': [1., 1., 1., 2., 2., 2.],
19511974
'c': np.tile(['a', 'b', 'c'], 2),

0 commit comments

Comments
 (0)