diff --git a/doc/source/release.rst b/doc/source/release.rst index fac584eb2cba4..a8fe12940d479 100644 --- a/doc/source/release.rst +++ b/doc/source/release.rst @@ -91,6 +91,7 @@ Bug Fixes - ``HDFStore.select_as_multiple`` handles start and stop the same way as ``select`` (:issue:`6177`) - ``HDFStore.select_as_coordinates`` and ``select_column`` works where clauses that result in filters (:issue:`6177`) - Regression in join of non_unique_indexes (:issue:`6329`) +- Issue with groupby ``agg`` with a single function and a mixed-type frame (:issue:`6337`) pandas 0.13.1 ------------- diff --git a/pandas/core/groupby.py b/pandas/core/groupby.py index 7bd49a8b3a304..c0ea730e38a27 100644 --- a/pandas/core/groupby.py +++ b/pandas/core/groupby.py @@ -2123,6 +2123,7 @@ def _aggregate_item_by_item(self, func, *args, **kwargs): obj = self._obj_with_exclusions result = {} cannot_agg = [] + errors=None for item in obj: try: data = obj[item] @@ -2133,11 +2134,19 @@ def _aggregate_item_by_item(self, func, *args, **kwargs): except ValueError: cannot_agg.append(item) continue + except TypeError as e: + cannot_agg.append(item) + errors=e + continue result_columns = obj.columns if cannot_agg: result_columns = result_columns.drop(cannot_agg) + # GH6337 + if not len(result_columns) and errors is not None: + raise errors + return DataFrame(result, columns=result_columns) def _decide_output_index(self, output, labels): diff --git a/pandas/tests/test_groupby.py b/pandas/tests/test_groupby.py index cddf8669950aa..dbaf41a269ec2 100644 --- a/pandas/tests/test_groupby.py +++ b/pandas/tests/test_groupby.py @@ -370,6 +370,26 @@ def f(grp): e.name = None assert_series_equal(result,e) + def test_agg_api(self): + + # GH 6337 + # http://stackoverflow.com/questions/21706030/pandas-groupby-agg-function-column-dtype-error + # different api for agg when passed custom function with mixed frame + + df = DataFrame({'data1':np.random.randn(5), + 'data2':np.random.randn(5), + 'key1':['a','a','b','b','a'],
'key2':['one','two','one','two','one']}) + grouped = df.groupby('key1') + + def peak_to_peak(arr): + return arr.max() - arr.min() + + expected = grouped.agg([peak_to_peak]) + expected.columns=['data1','data2'] + result = grouped.agg(peak_to_peak) + assert_frame_equal(result,expected) + def test_agg_regression1(self): grouped = self.tsframe.groupby([lambda x: x.year, lambda x: x.month]) result = grouped.agg(np.mean)