Patch summary (GH 12824): make groupby(...).apply() give the same result
whether or not the first group's result is None.

  * doc/source/whatsnew/v0.18.2.txt: adds the Bug Fixes entry.
  * pandas/core/groupby.py, reset_identity: skips None entries instead of
    calling _get_axis on them.
  * pandas/core/groupby.py, _wrap_applied_output: dispatches on the first
    non-None value (helper first_non_None_value) rather than values[0],
    returns an empty DataFrame when every value is None, and reuses the
    helper when replacing None entries with empty objects.
  * pandas/tests/test_groupby.py: adds test_groupby_apply_none_first.

NOTE(review): this patch was recovered from a copy whose line breaks had
been collapsed.  Indentation inside each hunk was reconstructed from the
Python block structure and checked against the hunk line counts; confirm
the context lines against the target files before applying.

diff --git a/doc/source/whatsnew/v0.18.2.txt b/doc/source/whatsnew/v0.18.2.txt
index 459bdbf10a4f1..a8edade14359e 100644
--- a/doc/source/whatsnew/v0.18.2.txt
+++ b/doc/source/whatsnew/v0.18.2.txt
@@ -149,3 +149,4 @@ Bug Fixes
 - Bug in ``NaT`` - ``Period`` raises ``AttributeError`` (:issue:`13071`)
 - Bug in ``Period`` addition raises ``TypeError`` if ``Period`` is on right hand side (:issue:`13069`)
 - Bug in ``pd.set_eng_float_format()`` that would prevent NaN's from formatting (:issue:`11981`)
+- Bug in ``groupby`` where ``apply`` returns different result depending on whether first result is ``None`` or not (:issue:`12824`)
diff --git a/pandas/core/groupby.py b/pandas/core/groupby.py
index 7a4791189726e..ac7127084ffd0 100644
--- a/pandas/core/groupby.py
+++ b/pandas/core/groupby.py
@@ -806,8 +806,9 @@ def reset_identity(values):
             # reset the identities of the components
             # of the values to prevent aliasing
             for v in values:
-                ax = v._get_axis(self.axis)
-                ax._reset_identity()
+                if v is not None:
+                    ax = v._get_axis(self.axis)
+                    ax._reset_identity()
             return values
 
         if not not_indexed_same:
@@ -3228,7 +3229,21 @@ def _wrap_applied_output(self, keys, values, not_indexed_same=False):
 
         key_names = self.grouper.names
 
-        if isinstance(values[0], DataFrame):
+        # GH12824.
+        def first_non_None_value(values):
+            try:
+                v = next(v for v in values if v is not None)
+            except StopIteration:
+                return None
+            return v
+
+        v = first_non_None_value(values)
+
+        if v is None:
+            # GH9684. If all values are None, then this will throw an error.
+            # We'd prefer it return an empty dataframe.
+            return DataFrame()
+        elif isinstance(v, DataFrame):
             return self._concat_objects(keys, values,
                                         not_indexed_same=not_indexed_same)
         elif self.grouper.groupings is not None:
@@ -3255,21 +3270,15 @@ def _wrap_applied_output(self, keys, values, not_indexed_same=False):
                     key_index = None
 
             # make Nones an empty object
-            if com._count_not_none(*values) != len(values):
-                try:
-                    v = next(v for v in values if v is not None)
-                except StopIteration:
-                    # If all values are None, then this will throw an error.
-                    # We'd prefer it return an empty dataframe.
-                    return DataFrame()
-                if v is None:
-                    return DataFrame()
-                elif isinstance(v, NDFrame):
-                    values = [
-                        x if x is not None else
-                        v._constructor(**v._construct_axes_dict())
-                        for x in values
-                    ]
+            v = first_non_None_value(values)
+            if v is None:
+                return DataFrame()
+            elif isinstance(v, NDFrame):
+                values = [
+                    x if x is not None else
+                    v._constructor(**v._construct_axes_dict())
+                    for x in values
+                ]
 
             v = values[0]
 
diff --git a/pandas/tests/test_groupby.py b/pandas/tests/test_groupby.py
index 583b1c7aea270..0d9fffc7ea666 100644
--- a/pandas/tests/test_groupby.py
+++ b/pandas/tests/test_groupby.py
@@ -6279,6 +6279,29 @@ def test_func(x):
         expected = DataFrame()
         tm.assert_frame_equal(result, expected)
 
+    def test_groupby_apply_none_first(self):
+        # GH 12824. Tests if apply returns None first.
+        test_df1 = DataFrame({'groups': [1, 1, 1, 2], 'vars': [0, 1, 2, 3]})
+        test_df2 = DataFrame({'groups': [1, 2, 2, 2], 'vars': [0, 1, 2, 3]})
+
+        def test_func(x):
+            if x.shape[0] < 2:
+                return None
+            return x.iloc[[0, -1]]
+
+        result1 = test_df1.groupby('groups').apply(test_func)
+        result2 = test_df2.groupby('groups').apply(test_func)
+        index1 = MultiIndex.from_arrays([[1, 1], [0, 2]],
+                                        names=['groups', None])
+        index2 = MultiIndex.from_arrays([[2, 2], [1, 3]],
+                                        names=['groups', None])
+        expected1 = DataFrame({'groups': [1, 1], 'vars': [0, 2]},
+                              index=index1)
+        expected2 = DataFrame({'groups': [2, 2], 'vars': [1, 3]},
+                              index=index2)
+        tm.assert_frame_equal(result1, expected1)
+        tm.assert_frame_equal(result2, expected2)
+
     def test_first_last_max_min_on_time_data(self):
         # GH 10295
         # Verify that NaT is not in the result of max, min, first and last on