diff --git a/RELEASE.rst b/RELEASE.rst
index f870932aa0936..463ca2f404001 100644
--- a/RELEASE.rst
+++ b/RELEASE.rst
@@ -262,6 +262,8 @@ pandas 0.11.0
     with an indexer (GH3216_)
   - Invoking df.applymap on a dataframe with dupe cols now raises a ValueError (GH2786_)
   - Apply with invalid returned indices raise correct Exception (GH2808_)
+  - Produce a series on apply if we only generate a singular series and have
+    a simple index (GH2893_)
 
 .. _GH622: https://github.com/pydata/pandas/issues/622
 .. _GH797: https://github.com/pydata/pandas/issues/797
@@ -313,6 +315,7 @@ pandas 0.11.0
 .. _GH2850: https://github.com/pydata/pandas/issues/2850
 .. _GH2898: https://github.com/pydata/pandas/issues/2898
 .. _GH2892: https://github.com/pydata/pandas/issues/2892
+.. _GH2893: https://github.com/pydata/pandas/issues/2893
 .. _GH2902: https://github.com/pydata/pandas/issues/2902
 .. _GH2903: https://github.com/pydata/pandas/issues/2903
 .. _GH2909: https://github.com/pydata/pandas/issues/2909
diff --git a/pandas/core/groupby.py b/pandas/core/groupby.py
index cb0a03d306c53..3a8565dba9029 100644
--- a/pandas/core/groupby.py
+++ b/pandas/core/groupby.py
@@ -1834,11 +1834,23 @@ def _wrap_applied_output(self, keys, values, not_indexed_same=False):
                     key_index = Index(keys, name=key_names[0])
 
             if isinstance(values[0], np.ndarray):
-                if (isinstance(values[0], Series) and
-                    not _all_indexes_same([x.index for x in values])):
-                    return self._concat_objects(keys, values,
-                                                not_indexed_same=not_indexed_same)
-
+                if isinstance(values[0], Series):
+                    applied_index = self.obj._get_axis(self.axis)
+                    all_indexed_same = _all_indexes_same([x.index for x in values])
+                    singular_series = len(values) == 1 and applied_index.nlevels == 1
+
+                    # assign the name to this series
+                    if singular_series:
+                        values[0].name = keys[0]
+
+                    # GH2893
+                    # we have series in the values array, we want to produce a series:
+                    # if any of the sub-series are not indexed the same
+                    # OR we don't have a multi-index and we have only a single values
+                    if singular_series or not all_indexed_same:
+                        return self._concat_objects(keys, values,
+                                                    not_indexed_same=not_indexed_same)
+
                 try:
                     if self.axis == 0:
 
diff --git a/pandas/tests/test_groupby.py b/pandas/tests/test_groupby.py
index 7aad2e0b734b1..18f3e4a98ff85 100644
--- a/pandas/tests/test_groupby.py
+++ b/pandas/tests/test_groupby.py
@@ -261,6 +261,26 @@ def test_groupby_nonobject_dtype(self):
         expected = self.mframe.groupby(key.astype('O')).sum()
         assert_frame_equal(result, expected)
 
+    def test_groupby_return_type(self):
+
+        # GH2893
+        df1 = DataFrame([{"val1": 1, "val2" : 20}, {"val1":1, "val2": 19},
+                         {"val1":2, "val2": 27}, {"val1":2, "val2": 12}])
+
+        def func(dataf):
+            return dataf["val2"] - dataf["val2"].mean()
+
+        result = df1.groupby("val1").apply(func)
+        self.assert_(isinstance(result,Series))
+
+        df2 = DataFrame([{"val1": 1, "val2" : 20}, {"val1":1, "val2": 19},
+                         {"val1":1, "val2": 27}, {"val1":1, "val2": 12}])
+        def func(dataf):
+            return dataf["val2"] - dataf["val2"].mean()
+
+        result = df2.groupby("val1").apply(func)
+        self.assert_(isinstance(result,Series))
+
     def test_agg_regression1(self):
         grouped = self.tsframe.groupby([lambda x: x.year, lambda x: x.month])
         result = grouped.agg(np.mean)