From 0ea820825b2cd1270909882713d090b239d3c010 Mon Sep 17 00:00:00 2001 From: rhshadrach Date: Wed, 9 Sep 2020 20:21:47 -0400 Subject: [PATCH 1/3] CLN: _wrap_applied_output --- pandas/core/groupby/generic.py | 33 ++++++++++++++++----------------- 1 file changed, 16 insertions(+), 17 deletions(-) diff --git a/pandas/core/groupby/generic.py b/pandas/core/groupby/generic.py index e870187fc7952..57856bc4bf8f9 100644 --- a/pandas/core/groupby/generic.py +++ b/pandas/core/groupby/generic.py @@ -1203,16 +1203,27 @@ def _wrap_applied_output(self, keys, values, not_indexed_same=False): values = [x if (x is not None) else backup for x in values] - v = values[0] - - if not isinstance(v, (np.ndarray, Index, Series)) and self.as_index: + if isinstance(first_not_none, (np.ndarray, Index)): + # GH#1738: values is list of arrays of unequal lengths + # fall through to the outer else clause + # TODO: sure this is right? we used to do this + # after raising AttributeError above + return self.obj._constructor_sliced( + values, index=key_index, name=self._selection_name + ) + elif not isinstance(first_not_none, Series): # values are not series or array-like but scalars # self._selection_name not passed through to Series as the # result should not take the name of original selection # of columns - return self.obj._constructor_sliced(values, index=key_index) + if self.as_index: + return self.obj._constructor_sliced(values, index=key_index) + else: + result = DataFrame(values, index=key_index, columns=[self._selection]) + self._insert_inaxis_grouper_inplace(result) + return result - if isinstance(v, Series): + else: all_indexed_same = all_indexes_same((x.index for x in values)) # GH3596 @@ -1266,18 +1277,6 @@ def _wrap_applied_output(self, keys, values, not_indexed_same=False): result = self.obj._constructor(stacked_values, index=index, columns=columns) - elif not self.as_index: - # We add grouping column below, so create a frame here - result = DataFrame(values, index=key_index, columns=[self._selection]) - else: - # GH#1738: values is list of arrays of unequal lengths - # fall through to the outer else clause - # TODO: sure this is right? we used to do this - # after raising AttributeError above - return self.obj._constructor_sliced( - values, index=key_index, name=self._selection_name - ) - # if we have date/time like in the original, then coerce dates # as we are stacking can easily have object dtypes here so = self._selected_obj From 83211725decf72d117bcc1a906459a33acc4956e Mon Sep 17 00:00:00 2001 From: rhshadrach Date: Wed, 9 Sep 2020 20:28:55 -0400 Subject: [PATCH 2/3] Fixed error --- pandas/core/groupby/generic.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/pandas/core/groupby/generic.py b/pandas/core/groupby/generic.py index 57856bc4bf8f9..1552256468ad2 100644 --- a/pandas/core/groupby/generic.py +++ b/pandas/core/groupby/generic.py @@ -1264,14 +1264,14 @@ def _wrap_applied_output(self, keys, values, not_indexed_same=False): if self.axis == 0: index = key_index - columns = v.index.copy() + columns = first_not_none.index.copy() if columns.name is None: # GH6124 - propagate name of Series when it's consistent names = {v.name for v in values} if len(names) == 1: columns.name = list(names)[0] else: - index = v.index + index = first_not_none.index columns = key_index stacked_values = stacked_values.T From 32c6030312bde87dd4ef1e9a6e383de4db23c3bf Mon Sep 17 00:00:00 2001 From: Richard Date: Sat, 12 Sep 2020 09:59:47 -0400 Subject: [PATCH 3/3] Added test --- pandas/tests/groupby/test_apply.py | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/pandas/tests/groupby/test_apply.py b/pandas/tests/groupby/test_apply.py index 3183305fe2933..db5c4af9c6f53 100644 --- a/pandas/tests/groupby/test_apply.py +++ b/pandas/tests/groupby/test_apply.py @@ -381,6 +381,16 @@ def test_apply_frame_to_series(df): tm.assert_numpy_array_equal(result.values, expected.values) +def test_apply_frame_not_as_index_column_name(df): + # GH 35964 - path within _wrap_applied_output not hit by a test + grouped = df.groupby(["A", "B"], as_index=False) + result = grouped.apply(len) + expected = grouped.count().rename(columns={"C": np.nan}).drop(columns="D") + # TODO: Use assert_frame_equal when column name is not np.nan (GH 36306) + tm.assert_index_equal(result.index, expected.index) + tm.assert_numpy_array_equal(result.values, expected.values) + + def test_apply_frame_concat_series(): def trans(group): return group.groupby("B")["C"].sum().sort_values()[:2]