Skip to content

Commit 79e3ee6

Browse files
authored
BUG: error when unstacking in DataFrameGroupby.apply (#52446)
* BUG: error when unstacking in DataFrameGroupby.apply * fix pre-commit * adjust whatsnew for comments
1 parent 0e857ff commit 79e3ee6

File tree

3 files changed

+28
-1
lines changed

3 files changed

+28
-1
lines changed

doc/source/whatsnew/v2.1.0.rst

+1
Original file line numberDiff line numberDiff line change
@@ -296,6 +296,7 @@ Groupby/resample/rolling
296296
or :class:`PeriodIndex`, and the ``groupby`` method was given a function as its first argument,
297297
the function operated on the whole index rather than each element of the index. (:issue:`51979`)
298298
- Bug in :meth:`GroupBy.var` failing to raise ``TypeError`` when called with datetime64 or :class:`PeriodDtype` values (:issue:`52128`)
299+
- Bug in :meth:`DataFrameGroupBy.apply` raising an error when the groupby was subset with a list of columns, so that the selection was a :class:`DataFrame` (``[['a']]`` rather than ``['a']``), and the given callable returned :class:`Series` objects that did not all share the same index (:issue:`52444`)
299300
-
300301

301302
Reshaping

pandas/core/groupby/groupby.py

+7-1
Original file line numberDiff line numberDiff line change
@@ -1192,7 +1192,13 @@ def _concat_objects(
11921192
else:
11931193
result = concat(values, axis=self.axis)
11941194

1195-
name = self.obj.name if self.obj.ndim == 1 else self._selection
1195+
if self.obj.ndim == 1:
1196+
name = self.obj.name
1197+
elif is_hashable(self._selection):
1198+
name = self._selection
1199+
else:
1200+
name = None
1201+
11961202
if isinstance(result, Series) and name is not None:
11971203
result.name = name
11981204

pandas/tests/groupby/test_apply.py

+20
Original file line numberDiff line numberDiff line change
@@ -1203,6 +1203,26 @@ def test_groupby_apply_shape_cache_safety():
12031203
tm.assert_frame_equal(result, expected)
12041204

12051205

1206+
def test_groupby_apply_to_series_name():
1207+
# GH52444
1208+
df = DataFrame.from_dict(
1209+
{
1210+
"a": ["a", "b", "a", "b"],
1211+
"b1": ["aa", "ac", "ac", "ad"],
1212+
"b2": ["aa", "aa", "aa", "ac"],
1213+
}
1214+
)
1215+
grp = df.groupby("a")[["b1", "b2"]]
1216+
result = grp.apply(lambda x: x.unstack().value_counts())
1217+
1218+
expected_idx = MultiIndex.from_arrays(
1219+
arrays=[["a", "a", "b", "b", "b"], ["aa", "ac", "ac", "ad", "aa"]],
1220+
names=["a", None],
1221+
)
1222+
expected = Series([3, 1, 2, 1, 1], index=expected_idx, name="count")
1223+
tm.assert_series_equal(result, expected)
1224+
1225+
12061226
@pytest.mark.parametrize("dropna", [True, False])
12071227
def test_apply_na(dropna):
12081228
# GH#28984

0 commit comments

Comments
 (0)