Skip to content

Commit 0d8384b

Browse files
rhshadrach authored and Yi Wei committed
REGR: SeriesGroupBy.agg with multiple categoricals, as_index=False, and a list fails (pandas-dev#52850)
1 parent 70fe1e7 commit 0d8384b

File tree

3 files changed

+50
-3
lines changed

3 files changed

+50
-3
lines changed

doc/source/whatsnew/v2.0.1.rst

+1
Original file line number | Diff line number | Diff line change
@@ -19,6 +19,7 @@ Fixed regressions
1919
- Fixed regression in :meth:`DataFrame.sort_values` not resetting index when :class:`DataFrame` is already sorted and ``ignore_index=True`` (:issue:`52553`)
2020
- Fixed regression in :meth:`MultiIndex.isin` raising ``TypeError`` for ``Generator`` (:issue:`52568`)
2121
- Fixed regression in :meth:`Series.describe` showing ``RuntimeWarning`` for extension dtype :class:`Series` with one element (:issue:`52515`)
22+
- Fixed regression in :meth:`SeriesGroupBy.agg` failing when grouping with categorical data, multiple groupings, ``as_index=False``, and a list of aggregations (:issue:`52760`)
2223

2324
.. ---------------------------------------------------------------------------
2425
.. _whatsnew_201.bug_fixes:

pandas/core/groupby/generic.py

+1 -3
Original file line number | Diff line number | Diff line change
@@ -245,8 +245,7 @@ def aggregate(self, func=None, *args, engine=None, engine_kwargs=None, **kwargs)
245245
assert columns is not None # for mypy
246246
ret.columns = columns
247247
if not self.as_index:
248-
ret = self._insert_inaxis_grouper(ret)
249-
ret.index = default_index(len(ret))
248+
ret = ret.reset_index()
250249
return ret
251250

252251
else:
@@ -352,7 +351,6 @@ def _aggregate_multiple_funcs(self, arg, *args, **kwargs) -> DataFrame:
352351
output = self.obj._constructor_expanddim(indexed_output, index=None)
353352
output.columns = Index(key.label for key in results)
354353

355-
output = self._reindex_output(output)
356354
return output
357355

358356
def _wrap_applied_output(

pandas/tests/groupby/test_categorical.py

+48
Original file line number | Diff line number | Diff line change
@@ -14,6 +14,7 @@
1414
qcut,
1515
)
1616
import pandas._testing as tm
17+
from pandas.api.typing import SeriesGroupBy
1718
from pandas.tests.groupby import get_groupby_method_args
1819

1920

@@ -2036,3 +2037,50 @@ def test_groupby_default_depr(cat_columns, keys):
20362037
klass = FutureWarning if set(cat_columns) & set(keys) else None
20372038
with tm.assert_produces_warning(klass, match=msg):
20382039
df.groupby(keys)
2040+
2041+
2042+
@pytest.mark.parametrize("test_series", [True, False])
2043+
@pytest.mark.parametrize("keys", [["a1"], ["a1", "a2"]])
2044+
def test_agg_list(request, as_index, observed, reduction_func, test_series, keys):
2045+
# GH#52760
2046+
if test_series and reduction_func == "corrwith":
2047+
assert not hasattr(SeriesGroupBy, "corrwith")
2048+
pytest.skip("corrwith not implemented for SeriesGroupBy")
2049+
elif reduction_func == "corrwith":
2050+
msg = "GH#32293: attempts to call SeriesGroupBy.corrwith"
2051+
request.node.add_marker(pytest.mark.xfail(reason=msg))
2052+
elif (
2053+
reduction_func == "nunique"
2054+
and not test_series
2055+
and len(keys) != 1
2056+
and not observed
2057+
and not as_index
2058+
):
2059+
msg = "GH#52848 - raises a ValueError"
2060+
request.node.add_marker(pytest.mark.xfail(reason=msg))
2061+
2062+
df = DataFrame({"a1": [0, 0, 1], "a2": [2, 3, 3], "b": [4, 5, 6]})
2063+
df = df.astype({"a1": "category", "a2": "category"})
2064+
if "a2" not in keys:
2065+
df = df.drop(columns="a2")
2066+
gb = df.groupby(by=keys, as_index=as_index, observed=observed)
2067+
if test_series:
2068+
gb = gb["b"]
2069+
args = get_groupby_method_args(reduction_func, df)
2070+
2071+
result = gb.agg([reduction_func], *args)
2072+
expected = getattr(gb, reduction_func)(*args)
2073+
2074+
if as_index and (test_series or reduction_func == "size"):
2075+
expected = expected.to_frame(reduction_func)
2076+
if not test_series:
2077+
if not as_index:
2078+
# TODO: GH#52849 - as_index=False is not respected
2079+
expected = expected.set_index(keys)
2080+
expected.columns = MultiIndex(
2081+
levels=[["b"], [reduction_func]], codes=[[0], [0]]
2082+
)
2083+
elif not as_index:
2084+
expected.columns = keys + [reduction_func]
2085+
2086+
tm.assert_equal(result, expected)

0 commit comments

Comments
 (0)