diff --git a/doc/source/whatsnew/v3.0.0.rst b/doc/source/whatsnew/v3.0.0.rst index c77348b365370..106c2579cd31a 100644 --- a/doc/source/whatsnew/v3.0.0.rst +++ b/doc/source/whatsnew/v3.0.0.rst @@ -441,6 +441,7 @@ Groupby/resample/rolling - Bug in :meth:`.Resampler.interpolate` on a :class:`DataFrame` with non-uniform sampling and/or indices not aligning with the resulting resampled index would result in wrong interpolation (:issue:`21351`) - Bug in :meth:`DataFrame.ewm` and :meth:`Series.ewm` when passed ``times`` and aggregation functions other than mean (:issue:`51695`) - Bug in :meth:`DataFrameGroupBy.apply` that was returning a completely empty DataFrame when all return values of ``func`` were ``None`` instead of returning an empty DataFrame with the original columns and dtypes. (:issue:`57775`) +- Bug in :meth:`DataFrameGroupBy.apply` with ``as_index=False`` that was returning :class:`MultiIndex` instead of returning :class:`Index`. (:issue:`58291`) Reshaping diff --git a/pandas/core/groupby/groupby.py b/pandas/core/groupby/groupby.py index 79d9f49a3b355..f44ef8c4dbbfa 100644 --- a/pandas/core/groupby/groupby.py +++ b/pandas/core/groupby/groupby.py @@ -1202,10 +1202,7 @@ def _concat_objects( sort=False, ) else: - # GH5610, returns a MI, with the first level being a - # range index - keys = RangeIndex(len(values)) - result = concat(values, axis=0, keys=keys) + result = concat(values, axis=0) elif not not_indexed_same: result = concat(values, axis=0) diff --git a/pandas/tests/groupby/methods/test_value_counts.py b/pandas/tests/groupby/methods/test_value_counts.py index be52b4a591c26..0f136b06c782a 100644 --- a/pandas/tests/groupby/methods/test_value_counts.py +++ b/pandas/tests/groupby/methods/test_value_counts.py @@ -329,13 +329,10 @@ def test_against_frame_and_seriesgroupby( else: name = "proportion" if normalize else "count" expected = expected.reset_index().rename({0: name}, axis=1) - if groupby == "column": - expected = expected.rename({"level_0": "country"}, axis=1) - expected["country"] = np.where(expected["country"], "US", "FR") - elif groupby == "function": - expected["level_0"] = expected["level_0"] == 1 + if groupby in ["array", "function"] and (not as_index and frame): + expected.insert(loc=0, column="level_0", value=result["level_0"]) else: - expected["level_0"] = np.where(expected["level_0"], "US", "FR") + expected.insert(loc=0, column="country", value=result["country"]) tm.assert_frame_equal(result, expected) else: # compare against SeriesGroupBy value_counts diff --git a/pandas/tests/groupby/test_apply.py b/pandas/tests/groupby/test_apply.py index 1a2589fe94ea5..e27c782c1bdcf 100644 --- a/pandas/tests/groupby/test_apply.py +++ b/pandas/tests/groupby/test_apply.py @@ -315,7 +315,7 @@ def test_groupby_as_index_apply(): # apply doesn't maintain the original ordering # changed in GH5610 as the as_index=False returns a MI here - exp_not_as_apply = MultiIndex.from_tuples([(0, 0), (0, 2), (1, 1), (2, 4)]) + exp_not_as_apply = Index([0, 2, 1, 4]) tp = [(1, 0), (1, 2), (2, 1), (3, 4)] exp_as_apply = MultiIndex.from_tuples(tp, names=["user_id", None]) diff --git a/pandas/tests/groupby/test_apply_mutate.py b/pandas/tests/groupby/test_apply_mutate.py index e5028884e992b..fa20efad4da77 100644 --- a/pandas/tests/groupby/test_apply_mutate.py +++ b/pandas/tests/groupby/test_apply_mutate.py @@ -90,9 +90,7 @@ def fn(x): result = df.groupby(["col1"], as_index=False).apply(fn) expected = pd.Series( [1, 2, 0, 4, 5, 0], - index=pd.MultiIndex.from_tuples( - [(0, 0), (0, 1), (0, 2), (1, 3), (1, 4), (1, 5)] - ), + index=range(6), name="col2", ) tm.assert_series_equal(result, expected) diff --git a/pandas/tests/groupby/test_groupby.py b/pandas/tests/groupby/test_groupby.py index 54d7895691f3f..d50fea459552a 100644 --- a/pandas/tests/groupby/test_groupby.py +++ b/pandas/tests/groupby/test_groupby.py @@ -113,8 +113,9 @@ def f(x, q=None, axis=0): expected_seq = df_grouped.quantile([0.4, 0.8]) if not as_index: # apply treats the op as a transform; .quantile knows it's a reduction - apply_result = apply_result.reset_index() - apply_result["level_0"] = [1, 1, 2, 2] + apply_result.index = range(4) + apply_result.insert(loc=0, column="level_0", value=[1, 1, 2, 2]) + apply_result.insert(loc=1, column="level_1", value=[0.4, 0.8, 0.4, 0.8]) tm.assert_frame_equal(apply_result, expected_seq, check_names=False) agg_result = df_grouped.agg(f, q=80) @@ -519,9 +520,7 @@ def test_as_index_select_column(): result = df.groupby("A", as_index=False, group_keys=True)["B"].apply( lambda x: x.cumsum() ) - expected = Series( - [2, 6, 6], name="B", index=MultiIndex.from_tuples([(0, 0), (0, 1), (1, 2)]) - ) + expected = Series([2, 6, 6], name="B", index=range(3)) tm.assert_series_equal(result, expected)