Skip to content

Commit 8a286fa

Browse files
authored
BUG: Fix bug in GroupBy that ignores group_keys arg for empty dataframes/series (#60505)
BUG: Fix bug in GroupBy that ignores group_keys arg for empty dataframes/series
1 parent 497208f commit 8a286fa

File tree

5 files changed

+19
-2
lines changed

5 files changed

+19
-2
lines changed

doc/source/whatsnew/v3.0.0.rst

+1
Original file line numberDiff line numberDiff line change
@@ -733,6 +733,7 @@ Groupby/resample/rolling
733733
- Bug in :meth:`.Resampler.interpolate` on a :class:`DataFrame` with non-uniform sampling and/or indices not aligning with the resulting resampled index would result in wrong interpolation (:issue:`21351`)
734734
- Bug in :meth:`DataFrame.ewm` and :meth:`Series.ewm` when passed ``times`` and aggregation functions other than mean (:issue:`51695`)
735735
- Bug in :meth:`DataFrameGroupBy.agg` that raises ``AttributeError`` when there is dictionary input and duplicated columns, instead of returning a DataFrame with the aggregation of all duplicate columns. (:issue:`55041`)
736+
- Bug in :meth:`DataFrameGroupBy.apply` and :meth:`SeriesGroupBy.apply` where, for an empty :class:`DataFrame` or :class:`Series` grouped with ``group_keys=False``, the output index was still built from the group keys. (:issue:`60471`)
736737
- Bug in :meth:`DataFrameGroupBy.apply` that was returning a completely empty DataFrame when all return values of ``func`` were ``None`` instead of returning an empty DataFrame with the original columns and dtypes. (:issue:`57775`)
737738
- Bug in :meth:`DataFrameGroupBy.apply` with ``as_index=False`` that was returning :class:`MultiIndex` instead of returning :class:`Index`. (:issue:`58291`)
738739
- Bug in :meth:`DataFrameGroupBy.cumsum` and :meth:`DataFrameGroupBy.cumprod` where ``numeric_only`` parameter was passed indirectly through kwargs instead of passing directly. (:issue:`58811`)

pandas/core/groupby/generic.py

+4
Original file line numberDiff line numberDiff line change
@@ -583,6 +583,8 @@ def _wrap_applied_output(
583583
if is_transform:
584584
# GH#47787 see test_group_on_empty_multiindex
585585
res_index = data.index
586+
elif not self.group_keys:
587+
res_index = None
586588
else:
587589
res_index = self._grouper.result_index
588590

@@ -1967,6 +1969,8 @@ def _wrap_applied_output(
19671969
if is_transform:
19681970
# GH#47787 see test_group_on_empty_multiindex
19691971
res_index = data.index
1972+
elif not self.group_keys:
1973+
res_index = None
19701974
else:
19711975
res_index = self._grouper.result_index
19721976

pandas/tests/groupby/aggregate/test_aggregate.py

+1
Original file line numberDiff line numberDiff line change
@@ -159,6 +159,7 @@ def test_agg_apply_corner(ts, tsframe):
159159
tm.assert_frame_equal(grouped.agg("sum"), exp_df)
160160

161161
res = grouped.apply(np.sum, axis=0)
162+
exp_df = exp_df.reset_index(drop=True)
162163
tm.assert_frame_equal(res, exp_df)
163164

164165

pandas/tests/groupby/test_all_methods.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -22,7 +22,7 @@
2222
def test_multiindex_group_all_columns_when_empty(groupby_func):
2323
# GH 32464
2424
df = DataFrame({"a": [], "b": [], "c": []}).set_index(["a", "b", "c"])
25-
gb = df.groupby(["a", "b", "c"], group_keys=False)
25+
gb = df.groupby(["a", "b", "c"], group_keys=True)
2626
method = getattr(gb, groupby_func)
2727
args = get_groupby_method_args(groupby_func, df)
2828
if groupby_func == "corrwith":

pandas/tests/groupby/test_grouping.py

+12-1
Original file line numberDiff line numberDiff line change
@@ -777,10 +777,21 @@ def test_evaluate_with_empty_groups(self, func, expected):
777777
# (not testing other agg fns, because they return
778778
# different index objects.)
779779
df = DataFrame({1: [], 2: []})
780-
g = df.groupby(1, group_keys=False)
780+
g = df.groupby(1, group_keys=True)
781781
result = getattr(g[2], func)(lambda x: x)
782782
tm.assert_series_equal(result, expected)
783783

784+
def test_groupby_apply_empty_with_group_keys_false(self):
785+
# GH#60471
786+
# test applying a function to empty groups with group_keys=False
787+
# (not testing other agg fns, because they return
788+
# different index objects.)
789+
df = DataFrame({"A": [], "B": [], "C": []})
790+
g = df.groupby("A", group_keys=False)
791+
result = g.apply(lambda x: x / x.sum(), include_groups=False)
792+
expected = DataFrame({"B": [], "C": []}, index=None)
793+
tm.assert_frame_equal(result, expected)
794+
784795
def test_groupby_empty(self):
785796
# https://github.com/pandas-dev/pandas/issues/27190
786797
s = Series([], name="name", dtype="float64")

0 commit comments

Comments
 (0)