Skip to content

Commit f6105a4

Browse files
rhshadrach authored and JulianWgs committed
REGR: groupby with as_index=False on an empty frame (pandas-dev#42254)
1 parent 4dc74db commit f6105a4

File tree

3 files changed

+21
-11
lines changed

3 files changed

+21
-11
lines changed

pandas/core/groupby/generic.py

+3 -1
Original file line number | Diff line number | Diff line change
@@ -1676,7 +1676,9 @@ def _wrap_transformed_output(
16761676

16771677
def _wrap_agged_manager(self, mgr: Manager2D) -> DataFrame:
16781678
if not self.as_index:
1679-
index = Index(range(mgr.shape[1]))
1679+
# GH 41998 - empty mgr always gets index of length 0
1680+
rows = mgr.shape[1] if mgr.shape[0] > 0 else 0
1681+
index = Index(range(rows))
16801682
mgr.set_axis(1, index)
16811683
result = self.obj._constructor(mgr)
16821684

pandas/tests/groupby/conftest.py

+5
Original file line number | Diff line number | Diff line change
@@ -12,6 +12,11 @@
1212
)
1313

1414

15+
@pytest.fixture(params=[True, False])
16+
def as_index(request):
17+
return request.param
18+
19+
1520
@pytest.fixture
1621
def mframe():
1722
index = MultiIndex(

pandas/tests/groupby/test_groupby.py

+13 -10
Original file line number | Diff line number | Diff line change
@@ -15,6 +15,7 @@
1515
Grouper,
1616
Index,
1717
MultiIndex,
18+
RangeIndex,
1819
Series,
1920
Timestamp,
2021
date_range,
@@ -2399,19 +2400,21 @@ def test_groupby_all_nan_groups_drop():
23992400
tm.assert_series_equal(result, expected)
24002401

24012402

2402-
def test_groupby_empty_multi_column():
2403-
# GH 15106
2403+
@pytest.mark.parametrize("numeric_only", [True, False])
2404+
def test_groupby_empty_multi_column(as_index, numeric_only):
2405+
# GH 15106 & GH 41998
24042406
df = DataFrame(data=[], columns=["A", "B", "C"])
2405-
gb = df.groupby(["A", "B"])
2406-
result = gb.sum(numeric_only=False)
2407-
expected = DataFrame(
2408-
[], columns=["C"], index=MultiIndex([[], []], [[], []], names=["A", "B"])
2409-
)
2407+
gb = df.groupby(["A", "B"], as_index=as_index)
2408+
result = gb.sum(numeric_only=numeric_only)
2409+
if as_index:
2410+
index = MultiIndex([[], []], [[], []], names=["A", "B"])
2411+
columns = ["C"] if not numeric_only else []
2412+
else:
2413+
index = RangeIndex(0)
2414+
columns = ["A", "B", "C"] if not numeric_only else ["A", "B"]
2415+
expected = DataFrame([], columns=columns, index=index)
24102416
tm.assert_frame_equal(result, expected)
24112417

2412-
result = gb.sum(numeric_only=True)
2413-
tm.assert_frame_equal(result, expected[[]])
2414-
24152418

24162419
def test_groupby_filtered_df_std():
24172420
# GH 16174

0 commit comments

Comments
 (0)