Skip to content

Commit f12e2a7

Browse files
authored
BUG: groupby with empty object, categorical grouper, and dropna=False fails (#50635)
1 parent f4851dd commit f12e2a7

File tree

3 files changed

+6
-4
lines changed

3 files changed

+6
-4
lines changed

doc/source/whatsnew/v2.0.0.rst

+1
Original file line numberDiff line numberDiff line change
@@ -973,6 +973,7 @@ Groupby/resample/rolling
973973
- Bug in :meth:`.DataFrameGroupBy.value_counts` would raise when used with a :class:`.TimeGrouper` (:issue:`50486`)
974974
- Bug in :meth:`Resampler.size` caused a wide :class:`DataFrame` to be returned instead of a :class:`Series` with :class:`MultiIndex` (:issue:`46826`)
975975
- Bug in :meth:`.DataFrameGroupBy.transform` and :meth:`.SeriesGroupBy.transform` would raise incorrectly when grouper had ``axis=1`` for ``"idxmin"`` and ``"idxmax"`` arguments (:issue:`45986`)
976+
- Bug in :class:`.DataFrameGroupBy` would raise when used with an empty DataFrame, categorical grouper, and ``dropna=False`` (:issue:`50634`)
976977
-
977978

978979
Reshaping

pandas/core/groupby/grouper.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -649,7 +649,7 @@ def group_index(self) -> Index:
649649
uniques = Categorical.from_codes(
650650
np.append(uniques.codes, [-1]), uniques.categories
651651
)
652-
else:
652+
elif len(codes) > 0:
653653
# Need to determine proper placement of NA value when not sorting
654654
cat = self.grouping_vector
655655
na_idx = (cat.codes < 0).argmax()

pandas/tests/groupby/test_groupby.py

+4-3
Original file line numberDiff line numberDiff line change
@@ -1870,7 +1870,9 @@ def test_pivot_table_values_key_error():
18701870
@pytest.mark.parametrize(
18711871
"op", ["idxmax", "idxmin", "min", "max", "sum", "prod", "skew"]
18721872
)
1873-
def test_empty_groupby(columns, keys, values, method, op, request, using_array_manager):
1873+
def test_empty_groupby(
1874+
columns, keys, values, method, op, request, using_array_manager, dropna
1875+
):
18741876
# GH8093 & GH26411
18751877
override_dtype = None
18761878

@@ -1930,7 +1932,7 @@ def test_empty_groupby(columns, keys, values, method, op, request, using_array_m
19301932

19311933
df = df.iloc[:0]
19321934

1933-
gb = df.groupby(keys, group_keys=False)[columns]
1935+
gb = df.groupby(keys, group_keys=False, dropna=dropna)[columns]
19341936

19351937
def get_result(**kwargs):
19361938
if method == "attr":
@@ -2037,7 +2039,6 @@ def get_result(**kwargs):
20372039
lev = Categorical([0], dtype=values.dtype)
20382040
ci = Index(lev, name=keys[0])
20392041
expected = DataFrame([], columns=[], index=ci)
2040-
# expected = df.set_index(keys)[columns]
20412042

20422043
tm.assert_equal(result, expected)
20432044
return

0 commit comments

Comments
 (0)