Skip to content

Commit ca32744

Browse files
Khor Chean Wei authored and yehoshuadimarsky committed
Bug fix - GroupBy.describe produces inconsistent results for empty datasets (pandas-dev#46162)
1 parent abec810 commit ca32744

File tree

3 files changed

+35
-0
lines changed

3 files changed

+35
-0
lines changed

doc/source/whatsnew/v1.5.0.rst

+2
Original file line numberDiff line numberDiff line change
@@ -787,6 +787,8 @@ Groupby/resample/rolling
787787
- Bug in :meth:`.Rolling.var` would segfault calculating weighted variance when window size was larger than data size (:issue:`46760`)
788788
- Bug in :meth:`Grouper.__repr__` where ``dropna`` was not included. Now it is (:issue:`46754`)
789789
- Bug in :meth:`DataFrame.rolling` raising ``ValueError`` when ``center=True``, ``axis=1`` and ``win_type`` is specified (:issue:`46135`)
790+
- Bug in :meth:`.DataFrameGroupBy.describe` and :meth:`.SeriesGroupBy.describe` where inconsistent results were produced for empty datasets (:issue:`41575`)
791+
-
790792

791793
Reshaping
792794
^^^^^^^^^

pandas/core/groupby/groupby.py

+8
Original file line numberDiff line numberDiff line change
@@ -2544,6 +2544,14 @@ def ohlc(self) -> DataFrame:
25442544
@doc(DataFrame.describe)
25452545
def describe(self, **kwargs):
25462546
with self._group_selection_context():
2547+
if len(self._selected_obj) == 0:
2548+
described = self._selected_obj.describe(**kwargs)
2549+
if self._selected_obj.ndim == 1:
2550+
result = described
2551+
else:
2552+
result = described.unstack()
2553+
return result.to_frame().T.iloc[:0]
2554+
25472555
result = self._python_apply_general(
25482556
lambda x: x.describe(**kwargs),
25492557
self._selected_obj,

pandas/tests/groupby/test_function.py

+25
Original file line numberDiff line numberDiff line change
@@ -1354,3 +1354,28 @@ def test_deprecate_numeric_only(
13541354
# Doesn't have numeric_only argument and fails on nuisance columns
13551355
with pytest.raises(TypeError, match=r"unsupported operand type"):
13561356
method(*args, **kwargs)
1357+
1358+
1359+
@pytest.mark.parametrize("dtype", [int, float, object])
@pytest.mark.parametrize(
    "kwargs",
    [
        {"percentiles": [0.10, 0.20, 0.30], "include": "all", "exclude": None},
        {"percentiles": [0.10, 0.20, 0.30], "include": None, "exclude": ["int"]},
        {"percentiles": [0.10, 0.20, 0.30], "include": ["int"], "exclude": None},
    ],
)
def test_groupby_empty_dataset(dtype, kwargs):
    # GH#41575
    # describe() on a zero-row groupby is compared against the one-row
    # result with its index reset and all rows sliced away.
    def _zero_rows(described):
        # Drop the group-key index, then keep no rows.
        return described.reset_index(drop=True).iloc[:0]

    frame = DataFrame([[1, 2, 3]], columns=["A", "B", "C"], dtype=dtype)
    frame["B"] = frame["B"].astype(int)
    frame["C"] = frame["C"].astype(float)

    # DataFrameGroupBy.describe
    result = frame.iloc[:0].groupby("A").describe(**kwargs)
    expected = _zero_rows(frame.groupby("A").describe(**kwargs))
    tm.assert_frame_equal(result, expected)

    # SeriesGroupBy.describe on column "B"
    result = frame.iloc[:0].groupby("A")["B"].describe(**kwargs)
    expected = _zero_rows(frame.groupby("A")["B"].describe(**kwargs))
    # The expected frame's index is replaced with an empty Index before
    # comparing.
    expected.index = Index([])
    tm.assert_frame_equal(result, expected)

0 commit comments

Comments
 (0)