# Review notes. Free text placed ahead of the first "diff --git" header; it is
# not part of any hunk. The two patch lines below are reproduced unchanged.
#
# What the patch does (three files):
#   1. doc/source/whatsnew/v1.0.0.rst: adds one entry under
#      "Groupby/resample/rolling" describing a DataFrameGroupBy.quantile bug
#      where NA values in the grouping could cause segfaults or incorrect
#      results (issue 28882).
#   2. pandas/_libs/groupby.pyx, hunk inside group_quantile: in the
#      `for i in range(N)` loop, rows whose label is -1 (commented in the patch
#      as "NA group label") are now skipped with `continue` before
#      counts[lab] and non_na_counts[lab] are incremented, so -1 is no longer
#      used as an index into those arrays in this loop.
#   3. pandas/tests/groupby/test_function.py: adds two tests.
#      - test_quantile_missing_group_values_no_segfaults: groups a 3-row frame
#        (key = [1.0, nan, 1.0], val = range(3)) by "key" and calls quantile()
#        100 times; it asserts nothing beyond completing without a crash.
#      - test_quantile_missing_group_values_correct_results: groups a 4-row
#        frame (key = [1.0, nan, 3.0, nan], val = range(4)) by "key" and
#        compares quantile() with a frame indexed by key 1.0 and 3.0.
#
# NOTE(review): the changelog entry cites issue 28882 while both new tests
#   cite GH 28662. Confirm whether these are meant to differ (for example
#   issue number vs. pull-request number).
# NOTE(review): in the second test, val = range(4) puts 0 and 2 on the rows
#   keyed 1.0 and 3.0, yet the expected "val" column is [1.0, 3.0]. That looks
#   like it may not be the per-group median. Verify the expected values.
# NOTE(review): the line structure of this patch has been flattened. In this
#   form the group_quantile hunk appears to carry four "+" markers while its
#   header (-763,6 +763,9) implies three added lines. Check against the
#   upstream patch before trying to apply it.
diff --git a/doc/source/whatsnew/v1.0.0.rst b/doc/source/whatsnew/v1.0.0.rst index 4007ecd5a9412..e375234e88711 100644 --- a/doc/source/whatsnew/v1.0.0.rst +++ b/doc/source/whatsnew/v1.0.0.rst @@ -410,6 +410,7 @@ Groupby/resample/rolling - Bug in :meth:`DataFrame.groupby` not offering selection by column name when ``axis=1`` (:issue:`27614`) - Bug in :meth:`DataFrameGroupby.agg` not able to use lambda function with named aggregation (:issue:`27519`) - Bug in :meth:`DataFrame.groupby` losing column name information when grouping by a categorical column (:issue:`28787`) +- Bug in :meth:`DataFrameGroupBy.quantile` where NA values in the grouping could cause segfaults or incorrect results (:issue:`28882`) Reshaping ^^^^^^^^^ diff --git a/pandas/_libs/groupby.pyx b/pandas/_libs/groupby.pyx index b2ffbb3ecb4f2..1a8694a0f4746 100644 --- a/pandas/_libs/groupby.pyx +++ b/pandas/_libs/groupby.pyx @@ -763,6 +763,9 @@ def group_quantile(ndarray[float64_t] out, with nogil: for i in range(N): lab = labels[i] + if lab == -1: # NA group label + continue + + counts[lab] += 1 if not mask[i]: non_na_counts[lab] += 1 diff --git a/pandas/tests/groupby/test_function.py b/pandas/tests/groupby/test_function.py index 571e710ba8928..2d7dfe49dc038 100644 --- a/pandas/tests/groupby/test_function.py +++ b/pandas/tests/groupby/test_function.py @@ -1373,6 +1373,29 @@ def test_quantile_out_of_bounds_q_raises(): g.quantile(-1) +def test_quantile_missing_group_values_no_segfaults(): + # GH 28662 + data = np.array([1.0, np.nan, 1.0]) + df = pd.DataFrame(dict(key=data, val=range(3))) + + # Random segfaults; would have been guaranteed in loop + grp = df.groupby("key") + for _ in range(100): + grp.quantile() + + +def test_quantile_missing_group_values_correct_results(): + # GH 28662 + data = np.array([1.0, np.nan, 3.0, np.nan]) + df = pd.DataFrame(dict(key=data, val=range(4))) + + result = df.groupby("key").quantile() + expected = pd.DataFrame( + [1.0, 3.0], index=pd.Index([1.0, 3.0], name="key"), 
columns=["val"] + ) + tm.assert_frame_equal(result, expected) + + # pipe # --------------------------------