diff --git a/doc/source/whatsnew/v3.0.0.rst b/doc/source/whatsnew/v3.0.0.rst index 4a02622ae9eda..80e5e89b79690 100644 --- a/doc/source/whatsnew/v3.0.0.rst +++ b/doc/source/whatsnew/v3.0.0.rst @@ -571,6 +571,7 @@ Groupby/resample/rolling - Bug in :meth:`DataFrameGroupBy.agg` that raises ``AttributeError`` when there is dictionary input and duplicated columns, instead of returning a DataFrame with the aggregation of all duplicate columns. (:issue:`55041`) - Bug in :meth:`DataFrameGroupBy.apply` that was returning a completely empty DataFrame when all return values of ``func`` were ``None`` instead of returning an empty DataFrame with the original columns and dtypes. (:issue:`57775`) - Bug in :meth:`DataFrameGroupBy.apply` with ``as_index=False`` that was returning :class:`MultiIndex` instead of returning :class:`Index`. (:issue:`58291`) +- Bug in :meth:`DataFrameGroupBy.cumsum` and :meth:`SeriesGroupBy.cumsum` where the result did not have the correct dtype when the group labels contained ``None``. (:issue:`58811`) - Bug in :meth:`DataFrameGroupby.transform` and :meth:`SeriesGroupby.transform` with a reducer and ``observed=False`` that coerces dtype to float when there are unobserved categories. (:issue:`55326`) - Bug in :meth:`Rolling.apply` where the applied function could be called on fewer than ``min_period`` periods if ``method="table"``. 
(:issue:`58868`) diff --git a/pandas/_libs/groupby.pyx b/pandas/_libs/groupby.pyx index 15f8727c38f8d..7937b2ab72c37 100644 --- a/pandas/_libs/groupby.pyx +++ b/pandas/_libs/groupby.pyx @@ -399,7 +399,12 @@ def group_cumsum( lab = labels[i] if lab < 0: + # GH#58811 + if uses_mask: + result_mask[i, :] = True + out[i, :] = 0 continue + for j in range(K): val = values[i, j] diff --git a/pandas/tests/groupby/transform/test_transform.py b/pandas/tests/groupby/transform/test_transform.py index 726c57081373c..a189d6772ece4 100644 --- a/pandas/tests/groupby/transform/test_transform.py +++ b/pandas/tests/groupby/transform/test_transform.py @@ -1591,3 +1591,12 @@ def test_min_one_dim_no_type_coercion(): expected = DataFrame({"Y": [9435, -5465765, -5465765, 0, 9435]}, dtype="int32") tm.assert_frame_equal(expected, result) + + +def test_nan_in_cumsum_group_label(): + # GH#58811 + df = DataFrame({"A": [1, None], "B": [2, 3]}, dtype="Int16") + gb = df.groupby("A")["B"] + result = gb.cumsum() + expected = Series([2, None], dtype="Int16", name="B") + tm.assert_series_equal(expected, result)