From 78d0b851ed1d0a19d737d3f58137fbdb64899e77 Mon Sep 17 00:00:00 2001 From: Zhengbo Wang Date: Wed, 12 Jun 2024 15:04:53 +0800 Subject: [PATCH 1/6] BUG: Fix issue with negative labels in group_cumsum function --- pandas/_libs/groupby.pyx | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/pandas/_libs/groupby.pyx b/pandas/_libs/groupby.pyx index 15f8727c38f8d..60804766adb79 100644 --- a/pandas/_libs/groupby.pyx +++ b/pandas/_libs/groupby.pyx @@ -399,8 +399,14 @@ def group_cumsum( lab = labels[i] if lab < 0: + # GH 58811 + if uses_mask: + result_mask[i, :] = True + out[i, :] = 0 continue + for j in range(K): + val = values[i, j] if uses_mask: From a99d430d96bd7cfd24b82b63da26b755e9b1f701 Mon Sep 17 00:00:00 2001 From: Zhengbo Wang Date: Wed, 12 Jun 2024 15:08:07 +0800 Subject: [PATCH 2/6] Remove blank line --- pandas/_libs/groupby.pyx | 2 -- 1 file changed, 2 deletions(-) diff --git a/pandas/_libs/groupby.pyx b/pandas/_libs/groupby.pyx index 60804766adb79..047d43642f820 100644 --- a/pandas/_libs/groupby.pyx +++ b/pandas/_libs/groupby.pyx @@ -406,9 +406,7 @@ def group_cumsum( continue for j in range(K): - val = values[i, j] - if uses_mask: isna_entry = mask[i, j] else: From 39173f37c568eeab69bc5fb1d7ae5c018ee0e217 Mon Sep 17 00:00:00 2001 From: Zhengbo Wang Date: Wed, 12 Jun 2024 15:09:32 +0800 Subject: [PATCH 3/6] Revert remove blank line --- pandas/_libs/groupby.pyx | 1 + 1 file changed, 1 insertion(+) diff --git a/pandas/_libs/groupby.pyx b/pandas/_libs/groupby.pyx index 047d43642f820..91821646d822d 100644 --- a/pandas/_libs/groupby.pyx +++ b/pandas/_libs/groupby.pyx @@ -407,6 +407,7 @@ def group_cumsum( for j in range(K): val = values[i, j] + if uses_mask: isna_entry = mask[i, j] else: From f15239a8402c1338282412b59ce706f59a101a0d Mon Sep 17 00:00:00 2001 From: Zhengbo Wang Date: Wed, 12 Jun 2024 21:25:47 +0800 Subject: [PATCH 4/6] Add test --- pandas/_libs/groupby.pyx | 2 +- pandas/tests/groupby/transform/test_transform.py | 9 +++++++++ 2 files changed, 10 insertions(+), 1 deletion(-) diff --git a/pandas/_libs/groupby.pyx b/pandas/_libs/groupby.pyx index 91821646d822d..7937b2ab72c37 100644 --- a/pandas/_libs/groupby.pyx +++ b/pandas/_libs/groupby.pyx @@ -399,7 +399,7 @@ def group_cumsum( lab = labels[i] if lab < 0: - # GH 58811 + # GH#58811 if uses_mask: result_mask[i, :] = True out[i, :] = 0 diff --git a/pandas/tests/groupby/transform/test_transform.py b/pandas/tests/groupby/transform/test_transform.py index 726c57081373c..a189d6772ece4 100644 --- a/pandas/tests/groupby/transform/test_transform.py +++ b/pandas/tests/groupby/transform/test_transform.py @@ -1591,3 +1591,12 @@ def test_min_one_dim_no_type_coercion(): expected = DataFrame({"Y": [9435, -5465765, -5465765, 0, 9435]}, dtype="int32") tm.assert_frame_equal(expected, result) + + +def test_nan_in_cumsum_group_label(): + # GH#58811 + df = DataFrame({"A": [1, None], "B": [2, 3]}, dtype="Int16") + gb = df.groupby("A")["B"] + result = gb.cumsum() + expected = Series([2, None], dtype="Int16", name="B") + tm.assert_series_equal(expected, result) From 57c437678962d918af07eda41cbe12f448e8560e Mon Sep 17 00:00:00 2001 From: Zhengbo Wang Date: Thu, 13 Jun 2024 09:34:16 +0800 Subject: [PATCH 5/6] Add what's new --- doc/source/whatsnew/v3.0.0.rst | 1 + 1 file changed, 1 insertion(+) diff --git a/doc/source/whatsnew/v3.0.0.rst b/doc/source/whatsnew/v3.0.0.rst index fe1dcefe05ff2..ff84c82f5015b 100644 --- a/doc/source/whatsnew/v3.0.0.rst +++ b/doc/source/whatsnew/v3.0.0.rst @@ -573,6 +573,7 @@ Groupby/resample/rolling - Bug in :meth:`DataFrameGroupBy.apply` with ``as_index=False`` that was returning :class:`MultiIndex` instead of returning :class:`Index`. (:issue:`58291`) - Bug in :meth:`DataFrameGroupby.transform` and :meth:`SeriesGroupby.transform` with a reducer and ``observed=False`` that coerces dtype to float when there are unobserved categories. (:issue:`55326`) - Bug in :meth:`Rolling.apply` where the applied function could be called on fewer than ``min_period`` periods if ``method="table"``. (:issue:`58868`) +- Bug in :neth:`DataFrameGroupBy.cumsum` where it did not return the correct dtype when the label contained ``None``. (:issue:`58811`) Reshaping ^^^^^^^^^ From 0f6bc6e148cb8e7c85e5f8a24db9de225848b0dc Mon Sep 17 00:00:00 2001 From: Zhengbo Wang Date: Thu, 13 Jun 2024 09:38:16 +0800 Subject: [PATCH 6/6] typo --- doc/source/whatsnew/v3.0.0.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/source/whatsnew/v3.0.0.rst b/doc/source/whatsnew/v3.0.0.rst index 51e399c5c67f5..80e5e89b79690 100644 --- a/doc/source/whatsnew/v3.0.0.rst +++ b/doc/source/whatsnew/v3.0.0.rst @@ -571,9 +571,9 @@ Groupby/resample/rolling - Bug in :meth:`DataFrameGroupBy.agg` that raises ``AttributeError`` when there is dictionary input and duplicated columns, instead of returning a DataFrame with the aggregation of all duplicate columns. (:issue:`55041`) - Bug in :meth:`DataFrameGroupBy.apply` that was returning a completely empty DataFrame when all return values of ``func`` were ``None`` instead of returning an empty DataFrame with the original columns and dtypes. (:issue:`57775`) - Bug in :meth:`DataFrameGroupBy.apply` with ``as_index=False`` that was returning :class:`MultiIndex` instead of returning :class:`Index`. (:issue:`58291`) +- Bug in :meth:`DataFrameGroupBy.cumsum` where it did not return the correct dtype when the label contained ``None``. (:issue:`58811`) - Bug in :meth:`DataFrameGroupby.transform` and :meth:`SeriesGroupby.transform` with a reducer and ``observed=False`` that coerces dtype to float when there are unobserved categories. (:issue:`55326`) - Bug in :meth:`Rolling.apply` where the applied function could be called on fewer than ``min_period`` periods if ``method="table"``. (:issue:`58868`) -- Bug in :neth:`DataFrameGroupBy.cumsum` where it did not return the correct dtype when the label contained ``None``. (:issue:`58811`) Reshaping ^^^^^^^^^