pandas-dev · snitish · Dec 1, 2024 · Dec 3, 2024 · Feb 15, 2025 · Feb 16, 2025
diff --git a/doc/source/whatsnew/v3.0.0.rst b/doc/source/whatsnew/v3.0.0.rst
@@ -756,6 +756,7 @@ Groupby/resample/rolling
 - Bug in :meth:`DataFrameGroupBy.apply` with ``as_index=False`` that was returning :class:`MultiIndex` instead of returning :class:`Index`. (:issue:`58291`)
 - Bug in :meth:`DataFrameGroupBy.cumsum` and :meth:`DataFrameGroupBy.cumprod` where ``numeric_only`` parameter was passed indirectly through kwargs instead of passing directly. (:issue:`58811`)
 - Bug in :meth:`DataFrameGroupBy.cumsum` where it did not return the correct dtype when the label contained ``None``. (:issue:`58811`)
+- Bug in :meth:`DataFrameGroupBy.sum` and :meth:`SeriesGroupBy.sum` where in case of all-nan values for object dtype the result is incorrectly set to 0 instead of ``None``. (:issue:`60229`)
 - Bug in :meth:`DataFrameGroupby.transform` and :meth:`SeriesGroupby.transform` with a reducer and ``observed=False`` that coerces dtype to float when there are unobserved categories. (:issue:`55326`)
 - Bug in :meth:`Rolling.apply` for ``method="table"`` where column order was not being respected due to the columns getting sorted by default. (:issue:`59666`)
 - Bug in :meth:`Rolling.apply` where the applied function could be called on fewer than ``min_period`` periods if ``method="table"``. (:issue:`58868`)

diff --git a/pandas/_libs/groupby.pyx b/pandas/_libs/groupby.pyx
@@ -725,8 +725,12 @@ def group_sum(
         raise ValueError("len(index) != len(labels)")
 
     nobs = np.zeros((<object>out).shape, dtype=np.int64)
-    # the below is equivalent to `np.zeros_like(out)` but faster
-    sumx = np.zeros((<object>out).shape, dtype=(<object>out).base.dtype)
+    if sum_t is object:
+        # For object dtype, fill value should not be 0 (#60229)
+        sumx = np.full((<object>out).shape, NAN, dtype=object)
+    else:
+        # the below is equivalent to `np.zeros_like(out)` but faster
+        sumx = np.zeros((<object>out).shape, dtype=(<object>out).base.dtype)
     compensation = np.zeros((<object>out).shape, dtype=(<object>out).base.dtype)
 
     N, K = (<object>values).shape
@@ -760,7 +764,10 @@ def group_sum(
                     if uses_mask:
                         isna_result = result_mask[lab, j]
                     else:
-                        isna_result = _treat_as_na(sumx[lab, j], is_datetimelike)
+                        isna_result = (
+                            _treat_as_na(sumx[lab, j], is_datetimelike) and
+                            nobs[lab, j] > 0
+                        )
 
                     if isna_result:
                         # If sum is already NA, don't add to it. This is important for
@@ -795,6 +802,7 @@ def group_sum(
                             compensation[lab, j] = 0
                         sumx[lab, j] = t
                 elif not skipna:
+                    nobs[lab, j] += 1
                     if uses_mask:
                         result_mask[lab, j] = True
                     else:

diff --git a/pandas/tests/groupby/test_categorical.py b/pandas/tests/groupby/test_categorical.py
@@ -358,6 +358,7 @@ def test_observed(request, using_infer_string, observed):
         expected = cartesian_product_for_groupers(
             expected, [cat1, cat2], list("AB"), fill_value=0
         )
+        expected.loc[expected.C == 0, "C"] = np.nan
 
     tm.assert_frame_equal(result, expected)
 

diff --git a/pandas/tests/groupby/test_reductions.py b/pandas/tests/groupby/test_reductions.py
@@ -514,6 +514,21 @@ def test_sum_skipna_object(skipna):
     tm.assert_series_equal(result, expected)
 
 
+def test_sum_allnan_object(skipna):
+    # GH#60229
+    df = DataFrame(
+        {
+            "val": [np.nan] * 10,
+            "cat": ["A", "B"] * 5,
+        }
+    ).astype({"val": object})
+    expected = Series(
+        [np.nan, np.nan], index=pd.Index(["A", "B"], name="cat"), name="val"
+    ).astype(object)
+    result = df.groupby("cat")["val"].sum(skipna=skipna)
+    tm.assert_series_equal(result, expected)
+
+
 @pytest.mark.parametrize(
     "func, values, dtype, result_dtype",
     [

diff --git a/pandas/tests/groupby/test_timegrouper.py b/pandas/tests/groupby/test_timegrouper.py
@@ -113,7 +113,7 @@ def test_groupby_with_timegrouper(self):
                 unit=df.index.unit,
             )
             expected = DataFrame(
-                {"Buyer": 0, "Quantity": 0},
+                {"Buyer": np.nan, "Quantity": 0},
                 index=exp_dti,
             )
             # Cast to object to avoid implicit cast when setting entry to "CarlCarlCarl"