From 88aa9f74341cb3b8078de44fd9b60a0fd4e2e6a1 Mon Sep 17 00:00:00 2001
From: Matthew Roeschke <emailformattr@gmail.com>
Date: Thu, 16 Dec 2021 20:00:23 -0800
Subject: [PATCH 1/3] ENH: Add numba engine to groupby.sum

---
 doc/source/whatsnew/v1.4.0.rst     |  2 +-
 pandas/core/groupby/groupby.py     | 37 ++++++++++++++++++++----------
 pandas/tests/groupby/conftest.py   |  1 +
 pandas/tests/groupby/test_numba.py |  9 +++++---
 4 files changed, 33 insertions(+), 16 deletions(-)

diff --git a/doc/source/whatsnew/v1.4.0.rst b/doc/source/whatsnew/v1.4.0.rst
index f50442eb7ca46..057e7a1e9c2b7 100644
--- a/doc/source/whatsnew/v1.4.0.rst
+++ b/doc/source/whatsnew/v1.4.0.rst
@@ -219,7 +219,7 @@ Other enhancements
 - :meth:`DataFrame.dropna` now accepts a single label as ``subset`` along with array-like (:issue:`41021`)
 - :class:`ExcelWriter` argument ``if_sheet_exists="overlay"`` option added (:issue:`40231`)
 - :meth:`read_excel` now accepts a ``decimal`` argument that allow the user to specify the decimal point when parsing string columns to numeric (:issue:`14403`)
-- :meth:`.GroupBy.mean`, :meth:`.GroupBy.std`, and :meth:`.GroupBy.var` now supports `Numba <http://numba.pydata.org/>`_ execution with the ``engine`` keyword (:issue:`43731`, :issue:`44862`)
+- :meth:`.GroupBy.mean`, :meth:`.GroupBy.std`, :meth:`.GroupBy.var`, :meth:`.GroupBy.sum` now supports `Numba <http://numba.pydata.org/>`_ execution with the ``engine`` keyword (:issue:`43731`, :issue:`44862`)
 - :meth:`Timestamp.isoformat`, now handles the ``timespec`` argument from the base :class:``datetime`` class (:issue:`26131`)
 - :meth:`NaT.to_numpy` ``dtype`` argument is now respected, so ``np.timedelta64`` can be returned (:issue:`44460`)
 - New option ``display.max_dir_items`` customizes the number of columns added to :meth:`Dataframe.__dir__` and suggested for tab completion (:issue:`37996`)
diff --git a/pandas/core/groupby/groupby.py b/pandas/core/groupby/groupby.py
index e11d420ada29f..a1866e3bdc9f6 100644
--- a/pandas/core/groupby/groupby.py
+++ b/pandas/core/groupby/groupby.py
@@ -2163,22 +2163,35 @@ def size(self) -> DataFrame | Series:
     @final
     @doc(_groupby_agg_method_template, fname="sum", no=True, mc=0)
     def sum(
-        self, numeric_only: bool | lib.NoDefault = lib.no_default, min_count: int = 0
+        self,
+        numeric_only: bool | lib.NoDefault = lib.no_default,
+        min_count: int = 0,
+        engine: str | None = None,
+        engine_kwargs: dict[str, bool] | None = None,
     ):
-        numeric_only = self._resolve_numeric_only(numeric_only)
+        if maybe_use_numba(engine):
+            from pandas.core._numba.kernels import sliding_sum
 
-        # If we are grouping on categoricals we want unobserved categories to
-        # return zero, rather than the default of NaN which the reindexing in
-        # _agg_general() returns. GH #31422
-        with com.temp_setattr(self, "observed", True):
-            result = self._agg_general(
-                numeric_only=numeric_only,
-                min_count=min_count,
-                alias="add",
-                npfunc=np.sum,
+            return self._numba_agg_general(
+                sliding_sum,
+                engine_kwargs,
+                "groupby_sum",
             )
+        else:
+            numeric_only = self._resolve_numeric_only(numeric_only)
 
-        return self._reindex_output(result, fill_value=0)
+            # If we are grouping on categoricals we want unobserved categories to
+            # return zero, rather than the default of NaN which the reindexing in
+            # _agg_general() returns. GH #31422
+            with com.temp_setattr(self, "observed", True):
+                result = self._agg_general(
+                    numeric_only=numeric_only,
+                    min_count=min_count,
+                    alias="add",
+                    npfunc=np.sum,
+                )
+
+            return self._reindex_output(result, fill_value=0)
 
     @final
     @doc(_groupby_agg_method_template, fname="prod", no=True, mc=0)
diff --git a/pandas/tests/groupby/conftest.py b/pandas/tests/groupby/conftest.py
index 596e9e0a4de77..2be680d7a4ccd 100644
--- a/pandas/tests/groupby/conftest.py
+++ b/pandas/tests/groupby/conftest.py
@@ -183,6 +183,7 @@ def nopython(request):
         ("var", {"ddof": 0}),
         ("std", {"ddof": 1}),
         ("std", {"ddof": 0}),
+        ("sum", {}),
     ]
 )
 def numba_supported_reductions(request):
diff --git a/pandas/tests/groupby/test_numba.py b/pandas/tests/groupby/test_numba.py
index 6fc1f0d808bb2..645f2ebddf63d 100644
--- a/pandas/tests/groupby/test_numba.py
+++ b/pandas/tests/groupby/test_numba.py
@@ -24,7 +24,8 @@ def test_cython_vs_numba_frame(
             engine="numba", engine_kwargs=engine_kwargs, **kwargs
         )
         expected = getattr(gb, func)(**kwargs)
-        tm.assert_frame_equal(result, expected)
+        check_dtype = func != "sum"
+        tm.assert_frame_equal(result, expected, check_dtype=check_dtype)
 
     def test_cython_vs_numba_getitem(
         self, sort, nogil, parallel, nopython, numba_supported_reductions
@@ -37,7 +38,8 @@ def test_cython_vs_numba_getitem(
             engine="numba", engine_kwargs=engine_kwargs, **kwargs
         )
         expected = getattr(gb, func)(**kwargs)
-        tm.assert_series_equal(result, expected)
+        check_dtype = func != "sum"
+        tm.assert_series_equal(result, expected, check_dtype=check_dtype)
 
     def test_cython_vs_numba_series(
         self, sort, nogil, parallel, nopython, numba_supported_reductions
@@ -50,7 +52,8 @@ def test_cython_vs_numba_series(
             engine="numba", engine_kwargs=engine_kwargs, **kwargs
         )
         expected = getattr(gb, func)(**kwargs)
-        tm.assert_series_equal(result, expected)
+        check_dtype = func != "sum"
+        tm.assert_series_equal(result, expected, check_dtype=check_dtype)
 
     def test_as_index_false_unsupported(self, numba_supported_reductions):
         func, kwargs = numba_supported_reductions

From 318ab087b7b9289298240343e100994c38ec8e51 Mon Sep 17 00:00:00 2001
From: Matthew Roeschke <emailformattr@gmail.com>
Date: Thu, 16 Dec 2021 20:05:47 -0800
Subject: [PATCH 2/3] Add issue number

---
 doc/source/whatsnew/v1.4.0.rst | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/doc/source/whatsnew/v1.4.0.rst b/doc/source/whatsnew/v1.4.0.rst
index 057e7a1e9c2b7..d7ea4c553f8e0 100644
--- a/doc/source/whatsnew/v1.4.0.rst
+++ b/doc/source/whatsnew/v1.4.0.rst
@@ -219,7 +219,7 @@ Other enhancements
 - :meth:`DataFrame.dropna` now accepts a single label as ``subset`` along with array-like (:issue:`41021`)
 - :class:`ExcelWriter` argument ``if_sheet_exists="overlay"`` option added (:issue:`40231`)
 - :meth:`read_excel` now accepts a ``decimal`` argument that allow the user to specify the decimal point when parsing string columns to numeric (:issue:`14403`)
-- :meth:`.GroupBy.mean`, :meth:`.GroupBy.std`, :meth:`.GroupBy.var`, :meth:`.GroupBy.sum` now supports `Numba <http://numba.pydata.org/>`_ execution with the ``engine`` keyword (:issue:`43731`, :issue:`44862`)
+- :meth:`.GroupBy.mean`, :meth:`.GroupBy.std`, :meth:`.GroupBy.var`, :meth:`.GroupBy.sum` now supports `Numba <http://numba.pydata.org/>`_ execution with the ``engine`` keyword (:issue:`43731`, :issue:`44862`, :issue:`44939`)
 - :meth:`Timestamp.isoformat`, now handles the ``timespec`` argument from the base :class:``datetime`` class (:issue:`26131`)
 - :meth:`NaT.to_numpy` ``dtype`` argument is now respected, so ``np.timedelta64`` can be returned (:issue:`44460`)
 - New option ``display.max_dir_items`` customizes the number of columns added to :meth:`Dataframe.__dir__` and suggested for tab completion (:issue:`37996`)

From 26826d5525766fd7c00767d6eadfbc0906541202 Mon Sep 17 00:00:00 2001
From: Matthew Roeschke <emailformattr@gmail.com>
Date: Fri, 17 Dec 2021 17:55:30 -0800
Subject: [PATCH 3/3] Reference dtype gh issue

---
 pandas/tests/groupby/test_numba.py | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/pandas/tests/groupby/test_numba.py b/pandas/tests/groupby/test_numba.py
index 645f2ebddf63d..6554993c140a1 100644
--- a/pandas/tests/groupby/test_numba.py
+++ b/pandas/tests/groupby/test_numba.py
@@ -24,6 +24,7 @@ def test_cython_vs_numba_frame(
             engine="numba", engine_kwargs=engine_kwargs, **kwargs
         )
         expected = getattr(gb, func)(**kwargs)
+        # check_dtype can be removed if GH 44952 is addressed
         check_dtype = func != "sum"
         tm.assert_frame_equal(result, expected, check_dtype=check_dtype)
 
@@ -38,6 +39,7 @@ def test_cython_vs_numba_getitem(
             engine="numba", engine_kwargs=engine_kwargs, **kwargs
         )
         expected = getattr(gb, func)(**kwargs)
+        # check_dtype can be removed if GH 44952 is addressed
         check_dtype = func != "sum"
         tm.assert_series_equal(result, expected, check_dtype=check_dtype)
 
@@ -52,6 +54,7 @@ def test_cython_vs_numba_series(
             engine="numba", engine_kwargs=engine_kwargs, **kwargs
         )
         expected = getattr(gb, func)(**kwargs)
+        # check_dtype can be removed if GH 44952 is addressed
         check_dtype = func != "sum"
         tm.assert_series_equal(result, expected, check_dtype=check_dtype)