From e2fb9acfa003195ea25e3fb82f6d4ca5649df98c Mon Sep 17 00:00:00 2001
From: Matthew Zeitlin <mzeitlin@caltech.edu>
Date: Tue, 8 Jun 2021 11:25:46 -0400
Subject: [PATCH 1/6] PERF: group_cumsum ints/datetimelike

---
 pandas/_libs/groupby.pyx | 5 +----
 1 file changed, 1 insertion(+), 4 deletions(-)

diff --git a/pandas/_libs/groupby.pyx b/pandas/_libs/groupby.pyx
index b72b927b3c2a8..a2c97f85e15f2 100644
--- a/pandas/_libs/groupby.pyx
+++ b/pandas/_libs/groupby.pyx
@@ -260,10 +260,7 @@ def group_cumsum(numeric[:, ::1] out,
                             accum[lab, j] = NaN
                             break
                 else:
-                    y = val - compensation[lab, j]
-                    t = accum[lab, j] + y
-                    compensation[lab, j] = t - accum[lab, j] - y
-                    accum[lab, j] = t
+                    accum[lab, j] = val + accum[lab, j]
                     out[i, j] = accum[lab, j]
 
 

From 246d829068d7091f8ad9abe808a55fb508d44dd4 Mon Sep 17 00:00:00 2001
From: Matthew Zeitlin <mzeitlin@caltech.edu>
Date: Tue, 8 Jun 2021 11:40:33 -0400
Subject: [PATCH 2/6] WIP: hold the int cumsum result in a local (next_val) before storing to accum and out

---
 pandas/_libs/groupby.pyx | 7 ++++---
 1 file changed, 4 insertions(+), 3 deletions(-)

diff --git a/pandas/_libs/groupby.pyx b/pandas/_libs/groupby.pyx
index a2c97f85e15f2..8b24164659ec0 100644
--- a/pandas/_libs/groupby.pyx
+++ b/pandas/_libs/groupby.pyx
@@ -230,7 +230,7 @@ def group_cumsum(numeric[:, ::1] out,
     """
     cdef:
         Py_ssize_t i, j, N, K, size
-        numeric val, y, t
+        numeric val, next_val, y, t
         numeric[:, ::1] accum, compensation
         intp_t lab
 
@@ -260,8 +260,9 @@ def group_cumsum(numeric[:, ::1] out,
                             accum[lab, j] = NaN
                             break
                 else:
-                    accum[lab, j] = val + accum[lab, j]
-                    out[i, j] = accum[lab, j]
+                    next_val = val + accum[lab, j]
+                    accum[lab, j] = next_val
+                    out[i, j] = next_val
 
 
 @cython.boundscheck(False)

From 6c1b9ca66ab9e33037e00abc15bca3d1f0df61f7 Mon Sep 17 00:00:00 2001
From: Matthew Zeitlin <mzeitlin@caltech.edu>
Date: Tue, 8 Jun 2021 12:00:25 -0400
Subject: [PATCH 3/6] PERF/CLN: no need for kahan for int group_cumsum

---
 pandas/_libs/groupby.pyx | 10 +++++-----
 1 file changed, 5 insertions(+), 5 deletions(-)

diff --git a/pandas/_libs/groupby.pyx b/pandas/_libs/groupby.pyx
index 8b24164659ec0..f5ae1c9b42c36 100644
--- a/pandas/_libs/groupby.pyx
+++ b/pandas/_libs/groupby.pyx
@@ -230,7 +230,7 @@ def group_cumsum(numeric[:, ::1] out,
     """
     cdef:
         Py_ssize_t i, j, N, K, size
-        numeric val, next_val, y, t
+        numeric val, y, t
         numeric[:, ::1] accum, compensation
         intp_t lab
 
@@ -253,16 +253,16 @@ def group_cumsum(numeric[:, ::1] out,
                         t = accum[lab, j] + y
                         compensation[lab, j] = t - accum[lab, j] - y
                         accum[lab, j] = t
-                        out[i, j] = accum[lab, j]
+                        out[i, j] = t
                     else:
                         out[i, j] = NaN
                         if not skipna:
                             accum[lab, j] = NaN
                             break
                 else:
-                    next_val = val + accum[lab, j]
-                    accum[lab, j] = next_val
-                    out[i, j] = next_val
+                    t = val + accum[lab, j]
+                    accum[lab, j] = t
+                    out[i, j] = t
 
 
 @cython.boundscheck(False)

From 8b8a832088456108d3a553200cf6fbce73b3c617 Mon Sep 17 00:00:00 2001
From: Matthew Zeitlin <mzeitlin@caltech.edu>
Date: Tue, 8 Jun 2021 12:02:11 -0400
Subject: [PATCH 4/6] Add cumsum to the CumminMax groupby benchmark

---
 asv_bench/benchmarks/groupby.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/asv_bench/benchmarks/groupby.py b/asv_bench/benchmarks/groupby.py
index 27761ccd0d917..1d349fe31ef0e 100644
--- a/asv_bench/benchmarks/groupby.py
+++ b/asv_bench/benchmarks/groupby.py
@@ -509,7 +509,7 @@ class CumminMax:
     param_names = ["dtype", "method"]
     params = [
         ["float64", "int64", "Float64", "Int64"],
-        ["cummin", "cummax"],
+        ["cummin", "cummax", "cumsum"],
     ]
 
     def setup(self, dtype, method):

From 06e98ab239da9934f98a35f8b386ac294a080062 Mon Sep 17 00:00:00 2001
From: Matthew Zeitlin <mzeitlin@caltech.edu>
Date: Tue, 8 Jun 2021 12:02:42 -0400
Subject: [PATCH 5/6] Rename CumminMax benchmark class to Cumulative

---
 asv_bench/benchmarks/groupby.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/asv_bench/benchmarks/groupby.py b/asv_bench/benchmarks/groupby.py
index 1d349fe31ef0e..8138ba2ced046 100644
--- a/asv_bench/benchmarks/groupby.py
+++ b/asv_bench/benchmarks/groupby.py
@@ -505,7 +505,7 @@ def time_frame_agg(self, dtype, method):
         self.df.groupby("key").agg(method)
 
 
-class CumminMax:
+class Cumulative:
     param_names = ["dtype", "method"]
     params = [
         ["float64", "int64", "Float64", "Int64"],

From 208e6edcb07ae91f03d38c5b55a77d0a170d3121 Mon Sep 17 00:00:00 2001
From: Matthew Zeitlin <mzeitlin@caltech.edu>
Date: Tue, 8 Jun 2021 19:41:54 -0400
Subject: [PATCH 6/6] Add comment explaining Kahan summation in the float branch of group_cumsum

---
 pandas/_libs/groupby.pyx | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/pandas/_libs/groupby.pyx b/pandas/_libs/groupby.pyx
index f5ae1c9b42c36..c05dbf5e3c8ec 100644
--- a/pandas/_libs/groupby.pyx
+++ b/pandas/_libs/groupby.pyx
@@ -247,6 +247,8 @@ def group_cumsum(numeric[:, ::1] out,
             for j in range(K):
                 val = values[i, j]
 
+                # For floats, use Kahan summation to reduce floating-point
+                # error (https://en.wikipedia.org/wiki/Kahan_summation_algorithm)
                 if numeric == float32_t or numeric == float64_t:
                     if val == val:
                         y = val - compensation[lab, j]