diff --git a/asv_bench/benchmarks/groupby.py b/asv_bench/benchmarks/groupby.py index 0f37933a1b768..1648985a56b91 100644 --- a/asv_bench/benchmarks/groupby.py +++ b/asv_bench/benchmarks/groupby.py @@ -507,11 +507,11 @@ def time_frame_agg(self, dtype, method): self.df.groupby("key").agg(method) -class CumminMax: +class Cumulative: param_names = ["dtype", "method"] params = [ ["float64", "int64", "Float64", "Int64"], - ["cummin", "cummax"], + ["cummin", "cummax", "cumsum"], ] def setup(self, dtype, method): diff --git a/pandas/_libs/groupby.pyx b/pandas/_libs/groupby.pyx index b72b927b3c2a8..c05dbf5e3c8ec 100644 --- a/pandas/_libs/groupby.pyx +++ b/pandas/_libs/groupby.pyx @@ -247,24 +247,24 @@ def group_cumsum(numeric[:, ::1] out, for j in range(K): val = values[i, j] + # For floats, use Kahan summation to reduce floating-point + # error (https://en.wikipedia.org/wiki/Kahan_summation_algorithm) if numeric == float32_t or numeric == float64_t: if val == val: y = val - compensation[lab, j] t = accum[lab, j] + y compensation[lab, j] = t - accum[lab, j] - y accum[lab, j] = t - out[i, j] = accum[lab, j] + out[i, j] = t else: out[i, j] = NaN if not skipna: accum[lab, j] = NaN break else: - y = val - compensation[lab, j] - t = accum[lab, j] + y - compensation[lab, j] = t - accum[lab, j] - y + t = val + accum[lab, j] accum[lab, j] = t - out[i, j] = accum[lab, j] + out[i, j] = t @cython.boundscheck(False)