Skip to content

Commit 37b5c3f

Browse files
mzeitlin11 authored and JulianWgs committed
CLN/PERF: no need for kahan for int group_cumsum (pandas-dev#41874)
1 parent c089f7e commit 37b5c3f

File tree

2 files changed

+7
-7
lines changed

2 files changed

+7
-7
lines changed

asv_bench/benchmarks/groupby.py

+2 -2
Original file line number | Diff line number | Diff line change
@@ -507,11 +507,11 @@ def time_frame_agg(self, dtype, method):
507507
self.df.groupby("key").agg(method)
508508

509509

510-
class CumminMax:
510+
class Cumulative:
511511
param_names = ["dtype", "method"]
512512
params = [
513513
["float64", "int64", "Float64", "Int64"],
514-
["cummin", "cummax"],
514+
["cummin", "cummax", "cumsum"],
515515
]
516516

517517
def setup(self, dtype, method):

pandas/_libs/groupby.pyx

+5 -5
Original file line number | Diff line number | Diff line change
@@ -247,24 +247,24 @@ def group_cumsum(numeric[:, ::1] out,
247247
for j in range(K):
248248
val = values[i, j]
249249

250+
# For floats, use Kahan summation to reduce floating-point
251+
# error (https://en.wikipedia.org/wiki/Kahan_summation_algorithm)
250252
if numeric == float32_t or numeric == float64_t:
251253
if val == val:
252254
y = val - compensation[lab, j]
253255
t = accum[lab, j] + y
254256
compensation[lab, j] = t - accum[lab, j] - y
255257
accum[lab, j] = t
256-
out[i, j] = accum[lab, j]
258+
out[i, j] = t
257259
else:
258260
out[i, j] = NaN
259261
if not skipna:
260262
accum[lab, j] = NaN
261263
break
262264
else:
263-
y = val - compensation[lab, j]
264-
t = accum[lab, j] + y
265-
compensation[lab, j] = t - accum[lab, j] - y
265+
t = val + accum[lab, j]
266266
accum[lab, j] = t
267-
out[i, j] = accum[lab, j]
267+
out[i, j] = t
268268

269269

270270
@cython.boundscheck(False)

0 commit comments

Comments
 (0)