Clean up groupby cumsum inf handling with a nogil fused-type helper

CloseChoice · CloseChoice · commit a507488355d5 · 2021-11-20T10:48:35.000+01:00
diff --git a/pandas/_libs/groupby.pyx b/pandas/_libs/groupby.pyx
@@ -67,6 +67,14 @@ cdef enum InterpolationEnumType:
     INTERPOLATION_NEAREST,
     INTERPOLATION_MIDPOINT
 
+cdef inline bint check_inf(numeric_t ai) nogil:
+    # Return True if ai equals the MIN or MAX constant matching its width:
+    # float32 uses MINfloat32/MAXfloat32, every other type the float64 pair.
+    if numeric_t == float32_t:
+        return (ai == MINfloat32) or (ai == MAXfloat32)
+    else:
+        return (ai == MINfloat64) or (ai == MAXfloat64)
+
 
 cdef inline float64_t median_linear(float64_t* a, int n) nogil:
     cdef:
@@ -258,40 +266,18 @@ def group_cumsum(numeric_t[:, ::1] out,
 
                 # For floats, use Kahan summation to reduce floating-point
                 # error (https://en.wikipedia.org/wiki/Kahan_summation_algorithm)
-                if numeric_t == float32_t:
-                    if (val == MAXfloat32) or (val == MINfloat32):
-                        if (t == MAXfloat32) or (t == MINfloat32):
-                            val = t
-                        out[i, j] = val
-                        accum[lab, j] = val
-                        break
-                    elif val == val:
-                        y = val - compensation[lab, j]
-                        t = accum[lab, j] + y
-                        compensation[lab, j] = t - accum[lab, j] - y
-                        accum[lab, j] = t
-                        out[i, j] = t
-                    else:
-                        out[i, j] = NaN
-                        if not skipna:
-                            accum[lab, j] = NaN
-                            break
-                elif numeric_t == float64_t:
-                    if (val == MAXfloat64) or (val == MINfloat64):
-                        if (t == MAXfloat64) or (t == MINfloat64):
-                            val = t
-                        out[i, j] = val
-                        accum[lab, j] = val
-                        break
-                    elif val == val:
-                        y = val - compensation[lab, j]
-                        t = accum[lab, j] + y
-                        compensation[lab, j] = t - accum[lab, j] - y
-                        accum[lab, j] = t
-                        out[i, j] = t
-                        if (t == MAXfloat64) or (t == MINfloat64):
-                            compensation[lab, j] = 0
-                            break
+                if (numeric_t == float32_t) or (numeric_t == float64_t):
+                    if val == val:
+                        # inf/-inf would make the Kahan compensation NaN; add directly
+                        if check_inf(val) or check_inf(accum[lab, j]):
+                            accum[lab, j] += val
+                            out[i, j] = accum[lab, j]
+                        else:
+                            y = val - compensation[lab, j]
+                            t = accum[lab, j] + y
+                            compensation[lab, j] = t - accum[lab, j] - y
+                            accum[lab, j] = t
+                            out[i, j] = t
                     else:
                         out[i, j] = NaN
                         if not skipna:
diff --git a/pandas/tests/groupby/test_function.py b/pandas/tests/groupby/test_function.py
@@ -1182,3 +1182,10 @@ def test_cumsum_inf():
     result = ser.groupby([1, 1, 1]).cumsum()
     expected = Series([np.inf, np.inf, np.inf])
     tm.assert_series_equal(result, expected)
+
+
+def test_cumsum_ninf_inf():
+    # inf + (-inf) is NaN, which then propagates through the running sum
+    result = Series([np.inf, 1, 1, -np.inf, 1]).groupby([1, 1, 1, 1, 1]).cumsum()
+    expected = Series([np.inf, np.inf, np.inf, np.nan, np.nan])
+    tm.assert_series_equal(result, expected)