
BUG: low variance arrays' kurtosis wrongfully set to zero #58176


Closed
1 change: 1 addition & 0 deletions doc/source/whatsnew/v3.0.0.rst
@@ -324,6 +324,7 @@ Bug fixes
- Fixed bug in :meth:`Series.diff` allowing non-integer values for the ``periods`` argument. (:issue:`56607`)
- Fixed bug in :meth:`Series.rank` that doesn't preserve missing values for nullable integers when ``na_option='keep'``. (:issue:`56976`)
- Fixed bug in :meth:`Series.replace` and :meth:`DataFrame.replace` inconsistently replacing matching instances when ``regex=True`` and missing values are present. (:issue:`56599`)
- Fixed bug in :meth:`Series.rolling.kurt` where low variance arrays were zeroed out even when numerically stable (:issue:`57972`)

Categorical
^^^^^^^^^^^
3 changes: 2 additions & 1 deletion pandas/_libs/window/aggregations.pyx
@@ -712,7 +712,8 @@ cdef float64_t calc_kurt(int64_t minp, int64_t nobs,
# if the variance is less than 1e-14, it could be
# treat as zero, here we follow the original
# skew/kurt behaviour to check B <= 1e-14
- if B <= 1e-14:
+ # #57972: for non-zero but low variance arrays the cutoff can be lowered
+ if B <= 1e-281:
Member

What is this constant? The minimum number of significant decimal digits that have guaranteed precision for a double is 15, which I assume is where 1e-14 came from.

Contributor Author

It is the variance of the observations. The e-14 cutoff is too conservative and also sets numerically stable results to NaN, e.g. when the mean of the observations is very low.

I did some more testing after your comment and setting the cutoff as low as in nanops (e-281) prevents the false positives, but it also lets numerically unstable results pass, so I reverted it. Schemes that take into account the mean etc. of the observations in the cutoff were also not really satisfactory.

I'll look into this and make another PR if I find a satisfactory solution for the equation in the .pyx here. Numerically it behaves very differently from the one in nanops.

Member

Since double precision is only guaranteed up to 15 significant decimal digits across implementations, choosing anything smaller than 1e-14 is not going to work.
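
For reference, the 15-digit figure can be read off the float64 parameters (a quick check with numpy):

import numpy as np

print(np.finfo(np.float64).eps)        # ~2.22e-16, relative spacing at 1.0
print(np.finfo(np.float64).precision)  # 15 significant decimal digits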

Contributor Author

The e-14 might make one think that it is about a float's number of significant digits, but B <= e-14 only checks for potential numerical instabilities irrespective of that.

E.g.,

n = 10_000
scale = 1e12
data = np.array([2.3000001*scale, 2.3*scale]*n)

This is numerically unstable, even though the variance is larger than e-14 (the rolling kurt comes out around e15).

On the other hand

n = 10_000
scale = 1e-15
data = np.array([2.4*scale, 2.3*scale]*n)

This is numerically stable, but the variance is smaller than e-14 (rolling kurt = 2.5).

In nanops the equations become unstable only around e-281 in my tests, but here it's more complex.

Example code:

import numpy as np
import pandas as pd
import scipy.stats as st

# numerically unstable case: large scale, tiny relative spread
n = 10_000
scale = 1e12
data = np.array([2.3000001*scale, 2.3*scale]*n)

pdkurt = pd.Series(data).kurt()
scipykurt = st.kurtosis(data, bias=False)
print(pdkurt)
print(scipykurt)
print(pd.Series(data).rolling(10).kurt())

# numerically stable case: tiny scale, variance below the 1e-14 cutoff
n = 10_000
scale = 1e-15
data = np.array([2.4*scale, 2.3*scale]*n)

pdkurt = pd.Series(data).kurt()
scipykurt = st.kurtosis(data, bias=False)
print(pdkurt)
print(scipykurt)
print(pd.Series(data).rolling(10).kurt())
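
A quick way to probe roughly where the bulk kurtosis and scipy start to drift apart is to sweep the scale (a sketch; the exact breakdown exponents depend on the platform and on the pandas version/cutoff):

import numpy as np
import pandas as pd
import scipy.stats as st

base = np.array([2.4, 2.3] * 10_000)
for exp in (-15, -72, -76, -81, -281):
    data = base * 10.0 ** exp
    # compare pandas' nankurt-backed Series.kurt against scipy at each scale
    print(exp, pd.Series(data).kurt(), st.kurtosis(data, bias=False))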

Member
@WillAyd Apr 12, 2024

Is there a whitepaper that lays out what you are trying to accomplish? The problem with your local results is that they depend on your hardware, and floating point implementations can vary.

I don't believe that a statement like # scipy.kurt is nan at e-81 is generally true (nan can be generated from quite a few different patterns, although technically platforms should be choosing one canonical pattern), and the e-72 and e-281 sentinels seem arbitrary.

Member

I also agree adjusting this arbitrary limit is not ideal. IMO we shouldn't have one in the first place. We removed a similar arbitrary limit for var and std a few releases ago in #40505.

I would support just removing this limit and documenting the floating point precision artifacts.
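
For reference, a minimal sketch (not part of this PR) of the kind of cancellation artifact the cutoff currently masks, and that documentation would need to cover if it were removed; the exact residue is platform dependent:

import numpy as np

# constant array: the true variance is exactly zero, so kurtosis is degenerate
data = np.full(1000, 1.0 / 3.0)
n = float(len(data))

# raw power sums, as accumulated by the rolling aggregation code
x = data.sum()
xx = (data ** 2).sum()

# B is the biased variance formed from raw sums; cancellation can leave a tiny
# nonzero residue (or exactly 0.0, depending on platform and array length)
A = x / n
B = xx / n - A * A
print(B)           # e.g. something on the order of 1e-17 instead of 0.0
print(B <= 1e-14)  # True means the current cutoff treats this as zero variance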

Contributor Author

Good points @WillAyd, @mroeschke.

I was setting the limit(s) to better reflect the actual stability range of the calculations, but any such limit is somewhat arbitrary because it depends on the inputs as well as the machine/platform.

For the nanops version I would agree with removing the check, since it was introduced before the equation there was stabilised. From my tests the kurt calculation there is about as stable as the scipy implementation and only becomes unstable in very extreme cases.

From my tests, the kurt implementation in aggregations.pyx here seems comparatively much more unstable. The equations involved have a lot of potential cancellations. I would suggest first stabilising the equations there; that's something I could do.
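
As a rough illustration of what stabilising the equations could look like (a sketch of the standard bias-corrected formula on mean-centred data, not the PR's code, and without the rolling-window update logic):

import numpy as np

def stable_kurt(a):
    # two-pass, mean-centred, bias-corrected excess kurtosis; centring before
    # forming the moments avoids most of the raw power-sum cancellation
    a = np.asarray(a, dtype=np.float64)
    n = a.size
    if n < 4:
        return np.nan
    d = a - a.mean()
    m2 = np.mean(d ** 2)
    m4 = np.mean(d ** 4)
    if m2 == 0:
        return np.nan
    return ((n + 1) * (m4 / (m2 * m2)) - 3 * (n - 1)) * (n - 1) / ((n - 2) * (n - 3))

# should closely match scipy.stats.kurtosis(data, bias=False) on the
# low variance example from above
data = np.array([2.4e-15, 2.3e-15] * 10_000)
print(stable_kurt(data))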

result = NaN
else:
K = (dnobs * dnobs - 1.) * D / (B * B) - 3 * ((dnobs - 1.) ** 2)
16 changes: 10 additions & 6 deletions pandas/core/nanops.py
@@ -1357,9 +1357,13 @@ def nankurt(
# floating point error
#
# #18044 in _libs/windows.pyx calc_kurt follow this behavior
- # to fix the fperr to treat denom <1e-14 as zero
- numerator = _zero_out_fperr(numerator)
- denominator = _zero_out_fperr(denominator)
+ # to fix the fperr to treat denom <1e-14 as zero (default cutoff)
+ # GH-57972: lower the cutoff for low variance arrays to avoid zeroing out
+ # otherwise numerically stable values. scipy.stats.kurtosis and this
+ # implementation start diverging for examples with cutoffs below e-281
+ cutoff = 1e-281
+ numerator = _zero_out_fperr(numerator, cutoff)
+ denominator = _zero_out_fperr(denominator, cutoff)

if not isinstance(denominator, np.ndarray):
# if ``denom`` is a scalar, check these corner cases first before
@@ -1576,12 +1580,12 @@ def check_below_min_count(
return False


- def _zero_out_fperr(arg):
+ def _zero_out_fperr(arg, cutoff=1e-14):
# #18044 reference this behavior to fix rolling skew/kurt issue
if isinstance(arg, np.ndarray):
- return np.where(np.abs(arg) < 1e-14, 0, arg)
+ return np.where(np.abs(arg) < cutoff, 0, arg)
else:
- return arg.dtype.type(0) if np.abs(arg) < 1e-14 else arg
+ return arg.dtype.type(0) if np.abs(arg) < cutoff else arg


@disallow("M8", "m8")
15 changes: 15 additions & 0 deletions pandas/tests/test_nanops.py
@@ -1105,6 +1105,21 @@ def test_nans_skipna(self, samples, actual_kurt):
kurt = nanops.nankurt(samples, skipna=True)
tm.assert_almost_equal(kurt, actual_kurt)

def test_arrays_with_low_variance(self):
# GH-57972
# sample arrays with low variance have a lower threshold for breakdown
# of numerical stability and should be handled accordingly
n = 10_000
n2 = 10
# scipy.kurt is nan at e-81,
# both kurtosis start diverging from each other around e-76
scale = 1e-72
low_var = np.array([-2.3 * scale] * n2 + [-4.1 * scale] * n2 + [0.0] * n)
# calculated with scipy.stats.kurtosis(low_var, bias=False)
scipy_kurt = 632.556235239126
kurt = nanops.nankurt(low_var)
tm.assert_almost_equal(kurt, scipy_kurt)

@property
def prng(self):
return np.random.default_rng(2)