Skip to content

Commit 93c755e

Browse files
zartbot authored and jreback committed
BUG:fix rolling skew kurt floating issue (#18065)
1 parent d3d60f8 commit 93c755e

File tree

4 files changed

+39
-2
lines changed

4 files changed

+39
-2
lines changed

doc/source/whatsnew/v0.21.1.txt

+1
Original file line numberDiff line numberDiff line change
@@ -59,6 +59,7 @@ Bug Fixes
5959
- Bug in ``DataFrame.resample(...).apply(...)`` when there is a callable that returns different columns (:issue:`15169`)
6060
- Bug in :class:`TimedeltaIndex` subtraction could incorrectly overflow when ``NaT`` is present (:issue:`17791`)
6161
- Bug in :class:`DatetimeIndex` subtracting datetimelike from DatetimeIndex could fail to overflow (:issue:`18020`)
62+
- Bug in ``pd.Series.rolling.skew()`` and ``rolling.kurt()`` where windows of all-equal values returned a very large number instead of ``NaN`` due to floating point error (:issue:`18044`)
6263

6364
Conversion
6465
^^^^^^^^^^

pandas/_libs/window.pyx

+21-2
Original file line numberDiff line numberDiff line change
@@ -788,7 +788,17 @@ cdef inline double calc_skew(int64_t minp, int64_t nobs, double x, double xx,
788788
A = x / dnobs
789789
B = xx / dnobs - A * A
790790
C = xxx / dnobs - A * A * A - 3 * A * B
791-
if B <= 0 or nobs < 3:
791+
792+
# #18044: when all values in the window are equal, floating
793+
# point error can leave B slightly non-zero, which makes the
794+
# result a very large number.
795+
#
796+
# In core/nanops.py, nanskew/nankurt call the function
797+
# _zero_out_fperr(m2) to fix this floating point error:
798+
# if the variance is less than 1e-14, it can be
799+
# treated as zero. Here we follow the original
800+
# skew/kurt behaviour and check B <= 1e-14.
801+
if B <= 1e-14 or nobs < 3:
792802
result = NaN
793803
else:
794804
R = sqrt(B)
@@ -915,7 +925,16 @@ cdef inline double calc_kurt(int64_t minp, int64_t nobs, double x, double xx,
915925
R = R * A
916926
D = xxxx / dnobs - R - 6 * B * A * A - 4 * C * A
917927

918-
if B == 0 or nobs < 4:
928+
# #18044: when all values in the window are equal, floating
929+
# point error can leave B slightly non-zero, which makes the
930+
# result a very large number.
931+
#
932+
# In core/nanops.py, nanskew/nankurt call the function
933+
# _zero_out_fperr(m2) to fix this floating point error:
934+
# if the variance is less than 1e-14, it can be
935+
# treated as zero. Here we follow the original
936+
# skew/kurt behaviour and check B <= 1e-14.
937+
if B <= 1e-14 or nobs < 4:
919938
result = NaN
920939
else:
921940
K = (dnobs * dnobs - 1.) * D / (B * B) - 3 * ((dnobs - 1.) ** 2)

pandas/core/nanops.py

+7
Original file line numberDiff line numberDiff line change
@@ -548,6 +548,9 @@ def nanskew(values, axis=None, skipna=True):
548548
m3 = adjusted3.sum(axis, dtype=np.float64)
549549

550550
# floating point error
551+
#
552+
# #18044: calc_skew in _libs/window.pyx follows this behavior
553+
# and treats m2 < 1e-14 as zero to fix the floating point error
551554
m2 = _zero_out_fperr(m2)
552555
m3 = _zero_out_fperr(m3)
553556

@@ -609,6 +612,9 @@ def nankurt(values, axis=None, skipna=True):
609612
result = numer / denom - adj
610613

611614
# floating point error
615+
#
616+
# #18044: calc_kurt in _libs/window.pyx follows this behavior
617+
# and treats denom < 1e-14 as zero to fix the floating point error
612618
numer = _zero_out_fperr(numer)
613619
denom = _zero_out_fperr(denom)
614620

@@ -699,6 +705,7 @@ def _maybe_null_out(result, axis, mask):
699705

700706

701707
def _zero_out_fperr(arg):
708+
# #18044: the rolling skew/kurt fix references this behavior
702709
if isinstance(arg, np.ndarray):
703710
with np.errstate(invalid='ignore'):
704711
return np.where(np.abs(arg) < 1e-14, 0, arg)

pandas/tests/test_window.py

+10
Original file line numberDiff line numberDiff line change
@@ -2979,6 +2979,16 @@ def test_rolling_kurt_edge_cases(self):
29792979
x = d.rolling(window=4).kurt()
29802980
tm.assert_series_equal(expected, x)
29812981

2982+
def test_rolling_skew_eq_value_fperr(self):
2983+
# #18044: rolling skew over all-equal values should return NaN
2984+
a = pd.Series([1.1] * 15).rolling(window=10).skew()
2985+
assert np.isnan(a).all()
2986+
2987+
def test_rolling_kurt_eq_value_fperr(self):
2988+
# #18044: rolling kurt over all-equal values should return NaN
2989+
a = pd.Series([1.1] * 15).rolling(window=10).kurt()
2990+
assert np.isnan(a).all()
2991+
29822992
def _check_expanding_ndarray(self, func, static_comp, has_min_periods=True,
29832993
has_time_rule=True, preserve_nan=True):
29842994
result = func(self.arr)

0 commit comments

Comments
 (0)