From b32fe26edbb1cbbdd4a62ea8138cc5d73b88da6f Mon Sep 17 00:00:00 2001 From: kevin Date: Thu, 2 Nov 2017 00:05:11 +0800 Subject: [PATCH 1/8] fix rolling skew kurt floating issue --- pandas/_libs/window.pyx | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/pandas/_libs/window.pyx b/pandas/_libs/window.pyx index a95e50785c9b0..914b998f7ccfc 100644 --- a/pandas/_libs/window.pyx +++ b/pandas/_libs/window.pyx @@ -788,7 +788,7 @@ cdef inline double calc_skew(int64_t minp, int64_t nobs, double x, double xx, A = x / dnobs B = xx / dnobs - A * A C = xxx / dnobs - A * A * A - 3 * A * B - if B <= 0 or nobs < 3: + if B <= 1e-14 or nobs < 3: result = NaN else: R = sqrt(B) @@ -915,7 +915,7 @@ cdef inline double calc_kurt(int64_t minp, int64_t nobs, double x, double xx, R = R * A D = xxxx / dnobs - R - 6 * B * A * A - 4 * C * A - if B == 0 or nobs < 4: + if B <= 1e-14 or nobs < 4: result = NaN else: K = (dnobs * dnobs - 1.) * D / (B * B) - 3 * ((dnobs - 1.) ** 2) From ca253dd36b9481a1f8de1878c0eebee6c5dfd697 Mon Sep 17 00:00:00 2001 From: kevin Date: Thu, 2 Nov 2017 10:24:28 +0800 Subject: [PATCH 2/8] update whatsnew --- doc/source/whatsnew/v0.21.1.txt | 1 + 1 file changed, 1 insertion(+) diff --git a/doc/source/whatsnew/v0.21.1.txt b/doc/source/whatsnew/v0.21.1.txt index 088168fd8a008..84b6a1885178a 100644 --- a/doc/source/whatsnew/v0.21.1.txt +++ b/doc/source/whatsnew/v0.21.1.txt @@ -59,6 +59,7 @@ Bug Fixes - Bug in ``DataFrame.resample(...).apply(...)`` when there is a callable that returns different columns (:issue:`15169`) - Bug in :class:`TimedeltaIndex` subtraction could incorrectly overflow when ``NaT`` is present (:issue:`17791`) - Bug in :class:`DatetimeIndex` subtracting datetimelike from DatetimeIndex could fail to overflow (:issue:`18020`) +- Bug in ``pd.Series.rolling.skew()`` and ``rolling.kurt()`` where floating point error produced spurious values instead of ``NaN`` for windows in which all values are equal (:issue:`18044`) Conversion ^^^^^^^^^^ From 51aa486f3e6c8ca8f676eaff14ca5e6ee276d99f Mon Sep 17 00:00:00 
2001 From: kevin Date: Thu, 2 Nov 2017 11:16:17 +0800 Subject: [PATCH 3/8] add unit test case --- pandas/tests/test_window.py | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/pandas/tests/test_window.py b/pandas/tests/test_window.py index c567613acebd1..f874eedc32913 100644 --- a/pandas/tests/test_window.py +++ b/pandas/tests/test_window.py @@ -2979,6 +2979,16 @@ def test_rolling_kurt_edge_cases(self): x = d.rolling(window=4).kurt() tm.assert_series_equal(expected, x) + def test_rolling_skew_eq_value_fperr(self): + # #18044 all rolling skew for all equal values should return NaN + a = pd.Series([1.1]*15).rolling(10).skew() + assert np.isnan(a).all() + + def test_rolling_kurt_eq_value_fperr(self): + # #18044 all rolling kurt for all equal values should return NaN + a = pd.Series([1.1]*15).rolling(10).kurt() + assert np.isnan(a).all() + def _check_expanding_ndarray(self, func, static_comp, has_min_periods=True, has_time_rule=True, preserve_nan=True): result = func(self.arr) From 3522031aa62b8e77148770dac572f98988e857e0 Mon Sep 17 00:00:00 2001 From: kevin Date: Thu, 2 Nov 2017 11:24:38 +0800 Subject: [PATCH 4/8] fix pep8 issue --- pandas/tests/test_window.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/pandas/tests/test_window.py b/pandas/tests/test_window.py index f874eedc32913..165813a89b5db 100644 --- a/pandas/tests/test_window.py +++ b/pandas/tests/test_window.py @@ -2981,12 +2981,12 @@ def test_rolling_kurt_edge_cases(self): def test_rolling_skew_eq_value_fperr(self): # #18044 all rolling skew for all equal values should return NaN - a = pd.Series([1.1]*15).rolling(10).skew() + a = pd.Series([1.1] * 15).rolling(window=10).skew() assert np.isnan(a).all() def test_rolling_kurt_eq_value_fperr(self): # #18044 all rolling kurt for all equal values should return NaN - a = pd.Series([1.1]*15).rolling(10).kurt() + a = pd.Series([1.1] * 15).rolling(window=10).kurt() assert np.isnan(a).all() def _check_expanding_ndarray(self, func, 
static_comp, has_min_periods=True, From 74efc4210659bac18c30bba64e4627436f430e94 Mon Sep 17 00:00:00 2001 From: kevin Date: Sun, 5 Nov 2017 22:20:08 +0800 Subject: [PATCH 5/8] update with review comments --- pandas/_libs/window.pyx | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/pandas/_libs/window.pyx b/pandas/_libs/window.pyx index 914b998f7ccfc..889134b62119e 100644 --- a/pandas/_libs/window.pyx +++ b/pandas/_libs/window.pyx @@ -788,6 +788,12 @@ cdef inline double calc_skew(int64_t minp, int64_t nobs, double x, double xx, A = x / dnobs B = xx / dnobs - A * A C = xxx / dnobs - A * A * A - 3 * A * B + + # #18044: with uniform distribution, floating issue will + # cause B != 0. and cause the result is a very + # large number. + # if B is less than 1e-14, + # we treat it as uniform distribution. if B <= 1e-14 or nobs < 3: result = NaN else: @@ -915,6 +921,11 @@ cdef inline double calc_kurt(int64_t minp, int64_t nobs, double x, double xx, R = R * A D = xxxx / dnobs - R - 6 * B * A * A - 4 * C * A + # #18044: with uniform distribution, floating issue will + # cause B != 0. and cause the result is a very + # large number. + # if B is less than 1e-14, + # we treat it as uniform distribution. if B <= 1e-14 or nobs < 4: result = NaN else: From 9a968ddd24d41019b52b9e0d934aa1a0ce5591ff Mon Sep 17 00:00:00 2001 From: kevin Date: Mon, 6 Nov 2017 00:29:55 +0800 Subject: [PATCH 6/8] remove trailing whitespace --- pandas/_libs/window.pyx | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/pandas/_libs/window.pyx b/pandas/_libs/window.pyx index 889134b62119e..0debdd26fd798 100644 --- a/pandas/_libs/window.pyx +++ b/pandas/_libs/window.pyx @@ -789,7 +789,7 @@ cdef inline double calc_skew(int64_t minp, int64_t nobs, double x, double xx, B = xx / dnobs - A * A C = xxx / dnobs - A * A * A - 3 * A * B - # #18044: with uniform distribution, floating issue will + # #18044: with uniform distribution, floating issue will # cause B != 0. 
and cause the result is a very # large number. # if B is less than 1e-14, @@ -921,7 +921,7 @@ cdef inline double calc_kurt(int64_t minp, int64_t nobs, double x, double xx, R = R * A D = xxxx / dnobs - R - 6 * B * A * A - 4 * C * A - # #18044: with uniform distribution, floating issue will + # #18044: with uniform distribution, floating issue will # cause B != 0. and cause the result is a very # large number. # if B is less than 1e-14, From 3e20523e58d031ccd0da90e7f8624e1c72d4df9a Mon Sep 17 00:00:00 2001 From: kevin Date: Tue, 7 Nov 2017 21:35:40 +0800 Subject: [PATCH 7/8] update with review comments --- pandas/_libs/window.pyx | 16 ++++++++++++---- 1 file changed, 12 insertions(+), 4 deletions(-) diff --git a/pandas/_libs/window.pyx b/pandas/_libs/window.pyx index 0debdd26fd798..4d5ebdc0c581a 100644 --- a/pandas/_libs/window.pyx +++ b/pandas/_libs/window.pyx @@ -792,8 +792,12 @@ cdef inline double calc_skew(int64_t minp, int64_t nobs, double x, double xx, # #18044: with uniform distribution, floating issue will # cause B != 0. and cause the result is a very # large number. - # if B is less than 1e-14, - # we treat it as uniform distribution. + # + # in core/nanops.py nanskew/nankurt call the function + # _zero_out_fperr(m2) to fix floating error. + # if the variance is less than 1e-14, it could be + # treated as zero, here we follow the original + # skew/kurt behaviour to check B <= 1e-14 if B <= 1e-14 or nobs < 3: result = NaN else: @@ -924,8 +928,12 @@ cdef inline double calc_kurt(int64_t minp, int64_t nobs, double x, double xx, # #18044: with uniform distribution, floating issue will # cause B != 0. and cause the result is a very # large number. - # if B is less than 1e-14, - # we treat it as uniform distribution. + # + # in core/nanops.py nanskew/nankurt call the function + # _zero_out_fperr(m2) to fix floating error. 
+ # if the variance is less than 1e-14, it could be + # treated as zero, here we follow the original + # skew/kurt behaviour to check B <= 1e-14 if B <= 1e-14 or nobs < 4: result = NaN else: From ee5a33d6015771f44b9b9e55480a72bc032b2473 Mon Sep 17 00:00:00 2001 From: kevin Date: Tue, 7 Nov 2017 21:51:19 +0800 Subject: [PATCH 8/8] update nanops.py to reference --- pandas/core/nanops.py | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/pandas/core/nanops.py b/pandas/core/nanops.py index baeb869239c1e..e1c09947ac0b4 100644 --- a/pandas/core/nanops.py +++ b/pandas/core/nanops.py @@ -548,6 +548,9 @@ def nanskew(values, axis=None, skipna=True): m3 = adjusted3.sum(axis, dtype=np.float64) # floating point error + # + # #18044 in _libs/window.pyx calc_skew follows this behavior + # to fix the fperr to treat m2 < 1e-14 as zero m2 = _zero_out_fperr(m2) m3 = _zero_out_fperr(m3) @@ -609,6 +612,9 @@ def nankurt(values, axis=None, skipna=True): result = numer / denom - adj # floating point error + # + # #18044 in _libs/window.pyx calc_kurt follows this behavior + # to fix the fperr to treat denom < 1e-14 as zero numer = _zero_out_fperr(numer) denom = _zero_out_fperr(denom) @@ -699,6 +705,7 @@ def _maybe_null_out(result, axis, mask): def _zero_out_fperr(arg): + # #18044 reference this behavior to fix rolling skew/kurt issue if isinstance(arg, np.ndarray): with np.errstate(invalid='ignore'): return np.where(np.abs(arg) < 1e-14, 0, arg)