Skip to content

BUG:fix rolling skew kurt floating issue #18065

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 8 commits into from
Nov 8, 2017
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions doc/source/whatsnew/v0.21.1.txt
Original file line number Diff line number Diff line change
Expand Up @@ -59,6 +59,7 @@ Bug Fixes
- Bug in ``DataFrame.resample(...).apply(...)`` when there is a callable that returns different columns (:issue:`15169`)
- Bug in :class:`TimedeltaIndex` subtraction could incorrectly overflow when ``NaT`` is present (:issue:`17791`)
- Bug in :class:`DatetimeIndex` subtracting datetimelike from DatetimeIndex could fail to overflow (:issue:`18020`)
- Bug in ``pd.Series.rolling.skew()`` and ``rolling.kurt()`` where a window of all-equal values gave an incorrect result due to floating point error (:issue:`18044`)

Conversion
^^^^^^^^^^
Expand Down
23 changes: 21 additions & 2 deletions pandas/_libs/window.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -788,7 +788,17 @@ cdef inline double calc_skew(int64_t minp, int64_t nobs, double x, double xx,
A = x / dnobs
B = xx / dnobs - A * A
C = xxx / dnobs - A * A * A - 3 * A * B
if B <= 0 or nobs < 3:

# #18044: when all values in the window are equal, floating
# point error can leave B slightly different from 0, which
# makes the result a very large number.
#
# in core/nanops.py nanskew/nankurt call the function
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

can you add the reference to _zero_fperr as well (back to here)

# _zero_out_fperr(m2) to zero out floating point error.
# A variance below 1e-14 can be treated as zero, so here
# we follow the existing skew/kurt behaviour and
# check B <= 1e-14.
if B <= 1e-14 or nobs < 3:
result = NaN
else:
R = sqrt(B)
Expand Down Expand Up @@ -915,7 +925,16 @@ cdef inline double calc_kurt(int64_t minp, int64_t nobs, double x, double xx,
R = R * A
D = xxxx / dnobs - R - 6 * B * A * A - 4 * C * A

if B == 0 or nobs < 4:
# #18044: when all values in the window are equal, floating
# point error can leave B slightly different from 0, which
# makes the result a very large number.
#
# In core/nanops.py, nanskew/nankurt call the function
# _zero_out_fperr(m2) to zero out floating point error.
# A variance below 1e-14 can be treated as zero, so here
# we follow the existing skew/kurt behaviour and
# check B <= 1e-14.
if B <= 1e-14 or nobs < 4:
result = NaN
else:
K = (dnobs * dnobs - 1.) * D / (B * B) - 3 * ((dnobs - 1.) ** 2)
Expand Down
7 changes: 7 additions & 0 deletions pandas/core/nanops.py
Original file line number Diff line number Diff line change
Expand Up @@ -548,6 +548,9 @@ def nanskew(values, axis=None, skipna=True):
m3 = adjusted3.sum(axis, dtype=np.float64)

# floating point error
#
# #18044: calc_skew in _libs/window.pyx follows this behavior
# and treats m2 < 1e-14 as zero to remove floating point error
m2 = _zero_out_fperr(m2)
m3 = _zero_out_fperr(m3)

Expand Down Expand Up @@ -609,6 +612,9 @@ def nankurt(values, axis=None, skipna=True):
result = numer / denom - adj

# floating point error
#
# #18044: calc_kurt in _libs/window.pyx follows this behavior
# and treats denom < 1e-14 as zero to remove floating point error
numer = _zero_out_fperr(numer)
denom = _zero_out_fperr(denom)

Expand Down Expand Up @@ -699,6 +705,7 @@ def _maybe_null_out(result, axis, mask):


def _zero_out_fperr(arg):
# #18044: the rolling skew/kurt fix references this behavior
if isinstance(arg, np.ndarray):
with np.errstate(invalid='ignore'):
return np.where(np.abs(arg) < 1e-14, 0, arg)
Expand Down
10 changes: 10 additions & 0 deletions pandas/tests/test_window.py
Original file line number Diff line number Diff line change
Expand Up @@ -2979,6 +2979,16 @@ def test_rolling_kurt_edge_cases(self):
x = d.rolling(window=4).kurt()
tm.assert_series_equal(expected, x)

def test_rolling_skew_eq_value_fperr(self):
# #18044: rolling skew over all-equal values should return NaN.
# Builds a Series of 15 identical values (1.1), takes the rolling
# skew with window=10, and asserts every entry of the result is NaN.
a = pd.Series([1.1] * 15).rolling(window=10).skew()
assert np.isnan(a).all()

def test_rolling_kurt_eq_value_fperr(self):
# #18044: rolling kurt over all-equal values should return NaN.
# Builds a Series of 15 identical values (1.1), takes the rolling
# kurt with window=10, and asserts every entry of the result is NaN.
a = pd.Series([1.1] * 15).rolling(window=10).kurt()
assert np.isnan(a).all()

def _check_expanding_ndarray(self, func, static_comp, has_min_periods=True,
has_time_rule=True, preserve_nan=True):
result = func(self.arr)
Expand Down