Skip to content

Commit 948195d

Browse files
committed
BUG: rolling_apply(..., center=True) should not append NaNs
1 parent 35a9527 commit 948195d

File tree

4 files changed

+118
-52
lines changed

4 files changed

+118
-52
lines changed

doc/source/v0.15.0.txt

+2-1
Original file line numberDiff line numberDiff line change
@@ -88,7 +88,8 @@ API changes
8888
:func:`rolling_window`, and :func:`rolling_apply` with ``center=True`` previously would return a result of the same
8989
structure as the input ``arg`` with ``NaN`` in the final ``(window-1)/2`` entries.
9090
Now the final ``(window-1)/2`` entries of the result are calculated as if the input ``arg`` were followed
91-
by ``(window-1)/2`` ``NaN`` values. (:issue:`7925`)
91+
by ``(window-1)/2`` ``NaN`` values (or, in the case of :func:`rolling_apply`, using windows that are truncated at the end of ``arg``).
92+
(:issue:`7925`, :issue:`8269`)
9293

9394
Prior behavior (note final value is ``NaN``):
9495

pandas/algos.pyx

+19-14
Original file line numberDiff line numberDiff line change
@@ -1846,8 +1846,9 @@ def roll_quantile(ndarray[float64_t, cast=True] input, int win,
18461846

18471847
return output
18481848

1849-
def roll_generic(ndarray[float64_t, cast=True] input, int win,
1850-
int minp, object func, object args, object kwargs):
1849+
def roll_generic(ndarray[float64_t, cast=True] input,
1850+
int win, int minp, int offset,
1851+
object func, object args, object kwargs):
18511852
cdef ndarray[double_t] output, counts, bufarr
18521853
cdef Py_ssize_t i, n
18531854
cdef float64_t *buf
@@ -1856,37 +1857,41 @@ def roll_generic(ndarray[float64_t, cast=True] input, int win,
18561857
if not input.flags.c_contiguous:
18571858
input = input.copy('C')
18581859

1859-
buf = <float64_t*> input.data
1860-
18611860
n = len(input)
18621861
if n == 0:
18631862
return input
18641863

18651864
minp = _check_minp(win, minp, n, floor=0)
18661865
output = np.empty(n, dtype=float)
1867-
counts = roll_sum(np.isfinite(input).astype(float), win, minp)
1866+
counts = roll_sum(np.concatenate((np.isfinite(input).astype(float), np.array([0.] * offset))), win, minp)[offset:]
18681867

1869-
bufarr = np.empty(win, dtype=float)
1870-
oldbuf = <float64_t*> bufarr.data
1871-
1872-
n = len(input)
1873-
for i from 0 <= i < int_min(win, n):
1868+
# truncated windows at the beginning, through first full-length window
1869+
for i from 0 <= i < (int_min(win, n) - offset):
18741870
if counts[i] >= minp:
1875-
output[i] = func(input[int_max(i - win + 1, 0) : i + 1], *args,
1876-
**kwargs)
1871+
output[i] = func(input[0 : (i + offset + 1)], *args, **kwargs)
18771872
else:
18781873
output[i] = NaN
18791874

1880-
for i from win <= i < n:
1875+
# remaining full-length windows
1876+
buf = <float64_t*> input.data
1877+
bufarr = np.empty(win, dtype=float)
1878+
oldbuf = <float64_t*> bufarr.data
1879+
for i from (win - offset) <= i < (n - offset):
18811880
buf = buf + 1
18821881
bufarr.data = <char*> buf
18831882
if counts[i] >= minp:
18841883
output[i] = func(bufarr, *args, **kwargs)
18851884
else:
18861885
output[i] = NaN
1887-
18881886
bufarr.data = <char*> oldbuf
18891887

1888+
# truncated windows at the end
1889+
for i from int_max(n - offset, 0) <= i < n:
1890+
if counts[i] >= minp:
1891+
output[i] = func(input[int_max(i + offset - win + 1, 0) : n], *args, **kwargs)
1892+
else:
1893+
output[i] = NaN
1894+
18901895
return output
18911896

18921897

pandas/stats/moments.py

+3-2
Original file line numberDiff line numberDiff line change
@@ -738,11 +738,12 @@ def rolling_apply(arg, window, func, min_periods=None, freq=None,
738738
frequency by resampling the data. This is done with the default parameters
739739
of :meth:`~pandas.Series.resample` (i.e. using the `mean`).
740740
"""
741+
offset = int((window - 1) / 2.) if center else 0
741742
def call_cython(arg, window, minp, args, kwargs):
742743
minp = _use_window(minp, window)
743-
return algos.roll_generic(arg, window, minp, func, args, kwargs)
744+
return algos.roll_generic(arg, window, minp, offset, func, args, kwargs)
744745
return _rolling_moment(arg, window, call_cython, min_periods, freq=freq,
745-
center=center, args=args, kwargs=kwargs)
746+
center=False, args=args, kwargs=kwargs)
746747

747748

748749
def rolling_window(arg, window=None, win_type=None, min_periods=None,

pandas/stats/tests/test_moments.py

+94-35
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,7 @@
55
from datetime import datetime
66
from numpy.random import randn
77
import numpy as np
8+
from distutils.version import LooseVersion
89

910
from pandas import Series, DataFrame, Panel, bdate_range, isnull, notnull
1011
from pandas.util.testing import (
@@ -877,6 +878,45 @@ def _ewma(s, com, min_periods, adjust, ignore_na):
877878
var_debiasing_factors=lambda x: _variance_debiasing_factors(x, com=com, adjust=adjust, ignore_na=ignore_na))
878879

879880
def test_expanding_consistency(self):
881+
base_functions = [
882+
(mom.expanding_count, lambda v: Series(v).count(), None),
883+
(mom.expanding_max, lambda v: Series(v).max(), None),
884+
(mom.expanding_min, lambda v: Series(v).min(), None),
885+
(mom.expanding_sum, lambda v: Series(v).sum(), None),
886+
(mom.expanding_mean, lambda v: Series(v).mean(), None),
887+
(mom.expanding_std, lambda v: Series(v).std(), 1),
888+
(mom.expanding_cov, lambda v: Series(v).cov(Series(v)), None),
889+
(mom.expanding_corr, lambda v: Series(v).corr(Series(v)), None),
890+
(mom.expanding_var, lambda v: Series(v).var(), 1),
891+
#(mom.expanding_skew, lambda v: Series(v).skew(), 3), # restore once GH 8086 is fixed
892+
#(mom.expanding_kurt, lambda v: Series(v).kurt(), 4), # restore once GH 8086 is fixed
893+
#(lambda x, min_periods: mom.expanding_quantile(x, 0.3, min_periods=min_periods),
894+
# lambda v: Series(v).quantile(0.3), None), # restore once GH 8084 is fixed
895+
(mom.expanding_median, lambda v: Series(v).median(), None),
896+
(mom.expanding_max, np.nanmax, 1),
897+
(mom.expanding_min, np.nanmin, 1),
898+
(mom.expanding_sum, np.nansum, 1),
899+
]
900+
if np.__version__ >= LooseVersion('1.8.0'):
901+
base_functions += [
902+
(mom.expanding_mean, np.nanmean, 1),
903+
(mom.expanding_std, lambda v: np.nanstd(v, ddof=1), 1),
904+
(mom.expanding_var, lambda v: np.nanvar(v, ddof=1), 1),
905+
]
906+
if np.__version__ >= LooseVersion('1.9.0'):
907+
base_functions += [
908+
(mom.expanding_median, np.nanmedian, 1),
909+
]
910+
no_nan_functions = [
911+
(mom.expanding_max, np.max, None),
912+
(mom.expanding_min, np.min, None),
913+
(mom.expanding_sum, np.sum, None),
914+
(mom.expanding_mean, np.mean, None),
915+
(mom.expanding_std, lambda v: np.std(v, ddof=1), 1),
916+
(mom.expanding_var, lambda v: np.var(v, ddof=1), 1),
917+
(mom.expanding_median, np.median, None),
918+
]
919+
880920
for min_periods in [0, 1, 2, 3, 4]:
881921

882922
# test consistency between different expanding_* moments
@@ -895,25 +935,15 @@ def test_expanding_consistency(self):
895935
var_debiasing_factors=lambda x: mom.expanding_count(x) / (mom.expanding_count(x) - 1.).replace(0., np.nan)
896936
)
897937

898-
# test consistency between expanding_xyz() and expanding_apply of Series/DataFrame.xyz()
938+
# test consistency between expanding_xyz() and either (a) expanding_apply of Series.xyz(),
939+
# or (b) expanding_apply of np.nanxyz()
899940
for x in self._test_data():
900941
assert_equal = assert_series_equal if isinstance(x, Series) else assert_frame_equal
901-
for (expanding_f, f, require_min_periods) in [
902-
(mom.expanding_count, lambda v: Series(v).count(), None),
903-
(mom.expanding_max, lambda v: Series(v).max(), None),
904-
(mom.expanding_min, lambda v: Series(v).min(), None),
905-
(mom.expanding_sum, lambda v: Series(v).sum(), None),
906-
(mom.expanding_mean, lambda v: Series(v).mean(), None),
907-
(mom.expanding_std, lambda v: Series(v).std(), 1),
908-
(mom.expanding_cov, lambda v: Series(v).cov(Series(v)), None),
909-
(mom.expanding_corr, lambda v: Series(v).corr(Series(v)), None),
910-
(mom.expanding_var, lambda v: Series(v).var(), 1),
911-
#(mom.expanding_skew, lambda v: Series(v).skew(), 3), # restore once GH 8086 is fixed
912-
#(mom.expanding_kurt, lambda v: Series(v).kurt(), 4), # restore once GH 8086 is fixed
913-
#(lambda x, min_periods: mom.expanding_quantile(x, 0.3, min_periods=min_periods),
914-
# lambda v: Series(v).quantile(0.3), None), # restore once GH 8084 is fixed
915-
(mom.expanding_median, lambda v: Series(v).median(), None),
916-
]:
942+
functions = base_functions
943+
# GH 8269
944+
if x.notnull().all().all():
945+
functions = base_functions + no_nan_functions
946+
for (expanding_f, f, require_min_periods) in functions:
917947
if require_min_periods and (min_periods is not None) and (min_periods < require_min_periods):
918948
continue
919949

@@ -938,7 +968,46 @@ def test_expanding_consistency(self):
938968
assert_panel_equal(expanding_f_result, expected)
939969

940970
def test_rolling_consistency(self):
941-
for window in [1, 3, 10, 20]:
971+
base_functions = [
972+
(mom.rolling_count, lambda v: Series(v).count(), None),
973+
(mom.rolling_max, lambda v: Series(v).max(), None),
974+
(mom.rolling_min, lambda v: Series(v).min(), None),
975+
(mom.rolling_sum, lambda v: Series(v).sum(), None),
976+
(mom.rolling_mean, lambda v: Series(v).mean(), None),
977+
(mom.rolling_std, lambda v: Series(v).std(), 1),
978+
(mom.rolling_cov, lambda v: Series(v).cov(Series(v)), None),
979+
(mom.rolling_corr, lambda v: Series(v).corr(Series(v)), None),
980+
(mom.rolling_var, lambda v: Series(v).var(), 1),
981+
#(mom.rolling_skew, lambda v: Series(v).skew(), 3), # restore once GH 8086 is fixed
982+
# (mom.rolling_kurt, lambda v: Series(v).kurt(), 4), # restore once GH 8086 is fixed
983+
#(lambda x, window, min_periods, center: mom.rolling_quantile(x, window, 0.3, min_periods=min_periods, center=center),
984+
# lambda v: Series(v).quantile(0.3), None), # restore once GH 8084 is fixed
985+
(mom.rolling_median, lambda v: Series(v).median(), None),
986+
(mom.rolling_max, np.nanmax, 1),
987+
(mom.rolling_min, np.nanmin, 1),
988+
(mom.rolling_sum, np.nansum, 1),
989+
]
990+
if np.__version__ >= LooseVersion('1.8.0'):
991+
base_functions += [
992+
(mom.rolling_mean, np.nanmean, 1),
993+
(mom.rolling_std, lambda v: np.nanstd(v, ddof=1), 1),
994+
(mom.rolling_var, lambda v: np.nanvar(v, ddof=1), 1),
995+
]
996+
if np.__version__ >= LooseVersion('1.9.0'):
997+
base_functions += [
998+
(mom.rolling_median, np.nanmedian, 1),
999+
]
1000+
no_nan_functions = [
1001+
(mom.rolling_max, np.max, None),
1002+
(mom.rolling_min, np.min, None),
1003+
(mom.rolling_sum, np.sum, None),
1004+
(mom.rolling_mean, np.mean, None),
1005+
(mom.rolling_std, lambda v: np.std(v, ddof=1), 1),
1006+
(mom.rolling_var, lambda v: np.var(v, ddof=1), 1),
1007+
(mom.rolling_median, np.median, None),
1008+
]
1009+
1010+
for window in [1, 2, 3, 10, 20]:
9421011
for min_periods in set([0, 1, 2, 3, 4, window]):
9431012
if min_periods and (min_periods > window):
9441013
continue
@@ -962,25 +1031,15 @@ def test_rolling_consistency(self):
9621031
(mom.rolling_count(x, window=window, center=center) - 1.).replace(0., np.nan)),
9631032
)
9641033

965-
# test consistency between rolling_xyz and rolling_apply of Series/DataFrame.xyz
1034+
# test consistency between rolling_xyz() and either (a) rolling_apply of Series.xyz(),
1035+
# or (b) rolling_apply of np.nanxyz()
9661036
for x in self._test_data():
9671037
assert_equal = assert_series_equal if isinstance(x, Series) else assert_frame_equal
968-
for (rolling_f, f, require_min_periods) in [
969-
(mom.rolling_count, lambda v: Series(v).count(), None),
970-
(mom.rolling_max, lambda v: Series(v).max(), None),
971-
(mom.rolling_min, lambda v: Series(v).min(), None),
972-
(mom.rolling_sum, lambda v: Series(v).sum(), None),
973-
(mom.rolling_mean, lambda v: Series(v).mean(), None),
974-
(mom.rolling_std, lambda v: Series(v).std(), 1),
975-
(mom.rolling_cov, lambda v: Series(v).cov(Series(v)), None),
976-
(mom.rolling_corr, lambda v: Series(v).corr(Series(v)), None),
977-
(mom.rolling_var, lambda v: Series(v).var(), 1),
978-
#(mom.rolling_skew, lambda v: Series(v).skew(), 3), # restore once GH 8086 is fixed
979-
# (mom.rolling_kurt, lambda v: Series(v).kurt(), 4), # restore once GH 8086 is fixed
980-
#(lambda x, window, min_periods, center: mom.rolling_quantile(x, window, 0.3, min_periods=min_periods, center=center),
981-
# lambda v: Series(v).quantile(0.3), None), # restore once GH 8084 is fixed
982-
(mom.rolling_median, lambda v: Series(v).median(), None),
983-
]:
1038+
functions = base_functions
1039+
# GH 8269
1040+
if x.notnull().all().all():
1041+
functions = base_functions + no_nan_functions
1042+
for (rolling_f, f, require_min_periods) in functions:
9841043
if require_min_periods and (min_periods is not None) and (min_periods < require_min_periods):
9851044
continue
9861045

0 commit comments

Comments
 (0)