# Module: pandas/tests/window/moments/test_moments_rolling_quantile.py
from functools import partial

import numpy as np
import pytest

from pandas import DataFrame, Series, concat, isna, notna
import pandas._testing as tm

import pandas.tseries.offsets as offsets


def scoreatpercentile(a, per):
    """
    Return the value at quantile ``per`` of ``a`` using linear interpolation.

    Reference implementation that the rolling-quantile tests compare against.

    Parameters
    ----------
    a : array-like
        Values to evaluate; they are sorted along axis 0 before the lookup.
    per : float
        Quantile to evaluate, expressed as a fraction in ``[0, 1]``.

    Returns
    -------
    scalar or np.ndarray
        The last sorted value when ``per`` selects the final position,
        otherwise the linear interpolation between the two neighbouring
        sorted values.

    Raises
    ------
    ValueError
        If ``a`` is empty along axis 0 or ``per`` lies outside ``[0, 1]``.
    """
    values = np.sort(a, axis=0)
    last = values.shape[0] - 1

    # Reject inputs the interpolation below cannot handle: a negative ``per``
    # would silently wrap around through negative indexing, while an empty
    # input or ``per > 1`` would only surface as an opaque IndexError.
    if last < 0:
        raise ValueError("cannot compute a quantile of an empty input")
    if not 0 <= per <= 1:
        raise ValueError(f"per must be in [0, 1], got {per!r}")

    idx = int(per * last)
    if idx == last:
        return values[-1]

    # fractions of the way through the sorted data at idx and idx + 1
    qlow = float(idx) / float(last)
    qhig = float(idx + 1) / float(last)
    vlow = values[idx]
    vhig = values[idx + 1]
    return vlow + (vhig - vlow) * (per - qlow) / (qhig - qlow)
@pytest.mark.parametrize("q", [0.0, 0.1, 0.5, 0.9, 1.0])
def test_series(series, q):
    """Check the last 50-row rolling quantile of a Series against the reference."""
    compare_func = partial(scoreatpercentile, per=q)
    result = series.rolling(50).quantile(q)
    assert isinstance(result, Series)
    tm.assert_almost_equal(result.iloc[-1], compare_func(series.iloc[-50:]))


@pytest.mark.parametrize("q", [0.0, 0.1, 0.5, 0.9, 1.0])
def test_frame(raw, frame, q):
    """Check the last 50-row rolling quantile of every column against the reference."""
    compare_func = partial(scoreatpercentile, per=q)
    result = frame.rolling(50).quantile(q)
    assert isinstance(result, DataFrame)
    tm.assert_series_equal(
        result.iloc[-1, :],
        frame.iloc[-50:, :].apply(compare_func, axis=0, raw=raw),
        check_names=False,
    )


@pytest.mark.parametrize("q", [0.0, 0.1, 0.5, 0.9, 1.0])
def test_time_rule_series(series, q):
    """Check rolling quantile on a business-day resampled Series."""
    compare_func = partial(scoreatpercentile, per=q)
    win = 25
    ser = series.iloc[::2].resample("B").mean()
    series_result = ser.rolling(window=win, min_periods=10).quantile(q)
    last_date = series_result.index[-1]
    # the final window spans ``win`` business days ending on ``last_date``
    prev_date = last_date - (win - 1) * offsets.BDay()

    trunc_series = series.iloc[::2].truncate(prev_date, last_date)
    # .iloc: a bare [-1] on a datetime-indexed Series is a deprecated
    # positional fallback
    tm.assert_almost_equal(series_result.iloc[-1], compare_func(trunc_series))


@pytest.mark.parametrize("q", [0.0, 0.1, 0.5, 0.9, 1.0])
def test_time_rule_frame(raw, frame, q):
    """Check rolling quantile on a business-day resampled DataFrame."""
    compare_func = partial(scoreatpercentile, per=q)
    win = 25
    frm = frame.iloc[::2].resample("B").mean()
    frame_result = frm.rolling(window=win, min_periods=10).quantile(q)
    last_date = frame_result.index[-1]
    # the final window spans ``win`` business days ending on ``last_date``
    prev_date = last_date - (win - 1) * offsets.BDay()

    trunc_frame = frame.iloc[::2].truncate(prev_date, last_date)
    tm.assert_series_equal(
        frame_result.xs(last_date),
        trunc_frame.apply(compare_func, raw=raw),
        check_names=False,
    )


@pytest.mark.parametrize("q", [0.0, 0.1, 0.5, 0.9, 1.0])
def test_nans(q):
    """Check NaN exclusion and ``min_periods`` handling of rolling quantile."""
    compare_func = partial(scoreatpercentile, per=q)
    obj = Series(np.random.randn(50))
    obj.iloc[:10] = np.nan
    obj.iloc[-10:] = np.nan

    # only the 30 non-NaN observations contribute to the full-length window
    result = obj.rolling(50, min_periods=30).quantile(q)
    tm.assert_almost_equal(result.iloc[-1], compare_func(obj.iloc[10:-10]))

    # min_periods is working correctly: position 24 is the first window
    # holding 15 valid observations, position -6 the last one
    result = obj.rolling(20, min_periods=15).quantile(q)
    assert isna(result.iloc[23])
    assert not isna(result.iloc[24])

    assert not isna(result.iloc[-6])
    assert isna(result.iloc[-5])

    obj2 = Series(np.random.randn(20))
    result = obj2.rolling(10, min_periods=5).quantile(q)
    assert isna(result.iloc[3])
    assert notna(result.iloc[4])

    # min_periods=0 is expected to behave like min_periods=1
    result0 = obj.rolling(20, min_periods=0).quantile(q)
    result1 = obj.rolling(20, min_periods=1).quantile(q)
    tm.assert_almost_equal(result0, result1)


# NOTE(review): 99 and 100 presumably stand for len(series) - 1 and len(series),
# i.e. a 100-row ``series`` fixture -- confirm against conftest.
@pytest.mark.parametrize("minp", [0, 99, 100])
@pytest.mark.parametrize("q", [0.0, 0.1, 0.5, 0.9, 1.0])
def test_min_periods(series, minp, q):
    """Check a window longer than the data matches a window of exactly its length."""
    result = series.rolling(len(series) + 1, min_periods=minp).quantile(q)
    expected = series.rolling(len(series), min_periods=minp).quantile(q)
    nan_mask = isna(result)
    tm.assert_series_equal(nan_mask, isna(expected))

    # compare only the positions where both results hold a value
    nan_mask = ~nan_mask
    tm.assert_almost_equal(result[nan_mask], expected[nan_mask])


@pytest.mark.parametrize("q", [0.0, 0.1, 0.5, 0.9, 1.0])
def test_center(q):
    """Check ``center=True`` against a trailing window over NaN-padded data."""
    obj = Series(np.random.randn(50))
    obj.iloc[:10] = np.nan
    obj.iloc[-10:] = np.nan

    result = obj.rolling(20, center=True).quantile(q)
    # pad 9 trailing NaNs, then drop the first 9 results to realign positions
    padded = concat([obj, Series([np.nan] * 9)])
    expected = padded.rolling(20).quantile(q).iloc[9:].reset_index(drop=True)
    tm.assert_series_equal(result, expected)


@pytest.mark.parametrize("q", [0.0, 0.1, 0.5, 0.9, 1.0])
def test_center_reindex_series(series, q):
    """Check centered rolling quantile of a Series equals a shifted trailing one."""
    # shifter index
    s = [f"x{x:d}" for x in range(12)]

    series_xp = (
        series.reindex(list(series.index) + s)
        .rolling(window=25)
        .quantile(q)
        .shift(-12)
        .reindex(series.index)
    )

    series_rs = series.rolling(window=25, center=True).quantile(q)
    tm.assert_series_equal(series_xp, series_rs)


@pytest.mark.parametrize("q", [0.0, 0.1, 0.5, 0.9, 1.0])
def test_center_reindex_frame(frame, q):
    """Check centered rolling quantile of a DataFrame equals a shifted trailing one."""
    # shifter index
    s = [f"x{x:d}" for x in range(12)]

    frame_xp = (
        frame.reindex(list(frame.index) + s)
        .rolling(window=25)
        .quantile(q)
        .shift(-12)
        .reindex(frame.index)
    )
    frame_rs = frame.rolling(window=25, center=True).quantile(q)
    tm.assert_frame_equal(frame_xp, frame_rs)
# Module: pandas/tests/window/moments/test_moments_rolling_skew_kurt.py
from functools import partial

import numpy as np
import pytest

import pandas.util._test_decorators as td

from pandas import DataFrame, Series, concat, isna, notna
import pandas._testing as tm

import pandas.tseries.offsets as offsets


@td.skip_if_no_scipy
@pytest.mark.parametrize("sp_func, roll_func", [["kurtosis", "kurt"], ["skew", "skew"]])
def test_series(series, sp_func, roll_func):
    """Check the last 50-row rolling skew/kurt of a Series against scipy."""
    import scipy.stats

    compare_func = partial(getattr(scipy.stats, sp_func), bias=False)
    result = getattr(series.rolling(50), roll_func)()
    assert isinstance(result, Series)
    tm.assert_almost_equal(result.iloc[-1], compare_func(series.iloc[-50:]))


@td.skip_if_no_scipy
@pytest.mark.parametrize("sp_func, roll_func", [["kurtosis", "kurt"], ["skew", "skew"]])
def test_frame(raw, frame, sp_func, roll_func):
    """Check the last 50-row rolling skew/kurt of every column against scipy."""
    import scipy.stats

    compare_func = partial(getattr(scipy.stats, sp_func), bias=False)
    result = getattr(frame.rolling(50), roll_func)()
    assert isinstance(result, DataFrame)
    tm.assert_series_equal(
        result.iloc[-1, :],
        frame.iloc[-50:, :].apply(compare_func, axis=0, raw=raw),
        check_names=False,
    )


@td.skip_if_no_scipy
@pytest.mark.parametrize("sp_func, roll_func", [["kurtosis", "kurt"], ["skew", "skew"]])
def test_time_rule_series(series, sp_func, roll_func):
    """Check rolling skew/kurt on a business-day resampled Series."""
    import scipy.stats

    compare_func = partial(getattr(scipy.stats, sp_func), bias=False)
    win = 25
    ser = series.iloc[::2].resample("B").mean()
    series_result = getattr(ser.rolling(window=win, min_periods=10), roll_func)()
    last_date = series_result.index[-1]
    # the final window spans ``win`` business days ending on ``last_date``
    prev_date = last_date - (win - 1) * offsets.BDay()

    trunc_series = series.iloc[::2].truncate(prev_date, last_date)
    # .iloc: a bare [-1] on a datetime-indexed Series is a deprecated
    # positional fallback
    tm.assert_almost_equal(series_result.iloc[-1], compare_func(trunc_series))


@td.skip_if_no_scipy
@pytest.mark.parametrize("sp_func, roll_func", [["kurtosis", "kurt"], ["skew", "skew"]])
def test_time_rule_frame(raw, frame, sp_func, roll_func):
    """Check rolling skew/kurt on a business-day resampled DataFrame."""
    import scipy.stats

    compare_func = partial(getattr(scipy.stats, sp_func), bias=False)
    win = 25
    frm = frame.iloc[::2].resample("B").mean()
    frame_result = getattr(frm.rolling(window=win, min_periods=10), roll_func)()
    last_date = frame_result.index[-1]
    # the final window spans ``win`` business days ending on ``last_date``
    prev_date = last_date - (win - 1) * offsets.BDay()

    trunc_frame = frame.iloc[::2].truncate(prev_date, last_date)
    tm.assert_series_equal(
        frame_result.xs(last_date),
        trunc_frame.apply(compare_func, raw=raw),
        check_names=False,
    )


@td.skip_if_no_scipy
@pytest.mark.parametrize("sp_func, roll_func", [["kurtosis", "kurt"], ["skew", "skew"]])
def test_nans(sp_func, roll_func):
    """Check NaN exclusion and ``min_periods`` handling of rolling skew/kurt."""
    import scipy.stats

    compare_func = partial(getattr(scipy.stats, sp_func), bias=False)
    obj = Series(np.random.randn(50))
    obj.iloc[:10] = np.nan
    obj.iloc[-10:] = np.nan

    # only the 30 non-NaN observations contribute to the full-length window
    result = getattr(obj.rolling(50, min_periods=30), roll_func)()
    tm.assert_almost_equal(result.iloc[-1], compare_func(obj.iloc[10:-10]))

    # min_periods is working correctly: position 24 is the first window
    # holding 15 valid observations, position -6 the last one
    result = getattr(obj.rolling(20, min_periods=15), roll_func)()
    assert isna(result.iloc[23])
    assert not isna(result.iloc[24])

    assert not isna(result.iloc[-6])
    assert isna(result.iloc[-5])

    obj2 = Series(np.random.randn(20))
    result = getattr(obj2.rolling(10, min_periods=5), roll_func)()
    assert isna(result.iloc[3])
    assert notna(result.iloc[4])

    # min_periods=0 is expected to behave like min_periods=1
    result0 = getattr(obj.rolling(20, min_periods=0), roll_func)()
    result1 = getattr(obj.rolling(20, min_periods=1), roll_func)()
    tm.assert_almost_equal(result0, result1)


# NOTE(review): 99 and 100 presumably stand for len(series) - 1 and len(series),
# i.e. a 100-row ``series`` fixture -- confirm against conftest.
@pytest.mark.parametrize("minp", [0, 99, 100])
@pytest.mark.parametrize("roll_func", ["kurt", "skew"])
def test_min_periods(series, minp, roll_func):
    """Check a window longer than the data matches a window of exactly its length."""
    result = getattr(series.rolling(len(series) + 1, min_periods=minp), roll_func)()
    expected = getattr(series.rolling(len(series), min_periods=minp), roll_func)()
    nan_mask = isna(result)
    tm.assert_series_equal(nan_mask, isna(expected))

    # compare only the positions where both results hold a value
    nan_mask = ~nan_mask
    tm.assert_almost_equal(result[nan_mask], expected[nan_mask])


@pytest.mark.parametrize("roll_func", ["kurt", "skew"])
def test_center(roll_func):
    """Check ``center=True`` against a trailing window over NaN-padded data."""
    obj = Series(np.random.randn(50))
    obj.iloc[:10] = np.nan
    obj.iloc[-10:] = np.nan

    result = getattr(obj.rolling(20, center=True), roll_func)()
    # pad 9 trailing NaNs, then drop the first 9 results to realign positions
    padded = concat([obj, Series([np.nan] * 9)])
    trailing = getattr(padded.rolling(20), roll_func)()
    expected = trailing.iloc[9:].reset_index(drop=True)
    tm.assert_series_equal(result, expected)


@pytest.mark.parametrize("roll_func", ["kurt", "skew"])
def test_center_reindex_series(series, roll_func):
    """Check centered rolling skew/kurt of a Series equals a shifted trailing one."""
    # shifter index
    s = [f"x{x:d}" for x in range(12)]

    series_xp = (
        getattr(
            series.reindex(list(series.index) + s).rolling(window=25),
            roll_func,
        )()
        .shift(-12)
        .reindex(series.index)
    )
    series_rs = getattr(series.rolling(window=25, center=True), roll_func)()
    tm.assert_series_equal(series_xp, series_rs)


@pytest.mark.parametrize("roll_func", ["kurt", "skew"])
def test_center_reindex_frame(frame, roll_func):
    """Check centered rolling skew/kurt of a DataFrame equals a shifted trailing one."""
    # shifter index
    s = [f"x{x:d}" for x in range(12)]

    frame_xp = (
        getattr(
            frame.reindex(list(frame.index) + s).rolling(window=25),
            roll_func,
        )()
        .shift(-12)
        .reindex(frame.index)
    )
    frame_rs = getattr(frame.rolling(window=25, center=True), roll_func)()
    tm.assert_frame_equal(frame_xp, frame_rs)