From 7e461a18d9f6928132afec6f48ce968b3e989ba6 Mon Sep 17 00:00:00 2001 From: Kaiqi Dong Date: Mon, 3 Dec 2018 17:43:52 +0100 Subject: [PATCH 1/7] remove \n from docstring --- pandas/core/arrays/datetimes.py | 26 +++++++++++++------------- pandas/core/arrays/timedeltas.py | 16 ++++++++-------- 2 files changed, 21 insertions(+), 21 deletions(-) diff --git a/pandas/core/arrays/datetimes.py b/pandas/core/arrays/datetimes.py index cfe3afcf3730a..b3df505d56d78 100644 --- a/pandas/core/arrays/datetimes.py +++ b/pandas/core/arrays/datetimes.py @@ -82,7 +82,7 @@ def f(self): return result f.__name__ = name - f.__doc__ = docstring + f.__doc__ = "\n{}\n".format(docstring) return property(f) @@ -1072,19 +1072,19 @@ def date(self): return tslib.ints_to_pydatetime(timestamps, box="date") - year = _field_accessor('year', 'Y', "\n The year of the datetime\n") + year = _field_accessor('year', 'Y', "The year of the datetime") month = _field_accessor('month', 'M', - "\n The month as January=1, December=12 \n") - day = _field_accessor('day', 'D', "\nThe days of the datetime\n") - hour = _field_accessor('hour', 'h', "\nThe hours of the datetime\n") - minute = _field_accessor('minute', 'm', "\nThe minutes of the datetime\n") - second = _field_accessor('second', 's', "\nThe seconds of the datetime\n") + "The month as January=1, December=12") + day = _field_accessor('day', 'D', "The days of the datetime") + hour = _field_accessor('hour', 'h', "The hours of the datetime") + minute = _field_accessor('minute', 'm', "The minutes of the datetime") + second = _field_accessor('second', 's', "The seconds of the datetime") microsecond = _field_accessor('microsecond', 'us', - "\nThe microseconds of the datetime\n") + "The microseconds of the datetime") nanosecond = _field_accessor('nanosecond', 'ns', - "\nThe nanoseconds of the datetime\n") + "The nanoseconds of the datetime") weekofyear = _field_accessor('weekofyear', 'woy', - "\nThe week ordinal of the year\n") + "The week ordinal of the year") week = weekofyear _dayofweek_doc = """ The day of the week with Monday=0, Sunday=6. @@ -1129,12 +1129,12 @@ def date(self): "The name of day in a week (ex: Friday)\n\n.. deprecated:: 0.23.0") dayofyear = _field_accessor('dayofyear', 'doy', - "\nThe ordinal day of the year\n") - quarter = _field_accessor('quarter', 'q', "\nThe quarter of the date\n") + "The ordinal day of the year") + quarter = _field_accessor('quarter', 'q', "The quarter of the date") days_in_month = _field_accessor( 'days_in_month', 'dim', - "\nThe number of days in the month\n") + "The number of days in the month") daysinmonth = days_in_month _is_month_doc = """ Indicates whether the date is the {first_or_last} day of the month. diff --git a/pandas/core/arrays/timedeltas.py b/pandas/core/arrays/timedeltas.py index 830283d31a929..4afc9f5483c2a 100644 --- a/pandas/core/arrays/timedeltas.py +++ b/pandas/core/arrays/timedeltas.py @@ -59,7 +59,7 @@ def f(self): return result f.__name__ = name - f.__doc__ = docstring + f.__doc__ = "\n{}\n".format(docstring) return property(f) @@ -684,16 +684,16 @@ def to_pytimedelta(self): return tslibs.ints_to_pytimedelta(self.asi8) days = _field_accessor("days", "days", - "\nNumber of days for each element.\n") + "Number of days for each element.") seconds = _field_accessor("seconds", "seconds", - "\nNumber of seconds (>= 0 and less than 1 day) " - "for each element.\n") + "Number of seconds (>= 0 and less than 1 day) " + "for each element.") microseconds = _field_accessor("microseconds", "microseconds", - "\nNumber of microseconds (>= 0 and less " - "than 1 second) for each element.\n") + "Number of microseconds (>= 0 and less " + "than 1 second) for each element.") nanoseconds = _field_accessor("nanoseconds", "nanoseconds", - "\nNumber of nanoseconds (>= 0 and less " - "than 1 microsecond) for each element.\n") + "Number of nanoseconds (>= 0 and less " + "than 1 microsecond) for each element.") @property def components(self): From dea38f24c0067ae3fe9484b837c9649714213bba Mon Sep 17 00:00:00 2001 From: Kaiqi Date: Tue, 14 Jan 2020 21:26:31 +0100 Subject: [PATCH 2/7] fix issue 17038 --- pandas/core/reshape/pivot.py | 4 +++- pandas/tests/reshape/test_pivot.py | 20 ++++++++++++++------ 2 files changed, 17 insertions(+), 7 deletions(-) diff --git a/pandas/core/reshape/pivot.py b/pandas/core/reshape/pivot.py index b443ba142369c..9743d90f4dd04 100644 --- a/pandas/core/reshape/pivot.py +++ b/pandas/core/reshape/pivot.py @@ -117,7 +117,9 @@ def pivot_table( agged[v] = maybe_downcast_to_dtype(agged[v], data[v].dtype) table = agged - if table.index.nlevels > 1: + + # GH 17038, this check should only happen if index is specified + if table.index.nlevels > 1 and index: # Related GH #17123 # If index_names are integers, determine whether the integers refer # to the level position or name. diff --git a/pandas/tests/reshape/test_pivot.py b/pandas/tests/reshape/test_pivot.py index 743fc50c87e96..46a05123c9fdd 100644 --- a/pandas/tests/reshape/test_pivot.py +++ b/pandas/tests/reshape/test_pivot.py @@ -896,12 +896,6 @@ def _check_output( totals = table.loc[("All", ""), value_col] assert totals == self.data[value_col].mean() - # no rows - rtable = self.data.pivot_table( - columns=["AA", "BB"], margins=True, aggfunc=np.mean - ) - assert isinstance(rtable, Series) - table = self.data.pivot_table(index=["AA", "BB"], margins=True, aggfunc="mean") for item in ["DD", "EE", "FF"]: totals = table.loc[("All", ""), item] @@ -972,6 +966,20 @@ def test_pivot_integer_columns(self): tm.assert_frame_equal(table, table2, check_names=False) + @pytest.mark.parametrize("cols", [(1, 2), ("a", "b"), (1, "b"), ("a", 1)]) + def test_pivot_table_multiindex_only(self, cols): + # GH 17038 + df2 = DataFrame({cols[0]: [1, 2, 3], cols[1]: [1, 2, 3], "v": [4, 5, 6]}) + + result = df2.pivot_table(values="v", columns=cols) + expected = DataFrame( + [[4, 5, 6]], + columns=MultiIndex.from_tuples([(1, 1), (2, 2), (3, 3)], names=cols), + index=Index(["v"]), + ) + + tm.assert_frame_equal(result, expected) + def test_pivot_no_level_overlap(self): # GH #1181 From cd9e7ac3f31ffaf95cd628863df911dea9fa1248 Mon Sep 17 00:00:00 2001 From: Kaiqi Date: Tue, 14 Jan 2020 21:29:43 +0100 Subject: [PATCH 3/7] revert change --- pandas/core/reshape/pivot.py | 3 +-- pandas/tests/reshape/test_pivot.py | 20 ++++++-------------- 2 files changed, 7 insertions(+), 16 deletions(-) diff --git a/pandas/core/reshape/pivot.py b/pandas/core/reshape/pivot.py index 9743d90f4dd04..a7cdbb0da7a4e 100644 --- a/pandas/core/reshape/pivot.py +++ b/pandas/core/reshape/pivot.py @@ -118,8 +118,7 @@ def pivot_table( table = agged - # GH 17038, this check should only happen if index is specified - if table.index.nlevels > 1 and index: + if table.index.nlevels > 1: # Related GH #17123 # If index_names are integers, determine whether the integers refer # to the level position or name. diff --git a/pandas/tests/reshape/test_pivot.py b/pandas/tests/reshape/test_pivot.py index 46a05123c9fdd..743fc50c87e96 100644 --- a/pandas/tests/reshape/test_pivot.py +++ b/pandas/tests/reshape/test_pivot.py @@ -896,6 +896,12 @@ def _check_output( totals = table.loc[("All", ""), value_col] assert totals == self.data[value_col].mean() + # no rows + rtable = self.data.pivot_table( + columns=["AA", "BB"], margins=True, aggfunc=np.mean + ) + assert isinstance(rtable, Series) + table = self.data.pivot_table(index=["AA", "BB"], margins=True, aggfunc="mean") for item in ["DD", "EE", "FF"]: totals = table.loc[("All", ""), item] @@ -966,20 +972,6 @@ def test_pivot_integer_columns(self): tm.assert_frame_equal(table, table2, check_names=False) - @pytest.mark.parametrize("cols", [(1, 2), ("a", "b"), (1, "b"), ("a", 1)]) - def test_pivot_table_multiindex_only(self, cols): - # GH 17038 - df2 = DataFrame({cols[0]: [1, 2, 3], cols[1]: [1, 2, 3], "v": [4, 5, 6]}) - - result = df2.pivot_table(values="v", columns=cols) - expected = DataFrame( - [[4, 5, 6]], - columns=MultiIndex.from_tuples([(1, 1), (2, 2), (3, 3)], names=cols), - index=Index(["v"]), - ) - - tm.assert_frame_equal(result, expected) - def test_pivot_no_level_overlap(self): # GH #1181 From e5e912be0f596943067a7df812442764d311a086 Mon Sep 17 00:00:00 2001 From: Kaiqi Date: Tue, 14 Jan 2020 21:30:16 +0100 Subject: [PATCH 4/7] revert change --- pandas/core/reshape/pivot.py | 1 - 1 file changed, 1 deletion(-) diff --git a/pandas/core/reshape/pivot.py b/pandas/core/reshape/pivot.py index a7cdbb0da7a4e..b443ba142369c 100644 --- a/pandas/core/reshape/pivot.py +++ b/pandas/core/reshape/pivot.py @@ -117,7 +117,6 @@ def pivot_table( agged[v] = maybe_downcast_to_dtype(agged[v], data[v].dtype) table = agged - if table.index.nlevels > 1: # Related GH #17123 # If index_names are integers, determine whether the integers refer From e4944e0cbf2779aa7965804c0e1c86435db3473d Mon Sep 17 00:00:00 2001 From: Kaiqi Date: Thu, 14 May 2020 20:58:45 +0200 Subject: [PATCH 5/7] remove base and fixturize --- pandas/tests/window/common.py | 22 +- pandas/tests/window/conftest.py | 62 +- .../moments/test_moments_consistency_ewm.py | 11 +- .../test_moments_consistency_expanding.py | 202 +- .../test_moments_consistency_rolling.py | 81 +- .../tests/window/moments/test_moments_ewm.py | 481 +++-- .../window/moments/test_moments_rolling.py | 1671 +++++++++-------- pandas/tests/window/test_api.py | 560 +++--- pandas/tests/window/test_ewm.py | 92 +- pandas/tests/window/test_expanding.py | 209 +-- pandas/tests/window/test_rolling.py | 693 +++---- pandas/tests/window/test_window.py | 126 +- 12 files changed, 2124 insertions(+), 2086 deletions(-) diff --git a/pandas/tests/window/common.py b/pandas/tests/window/common.py index 5dca26df49930..7e0be331ec8d5 100644 --- a/pandas/tests/window/common.py +++ b/pandas/tests/window/common.py @@ -1,28 +1,8 @@ -from datetime import datetime - import numpy as np -from numpy.random import randn -from pandas import DataFrame, Series, bdate_range +from pandas import Series import pandas._testing as tm -N, K = 100, 10 - - -class Base: - - _nan_locs = np.arange(20, 40) - _inf_locs = np.array([]) - - def _create_data(self): - arr = randn(N) - arr[self._nan_locs] = np.NaN - - self.arr = arr - self.rng = bdate_range(datetime(2009, 1, 1), periods=N) - self.series = Series(arr.copy(), index=self.rng) - self.frame = DataFrame(randn(N, K), index=self.rng, columns=np.arange(K)) - def check_pairwise_moment(frame, dispatch, name, **kwargs): def get_result(obj, obj2=None): diff --git a/pandas/tests/window/conftest.py b/pandas/tests/window/conftest.py index 4f462a09a60a3..74f3406d30225 100644 --- a/pandas/tests/window/conftest.py +++ b/pandas/tests/window/conftest.py @@ -1,9 +1,12 @@ +from datetime import datetime + import numpy as np +from numpy.random import randn import pytest import pandas.util._test_decorators as td -from pandas import DataFrame, Series, notna +from pandas import DataFrame, Series, bdate_range, notna @pytest.fixture(params=[True, False]) @@ -242,3 +245,60 @@ def no_nans(x): def consistency_data(request): """Create consistency data""" return request.param + + +def _create_arr(): + """Internal function to mock an array.""" + arr = randn(100) + locs = np.arange(20, 40) + arr[locs] = np.NaN + return arr + + +def _create_rng(): + """Internal function to mock date range.""" + rng = bdate_range(datetime(2009, 1, 1), periods=100) + return rng + + +def _create_series(): + """Internal function to mock Series.""" + arr = _create_arr() + series = Series(arr.copy(), index=_create_rng()) + return series + + +def _create_frame(): + """Internal function to mock DataFrame.""" + rng = _create_rng() + return DataFrame(randn(100, 10), index=rng, columns=np.arange(10)) + + +@pytest.fixture +def nan_locs(): + """Make a range as loc fixture.""" + return np.arange(20, 40) + + +@pytest.fixture +def arr(): + """Make an array as fixture.""" + return _create_arr() + + +@pytest.fixture +def frame(): + """Make mocked frame as fixture.""" + return _create_frame() + + +@pytest.fixture +def series(): + """Make mocked series as fixture.""" + return _create_series() + + +@pytest.fixture(params=[_create_series(), _create_frame()]) +def which(request): + """Turn parametrized which as fixture for series and frame""" + return request.param diff --git a/pandas/tests/window/moments/test_moments_consistency_ewm.py b/pandas/tests/window/moments/test_moments_consistency_ewm.py index 3b3a9d59cb6e7..f143278e12ec5 100644 --- a/pandas/tests/window/moments/test_moments_consistency_ewm.py +++ b/pandas/tests/window/moments/test_moments_consistency_ewm.py @@ -4,7 +4,6 @@ from pandas import DataFrame, Series, concat from pandas.tests.window.common import ( - Base, check_binary_ew, check_binary_ew_min_periods, check_pairwise_moment, @@ -19,13 +18,9 @@ ) -class TestEwmMomentsConsistency(Base): - def setup_method(self, method): - self._create_data() - - @pytest.mark.parametrize("func", ["cov", "corr"]) - def test_ewm_pairwise_cov_corr(self, func): - check_pairwise_moment(self.frame, "ewm", func, span=10, min_periods=5) +@pytest.mark.parametrize("func", ["cov", "corr"]) +def test_ewm_pairwise_cov_corr(func, frame): + check_pairwise_moment(frame, "ewm", func, span=10, min_periods=5) @pytest.mark.parametrize("name", ["cov", "corr"]) diff --git a/pandas/tests/window/moments/test_moments_consistency_expanding.py b/pandas/tests/window/moments/test_moments_consistency_expanding.py index 09cd2ff218c2b..ee3579d76d1db 100644 --- a/pandas/tests/window/moments/test_moments_consistency_expanding.py +++ b/pandas/tests/window/moments/test_moments_consistency_expanding.py @@ -7,7 +7,6 @@ from pandas import DataFrame, Index, MultiIndex, Series, isna, notna import pandas._testing as tm from pandas.tests.window.common import ( - Base, moments_consistency_cov_data, moments_consistency_is_constant, moments_consistency_mock_mean, @@ -18,132 +17,145 @@ ) -class TestExpandingMomentsConsistency(Base): - def setup_method(self, method): - self._create_data() +def _check_expanding( + func, static_comp, preserve_nan=True, series=None, frame=None, nan_locs=None +): - def test_expanding_corr(self): - A = self.series.dropna() - B = (A + randn(len(A)))[:-5] + series_result = func(series) + assert isinstance(series_result, Series) + frame_result = func(frame) + assert isinstance(frame_result, DataFrame) - result = A.expanding().corr(B) + result = func(series) + tm.assert_almost_equal(result[10], static_comp(series[:11])) - rolling_result = A.rolling(window=len(A), min_periods=1).corr(B) + if preserve_nan: + assert result.iloc[nan_locs].isna().all() - tm.assert_almost_equal(rolling_result, result) - def test_expanding_count(self): - result = self.series.expanding(min_periods=0).count() - tm.assert_almost_equal( - result, self.series.rolling(window=len(self.series), min_periods=0).count() - ) +def _check_expanding_has_min_periods(func, static_comp, has_min_periods): + ser = Series(randn(50)) - def test_expanding_quantile(self): - result = self.series.expanding().quantile(0.5) + if has_min_periods: + result = func(ser, min_periods=30) + assert result[:29].isna().all() + tm.assert_almost_equal(result.iloc[-1], static_comp(ser[:50])) - rolling_result = self.series.rolling( - window=len(self.series), min_periods=1 - ).quantile(0.5) + # min_periods is working correctly + result = func(ser, min_periods=15) + assert isna(result.iloc[13]) + assert notna(result.iloc[14]) - tm.assert_almost_equal(result, rolling_result) + ser2 = Series(randn(20)) + result = func(ser2, min_periods=5) + assert isna(result[3]) + assert notna(result[4]) - def test_expanding_cov(self): - A = self.series - B = (A + randn(len(A)))[:-5] + # min_periods=0 + result0 = func(ser, min_periods=0) + result1 = func(ser, min_periods=1) + tm.assert_almost_equal(result0, result1) + else: + result = func(ser) + tm.assert_almost_equal(result.iloc[-1], static_comp(ser[:50])) - result = A.expanding().cov(B) - rolling_result = A.rolling(window=len(A), min_periods=1).cov(B) +def test_expanding_corr(series): + A = series.dropna() + B = (A + randn(len(A)))[:-5] - tm.assert_almost_equal(rolling_result, result) + result = A.expanding().corr(B) - def test_expanding_cov_pairwise(self): - result = self.frame.expanding().corr() + rolling_result = A.rolling(window=len(A), min_periods=1).corr(B) - rolling_result = self.frame.rolling( - window=len(self.frame), min_periods=1 - ).corr() + tm.assert_almost_equal(rolling_result, result) - tm.assert_frame_equal(result, rolling_result) - def test_expanding_corr_pairwise(self): - result = self.frame.expanding().corr() +def test_expanding_count(series): + result = series.expanding(min_periods=0).count() + tm.assert_almost_equal( + result, series.rolling(window=len(series), min_periods=0).count() + ) - rolling_result = self.frame.rolling( - window=len(self.frame), min_periods=1 - ).corr() - tm.assert_frame_equal(result, rolling_result) - @pytest.mark.parametrize("has_min_periods", [True, False]) - @pytest.mark.parametrize( - "func,static_comp", - [("sum", np.sum), ("mean", np.mean), ("max", np.max), ("min", np.min)], - ids=["sum", "mean", "max", "min"], - ) - def test_expanding_func(self, func, static_comp, has_min_periods): - def expanding_func(x, min_periods=1, center=False, axis=0): - exp = x.expanding(min_periods=min_periods, center=center, axis=axis) - return getattr(exp, func)() - - self._check_expanding(expanding_func, static_comp, preserve_nan=False) - self._check_expanding_has_min_periods( - expanding_func, static_comp, has_min_periods - ) +def test_expanding_quantile(series): + result = series.expanding().quantile(0.5) + + rolling_result = series.rolling(window=len(series), min_periods=1).quantile(0.5) + + tm.assert_almost_equal(result, rolling_result) + - @pytest.mark.parametrize("has_min_periods", [True, False]) - def test_expanding_apply(self, engine_and_raw, has_min_periods): +def test_expanding_cov(series): + A = series + B = (A + randn(len(A)))[:-5] - engine, raw = engine_and_raw + result = A.expanding().cov(B) - def expanding_mean(x, min_periods=1): + rolling_result = A.rolling(window=len(A), min_periods=1).cov(B) - exp = x.expanding(min_periods=min_periods) - result = exp.apply(lambda x: x.mean(), raw=raw, engine=engine) - return result + tm.assert_almost_equal(rolling_result, result) - # TODO(jreback), needed to add preserve_nan=False - # here to make this pass - self._check_expanding(expanding_mean, np.mean, preserve_nan=False) - self._check_expanding_has_min_periods(expanding_mean, np.mean, has_min_periods) - def _check_expanding(self, func, static_comp, preserve_nan=True): +def test_expanding_cov_pairwise(frame): + result = frame.expanding().cov() - series_result = func(self.series) - assert isinstance(series_result, Series) - frame_result = func(self.frame) - assert isinstance(frame_result, DataFrame) + rolling_result = frame.rolling(window=len(frame), min_periods=1).cov() - result = func(self.series) - tm.assert_almost_equal(result[10], static_comp(self.series[:11])) + tm.assert_frame_equal(result, rolling_result) - if preserve_nan: - assert result.iloc[self._nan_locs].isna().all() - def _check_expanding_has_min_periods(self, func, static_comp, has_min_periods): - ser = Series(randn(50)) +def test_expanding_corr_pairwise(frame): + result = frame.expanding().corr() - if has_min_periods: - result = func(ser, min_periods=30) - assert result[:29].isna().all() - tm.assert_almost_equal(result.iloc[-1], static_comp(ser[:50])) + rolling_result = frame.rolling(window=len(frame), min_periods=1).corr() + tm.assert_frame_equal(result, rolling_result) - # min_periods is working correctly - result = func(ser, min_periods=15) - assert isna(result.iloc[13]) - assert notna(result.iloc[14]) - ser2 = Series(randn(20)) - result = func(ser2, min_periods=5) - assert isna(result[3]) - assert notna(result[4]) +@pytest.mark.parametrize("has_min_periods", [True, False]) +@pytest.mark.parametrize( + "func,static_comp", + [("sum", np.sum), ("mean", np.mean), ("max", np.max), ("min", np.min)], + ids=["sum", "mean", "max", "min"], +) +def test_expanding_func(func, static_comp, has_min_periods, series, frame, nan_locs): + def expanding_func(x, min_periods=1, center=False, axis=0): + exp = x.expanding(min_periods=min_periods, center=center, axis=axis) + return getattr(exp, func)() + + _check_expanding( + expanding_func, + static_comp, + preserve_nan=False, + series=series, + frame=frame, + nan_locs=nan_locs, + ) + _check_expanding_has_min_periods(expanding_func, static_comp, has_min_periods) + - # min_periods=0 - result0 = func(ser, min_periods=0) - result1 = func(ser, min_periods=1) - tm.assert_almost_equal(result0, result1) - else: - result = func(ser) - tm.assert_almost_equal(result.iloc[-1], static_comp(ser[:50])) +@pytest.mark.parametrize("has_min_periods", [True, False]) +def test_expanding_apply(engine_and_raw, has_min_periods, series, frame, nan_locs): + + engine, raw = engine_and_raw + + def expanding_mean(x, min_periods=1): + + exp = x.expanding(min_periods=min_periods) + result = exp.apply(lambda x: x.mean(), raw=raw, engine=engine) + return result + + # TODO(jreback), needed to add preserve_nan=False + # here to make this pass + _check_expanding( + expanding_mean, + np.mean, + preserve_nan=False, + series=series, + frame=frame, + nan_locs=nan_locs, + ) + _check_expanding_has_min_periods(expanding_mean, np.mean, has_min_periods) @pytest.mark.parametrize("min_periods", [0, 1, 2, 3, 4]) diff --git a/pandas/tests/window/moments/test_moments_consistency_rolling.py b/pandas/tests/window/moments/test_moments_consistency_rolling.py index 2c37baeae13b7..a3de8aa69f840 100644 --- a/pandas/tests/window/moments/test_moments_consistency_rolling.py +++ b/pandas/tests/window/moments/test_moments_consistency_rolling.py @@ -12,7 +12,6 @@ import pandas._testing as tm from pandas.core.window.common import _flex_binary_moment from pandas.tests.window.common import ( - Base, check_pairwise_moment, moments_consistency_cov_data, moments_consistency_is_constant, @@ -33,60 +32,56 @@ def _rolling_consistency_cases(): yield window, min_periods, center -class TestRollingMomentsConsistency(Base): - def setup_method(self, method): - self._create_data() +# binary moments +def test_rolling_cov(series): + A = series + B = A + randn(len(A)) - # binary moments - def test_rolling_cov(self): - A = self.series - B = A + randn(len(A)) + result = A.rolling(window=50, min_periods=25).cov(B) + tm.assert_almost_equal(result[-1], np.cov(A[-50:], B[-50:])[0, 1]) - result = A.rolling(window=50, min_periods=25).cov(B) - tm.assert_almost_equal(result[-1], np.cov(A[-50:], B[-50:])[0, 1]) - def test_rolling_corr(self): - A = self.series - B = A + randn(len(A)) +def test_rolling_corr(series): + A = series + B = A + randn(len(A)) - result = A.rolling(window=50, min_periods=25).corr(B) - tm.assert_almost_equal(result[-1], np.corrcoef(A[-50:], B[-50:])[0, 1]) + result = A.rolling(window=50, min_periods=25).corr(B) + tm.assert_almost_equal(result[-1], np.corrcoef(A[-50:], B[-50:])[0, 1]) - # test for correct bias correction - a = tm.makeTimeSeries() - b = tm.makeTimeSeries() - a[:5] = np.nan - b[:10] = np.nan + # test for correct bias correction + a = tm.makeTimeSeries() + b = tm.makeTimeSeries() + a[:5] = np.nan + b[:10] = np.nan - result = a.rolling(window=len(a), min_periods=1).corr(b) - tm.assert_almost_equal(result[-1], a.corr(b)) + result = a.rolling(window=len(a), min_periods=1).corr(b) + tm.assert_almost_equal(result[-1], a.corr(b)) - @pytest.mark.parametrize("func", ["cov", "corr"]) - def test_rolling_pairwise_cov_corr(self, func): - check_pairwise_moment(self.frame, "rolling", func, window=10, min_periods=5) - @pytest.mark.parametrize("method", ["corr", "cov"]) - def test_flex_binary_frame(self, method): - series = self.frame[1] +@pytest.mark.parametrize("func", ["cov", "corr"]) +def test_rolling_pairwise_cov_corr(func, frame): + check_pairwise_moment(frame, "rolling", func, window=10, min_periods=5) - res = getattr(series.rolling(window=10), method)(self.frame) - res2 = getattr(self.frame.rolling(window=10), method)(series) - exp = self.frame.apply(lambda x: getattr(series.rolling(window=10), method)(x)) - tm.assert_frame_equal(res, exp) - tm.assert_frame_equal(res2, exp) +@pytest.mark.parametrize("method", ["corr", "cov"]) +def test_flex_binary_frame(method, frame): + series = frame[1] - frame2 = self.frame.copy() - frame2.values[:] = np.random.randn(*frame2.shape) + res = getattr(series.rolling(window=10), method)(frame) + res2 = getattr(frame.rolling(window=10), method)(series) + exp = frame.apply(lambda x: getattr(series.rolling(window=10), method)(x)) - res3 = getattr(self.frame.rolling(window=10), method)(frame2) - exp = DataFrame( - { - k: getattr(self.frame[k].rolling(window=10), method)(frame2[k]) - for k in self.frame - } - ) - tm.assert_frame_equal(res3, exp) + tm.assert_frame_equal(res, exp) + tm.assert_frame_equal(res2, exp) + + frame2 = frame.copy() + frame2.values[:] = np.random.randn(*frame2.shape) + + res3 = getattr(frame.rolling(window=10), method)(frame2) + exp = DataFrame( + {k: getattr(frame[k].rolling(window=10), method)(frame2[k]) for k in frame} + ) + tm.assert_frame_equal(res3, exp) @pytest.mark.slow diff --git a/pandas/tests/window/moments/test_moments_ewm.py b/pandas/tests/window/moments/test_moments_ewm.py index 162917fff9363..c6a92c0ad47b6 100644 --- a/pandas/tests/window/moments/test_moments_ewm.py +++ b/pandas/tests/window/moments/test_moments_ewm.py @@ -5,257 +5,248 @@ import pandas as pd from pandas import DataFrame, Series import pandas._testing as tm -from pandas.tests.window.common import Base -@pytest.mark.filterwarnings("ignore:can't resolve package:ImportWarning") -class TestMoments(Base): - def setup_method(self, method): - self._create_data() +def check_ew(name=None, preserve_nan=False, series=None, frame=None, nan_locs=None): + series_result = getattr(series.ewm(com=10), name)() + assert isinstance(series_result, Series) + + frame_result = getattr(frame.ewm(com=10), name)() + assert type(frame_result) == DataFrame + + result = getattr(series.ewm(com=10), name)() + if preserve_nan: + assert result[nan_locs].isna().all() + + +def test_ewma(series, frame, nan_locs): + check_ew(name="mean", frame=frame, series=series, nan_locs=nan_locs) + + vals = pd.Series(np.zeros(1000)) + vals[5] = 1 + result = vals.ewm(span=100, adjust=False).mean().sum() + assert np.abs(result - 1) < 1e-2 + + +@pytest.mark.parametrize("adjust", [True, False]) +@pytest.mark.parametrize("ignore_na", [True, False]) +def test_ewma_cases(adjust, ignore_na): + # try adjust/ignore_na args matrix + + s = Series([1.0, 2.0, 4.0, 8.0]) + + if adjust: + expected = Series([1.0, 1.6, 2.736842, 4.923077]) + else: + expected = Series([1.0, 1.333333, 2.222222, 4.148148]) + + result = s.ewm(com=2.0, adjust=adjust, ignore_na=ignore_na).mean() + tm.assert_series_equal(result, expected) + + +def test_ewma_nan_handling(): + s = Series([1.0] + [np.nan] * 5 + [1.0]) + result = s.ewm(com=5).mean() + tm.assert_series_equal(result, Series([1.0] * len(s))) + + s = Series([np.nan] * 2 + [1.0] + [np.nan] * 2 + [1.0]) + result = s.ewm(com=5).mean() + tm.assert_series_equal(result, Series([np.nan] * 2 + [1.0] * 4)) + + # GH 7603 + s0 = Series([np.nan, 1.0, 101.0]) + s1 = Series([1.0, np.nan, 101.0]) + s2 = Series([np.nan, 1.0, np.nan, np.nan, 101.0, np.nan]) + s3 = Series([1.0, np.nan, 101.0, 50.0]) + com = 2.0 + alpha = 1.0 / (1.0 + com) + + def simple_wma(s, w): + return (s.multiply(w).cumsum() / w.cumsum()).fillna(method="ffill") + + for (s, adjust, ignore_na, w) in [ + (s0, True, False, [np.nan, (1.0 - alpha), 1.0]), + (s0, True, True, [np.nan, (1.0 - alpha), 1.0]), + (s0, False, False, [np.nan, (1.0 - alpha), alpha]), + (s0, False, True, [np.nan, (1.0 - alpha), alpha]), + (s1, True, False, [(1.0 - alpha) ** 2, np.nan, 1.0]), + (s1, True, True, [(1.0 - alpha), np.nan, 1.0]), + (s1, False, False, [(1.0 - alpha) ** 2, np.nan, alpha]), + (s1, False, True, [(1.0 - alpha), np.nan, alpha]), + (s2, True, False, [np.nan, (1.0 - alpha) ** 3, np.nan, np.nan, 1.0, np.nan],), + (s2, True, True, [np.nan, (1.0 - alpha), np.nan, np.nan, 1.0, np.nan]), + ( + s2, + False, + False, + [np.nan, (1.0 - alpha) ** 3, np.nan, np.nan, alpha, np.nan], + ), + (s2, False, True, [np.nan, (1.0 - alpha), np.nan, np.nan, alpha, np.nan]), + (s3, True, False, [(1.0 - alpha) ** 3, np.nan, (1.0 - alpha), 1.0]), + (s3, True, True, [(1.0 - alpha) ** 2, np.nan, (1.0 - alpha), 1.0]), + ( + s3, + False, + False, + [ + (1.0 - alpha) ** 3, + np.nan, + (1.0 - alpha) * alpha, + alpha * ((1.0 - alpha) ** 2 + alpha), + ], + ), + (s3, False, True, [(1.0 - alpha) ** 2, np.nan, (1.0 - alpha) * alpha, alpha],), + ]: + expected = simple_wma(s, Series(w)) + result = s.ewm(com=com, adjust=adjust, ignore_na=ignore_na).mean() - def test_ewma(self): - self._check_ew(name="mean") - - vals = pd.Series(np.zeros(1000)) - vals[5] = 1 - result = vals.ewm(span=100, adjust=False).mean().sum() - assert np.abs(result - 1) < 1e-2 - - @pytest.mark.parametrize("adjust", [True, False]) - @pytest.mark.parametrize("ignore_na", [True, False]) - def test_ewma_cases(self, adjust, ignore_na): - # try adjust/ignore_na args matrix - - s = Series([1.0, 2.0, 4.0, 8.0]) - - if adjust: - expected = Series([1.0, 1.6, 2.736842, 4.923077]) - else: - expected = Series([1.0, 1.333333, 2.222222, 4.148148]) - - result = s.ewm(com=2.0, adjust=adjust, ignore_na=ignore_na).mean() tm.assert_series_equal(result, expected) + if ignore_na is False: + # check that ignore_na defaults to False + result = s.ewm(com=com, adjust=adjust).mean() + tm.assert_series_equal(result, expected) - def test_ewma_nan_handling(self): - s = Series([1.0] + [np.nan] * 5 + [1.0]) - result = s.ewm(com=5).mean() - tm.assert_series_equal(result, Series([1.0] * len(s))) - - s = Series([np.nan] * 2 + [1.0] + [np.nan] * 2 + [1.0]) - result = s.ewm(com=5).mean() - tm.assert_series_equal(result, Series([np.nan] * 2 + [1.0] * 4)) - - # GH 7603 - s0 = Series([np.nan, 1.0, 101.0]) - s1 = Series([1.0, np.nan, 101.0]) - s2 = Series([np.nan, 1.0, np.nan, np.nan, 101.0, np.nan]) - s3 = Series([1.0, np.nan, 101.0, 50.0]) - com = 2.0 - alpha = 1.0 / (1.0 + com) - - def simple_wma(s, w): - return (s.multiply(w).cumsum() / w.cumsum()).fillna(method="ffill") - - for (s, adjust, ignore_na, w) in [ - (s0, True, False, [np.nan, (1.0 - alpha), 1.0]), - (s0, True, True, [np.nan, (1.0 - alpha), 1.0]), - (s0, False, False, [np.nan, (1.0 - alpha), alpha]), - (s0, False, True, [np.nan, (1.0 - alpha), alpha]), - (s1, True, False, [(1.0 - alpha) ** 2, np.nan, 1.0]), - (s1, True, True, [(1.0 - alpha), np.nan, 1.0]), - (s1, False, False, [(1.0 - alpha) ** 2, np.nan, alpha]), - (s1, False, True, [(1.0 - alpha), np.nan, alpha]), - ( - s2, - True, - False, - [np.nan, (1.0 - alpha) ** 3, np.nan, np.nan, 1.0, np.nan], - ), - (s2, True, True, [np.nan, (1.0 - alpha), np.nan, np.nan, 1.0, np.nan]), - ( - s2, - False, - False, - [np.nan, (1.0 - alpha) ** 3, np.nan, np.nan, alpha, np.nan], - ), - (s2, False, True, [np.nan, (1.0 - alpha), np.nan, np.nan, alpha, np.nan]), - (s3, True, False, [(1.0 - alpha) ** 3, np.nan, (1.0 - alpha), 1.0]), - (s3, True, True, [(1.0 - alpha) ** 2, np.nan, (1.0 - alpha), 1.0]), - ( - s3, - False, - False, - [ - (1.0 - alpha) ** 3, - np.nan, - (1.0 - alpha) * alpha, - alpha * ((1.0 - alpha) ** 2 + alpha), - ], - ), - ( - s3, - False, - True, - [(1.0 - alpha) ** 2, np.nan, (1.0 - alpha) * alpha, alpha], - ), - ]: - expected = simple_wma(s, Series(w)) - result = s.ewm(com=com, adjust=adjust, ignore_na=ignore_na).mean() - tm.assert_series_equal(result, expected) - if ignore_na is False: - # check that ignore_na defaults to False - result = s.ewm(com=com, adjust=adjust).mean() - tm.assert_series_equal(result, expected) - - def test_ewmvar(self): - self._check_ew(name="var") - - def test_ewmvol(self): - self._check_ew(name="vol") - - def test_ewma_span_com_args(self): - A = self.series.ewm(com=9.5).mean() - B = self.series.ewm(span=20).mean() - tm.assert_almost_equal(A, B) - - with pytest.raises(ValueError): - self.series.ewm(com=9.5, span=20) - with pytest.raises(ValueError): - self.series.ewm().mean() - - def test_ewma_halflife_arg(self): - A = self.series.ewm(com=13.932726172912965).mean() - B = self.series.ewm(halflife=10.0).mean() - tm.assert_almost_equal(A, B) - - with pytest.raises(ValueError): - self.series.ewm(span=20, halflife=50) - with pytest.raises(ValueError): - self.series.ewm(com=9.5, halflife=50) - with pytest.raises(ValueError): - self.series.ewm(com=9.5, span=20, halflife=50) - with pytest.raises(ValueError): - self.series.ewm() - - def test_ewm_alpha(self): - # GH 10789 - s = Series(self.arr) - a = s.ewm(alpha=0.61722699889169674).mean() - b = s.ewm(com=0.62014947789973052).mean() - c = s.ewm(span=2.240298955799461).mean() - d = s.ewm(halflife=0.721792864318).mean() - tm.assert_series_equal(a, b) - tm.assert_series_equal(a, c) - tm.assert_series_equal(a, d) - - def test_ewm_alpha_arg(self): - # GH 10789 - s = self.series - with pytest.raises(ValueError): - s.ewm() - with pytest.raises(ValueError): - s.ewm(com=10.0, alpha=0.5) - with pytest.raises(ValueError): - s.ewm(span=10.0, alpha=0.5) - with pytest.raises(ValueError): - s.ewm(halflife=10.0, alpha=0.5) - - def test_ewm_domain_checks(self): - # GH 12492 - s = Series(self.arr) - msg = "comass must satisfy: comass >= 0" - with pytest.raises(ValueError, match=msg): - s.ewm(com=-0.1) - s.ewm(com=0.0) - s.ewm(com=0.1) - - msg = "span must satisfy: span >= 1" - with pytest.raises(ValueError, match=msg): - s.ewm(span=-0.1) - with pytest.raises(ValueError, match=msg): - s.ewm(span=0.0) - with pytest.raises(ValueError, match=msg): - s.ewm(span=0.9) - s.ewm(span=1.0) - s.ewm(span=1.1) - - msg = "halflife must satisfy: halflife > 0" - with pytest.raises(ValueError, match=msg): - s.ewm(halflife=-0.1) - with pytest.raises(ValueError, match=msg): - s.ewm(halflife=0.0) - s.ewm(halflife=0.1) - - msg = "alpha must satisfy: 0 < alpha <= 1" - with pytest.raises(ValueError, match=msg): - s.ewm(alpha=-0.1) - with pytest.raises(ValueError, match=msg): - s.ewm(alpha=0.0) - s.ewm(alpha=0.1) - s.ewm(alpha=1.0) - with pytest.raises(ValueError, match=msg): - s.ewm(alpha=1.1) - - @pytest.mark.parametrize("method", ["mean", "vol", "var"]) - def test_ew_empty_series(self, method): - vals = pd.Series([], dtype=np.float64) - - ewm = vals.ewm(3) - result = getattr(ewm, method)() - tm.assert_almost_equal(result, vals) - - def _check_ew(self, name=None, preserve_nan=False): - series_result = getattr(self.series.ewm(com=10), name)() - assert isinstance(series_result, Series) - - frame_result = getattr(self.frame.ewm(com=10), name)() - assert type(frame_result) == DataFrame - - result = getattr(self.series.ewm(com=10), name)() - if preserve_nan: - assert result[self._nan_locs].isna().all() - - @pytest.mark.parametrize("min_periods", [0, 1]) - @pytest.mark.parametrize("name", ["mean", "var", "vol"]) - def test_ew_min_periods(self, min_periods, name): - # excluding NaNs correctly - arr = randn(50) - arr[:10] = np.NaN - arr[-10:] = np.NaN - s = Series(arr) - - # check min_periods - # GH 7898 - result = getattr(s.ewm(com=50, min_periods=2), name)() +@pytest.mark.parametrize("name", ["var", "vol"]) +def test_ewmvar_ewmvol(series, frame, nan_locs, name): + check_ew(name=name, frame=frame, series=series, nan_locs=nan_locs) + + +def test_ewma_span_com_args(series): + A = series.ewm(com=9.5).mean() + B = series.ewm(span=20).mean() + tm.assert_almost_equal(A, B) + + with pytest.raises(ValueError): + series.ewm(com=9.5, span=20) + with pytest.raises(ValueError): + series.ewm().mean() + + +def test_ewma_halflife_arg(series): + A = series.ewm(com=13.932726172912965).mean() + B = series.ewm(halflife=10.0).mean() + tm.assert_almost_equal(A, B) + + with pytest.raises(ValueError): + series.ewm(span=20, halflife=50) + with pytest.raises(ValueError): + series.ewm(com=9.5, halflife=50) + with pytest.raises(ValueError): + series.ewm(com=9.5, span=20, halflife=50) + with pytest.raises(ValueError): + series.ewm() + + +def test_ewm_alpha(arr): + # GH 10789 + s = Series(arr) + a = s.ewm(alpha=0.61722699889169674).mean() + b = s.ewm(com=0.62014947789973052).mean() + c = s.ewm(span=2.240298955799461).mean() + d = s.ewm(halflife=0.721792864318).mean() + tm.assert_series_equal(a, b) + tm.assert_series_equal(a, c) + tm.assert_series_equal(a, d) + + +def test_ewm_alpha_arg(series): + # GH 10789 + s = series + with pytest.raises(ValueError): + s.ewm() + with pytest.raises(ValueError): + s.ewm(com=10.0, alpha=0.5) + with pytest.raises(ValueError): + s.ewm(span=10.0, alpha=0.5) + with pytest.raises(ValueError): + s.ewm(halflife=10.0, alpha=0.5) + + +def test_ewm_domain_checks(arr): + # GH 12492 + s = Series(arr) + msg = "comass must satisfy: comass >= 0" + with pytest.raises(ValueError, match=msg): + s.ewm(com=-0.1) + s.ewm(com=0.0) + s.ewm(com=0.1) + + msg = "span must satisfy: span >= 1" + with pytest.raises(ValueError, match=msg): + s.ewm(span=-0.1) + with pytest.raises(ValueError, match=msg): + s.ewm(span=0.0) + with pytest.raises(ValueError, match=msg): + s.ewm(span=0.9) + s.ewm(span=1.0) + s.ewm(span=1.1) + + msg = "halflife must satisfy: halflife > 0" + with pytest.raises(ValueError, match=msg): + s.ewm(halflife=-0.1) + with pytest.raises(ValueError, match=msg): + s.ewm(halflife=0.0) + s.ewm(halflife=0.1) + + msg = "alpha must satisfy: 0 < alpha <= 1" + with pytest.raises(ValueError, match=msg): + s.ewm(alpha=-0.1) + with pytest.raises(ValueError, match=msg): + s.ewm(alpha=0.0) + s.ewm(alpha=0.1) + s.ewm(alpha=1.0) + with pytest.raises(ValueError, match=msg): + s.ewm(alpha=1.1) + + +@pytest.mark.parametrize("method", ["mean", "vol", "var"]) +def test_ew_empty_series(method): + vals = pd.Series([], dtype=np.float64) + + ewm = vals.ewm(3) + result = getattr(ewm, method)() + tm.assert_almost_equal(result, vals) + + +@pytest.mark.parametrize("min_periods", [0, 1]) +@pytest.mark.parametrize("name", ["mean", "var", "vol"]) +def test_ew_min_periods(min_periods, name): + # excluding NaNs correctly + arr = randn(50) + arr[:10] = np.NaN + arr[-10:] = np.NaN + s = Series(arr) + + # check min_periods + # GH 7898 + result = getattr(s.ewm(com=50, min_periods=2), name)() + assert result[:11].isna().all() + assert not result[11:].isna().any() + + result = getattr(s.ewm(com=50, min_periods=min_periods), name)() + if name == "mean": + assert result[:10].isna().all() + assert not result[10:].isna().any() + else: + # ewm.std, ewm.vol, ewm.var (with bias=False) require at least + # two values assert result[:11].isna().all() assert not result[11:].isna().any() - result = getattr(s.ewm(com=50, min_periods=min_periods), name)() - if name == "mean": - assert result[:10].isna().all() - assert not result[10:].isna().any() - else: - # ewm.std, ewm.vol, ewm.var (with bias=False) require at least - # two values - assert result[:11].isna().all() - assert not result[11:].isna().any() - - # check series of length 0 - result = getattr( - Series(dtype=object).ewm(com=50, min_periods=min_periods), name - )() - tm.assert_series_equal(result, Series(dtype="float64")) - - # check series of length 1 - result = getattr(Series([1.0]).ewm(50, min_periods=min_periods), name)() - if name == "mean": - tm.assert_series_equal(result, Series([1.0])) - else: - # ewm.std, ewm.vol, ewm.var with bias=False require at least - # two values - tm.assert_series_equal(result, Series([np.NaN])) - - # pass in ints - result2 = getattr(Series(np.arange(50)).ewm(span=10), name)() - assert result2.dtype == np.float_ + # check series of length 0 + result = getattr(Series(dtype=object).ewm(com=50, min_periods=min_periods), name)() + tm.assert_series_equal(result, Series(dtype="float64")) + + # check series of length 1 + result = getattr(Series([1.0]).ewm(50, min_periods=min_periods), name)() + if name == "mean": + tm.assert_series_equal(result, Series([1.0])) + else: + # ewm.std, ewm.vol, ewm.var with bias=False require at least + # two values + tm.assert_series_equal(result, Series([np.NaN])) + + # pass in ints + result2 = getattr(Series(np.arange(50)).ewm(span=10), name)() + assert result2.dtype == np.float_ diff --git a/pandas/tests/window/moments/test_moments_rolling.py b/pandas/tests/window/moments/test_moments_rolling.py index 399b76e92fc4f..6a3aec5c18141 100644 --- a/pandas/tests/window/moments/test_moments_rolling.py +++ b/pandas/tests/window/moments/test_moments_rolling.py @@ -10,912 +10,927 @@ import pandas as pd from pandas import DataFrame, Series, isna, notna import pandas._testing as tm -from pandas.tests.window.common import Base import pandas.tseries.offsets as offsets -@pytest.mark.filterwarnings("ignore:can't resolve package:ImportWarning") -class TestMoments(Base): - def setup_method(self, method): - self._create_data() +def _check_moment_func( + static_comp, + name, + raw, + has_min_periods=True, + has_center=True, + has_time_rule=True, + fill_value=None, + zero_min_periods_equal=True, + series=None, + frame=None, + **kwargs, +): + + # inject raw + if name == "apply": + kwargs = copy.copy(kwargs) + kwargs["raw"] = raw + + def get_result(obj, window, min_periods=None, center=False): + r = obj.rolling(window=window, min_periods=min_periods, center=center) + return getattr(r, name)(**kwargs) + + series_result = get_result(series, window=50) + assert isinstance(series_result, Series) + tm.assert_almost_equal(series_result.iloc[-1], static_comp(series[-50:])) + + frame_result = get_result(frame, window=50) + assert isinstance(frame_result, DataFrame) + tm.assert_series_equal( + frame_result.iloc[-1, :], + frame.iloc[-50:, :].apply(static_comp, axis=0, raw=raw), + check_names=False, + ) - def test_centered_axis_validation(self): + # check time_rule works + if has_time_rule: + win = 25 + minp = 10 + ser = series[::2].resample("B").mean() + frm = frame[::2].resample("B").mean() - # ok - Series(np.ones(10)).rolling(window=3, center=True, axis=0).mean() + if has_min_periods: + series_result = get_result(ser, window=win, min_periods=minp) + frame_result = get_result(frm, window=win, min_periods=minp) + else: + series_result = get_result(ser, window=win, min_periods=0) + frame_result = get_result(frm, window=win, min_periods=0) - # bad axis - with pytest.raises(ValueError): - Series(np.ones(10)).rolling(window=3, center=True, axis=1).mean() + last_date = series_result.index[-1] + prev_date = last_date - 24 * offsets.BDay() - # ok ok - DataFrame(np.ones((10, 10))).rolling(window=3, center=True, axis=0).mean() - DataFrame(np.ones((10, 10))).rolling(window=3, center=True, axis=1).mean() + trunc_series = series[::2].truncate(prev_date, last_date) + trunc_frame = frame[::2].truncate(prev_date, last_date) - # bad axis - with pytest.raises(ValueError): - (DataFrame(np.ones((10, 10))).rolling(window=3, center=True, axis=2).mean()) + tm.assert_almost_equal(series_result[-1], static_comp(trunc_series)) - def test_rolling_sum(self, raw): - self._check_moment_func( - np.nansum, name="sum", zero_min_periods_equal=False, raw=raw + tm.assert_series_equal( + frame_result.xs(last_date), + trunc_frame.apply(static_comp, raw=raw), + check_names=False, ) - def test_rolling_count(self, raw): - counter = lambda x: np.isfinite(x).astype(float).sum() - self._check_moment_func( - counter, name="count", has_min_periods=False, fill_value=0, raw=raw - ) + # excluding NaNs correctly + obj = Series(randn(50)) + obj[:10] = np.NaN + obj[-10:] = np.NaN + if has_min_periods: + result = get_result(obj, 50, min_periods=30) + tm.assert_almost_equal(result.iloc[-1], static_comp(obj[10:-10])) + + # min_periods is working correctly + result = get_result(obj, 20, min_periods=15) + assert isna(result.iloc[23]) + assert not isna(result.iloc[24]) + + assert not isna(result.iloc[-6]) + assert isna(result.iloc[-5]) + + obj2 = Series(randn(20)) + result = get_result(obj2, 10, min_periods=5) + assert isna(result.iloc[3]) + assert notna(result.iloc[4]) + + if zero_min_periods_equal: + # min_periods=0 may be equivalent to min_periods=1 + result0 = get_result(obj, 20, min_periods=0) + result1 = get_result(obj, 20, min_periods=1) + tm.assert_almost_equal(result0, result1) + else: + result = get_result(obj, 50) + tm.assert_almost_equal(result.iloc[-1], static_comp(obj[10:-10])) + + # window larger than series length (#7297) + if has_min_periods: + for minp in (0, len(series) - 1, len(series)): + result = get_result(series, len(series) + 1, min_periods=minp) + expected = get_result(series, len(series), min_periods=minp) + nan_mask = isna(result) + tm.assert_series_equal(nan_mask, isna(expected)) - def test_rolling_mean(self, raw): - self._check_moment_func(np.mean, name="mean", raw=raw) + nan_mask = ~nan_mask + tm.assert_almost_equal(result[nan_mask], expected[nan_mask]) + else: + result = get_result(series, len(series) + 1, min_periods=0) + expected = get_result(series, len(series), min_periods=0) + nan_mask = isna(result) + tm.assert_series_equal(nan_mask, isna(expected)) - @td.skip_if_no_scipy - def test_cmov_mean(self): - # GH 8238 - vals = np.array( - [6.95, 15.21, 4.72, 9.12, 13.81, 13.49, 16.68, 9.48, 10.63, 14.48] - ) - result = Series(vals).rolling(5, center=True).mean() - expected = Series( - [ - np.nan, - np.nan, - 9.962, - 11.27, - 11.564, - 12.516, - 12.818, - 12.952, - np.nan, - np.nan, - ] - ) - tm.assert_series_equal(expected, result) + nan_mask = ~nan_mask + tm.assert_almost_equal(result[nan_mask], expected[nan_mask]) - @td.skip_if_no_scipy - def test_cmov_window(self): - # GH 8238 - vals = np.array( - [6.95, 15.21, 4.72, 9.12, 13.81, 13.49, 16.68, 9.48, 10.63, 14.48] - ) - result = Series(vals).rolling(5, win_type="boxcar", center=True).mean() - expected = Series( - [ - np.nan, - np.nan, - 9.962, - 11.27, - 11.564, - 12.516, - 12.818, - 12.952, - np.nan, - np.nan, - ] - ) - tm.assert_series_equal(expected, result) - - @td.skip_if_no_scipy - def test_cmov_window_corner(self): - # GH 8238 - # all nan - vals = pd.Series([np.nan] * 10) - result = vals.rolling(5, center=True, win_type="boxcar").mean() - assert np.isnan(result).all() - - # empty - vals = pd.Series([], dtype=object) - result = vals.rolling(5, center=True, win_type="boxcar").mean() - assert len(result) == 0 - - # shorter than window - vals = pd.Series(np.random.randn(5)) - result = vals.rolling(10, win_type="boxcar").mean() - assert np.isnan(result).all() - assert len(result) == 5 - - @td.skip_if_no_scipy - @pytest.mark.parametrize( - "f,xp", - [ - ( - "mean", - [ - [np.nan, np.nan], - [np.nan, np.nan], - [9.252, 9.392], - [8.644, 9.906], - [8.87, 10.208], - [6.81, 8.588], - [7.792, 8.644], - [9.05, 7.824], - [np.nan, np.nan], - [np.nan, np.nan], - ], - ), - ( - "std", - [ - [np.nan, np.nan], - [np.nan, np.nan], - [3.789706, 4.068313], - [3.429232, 3.237411], - [3.589269, 3.220810], - [3.405195, 2.380655], - [3.281839, 2.369869], - [3.676846, 1.801799], - [np.nan, np.nan], - [np.nan, np.nan], - ], - ), - ( - "var", - [ - [np.nan, np.nan], - [np.nan, np.nan], - [14.36187, 16.55117], - [11.75963, 10.48083], - [12.88285, 10.37362], - [11.59535, 5.66752], - [10.77047, 5.61628], - [13.51920, 3.24648], - [np.nan, np.nan], - [np.nan, np.nan], - ], - ), - ( - "sum", - [ - [np.nan, np.nan], - [np.nan, np.nan], - [46.26, 46.96], - [43.22, 49.53], - [44.35, 51.04], - [34.05, 42.94], - [38.96, 43.22], - [45.25, 39.12], - [np.nan, np.nan], - [np.nan, np.nan], - ], - ), - ], - ) - def test_cmov_window_frame(self, f, xp): - # Gh 8238 - df = DataFrame( - np.array( - [ - [12.18, 3.64], - [10.18, 9.16], - [13.24, 14.61], - [4.51, 8.11], - [6.15, 11.44], - [9.14, 6.21], - [11.31, 10.67], - [2.94, 6.51], - [9.42, 8.39], - [12.44, 7.34], - ] + # check center=True + if has_center: + if has_min_periods: + result = get_result(obj, 20, min_periods=15, center=True) + expected = get_result( + pd.concat([obj, Series([np.NaN] * 9)]), 20, min_periods=15 + )[9:].reset_index(drop=True) + else: + result = get_result(obj, 20, min_periods=0, center=True) + print(result) + expected = get_result( + pd.concat([obj, Series([np.NaN] * 9)]), 20, min_periods=0 + )[9:].reset_index(drop=True) + + tm.assert_series_equal(result, expected) + + # shifter index + s = [f"x{x:d}" for x in range(12)] + + if has_min_periods: + minp = 10 + + series_xp = ( + get_result( + series.reindex(list(series.index) + s), window=25, min_periods=minp, + ) + .shift(-12) + .reindex(series.index) + ) + frame_xp = ( + get_result( + frame.reindex(list(frame.index) + s), window=25, min_periods=minp, + ) + .shift(-12) + .reindex(frame.index) ) - ) - xp = DataFrame(np.array(xp)) - roll = df.rolling(5, win_type="boxcar", center=True) - rs = getattr(roll, f)() + series_rs = get_result(series, window=25, min_periods=minp, center=True) + frame_rs = get_result(frame, window=25, min_periods=minp, center=True) - tm.assert_frame_equal(xp, rs) + else: + series_xp = ( + get_result( + series.reindex(list(series.index) + s), window=25, min_periods=0, + ) + .shift(-12) + .reindex(series.index) + ) + frame_xp = ( + get_result( + frame.reindex(list(frame.index) + s), window=25, min_periods=0, + ) + .shift(-12) + .reindex(frame.index) + ) - @td.skip_if_no_scipy - def test_cmov_window_na_min_periods(self): - # min_periods - vals = Series(np.random.randn(10)) - vals[4] = np.nan - vals[8] = np.nan + series_rs = get_result(series, window=25, min_periods=0, center=True) + frame_rs = get_result(frame, window=25, min_periods=0, center=True) - xp = vals.rolling(5, min_periods=4, center=True).mean() - rs = vals.rolling(5, win_type="boxcar", min_periods=4, center=True).mean() - tm.assert_series_equal(xp, rs) + if fill_value is not None: + series_xp = series_xp.fillna(fill_value) + frame_xp = frame_xp.fillna(fill_value) + tm.assert_series_equal(series_xp, series_rs) + tm.assert_frame_equal(frame_xp, frame_rs) - @td.skip_if_no_scipy - def test_cmov_window_regular(self, win_types): - # GH 8238 - vals = np.array( - [6.95, 15.21, 4.72, 9.12, 13.81, 13.49, 16.68, 9.48, 10.63, 14.48] - ) - xps = { - "hamming": [ - np.nan, - np.nan, - 8.71384, - 9.56348, - 12.38009, - 14.03687, - 13.8567, - 11.81473, - np.nan, - np.nan, - ], - "triang": [ - np.nan, - np.nan, - 9.28667, - 10.34667, - 12.00556, - 13.33889, - 13.38, - 12.33667, - np.nan, - np.nan, - ], - "barthann": [ - np.nan, - np.nan, - 8.4425, - 9.1925, - 12.5575, - 14.3675, - 14.0825, - 11.5675, - np.nan, - np.nan, - ], - "bohman": [ - np.nan, - np.nan, - 7.61599, - 9.1764, - 12.83559, - 14.17267, - 14.65923, - 11.10401, - np.nan, - np.nan, - ], - "blackmanharris": [ - np.nan, - np.nan, - 6.97691, - 9.16438, - 13.05052, - 14.02156, - 15.10512, - 10.74574, - np.nan, - np.nan, - ], - "nuttall": [ - np.nan, - np.nan, - 7.04618, - 9.16786, - 13.02671, - 14.03559, - 15.05657, - 10.78514, - np.nan, - np.nan, - ], - "blackman": [ - np.nan, - np.nan, - 7.73345, - 9.17869, - 12.79607, - 14.20036, - 14.57726, - 11.16988, - np.nan, - np.nan, - ], - "bartlett": [ - np.nan, - np.nan, - 8.4425, - 9.1925, - 12.5575, - 14.3675, - 14.0825, - 11.5675, - np.nan, - np.nan, - ], - } - - xp = Series(xps[win_types]) - rs = Series(vals).rolling(5, win_type=win_types, center=True).mean() - tm.assert_series_equal(xp, rs) - - @td.skip_if_no_scipy - def test_cmov_window_regular_linear_range(self, win_types): - # GH 8238 - vals = np.array(range(10), dtype=np.float) - xp = vals.copy() - xp[:2] = np.nan - xp[-2:] = np.nan - xp = Series(xp) - - rs = Series(vals).rolling(5, win_type=win_types, center=True).mean() - tm.assert_series_equal(xp, rs) - - @td.skip_if_no_scipy - def test_cmov_window_regular_missing_data(self, win_types): - # GH 8238 - vals = np.array( - [6.95, 15.21, 4.72, 9.12, 13.81, 13.49, 16.68, np.nan, 10.63, 14.48] - ) - xps = { - "bartlett": [ - np.nan, - np.nan, - 9.70333, - 10.5225, - 8.4425, - 9.1925, - 12.5575, - 14.3675, - 15.61667, - 13.655, - ], - "blackman": [ - np.nan, - np.nan, - 9.04582, - 11.41536, - 7.73345, - 9.17869, - 12.79607, - 14.20036, - 15.8706, - 13.655, - ], - "barthann": [ - np.nan, - np.nan, - 9.70333, - 10.5225, - 8.4425, - 9.1925, - 12.5575, - 14.3675, - 15.61667, - 13.655, - ], - "bohman": [ - np.nan, - np.nan, - 8.9444, - 11.56327, - 7.61599, - 9.1764, - 12.83559, - 14.17267, - 15.90976, - 13.655, - ], - "hamming": [ - np.nan, - np.nan, - 9.59321, - 10.29694, - 8.71384, - 9.56348, - 12.38009, - 14.20565, - 15.24694, - 13.69758, + +def test_centered_axis_validation(): + + # ok + Series(np.ones(10)).rolling(window=3, center=True, axis=0).mean() + + # bad axis + with pytest.raises(ValueError): + Series(np.ones(10)).rolling(window=3, center=True, axis=1).mean() + + # ok ok + DataFrame(np.ones((10, 10))).rolling(window=3, center=True, axis=0).mean() + DataFrame(np.ones((10, 10))).rolling(window=3, center=True, axis=1).mean() + + # bad axis + with pytest.raises(ValueError): + (DataFrame(np.ones((10, 10))).rolling(window=3, center=True, axis=2).mean()) + + +def test_rolling_sum(raw, series, frame): + _check_moment_func( + np.nansum, + name="sum", + zero_min_periods_equal=False, + raw=raw, + series=series, + frame=frame, + ) + + +def test_rolling_count(raw, series, frame): + counter = lambda x: np.isfinite(x).astype(float).sum() + _check_moment_func( + counter, + name="count", + has_min_periods=False, + fill_value=0, + raw=raw, + series=series, + frame=frame, + ) + + +def test_rolling_mean(raw, series, frame): + _check_moment_func(np.mean, name="mean", raw=raw, series=series, frame=frame) + + +@td.skip_if_no_scipy +def test_cmov_mean(): + # GH 8238 + vals = np.array([6.95, 15.21, 4.72, 9.12, 13.81, 13.49, 16.68, 9.48, 10.63, 14.48]) + result = Series(vals).rolling(5, center=True).mean() + expected = Series( + [np.nan, np.nan, 9.962, 11.27, 11.564, 12.516, 12.818, 12.952, np.nan, np.nan,] + ) + tm.assert_series_equal(expected, result) + + +@td.skip_if_no_scipy +def test_cmov_window(): + # GH 8238 + vals = np.array([6.95, 15.21, 4.72, 9.12, 13.81, 13.49, 16.68, 9.48, 10.63, 14.48]) + result = Series(vals).rolling(5, win_type="boxcar", center=True).mean() + expected = Series( + [np.nan, np.nan, 9.962, 11.27, 11.564, 12.516, 12.818, 12.952, np.nan, np.nan,] + ) + tm.assert_series_equal(expected, result) + + +@td.skip_if_no_scipy +def test_cmov_window_corner(): + # GH 8238 + # all nan + vals = pd.Series([np.nan] * 10) + result = vals.rolling(5, center=True, win_type="boxcar").mean() + assert np.isnan(result).all() + + # empty + vals = pd.Series([], dtype=object) + result = vals.rolling(5, center=True, win_type="boxcar").mean() + assert len(result) == 0 + + # shorter than window + vals = pd.Series(np.random.randn(5)) + result = vals.rolling(10, win_type="boxcar").mean() + assert np.isnan(result).all() + assert len(result) == 5 + + +@td.skip_if_no_scipy +@pytest.mark.parametrize( + "f,xp", + [ + ( + "mean", + [ + [np.nan, np.nan], + [np.nan, np.nan], + [9.252, 9.392], + [8.644, 9.906], + [8.87, 10.208], + [6.81, 8.588], + [7.792, 8.644], + [9.05, 7.824], + [np.nan, np.nan], + [np.nan, np.nan], ], - "nuttall": [ - np.nan, - np.nan, - 8.47693, - 12.2821, - 7.04618, - 9.16786, - 13.02671, - 14.03673, - 16.08759, - 13.65553, + ), + ( + "std", + [ + [np.nan, np.nan], + [np.nan, np.nan], + [3.789706, 4.068313], + [3.429232, 3.237411], + [3.589269, 3.220810], + [3.405195, 2.380655], + [3.281839, 2.369869], + [3.676846, 1.801799], + [np.nan, np.nan], + [np.nan, np.nan], ], - "triang": [ - np.nan, - np.nan, - 9.33167, - 9.76125, - 9.28667, - 10.34667, - 12.00556, - 13.82125, - 14.49429, - 13.765, + ), + ( + "var", + [ + [np.nan, np.nan], + [np.nan, np.nan], + [14.36187, 16.55117], + [11.75963, 10.48083], + [12.88285, 10.37362], + [11.59535, 5.66752], + [10.77047, 5.61628], + [13.51920, 3.24648], + [np.nan, np.nan], + [np.nan, np.nan], ], - "blackmanharris": [ - np.nan, - np.nan, - 8.42526, - 12.36824, - 6.97691, - 9.16438, - 13.05052, - 14.02175, - 16.1098, - 13.65509, + ), + ( + "sum", + [ + [np.nan, np.nan], + [np.nan, np.nan], + [46.26, 46.96], + [43.22, 49.53], + [44.35, 51.04], + [34.05, 42.94], + [38.96, 43.22], + [45.25, 39.12], + [np.nan, np.nan], + [np.nan, np.nan], ], - } - - xp = Series(xps[win_types]) - rs = Series(vals).rolling(5, win_type=win_types, min_periods=3).mean() - tm.assert_series_equal(xp, rs) - - @td.skip_if_no_scipy - def test_cmov_window_special(self, win_types_special): - # GH 8238 - kwds = { - "kaiser": {"beta": 1.0}, - "gaussian": {"std": 1.0}, - "general_gaussian": {"power": 2.0, "width": 2.0}, - "exponential": {"tau": 10}, - } - - vals = np.array( - [6.95, 15.21, 4.72, 9.12, 13.81, 13.49, 16.68, 9.48, 10.63, 14.48] + ), + ], +) +def test_cmov_window_frame(f, xp): + # Gh 8238 + df = DataFrame( + np.array( + [ + [12.18, 3.64], + [10.18, 9.16], + [13.24, 14.61], + [4.51, 8.11], + [6.15, 11.44], + [9.14, 6.21], + [11.31, 10.67], + [2.94, 6.51], + [9.42, 8.39], + [12.44, 7.34], + ] ) + ) + xp = DataFrame(np.array(xp)) + + roll = df.rolling(5, win_type="boxcar", center=True) + rs = getattr(roll, f)() + + tm.assert_frame_equal(xp, rs) + + +@td.skip_if_no_scipy +def test_cmov_window_na_min_periods(): + # min_periods + vals = Series(np.random.randn(10)) + vals[4] = np.nan + vals[8] = np.nan + + xp = vals.rolling(5, min_periods=4, center=True).mean() + rs = vals.rolling(5, win_type="boxcar", min_periods=4, center=True).mean() + tm.assert_series_equal(xp, rs) + + +@td.skip_if_no_scipy +def test_cmov_window_regular(win_types): + # GH 8238 + vals = np.array([6.95, 15.21, 4.72, 9.12, 13.81, 13.49, 16.68, 9.48, 10.63, 14.48]) + xps = { + "hamming": [ + np.nan, + np.nan, + 8.71384, + 9.56348, + 12.38009, + 14.03687, + 13.8567, + 11.81473, + np.nan, + np.nan, + ], + "triang": [ + np.nan, + np.nan, + 9.28667, + 10.34667, + 12.00556, + 13.33889, + 13.38, + 12.33667, + np.nan, + np.nan, + ], + "barthann": [ + np.nan, + np.nan, + 8.4425, + 9.1925, + 12.5575, + 14.3675, + 14.0825, + 11.5675, + np.nan, + np.nan, + ], + "bohman": [ + np.nan, + np.nan, + 7.61599, + 9.1764, + 12.83559, + 14.17267, + 14.65923, + 11.10401, + np.nan, + np.nan, + ], + "blackmanharris": [ + np.nan, + np.nan, + 6.97691, + 9.16438, + 13.05052, + 14.02156, + 15.10512, + 10.74574, + np.nan, + np.nan, + ], + "nuttall": [ + np.nan, + np.nan, + 7.04618, + 9.16786, + 13.02671, + 14.03559, + 15.05657, + 10.78514, + np.nan, + np.nan, + ], + "blackman": [ + np.nan, + np.nan, + 7.73345, + 9.17869, + 12.79607, + 14.20036, + 14.57726, + 11.16988, + np.nan, + np.nan, + ], + "bartlett": [ + np.nan, + np.nan, + 8.4425, + 9.1925, + 12.5575, + 14.3675, + 14.0825, + 11.5675, + np.nan, + np.nan, + ], + } - xps = { - "gaussian": [ - np.nan, - np.nan, - 8.97297, - 9.76077, - 12.24763, - 13.89053, - 13.65671, - 12.01002, - np.nan, - np.nan, - ], - "general_gaussian": [ - np.nan, - np.nan, - 9.85011, - 10.71589, - 11.73161, - 13.08516, - 12.95111, - 12.74577, - np.nan, - np.nan, - ], - "kaiser": [ - np.nan, - np.nan, - 9.86851, - 11.02969, - 11.65161, - 12.75129, - 12.90702, - 12.83757, - np.nan, - np.nan, - ], - "exponential": [ - np.nan, - np.nan, - 9.83364, - 11.10472, - 11.64551, - 12.66138, - 12.92379, - 12.83770, - np.nan, - np.nan, - ], - } + xp = Series(xps[win_types]) + rs = Series(vals).rolling(5, win_type=win_types, center=True).mean() + tm.assert_series_equal(xp, rs) - xp = Series(xps[win_types_special]) - rs = ( - Series(vals) - .rolling(5, win_type=win_types_special, center=True) - .mean(**kwds[win_types_special]) - ) - tm.assert_series_equal(xp, rs) - - @td.skip_if_no_scipy - def test_cmov_window_special_linear_range(self, win_types_special): - # GH 8238 - kwds = { - "kaiser": {"beta": 1.0}, - "gaussian": {"std": 1.0}, - "general_gaussian": {"power": 2.0, "width": 2.0}, - "slepian": {"width": 0.5}, - "exponential": {"tau": 10}, - } - - vals = np.array(range(10), dtype=np.float) - xp = vals.copy() - xp[:2] = np.nan - xp[-2:] = np.nan - xp = Series(xp) - - rs = ( - Series(vals) - .rolling(5, win_type=win_types_special, center=True) - .mean(**kwds[win_types_special]) - ) - tm.assert_series_equal(xp, rs) - def test_rolling_median(self, raw): - self._check_moment_func(np.median, name="median", raw=raw) +@td.skip_if_no_scipy +def test_cmov_window_regular_linear_range(win_types): + # GH 8238 + vals = np.array(range(10), dtype=np.float) + xp = vals.copy() + xp[:2] = np.nan + xp[-2:] = np.nan + xp = Series(xp) - def test_rolling_min(self, raw): - self._check_moment_func(np.min, name="min", raw=raw) + rs = Series(vals).rolling(5, win_type=win_types, center=True).mean() + tm.assert_series_equal(xp, rs) - a = pd.Series([1, 2, 3, 4, 5]) - result = a.rolling(window=100, min_periods=1).min() - expected = pd.Series(np.ones(len(a))) - tm.assert_series_equal(result, expected) - with pytest.raises(ValueError): - pd.Series([1, 2, 3]).rolling(window=3, min_periods=5).min() +@td.skip_if_no_scipy +def test_cmov_window_regular_missing_data(win_types): + # GH 8238 + vals = np.array( + [6.95, 15.21, 4.72, 9.12, 13.81, 13.49, 16.68, np.nan, 10.63, 14.48] + ) + xps = { + "bartlett": [ + np.nan, + np.nan, + 9.70333, + 10.5225, + 8.4425, + 9.1925, + 12.5575, + 14.3675, + 15.61667, + 13.655, + ], + "blackman": [ + np.nan, + np.nan, + 9.04582, + 11.41536, + 7.73345, + 9.17869, + 12.79607, + 14.20036, + 15.8706, + 13.655, + ], + "barthann": [ + np.nan, + np.nan, + 9.70333, + 10.5225, + 8.4425, + 9.1925, + 12.5575, + 14.3675, + 15.61667, + 13.655, + ], + "bohman": [ + np.nan, + np.nan, + 8.9444, + 11.56327, + 7.61599, + 9.1764, + 12.83559, + 14.17267, + 15.90976, + 13.655, + ], + "hamming": [ + np.nan, + np.nan, + 9.59321, + 10.29694, + 8.71384, + 9.56348, + 12.38009, + 14.20565, + 15.24694, + 13.69758, + ], + "nuttall": [ + np.nan, + np.nan, + 8.47693, + 12.2821, + 7.04618, + 9.16786, + 13.02671, + 14.03673, + 16.08759, + 13.65553, + ], + "triang": [ + np.nan, + np.nan, + 9.33167, + 9.76125, + 9.28667, + 10.34667, + 12.00556, + 13.82125, + 14.49429, + 13.765, + ], + "blackmanharris": [ + np.nan, + np.nan, + 8.42526, + 12.36824, + 6.97691, + 9.16438, + 13.05052, + 14.02175, + 16.1098, + 13.65509, + ], + } + + xp = Series(xps[win_types]) + rs = Series(vals).rolling(5, win_type=win_types, min_periods=3).mean() + tm.assert_series_equal(xp, rs) + + +@td.skip_if_no_scipy +def test_cmov_window_special(win_types_special): + # GH 8238 + kwds = { + "kaiser": {"beta": 1.0}, + "gaussian": {"std": 1.0}, + "general_gaussian": {"power": 2.0, "width": 2.0}, + "exponential": {"tau": 10}, + } + + vals = np.array([6.95, 15.21, 4.72, 9.12, 13.81, 13.49, 16.68, 9.48, 10.63, 14.48]) + + xps = { + "gaussian": [ + np.nan, + np.nan, + 8.97297, + 9.76077, + 12.24763, + 13.89053, + 13.65671, + 12.01002, + np.nan, + np.nan, + ], + "general_gaussian": [ + np.nan, + np.nan, + 9.85011, + 10.71589, + 11.73161, + 13.08516, + 12.95111, + 12.74577, + np.nan, + np.nan, + ], + "kaiser": [ + np.nan, + np.nan, + 9.86851, + 11.02969, + 11.65161, + 12.75129, + 12.90702, + 12.83757, + np.nan, + np.nan, + ], + "exponential": [ + np.nan, + np.nan, + 9.83364, + 11.10472, + 11.64551, + 12.66138, + 12.92379, + 12.83770, + np.nan, + np.nan, + ], + } - def test_rolling_max(self, raw): - self._check_moment_func(np.max, name="max", raw=raw) + xp = Series(xps[win_types_special]) + rs = ( + Series(vals) + .rolling(5, win_type=win_types_special, center=True) + .mean(**kwds[win_types_special]) + ) + tm.assert_series_equal(xp, rs) + + +@td.skip_if_no_scipy +def test_cmov_window_special_linear_range(win_types_special): + # GH 8238 + kwds = { + "kaiser": {"beta": 1.0}, + "gaussian": {"std": 1.0}, + "general_gaussian": {"power": 2.0, "width": 2.0}, + "slepian": {"width": 0.5}, + "exponential": {"tau": 10}, + } + + vals = np.array(range(10), dtype=np.float) + xp = vals.copy() + xp[:2] = np.nan + xp[-2:] = np.nan + xp = Series(xp) + + rs = ( + Series(vals) + .rolling(5, win_type=win_types_special, center=True) + .mean(**kwds[win_types_special]) + ) + tm.assert_series_equal(xp, rs) - a = pd.Series([1, 2, 3, 4, 5], dtype=np.float64) - b = a.rolling(window=100, min_periods=1).max() - tm.assert_almost_equal(a, b) - with pytest.raises(ValueError): - pd.Series([1, 2, 3]).rolling(window=3, min_periods=5).max() +def test_rolling_median(raw, series, frame): + _check_moment_func(np.median, name="median", raw=raw, series=series, frame=frame) - @pytest.mark.parametrize("q", [0.0, 0.1, 0.5, 0.9, 1.0]) - def test_rolling_quantile(self, q, raw): - def scoreatpercentile(a, per): - values = np.sort(a, axis=0) - idx = int(per / 1.0 * (values.shape[0] - 1)) +def test_rolling_min(raw, series, frame): + _check_moment_func(np.min, name="min", raw=raw, series=series, frame=frame) - if idx == values.shape[0] - 1: - retval = values[-1] + a = pd.Series([1, 2, 3, 4, 5]) + result = a.rolling(window=100, min_periods=1).min() + expected = pd.Series(np.ones(len(a))) + tm.assert_series_equal(result, expected) - else: - qlow = float(idx) / float(values.shape[0] - 1) - qhig = float(idx + 1) / float(values.shape[0] - 1) - vlow = values[idx] - vhig = values[idx + 1] - retval = vlow + (vhig - vlow) * (per - qlow) / (qhig - qlow) + with pytest.raises(ValueError): + pd.Series([1, 2, 3]).rolling(window=3, min_periods=5).min() - return retval - def quantile_func(x): - return scoreatpercentile(x, q) +def test_rolling_max(raw, series, frame): + _check_moment_func(np.max, name="max", raw=raw, series=series, frame=frame) - self._check_moment_func(quantile_func, name="quantile", quantile=q, raw=raw) + a = pd.Series([1, 2, 3, 4, 5], dtype=np.float64) + b = a.rolling(window=100, min_periods=1).max() + tm.assert_almost_equal(a, b) - def test_rolling_quantile_np_percentile(self): - # #9413: Tests that rolling window's quantile default behavior - # is analogous to Numpy's percentile - row = 10 - col = 5 - idx = pd.date_range("20100101", periods=row, freq="B") - df = DataFrame(np.random.rand(row * col).reshape((row, -1)), index=idx) + with pytest.raises(ValueError): + pd.Series([1, 2, 3]).rolling(window=3, min_periods=5).max() - df_quantile = df.quantile([0.25, 0.5, 0.75], axis=0) - np_percentile = np.percentile(df, [25, 50, 75], axis=0) - tm.assert_almost_equal(df_quantile.values, np.array(np_percentile)) +@pytest.mark.parametrize("q", [0.0, 0.1, 0.5, 0.9, 1.0]) +def test_rolling_quantile(q, raw, series, frame): + def scoreatpercentile(a, per): + values = np.sort(a, axis=0) + + idx = int(per / 1.0 * (values.shape[0] - 1)) + + if idx == values.shape[0] - 1: + retval = values[-1] - @pytest.mark.parametrize("quantile", [0.0, 0.1, 0.45, 0.5, 1]) - @pytest.mark.parametrize( - "interpolation", ["linear", "lower", "higher", "nearest", "midpoint"] - ) - @pytest.mark.parametrize( - "data", - [ - [1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0], - [8.0, 1.0, 3.0, 4.0, 5.0, 2.0, 6.0, 7.0], - [0.0, np.nan, 0.2, np.nan, 0.4], - [np.nan, np.nan, np.nan, np.nan], - [np.nan, 0.1, np.nan, 0.3, 0.4, 0.5], - [0.5], - [np.nan, 0.7, 0.6], - ], - ) - def test_rolling_quantile_interpolation_options( - self, quantile, interpolation, data - ): - # Tests that rolling window's quantile behavior is analogous to - # Series' quantile for each interpolation option - s = Series(data) - - q1 = s.quantile(quantile, interpolation) - q2 = s.expanding(min_periods=1).quantile(quantile, interpolation).iloc[-1] - - if np.isnan(q1): - assert np.isnan(q2) else: - assert q1 == q2 + qlow = float(idx) / float(values.shape[0] - 1) + qhig = float(idx + 1) / float(values.shape[0] - 1) + vlow = values[idx] + vhig = values[idx + 1] + retval = vlow + (vhig - vlow) * (per - qlow) / (qhig - qlow) - def test_invalid_quantile_value(self): - data = np.arange(5) - s = Series(data) + return retval - msg = "Interpolation 'invalid' is not supported" - with pytest.raises(ValueError, match=msg): - s.rolling(len(data), min_periods=1).quantile(0.5, interpolation="invalid") + def quantile_func(x): + return scoreatpercentile(x, q) - def test_rolling_quantile_param(self): - ser = Series([0.0, 0.1, 0.5, 0.9, 1.0]) + _check_moment_func( + quantile_func, name="quantile", quantile=q, raw=raw, series=series, frame=frame + ) - with pytest.raises(ValueError): - ser.rolling(3).quantile(-0.1) - with pytest.raises(ValueError): - ser.rolling(3).quantile(10.0) +def test_rolling_quantile_np_percentile(): + # #9413: Tests that rolling window's quantile default behavior + # is analogous to Numpy's percentile + row = 10 + col = 5 + idx = pd.date_range("20100101", periods=row, freq="B") + df = DataFrame(np.random.rand(row * col).reshape((row, -1)), index=idx) - with pytest.raises(TypeError): - ser.rolling(3).quantile("foo") + df_quantile = df.quantile([0.25, 0.5, 0.75], axis=0) + np_percentile = np.percentile(df, [25, 50, 75], axis=0) - def test_rolling_apply(self, raw): - # suppress warnings about empty slices, as we are deliberately testing - # with a 0-length Series + tm.assert_almost_equal(df_quantile.values, np.array(np_percentile)) - def f(x): - with warnings.catch_warnings(): - warnings.filterwarnings( - "ignore", - message=".*(empty slice|0 for slice).*", - category=RuntimeWarning, - ) - return x[np.isfinite(x)].mean() - self._check_moment_func(np.mean, name="apply", func=f, raw=raw) +@pytest.mark.parametrize("quantile", [0.0, 0.1, 0.45, 0.5, 1]) +@pytest.mark.parametrize( + "interpolation", ["linear", "lower", "higher", "nearest", "midpoint"] +) +@pytest.mark.parametrize( + "data", + [ + [1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0], + [8.0, 1.0, 3.0, 4.0, 5.0, 2.0, 6.0, 7.0], + [0.0, np.nan, 0.2, np.nan, 0.4], + [np.nan, np.nan, np.nan, np.nan], + [np.nan, 0.1, np.nan, 0.3, 0.4, 0.5], + [0.5], + [np.nan, 0.7, 0.6], + ], +) +def test_rolling_quantile_interpolation_options(quantile, interpolation, data): + # Tests that rolling window's quantile behavior is analogous to + # Series' quantile for each interpolation option + s = Series(data) - def test_rolling_std(self, raw): - self._check_moment_func(lambda x: np.std(x, ddof=1), name="std", raw=raw) - self._check_moment_func( - lambda x: np.std(x, ddof=0), name="std", ddof=0, raw=raw - ) + q1 = s.quantile(quantile, interpolation) + q2 = s.expanding(min_periods=1).quantile(quantile, interpolation).iloc[-1] - def test_rolling_std_1obs(self): - vals = pd.Series([1.0, 2.0, 3.0, 4.0, 5.0]) + if np.isnan(q1): + assert np.isnan(q2) + else: + assert q1 == q2 - result = vals.rolling(1, min_periods=1).std() - expected = pd.Series([np.nan] * 5) - tm.assert_series_equal(result, expected) - result = vals.rolling(1, min_periods=1).std(ddof=0) - expected = pd.Series([0.0] * 5) - tm.assert_series_equal(result, expected) +def test_invalid_quantile_value(): + data = np.arange(5) + s = Series(data) - result = pd.Series([np.nan, np.nan, 3, 4, 5]).rolling(3, min_periods=2).std() - assert np.isnan(result[2]) + msg = "Interpolation 'invalid' is not supported" + with pytest.raises(ValueError, match=msg): + s.rolling(len(data), min_periods=1).quantile(0.5, interpolation="invalid") - def test_rolling_std_neg_sqrt(self): - # unit test from Bottleneck - # Test move_nanstd for neg sqrt. +def test_rolling_quantile_param(): + ser = Series([0.0, 0.1, 0.5, 0.9, 1.0]) - a = pd.Series( - [ - 0.0011448196318903589, - 0.00028718669878572767, - 0.00028718669878572767, - 0.00028718669878572767, - 0.00028718669878572767, - ] - ) - b = a.rolling(window=3).std() - assert np.isfinite(b[2:]).all() + with pytest.raises(ValueError): + ser.rolling(3).quantile(-0.1) - b = a.ewm(span=3).std() - assert np.isfinite(b[2:]).all() + with pytest.raises(ValueError): + ser.rolling(3).quantile(10.0) - def test_rolling_var(self, raw): - self._check_moment_func(lambda x: np.var(x, ddof=1), name="var", raw=raw) - self._check_moment_func( - lambda x: np.var(x, ddof=0), name="var", ddof=0, raw=raw - ) + with pytest.raises(TypeError): + ser.rolling(3).quantile("foo") - @td.skip_if_no_scipy - def test_rolling_skew(self, raw): - from scipy.stats import skew - - self._check_moment_func(lambda x: skew(x, bias=False), name="skew", raw=raw) - - @td.skip_if_no_scipy - def test_rolling_kurt(self, raw): - from scipy.stats import kurtosis - - self._check_moment_func(lambda x: kurtosis(x, bias=False), name="kurt", raw=raw) - - def _check_moment_func( - self, - static_comp, - name, - raw, - has_min_periods=True, - has_center=True, - has_time_rule=True, - fill_value=None, - zero_min_periods_equal=True, - **kwargs, - ): - - # inject raw - if name == "apply": - kwargs = copy.copy(kwargs) - kwargs["raw"] = raw - - def get_result(obj, window, min_periods=None, center=False): - r = obj.rolling(window=window, min_periods=min_periods, center=center) - return getattr(r, name)(**kwargs) - - series_result = get_result(self.series, window=50) - assert isinstance(series_result, Series) - tm.assert_almost_equal(series_result.iloc[-1], static_comp(self.series[-50:])) - - frame_result = get_result(self.frame, window=50) - assert isinstance(frame_result, DataFrame) - tm.assert_series_equal( - frame_result.iloc[-1, :], - self.frame.iloc[-50:, :].apply(static_comp, axis=0, raw=raw), - check_names=False, - ) - # check time_rule works - if has_time_rule: - win = 25 - minp = 10 - series = self.series[::2].resample("B").mean() - frame = self.frame[::2].resample("B").mean() +def test_rolling_apply(raw, series, frame): + # suppress warnings about empty slices, as we are deliberately testing + # with a 0-length Series - if has_min_periods: - series_result = get_result(series, window=win, min_periods=minp) - frame_result = get_result(frame, window=win, min_periods=minp) - else: - series_result = get_result(series, window=win, min_periods=0) - frame_result = get_result(frame, window=win, min_periods=0) + def f(x): + with warnings.catch_warnings(): + warnings.filterwarnings( + "ignore", + message=".*(empty slice|0 for slice).*", + category=RuntimeWarning, + ) + return x[np.isfinite(x)].mean() - last_date = series_result.index[-1] - prev_date = last_date - 24 * offsets.BDay() + _check_moment_func( + np.mean, name="apply", func=f, raw=raw, series=series, frame=frame + ) - trunc_series = self.series[::2].truncate(prev_date, last_date) - trunc_frame = self.frame[::2].truncate(prev_date, last_date) - tm.assert_almost_equal(series_result[-1], static_comp(trunc_series)) +def test_rolling_std(raw, series, frame): + _check_moment_func( + lambda x: np.std(x, ddof=1), name="std", raw=raw, series=series, frame=frame + ) + _check_moment_func( + lambda x: np.std(x, ddof=0), + name="std", + ddof=0, + raw=raw, + series=series, + frame=frame, + ) - tm.assert_series_equal( - frame_result.xs(last_date), - trunc_frame.apply(static_comp, raw=raw), - check_names=False, - ) - # excluding NaNs correctly - obj = Series(randn(50)) - obj[:10] = np.NaN - obj[-10:] = np.NaN - if has_min_periods: - result = get_result(obj, 50, min_periods=30) - tm.assert_almost_equal(result.iloc[-1], static_comp(obj[10:-10])) - - # min_periods is working correctly - result = get_result(obj, 20, min_periods=15) - assert isna(result.iloc[23]) - assert not isna(result.iloc[24]) - - assert not isna(result.iloc[-6]) - assert isna(result.iloc[-5]) - - obj2 = Series(randn(20)) - result = get_result(obj2, 10, min_periods=5) - assert isna(result.iloc[3]) - assert notna(result.iloc[4]) - - if zero_min_periods_equal: - # min_periods=0 may be equivalent to min_periods=1 - result0 = get_result(obj, 20, min_periods=0) - result1 = get_result(obj, 20, min_periods=1) - tm.assert_almost_equal(result0, result1) - else: - result = get_result(obj, 50) - tm.assert_almost_equal(result.iloc[-1], static_comp(obj[10:-10])) +def test_rolling_std_1obs(): + vals = pd.Series([1.0, 2.0, 3.0, 4.0, 5.0]) - # window larger than series length (#7297) - if has_min_periods: - for minp in (0, len(self.series) - 1, len(self.series)): - result = get_result(self.series, len(self.series) + 1, min_periods=minp) - expected = get_result(self.series, len(self.series), min_periods=minp) - nan_mask = isna(result) - tm.assert_series_equal(nan_mask, isna(expected)) - - nan_mask = ~nan_mask - tm.assert_almost_equal(result[nan_mask], expected[nan_mask]) - else: - result = get_result(self.series, len(self.series) + 1, min_periods=0) - expected = get_result(self.series, len(self.series), min_periods=0) - nan_mask = isna(result) - tm.assert_series_equal(nan_mask, isna(expected)) + result = vals.rolling(1, min_periods=1).std() + expected = pd.Series([np.nan] * 5) + tm.assert_series_equal(result, expected) - nan_mask = ~nan_mask - tm.assert_almost_equal(result[nan_mask], expected[nan_mask]) + result = vals.rolling(1, min_periods=1).std(ddof=0) + expected = pd.Series([0.0] * 5) + tm.assert_series_equal(result, expected) - # check center=True - if has_center: - if has_min_periods: - result = get_result(obj, 20, min_periods=15, center=True) - expected = get_result( - pd.concat([obj, Series([np.NaN] * 9)]), 20, min_periods=15 - )[9:].reset_index(drop=True) - else: - result = get_result(obj, 20, min_periods=0, center=True) - print(result) - expected = get_result( - pd.concat([obj, Series([np.NaN] * 9)]), 20, min_periods=0 - )[9:].reset_index(drop=True) - - tm.assert_series_equal(result, expected) - - # shifter index - s = [f"x{x:d}" for x in range(12)] - - if has_min_periods: - minp = 10 - - series_xp = ( - get_result( - self.series.reindex(list(self.series.index) + s), - window=25, - min_periods=minp, - ) - .shift(-12) - .reindex(self.series.index) - ) - frame_xp = ( - get_result( - self.frame.reindex(list(self.frame.index) + s), - window=25, - min_periods=minp, - ) - .shift(-12) - .reindex(self.frame.index) - ) + result = pd.Series([np.nan, np.nan, 3, 4, 5]).rolling(3, min_periods=2).std() + assert np.isnan(result[2]) - series_rs = get_result( - self.series, window=25, min_periods=minp, center=True - ) - frame_rs = get_result( - self.frame, window=25, min_periods=minp, center=True - ) - else: - series_xp = ( - get_result( - self.series.reindex(list(self.series.index) + s), - window=25, - min_periods=0, - ) - .shift(-12) - .reindex(self.series.index) - ) - frame_xp = ( - get_result( - self.frame.reindex(list(self.frame.index) + s), - window=25, - min_periods=0, - ) - .shift(-12) - .reindex(self.frame.index) - ) +def test_rolling_std_neg_sqrt(): + # unit test from Bottleneck + + # Test move_nanstd for neg sqrt. + + a = pd.Series( + [ + 0.0011448196318903589, + 0.00028718669878572767, + 0.00028718669878572767, + 0.00028718669878572767, + 0.00028718669878572767, + ] + ) + b = a.rolling(window=3).std() + assert np.isfinite(b[2:]).all() + + b = a.ewm(span=3).std() + assert np.isfinite(b[2:]).all() - series_rs = get_result( - self.series, window=25, min_periods=0, center=True - ) - frame_rs = get_result(self.frame, window=25, min_periods=0, center=True) - if fill_value is not None: - series_xp = series_xp.fillna(fill_value) - frame_xp = frame_xp.fillna(fill_value) - tm.assert_series_equal(series_xp, series_rs) - tm.assert_frame_equal(frame_xp, frame_rs) +def test_rolling_var(raw, series, frame): + _check_moment_func( + lambda x: np.var(x, ddof=1), name="var", raw=raw, series=series, frame=frame + ) + _check_moment_func( + lambda x: np.var(x, ddof=0), + name="var", + ddof=0, + raw=raw, + series=series, + frame=frame, + ) + + +@td.skip_if_no_scipy +def test_rolling_skew(raw, series, frame): + from scipy.stats import skew + + _check_moment_func( + lambda x: skew(x, bias=False), name="skew", raw=raw, series=series, frame=frame + ) + + +@td.skip_if_no_scipy +def test_rolling_kurt(raw, series, frame): + from scipy.stats import kurtosis + + _check_moment_func( + lambda x: kurtosis(x, bias=False), + name="kurt", + raw=raw, + series=series, + frame=frame, + ) diff --git a/pandas/tests/window/test_api.py b/pandas/tests/window/test_api.py index 680237db0535b..c6e37d447c1a2 100644 --- a/pandas/tests/window/test_api.py +++ b/pandas/tests/window/test_api.py @@ -9,336 +9,336 @@ from pandas import DataFrame, Index, Series, Timestamp, concat import pandas._testing as tm from pandas.core.base import SpecificationError -from pandas.tests.window.common import Base -class TestApi(Base): - def setup_method(self, method): - self._create_data() +def test_getitem(frame): + r = frame.rolling(window=5) + tm.assert_index_equal(r._selected_obj.columns, frame.columns) - def test_getitem(self): + r = frame.rolling(window=5)[1] + assert r._selected_obj.name == frame.columns[1] - r = self.frame.rolling(window=5) - tm.assert_index_equal(r._selected_obj.columns, self.frame.columns) + # technically this is allowed + r = frame.rolling(window=5)[1, 3] + tm.assert_index_equal(r._selected_obj.columns, frame.columns[[1, 3]]) - r = self.frame.rolling(window=5)[1] - assert r._selected_obj.name == self.frame.columns[1] + r = frame.rolling(window=5)[[1, 3]] + tm.assert_index_equal(r._selected_obj.columns, frame.columns[[1, 3]]) - # technically this is allowed - r = self.frame.rolling(window=5)[1, 3] - tm.assert_index_equal(r._selected_obj.columns, self.frame.columns[[1, 3]]) - r = self.frame.rolling(window=5)[[1, 3]] - tm.assert_index_equal(r._selected_obj.columns, self.frame.columns[[1, 3]]) +def test_select_bad_cols(): + df = DataFrame([[1, 2]], columns=["A", "B"]) + g = df.rolling(window=5) + with pytest.raises(KeyError, match="Columns not found: 'C'"): + g[["C"]] + with pytest.raises(KeyError, match="^[^A]+$"): + # A should not be referenced as a bad column... + # will have to rethink regex if you change message! + g[["A", "C"]] - def test_select_bad_cols(self): - df = DataFrame([[1, 2]], columns=["A", "B"]) - g = df.rolling(window=5) - with pytest.raises(KeyError, match="Columns not found: 'C'"): - g[["C"]] - with pytest.raises(KeyError, match="^[^A]+$"): - # A should not be referenced as a bad column... - # will have to rethink regex if you change message! - g[["A", "C"]] - def test_attribute_access(self): +def test_attribute_access(): - df = DataFrame([[1, 2]], columns=["A", "B"]) - r = df.rolling(window=5) - tm.assert_series_equal(r.A.sum(), r["A"].sum()) - msg = "'Rolling' object has no attribute 'F'" - with pytest.raises(AttributeError, match=msg): - r.F + df = DataFrame([[1, 2]], columns=["A", "B"]) + r = df.rolling(window=5) + tm.assert_series_equal(r.A.sum(), r["A"].sum()) + msg = "'Rolling' object has no attribute 'F'" + with pytest.raises(AttributeError, match=msg): + r.F - def tests_skip_nuisance(self): - df = DataFrame({"A": range(5), "B": range(5, 10), "C": "foo"}) - r = df.rolling(window=3) - result = r[["A", "B"]].sum() - expected = DataFrame( - {"A": [np.nan, np.nan, 3, 6, 9], "B": [np.nan, np.nan, 18, 21, 24]}, - columns=list("AB"), - ) - tm.assert_frame_equal(result, expected) - - def test_skip_sum_object_raises(self): - df = DataFrame({"A": range(5), "B": range(5, 10), "C": "foo"}) - r = df.rolling(window=3) - result = r.sum() - expected = DataFrame( - {"A": [np.nan, np.nan, 3, 6, 9], "B": [np.nan, np.nan, 18, 21, 24]}, - columns=list("AB"), - ) - tm.assert_frame_equal(result, expected) +def tests_skip_nuisance(): - def test_agg(self): - df = DataFrame({"A": range(5), "B": range(0, 10, 2)}) + df = DataFrame({"A": range(5), "B": range(5, 10), "C": "foo"}) + r = df.rolling(window=3) + result = r[["A", "B"]].sum() + expected = DataFrame( + {"A": [np.nan, np.nan, 3, 6, 9], "B": [np.nan, np.nan, 18, 21, 24]}, + columns=list("AB"), + ) + tm.assert_frame_equal(result, expected) - r = df.rolling(window=3) - a_mean = r["A"].mean() - a_std = r["A"].std() - a_sum = r["A"].sum() - b_mean = r["B"].mean() - b_std = r["B"].std() - result = r.aggregate([np.mean, np.std]) - expected = concat([a_mean, a_std, b_mean, b_std], axis=1) - expected.columns = pd.MultiIndex.from_product([["A", "B"], ["mean", "std"]]) - tm.assert_frame_equal(result, expected) +def test_skip_sum_object_raises(): + df = DataFrame({"A": range(5), "B": range(5, 10), "C": "foo"}) + r = df.rolling(window=3) + result = r.sum() + expected = DataFrame( + {"A": [np.nan, np.nan, 3, 6, 9], "B": [np.nan, np.nan, 18, 21, 24]}, + columns=list("AB"), + ) + tm.assert_frame_equal(result, expected) - result = r.aggregate({"A": np.mean, "B": np.std}) - expected = concat([a_mean, b_std], axis=1) - tm.assert_frame_equal(result, expected, check_like=True) +def test_agg(): + df = DataFrame({"A": range(5), "B": range(0, 10, 2)}) - result = r.aggregate({"A": ["mean", "std"]}) - expected = concat([a_mean, a_std], axis=1) - expected.columns = pd.MultiIndex.from_tuples([("A", "mean"), ("A", "std")]) - tm.assert_frame_equal(result, expected) + r = df.rolling(window=3) + a_mean = r["A"].mean() + a_std = r["A"].std() + a_sum = r["A"].sum() + b_mean = r["B"].mean() + b_std = r["B"].std() - result = r["A"].aggregate(["mean", "sum"]) - expected = concat([a_mean, a_sum], axis=1) - expected.columns = ["mean", "sum"] - tm.assert_frame_equal(result, expected) + result = r.aggregate([np.mean, np.std]) + expected = concat([a_mean, a_std, b_mean, b_std], axis=1) + expected.columns = pd.MultiIndex.from_product([["A", "B"], ["mean", "std"]]) + tm.assert_frame_equal(result, expected) - msg = "nested renamer is not supported" - with pytest.raises(SpecificationError, match=msg): - # using a dict with renaming - r.aggregate({"A": {"mean": "mean", "sum": "sum"}}) + result = r.aggregate({"A": np.mean, "B": np.std}) - with pytest.raises(SpecificationError, match=msg): - r.aggregate( - { - "A": {"mean": "mean", "sum": "sum"}, - "B": {"mean2": "mean", "sum2": "sum"}, - } - ) + expected = concat([a_mean, b_std], axis=1) + tm.assert_frame_equal(result, expected, check_like=True) - result = r.aggregate({"A": ["mean", "std"], "B": ["mean", "std"]}) - expected = concat([a_mean, a_std, b_mean, b_std], axis=1) + result = r.aggregate({"A": ["mean", "std"]}) + expected = concat([a_mean, a_std], axis=1) + expected.columns = pd.MultiIndex.from_tuples([("A", "mean"), ("A", "std")]) + tm.assert_frame_equal(result, expected) - exp_cols = [("A", "mean"), ("A", "std"), ("B", "mean"), ("B", "std")] - expected.columns = pd.MultiIndex.from_tuples(exp_cols) - tm.assert_frame_equal(result, expected, check_like=True) + result = r["A"].aggregate(["mean", "sum"]) + expected = concat([a_mean, a_sum], axis=1) + expected.columns = ["mean", "sum"] + tm.assert_frame_equal(result, expected) - def test_agg_apply(self, raw): + msg = "nested renamer is not supported" + with pytest.raises(SpecificationError, match=msg): + # using a dict with renaming + r.aggregate({"A": {"mean": "mean", "sum": "sum"}}) - # passed lambda - df = DataFrame({"A": range(5), "B": range(0, 10, 2)}) + with pytest.raises(SpecificationError, match=msg): + r.aggregate( + { + "A": {"mean": "mean", "sum": "sum"}, + "B": {"mean2": "mean", "sum2": "sum"}, + } + ) - r = df.rolling(window=3) - a_sum = r["A"].sum() + result = r.aggregate({"A": ["mean", "std"], "B": ["mean", "std"]}) + expected = concat([a_mean, a_std, b_mean, b_std], axis=1) - result = r.agg({"A": np.sum, "B": lambda x: np.std(x, ddof=1)}) - rcustom = r["B"].apply(lambda x: np.std(x, ddof=1), raw=raw) - expected = concat([a_sum, rcustom], axis=1) - tm.assert_frame_equal(result, expected, check_like=True) + exp_cols = [("A", "mean"), ("A", "std"), ("B", "mean"), ("B", "std")] + expected.columns = pd.MultiIndex.from_tuples(exp_cols) + tm.assert_frame_equal(result, expected, check_like=True) - def test_agg_consistency(self): - df = DataFrame({"A": range(5), "B": range(0, 10, 2)}) - r = df.rolling(window=3) +def test_agg_apply(raw): - result = r.agg([np.sum, np.mean]).columns - expected = pd.MultiIndex.from_product([list("AB"), ["sum", "mean"]]) - tm.assert_index_equal(result, expected) + # passed lambda + df = DataFrame({"A": range(5), "B": range(0, 10, 2)}) - result = r["A"].agg([np.sum, np.mean]).columns - expected = Index(["sum", "mean"]) - tm.assert_index_equal(result, expected) + r = df.rolling(window=3) + a_sum = r["A"].sum() - result = r.agg({"A": [np.sum, np.mean]}).columns - expected = pd.MultiIndex.from_tuples([("A", "sum"), ("A", "mean")]) - tm.assert_index_equal(result, expected) + result = r.agg({"A": np.sum, "B": lambda x: np.std(x, ddof=1)}) + rcustom = r["B"].apply(lambda x: np.std(x, ddof=1), raw=raw) + expected = concat([a_sum, rcustom], axis=1) + tm.assert_frame_equal(result, expected, check_like=True) - def test_agg_nested_dicts(self): - # API change for disallowing these types of nested dicts - df = DataFrame({"A": range(5), "B": range(0, 10, 2)}) - r = df.rolling(window=3) +def test_agg_consistency(): - msg = "nested renamer is not supported" - with pytest.raises(SpecificationError, match=msg): - r.aggregate({"r1": {"A": ["mean", "sum"]}, "r2": {"B": ["mean", "sum"]}}) + df = DataFrame({"A": range(5), "B": range(0, 10, 2)}) + r = df.rolling(window=3) - expected = concat( - [r["A"].mean(), r["A"].std(), r["B"].mean(), r["B"].std()], axis=1 - ) - expected.columns = pd.MultiIndex.from_tuples( - [("ra", "mean"), ("ra", "std"), ("rb", "mean"), ("rb", "std")] - ) - with pytest.raises(SpecificationError, match=msg): - r[["A", "B"]].agg( - {"A": {"ra": ["mean", "std"]}, "B": {"rb": ["mean", "std"]}} - ) - - with pytest.raises(SpecificationError, match=msg): - r.agg({"A": {"ra": ["mean", "std"]}, "B": {"rb": ["mean", "std"]}}) - - def test_count_nonnumeric_types(self): - # GH12541 - cols = [ - "int", - "float", - "string", - "datetime", - "timedelta", - "periods", - "fl_inf", - "fl_nan", - "str_nan", - "dt_nat", - "periods_nat", - ] - - df = DataFrame( - { - "int": [1, 2, 3], - "float": [4.0, 5.0, 6.0], - "string": list("abc"), - "datetime": pd.date_range("20170101", periods=3), - "timedelta": pd.timedelta_range("1 s", periods=3, freq="s"), - "periods": [ - pd.Period("2012-01"), - pd.Period("2012-02"), - pd.Period("2012-03"), - ], - "fl_inf": [1.0, 2.0, np.Inf], - "fl_nan": [1.0, 2.0, np.NaN], - "str_nan": ["aa", "bb", np.NaN], - "dt_nat": [ - Timestamp("20170101"), - Timestamp("20170203"), - Timestamp(None), - ], - "periods_nat": [ - pd.Period("2012-01"), - pd.Period("2012-02"), - pd.Period(None), - ], - }, - columns=cols, - ) + result = r.agg([np.sum, np.mean]).columns + expected = pd.MultiIndex.from_product([list("AB"), ["sum", "mean"]]) + tm.assert_index_equal(result, expected) - expected = DataFrame( - { - "int": [1.0, 2.0, 2.0], - "float": [1.0, 2.0, 2.0], - "string": [1.0, 2.0, 2.0], - "datetime": [1.0, 2.0, 2.0], - "timedelta": [1.0, 2.0, 2.0], - "periods": [1.0, 2.0, 2.0], - "fl_inf": [1.0, 2.0, 2.0], - "fl_nan": [1.0, 2.0, 1.0], - "str_nan": [1.0, 2.0, 1.0], - "dt_nat": [1.0, 2.0, 1.0], - "periods_nat": [1.0, 2.0, 1.0], - }, - columns=cols, - ) + result = r["A"].agg([np.sum, np.mean]).columns + expected = Index(["sum", "mean"]) + tm.assert_index_equal(result, expected) - result = df.rolling(window=2, min_periods=0).count() - tm.assert_frame_equal(result, expected) + result = r.agg({"A": [np.sum, np.mean]}).columns + expected = pd.MultiIndex.from_tuples([("A", "sum"), ("A", "mean")]) + tm.assert_index_equal(result, expected) - result = df.rolling(1, min_periods=0).count() - expected = df.notna().astype(float) - tm.assert_frame_equal(result, expected) - @td.skip_if_no_scipy - @pytest.mark.filterwarnings("ignore:can't resolve:ImportWarning") - def test_window_with_args(self): - # make sure that we are aggregating window functions correctly with arg - r = Series(np.random.randn(100)).rolling( - window=10, min_periods=1, win_type="gaussian" - ) - expected = concat([r.mean(std=10), r.mean(std=0.01)], axis=1) - expected.columns = ["", ""] - result = r.aggregate([lambda x: x.mean(std=10), lambda x: x.mean(std=0.01)]) - tm.assert_frame_equal(result, expected) +def test_agg_nested_dicts(): - def a(x): - return x.mean(std=10) + # API change for disallowing these types of nested dicts + df = DataFrame({"A": range(5), "B": range(0, 10, 2)}) + r = df.rolling(window=3) - def b(x): - return x.mean(std=0.01) + msg = "nested renamer is not supported" + with pytest.raises(SpecificationError, match=msg): + r.aggregate({"r1": {"A": ["mean", "sum"]}, "r2": {"B": ["mean", "sum"]}}) - expected = concat([r.mean(std=10), r.mean(std=0.01)], axis=1) - expected.columns = ["a", "b"] - result = r.aggregate([a, b]) - tm.assert_frame_equal(result, expected) + expected = concat( + [r["A"].mean(), r["A"].std(), r["B"].mean(), r["B"].std()], axis=1 + ) + expected.columns = pd.MultiIndex.from_tuples( + [("ra", "mean"), ("ra", "std"), ("rb", "mean"), ("rb", "std")] + ) + with pytest.raises(SpecificationError, match=msg): + r[["A", "B"]].agg({"A": {"ra": ["mean", "std"]}, "B": {"rb": ["mean", "std"]}}) + + with pytest.raises(SpecificationError, match=msg): + r.agg({"A": {"ra": ["mean", "std"]}, "B": {"rb": ["mean", "std"]}}) + + +def test_count_nonnumeric_types(): + # GH12541 + cols = [ + "int", + "float", + "string", + "datetime", + "timedelta", + "periods", + "fl_inf", + "fl_nan", + "str_nan", + "dt_nat", + "periods_nat", + ] + + df = DataFrame( + { + "int": [1, 2, 3], + "float": [4.0, 5.0, 6.0], + "string": list("abc"), + "datetime": pd.date_range("20170101", periods=3), + "timedelta": pd.timedelta_range("1 s", periods=3, freq="s"), + "periods": [ + pd.Period("2012-01"), + pd.Period("2012-02"), + pd.Period("2012-03"), + ], + "fl_inf": [1.0, 2.0, np.Inf], + "fl_nan": [1.0, 2.0, np.NaN], + "str_nan": ["aa", "bb", np.NaN], + "dt_nat": [Timestamp("20170101"), Timestamp("20170203"), Timestamp(None),], + "periods_nat": [ + pd.Period("2012-01"), + pd.Period("2012-02"), + pd.Period(None), + ], + }, + columns=cols, + ) - def test_preserve_metadata(self): - # GH 10565 - s = Series(np.arange(100), name="foo") + expected = DataFrame( + { + "int": [1.0, 2.0, 2.0], + "float": [1.0, 2.0, 2.0], + "string": [1.0, 2.0, 2.0], + "datetime": [1.0, 2.0, 2.0], + "timedelta": [1.0, 2.0, 2.0], + "periods": [1.0, 2.0, 2.0], + "fl_inf": [1.0, 2.0, 2.0], + "fl_nan": [1.0, 2.0, 1.0], + "str_nan": [1.0, 2.0, 1.0], + "dt_nat": [1.0, 2.0, 1.0], + "periods_nat": [1.0, 2.0, 1.0], + }, + columns=cols, + ) - s2 = s.rolling(30).sum() - s3 = s.rolling(20).sum() - assert s2.name == "foo" - assert s3.name == "foo" + result = df.rolling(window=2, min_periods=0).count() + tm.assert_frame_equal(result, expected) - @pytest.mark.parametrize( - "func,window_size,expected_vals", - [ - ( - "rolling", - 2, - [ - [np.nan, np.nan, np.nan, np.nan], - [15.0, 20.0, 25.0, 20.0], - [25.0, 30.0, 35.0, 30.0], - [np.nan, np.nan, np.nan, np.nan], - [20.0, 30.0, 35.0, 30.0], - [35.0, 40.0, 60.0, 40.0], - [60.0, 80.0, 85.0, 80], - ], - ), - ( - "expanding", - None, - [ - [10.0, 10.0, 20.0, 20.0], - [15.0, 20.0, 25.0, 20.0], - [20.0, 30.0, 30.0, 20.0], - [10.0, 10.0, 30.0, 30.0], - [20.0, 30.0, 35.0, 30.0], - [26.666667, 40.0, 50.0, 30.0], - [40.0, 80.0, 60.0, 30.0], - ], - ), - ], + result = df.rolling(1, min_periods=0).count() + expected = df.notna().astype(float) + tm.assert_frame_equal(result, expected) + + +@td.skip_if_no_scipy +@pytest.mark.filterwarnings("ignore:can't resolve:ImportWarning") +def test_window_with_args(): + # make sure that we are aggregating window functions correctly with arg + r = Series(np.random.randn(100)).rolling( + window=10, min_periods=1, win_type="gaussian" ) - def test_multiple_agg_funcs(self, func, window_size, expected_vals): - # GH 15072 - df = pd.DataFrame( + expected = concat([r.mean(std=10), r.mean(std=0.01)], axis=1) + expected.columns = ["", ""] + result = r.aggregate([lambda x: x.mean(std=10), lambda x: x.mean(std=0.01)]) + tm.assert_frame_equal(result, expected) + + def a(x): + return x.mean(std=10) + + def b(x): + return x.mean(std=0.01) + + expected = concat([r.mean(std=10), r.mean(std=0.01)], axis=1) + expected.columns = ["a", "b"] + result = r.aggregate([a, b]) + tm.assert_frame_equal(result, expected) + + +def test_preserve_metadata(): + # GH 10565 + s = Series(np.arange(100), name="foo") + + s2 = s.rolling(30).sum() + s3 = s.rolling(20).sum() + assert s2.name == "foo" + assert s3.name == "foo" + + +@pytest.mark.parametrize( + "func,window_size,expected_vals", + [ + ( + "rolling", + 2, [ - ["A", 10, 20], - ["A", 20, 30], - ["A", 30, 40], - ["B", 10, 30], - ["B", 30, 40], - ["B", 40, 80], - ["B", 80, 90], + [np.nan, np.nan, np.nan, np.nan], + [15.0, 20.0, 25.0, 20.0], + [25.0, 30.0, 35.0, 30.0], + [np.nan, np.nan, np.nan, np.nan], + [20.0, 30.0, 35.0, 30.0], + [35.0, 40.0, 60.0, 40.0], + [60.0, 80.0, 85.0, 80], ], - columns=["stock", "low", "high"], - ) + ), + ( + "expanding", + None, + [ + [10.0, 10.0, 20.0, 20.0], + [15.0, 20.0, 25.0, 20.0], + [20.0, 30.0, 30.0, 20.0], + [10.0, 10.0, 30.0, 30.0], + [20.0, 30.0, 35.0, 30.0], + [26.666667, 40.0, 50.0, 30.0], + [40.0, 80.0, 60.0, 30.0], + ], + ), + ], +) +def test_multiple_agg_funcs(func, window_size, expected_vals): + # GH 15072 + df = pd.DataFrame( + [ + ["A", 10, 20], + ["A", 20, 30], + ["A", 30, 40], + ["B", 10, 30], + ["B", 30, 40], + ["B", 40, 80], + ["B", 80, 90], + ], + columns=["stock", "low", "high"], + ) - f = getattr(df.groupby("stock"), func) - if window_size: - window = f(window_size) - else: - window = f() + f = getattr(df.groupby("stock"), func) + if window_size: + window = f(window_size) + else: + window = f() - index = pd.MultiIndex.from_tuples( - [("A", 0), ("A", 1), ("A", 2), ("B", 3), ("B", 4), ("B", 5), ("B", 6)], - names=["stock", None], - ) - columns = pd.MultiIndex.from_tuples( - [("low", "mean"), ("low", "max"), ("high", "mean"), ("high", "min")] - ) - expected = pd.DataFrame(expected_vals, index=index, columns=columns) + index = pd.MultiIndex.from_tuples( + [("A", 0), ("A", 1), ("A", 2), ("B", 3), ("B", 4), ("B", 5), ("B", 6)], + names=["stock", None], + ) + columns = pd.MultiIndex.from_tuples( + [("low", "mean"), ("low", "max"), ("high", "mean"), ("high", "min")] + ) + expected = pd.DataFrame(expected_vals, index=index, columns=columns) - result = window.agg( - OrderedDict((("low", ["mean", "max"]), ("high", ["mean", "min"]))) - ) + result = window.agg( + OrderedDict((("low", ["mean", "max"]), ("high", ["mean", "min"]))) + ) - tm.assert_frame_equal(result, expected) + tm.assert_frame_equal(result, expected) diff --git a/pandas/tests/window/test_ewm.py b/pandas/tests/window/test_ewm.py index 1683fda500f85..9ba194dcf0959 100644 --- a/pandas/tests/window/test_ewm.py +++ b/pandas/tests/window/test_ewm.py @@ -5,66 +5,62 @@ from pandas import DataFrame, Series from pandas.core.window import EWM -from pandas.tests.window.common import Base -class TestEWM(Base): - def setup_method(self, method): - self._create_data() +def test_doc_string(): - def test_doc_string(self): + df = DataFrame({"B": [0, 1, 2, np.nan, 4]}) + df + df.ewm(com=0.5).mean() - df = DataFrame({"B": [0, 1, 2, np.nan, 4]}) - df - df.ewm(com=0.5).mean() - @pytest.mark.parametrize("which", ["series", "frame"]) - def test_constructor(self, which): - o = getattr(self, which) - c = o.ewm +def test_constructor(which): - # valid - c(com=0.5) - c(span=1.5) - c(alpha=0.5) - c(halflife=0.75) - c(com=0.5, span=None) - c(alpha=0.5, com=None) - c(halflife=0.75, alpha=None) + c = which.ewm - # not valid: mutually exclusive - with pytest.raises(ValueError): - c(com=0.5, alpha=0.5) - with pytest.raises(ValueError): - c(span=1.5, halflife=0.75) - with pytest.raises(ValueError): - c(alpha=0.5, span=1.5) + # valid + c(com=0.5) + c(span=1.5) + c(alpha=0.5) + c(halflife=0.75) + c(com=0.5, span=None) + c(alpha=0.5, com=None) + c(halflife=0.75, alpha=None) - # not valid: com < 0 - with pytest.raises(ValueError): - c(com=-0.5) + # not valid: mutually exclusive + with pytest.raises(ValueError): + c(com=0.5, alpha=0.5) + with pytest.raises(ValueError): + c(span=1.5, halflife=0.75) + with pytest.raises(ValueError): + c(alpha=0.5, span=1.5) - # not valid: span < 1 - with pytest.raises(ValueError): - c(span=0.5) + # not valid: com < 0 + with pytest.raises(ValueError): + c(com=-0.5) + + # not valid: span < 1 + with pytest.raises(ValueError): + c(span=0.5) + + # not valid: halflife <= 0 + with pytest.raises(ValueError): + c(halflife=0) - # not valid: halflife <= 0 + # not valid: alpha <= 0 or alpha > 1 + for alpha in (-0.5, 1.5): with pytest.raises(ValueError): - c(halflife=0) + c(alpha=alpha) - # not valid: alpha <= 0 or alpha > 1 - for alpha in (-0.5, 1.5): - with pytest.raises(ValueError): - c(alpha=alpha) - @pytest.mark.parametrize("method", ["std", "mean", "var"]) - def test_numpy_compat(self, method): - # see gh-12811 - e = EWM(Series([2, 4, 6]), alpha=0.5) +@pytest.mark.parametrize("method", ["std", "mean", "var"]) +def test_numpy_compat(method): + # see gh-12811 + e = EWM(Series([2, 4, 6]), alpha=0.5) - msg = "numpy operations are not valid with window objects" + msg = "numpy operations are not valid with window objects" - with pytest.raises(UnsupportedFunctionCall, match=msg): - getattr(e, method)(1, 2, 3) - with pytest.raises(UnsupportedFunctionCall, match=msg): - getattr(e, method)(dtype=np.float64) + with pytest.raises(UnsupportedFunctionCall, match=msg): + getattr(e, method)(1, 2, 3) + with pytest.raises(UnsupportedFunctionCall, match=msg): + getattr(e, method)(dtype=np.float64) diff --git a/pandas/tests/window/test_expanding.py b/pandas/tests/window/test_expanding.py index 6b6367fd80b26..aaa7e9a34fadf 100644 --- a/pandas/tests/window/test_expanding.py +++ b/pandas/tests/window/test_expanding.py @@ -7,112 +7,111 @@ from pandas import DataFrame, Series import pandas._testing as tm from pandas.core.window import Expanding -from pandas.tests.window.common import Base - - -class TestExpanding(Base): - def setup_method(self, method): - self._create_data() - - def test_doc_string(self): - - df = DataFrame({"B": [0, 1, 2, np.nan, 4]}) - df - df.expanding(2).sum() - - @pytest.mark.parametrize("which", ["series", "frame"]) - def test_constructor(self, which): - # GH 12669 - - o = getattr(self, which) - c = o.expanding - - # valid - c(min_periods=1) - c(min_periods=1, center=True) - c(min_periods=1, center=False) - - # not valid - for w in [2.0, "foo", np.array([2])]: - with pytest.raises(ValueError): - c(min_periods=w) - with pytest.raises(ValueError): - c(min_periods=1, center=w) - - @pytest.mark.parametrize("method", ["std", "mean", "sum", "max", "min", "var"]) - def test_numpy_compat(self, method): - # see gh-12811 - e = Expanding(Series([2, 4, 6]), window=2) - - msg = "numpy operations are not valid with window objects" - - with pytest.raises(UnsupportedFunctionCall, match=msg): - getattr(e, method)(1, 2, 3) - with pytest.raises(UnsupportedFunctionCall, match=msg): - getattr(e, method)(dtype=np.float64) - - @pytest.mark.parametrize( - "expander", - [ - 1, - pytest.param( - "ls", - marks=pytest.mark.xfail( - reason="GH#16425 expanding with offset not supported" - ), + + +def test_doc_string(): + + df = DataFrame({"B": [0, 1, 2, np.nan, 4]}) + df + df.expanding(2).sum() + + +def test_constructor(which): + # GH 12669 + + c = which.expanding + + # valid + c(min_periods=1) + c(min_periods=1, center=True) + c(min_periods=1, center=False) + + # not valid + for w in [2.0, "foo", np.array([2])]: + with pytest.raises(ValueError): + c(min_periods=w) + with pytest.raises(ValueError): + c(min_periods=1, center=w) + + +@pytest.mark.parametrize("method", ["std", "mean", "sum", "max", "min", "var"]) +def test_numpy_compat(method): + # see gh-12811 + e = Expanding(Series([2, 4, 6]), window=2) + + msg = "numpy operations are not valid with window objects" + + with pytest.raises(UnsupportedFunctionCall, match=msg): + getattr(e, method)(1, 2, 3) + with pytest.raises(UnsupportedFunctionCall, match=msg): + getattr(e, method)(dtype=np.float64) + + +@pytest.mark.parametrize( + "expander", + [ + 1, + pytest.param( + "ls", + marks=pytest.mark.xfail( + reason="GH#16425 expanding with offset not supported" ), - ], - ) - def test_empty_df_expanding(self, expander): - # GH 15819 Verifies that datetime and integer expanding windows can be - # applied to empty DataFrames - - expected = DataFrame() - result = DataFrame().expanding(expander).sum() - tm.assert_frame_equal(result, expected) - - # Verifies that datetime and integer expanding windows can be applied - # to empty DataFrames with datetime index - expected = DataFrame(index=pd.DatetimeIndex([])) - result = DataFrame(index=pd.DatetimeIndex([])).expanding(expander).sum() - tm.assert_frame_equal(result, expected) - - def test_missing_minp_zero(self): - # https://github.com/pandas-dev/pandas/pull/18921 - # minp=0 - x = pd.Series([np.nan]) - result = x.expanding(min_periods=0).sum() - expected = pd.Series([0.0]) - tm.assert_series_equal(result, expected) - - # minp=1 - result = x.expanding(min_periods=1).sum() - expected = pd.Series([np.nan]) - tm.assert_series_equal(result, expected) - - @pytest.mark.parametrize("klass", [pd.Series, pd.DataFrame]) - def test_iter_raises(self, klass): - # https://github.com/pandas-dev/pandas/issues/11704 - # Iteration over a Window - obj = klass([1, 2, 3, 4]) - with pytest.raises(NotImplementedError): - iter(obj.expanding(2)) - - def test_expanding_axis(self, axis_frame): - # see gh-23372. - df = DataFrame(np.ones((10, 20))) - axis = df._get_axis_number(axis_frame) - - if axis == 0: - expected = DataFrame( - {i: [np.nan] * 2 + [float(j) for j in range(3, 11)] for i in range(20)} - ) - else: - # axis == 1 - expected = DataFrame([[np.nan] * 2 + [float(i) for i in range(3, 21)]] * 10) - - result = df.expanding(3, axis=axis_frame).sum() - tm.assert_frame_equal(result, expected) + ), + ], +) +def test_empty_df_expanding(expander): + # GH 15819 Verifies that datetime and integer expanding windows can be + # applied to empty DataFrames + + expected = DataFrame() + result = DataFrame().expanding(expander).sum() + tm.assert_frame_equal(result, expected) + + # Verifies that datetime and integer expanding windows can be applied + # to empty DataFrames with datetime index + expected = DataFrame(index=pd.DatetimeIndex([])) + result = DataFrame(index=pd.DatetimeIndex([])).expanding(expander).sum() + tm.assert_frame_equal(result, expected) + + +def test_missing_minp_zero(): + # https://github.com/pandas-dev/pandas/pull/18921 + # minp=0 + x = pd.Series([np.nan]) + result = x.expanding(min_periods=0).sum() + expected = pd.Series([0.0]) + tm.assert_series_equal(result, expected) + + # minp=1 + result = x.expanding(min_periods=1).sum() + expected = pd.Series([np.nan]) + tm.assert_series_equal(result, expected) + + +@pytest.mark.parametrize("klass", [pd.Series, pd.DataFrame]) +def test_iter_raises(klass): + # https://github.com/pandas-dev/pandas/issues/11704 + # Iteration over a Window + obj = klass([1, 2, 3, 4]) + with pytest.raises(NotImplementedError): + iter(obj.expanding(2)) + + +def test_expanding_axis(axis_frame): + # see gh-23372. + df = DataFrame(np.ones((10, 20))) + axis = df._get_axis_number(axis_frame) + + if axis == 0: + expected = DataFrame( + {i: [np.nan] * 2 + [float(j) for j in range(3, 11)] for i in range(20)} + ) + else: + # axis == 1 + expected = DataFrame([[np.nan] * 2 + [float(i) for i in range(3, 21)]] * 10) + + result = df.expanding(3, axis=axis_frame).sum() + tm.assert_frame_equal(result, expected) @pytest.mark.parametrize("constructor", [Series, DataFrame]) diff --git a/pandas/tests/window/test_rolling.py b/pandas/tests/window/test_rolling.py index 866b7da59382d..a7582a86c0848 100644 --- a/pandas/tests/window/test_rolling.py +++ b/pandas/tests/window/test_rolling.py @@ -10,377 +10,380 @@ from pandas import DataFrame, Series import pandas._testing as tm from pandas.core.window import Rolling -from pandas.tests.window.common import Base -class TestRolling(Base): - def setup_method(self, method): - self._create_data() +def test_doc_string(): - def test_doc_string(self): + df = DataFrame({"B": [0, 1, 2, np.nan, 4]}) + df + df.rolling(2).sum() + df.rolling(2, min_periods=1).sum() - df = DataFrame({"B": [0, 1, 2, np.nan, 4]}) - df - df.rolling(2).sum() - df.rolling(2, min_periods=1).sum() - @pytest.mark.parametrize("which", ["series", "frame"]) - def test_constructor(self, which): - # GH 12669 +def test_constructor(which): + # GH 12669 - o = getattr(self, which) - c = o.rolling + c = which.rolling - # valid - c(0) - c(window=2) - c(window=2, min_periods=1) - c(window=2, min_periods=1, center=True) - c(window=2, min_periods=1, center=False) + # valid + c(0) + c(window=2) + c(window=2, min_periods=1) + c(window=2, min_periods=1, center=True) + c(window=2, min_periods=1, center=False) - # GH 13383 + # GH 13383 - msg = "window must be non-negative" + msg = "window must be non-negative" + with pytest.raises(ValueError, match=msg): + c(-1) + + # not valid + for w in [2.0, "foo", np.array([2])]: + msg = ( + "window must be an integer|" + "passed window foo is not compatible with a datetimelike index" + ) with pytest.raises(ValueError, match=msg): - c(-1) - - # not valid - for w in [2.0, "foo", np.array([2])]: - msg = ( - "window must be an integer|" - "passed window foo is not compatible with a datetimelike index" - ) - with pytest.raises(ValueError, match=msg): - c(window=w) - - msg = "min_periods must be an integer" - with pytest.raises(ValueError, match=msg): - c(window=2, min_periods=w) - - msg = "center must be a boolean" - with pytest.raises(ValueError, match=msg): - c(window=2, min_periods=1, center=w) - - @td.skip_if_no_scipy - @pytest.mark.parametrize("which", ["series", "frame"]) - def test_constructor_with_win_type(self, which): - # GH 13383 - o = getattr(self, which) - c = o.rolling - - msg = "window must be > 0" + c(window=w) + msg = "min_periods must be an integer" with pytest.raises(ValueError, match=msg): - c(-1, win_type="boxcar") - - @pytest.mark.parametrize("window", [timedelta(days=3), pd.Timedelta(days=3)]) - def test_constructor_with_timedelta_window(self, window): - # GH 15440 - n = 10 - df = DataFrame( - {"value": np.arange(n)}, - index=pd.date_range("2015-12-24", periods=n, freq="D"), - ) - expected_data = np.append([0.0, 1.0], np.arange(3.0, 27.0, 3)) + c(window=2, min_periods=w) - result = df.rolling(window=window).sum() - expected = DataFrame( - {"value": expected_data}, - index=pd.date_range("2015-12-24", periods=n, freq="D"), - ) - tm.assert_frame_equal(result, expected) - expected = df.rolling("3D").sum() - tm.assert_frame_equal(result, expected) - - @pytest.mark.parametrize("window", [timedelta(days=3), pd.Timedelta(days=3), "3D"]) - def test_constructor_timedelta_window_and_minperiods(self, window, raw): - # GH 15305 - n = 10 - df = DataFrame( - {"value": np.arange(n)}, - index=pd.date_range("2017-08-08", periods=n, freq="D"), - ) - expected = DataFrame( - {"value": np.append([np.NaN, 1.0], np.arange(3.0, 27.0, 3))}, - index=pd.date_range("2017-08-08", periods=n, freq="D"), - ) - result_roll_sum = df.rolling(window=window, min_periods=2).sum() - result_roll_generic = df.rolling(window=window, min_periods=2).apply( - sum, raw=raw - ) - tm.assert_frame_equal(result_roll_sum, expected) - tm.assert_frame_equal(result_roll_generic, expected) + msg = "center must be a boolean" + with pytest.raises(ValueError, match=msg): + c(window=2, min_periods=1, center=w) - @pytest.mark.parametrize("method", ["std", "mean", "sum", "max", "min", "var"]) - def test_numpy_compat(self, method): - # see gh-12811 - r = Rolling(Series([2, 4, 6]), window=2) - msg = "numpy operations are not valid with window objects" +@td.skip_if_no_scipy +def test_constructor_with_win_type(which): + # GH 13383 + c = which.rolling - with pytest.raises(UnsupportedFunctionCall, match=msg): - getattr(r, method)(1, 2, 3) - with pytest.raises(UnsupportedFunctionCall, match=msg): - getattr(r, method)(dtype=np.float64) + msg = "window must be > 0" - def test_closed(self): - df = DataFrame({"A": [0, 1, 2, 3, 4]}) - # closed only allowed for datetimelike + with pytest.raises(ValueError, match=msg): + c(-1, win_type="boxcar") - msg = "closed only implemented for datetimelike and offset based windows" - with pytest.raises(ValueError, match=msg): - df.rolling(window=3, closed="neither") - - @pytest.mark.parametrize("closed", ["neither", "left"]) - def test_closed_empty(self, closed, arithmetic_win_operators): - # GH 26005 - func_name = arithmetic_win_operators - ser = pd.Series( - data=np.arange(5), index=pd.date_range("2000", periods=5, freq="2D") - ) - roll = ser.rolling("1D", closed=closed) - - result = getattr(roll, func_name)() - expected = pd.Series([np.nan] * 5, index=ser.index) - tm.assert_series_equal(result, expected) - - @pytest.mark.parametrize("func", ["min", "max"]) - def test_closed_one_entry(self, func): - # GH24718 - ser = pd.Series(data=[2], index=pd.date_range("2000", periods=1)) - result = getattr(ser.rolling("10D", closed="left"), func)() - tm.assert_series_equal(result, pd.Series([np.nan], index=ser.index)) - - @pytest.mark.parametrize("func", ["min", "max"]) - def test_closed_one_entry_groupby(self, func): - # GH24718 - ser = pd.DataFrame( - data={"A": [1, 1, 2], "B": [3, 2, 1]}, - index=pd.date_range("2000", periods=3), - ) - result = getattr( - ser.groupby("A", sort=False)["B"].rolling("10D", closed="left"), func - )() - exp_idx = pd.MultiIndex.from_arrays( - arrays=[[1, 1, 2], ser.index], names=("A", None) - ) - expected = pd.Series(data=[np.nan, 3, np.nan], index=exp_idx, name="B") - tm.assert_series_equal(result, expected) - - @pytest.mark.parametrize("input_dtype", ["int", "float"]) - @pytest.mark.parametrize( - "func,closed,expected", - [ - ("min", "right", [0.0, 0, 0, 1, 2, 3, 4, 5, 6, 7]), - ("min", "both", [0.0, 0, 0, 0, 1, 2, 3, 4, 5, 6]), - ("min", "neither", [np.nan, 0, 0, 1, 2, 3, 4, 5, 6, 7]), - ("min", "left", [np.nan, 0, 0, 0, 1, 2, 3, 4, 5, 6]), - ("max", "right", [0.0, 1, 2, 3, 4, 5, 6, 7, 8, 9]), - ("max", "both", [0.0, 1, 2, 3, 4, 5, 6, 7, 8, 9]), - ("max", "neither", [np.nan, 0, 1, 2, 3, 4, 5, 6, 7, 8]), - ("max", "left", [np.nan, 0, 1, 2, 3, 4, 5, 6, 7, 8]), - ], +@pytest.mark.parametrize("window", [timedelta(days=3), pd.Timedelta(days=3)]) +def test_constructor_with_timedelta_window(window): + # GH 15440 + n = 10 + df = DataFrame( + {"value": np.arange(n)}, index=pd.date_range("2015-12-24", periods=n, freq="D"), ) - def test_closed_min_max_datetime(self, input_dtype, func, closed, expected): - # see gh-21704 - ser = pd.Series( - data=np.arange(10).astype(input_dtype), - index=pd.date_range("2000", periods=10), - ) + expected_data = np.append([0.0, 1.0], np.arange(3.0, 27.0, 3)) - result = getattr(ser.rolling("3D", closed=closed), func)() - expected = pd.Series(expected, index=ser.index) - tm.assert_series_equal(result, expected) - - def test_closed_uneven(self): - # see gh-21704 - ser = pd.Series(data=np.arange(10), index=pd.date_range("2000", periods=10)) - - # uneven - ser = ser.drop(index=ser.index[[1, 5]]) - result = ser.rolling("3D", closed="left").min() - expected = pd.Series([np.nan, 0, 0, 2, 3, 4, 6, 6], index=ser.index) - tm.assert_series_equal(result, expected) - - @pytest.mark.parametrize( - "func,closed,expected", - [ - ("min", "right", [np.nan, 0, 0, 1, 2, 3, 4, 5, np.nan, np.nan]), - ("min", "both", [np.nan, 0, 0, 0, 1, 2, 3, 4, 5, np.nan]), - ("min", "neither", [np.nan, np.nan, 0, 1, 2, 3, 4, 5, np.nan, np.nan]), - ("min", "left", [np.nan, np.nan, 0, 0, 1, 2, 3, 4, 5, np.nan]), - ("max", "right", [np.nan, 1, 2, 3, 4, 5, 6, 6, np.nan, np.nan]), - ("max", "both", [np.nan, 1, 2, 3, 4, 5, 6, 6, 6, np.nan]), - ("max", "neither", [np.nan, np.nan, 1, 2, 3, 4, 5, 6, np.nan, np.nan]), - ("max", "left", [np.nan, np.nan, 1, 2, 3, 4, 5, 6, 6, np.nan]), - ], + result = df.rolling(window=window).sum() + expected = DataFrame( + {"value": expected_data}, + index=pd.date_range("2015-12-24", periods=n, freq="D"), ) - def test_closed_min_max_minp(self, func, closed, expected): - # see gh-21704 - ser = pd.Series(data=np.arange(10), index=pd.date_range("2000", periods=10)) - ser[ser.index[-3:]] = np.nan - result = getattr(ser.rolling("3D", min_periods=2, closed=closed), func)() - expected = pd.Series(expected, index=ser.index) - tm.assert_series_equal(result, expected) - - @pytest.mark.parametrize( - "closed,expected", - [ - ("right", [0, 0.5, 1, 2, 3, 4, 5, 6, 7, 8]), - ("both", [0, 0.5, 1, 1.5, 2.5, 3.5, 4.5, 5.5, 6.5, 7.5]), - ("neither", [np.nan, 0, 0.5, 1.5, 2.5, 3.5, 4.5, 5.5, 6.5, 7.5]), - ("left", [np.nan, 0, 0.5, 1, 2, 3, 4, 5, 6, 7]), - ], + tm.assert_frame_equal(result, expected) + expected = df.rolling("3D").sum() + tm.assert_frame_equal(result, expected) + + +@pytest.mark.parametrize("window", [timedelta(days=3), pd.Timedelta(days=3), "3D"]) +def test_constructor_timedelta_window_and_minperiods(window, raw): + # GH 15305 + n = 10 + df = DataFrame( + {"value": np.arange(n)}, index=pd.date_range("2017-08-08", periods=n, freq="D"), ) - def test_closed_median_quantile(self, closed, expected): - # GH 26005 - ser = pd.Series(data=np.arange(10), index=pd.date_range("2000", periods=10)) - roll = ser.rolling("3D", closed=closed) - expected = pd.Series(expected, index=ser.index) - - result = roll.median() - tm.assert_series_equal(result, expected) - - result = roll.quantile(0.5) - tm.assert_series_equal(result, expected) - - @pytest.mark.parametrize("roller", ["1s", 1]) - def tests_empty_df_rolling(self, roller): - # GH 15819 Verifies that datetime and integer rolling windows can be - # applied to empty DataFrames - expected = DataFrame() - result = DataFrame().rolling(roller).sum() - tm.assert_frame_equal(result, expected) - - # Verifies that datetime and integer rolling windows can be applied to - # empty DataFrames with datetime index - expected = DataFrame(index=pd.DatetimeIndex([])) - result = DataFrame(index=pd.DatetimeIndex([])).rolling(roller).sum() - tm.assert_frame_equal(result, expected) - - def test_empty_window_median_quantile(self): - # GH 26005 - expected = pd.Series([np.nan, np.nan, np.nan]) - roll = pd.Series(np.arange(3)).rolling(0) - - result = roll.median() - tm.assert_series_equal(result, expected) - - result = roll.quantile(0.1) - tm.assert_series_equal(result, expected) - - def test_missing_minp_zero(self): - # https://github.com/pandas-dev/pandas/pull/18921 - # minp=0 - x = pd.Series([np.nan]) - result = x.rolling(1, min_periods=0).sum() - expected = pd.Series([0.0]) - tm.assert_series_equal(result, expected) - - # minp=1 - result = x.rolling(1, min_periods=1).sum() - expected = pd.Series([np.nan]) - tm.assert_series_equal(result, expected) - - def test_missing_minp_zero_variable(self): - # https://github.com/pandas-dev/pandas/pull/18921 - x = pd.Series( - [np.nan] * 4, - index=pd.DatetimeIndex( - ["2017-01-01", "2017-01-04", "2017-01-06", "2017-01-07"] - ), - ) - result = x.rolling(pd.Timedelta("2d"), min_periods=0).sum() - expected = pd.Series(0.0, index=x.index) - tm.assert_series_equal(result, expected) + expected = DataFrame( + {"value": np.append([np.NaN, 1.0], np.arange(3.0, 27.0, 3))}, + index=pd.date_range("2017-08-08", periods=n, freq="D"), + ) + result_roll_sum = df.rolling(window=window, min_periods=2).sum() + result_roll_generic = df.rolling(window=window, min_periods=2).apply(sum, raw=raw) + tm.assert_frame_equal(result_roll_sum, expected) + tm.assert_frame_equal(result_roll_generic, expected) - def test_multi_index_names(self): - # GH 16789, 16825 - cols = pd.MultiIndex.from_product( - [["A", "B"], ["C", "D", "E"]], names=["1", "2"] - ) - df = DataFrame(np.ones((10, 6)), columns=cols) - result = df.rolling(3).cov() - - tm.assert_index_equal(result.columns, df.columns) - assert result.index.names == [None, "1", "2"] - - @pytest.mark.parametrize("klass", [pd.Series, pd.DataFrame]) - def test_iter_raises(self, klass): - # https://github.com/pandas-dev/pandas/issues/11704 - # Iteration over a Window - obj = klass([1, 2, 3, 4]) - - msg = "See issue #11704 https://github.com/pandas-dev/pandas/issues/11704" - - with pytest.raises(NotImplementedError, match=msg): - iter(obj.rolling(2)) - - def test_rolling_axis_sum(self, axis_frame): - # see gh-23372. - df = DataFrame(np.ones((10, 20))) - axis = df._get_axis_number(axis_frame) - - if axis == 0: - expected = DataFrame({i: [np.nan] * 2 + [3.0] * 8 for i in range(20)}) - else: - # axis == 1 - expected = DataFrame([[np.nan] * 2 + [3.0] * 18] * 10) - - result = df.rolling(3, axis=axis_frame).sum() - tm.assert_frame_equal(result, expected) - - def test_rolling_axis_count(self, axis_frame): - # see gh-26055 - df = DataFrame({"x": range(3), "y": range(3)}) - - axis = df._get_axis_number(axis_frame) - - if axis in [0, "index"]: - expected = DataFrame({"x": [1.0, 2.0, 2.0], "y": [1.0, 2.0, 2.0]}) - else: - expected = DataFrame({"x": [1.0, 1.0, 1.0], "y": [2.0, 2.0, 2.0]}) - - result = df.rolling(2, axis=axis_frame, min_periods=0).count() - tm.assert_frame_equal(result, expected) - - def test_readonly_array(self): - # GH-27766 - arr = np.array([1, 3, np.nan, 3, 5]) - arr.setflags(write=False) - result = pd.Series(arr).rolling(2).mean() - expected = pd.Series([np.nan, 2, np.nan, np.nan, 4]) - tm.assert_series_equal(result, expected) - - def test_rolling_datetime(self, axis_frame, tz_naive_fixture): - # GH-28192 - tz = tz_naive_fixture - df = pd.DataFrame( - { - i: [1] * 2 - for i in pd.date_range("2019-8-01", "2019-08-03", freq="D", tz=tz) - } - ) - if axis_frame in [0, "index"]: - result = df.T.rolling("2D", axis=axis_frame).sum().T - else: - result = df.rolling("2D", axis=axis_frame).sum() - expected = pd.DataFrame( - { - **{ - i: [1.0] * 2 - for i in pd.date_range("2019-8-01", periods=1, freq="D", tz=tz) - }, - **{ - i: [2.0] * 2 - for i in pd.date_range("2019-8-02", "2019-8-03", freq="D", tz=tz) - }, - } - ) - tm.assert_frame_equal(result, expected) +@pytest.mark.parametrize("method", ["std", "mean", "sum", "max", "min", "var"]) +def test_numpy_compat(method): + # see gh-12811 + r = Rolling(Series([2, 4, 6]), window=2) + + msg = "numpy operations are not valid with window objects" + + with pytest.raises(UnsupportedFunctionCall, match=msg): + getattr(r, method)(1, 2, 3) + with pytest.raises(UnsupportedFunctionCall, match=msg): + getattr(r, method)(dtype=np.float64) + + +def test_closed(): + df = DataFrame({"A": [0, 1, 2, 3, 4]}) + # closed only allowed for datetimelike + + msg = "closed only implemented for datetimelike and offset based windows" + + with pytest.raises(ValueError, match=msg): + df.rolling(window=3, closed="neither") + + +@pytest.mark.parametrize("closed", ["neither", "left"]) +def test_closed_empty(closed, arithmetic_win_operators): + # GH 26005 + func_name = arithmetic_win_operators + ser = pd.Series( + data=np.arange(5), index=pd.date_range("2000", periods=5, freq="2D") + ) + roll = ser.rolling("1D", closed=closed) + + result = getattr(roll, func_name)() + expected = pd.Series([np.nan] * 5, index=ser.index) + tm.assert_series_equal(result, expected) + + +@pytest.mark.parametrize("func", ["min", "max"]) +def test_closed_one_entry(func): + # GH24718 + ser = pd.Series(data=[2], index=pd.date_range("2000", periods=1)) + result = getattr(ser.rolling("10D", closed="left"), func)() + tm.assert_series_equal(result, pd.Series([np.nan], index=ser.index)) + + +@pytest.mark.parametrize("func", ["min", "max"]) +def test_closed_one_entry_groupby(func): + # GH24718 + ser = pd.DataFrame( + data={"A": [1, 1, 2], "B": [3, 2, 1]}, index=pd.date_range("2000", periods=3), + ) + result = getattr( + ser.groupby("A", sort=False)["B"].rolling("10D", closed="left"), func + )() + exp_idx = pd.MultiIndex.from_arrays( + arrays=[[1, 1, 2], ser.index], names=("A", None) + ) + expected = pd.Series(data=[np.nan, 3, np.nan], index=exp_idx, name="B") + tm.assert_series_equal(result, expected) + + +@pytest.mark.parametrize("input_dtype", ["int", "float"]) +@pytest.mark.parametrize( + "func,closed,expected", + [ + ("min", "right", [0.0, 0, 0, 1, 2, 3, 4, 5, 6, 7]), + ("min", "both", [0.0, 0, 0, 0, 1, 2, 3, 4, 5, 6]), + ("min", "neither", [np.nan, 0, 0, 1, 2, 3, 4, 5, 6, 7]), + ("min", "left", [np.nan, 0, 0, 0, 1, 2, 3, 4, 5, 6]), + ("max", "right", [0.0, 1, 2, 3, 4, 5, 6, 7, 8, 9]), + ("max", "both", [0.0, 1, 2, 3, 4, 5, 6, 7, 8, 9]), + ("max", "neither", [np.nan, 0, 1, 2, 3, 4, 5, 6, 7, 8]), + ("max", "left", [np.nan, 0, 1, 2, 3, 4, 5, 6, 7, 8]), + ], +) +def test_closed_min_max_datetime(input_dtype, func, closed, expected): + # see gh-21704 + ser = pd.Series( + data=np.arange(10).astype(input_dtype), index=pd.date_range("2000", periods=10), + ) + + result = getattr(ser.rolling("3D", closed=closed), func)() + expected = pd.Series(expected, index=ser.index) + tm.assert_series_equal(result, expected) + + +def test_closed_uneven(): + # see gh-21704 + ser = pd.Series(data=np.arange(10), index=pd.date_range("2000", periods=10)) + + # uneven + ser = ser.drop(index=ser.index[[1, 5]]) + result = ser.rolling("3D", closed="left").min() + expected = pd.Series([np.nan, 0, 0, 2, 3, 4, 6, 6], index=ser.index) + tm.assert_series_equal(result, expected) + + +@pytest.mark.parametrize( + "func,closed,expected", + [ + ("min", "right", [np.nan, 0, 0, 1, 2, 3, 4, 5, np.nan, np.nan]), + ("min", "both", [np.nan, 0, 0, 0, 1, 2, 3, 4, 5, np.nan]), + ("min", "neither", [np.nan, np.nan, 0, 1, 2, 3, 4, 5, np.nan, np.nan]), + ("min", "left", [np.nan, np.nan, 0, 0, 1, 2, 3, 4, 5, np.nan]), + ("max", "right", [np.nan, 1, 2, 3, 4, 5, 6, 6, np.nan, np.nan]), + ("max", "both", [np.nan, 1, 2, 3, 4, 5, 6, 6, 6, np.nan]), + ("max", "neither", [np.nan, np.nan, 1, 2, 3, 4, 5, 6, np.nan, np.nan]), + ("max", "left", [np.nan, np.nan, 1, 2, 3, 4, 5, 6, 6, np.nan]), + ], +) +def test_closed_min_max_minp(func, closed, expected): + # see gh-21704 + ser = pd.Series(data=np.arange(10), index=pd.date_range("2000", periods=10)) + ser[ser.index[-3:]] = np.nan + result = getattr(ser.rolling("3D", min_periods=2, closed=closed), func)() + expected = pd.Series(expected, index=ser.index) + tm.assert_series_equal(result, expected) + + +@pytest.mark.parametrize( + "closed,expected", + [ + ("right", [0, 0.5, 1, 2, 3, 4, 5, 6, 7, 8]), + ("both", [0, 0.5, 1, 1.5, 2.5, 3.5, 4.5, 5.5, 6.5, 7.5]), + ("neither", [np.nan, 0, 0.5, 1.5, 2.5, 3.5, 4.5, 5.5, 6.5, 7.5]), + ("left", [np.nan, 0, 0.5, 1, 2, 3, 4, 5, 6, 7]), + ], +) +def test_closed_median_quantile(closed, expected): + # GH 26005 + ser = pd.Series(data=np.arange(10), index=pd.date_range("2000", periods=10)) + roll = ser.rolling("3D", closed=closed) + expected = pd.Series(expected, index=ser.index) + + result = roll.median() + tm.assert_series_equal(result, expected) + + result = roll.quantile(0.5) + tm.assert_series_equal(result, expected) + + +@pytest.mark.parametrize("roller", ["1s", 1]) +def tests_empty_df_rolling(roller): + # GH 15819 Verifies that datetime and integer rolling windows can be + # applied to empty DataFrames + expected = DataFrame() + result = DataFrame().rolling(roller).sum() + tm.assert_frame_equal(result, expected) + + # Verifies that datetime and integer rolling windows can be applied to + # empty DataFrames with datetime index + expected = DataFrame(index=pd.DatetimeIndex([])) + result = DataFrame(index=pd.DatetimeIndex([])).rolling(roller).sum() + tm.assert_frame_equal(result, expected) + + +def test_empty_window_median_quantile(): + # GH 26005 + expected = pd.Series([np.nan, np.nan, np.nan]) + roll = pd.Series(np.arange(3)).rolling(0) + + result = roll.median() + tm.assert_series_equal(result, expected) + + result = roll.quantile(0.1) + tm.assert_series_equal(result, expected) + + +def test_missing_minp_zero(): + # https://github.com/pandas-dev/pandas/pull/18921 + # minp=0 + x = pd.Series([np.nan]) + result = x.rolling(1, min_periods=0).sum() + expected = pd.Series([0.0]) + tm.assert_series_equal(result, expected) + + # minp=1 + result = x.rolling(1, min_periods=1).sum() + expected = pd.Series([np.nan]) + tm.assert_series_equal(result, expected) + + +def test_missing_minp_zero_variable(): + # https://github.com/pandas-dev/pandas/pull/18921 + x = pd.Series( + [np.nan] * 4, + index=pd.DatetimeIndex( + ["2017-01-01", "2017-01-04", "2017-01-06", "2017-01-07"] + ), + ) + result = x.rolling(pd.Timedelta("2d"), min_periods=0).sum() + expected = pd.Series(0.0, index=x.index) + tm.assert_series_equal(result, expected) + + +def test_multi_index_names(): + + # GH 16789, 16825 + cols = pd.MultiIndex.from_product([["A", "B"], ["C", "D", "E"]], names=["1", "2"]) + df = DataFrame(np.ones((10, 6)), columns=cols) + result = df.rolling(3).cov() + + tm.assert_index_equal(result.columns, df.columns) + assert result.index.names == [None, "1", "2"] + + +@pytest.mark.parametrize("klass", [pd.Series, pd.DataFrame]) +def test_iter_raises(klass): + # https://github.com/pandas-dev/pandas/issues/11704 + # Iteration over a Window + obj = klass([1, 2, 3, 4]) + + msg = "See issue #11704 https://github.com/pandas-dev/pandas/issues/11704" + + with pytest.raises(NotImplementedError, match=msg): + iter(obj.rolling(2)) + + +def test_rolling_axis_sum(axis_frame): + # see gh-23372. + df = DataFrame(np.ones((10, 20))) + axis = df._get_axis_number(axis_frame) + + if axis == 0: + expected = DataFrame({i: [np.nan] * 2 + [3.0] * 8 for i in range(20)}) + else: + # axis == 1 + expected = DataFrame([[np.nan] * 2 + [3.0] * 18] * 10) + + result = df.rolling(3, axis=axis_frame).sum() + tm.assert_frame_equal(result, expected) + + +def test_rolling_axis_count(axis_frame): + # see gh-26055 + df = DataFrame({"x": range(3), "y": range(3)}) + + axis = df._get_axis_number(axis_frame) + + if axis in [0, "index"]: + expected = DataFrame({"x": [1.0, 2.0, 2.0], "y": [1.0, 2.0, 2.0]}) + else: + expected = DataFrame({"x": [1.0, 1.0, 1.0], "y": [2.0, 2.0, 2.0]}) + + result = df.rolling(2, axis=axis_frame, min_periods=0).count() + tm.assert_frame_equal(result, expected) + + +def test_readonly_array(): + # GH-27766 + arr = np.array([1, 3, np.nan, 3, 5]) + arr.setflags(write=False) + result = pd.Series(arr).rolling(2).mean() + expected = pd.Series([np.nan, 2, np.nan, np.nan, 4]) + tm.assert_series_equal(result, expected) + + +def test_rolling_datetime(axis_frame, tz_naive_fixture): + # GH-28192 + tz = tz_naive_fixture + df = pd.DataFrame( + {i: [1] * 2 for i in pd.date_range("2019-8-01", "2019-08-03", freq="D", tz=tz)} + ) + if axis_frame in [0, "index"]: + result = df.T.rolling("2D", axis=axis_frame).sum().T + else: + result = df.rolling("2D", axis=axis_frame).sum() + expected = pd.DataFrame( + { + **{ + i: [1.0] * 2 + for i in pd.date_range("2019-8-01", periods=1, freq="D", tz=tz) + }, + **{ + i: [2.0] * 2 + for i in pd.date_range("2019-8-02", "2019-8-03", freq="D", tz=tz) + }, + } + ) + tm.assert_frame_equal(result, expected) def test_rolling_window_as_string(): diff --git a/pandas/tests/window/test_window.py b/pandas/tests/window/test_window.py index c7c45f0e5e0de..a450d29797c41 100644 --- a/pandas/tests/window/test_window.py +++ b/pandas/tests/window/test_window.py @@ -7,70 +7,62 @@ import pandas as pd from pandas import Series from pandas.core.window import Window -from pandas.tests.window.common import Base - - -@pytest.mark.filterwarnings("ignore:can't resolve package:ImportWarning") -class TestWindow(Base): - def setup_method(self, method): - self._create_data() - - @td.skip_if_no_scipy - @pytest.mark.parametrize("which", ["series", "frame"]) - def test_constructor(self, which): - # GH 12669 - - o = getattr(self, which) - c = o.rolling - - # valid - c(win_type="boxcar", window=2, min_periods=1) - c(win_type="boxcar", window=2, min_periods=1, center=True) - c(win_type="boxcar", window=2, min_periods=1, center=False) - - # not valid - for w in [2.0, "foo", np.array([2])]: - with pytest.raises(ValueError, match="min_periods must be an integer"): - c(win_type="boxcar", window=2, min_periods=w) - with pytest.raises(ValueError, match="center must be a boolean"): - c(win_type="boxcar", window=2, min_periods=1, center=w) - - for wt in ["foobar", 1]: - with pytest.raises(ValueError, match="Invalid win_type"): - c(win_type=wt, window=2) - - @td.skip_if_no_scipy - @pytest.mark.parametrize("which", ["series", "frame"]) - def test_constructor_with_win_type(self, which, win_types): - # GH 12669 - o = getattr(self, which) - c = o.rolling - c(win_type=win_types, window=2) - - @pytest.mark.parametrize("method", ["sum", "mean"]) - def test_numpy_compat(self, method): - # see gh-12811 - w = Window(Series([2, 4, 6]), window=[0, 2]) - - msg = "numpy operations are not valid with window objects" - - with pytest.raises(UnsupportedFunctionCall, match=msg): - getattr(w, method)(1, 2, 3) - with pytest.raises(UnsupportedFunctionCall, match=msg): - getattr(w, method)(dtype=np.float64) - - @td.skip_if_no_scipy - @pytest.mark.parametrize("arg", ["median", "kurt", "skew"]) - def test_agg_function_support(self, arg): - df = pd.DataFrame({"A": np.arange(5)}) - roll = df.rolling(2, win_type="triang") - - msg = f"'{arg}' is not a valid function for 'Window' object" - with pytest.raises(AttributeError, match=msg): - roll.agg(arg) - - with pytest.raises(AttributeError, match=msg): - roll.agg([arg]) - - with pytest.raises(AttributeError, match=msg): - roll.agg({"A": arg}) + + +@td.skip_if_no_scipy +def test_constructor(which): + # GH 12669 + c = which.rolling + + # valid + c(win_type="boxcar", window=2, min_periods=1) + c(win_type="boxcar", window=2, min_periods=1, center=True) + c(win_type="boxcar", window=2, min_periods=1, center=False) + + # not valid + for w in [2.0, "foo", np.array([2])]: + with pytest.raises(ValueError, match="min_periods must be an integer"): + c(win_type="boxcar", window=2, min_periods=w) + with pytest.raises(ValueError, match="center must be a boolean"): + c(win_type="boxcar", window=2, min_periods=1, center=w) + + for wt in ["foobar", 1]: + with pytest.raises(ValueError, match="Invalid win_type"): + c(win_type=wt, window=2) + + +@td.skip_if_no_scipy +def test_constructor_with_win_type(which, win_types): + # GH 12669 + c = which.rolling + c(win_type=win_types, window=2) + + +@pytest.mark.parametrize("method", ["sum", "mean"]) +def test_numpy_compat(method): + # see gh-12811 + w = Window(Series([2, 4, 6]), window=[0, 2]) + + msg = "numpy operations are not valid with window objects" + + with pytest.raises(UnsupportedFunctionCall, match=msg): + getattr(w, method)(1, 2, 3) + with pytest.raises(UnsupportedFunctionCall, match=msg): + getattr(w, method)(dtype=np.float64) + + +@td.skip_if_no_scipy +@pytest.mark.parametrize("arg", ["median", "kurt", "skew"]) +def test_agg_function_support(arg): + df = pd.DataFrame({"A": np.arange(5)}) + roll = df.rolling(2, win_type="triang") + + msg = f"'{arg}' is not a valid function for 'Window' object" + with pytest.raises(AttributeError, match=msg): + roll.agg(arg) + + with pytest.raises(AttributeError, match=msg): + roll.agg([arg]) + + with pytest.raises(AttributeError, match=msg): + roll.agg({"A": arg}) From 4a42844dc5557eb3ad9fb0777d7ebd25e6c161f4 Mon Sep 17 00:00:00 2001 From: Kaiqi Date: Thu, 14 May 2020 21:01:40 +0200 Subject: [PATCH 6/7] linting --- pandas/tests/window/moments/test_moments_rolling.py | 4 ++-- pandas/tests/window/test_api.py | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/pandas/tests/window/moments/test_moments_rolling.py b/pandas/tests/window/moments/test_moments_rolling.py index 6a3aec5c18141..aa6bd66e14903 100644 --- a/pandas/tests/window/moments/test_moments_rolling.py +++ b/pandas/tests/window/moments/test_moments_rolling.py @@ -244,7 +244,7 @@ def test_cmov_mean(): vals = np.array([6.95, 15.21, 4.72, 9.12, 13.81, 13.49, 16.68, 9.48, 10.63, 14.48]) result = Series(vals).rolling(5, center=True).mean() expected = Series( - [np.nan, np.nan, 9.962, 11.27, 11.564, 12.516, 12.818, 12.952, np.nan, np.nan,] + [np.nan, np.nan, 9.962, 11.27, 11.564, 12.516, 12.818, 12.952, np.nan, np.nan, ] ) tm.assert_series_equal(expected, result) @@ -255,7 +255,7 @@ def test_cmov_window(): vals = np.array([6.95, 15.21, 4.72, 9.12, 13.81, 13.49, 16.68, 9.48, 10.63, 14.48]) result = Series(vals).rolling(5, win_type="boxcar", center=True).mean() expected = Series( - [np.nan, np.nan, 9.962, 11.27, 11.564, 12.516, 12.818, 12.952, np.nan, np.nan,] + [np.nan, np.nan, 9.962, 11.27, 11.564, 12.516, 12.818, 12.952, np.nan, np.nan, ] ) tm.assert_series_equal(expected, result) diff --git a/pandas/tests/window/test_api.py b/pandas/tests/window/test_api.py index c6e37d447c1a2..2f7c37edc7c36 100644 --- a/pandas/tests/window/test_api.py +++ b/pandas/tests/window/test_api.py @@ -207,7 +207,7 @@ def test_count_nonnumeric_types(): "fl_inf": [1.0, 2.0, np.Inf], "fl_nan": [1.0, 2.0, np.NaN], "str_nan": ["aa", "bb", np.NaN], - "dt_nat": [Timestamp("20170101"), Timestamp("20170203"), Timestamp(None),], + "dt_nat": [Timestamp("20170101"), Timestamp("20170203"), Timestamp(None), ], "periods_nat": [ pd.Period("2012-01"), pd.Period("2012-02"), From f3509b5d375b32c8bdcb0d7f33885472f915c7bb Mon Sep 17 00:00:00 2001 From: Kaiqi Date: Thu, 14 May 2020 21:30:23 +0200 Subject: [PATCH 7/7] fix conflict between pep8 and black --- .../window/moments/test_moments_rolling.py | 32 +++++++++++++++---- pandas/tests/window/test_api.py | 7 +++- 2 files changed, 32 insertions(+), 7 deletions(-) diff --git a/pandas/tests/window/moments/test_moments_rolling.py b/pandas/tests/window/moments/test_moments_rolling.py index aa6bd66e14903..3e5475e6b274f 100644 --- a/pandas/tests/window/moments/test_moments_rolling.py +++ b/pandas/tests/window/moments/test_moments_rolling.py @@ -243,9 +243,19 @@ def test_cmov_mean(): # GH 8238 vals = np.array([6.95, 15.21, 4.72, 9.12, 13.81, 13.49, 16.68, 9.48, 10.63, 14.48]) result = Series(vals).rolling(5, center=True).mean() - expected = Series( - [np.nan, np.nan, 9.962, 11.27, 11.564, 12.516, 12.818, 12.952, np.nan, np.nan, ] - ) + expected_values = [ + np.nan, + np.nan, + 9.962, + 11.27, + 11.564, + 12.516, + 12.818, + 12.952, + np.nan, + np.nan, + ] + expected = Series(expected_values) tm.assert_series_equal(expected, result) @@ -254,9 +264,19 @@ def test_cmov_window(): # GH 8238 vals = np.array([6.95, 15.21, 4.72, 9.12, 13.81, 13.49, 16.68, 9.48, 10.63, 14.48]) result = Series(vals).rolling(5, win_type="boxcar", center=True).mean() - expected = Series( - [np.nan, np.nan, 9.962, 11.27, 11.564, 12.516, 12.818, 12.952, np.nan, np.nan, ] - ) + expected_values = [ + np.nan, + np.nan, + 9.962, + 11.27, + 11.564, + 12.516, + 12.818, + 12.952, + np.nan, + np.nan, + ] + expected = Series(expected_values) tm.assert_series_equal(expected, result) diff --git a/pandas/tests/window/test_api.py b/pandas/tests/window/test_api.py index 2f7c37edc7c36..33fb79d98a324 100644 --- a/pandas/tests/window/test_api.py +++ b/pandas/tests/window/test_api.py @@ -191,6 +191,11 @@ def test_count_nonnumeric_types(): "dt_nat", "periods_nat", ] + dt_nat_col = [ + Timestamp("20170101"), + Timestamp("20170203"), + Timestamp(None), + ] df = DataFrame( { @@ -207,7 +212,7 @@ def test_count_nonnumeric_types(): "fl_inf": [1.0, 2.0, np.Inf], "fl_nan": [1.0, 2.0, np.NaN], "str_nan": ["aa", "bb", np.NaN], - "dt_nat": [Timestamp("20170101"), Timestamp("20170203"), Timestamp(None), ], + "dt_nat": dt_nat_col, "periods_nat": [ pd.Period("2012-01"), pd.Period("2012-02"),