From e607b6b8f9402f2e99042d8a26c2a3cb1a817267 Mon Sep 17 00:00:00 2001 From: Matt Roeschke Date: Tue, 9 Jul 2019 00:21:45 -0700 Subject: [PATCH 1/3] CLN: Split test_window.py --- pandas/tests/window/__init__.py | 0 pandas/tests/window/test_dtypes.py | 228 ++++ pandas/tests/window/test_pairwise.py | 183 +++ pandas/tests/window/test_timeseries_window.py | 692 ++++++++++ pandas/tests/{ => window}/test_window.py | 1154 +---------------- 5 files changed, 1141 insertions(+), 1116 deletions(-) create mode 100644 pandas/tests/window/__init__.py create mode 100644 pandas/tests/window/test_dtypes.py create mode 100644 pandas/tests/window/test_pairwise.py create mode 100644 pandas/tests/window/test_timeseries_window.py rename pandas/tests/{ => window}/test_window.py (77%) diff --git a/pandas/tests/window/__init__.py b/pandas/tests/window/__init__.py new file mode 100644 index 0000000000000..e69de29bb2d1d diff --git a/pandas/tests/window/test_dtypes.py b/pandas/tests/window/test_dtypes.py new file mode 100644 index 0000000000000..ab2915a333afd --- /dev/null +++ b/pandas/tests/window/test_dtypes.py @@ -0,0 +1,228 @@ +from itertools import product + +import numpy as np +import pytest + +from pandas import DataFrame, Series +from pandas.core.base import DataError +import pandas.util.testing as tm + +# gh-12373 : rolling functions error on float32 data +# make sure rolling functions works for different dtypes +# +# NOTE that these are yielded tests and so _create_data +# is explicitly called. +# +# further note that we are only checking rolling for fully dtype +# compliance (though both expanding and ewm inherit) + + +class Dtype: + window = 2 + + funcs = { + "count": lambda v: v.count(), + "max": lambda v: v.max(), + "min": lambda v: v.min(), + "sum": lambda v: v.sum(), + "mean": lambda v: v.mean(), + "std": lambda v: v.std(), + "var": lambda v: v.var(), + "median": lambda v: v.median(), + } + + def get_expects(self): + expects = { + "sr1": { + "count": Series([1, 2, 2, 2, 2], dtype="float64"), + "max": Series([np.nan, 1, 2, 3, 4], dtype="float64"), + "min": Series([np.nan, 0, 1, 2, 3], dtype="float64"), + "sum": Series([np.nan, 1, 3, 5, 7], dtype="float64"), + "mean": Series([np.nan, 0.5, 1.5, 2.5, 3.5], dtype="float64"), + "std": Series([np.nan] + [np.sqrt(0.5)] * 4, dtype="float64"), + "var": Series([np.nan, 0.5, 0.5, 0.5, 0.5], dtype="float64"), + "median": Series([np.nan, 0.5, 1.5, 2.5, 3.5], dtype="float64"), + }, + "sr2": { + "count": Series([1, 2, 2, 2, 2], dtype="float64"), + "max": Series([np.nan, 10, 8, 6, 4], dtype="float64"), + "min": Series([np.nan, 8, 6, 4, 2], dtype="float64"), + "sum": Series([np.nan, 18, 14, 10, 6], dtype="float64"), + "mean": Series([np.nan, 9, 7, 5, 3], dtype="float64"), + "std": Series([np.nan] + [np.sqrt(2)] * 4, dtype="float64"), + "var": Series([np.nan, 2, 2, 2, 2], dtype="float64"), + "median": Series([np.nan, 9, 7, 5, 3], dtype="float64"), + }, + "df": { + "count": DataFrame( + {0: Series([1, 2, 2, 2, 2]), 1: Series([1, 2, 2, 2, 2])}, + dtype="float64", + ), + "max": DataFrame( + {0: Series([np.nan, 2, 4, 6, 8]), 1: Series([np.nan, 3, 5, 7, 9])}, + dtype="float64", + ), + "min": DataFrame( + {0: Series([np.nan, 0, 2, 4, 6]), 1: Series([np.nan, 1, 3, 5, 7])}, + dtype="float64", + ), + "sum": DataFrame( + { + 0: Series([np.nan, 2, 6, 10, 14]), + 1: Series([np.nan, 4, 8, 12, 16]), + }, + dtype="float64", + ), + "mean": DataFrame( + {0: Series([np.nan, 1, 3, 5, 7]), 1: Series([np.nan, 2, 4, 6, 8])}, + dtype="float64", + ), + "std": DataFrame( + { + 0: Series([np.nan] + [np.sqrt(2)] * 4), + 1: Series([np.nan] + [np.sqrt(2)] * 4), + }, + dtype="float64", + ), + "var": DataFrame( + {0: Series([np.nan, 2, 2, 2, 2]), 1: Series([np.nan, 2, 2, 2, 2])}, + dtype="float64", + ), + "median": DataFrame( + {0: Series([np.nan, 1, 3, 5, 7]), 1: Series([np.nan, 2, 4, 6, 8])}, + dtype="float64", + ), + }, + } + return expects + + def _create_dtype_data(self, dtype): + sr1 = Series(np.arange(5), dtype=dtype) + sr2 = Series(np.arange(10, 0, -2), dtype=dtype) + df = DataFrame(np.arange(10).reshape((5, 2)), dtype=dtype) + + data = {"sr1": sr1, "sr2": sr2, "df": df} + + return data + + def _create_data(self): + self.data = self._create_dtype_data(self.dtype) + self.expects = self.get_expects() + + def test_dtypes(self): + self._create_data() + for f_name, d_name in product(self.funcs.keys(), self.data.keys()): + + f = self.funcs[f_name] + d = self.data[d_name] + exp = self.expects[d_name][f_name] + self.check_dtypes(f, f_name, d, d_name, exp) + + def check_dtypes(self, f, f_name, d, d_name, exp): + roll = d.rolling(window=self.window) + result = f(roll) + tm.assert_almost_equal(result, exp) + + +class TestDtype_object(Dtype): + dtype = object + + +class Dtype_integer(Dtype): + pass + + +class TestDtype_int8(Dtype_integer): + dtype = np.int8 + + +class TestDtype_int16(Dtype_integer): + dtype = np.int16 + + +class TestDtype_int32(Dtype_integer): + dtype = np.int32 + + +class TestDtype_int64(Dtype_integer): + dtype = np.int64 + + +class Dtype_uinteger(Dtype): + pass + + +class TestDtype_uint8(Dtype_uinteger): + dtype = np.uint8 + + +class TestDtype_uint16(Dtype_uinteger): + dtype = np.uint16 + + +class TestDtype_uint32(Dtype_uinteger): + dtype = np.uint32 + + +class TestDtype_uint64(Dtype_uinteger): + dtype = np.uint64 + + +class Dtype_float(Dtype): + pass + + +class TestDtype_float16(Dtype_float): + dtype = np.float16 + + +class TestDtype_float32(Dtype_float): + dtype = np.float32 + + +class TestDtype_float64(Dtype_float): + dtype = np.float64 + + +class TestDtype_category(Dtype): + dtype = "category" + include_df = False + + def _create_dtype_data(self, dtype): + sr1 = Series(range(5), dtype=dtype) + sr2 = Series(range(10, 0, -2), dtype=dtype) + + data = {"sr1": sr1, "sr2": sr2} + + return data + + +class DatetimeLike(Dtype): + def check_dtypes(self, f, f_name, d, d_name, exp): + + roll = d.rolling(window=self.window) + if f_name == "count": + result = f(roll) + tm.assert_almost_equal(result, exp) + + else: + with pytest.raises(DataError): + f(roll) + + +class TestDtype_timedelta(DatetimeLike): + dtype = np.dtype("m8[ns]") + + +class TestDtype_datetime(DatetimeLike): + dtype = np.dtype("M8[ns]") + + +class TestDtype_datetime64UTC(DatetimeLike): + dtype = "datetime64[ns, UTC]" + + def _create_data(self): + pytest.skip( + "direct creation of extension dtype " + "datetime64[ns, UTC] is not supported ATM" + ) diff --git a/pandas/tests/window/test_pairwise.py b/pandas/tests/window/test_pairwise.py new file mode 100644 index 0000000000000..56d89e15c418c --- /dev/null +++ b/pandas/tests/window/test_pairwise.py @@ -0,0 +1,183 @@ +import warnings + +import pytest + +from pandas import DataFrame, Series +from pandas.core.sorting import safe_sort +import pandas.util.testing as tm + + +class TestPairwise: + + # GH 7738 + df1s = [ + DataFrame([[2, 4], [1, 2], [5, 2], [8, 1]], columns=[0, 1]), + DataFrame([[2, 4], [1, 2], [5, 2], [8, 1]], columns=[1, 0]), + DataFrame([[2, 4], [1, 2], [5, 2], [8, 1]], columns=[1, 1]), + DataFrame([[2, 4], [1, 2], [5, 2], [8, 1]], columns=["C", "C"]), + DataFrame([[2, 4], [1, 2], [5, 2], [8, 1]], columns=[1.0, 0]), + DataFrame([[2, 4], [1, 2], [5, 2], [8, 1]], columns=[0.0, 1]), + DataFrame([[2, 4], [1, 2], [5, 2], [8, 1]], columns=["C", 1]), + DataFrame([[2.0, 4.0], [1.0, 2.0], [5.0, 2.0], [8.0, 1.0]], columns=[1, 0.0]), + DataFrame([[2, 4.0], [1, 2.0], [5, 2.0], [8, 1.0]], columns=[0, 1.0]), + DataFrame([[2, 4], [1, 2], [5, 2], [8, 1.0]], columns=[1.0, "X"]), + ] + df2 = DataFrame( + [[None, 1, 1], [None, 1, 2], [None, 3, 2], [None, 8, 1]], + columns=["Y", "Z", "X"], + ) + s = Series([1, 1, 3, 8]) + + def compare(self, result, expected): + + # since we have sorted the results + # we can only compare non-nans + result = result.dropna().values + expected = expected.dropna().values + + tm.assert_numpy_array_equal(result, expected, check_dtype=False) + + @pytest.mark.parametrize("f", [lambda x: x.cov(), lambda x: x.corr()]) + def test_no_flex(self, f): + + # DataFrame methods (which do not call _flex_binary_moment()) + + results = [f(df) for df in self.df1s] + for (df, result) in zip(self.df1s, results): + tm.assert_index_equal(result.index, df.columns) + tm.assert_index_equal(result.columns, df.columns) + for i, result in enumerate(results): + if i > 0: + self.compare(result, results[0]) + + @pytest.mark.parametrize( + "f", + [ + lambda x: x.expanding().cov(pairwise=True), + lambda x: x.expanding().corr(pairwise=True), + lambda x: x.rolling(window=3).cov(pairwise=True), + lambda x: x.rolling(window=3).corr(pairwise=True), + lambda x: x.ewm(com=3).cov(pairwise=True), + lambda x: x.ewm(com=3).corr(pairwise=True), + ], + ) + def test_pairwise_with_self(self, f): + + # DataFrame with itself, pairwise=True + # note that we may construct the 1st level of the MI + # in a non-monotonic way, so compare accordingly + results = [] + for i, df in enumerate(self.df1s): + result = f(df) + tm.assert_index_equal(result.index.levels[0], df.index, check_names=False) + tm.assert_numpy_array_equal( + safe_sort(result.index.levels[1]), safe_sort(df.columns.unique()) + ) + tm.assert_index_equal(result.columns, df.columns) + results.append(df) + + for i, result in enumerate(results): + if i > 0: + self.compare(result, results[0]) + + @pytest.mark.parametrize( + "f", + [ + lambda x: x.expanding().cov(pairwise=False), + lambda x: x.expanding().corr(pairwise=False), + lambda x: x.rolling(window=3).cov(pairwise=False), + lambda x: x.rolling(window=3).corr(pairwise=False), + lambda x: x.ewm(com=3).cov(pairwise=False), + lambda x: x.ewm(com=3).corr(pairwise=False), + ], + ) + def test_no_pairwise_with_self(self, f): + + # DataFrame with itself, pairwise=False + results = [f(df) for df in self.df1s] + for (df, result) in zip(self.df1s, results): + tm.assert_index_equal(result.index, df.index) + tm.assert_index_equal(result.columns, df.columns) + for i, result in enumerate(results): + if i > 0: + self.compare(result, results[0]) + + @pytest.mark.parametrize( + "f", + [ + lambda x, y: x.expanding().cov(y, pairwise=True), + lambda x, y: x.expanding().corr(y, pairwise=True), + lambda x, y: x.rolling(window=3).cov(y, pairwise=True), + lambda x, y: x.rolling(window=3).corr(y, pairwise=True), + lambda x, y: x.ewm(com=3).cov(y, pairwise=True), + lambda x, y: x.ewm(com=3).corr(y, pairwise=True), + ], + ) + def test_pairwise_with_other(self, f): + + # DataFrame with another DataFrame, pairwise=True + results = [f(df, self.df2) for df in self.df1s] + for (df, result) in zip(self.df1s, results): + tm.assert_index_equal(result.index.levels[0], df.index, check_names=False) + tm.assert_numpy_array_equal( + safe_sort(result.index.levels[1]), safe_sort(self.df2.columns.unique()) + ) + for i, result in enumerate(results): + if i > 0: + self.compare(result, results[0]) + + @pytest.mark.parametrize( + "f", + [ + lambda x, y: x.expanding().cov(y, pairwise=False), + lambda x, y: x.expanding().corr(y, pairwise=False), + lambda x, y: x.rolling(window=3).cov(y, pairwise=False), + lambda x, y: x.rolling(window=3).corr(y, pairwise=False), + lambda x, y: x.ewm(com=3).cov(y, pairwise=False), + lambda x, y: x.ewm(com=3).corr(y, pairwise=False), + ], + ) + def test_no_pairwise_with_other(self, f): + + # DataFrame with another DataFrame, pairwise=False + results = [ + f(df, self.df2) if df.columns.is_unique else None for df in self.df1s + ] + for (df, result) in zip(self.df1s, results): + if result is not None: + with warnings.catch_warnings(record=True): + warnings.simplefilter("ignore", RuntimeWarning) + # we can have int and str columns + expected_index = df.index.union(self.df2.index) + expected_columns = df.columns.union(self.df2.columns) + tm.assert_index_equal(result.index, expected_index) + tm.assert_index_equal(result.columns, expected_columns) + else: + with pytest.raises(ValueError, match="'arg1' columns are not unique"): + f(df, self.df2) + with pytest.raises(ValueError, match="'arg2' columns are not unique"): + f(self.df2, df) + + @pytest.mark.parametrize( + "f", + [ + lambda x, y: x.expanding().cov(y), + lambda x, y: x.expanding().corr(y), + lambda x, y: x.rolling(window=3).cov(y), + lambda x, y: x.rolling(window=3).corr(y), + lambda x, y: x.ewm(com=3).cov(y), + lambda x, y: x.ewm(com=3).corr(y), + ], + ) + def test_pairwise_with_series(self, f): + + # DataFrame with a Series + results = [f(df, self.s) for df in self.df1s] + [ + f(self.s, df) for df in self.df1s + ] + for (df, result) in zip(self.df1s, results): + tm.assert_index_equal(result.index, df.index) + tm.assert_index_equal(result.columns, df.columns) + for i, result in enumerate(results): + if i > 0: + self.compare(result, results[0]) diff --git a/pandas/tests/window/test_timeseries_window.py b/pandas/tests/window/test_timeseries_window.py new file mode 100644 index 0000000000000..e057eadae9da8 --- /dev/null +++ b/pandas/tests/window/test_timeseries_window.py @@ -0,0 +1,692 @@ +import numpy as np +import pytest + +from pandas import DataFrame, Index, Series, Timestamp, date_range, to_datetime +import pandas.util.testing as tm + +import pandas.tseries.offsets as offsets + + +class TestRollingTS: + + # rolling time-series friendly + # xref GH13327 + + def setup_method(self, method): + + self.regular = DataFrame( + {"A": date_range("20130101", periods=5, freq="s"), "B": range(5)} + ).set_index("A") + + self.ragged = DataFrame({"B": range(5)}) + self.ragged.index = [ + Timestamp("20130101 09:00:00"), + Timestamp("20130101 09:00:02"), + Timestamp("20130101 09:00:03"), + Timestamp("20130101 09:00:05"), + Timestamp("20130101 09:00:06"), + ] + + def test_doc_string(self): + + df = DataFrame( + {"B": [0, 1, 2, np.nan, 4]}, + index=[ + Timestamp("20130101 09:00:00"), + Timestamp("20130101 09:00:02"), + Timestamp("20130101 09:00:03"), + Timestamp("20130101 09:00:05"), + Timestamp("20130101 09:00:06"), + ], + ) + df + df.rolling("2s").sum() + + def test_valid(self): + + df = self.regular + + # not a valid freq + with pytest.raises(ValueError): + df.rolling(window="foobar") + + # not a datetimelike index + with pytest.raises(ValueError): + df.reset_index().rolling(window="foobar") + + # non-fixed freqs + for freq in ["2MS", offsets.MonthBegin(2)]: + with pytest.raises(ValueError): + df.rolling(window=freq) + + for freq in ["1D", offsets.Day(2), "2ms"]: + df.rolling(window=freq) + + # non-integer min_periods + for minp in [1.0, "foo", np.array([1, 2, 3])]: + with pytest.raises(ValueError): + df.rolling(window="1D", min_periods=minp) + + # center is not implemented + with pytest.raises(NotImplementedError): + df.rolling(window="1D", center=True) + + def test_on(self): + + df = self.regular + + # not a valid column + with pytest.raises(ValueError): + df.rolling(window="2s", on="foobar") + + # column is valid + df = df.copy() + df["C"] = date_range("20130101", periods=len(df)) + df.rolling(window="2d", on="C").sum() + + # invalid columns + with pytest.raises(ValueError): + df.rolling(window="2d", on="B") + + # ok even though on non-selected + df.rolling(window="2d", on="C").B.sum() + + def test_monotonic_on(self): + + # on/index must be monotonic + df = DataFrame( + {"A": date_range("20130101", periods=5, freq="s"), "B": range(5)} + ) + + assert df.A.is_monotonic + df.rolling("2s", on="A").sum() + + df = df.set_index("A") + assert df.index.is_monotonic + df.rolling("2s").sum() + + # non-monotonic + df.index = reversed(df.index.tolist()) + assert not df.index.is_monotonic + + with pytest.raises(ValueError): + df.rolling("2s").sum() + + df = df.reset_index() + with pytest.raises(ValueError): + df.rolling("2s", on="A").sum() + + def test_frame_on(self): + + df = DataFrame( + {"B": range(5), "C": date_range("20130101 09:00:00", periods=5, freq="3s")} + ) + + df["A"] = [ + Timestamp("20130101 09:00:00"), + Timestamp("20130101 09:00:02"), + Timestamp("20130101 09:00:03"), + Timestamp("20130101 09:00:05"), + Timestamp("20130101 09:00:06"), + ] + + # we are doing simulating using 'on' + expected = df.set_index("A").rolling("2s").B.sum().reset_index(drop=True) + + result = df.rolling("2s", on="A").B.sum() + tm.assert_series_equal(result, expected) + + # test as a frame + # we should be ignoring the 'on' as an aggregation column + # note that the expected is setting, computing, and resetting + # so the columns need to be switched compared + # to the actual result where they are ordered as in the + # original + expected = ( + df.set_index("A").rolling("2s")[["B"]].sum().reset_index()[["B", "A"]] + ) + + result = df.rolling("2s", on="A")[["B"]].sum() + tm.assert_frame_equal(result, expected) + + def test_frame_on2(self): + + # using multiple aggregation columns + df = DataFrame( + { + "A": [0, 1, 2, 3, 4], + "B": [0, 1, 2, np.nan, 4], + "C": Index( + [ + Timestamp("20130101 09:00:00"), + Timestamp("20130101 09:00:02"), + Timestamp("20130101 09:00:03"), + Timestamp("20130101 09:00:05"), + Timestamp("20130101 09:00:06"), + ] + ), + }, + columns=["A", "C", "B"], + ) + + expected1 = DataFrame( + {"A": [0.0, 1, 3, 3, 7], "B": [0, 1, 3, np.nan, 4], "C": df["C"]}, + columns=["A", "C", "B"], + ) + + result = df.rolling("2s", on="C").sum() + expected = expected1 + tm.assert_frame_equal(result, expected) + + expected = Series([0, 1, 3, np.nan, 4], name="B") + result = df.rolling("2s", on="C").B.sum() + tm.assert_series_equal(result, expected) + + expected = expected1[["A", "B", "C"]] + result = df.rolling("2s", on="C")[["A", "B", "C"]].sum() + tm.assert_frame_equal(result, expected) + + def test_basic_regular(self): + + df = self.regular.copy() + + df.index = date_range("20130101", periods=5, freq="D") + expected = df.rolling(window=1, min_periods=1).sum() + result = df.rolling(window="1D").sum() + tm.assert_frame_equal(result, expected) + + df.index = date_range("20130101", periods=5, freq="2D") + expected = df.rolling(window=1, min_periods=1).sum() + result = df.rolling(window="2D", min_periods=1).sum() + tm.assert_frame_equal(result, expected) + + expected = df.rolling(window=1, min_periods=1).sum() + result = df.rolling(window="2D", min_periods=1).sum() + tm.assert_frame_equal(result, expected) + + expected = df.rolling(window=1).sum() + result = df.rolling(window="2D").sum() + tm.assert_frame_equal(result, expected) + + def test_min_periods(self): + + # compare for min_periods + df = self.regular + + # these slightly different + expected = df.rolling(2, min_periods=1).sum() + result = df.rolling("2s").sum() + tm.assert_frame_equal(result, expected) + + expected = df.rolling(2, min_periods=1).sum() + result = df.rolling("2s", min_periods=1).sum() + tm.assert_frame_equal(result, expected) + + def test_closed(self): + + # xref GH13965 + + df = DataFrame( + {"A": [1] * 5}, + index=[ + Timestamp("20130101 09:00:01"), + Timestamp("20130101 09:00:02"), + Timestamp("20130101 09:00:03"), + Timestamp("20130101 09:00:04"), + Timestamp("20130101 09:00:06"), + ], + ) + + # closed must be 'right', 'left', 'both', 'neither' + with pytest.raises(ValueError): + self.regular.rolling(window="2s", closed="blabla") + + expected = df.copy() + expected["A"] = [1.0, 2, 2, 2, 1] + result = df.rolling("2s", closed="right").sum() + tm.assert_frame_equal(result, expected) + + # default should be 'right' + result = df.rolling("2s").sum() + tm.assert_frame_equal(result, expected) + + expected = df.copy() + expected["A"] = [1.0, 2, 3, 3, 2] + result = df.rolling("2s", closed="both").sum() + tm.assert_frame_equal(result, expected) + + expected = df.copy() + expected["A"] = [np.nan, 1.0, 2, 2, 1] + result = df.rolling("2s", closed="left").sum() + tm.assert_frame_equal(result, expected) + + expected = df.copy() + expected["A"] = [np.nan, 1.0, 1, 1, np.nan] + result = df.rolling("2s", closed="neither").sum() + tm.assert_frame_equal(result, expected) + + def test_ragged_sum(self): + + df = self.ragged + result = df.rolling(window="1s", min_periods=1).sum() + expected = df.copy() + expected["B"] = [0.0, 1, 2, 3, 4] + tm.assert_frame_equal(result, expected) + + result = df.rolling(window="2s", min_periods=1).sum() + expected = df.copy() + expected["B"] = [0.0, 1, 3, 3, 7] + tm.assert_frame_equal(result, expected) + + result = df.rolling(window="2s", min_periods=2).sum() + expected = df.copy() + expected["B"] = [np.nan, np.nan, 3, np.nan, 7] + tm.assert_frame_equal(result, expected) + + result = df.rolling(window="3s", min_periods=1).sum() + expected = df.copy() + expected["B"] = [0.0, 1, 3, 5, 7] + tm.assert_frame_equal(result, expected) + + result = df.rolling(window="3s").sum() + expected = df.copy() + expected["B"] = [0.0, 1, 3, 5, 7] + tm.assert_frame_equal(result, expected) + + result = df.rolling(window="4s", min_periods=1).sum() + expected = df.copy() + expected["B"] = [0.0, 1, 3, 6, 9] + tm.assert_frame_equal(result, expected) + + result = df.rolling(window="4s", min_periods=3).sum() + expected = df.copy() + expected["B"] = [np.nan, np.nan, 3, 6, 9] + tm.assert_frame_equal(result, expected) + + result = df.rolling(window="5s", min_periods=1).sum() + expected = df.copy() + expected["B"] = [0.0, 1, 3, 6, 10] + tm.assert_frame_equal(result, expected) + + def test_ragged_mean(self): + + df = self.ragged + result = df.rolling(window="1s", min_periods=1).mean() + expected = df.copy() + expected["B"] = [0.0, 1, 2, 3, 4] + tm.assert_frame_equal(result, expected) + + result = df.rolling(window="2s", min_periods=1).mean() + expected = df.copy() + expected["B"] = [0.0, 1, 1.5, 3.0, 3.5] + tm.assert_frame_equal(result, expected) + + def test_ragged_median(self): + + df = self.ragged + result = df.rolling(window="1s", min_periods=1).median() + expected = df.copy() + expected["B"] = [0.0, 1, 2, 3, 4] + tm.assert_frame_equal(result, expected) + + result = df.rolling(window="2s", min_periods=1).median() + expected = df.copy() + expected["B"] = [0.0, 1, 1.5, 3.0, 3.5] + tm.assert_frame_equal(result, expected) + + def test_ragged_quantile(self): + + df = self.ragged + result = df.rolling(window="1s", min_periods=1).quantile(0.5) + expected = df.copy() + expected["B"] = [0.0, 1, 2, 3, 4] + tm.assert_frame_equal(result, expected) + + result = df.rolling(window="2s", min_periods=1).quantile(0.5) + expected = df.copy() + expected["B"] = [0.0, 1, 1.5, 3.0, 3.5] + tm.assert_frame_equal(result, expected) + + def test_ragged_std(self): + + df = self.ragged + result = df.rolling(window="1s", min_periods=1).std(ddof=0) + expected = df.copy() + expected["B"] = [0.0] * 5 + tm.assert_frame_equal(result, expected) + + result = df.rolling(window="1s", min_periods=1).std(ddof=1) + expected = df.copy() + expected["B"] = [np.nan] * 5 + tm.assert_frame_equal(result, expected) + + result = df.rolling(window="3s", min_periods=1).std(ddof=0) + expected = df.copy() + expected["B"] = [0.0] + [0.5] * 4 + tm.assert_frame_equal(result, expected) + + result = df.rolling(window="5s", min_periods=1).std(ddof=1) + expected = df.copy() + expected["B"] = [np.nan, 0.707107, 1.0, 1.0, 1.290994] + tm.assert_frame_equal(result, expected) + + def test_ragged_var(self): + + df = self.ragged + result = df.rolling(window="1s", min_periods=1).var(ddof=0) + expected = df.copy() + expected["B"] = [0.0] * 5 + tm.assert_frame_equal(result, expected) + + result = df.rolling(window="1s", min_periods=1).var(ddof=1) + expected = df.copy() + expected["B"] = [np.nan] * 5 + tm.assert_frame_equal(result, expected) + + result = df.rolling(window="3s", min_periods=1).var(ddof=0) + expected = df.copy() + expected["B"] = [0.0] + [0.25] * 4 + tm.assert_frame_equal(result, expected) + + result = df.rolling(window="5s", min_periods=1).var(ddof=1) + expected = df.copy() + expected["B"] = [np.nan, 0.5, 1.0, 1.0, 1 + 2 / 3.0] + tm.assert_frame_equal(result, expected) + + def test_ragged_skew(self): + + df = self.ragged + result = df.rolling(window="3s", min_periods=1).skew() + expected = df.copy() + expected["B"] = [np.nan] * 5 + tm.assert_frame_equal(result, expected) + + result = df.rolling(window="5s", min_periods=1).skew() + expected = df.copy() + expected["B"] = [np.nan] * 2 + [0.0, 0.0, 0.0] + tm.assert_frame_equal(result, expected) + + def test_ragged_kurt(self): + + df = self.ragged + result = df.rolling(window="3s", min_periods=1).kurt() + expected = df.copy() + expected["B"] = [np.nan] * 5 + tm.assert_frame_equal(result, expected) + + result = df.rolling(window="5s", min_periods=1).kurt() + expected = df.copy() + expected["B"] = [np.nan] * 4 + [-1.2] + tm.assert_frame_equal(result, expected) + + def test_ragged_count(self): + + df = self.ragged + result = df.rolling(window="1s", min_periods=1).count() + expected = df.copy() + expected["B"] = [1.0, 1, 1, 1, 1] + tm.assert_frame_equal(result, expected) + + df = self.ragged + result = df.rolling(window="1s").count() + tm.assert_frame_equal(result, expected) + + result = df.rolling(window="2s", min_periods=1).count() + expected = df.copy() + expected["B"] = [1.0, 1, 2, 1, 2] + tm.assert_frame_equal(result, expected) + + result = df.rolling(window="2s", min_periods=2).count() + expected = df.copy() + expected["B"] = [np.nan, np.nan, 2, np.nan, 2] + tm.assert_frame_equal(result, expected) + + def test_regular_min(self): + + df = DataFrame( + {"A": date_range("20130101", periods=5, freq="s"), "B": [0.0, 1, 2, 3, 4]} + ).set_index("A") + result = df.rolling("1s").min() + expected = df.copy() + expected["B"] = [0.0, 1, 2, 3, 4] + tm.assert_frame_equal(result, expected) + + df = DataFrame( + {"A": date_range("20130101", periods=5, freq="s"), "B": [5, 4, 3, 4, 5]} + ).set_index("A") + + tm.assert_frame_equal(result, expected) + result = df.rolling("2s").min() + expected = df.copy() + expected["B"] = [5.0, 4, 3, 3, 4] + tm.assert_frame_equal(result, expected) + + result = df.rolling("5s").min() + expected = df.copy() + expected["B"] = [5.0, 4, 3, 3, 3] + tm.assert_frame_equal(result, expected) + + def test_ragged_min(self): + + df = self.ragged + + result = df.rolling(window="1s", min_periods=1).min() + expected = df.copy() + expected["B"] = [0.0, 1, 2, 3, 4] + tm.assert_frame_equal(result, expected) + + result = df.rolling(window="2s", min_periods=1).min() + expected = df.copy() + expected["B"] = [0.0, 1, 1, 3, 3] + tm.assert_frame_equal(result, expected) + + result = df.rolling(window="5s", min_periods=1).min() + expected = df.copy() + expected["B"] = [0.0, 0, 0, 1, 1] + tm.assert_frame_equal(result, expected) + + def test_perf_min(self): + + N = 10000 + + dfp = DataFrame( + {"B": np.random.randn(N)}, index=date_range("20130101", periods=N, freq="s") + ) + expected = dfp.rolling(2, min_periods=1).min() + result = dfp.rolling("2s").min() + assert ((result - expected) < 0.01).all().bool() + + expected = dfp.rolling(200, min_periods=1).min() + result = dfp.rolling("200s").min() + assert ((result - expected) < 0.01).all().bool() + + def test_ragged_max(self): + + df = self.ragged + + result = df.rolling(window="1s", min_periods=1).max() + expected = df.copy() + expected["B"] = [0.0, 1, 2, 3, 4] + tm.assert_frame_equal(result, expected) + + result = df.rolling(window="2s", min_periods=1).max() + expected = df.copy() + expected["B"] = [0.0, 1, 2, 3, 4] + tm.assert_frame_equal(result, expected) + + result = df.rolling(window="5s", min_periods=1).max() + expected = df.copy() + expected["B"] = [0.0, 1, 2, 3, 4] + tm.assert_frame_equal(result, expected) + + def test_ragged_apply(self, raw): + + df = self.ragged + + f = lambda x: 1 + result = df.rolling(window="1s", min_periods=1).apply(f, raw=raw) + expected = df.copy() + expected["B"] = 1.0 + tm.assert_frame_equal(result, expected) + + result = df.rolling(window="2s", min_periods=1).apply(f, raw=raw) + expected = df.copy() + expected["B"] = 1.0 + tm.assert_frame_equal(result, expected) + + result = df.rolling(window="5s", min_periods=1).apply(f, raw=raw) + expected = df.copy() + expected["B"] = 1.0 + tm.assert_frame_equal(result, expected) + + def test_all(self): + + # simple comparison of integer vs time-based windowing + df = self.regular * 2 + er = df.rolling(window=1) + r = df.rolling(window="1s") + + for f in [ + "sum", + "mean", + "count", + "median", + "std", + "var", + "kurt", + "skew", + "min", + "max", + ]: + + result = getattr(r, f)() + expected = getattr(er, f)() + tm.assert_frame_equal(result, expected) + + result = r.quantile(0.5) + expected = er.quantile(0.5) + tm.assert_frame_equal(result, expected) + + def test_all_apply(self, raw): + + df = self.regular * 2 + er = df.rolling(window=1) + r = df.rolling(window="1s") + + result = r.apply(lambda x: 1, raw=raw) + expected = er.apply(lambda x: 1, raw=raw) + tm.assert_frame_equal(result, expected) + + def test_all2(self): + + # more sophisticated comparison of integer vs. + # time-based windowing + df = DataFrame( + {"B": np.arange(50)}, index=date_range("20130101", periods=50, freq="H") + ) + # in-range data + dft = df.between_time("09:00", "16:00") + + r = dft.rolling(window="5H") + + for f in [ + "sum", + "mean", + "count", + "median", + "std", + "var", + "kurt", + "skew", + "min", + "max", + ]: + + result = getattr(r, f)() + + # we need to roll the days separately + # to compare with a time-based roll + # finally groupby-apply will return a multi-index + # so we need to drop the day + def agg_by_day(x): + x = x.between_time("09:00", "16:00") + return getattr(x.rolling(5, min_periods=1), f)() + + expected = ( + df.groupby(df.index.day) + .apply(agg_by_day) + .reset_index(level=0, drop=True) + ) + + tm.assert_frame_equal(result, expected) + + def test_groupby_monotonic(self): + + # GH 15130 + # we don't need to validate monotonicity when grouping + + data = [ + ["David", "1/1/2015", 100], + ["David", "1/5/2015", 500], + ["David", "5/30/2015", 50], + ["David", "7/25/2015", 50], + ["Ryan", "1/4/2014", 100], + ["Ryan", "1/19/2015", 500], + ["Ryan", "3/31/2016", 50], + ["Joe", "7/1/2015", 100], + ["Joe", "9/9/2015", 500], + ["Joe", "10/15/2015", 50], + ] + + df = DataFrame(data=data, columns=["name", "date", "amount"]) + df["date"] = to_datetime(df["date"]) + + expected = ( + df.set_index("date") + .groupby("name") + .apply(lambda x: x.rolling("180D")["amount"].sum()) + ) + result = df.groupby("name").rolling("180D", on="date")["amount"].sum() + tm.assert_series_equal(result, expected) + + def test_non_monotonic(self): + # GH 13966 (similar to #15130, closed by #15175) + + dates = date_range(start="2016-01-01 09:30:00", periods=20, freq="s") + df = DataFrame( + { + "A": [1] * 20 + [2] * 12 + [3] * 8, + "B": np.concatenate((dates, dates)), + "C": np.arange(40), + } + ) + + result = df.groupby("A").rolling("4s", on="B").C.mean() + expected = ( + df.set_index("B").groupby("A").apply(lambda x: x.rolling("4s")["C"].mean()) + ) + tm.assert_series_equal(result, expected) + + df2 = df.sort_values("B") + result = df2.groupby("A").rolling("4s", on="B").C.mean() + tm.assert_series_equal(result, expected) + + def test_rolling_cov_offset(self): + # GH16058 + + idx = date_range("2017-01-01", periods=24, freq="1h") + ss = Series(np.arange(len(idx)), index=idx) + + result = ss.rolling("2h").cov() + expected = Series([np.nan] + [0.5] * (len(idx) - 1), index=idx) + tm.assert_series_equal(result, expected) + + expected2 = ss.rolling(2, min_periods=1).cov() + tm.assert_series_equal(result, expected2) + + result = ss.rolling("3h").cov() + expected = Series([np.nan, 0.5] + [1.0] * (len(idx) - 2), index=idx) + tm.assert_series_equal(result, expected) + + expected2 = ss.rolling(3, min_periods=1).cov() + tm.assert_series_equal(result, expected2) diff --git a/pandas/tests/test_window.py b/pandas/tests/window/test_window.py similarity index 77% rename from pandas/tests/test_window.py rename to pandas/tests/window/test_window.py index 2df5460a05953..f778568553e9c 100644 --- a/pandas/tests/test_window.py +++ b/pandas/tests/window/test_window.py @@ -1,6 +1,6 @@ from collections import OrderedDict from datetime import datetime, timedelta -from itertools import product +from typing import Any import warnings from warnings import catch_warnings @@ -13,8 +13,7 @@ import pandas as pd from pandas import DataFrame, Index, Series, Timestamp, bdate_range, concat, isna, notna -from pandas.core.base import DataError, SpecificationError -from pandas.core.sorting import safe_sort +from pandas.core.base import SpecificationError import pandas.core.window as rwindow import pandas.util.testing as tm @@ -23,15 +22,8 @@ N, K = 100, 10 -def assert_equal(left, right): - if isinstance(left, Series): - tm.assert_series_equal(left, right) - else: - tm.assert_frame_equal(left, right) - - @pytest.fixture(params=[True, False]) -def raw(request): +def raw(request) -> bool: return request.param @@ -47,19 +39,34 @@ def raw(request): "barthann", ] ) -def win_types(request): +def win_types(request) -> str: return request.param @pytest.fixture(params=["kaiser", "gaussian", "general_gaussian", "exponential"]) -def win_types_special(request): +def win_types_special(request) -> str: return request.param @pytest.fixture( params=["sum", "mean", "median", "max", "min", "var", "std", "kurt", "skew"] ) -def arithmetic_win_operators(request): +def arithmetic_win_operators(request) -> str: + return request.param + + +@pytest.fixture(params=["right", "left", "both", "neither"]) +def closed(request) -> str: + return request.param + + +@pytest.fixture(params=[True, False]) +def center(request) -> bool: + return request.param + + +@pytest.fixture(params=[None, 1]) +def min_periods(request) -> Any: return request.param @@ -963,225 +970,6 @@ def test_numpy_compat(self, method): getattr(e, method)(dtype=np.float64) -# gh-12373 : rolling functions error on float32 data -# make sure rolling functions works for different dtypes -# -# NOTE that these are yielded tests and so _create_data -# is explicitly called. -# -# further note that we are only checking rolling for fully dtype -# compliance (though both expanding and ewm inherit) -class Dtype: - window = 2 - - funcs = { - "count": lambda v: v.count(), - "max": lambda v: v.max(), - "min": lambda v: v.min(), - "sum": lambda v: v.sum(), - "mean": lambda v: v.mean(), - "std": lambda v: v.std(), - "var": lambda v: v.var(), - "median": lambda v: v.median(), - } - - def get_expects(self): - expects = { - "sr1": { - "count": Series([1, 2, 2, 2, 2], dtype="float64"), - "max": Series([np.nan, 1, 2, 3, 4], dtype="float64"), - "min": Series([np.nan, 0, 1, 2, 3], dtype="float64"), - "sum": Series([np.nan, 1, 3, 5, 7], dtype="float64"), - "mean": Series([np.nan, 0.5, 1.5, 2.5, 3.5], dtype="float64"), - "std": Series([np.nan] + [np.sqrt(0.5)] * 4, dtype="float64"), - "var": Series([np.nan, 0.5, 0.5, 0.5, 0.5], dtype="float64"), - "median": Series([np.nan, 0.5, 1.5, 2.5, 3.5], dtype="float64"), - }, - "sr2": { - "count": Series([1, 2, 2, 2, 2], dtype="float64"), - "max": Series([np.nan, 10, 8, 6, 4], dtype="float64"), - "min": Series([np.nan, 8, 6, 4, 2], dtype="float64"), - "sum": Series([np.nan, 18, 14, 10, 6], dtype="float64"), - "mean": Series([np.nan, 9, 7, 5, 3], dtype="float64"), - "std": Series([np.nan] + [np.sqrt(2)] * 4, dtype="float64"), - "var": Series([np.nan, 2, 2, 2, 2], dtype="float64"), - "median": Series([np.nan, 9, 7, 5, 3], dtype="float64"), - }, - "df": { - "count": DataFrame( - {0: Series([1, 2, 2, 2, 2]), 1: Series([1, 2, 2, 2, 2])}, - dtype="float64", - ), - "max": DataFrame( - {0: Series([np.nan, 2, 4, 6, 8]), 1: Series([np.nan, 3, 5, 7, 9])}, - dtype="float64", - ), - "min": DataFrame( - {0: Series([np.nan, 0, 2, 4, 6]), 1: Series([np.nan, 1, 3, 5, 7])}, - dtype="float64", - ), - "sum": DataFrame( - { - 0: Series([np.nan, 2, 6, 10, 14]), - 1: Series([np.nan, 4, 8, 12, 16]), - }, - dtype="float64", - ), - "mean": DataFrame( - {0: Series([np.nan, 1, 3, 5, 7]), 1: Series([np.nan, 2, 4, 6, 8])}, - dtype="float64", - ), - "std": DataFrame( - { - 0: Series([np.nan] + [np.sqrt(2)] * 4), - 1: Series([np.nan] + [np.sqrt(2)] * 4), - }, - dtype="float64", - ), - "var": DataFrame( - {0: Series([np.nan, 2, 2, 2, 2]), 1: Series([np.nan, 2, 2, 2, 2])}, - dtype="float64", - ), - "median": DataFrame( - {0: Series([np.nan, 1, 3, 5, 7]), 1: Series([np.nan, 2, 4, 6, 8])}, - dtype="float64", - ), - }, - } - return expects - - def _create_dtype_data(self, dtype): - sr1 = Series(np.arange(5), dtype=dtype) - sr2 = Series(np.arange(10, 0, -2), dtype=dtype) - df = DataFrame(np.arange(10).reshape((5, 2)), dtype=dtype) - - data = {"sr1": sr1, "sr2": sr2, "df": df} - - return data - - def _create_data(self): - self.data = self._create_dtype_data(self.dtype) - self.expects = self.get_expects() - - def test_dtypes(self): - self._create_data() - for f_name, d_name in product(self.funcs.keys(), self.data.keys()): - - f = self.funcs[f_name] - d = self.data[d_name] - exp = self.expects[d_name][f_name] - self.check_dtypes(f, f_name, d, d_name, exp) - - def check_dtypes(self, f, f_name, d, d_name, exp): - roll = d.rolling(window=self.window) - result = f(roll) - tm.assert_almost_equal(result, exp) - - -class TestDtype_object(Dtype): - dtype = object - - -class Dtype_integer(Dtype): - pass - - -class TestDtype_int8(Dtype_integer): - dtype = np.int8 - - -class TestDtype_int16(Dtype_integer): - dtype = np.int16 - - -class TestDtype_int32(Dtype_integer): - dtype = np.int32 - - -class TestDtype_int64(Dtype_integer): - dtype = np.int64 - - -class Dtype_uinteger(Dtype): - pass - - -class TestDtype_uint8(Dtype_uinteger): - dtype = np.uint8 - - -class TestDtype_uint16(Dtype_uinteger): - dtype = np.uint16 - - -class TestDtype_uint32(Dtype_uinteger): - dtype = np.uint32 - - -class TestDtype_uint64(Dtype_uinteger): - dtype = np.uint64 - - -class Dtype_float(Dtype): - pass - - -class TestDtype_float16(Dtype_float): - dtype = np.float16 - - -class TestDtype_float32(Dtype_float): - dtype = np.float32 - - -class TestDtype_float64(Dtype_float): - dtype = np.float64 - - -class TestDtype_category(Dtype): - dtype = "category" - include_df = False - - def _create_dtype_data(self, dtype): - sr1 = Series(range(5), dtype=dtype) - sr2 = Series(range(10, 0, -2), dtype=dtype) - - data = {"sr1": sr1, "sr2": sr2} - - return data - - -class DatetimeLike(Dtype): - def check_dtypes(self, f, f_name, d, d_name, exp): - - roll = d.rolling(window=self.window) - if f_name == "count": - result = f(roll) - tm.assert_almost_equal(result, exp) - - else: - with pytest.raises(DataError): - f(roll) - - -class TestDtype_timedelta(DatetimeLike): - dtype = np.dtype("m8[ns]") - - -class TestDtype_datetime(DatetimeLike): - dtype = np.dtype("M8[ns]") - - -class TestDtype_datetime64UTC(DatetimeLike): - dtype = "datetime64[ns, UTC]" - - def _create_data(self): - pytest.skip( - "direct creation of extension dtype " - "datetime64[ns, UTC] is not supported ATM" - ) - - @pytest.mark.filterwarnings("ignore:can't resolve package:ImportWarning") class TestMoments(Base): def setup_method(self, method): @@ -2339,182 +2127,6 @@ def _check_ew(self, name=None, preserve_nan=False): assert result2.dtype == np.float_ -class TestPairwise: - - # GH 7738 - df1s = [ - DataFrame([[2, 4], [1, 2], [5, 2], [8, 1]], columns=[0, 1]), - DataFrame([[2, 4], [1, 2], [5, 2], [8, 1]], columns=[1, 0]), - DataFrame([[2, 4], [1, 2], [5, 2], [8, 1]], columns=[1, 1]), - DataFrame([[2, 4], [1, 2], [5, 2], [8, 1]], columns=["C", "C"]), - DataFrame([[2, 4], [1, 2], [5, 2], [8, 1]], columns=[1.0, 0]), - DataFrame([[2, 4], [1, 2], [5, 2], [8, 1]], columns=[0.0, 1]), - DataFrame([[2, 4], [1, 2], [5, 2], [8, 1]], columns=["C", 1]), - DataFrame([[2.0, 4.0], [1.0, 2.0], [5.0, 2.0], [8.0, 1.0]], columns=[1, 0.0]), - DataFrame([[2, 4.0], [1, 2.0], [5, 2.0], [8, 1.0]], columns=[0, 1.0]), - DataFrame([[2, 4], [1, 2], [5, 2], [8, 1.0]], columns=[1.0, "X"]), - ] - df2 = DataFrame( - [[None, 1, 1], [None, 1, 2], [None, 3, 2], [None, 8, 1]], - columns=["Y", "Z", "X"], - ) - s = Series([1, 1, 3, 8]) - - def compare(self, result, expected): - - # since we have sorted the results - # we can only compare non-nans - result = result.dropna().values - expected = expected.dropna().values - - tm.assert_numpy_array_equal(result, expected, check_dtype=False) - - @pytest.mark.parametrize("f", [lambda x: x.cov(), lambda x: x.corr()]) - def test_no_flex(self, f): - - # DataFrame methods (which do not call _flex_binary_moment()) - - results = [f(df) for df in self.df1s] - for (df, result) in zip(self.df1s, results): - tm.assert_index_equal(result.index, df.columns) - tm.assert_index_equal(result.columns, df.columns) - for i, result in enumerate(results): - if i > 0: - self.compare(result, results[0]) - - @pytest.mark.parametrize( - "f", - [ - lambda x: x.expanding().cov(pairwise=True), - lambda x: x.expanding().corr(pairwise=True), - lambda x: x.rolling(window=3).cov(pairwise=True), - lambda x: x.rolling(window=3).corr(pairwise=True), - lambda x: x.ewm(com=3).cov(pairwise=True), - lambda x: x.ewm(com=3).corr(pairwise=True), - ], - ) - def test_pairwise_with_self(self, f): - - # DataFrame with itself, pairwise=True - # note that we may construct the 1st level of the MI - # in a non-monotonic way, so compare accordingly - results = [] - for i, df in enumerate(self.df1s): - result = f(df) - tm.assert_index_equal(result.index.levels[0], df.index, check_names=False) - tm.assert_numpy_array_equal( - safe_sort(result.index.levels[1]), safe_sort(df.columns.unique()) - ) - tm.assert_index_equal(result.columns, df.columns) - results.append(df) - - for i, result in enumerate(results): - if i > 0: - self.compare(result, results[0]) - - @pytest.mark.parametrize( - "f", - [ - lambda x: x.expanding().cov(pairwise=False), - lambda x: x.expanding().corr(pairwise=False), - lambda x: x.rolling(window=3).cov(pairwise=False), - lambda x: x.rolling(window=3).corr(pairwise=False), - lambda x: x.ewm(com=3).cov(pairwise=False), - lambda x: x.ewm(com=3).corr(pairwise=False), - ], - ) - def test_no_pairwise_with_self(self, f): - - # DataFrame with itself, pairwise=False - results = [f(df) for df in self.df1s] - for (df, result) in zip(self.df1s, results): - tm.assert_index_equal(result.index, df.index) - tm.assert_index_equal(result.columns, df.columns) - for i, result in enumerate(results): - if i > 0: - self.compare(result, results[0]) - - @pytest.mark.parametrize( - "f", - [ - lambda x, y: x.expanding().cov(y, pairwise=True), - lambda x, y: x.expanding().corr(y, pairwise=True), - lambda x, y: x.rolling(window=3).cov(y, pairwise=True), - lambda x, y: x.rolling(window=3).corr(y, pairwise=True), - lambda x, y: x.ewm(com=3).cov(y, pairwise=True), - lambda x, y: x.ewm(com=3).corr(y, pairwise=True), - ], - ) - def test_pairwise_with_other(self, f): - - # DataFrame with another DataFrame, pairwise=True - results = [f(df, self.df2) for df in self.df1s] - for (df, result) in zip(self.df1s, results): - tm.assert_index_equal(result.index.levels[0], df.index, check_names=False) - tm.assert_numpy_array_equal( - safe_sort(result.index.levels[1]), safe_sort(self.df2.columns.unique()) - ) - for i, result in enumerate(results): - if i > 0: - self.compare(result, results[0]) - - @pytest.mark.parametrize( - "f", - [ - lambda x, y: x.expanding().cov(y, pairwise=False), - lambda x, y: x.expanding().corr(y, pairwise=False), - lambda x, y: x.rolling(window=3).cov(y, pairwise=False), - lambda x, y: x.rolling(window=3).corr(y, pairwise=False), - lambda x, y: x.ewm(com=3).cov(y, pairwise=False), - lambda x, y: x.ewm(com=3).corr(y, pairwise=False), - ], - ) - def test_no_pairwise_with_other(self, f): - - # DataFrame with another DataFrame, pairwise=False - results = [ - f(df, self.df2) if df.columns.is_unique else None for df in self.df1s - ] - for (df, result) in zip(self.df1s, results): - if result is not None: - with catch_warnings(record=True): - warnings.simplefilter("ignore", RuntimeWarning) - # we can have int and str columns - expected_index = df.index.union(self.df2.index) - expected_columns = df.columns.union(self.df2.columns) - tm.assert_index_equal(result.index, expected_index) - tm.assert_index_equal(result.columns, expected_columns) - else: - with pytest.raises(ValueError, match="'arg1' columns are not unique"): - f(df, self.df2) - with pytest.raises(ValueError, match="'arg2' columns are not unique"): - f(self.df2, df) - - @pytest.mark.parametrize( - "f", - [ - lambda x, y: x.expanding().cov(y), - lambda x, y: x.expanding().corr(y), - lambda x, y: x.rolling(window=3).cov(y), - lambda x, y: x.rolling(window=3).corr(y), - lambda x, y: x.ewm(com=3).cov(y), - lambda x, y: x.ewm(com=3).corr(y), - ], - ) - def test_pairwise_with_series(self, f): - - # DataFrame with a Series - results = [f(df, self.s) for df in self.df1s] + [ - f(self.s, df) for df in self.df1s - ] - for (df, result) in zip(self.df1s, results): - tm.assert_index_equal(result.index, df.index) - tm.assert_index_equal(result.columns, df.columns) - for i, result in enumerate(results): - if i > 0: - self.compare(result, results[0]) - - # create the data only once as we are not setting it def _create_consistency_data(): def create_series(): @@ -2741,7 +2353,7 @@ def _non_null_values(x): if mock_mean: # check that mean equals mock_mean expected = mock_mean(x) - assert_equal(mean_x, expected.astype("float64")) + tm.assert_equal(mean_x, expected.astype("float64")) # check that correlation of a series with itself is either 1 or NaN corr_x_x = corr(x, x) @@ -2755,18 +2367,18 @@ def _non_null_values(x): # check mean of constant series expected = x * np.nan expected[count_x >= max(min_periods, 1)] = exp - assert_equal(mean_x, expected) + tm.assert_equal(mean_x, expected) # check correlation of constant series with itself is NaN expected[:] = np.nan - assert_equal(corr_x_x, expected) + tm.assert_equal(corr_x_x, expected) if var_unbiased and var_biased and var_debiasing_factors: # check variance debiasing factors var_unbiased_x = var_unbiased(x) var_biased_x = var_biased(x) var_debiasing_factors_x = var_debiasing_factors(x) - assert_equal(var_unbiased_x, var_biased_x * var_debiasing_factors_x) + tm.assert_equal(var_unbiased_x, var_biased_x * var_debiasing_factors_x) for (std, var, cov) in [ (std_biased, var_biased, cov_biased), @@ -2783,15 +2395,15 @@ def _non_null_values(x): assert not (cov_x_x < 0).any().any() # check that var(x) == cov(x, x) - assert_equal(var_x, cov_x_x) + tm.assert_equal(var_x, cov_x_x) # check that var(x) == std(x)^2 - assert_equal(var_x, std_x * std_x) + tm.assert_equal(var_x, std_x * std_x) if var is var_biased: # check that biased var(x) == mean(x^2) - mean(x)^2 mean_x2 = mean(x * x) - assert_equal(var_x, mean_x2 - (mean_x * mean_x)) + tm.assert_equal(var_x, mean_x2 - (mean_x * mean_x)) if is_constant: # check that variance of constant series is identically 0 @@ -2800,7 +2412,7 @@ def _non_null_values(x): expected[count_x >= max(min_periods, 1)] = 0.0 if var is var_unbiased: expected[count_x < 2] = np.nan - assert_equal(var_x, expected) + tm.assert_equal(var_x, expected) if isinstance(x, Series): for (y, is_constant, no_nans) in self.data: @@ -2812,31 +2424,33 @@ def _non_null_values(x): # check that cor(x, y) is symmetric corr_x_y = corr(x, y) corr_y_x = corr(y, x) - assert_equal(corr_x_y, corr_y_x) + tm.assert_equal(corr_x_y, corr_y_x) if cov: # check that cov(x, y) is symmetric cov_x_y = cov(x, y) cov_y_x = cov(y, x) - assert_equal(cov_x_y, cov_y_x) + tm.assert_equal(cov_x_y, cov_y_x) # check that cov(x, y) == (var(x+y) - var(x) - # var(y)) / 2 var_x_plus_y = var(x + y) var_y = var(y) - assert_equal(cov_x_y, 0.5 * (var_x_plus_y - var_x - var_y)) + tm.assert_equal( + cov_x_y, 0.5 * (var_x_plus_y - var_x - var_y) + ) # check that corr(x, y) == cov(x, y) / (std(x) * # std(y)) std_y = std(y) - assert_equal(corr_x_y, cov_x_y / (std_x * std_y)) + tm.assert_equal(corr_x_y, cov_x_y / (std_x * std_y)) if cov is cov_biased: # check that biased cov(x, y) == mean(x*y) - # mean(x)*mean(y) mean_y = mean(y) mean_x_times_y = mean(x * y) - assert_equal( + tm.assert_equal( cov_x_y, mean_x_times_y - (mean_x * mean_y) ) @@ -3026,7 +2640,7 @@ def test_expanding_consistency(self, min_periods): # GH 9422 if name in ["sum", "prod"]: - assert_equal(expanding_f_result, expanding_apply_f_result) + tm.assert_equal(expanding_f_result, expanding_apply_f_result) @pytest.mark.slow @pytest.mark.parametrize( @@ -3147,7 +2761,7 @@ def test_rolling_consistency(self, window, min_periods, center): # GH 9422 if name in ["sum", "prod"]: - assert_equal(rolling_f_result, rolling_apply_f_result) + tm.assert_equal(rolling_f_result, rolling_apply_f_result) # binary moments def test_rolling_cov(self): @@ -4057,695 +3671,3 @@ def test_expanding_apply(self, raw): result = r.apply(lambda x: x.sum(), raw=raw) expected = g.apply(lambda x: x.expanding().apply(lambda y: y.sum(), raw=raw)) tm.assert_frame_equal(result, expected) - - -class TestRollingTS: - - # rolling time-series friendly - # xref GH13327 - - def setup_method(self, method): - - self.regular = DataFrame( - {"A": pd.date_range("20130101", periods=5, freq="s"), "B": range(5)} - ).set_index("A") - - self.ragged = DataFrame({"B": range(5)}) - self.ragged.index = [ - Timestamp("20130101 09:00:00"), - Timestamp("20130101 09:00:02"), - Timestamp("20130101 09:00:03"), - Timestamp("20130101 09:00:05"), - Timestamp("20130101 09:00:06"), - ] - - def test_doc_string(self): - - df = DataFrame( - {"B": [0, 1, 2, np.nan, 4]}, - index=[ - Timestamp("20130101 09:00:00"), - Timestamp("20130101 09:00:02"), - Timestamp("20130101 09:00:03"), - Timestamp("20130101 09:00:05"), - Timestamp("20130101 09:00:06"), - ], - ) - df - df.rolling("2s").sum() - - def test_valid(self): - - df = self.regular - - # not a valid freq - with pytest.raises(ValueError): - df.rolling(window="foobar") - - # not a datetimelike index - with pytest.raises(ValueError): - df.reset_index().rolling(window="foobar") - - # non-fixed freqs - for freq in ["2MS", pd.offsets.MonthBegin(2)]: - with pytest.raises(ValueError): - df.rolling(window=freq) - - for freq in ["1D", pd.offsets.Day(2), "2ms"]: - df.rolling(window=freq) - - # non-integer min_periods - for minp in [1.0, "foo", np.array([1, 2, 3])]: - with pytest.raises(ValueError): - df.rolling(window="1D", min_periods=minp) - - # center is not implemented - with pytest.raises(NotImplementedError): - df.rolling(window="1D", center=True) - - def test_on(self): - - df = self.regular - - # not a valid column - with pytest.raises(ValueError): - df.rolling(window="2s", on="foobar") - - # column is valid - df = df.copy() - df["C"] = pd.date_range("20130101", periods=len(df)) - df.rolling(window="2d", on="C").sum() - - # invalid columns - with pytest.raises(ValueError): - df.rolling(window="2d", on="B") - - # ok even though on non-selected - df.rolling(window="2d", on="C").B.sum() - - def test_monotonic_on(self): - - # on/index must be monotonic - df = DataFrame( - {"A": pd.date_range("20130101", periods=5, freq="s"), "B": range(5)} - ) - - assert df.A.is_monotonic - df.rolling("2s", on="A").sum() - - df = df.set_index("A") - assert df.index.is_monotonic - df.rolling("2s").sum() - - # non-monotonic - df.index = reversed(df.index.tolist()) - assert not df.index.is_monotonic - - with pytest.raises(ValueError): - df.rolling("2s").sum() - - df = df.reset_index() - with pytest.raises(ValueError): - df.rolling("2s", on="A").sum() - - def test_frame_on(self): - - df = DataFrame( - { - "B": range(5), - "C": pd.date_range("20130101 09:00:00", periods=5, freq="3s"), - } - ) - - df["A"] = [ - Timestamp("20130101 09:00:00"), - Timestamp("20130101 09:00:02"), - Timestamp("20130101 09:00:03"), - Timestamp("20130101 09:00:05"), - Timestamp("20130101 09:00:06"), - ] - - # we are doing simulating using 'on' - expected = df.set_index("A").rolling("2s").B.sum().reset_index(drop=True) - - result = df.rolling("2s", on="A").B.sum() - tm.assert_series_equal(result, expected) - - # test as a frame - # we should be ignoring the 'on' as an aggregation column - # note that the expected is setting, computing, and resetting - # so the columns need to be switched compared - # to the actual result where they are ordered as in the - # original - expected = ( - df.set_index("A").rolling("2s")[["B"]].sum().reset_index()[["B", "A"]] - ) - - result = df.rolling("2s", on="A")[["B"]].sum() - tm.assert_frame_equal(result, expected) - - def test_frame_on2(self): - - # using multiple aggregation columns - df = DataFrame( - { - "A": [0, 1, 2, 3, 4], - "B": [0, 1, 2, np.nan, 4], - "C": Index( - [ - Timestamp("20130101 09:00:00"), - Timestamp("20130101 09:00:02"), - Timestamp("20130101 09:00:03"), - Timestamp("20130101 09:00:05"), - Timestamp("20130101 09:00:06"), - ] - ), - }, - columns=["A", "C", "B"], - ) - - expected1 = DataFrame( - {"A": [0.0, 1, 3, 3, 7], "B": [0, 1, 3, np.nan, 4], "C": df["C"]}, - columns=["A", "C", "B"], - ) - - result = df.rolling("2s", on="C").sum() - expected = expected1 - tm.assert_frame_equal(result, expected) - - expected = Series([0, 1, 3, np.nan, 4], name="B") - result = df.rolling("2s", on="C").B.sum() - tm.assert_series_equal(result, expected) - - expected = expected1[["A", "B", "C"]] - result = df.rolling("2s", on="C")[["A", "B", "C"]].sum() - tm.assert_frame_equal(result, expected) - - def test_basic_regular(self): - - df = self.regular.copy() - - df.index = pd.date_range("20130101", periods=5, freq="D") - expected = df.rolling(window=1, min_periods=1).sum() - result = df.rolling(window="1D").sum() - tm.assert_frame_equal(result, expected) - - df.index = pd.date_range("20130101", periods=5, freq="2D") - expected = df.rolling(window=1, min_periods=1).sum() - result = df.rolling(window="2D", min_periods=1).sum() - tm.assert_frame_equal(result, expected) - - expected = df.rolling(window=1, min_periods=1).sum() - result = df.rolling(window="2D", min_periods=1).sum() - tm.assert_frame_equal(result, expected) - - expected = df.rolling(window=1).sum() - result = df.rolling(window="2D").sum() - tm.assert_frame_equal(result, expected) - - def test_min_periods(self): - - # compare for min_periods - df = self.regular - - # these slightly different - expected = df.rolling(2, min_periods=1).sum() - result = df.rolling("2s").sum() - tm.assert_frame_equal(result, expected) - - expected = df.rolling(2, min_periods=1).sum() - result = df.rolling("2s", min_periods=1).sum() - tm.assert_frame_equal(result, expected) - - def test_closed(self): - - # xref GH13965 - - df = DataFrame( - {"A": [1] * 5}, - index=[ - Timestamp("20130101 09:00:01"), - Timestamp("20130101 09:00:02"), - Timestamp("20130101 09:00:03"), - Timestamp("20130101 09:00:04"), - Timestamp("20130101 09:00:06"), - ], - ) - - # closed must be 'right', 'left', 'both', 'neither' - with pytest.raises(ValueError): - self.regular.rolling(window="2s", closed="blabla") - - expected = df.copy() - expected["A"] = [1.0, 2, 2, 2, 1] - result = df.rolling("2s", closed="right").sum() - tm.assert_frame_equal(result, expected) - - # default should be 'right' - result = df.rolling("2s").sum() - tm.assert_frame_equal(result, expected) - - expected = df.copy() - expected["A"] = [1.0, 2, 3, 3, 2] - result = df.rolling("2s", closed="both").sum() - tm.assert_frame_equal(result, expected) - - expected = df.copy() - expected["A"] = [np.nan, 1.0, 2, 2, 1] - result = df.rolling("2s", closed="left").sum() - tm.assert_frame_equal(result, expected) - - expected = df.copy() - expected["A"] = [np.nan, 1.0, 1, 1, np.nan] - result = df.rolling("2s", closed="neither").sum() - tm.assert_frame_equal(result, expected) - - def test_ragged_sum(self): - - df = self.ragged - result = df.rolling(window="1s", min_periods=1).sum() - expected = df.copy() - expected["B"] = [0.0, 1, 2, 3, 4] - tm.assert_frame_equal(result, expected) - - result = df.rolling(window="2s", min_periods=1).sum() - expected = df.copy() - expected["B"] = [0.0, 1, 3, 3, 7] - tm.assert_frame_equal(result, expected) - - result = df.rolling(window="2s", min_periods=2).sum() - expected = df.copy() - expected["B"] = [np.nan, np.nan, 3, np.nan, 7] - tm.assert_frame_equal(result, expected) - - result = df.rolling(window="3s", min_periods=1).sum() - expected = df.copy() - expected["B"] = [0.0, 1, 3, 5, 7] - tm.assert_frame_equal(result, expected) - - result = df.rolling(window="3s").sum() - expected = df.copy() - expected["B"] = [0.0, 1, 3, 5, 7] - tm.assert_frame_equal(result, expected) - - result = df.rolling(window="4s", min_periods=1).sum() - expected = df.copy() - expected["B"] = [0.0, 1, 3, 6, 9] - tm.assert_frame_equal(result, expected) - - result = df.rolling(window="4s", min_periods=3).sum() - expected = df.copy() - expected["B"] = [np.nan, np.nan, 3, 6, 9] - tm.assert_frame_equal(result, expected) - - result = df.rolling(window="5s", min_periods=1).sum() - expected = df.copy() - expected["B"] = [0.0, 1, 3, 6, 10] - tm.assert_frame_equal(result, expected) - - def test_ragged_mean(self): - - df = self.ragged - result = df.rolling(window="1s", min_periods=1).mean() - expected = df.copy() - expected["B"] = [0.0, 1, 2, 3, 4] - tm.assert_frame_equal(result, expected) - - result = df.rolling(window="2s", min_periods=1).mean() - expected = df.copy() - expected["B"] = [0.0, 1, 1.5, 3.0, 3.5] - tm.assert_frame_equal(result, expected) - - def test_ragged_median(self): - - df = self.ragged - result = df.rolling(window="1s", min_periods=1).median() - expected = df.copy() - expected["B"] = [0.0, 1, 2, 3, 4] - tm.assert_frame_equal(result, expected) - - result = df.rolling(window="2s", min_periods=1).median() - expected = df.copy() - expected["B"] = [0.0, 1, 1.5, 3.0, 3.5] - tm.assert_frame_equal(result, expected) - - def test_ragged_quantile(self): - - df = self.ragged - result = df.rolling(window="1s", min_periods=1).quantile(0.5) - expected = df.copy() - expected["B"] = [0.0, 1, 2, 3, 4] - tm.assert_frame_equal(result, expected) - - result = df.rolling(window="2s", min_periods=1).quantile(0.5) - expected = df.copy() - expected["B"] = [0.0, 1, 1.5, 3.0, 3.5] - tm.assert_frame_equal(result, expected) - - def test_ragged_std(self): - - df = self.ragged - result = df.rolling(window="1s", min_periods=1).std(ddof=0) - expected = df.copy() - expected["B"] = [0.0] * 5 - tm.assert_frame_equal(result, expected) - - result = df.rolling(window="1s", min_periods=1).std(ddof=1) - expected = df.copy() - expected["B"] = [np.nan] * 5 - tm.assert_frame_equal(result, expected) - - result = df.rolling(window="3s", min_periods=1).std(ddof=0) - expected = df.copy() - expected["B"] = [0.0] + [0.5] * 4 - tm.assert_frame_equal(result, expected) - - result = df.rolling(window="5s", min_periods=1).std(ddof=1) - expected = df.copy() - expected["B"] = [np.nan, 0.707107, 1.0, 1.0, 1.290994] - tm.assert_frame_equal(result, expected) - - def test_ragged_var(self): - - df = self.ragged - result = df.rolling(window="1s", min_periods=1).var(ddof=0) - expected = df.copy() - expected["B"] = [0.0] * 5 - tm.assert_frame_equal(result, expected) - - result = df.rolling(window="1s", min_periods=1).var(ddof=1) - expected = df.copy() - expected["B"] = [np.nan] * 5 - tm.assert_frame_equal(result, expected) - - result = df.rolling(window="3s", min_periods=1).var(ddof=0) - expected = df.copy() - expected["B"] = [0.0] + [0.25] * 4 - tm.assert_frame_equal(result, expected) - - result = df.rolling(window="5s", min_periods=1).var(ddof=1) - expected = df.copy() - expected["B"] = [np.nan, 0.5, 1.0, 1.0, 1 + 2 / 3.0] - tm.assert_frame_equal(result, expected) - - def test_ragged_skew(self): - - df = self.ragged - result = df.rolling(window="3s", min_periods=1).skew() - expected = df.copy() - expected["B"] = [np.nan] * 5 - tm.assert_frame_equal(result, expected) - - result = df.rolling(window="5s", min_periods=1).skew() - expected = df.copy() - expected["B"] = [np.nan] * 2 + [0.0, 0.0, 0.0] - tm.assert_frame_equal(result, expected) - - def test_ragged_kurt(self): - - df = self.ragged - result = df.rolling(window="3s", min_periods=1).kurt() - expected = df.copy() - expected["B"] = [np.nan] * 5 - tm.assert_frame_equal(result, expected) - - result = df.rolling(window="5s", min_periods=1).kurt() - expected = df.copy() - expected["B"] = [np.nan] * 4 + [-1.2] - tm.assert_frame_equal(result, expected) - - def test_ragged_count(self): - - df = self.ragged - result = df.rolling(window="1s", min_periods=1).count() - expected = df.copy() - expected["B"] = [1.0, 1, 1, 1, 1] - tm.assert_frame_equal(result, expected) - - df = self.ragged - result = df.rolling(window="1s").count() - tm.assert_frame_equal(result, expected) - - result = df.rolling(window="2s", min_periods=1).count() - expected = df.copy() - expected["B"] = [1.0, 1, 2, 1, 2] - tm.assert_frame_equal(result, expected) - - result = df.rolling(window="2s", min_periods=2).count() - expected = df.copy() - expected["B"] = [np.nan, np.nan, 2, np.nan, 2] - tm.assert_frame_equal(result, expected) - - def test_regular_min(self): - - df = DataFrame( - { - "A": pd.date_range("20130101", periods=5, freq="s"), - "B": [0.0, 1, 2, 3, 4], - } - ).set_index("A") - result = df.rolling("1s").min() - expected = df.copy() - expected["B"] = [0.0, 1, 2, 3, 4] - tm.assert_frame_equal(result, expected) - - df = DataFrame( - {"A": pd.date_range("20130101", periods=5, freq="s"), "B": [5, 4, 3, 4, 5]} - ).set_index("A") - - tm.assert_frame_equal(result, expected) - result = df.rolling("2s").min() - expected = df.copy() - expected["B"] = [5.0, 4, 3, 3, 4] - tm.assert_frame_equal(result, expected) - - result = df.rolling("5s").min() - expected = df.copy() - expected["B"] = [5.0, 4, 3, 3, 3] - tm.assert_frame_equal(result, expected) - - def test_ragged_min(self): - - df = self.ragged - - result = df.rolling(window="1s", min_periods=1).min() - expected = df.copy() - expected["B"] = [0.0, 1, 2, 3, 4] - tm.assert_frame_equal(result, expected) - - result = df.rolling(window="2s", min_periods=1).min() - expected = df.copy() - expected["B"] = [0.0, 1, 1, 3, 3] - tm.assert_frame_equal(result, expected) - - result = df.rolling(window="5s", min_periods=1).min() - expected = df.copy() - expected["B"] = [0.0, 0, 0, 1, 1] - tm.assert_frame_equal(result, expected) - - def test_perf_min(self): - - N = 10000 - - dfp = DataFrame( - {"B": np.random.randn(N)}, - index=pd.date_range("20130101", periods=N, freq="s"), - ) - expected = dfp.rolling(2, min_periods=1).min() - result = dfp.rolling("2s").min() - assert ((result - expected) < 0.01).all().bool() - - expected = dfp.rolling(200, min_periods=1).min() - result = dfp.rolling("200s").min() - assert ((result - expected) < 0.01).all().bool() - - def test_ragged_max(self): - - df = self.ragged - - result = df.rolling(window="1s", min_periods=1).max() - expected = df.copy() - expected["B"] = [0.0, 1, 2, 3, 4] - tm.assert_frame_equal(result, expected) - - result = df.rolling(window="2s", min_periods=1).max() - expected = df.copy() - expected["B"] = [0.0, 1, 2, 3, 4] - tm.assert_frame_equal(result, expected) - - result = df.rolling(window="5s", min_periods=1).max() - expected = df.copy() - expected["B"] = [0.0, 1, 2, 3, 4] - tm.assert_frame_equal(result, expected) - - def test_ragged_apply(self, raw): - - df = self.ragged - - f = lambda x: 1 - result = df.rolling(window="1s", min_periods=1).apply(f, raw=raw) - expected = df.copy() - expected["B"] = 1.0 - tm.assert_frame_equal(result, expected) - - result = df.rolling(window="2s", min_periods=1).apply(f, raw=raw) - expected = df.copy() - expected["B"] = 1.0 - tm.assert_frame_equal(result, expected) - - result = df.rolling(window="5s", min_periods=1).apply(f, raw=raw) - expected = df.copy() - expected["B"] = 1.0 - tm.assert_frame_equal(result, expected) - - def test_all(self): - - # simple comparison of integer vs time-based windowing - df = self.regular * 2 - er = df.rolling(window=1) - r = df.rolling(window="1s") - - for f in [ - "sum", - "mean", - "count", - "median", - "std", - "var", - "kurt", - "skew", - "min", - "max", - ]: - - result = getattr(r, f)() - expected = getattr(er, f)() - tm.assert_frame_equal(result, expected) - - result = r.quantile(0.5) - expected = er.quantile(0.5) - tm.assert_frame_equal(result, expected) - - def test_all_apply(self, raw): - - df = self.regular * 2 - er = df.rolling(window=1) - r = df.rolling(window="1s") - - result = r.apply(lambda x: 1, raw=raw) - expected = er.apply(lambda x: 1, raw=raw) - tm.assert_frame_equal(result, expected) - - def test_all2(self): - - # more sophisticated comparison of integer vs. - # time-based windowing - df = DataFrame( - {"B": np.arange(50)}, index=pd.date_range("20130101", periods=50, freq="H") - ) - # in-range data - dft = df.between_time("09:00", "16:00") - - r = dft.rolling(window="5H") - - for f in [ - "sum", - "mean", - "count", - "median", - "std", - "var", - "kurt", - "skew", - "min", - "max", - ]: - - result = getattr(r, f)() - - # we need to roll the days separately - # to compare with a time-based roll - # finally groupby-apply will return a multi-index - # so we need to drop the day - def agg_by_day(x): - x = x.between_time("09:00", "16:00") - return getattr(x.rolling(5, min_periods=1), f)() - - expected = ( - df.groupby(df.index.day) - .apply(agg_by_day) - .reset_index(level=0, drop=True) - ) - - tm.assert_frame_equal(result, expected) - - def test_groupby_monotonic(self): - - # GH 15130 - # we don't need to validate monotonicity when grouping - - data = [ - ["David", "1/1/2015", 100], - ["David", "1/5/2015", 500], - ["David", "5/30/2015", 50], - ["David", "7/25/2015", 50], - ["Ryan", "1/4/2014", 100], - ["Ryan", "1/19/2015", 500], - ["Ryan", "3/31/2016", 50], - ["Joe", "7/1/2015", 100], - ["Joe", "9/9/2015", 500], - ["Joe", "10/15/2015", 50], - ] - - df = DataFrame(data=data, columns=["name", "date", "amount"]) - df["date"] = pd.to_datetime(df["date"]) - - expected = ( - df.set_index("date") - .groupby("name") - .apply(lambda x: x.rolling("180D")["amount"].sum()) - ) - result = df.groupby("name").rolling("180D", on="date")["amount"].sum() - tm.assert_series_equal(result, expected) - - def test_non_monotonic(self): - # GH 13966 (similar to #15130, closed by #15175) - - dates = pd.date_range(start="2016-01-01 09:30:00", periods=20, freq="s") - df = DataFrame( - { - "A": [1] * 20 + [2] * 12 + [3] * 8, - "B": np.concatenate((dates, dates)), - "C": np.arange(40), - } - ) - - result = df.groupby("A").rolling("4s", on="B").C.mean() - expected = ( - df.set_index("B").groupby("A").apply(lambda x: x.rolling("4s")["C"].mean()) - ) - tm.assert_series_equal(result, expected) - - df2 = df.sort_values("B") - result = df2.groupby("A").rolling("4s", on="B").C.mean() - tm.assert_series_equal(result, expected) - - def test_rolling_cov_offset(self): - # GH16058 - - idx = pd.date_range("2017-01-01", periods=24, freq="1h") - ss = Series(np.arange(len(idx)), index=idx) - - result = ss.rolling("2h").cov() - expected = Series([np.nan] + [0.5] * (len(idx) - 1), index=idx) - tm.assert_series_equal(result, expected) - - expected2 = ss.rolling(2, min_periods=1).cov() - tm.assert_series_equal(result, expected2) - - result = ss.rolling("3h").cov() - expected = Series([np.nan, 0.5] + [1.0] * (len(idx) - 2), index=idx) - tm.assert_series_equal(result, expected) - - expected2 = ss.rolling(3, min_periods=1).cov() - tm.assert_series_equal(result, expected2) From 24b1305798f2ed733828ec5230c0dbdae48669aa Mon Sep 17 00:00:00 2001 From: Matt Roeschke Date: Wed, 10 Jul 2019 00:10:50 -0700 Subject: [PATCH 2/3] Add conftest and fix tests --- pandas/tests/window/conftest.py | 49 ++++++++++++++ pandas/tests/window/test_window.py | 103 +++++++++-------------------- 2 files changed, 80 insertions(+), 72 deletions(-) create mode 100644 pandas/tests/window/conftest.py diff --git a/pandas/tests/window/conftest.py b/pandas/tests/window/conftest.py new file mode 100644 index 0000000000000..7ea4be25ca2a6 --- /dev/null +++ b/pandas/tests/window/conftest.py @@ -0,0 +1,49 @@ +import pytest + + +@pytest.fixture(params=[True, False]) +def raw(request): + return request.param + + +@pytest.fixture( + params=[ + "triang", + "blackman", + "hamming", + "bartlett", + "bohman", + "blackmanharris", + "nuttall", + "barthann", + ] +) +def win_types(request): + return request.param + + +@pytest.fixture(params=["kaiser", "gaussian", "general_gaussian", "exponential"]) +def win_types_special(request): + return request.param + + +@pytest.fixture( + params=["sum", "mean", "median", "max", "min", "var", "std", "kurt", "skew"] +) +def arithmetic_win_operators(request): + return request.param + + +@pytest.fixture(params=["right", "left", "both", "neither"]) +def closed(request): + return request.param + + +@pytest.fixture(params=[True, False]) +def center(request): + return request.param + + +@pytest.fixture(params=[None, 1]) +def min_periods(request): + return request.param diff --git a/pandas/tests/window/test_window.py b/pandas/tests/window/test_window.py index f778568553e9c..c88abaf88ac69 100644 --- a/pandas/tests/window/test_window.py +++ b/pandas/tests/window/test_window.py @@ -22,54 +22,6 @@ N, K = 100, 10 -@pytest.fixture(params=[True, False]) -def raw(request) -> bool: - return request.param - - -@pytest.fixture( - params=[ - "triang", - "blackman", - "hamming", - "bartlett", - "bohman", - "blackmanharris", - "nuttall", - "barthann", - ] -) -def win_types(request) -> str: - return request.param - - -@pytest.fixture(params=["kaiser", "gaussian", "general_gaussian", "exponential"]) -def win_types_special(request) -> str: - return request.param - - -@pytest.fixture( - params=["sum", "mean", "median", "max", "min", "var", "std", "kurt", "skew"] -) -def arithmetic_win_operators(request) -> str: - return request.param - - -@pytest.fixture(params=["right", "left", "both", "neither"]) -def closed(request) -> str: - return request.param - - -@pytest.fixture(params=[True, False]) -def center(request) -> bool: - return request.param - - -@pytest.fixture(params=[None, 1]) -def min_periods(request) -> Any: - return request.param - - class Base: _nan_locs = np.arange(20, 40) @@ -992,17 +944,19 @@ def test_centered_axis_validation(self): with pytest.raises(ValueError): (DataFrame(np.ones((10, 10))).rolling(window=3, center=True, axis=2).mean()) - def test_rolling_sum(self): - self._check_moment_func(np.nansum, name="sum", zero_min_periods_equal=False) + def test_rolling_sum(self, raw): + self._check_moment_func( + np.nansum, name="sum", zero_min_periods_equal=False, raw=raw + ) - def test_rolling_count(self): + def test_rolling_count(self, raw): counter = lambda x: np.isfinite(x).astype(float).sum() self._check_moment_func( - counter, name="count", has_min_periods=False, fill_value=0 + counter, name="count", has_min_periods=False, fill_value=0, raw=raw ) - def test_rolling_mean(self): - self._check_moment_func(np.mean, name="mean") + def test_rolling_mean(self, raw): + self._check_moment_func(np.mean, name="mean", raw=raw) @td.skip_if_no_scipy def test_cmov_mean(self): @@ -1467,11 +1421,11 @@ def test_cmov_window_special_linear_range(self, win_types_special): ) tm.assert_series_equal(xp, rs) - def test_rolling_median(self): - self._check_moment_func(np.median, name="median") + def test_rolling_median(self, raw): + self._check_moment_func(np.median, name="median", raw=raw) - def test_rolling_min(self): - self._check_moment_func(np.min, name="min") + def test_rolling_min(self, raw): + self._check_moment_func(np.min, name="min", raw=raw) a = pd.Series([1, 2, 3, 4, 5]) result = a.rolling(window=100, min_periods=1).min() @@ -1481,8 +1435,8 @@ def test_rolling_min(self): with pytest.raises(ValueError): pd.Series([1, 2, 3]).rolling(window=3, min_periods=5).min() - def test_rolling_max(self): - self._check_moment_func(np.max, name="max") + def test_rolling_max(self, raw): + self._check_moment_func(np.max, name="max", raw=raw) a = pd.Series([1, 2, 3, 4, 5], dtype=np.float64) b = a.rolling(window=100, min_periods=1).max() @@ -1492,7 +1446,7 @@ def test_rolling_max(self): pd.Series([1, 2, 3]).rolling(window=3, min_periods=5).max() @pytest.mark.parametrize("q", [0.0, 0.1, 0.5, 0.9, 1.0]) - def test_rolling_quantile(self, q): + def test_rolling_quantile(self, q, raw): def scoreatpercentile(a, per): values = np.sort(a, axis=0) @@ -1513,7 +1467,7 @@ def scoreatpercentile(a, per): def quantile_func(x): return scoreatpercentile(x, q) - self._check_moment_func(quantile_func, name="quantile", quantile=q) + self._check_moment_func(quantile_func, name="quantile", quantile=q, raw=raw) def test_rolling_quantile_np_percentile(self): # #9413: Tests that rolling window's quantile default behavior @@ -1653,9 +1607,11 @@ def f(x): with pytest.raises(AttributeError): df.rolling(window).apply(f, raw=True) - def test_rolling_std(self): - self._check_moment_func(lambda x: np.std(x, ddof=1), name="std") - self._check_moment_func(lambda x: np.std(x, ddof=0), name="std", ddof=0) + def test_rolling_std(self, raw): + self._check_moment_func(lambda x: np.std(x, ddof=1), name="std", raw=raw) + self._check_moment_func( + lambda x: np.std(x, ddof=0), name="std", ddof=0, raw=raw + ) def test_rolling_std_1obs(self): vals = pd.Series([1.0, 2.0, 3.0, 4.0, 5.0]) @@ -1691,26 +1647,29 @@ def test_rolling_std_neg_sqrt(self): b = a.ewm(span=3).std() assert np.isfinite(b[2:]).all() - def test_rolling_var(self): - self._check_moment_func(lambda x: np.var(x, ddof=1), name="var") - self._check_moment_func(lambda x: np.var(x, ddof=0), name="var", ddof=0) + def test_rolling_var(self, raw): + self._check_moment_func(lambda x: np.var(x, ddof=1), name="var", raw=raw) + self._check_moment_func( + lambda x: np.var(x, ddof=0), name="var", ddof=0, raw=raw + ) @td.skip_if_no_scipy - def test_rolling_skew(self): + def test_rolling_skew(self, raw): from scipy.stats import skew - self._check_moment_func(lambda x: skew(x, bias=False), name="skew") + self._check_moment_func(lambda x: skew(x, bias=False), name="skew", raw=raw) @td.skip_if_no_scipy - def test_rolling_kurt(self): + def test_rolling_kurt(self, raw): from scipy.stats import kurtosis - self._check_moment_func(lambda x: kurtosis(x, bias=False), name="kurt") + self._check_moment_func(lambda x: kurtosis(x, bias=False), name="kurt", raw=raw) def _check_moment_func( self, static_comp, name, + raw, has_min_periods=True, has_center=True, has_time_rule=True, From c1f2fe1ca0944a81f38d6d764f2d65ee7183ada4 Mon Sep 17 00:00:00 2001 From: Matt Roeschke Date: Wed, 10 Jul 2019 09:22:21 -0700 Subject: [PATCH 3/3] Remove unused import --- pandas/tests/window/test_window.py | 1 - 1 file changed, 1 deletion(-) diff --git a/pandas/tests/window/test_window.py b/pandas/tests/window/test_window.py index c88abaf88ac69..966edf4bf6588 100644 --- a/pandas/tests/window/test_window.py +++ b/pandas/tests/window/test_window.py @@ -1,6 +1,5 @@ from collections import OrderedDict from datetime import datetime, timedelta -from typing import Any import warnings from warnings import catch_warnings