From 173484ea4c4503d2553b07c7a500ec437f1a43fe Mon Sep 17 00:00:00 2001 From: Matt Roeschke Date: Wed, 18 Nov 2020 15:50:32 -0800 Subject: [PATCH 1/9] Refactor series data --- .../moments/test_moments_consistency_ewm.py | 87 +++++++++++-------- 1 file changed, 50 insertions(+), 37 deletions(-) diff --git a/pandas/tests/window/moments/test_moments_consistency_ewm.py b/pandas/tests/window/moments/test_moments_consistency_ewm.py index 2718bdabee96a..f3f3003e023db 100644 --- a/pandas/tests/window/moments/test_moments_consistency_ewm.py +++ b/pandas/tests/window/moments/test_moments_consistency_ewm.py @@ -257,47 +257,60 @@ def test_ewm_consistency_cov(consistency_data, min_periods, adjust, ignore_na): ) -@pytest.mark.slow @pytest.mark.parametrize("min_periods", [0, 1, 2, 3, 4]) -@pytest.mark.parametrize("adjust", [True, False]) -@pytest.mark.parametrize("ignore_na", [True, False]) -def test_ewm_consistency_series_data(consistency_data, min_periods, adjust, ignore_na): +@pytest.mark.parametrize("bias", [True, False]) +def test_expanding_consistency_series_cov_corr( + consistency_data, adjust, ignore_na, min_periods, bias +): x, is_constant, no_nans = consistency_data com = 3.0 - moments_consistency_series_data( - x=x, - mean=lambda x: x.ewm( + + if isinstance(x, Series): + var_x_plus_y = ( + (x + x) + .ewm(com=com, min_periods=min_periods, adjust=adjust, ignore_na=ignore_na) + .var(bias=bias) + ) + var_x = x.ewm( com=com, min_periods=min_periods, adjust=adjust, ignore_na=ignore_na - ).mean(), - corr=lambda x, y: x.ewm( + ).var(bias=bias) + var_y = x.ewm( com=com, min_periods=min_periods, adjust=adjust, ignore_na=ignore_na - ).corr(y), - var_unbiased=lambda x: ( - x.ewm( - com=com, min_periods=min_periods, adjust=adjust, ignore_na=ignore_na - ).var(bias=False) - ), - std_unbiased=lambda x: ( - x.ewm( - com=com, min_periods=min_periods, adjust=adjust, ignore_na=ignore_na - ).std(bias=False) - ), - cov_unbiased=lambda x, y: ( - x.ewm( - com=com, min_periods=min_periods, adjust=adjust, ignore_na=ignore_na - ).cov(y, bias=False) - ), - var_biased=lambda x: ( - x.ewm( - com=com, min_periods=min_periods, adjust=adjust, ignore_na=ignore_na - ).var(bias=True) - ), - std_biased=lambda x: x.ewm( + ).var(bias=bias) + cov_x_y = x.ewm( com=com, min_periods=min_periods, adjust=adjust, ignore_na=ignore_na - ).std(bias=True), - cov_biased=lambda x, y: ( - x.ewm( + ).cov(x, bias=bias) + # check that cov(x, y) == (var(x+y) - var(x) - + # var(y)) / 2 + tm.assert_equal(cov_x_y, 0.5 * (var_x_plus_y - var_x - var_y)) + + # check that corr(x, y) == cov(x, y) / (std(x) * + # std(y)) + corr_x_y = x.ewm( + com=com, min_periods=min_periods, adjust=adjust, ignore_na=ignore_na + ).corr(x, bias=bias) + std_x = x.ewm( + com=com, min_periods=min_periods, adjust=adjust, ignore_na=ignore_na + ).std(bias=bias) + std_y = x.ewm( + com=com, min_periods=min_periods, adjust=adjust, ignore_na=ignore_na + ).std(bias=bias) + tm.assert_equal(corr_x_y, cov_x_y / (std_x * std_y)) + + if bias: + # check that biased cov(x, y) == mean(x*y) - + # mean(x)*mean(y) + mean_x = x.ewm( com=com, min_periods=min_periods, adjust=adjust, ignore_na=ignore_na - ).cov(y, bias=True) - ), - ) + ).mean() + mean_y = x.ewm( + com=com, min_periods=min_periods, adjust=adjust, ignore_na=ignore_na + ).mean() + mean_x_times_y = ( + (x * x) + .ewm( + com=com, min_periods=min_periods, adjust=adjust, ignore_na=ignore_na + ) + .mean() + ) + tm.assert_equal(cov_x_y, mean_x_times_y - (mean_x * mean_y)) From 1cb0c2d08e8d83ebbda4ec293ee427d0e1c9b111 Mon Sep 17 00:00:00 2001 From: Matt Roeschke Date: Wed, 18 Nov 2020 15:51:02 -0800 Subject: [PATCH 2/9] Rename func --- pandas/tests/window/moments/test_moments_consistency_ewm.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/tests/window/moments/test_moments_consistency_ewm.py b/pandas/tests/window/moments/test_moments_consistency_ewm.py index f3f3003e023db..670118addfa33 100644 --- a/pandas/tests/window/moments/test_moments_consistency_ewm.py +++ b/pandas/tests/window/moments/test_moments_consistency_ewm.py @@ -259,7 +259,7 @@ def test_ewm_consistency_cov(consistency_data, min_periods, adjust, ignore_na): @pytest.mark.parametrize("min_periods", [0, 1, 2, 3, 4]) @pytest.mark.parametrize("bias", [True, False]) -def test_expanding_consistency_series_cov_corr( +def test_ewm_consistency_series_cov_corr( consistency_data, adjust, ignore_na, min_periods, bias ): x, is_constant, no_nans = consistency_data From 972f06979c98e222959dd1036c67a019cab8d766 Mon Sep 17 00:00:00 2001 From: Matt Roeschke Date: Wed, 18 Nov 2020 15:54:38 -0800 Subject: [PATCH 3/9] Refactor cov test --- .../moments/test_moments_consistency_ewm.py | 40 +++++++------------ 1 file changed, 14 insertions(+), 26 deletions(-) diff --git a/pandas/tests/window/moments/test_moments_consistency_ewm.py b/pandas/tests/window/moments/test_moments_consistency_ewm.py index 670118addfa33..2c402eed735c6 100644 --- a/pandas/tests/window/moments/test_moments_consistency_ewm.py +++ b/pandas/tests/window/moments/test_moments_consistency_ewm.py @@ -227,34 +227,22 @@ def test_ewm_consistency_std(consistency_data, min_periods, adjust, ignore_na): @pytest.mark.parametrize("min_periods", [0, 1, 2, 3, 4]) -@pytest.mark.parametrize("adjust", [True, False]) -@pytest.mark.parametrize("ignore_na", [True, False]) -def test_ewm_consistency_cov(consistency_data, min_periods, adjust, ignore_na): +@pytest.mark.parametrize("bias", [True, False]) +def test_ewm_consistency_cov(consistency_data, adjust, ignore_na, min_periods, bias): x, is_constant, no_nans = consistency_data com = 3.0 - moments_consistency_cov_data( - x=x, - var_unbiased=lambda x: ( - x.ewm( - com=com, min_periods=min_periods, adjust=adjust, ignore_na=ignore_na - ).var(bias=False) - ), - cov_unbiased=lambda x, y: ( - x.ewm( - com=com, min_periods=min_periods, adjust=adjust, ignore_na=ignore_na - ).cov(y, bias=False) - ), - var_biased=lambda x: ( - x.ewm( - com=com, min_periods=min_periods, adjust=adjust, ignore_na=ignore_na - ).var(bias=True) - ), - cov_biased=lambda x, y: ( - x.ewm( - com=com, min_periods=min_periods, adjust=adjust, ignore_na=ignore_na - ).cov(y, bias=True) - ), - ) + var_x = x.ewm( + com=com, min_periods=min_periods, adjust=adjust, ignore_na=ignore_na + ).var(bias=bias) + assert not (var_x < 0).any().any() + + cov_x_x = x.ewm( + com=com, min_periods=min_periods, adjust=adjust, ignore_na=ignore_na + ).cov(x, bias=bias) + assert not (cov_x_x < 0).any().any() + + # check that var(x) == cov(x, x) + tm.assert_equal(var_x, cov_x_x) @pytest.mark.parametrize("min_periods", [0, 1, 2, 3, 4]) From bd1ce9193f3168e1bb8de3663d2b25594f959b73 Mon Sep 17 00:00:00 2001 From: Matt Roeschke Date: Wed, 18 Nov 2020 15:57:40 -0800 Subject: [PATCH 4/9] Fix std tests --- .../moments/test_moments_consistency_ewm.py | 37 +++++++------------ 1 file changed, 13 insertions(+), 24 deletions(-) diff --git a/pandas/tests/window/moments/test_moments_consistency_ewm.py b/pandas/tests/window/moments/test_moments_consistency_ewm.py index 2c402eed735c6..dc9481994a56b 100644 --- a/pandas/tests/window/moments/test_moments_consistency_ewm.py +++ b/pandas/tests/window/moments/test_moments_consistency_ewm.py @@ -198,32 +198,21 @@ def test_ewm_consistency_var(consistency_data, min_periods, adjust, ignore_na): @pytest.mark.parametrize("min_periods", [0, 1, 2, 3, 4]) -@pytest.mark.parametrize("adjust", [True, False]) -@pytest.mark.parametrize("ignore_na", [True, False]) -def test_ewm_consistency_std(consistency_data, min_periods, adjust, ignore_na): +@pytest.mark.parametrize("bias", [True, False]) +def test_ewm_consistency_std(consistency_data, adjust, ignore_na, min_periods, bias): x, is_constant, no_nans = consistency_data com = 3.0 - moments_consistency_std_data( - x=x, - var_unbiased=lambda x: ( - x.ewm( - com=com, min_periods=min_periods, adjust=adjust, ignore_na=ignore_na - ).var(bias=False) - ), - std_unbiased=lambda x: ( - x.ewm( - com=com, min_periods=min_periods, adjust=adjust, ignore_na=ignore_na - ).std(bias=False) - ), - var_biased=lambda x: ( - x.ewm( - com=com, min_periods=min_periods, adjust=adjust, ignore_na=ignore_na - ).var(bias=True) - ), - std_biased=lambda x: x.ewm( - com=com, min_periods=min_periods, adjust=adjust, ignore_na=ignore_na - ).std(bias=True), - ) + var_x = x.ewm( + com=com, min_periods=min_periods, adjust=adjust, ignore_na=ignore_na + ).var(bias=bias) + std_x = x.ewm( + com=com, min_periods=min_periods, adjust=adjust, ignore_na=ignore_na + ).std(bias=bias) + assert not (var_x < 0).any().any() + assert not (std_x < 0).any().any() + + # check that var(x) == std(x)^2 + tm.assert_equal(var_x, std_x * std_x) @pytest.mark.parametrize("min_periods", [0, 1, 2, 3, 4]) From 2bdfa3eff8aa841108adf7447dbf39a4f01dcd96 Mon Sep 17 00:00:00 2001 From: Matt Roeschke Date: Wed, 18 Nov 2020 16:08:23 -0800 Subject: [PATCH 5/9] Remove var tests --- .../moments/test_moments_consistency_ewm.py | 63 ++++++++++++------- 1 file changed, 42 insertions(+), 21 deletions(-) diff --git a/pandas/tests/window/moments/test_moments_consistency_ewm.py b/pandas/tests/window/moments/test_moments_consistency_ewm.py index dc9481994a56b..ccac51efe2533 100644 --- a/pandas/tests/window/moments/test_moments_consistency_ewm.py +++ b/pandas/tests/window/moments/test_moments_consistency_ewm.py @@ -171,30 +171,51 @@ def _ewma(s, com, min_periods, adjust, ignore_na): @pytest.mark.parametrize("min_periods", [0, 1, 2, 3, 4]) -@pytest.mark.parametrize("adjust", [True, False]) -@pytest.mark.parametrize("ignore_na", [True, False]) -def test_ewm_consistency_var(consistency_data, min_periods, adjust, ignore_na): +@pytest.mark.parametrize("bias", [True, False]) +def test_moments_consistency_var( + consistency_data, adjust, ignore_na, min_periods, bias +): x, is_constant, no_nans = consistency_data com = 3.0 - moments_consistency_var_data( - x=x, - is_constant=is_constant, - min_periods=min_periods, - count=lambda x: x.expanding().count(), - mean=lambda x: x.ewm( + + mean_x = x.ewm( + com=com, min_periods=min_periods, adjust=adjust, ignore_na=ignore_na + ).mean() + var_x = x.ewm( + com=com, min_periods=min_periods, adjust=adjust, ignore_na=ignore_na + ).var(bias=bias) + assert not (var_x < 0).any().any() + + if bias: + # check that biased var(x) == mean(x^2) - mean(x)^2 + mean_x2 = ( + (x * x) + .ewm(com=com, min_periods=min_periods, adjust=adjust, ignore_na=ignore_na) + .mean() + ) + tm.assert_equal(var_x, mean_x2 - (mean_x * mean_x)) + + +@pytest.mark.parametrize("min_periods", [0, 1, 2, 3, 4]) +@pytest.mark.parametrize("bias", [True, False]) +def test_moments_consistency_var_constant( + consistency_data, adjust, ignore_na, min_periods, bias +): + x, is_constant, no_nans = consistency_data + com = 3.0 + if is_constant: + count_x = x.expanding(min_periods=min_periods).count() + var_x = x.ewm( com=com, min_periods=min_periods, adjust=adjust, ignore_na=ignore_na - ).mean(), - var_unbiased=lambda x: ( - x.ewm( - com=com, min_periods=min_periods, adjust=adjust, ignore_na=ignore_na - ).var(bias=False) - ), - var_biased=lambda x: ( - x.ewm( - com=com, min_periods=min_periods, adjust=adjust, ignore_na=ignore_na - ).var(bias=True) - ), - ) + ).var(bias=bias) + + # check that variance of constant series is identically 0 + assert not (var_x > 0).any().any() + expected = x * np.nan + expected[count_x >= max(min_periods, 1)] = 0.0 + if not bias: + expected[count_x < 2] = np.nan + tm.assert_equal(var_x, expected) @pytest.mark.parametrize("min_periods", [0, 1, 2, 3, 4]) From 8bcd556dec3b3b32fe1fc7c9fff5a2a7dffecfb1 Mon Sep 17 00:00:00 2001 From: Matt Roeschke Date: Wed, 18 Nov 2020 17:39:30 -0800 Subject: [PATCH 6/9] break out consistency test --- .../moments/test_moments_consistency_ewm.py | 142 ++++++++++++++++-- 1 file changed, 132 insertions(+), 10 deletions(-) diff --git a/pandas/tests/window/moments/test_moments_consistency_ewm.py b/pandas/tests/window/moments/test_moments_consistency_ewm.py index ccac51efe2533..f363f1f133b18 100644 --- a/pandas/tests/window/moments/test_moments_consistency_ewm.py +++ b/pandas/tests/window/moments/test_moments_consistency_ewm.py @@ -65,8 +65,126 @@ def test_different_input_array_raise_exception(name, binary_ew_data): getattr(A.ewm(com=20, min_periods=5), name)(np.random.randn(50)) -@pytest.mark.slow +def create_mock_weights(obj, com, adjust, ignore_na): + if isinstance(obj, DataFrame): + if not len(obj.columns): + return DataFrame(index=obj.index, columns=obj.columns) + w = concat( + [ + create_mock_series_weights( + obj.iloc[:, i], com=com, adjust=adjust, ignore_na=ignore_na + ) + for i, _ in enumerate(obj.columns) + ], + axis=1, + ) + w.index = obj.index + w.columns = obj.columns + return w + else: + create_mock_series_weights(obj, com, adjust, ignore_na) + + +def create_mock_series_weights(s, com, adjust, ignore_na): + w = Series(np.nan, index=s.index) + alpha = 1.0 / (1.0 + com) + if adjust: + count = 0 + for i in range(len(s)): + if s.iat[i] == s.iat[i]: + w.iat[i] = pow(1.0 / (1.0 - alpha), count) + count += 1 + elif not ignore_na: + count += 1 + else: + sum_wts = 0.0 + prev_i = -1 + count = 0 + for i in range(len(s)): + if s.iat[i] == s.iat[i]: + if prev_i == -1: + w.iat[i] = 1.0 + else: + w.iat[i] = alpha * sum_wts / pow(1.0 - alpha, count - prev_i) + sum_wts += w.iat[i] + prev_i = count + count += 1 + elif not ignore_na: + count += 1 + return w + + +@pytest.mark.parametrize("min_periods", [0, 1, 2, 3, 4]) +def test_ewm_consistency_mean(consistency_data, adjust, ignore_na, min_periods): + x, is_constant, no_nans = consistency_data + com = 3.0 + + result = x.ewm( + com=com, min_periods=min_periods, adjust=adjust, ignore_na=ignore_na + ).mean() + weights = create_mock_weights(x, com=com, adjust=adjust, ignore_na=ignore_na) + expected = ( + x.multiply(weights).cumsum().divide(weights.cumsum()).fillna(method="ffill") + ) + expected[ + x.expanding().count() < (max(min_periods, 1) if min_periods else 1) + ] = np.nan + tm.assert_equal(result, expected.astype("float64")) + + @pytest.mark.parametrize("min_periods", [0, 1, 2, 3, 4]) +def test_ewm_consistency_consistent(consistency_data, adjust, ignore_na, min_periods): + x, is_constant, no_nans = consistency_data + com = 3.0 + + if is_constant: + count_x = x.expanding().count() + mean_x = x.ewm( + com=com, min_periods=min_periods, adjust=adjust, ignore_na=ignore_na + ).mean() + # check that correlation of a series with itself is either 1 or NaN + corr_x_x = x.ewm( + com=com, min_periods=min_periods, adjust=adjust, ignore_na=ignore_na + ).corr(x) + exp = x.max() if isinstance(x, Series) else x.max().max() + + # check mean of constant series + expected = x * np.nan + expected[count_x >= max(min_periods, 1)] = exp + tm.assert_equal(mean_x, expected) + + # check correlation of constant series with itself is NaN + expected[:] = np.nan + tm.assert_equal(corr_x_x, expected) + + +@pytest.mark.parametrize("min_periods", [0, 1, 2, 3, 4]) +def test_ewm_consistency_var_debiasing_factors( + consistency_data, adjust, ignore_na, min_periods +): + x, is_constant, no_nans = consistency_data + com = 3.0 + + # check variance debiasing factors + var_unbiased_x = x.ewm( + com=com, min_periods=min_periods, adjust=adjust, ignore_na=ignore_na + ).var(bias=False) + var_biased_x = x.ewm( + com=com, min_periods=min_periods, adjust=adjust, ignore_na=ignore_na + ).var(bias=True) + + weights = create_mock_weights(x, com=com, adjust=adjust, ignore_na=ignore_na) + cum_sum = weights.cumsum().fillna(method="ffill") + cum_sum_sq = (weights * weights).cumsum().fillna(method="ffill") + numerator = cum_sum * cum_sum + denominator = numerator - cum_sum_sq + denominator[denominator <= 0.0] = np.nan + var_debiasing_factors_x = numerator / denominator + + tm.assert_equal(var_unbiased_x, var_biased_x * var_debiasing_factors_x) + +""" +@pytest.mark.slow @pytest.mark.parametrize("adjust", [True, False]) @pytest.mark.parametrize("ignore_na", [True, False]) def test_ewm_consistency(consistency_data, min_periods, adjust, ignore_na): @@ -87,25 +205,29 @@ def _weights(s, com, adjust, ignore_na): w = Series(np.nan, index=s.index) alpha = 1.0 / (1.0 + com) - if ignore_na: - w[s.notna()] = _weights( - s[s.notna()], com=com, adjust=adjust, ignore_na=False - ) - elif adjust: + if adjust: + count = 0 for i in range(len(s)): if s.iat[i] == s.iat[i]: - w.iat[i] = pow(1.0 / (1.0 - alpha), i) + w.iat[i] = pow(1.0 / (1.0 - alpha), count) + count += 1 + elif not ignore_na: + count += 1 else: sum_wts = 0.0 prev_i = -1 + count = 0 for i in range(len(s)): if s.iat[i] == s.iat[i]: if prev_i == -1: w.iat[i] = 1.0 else: - w.iat[i] = alpha * sum_wts / pow(1.0 - alpha, i - prev_i) + w.iat[i] = alpha * sum_wts / pow(1.0 - alpha, count - prev_i) sum_wts += w.iat[i] - prev_i = i + prev_i = count + count += 1 + elif not ignore_na: + count += 1 return w def _variance_debiasing_factors(s, com, adjust, ignore_na): @@ -168,7 +290,7 @@ def _ewma(s, com, min_periods, adjust, ignore_na): _variance_debiasing_factors(x, com=com, adjust=adjust, ignore_na=ignore_na) ), ) - +""" @pytest.mark.parametrize("min_periods", [0, 1, 2, 3, 4]) @pytest.mark.parametrize("bias", [True, False]) From bff601536b416efd4b9eb8b94b7a0da0ad005735 Mon Sep 17 00:00:00 2001 From: Matt Roeschke Date: Wed, 18 Nov 2020 20:33:57 -0800 Subject: [PATCH 7/9] Finish splitting consistency tests --- .../moments/test_moments_consistency_ewm.py | 119 +----------------- 1 file changed, 1 insertion(+), 118 deletions(-) diff --git a/pandas/tests/window/moments/test_moments_consistency_ewm.py b/pandas/tests/window/moments/test_moments_consistency_ewm.py index f363f1f133b18..95689cb41a3c6 100644 --- a/pandas/tests/window/moments/test_moments_consistency_ewm.py +++ b/pandas/tests/window/moments/test_moments_consistency_ewm.py @@ -3,15 +3,6 @@ from pandas import DataFrame, Series, concat import pandas._testing as tm -from pandas.tests.window.common import ( - moments_consistency_cov_data, - moments_consistency_is_constant, - moments_consistency_mock_mean, - moments_consistency_series_data, - moments_consistency_std_data, - moments_consistency_var_data, - moments_consistency_var_debiasing_factors, -) @pytest.mark.parametrize("func", ["cov", "corr"]) @@ -82,7 +73,7 @@ def create_mock_weights(obj, com, adjust, ignore_na): w.columns = obj.columns return w else: - create_mock_series_weights(obj, com, adjust, ignore_na) + return create_mock_series_weights(obj, com, adjust, ignore_na) def create_mock_series_weights(s, com, adjust, ignore_na): @@ -183,114 +174,6 @@ def test_ewm_consistency_var_debiasing_factors( tm.assert_equal(var_unbiased_x, var_biased_x * var_debiasing_factors_x) -""" -@pytest.mark.slow -@pytest.mark.parametrize("adjust", [True, False]) -@pytest.mark.parametrize("ignore_na", [True, False]) -def test_ewm_consistency(consistency_data, min_periods, adjust, ignore_na): - def _weights(s, com, adjust, ignore_na): - if isinstance(s, DataFrame): - if not len(s.columns): - return DataFrame(index=s.index, columns=s.columns) - w = concat( - [ - _weights(s.iloc[:, i], com=com, adjust=adjust, ignore_na=ignore_na) - for i, _ in enumerate(s.columns) - ], - axis=1, - ) - w.index = s.index - w.columns = s.columns - return w - - w = Series(np.nan, index=s.index) - alpha = 1.0 / (1.0 + com) - if adjust: - count = 0 - for i in range(len(s)): - if s.iat[i] == s.iat[i]: - w.iat[i] = pow(1.0 / (1.0 - alpha), count) - count += 1 - elif not ignore_na: - count += 1 - else: - sum_wts = 0.0 - prev_i = -1 - count = 0 - for i in range(len(s)): - if s.iat[i] == s.iat[i]: - if prev_i == -1: - w.iat[i] = 1.0 - else: - w.iat[i] = alpha * sum_wts / pow(1.0 - alpha, count - prev_i) - sum_wts += w.iat[i] - prev_i = count - count += 1 - elif not ignore_na: - count += 1 - return w - - def _variance_debiasing_factors(s, com, adjust, ignore_na): - weights = _weights(s, com=com, adjust=adjust, ignore_na=ignore_na) - cum_sum = weights.cumsum().fillna(method="ffill") - cum_sum_sq = (weights * weights).cumsum().fillna(method="ffill") - numerator = cum_sum * cum_sum - denominator = numerator - cum_sum_sq - denominator[denominator <= 0.0] = np.nan - return numerator / denominator - - def _ewma(s, com, min_periods, adjust, ignore_na): - weights = _weights(s, com=com, adjust=adjust, ignore_na=ignore_na) - result = ( - s.multiply(weights).cumsum().divide(weights.cumsum()).fillna(method="ffill") - ) - result[ - s.expanding().count() < (max(min_periods, 1) if min_periods else 1) - ] = np.nan - return result - - x, is_constant, no_nans = consistency_data - com = 3.0 - moments_consistency_mock_mean( - x=x, - mean=lambda x: x.ewm( - com=com, min_periods=min_periods, adjust=adjust, ignore_na=ignore_na - ).mean(), - mock_mean=lambda x: _ewma( - x, com=com, min_periods=min_periods, adjust=adjust, ignore_na=ignore_na - ), - ) - - moments_consistency_is_constant( - x=x, - is_constant=is_constant, - min_periods=min_periods, - count=lambda x: x.expanding().count(), - mean=lambda x: x.ewm( - com=com, min_periods=min_periods, adjust=adjust, ignore_na=ignore_na - ).mean(), - corr=lambda x, y: x.ewm( - com=com, min_periods=min_periods, adjust=adjust, ignore_na=ignore_na - ).corr(y), - ) - - moments_consistency_var_debiasing_factors( - x=x, - var_unbiased=lambda x: ( - x.ewm( - com=com, min_periods=min_periods, adjust=adjust, ignore_na=ignore_na - ).var(bias=False) - ), - var_biased=lambda x: ( - x.ewm( - com=com, min_periods=min_periods, adjust=adjust, ignore_na=ignore_na - ).var(bias=True) - ), - var_debiasing_factors=lambda x: ( - _variance_debiasing_factors(x, com=com, adjust=adjust, ignore_na=ignore_na) - ), - ) -""" @pytest.mark.parametrize("min_periods", [0, 1, 2, 3, 4]) @pytest.mark.parametrize("bias", [True, False]) From 210ce579e49fe7e75babfffdab78909ce90d098d Mon Sep 17 00:00:00 2001 From: Matt Roeschke Date: Wed, 18 Nov 2020 20:34:54 -0800 Subject: [PATCH 8/9] Remove common functions --- pandas/tests/window/common.py | 147 ---------------------------------- 1 file changed, 147 deletions(-) delete mode 100644 pandas/tests/window/common.py diff --git a/pandas/tests/window/common.py b/pandas/tests/window/common.py deleted file mode 100644 index 7c8c9de40f7c5..0000000000000 --- a/pandas/tests/window/common.py +++ /dev/null @@ -1,147 +0,0 @@ -import numpy as np - -from pandas import Series -import pandas._testing as tm - - -def moments_consistency_mock_mean(x, mean, mock_mean): - mean_x = mean(x) - # check that correlation of a series with itself is either 1 or NaN - - if mock_mean: - # check that mean equals mock_mean - expected = mock_mean(x) - tm.assert_equal(mean_x, expected.astype("float64")) - - -def moments_consistency_is_constant(x, is_constant, min_periods, count, mean, corr): - count_x = count(x) - mean_x = mean(x) - # check that correlation of a series with itself is either 1 or NaN - corr_x_x = corr(x, x) - - if is_constant: - exp = x.max() if isinstance(x, Series) else x.max().max() - - # check mean of constant series - expected = x * np.nan - expected[count_x >= max(min_periods, 1)] = exp - tm.assert_equal(mean_x, expected) - - # check correlation of constant series with itself is NaN - expected[:] = np.nan - tm.assert_equal(corr_x_x, expected) - - -def moments_consistency_var_debiasing_factors( - x, var_biased, var_unbiased, var_debiasing_factors -): - if var_unbiased and var_biased and var_debiasing_factors: - # check variance debiasing factors - var_unbiased_x = var_unbiased(x) - var_biased_x = var_biased(x) - var_debiasing_factors_x = var_debiasing_factors(x) - tm.assert_equal(var_unbiased_x, var_biased_x * var_debiasing_factors_x) - - -def moments_consistency_var_data( - x, is_constant, min_periods, count, mean, var_unbiased, var_biased -): - count_x = count(x) - mean_x = mean(x) - for var in [var_biased, var_unbiased]: - var_x = var(x) - assert not (var_x < 0).any().any() - - if var is var_biased: - # check that biased var(x) == mean(x^2) - mean(x)^2 - mean_x2 = mean(x * x) - tm.assert_equal(var_x, mean_x2 - (mean_x * mean_x)) - - if is_constant: - # check that variance of constant series is identically 0 - assert not (var_x > 0).any().any() - expected = x * np.nan - expected[count_x >= max(min_periods, 1)] = 0.0 - if var is var_unbiased: - expected[count_x < 2] = np.nan - tm.assert_equal(var_x, expected) - - -def moments_consistency_std_data(x, std_unbiased, var_unbiased, std_biased, var_biased): - for (std, var) in [(std_biased, var_biased), (std_unbiased, var_unbiased)]: - var_x = var(x) - std_x = std(x) - assert not (var_x < 0).any().any() - assert not (std_x < 0).any().any() - - # check that var(x) == std(x)^2 - tm.assert_equal(var_x, std_x * std_x) - - -def moments_consistency_cov_data(x, cov_unbiased, var_unbiased, cov_biased, var_biased): - for (cov, var) in [(cov_biased, var_biased), (cov_unbiased, var_unbiased)]: - var_x = var(x) - assert not (var_x < 0).any().any() - if cov: - cov_x_x = cov(x, x) - assert not (cov_x_x < 0).any().any() - - # check that var(x) == cov(x, x) - tm.assert_equal(var_x, cov_x_x) - - -def moments_consistency_series_data( - x, - corr, - mean, - std_biased, - std_unbiased, - cov_unbiased, - var_unbiased, - var_biased, - cov_biased, -): - if isinstance(x, Series): - y = x - mean_x = mean(x) - if not x.isna().equals(y.isna()): - # can only easily test two Series with similar - # structure - pass - - # check that cor(x, y) is symmetric - corr_x_y = corr(x, y) - corr_y_x = corr(y, x) - tm.assert_equal(corr_x_y, corr_y_x) - - for (std, var, cov) in [ - (std_biased, var_biased, cov_biased), - (std_unbiased, var_unbiased, cov_unbiased), - ]: - var_x = var(x) - std_x = std(x) - - if cov: - # check that cov(x, y) is symmetric - cov_x_y = cov(x, y) - cov_y_x = cov(y, x) - tm.assert_equal(cov_x_y, cov_y_x) - - # check that cov(x, y) == (var(x+y) - var(x) - - # var(y)) / 2 - var_x_plus_y = var(x + y) - var_y = var(y) - tm.assert_equal(cov_x_y, 0.5 * (var_x_plus_y - var_x - var_y)) - - # check that corr(x, y) == cov(x, y) / (std(x) * - # std(y)) - std_y = std(y) - tm.assert_equal(corr_x_y, cov_x_y / (std_x * std_y)) - - if cov is cov_biased: - # check that biased cov(x, y) == mean(x*y) - - # mean(x)*mean(y) - mean_y = mean(y) - mean_x_times_y = mean(x * y) - tm.assert_equal(cov_x_y, mean_x_times_y - (mean_x * mean_y)) From a352e0b0ab4c37a3c8477411776972927218dc0d Mon Sep 17 00:00:00 2001 From: Matt Roeschke Date: Wed, 18 Nov 2020 20:56:02 -0800 Subject: [PATCH 9/9] Remove moments specific conftest and inline some fixtures directly in tests --- pandas/tests/window/conftest.py | 38 +++++-------------- pandas/tests/window/moments/conftest.py | 19 ---------- .../moments/test_moments_consistency_ewm.py | 23 ++++++++--- .../tests/window/moments/test_moments_ewm.py | 12 +++++- 4 files changed, 36 insertions(+), 56 deletions(-) delete mode 100644 pandas/tests/window/moments/conftest.py diff --git a/pandas/tests/window/conftest.py b/pandas/tests/window/conftest.py index e1d7635b0a686..64e679336abb8 100644 --- a/pandas/tests/window/conftest.py +++ b/pandas/tests/window/conftest.py @@ -10,6 +10,7 @@ @pytest.fixture(params=[True, False]) def raw(request): + """raw keyword argument for rolling.apply""" return request.param @@ -274,43 +275,22 @@ def consistency_data(request): return request.param -def _create_arr(): - """Internal function to mock an array.""" +def _create_series(): + """Internal function to mock Series.""" arr = np.random.randn(100) locs = np.arange(20, 40) arr[locs] = np.NaN - return arr - - -def _create_rng(): - """Internal function to mock date range.""" - rng = bdate_range(datetime(2009, 1, 1), periods=100) - return rng - - -def _create_series(): - """Internal function to mock Series.""" - arr = _create_arr() - series = Series(arr.copy(), index=_create_rng()) + series = Series(arr, index=bdate_range(datetime(2009, 1, 1), periods=100)) return series def _create_frame(): """Internal function to mock DataFrame.""" - rng = _create_rng() - return DataFrame(np.random.randn(100, 10), index=rng, columns=np.arange(10)) - - -@pytest.fixture -def nan_locs(): - """Make a range as loc fixture.""" - return np.arange(20, 40) - - -@pytest.fixture -def arr(): - """Make an array as fixture.""" - return _create_arr() + return DataFrame( + np.random.randn(100, 10), + index=bdate_range(datetime(2009, 1, 1), periods=100), + columns=np.arange(10), + ) @pytest.fixture diff --git a/pandas/tests/window/moments/conftest.py b/pandas/tests/window/moments/conftest.py deleted file mode 100644 index ce4d04a9bcc1e..0000000000000 --- a/pandas/tests/window/moments/conftest.py +++ /dev/null @@ -1,19 +0,0 @@ -import numpy as np -import pytest - -from pandas import Series - - -@pytest.fixture -def binary_ew_data(): - A = Series(np.random.randn(50), index=np.arange(50)) - B = A[2:] + np.random.randn(48) - - A[:10] = np.NaN - B[-10:] = np.NaN - return A, B - - -@pytest.fixture(params=[0, 1, 2]) -def min_periods(request): - return request.param diff --git a/pandas/tests/window/moments/test_moments_consistency_ewm.py b/pandas/tests/window/moments/test_moments_consistency_ewm.py index 95689cb41a3c6..aa3453680190b 100644 --- a/pandas/tests/window/moments/test_moments_consistency_ewm.py +++ b/pandas/tests/window/moments/test_moments_consistency_ewm.py @@ -16,18 +16,28 @@ def test_ewm_pairwise_cov_corr(func, frame): @pytest.mark.parametrize("name", ["cov", "corr"]) -def test_ewm_corr_cov(name, binary_ew_data): - A, B = binary_ew_data +def test_ewm_corr_cov(name): + A = Series(np.random.randn(50), index=np.arange(50)) + B = A[2:] + np.random.randn(48) + + A[:10] = np.NaN + B[-10:] = np.NaN result = getattr(A.ewm(com=20, min_periods=5), name)(B) assert np.isnan(result.values[:14]).all() assert not np.isnan(result.values[14:]).any() +@pytest.mark.parametrize("min_periods", [0, 1, 2]) @pytest.mark.parametrize("name", ["cov", "corr"]) -def test_ewm_corr_cov_min_periods(name, min_periods, binary_ew_data): +def test_ewm_corr_cov_min_periods(name, min_periods): # GH 7898 - A, B = binary_ew_data + A = Series(np.random.randn(50), index=np.arange(50)) + B = A[2:] + np.random.randn(48) + + A[:10] = np.NaN + B[-10:] = np.NaN + result = getattr(A.ewm(com=20, min_periods=min_periods), name)(B) # binary functions (ewmcov, ewmcorr) with bias=False require at # least two values @@ -47,9 +57,10 @@ def test_ewm_corr_cov_min_periods(name, min_periods, binary_ew_data): @pytest.mark.parametrize("name", ["cov", "corr"]) -def test_different_input_array_raise_exception(name, binary_ew_data): +def test_different_input_array_raise_exception(name): + A = Series(np.random.randn(50), index=np.arange(50)) + A[:10] = np.NaN - A, _ = binary_ew_data msg = "Input arrays must be of the same type!" # exception raised is Exception with pytest.raises(Exception, match=msg): diff --git a/pandas/tests/window/moments/test_moments_ewm.py b/pandas/tests/window/moments/test_moments_ewm.py index def6d7289fec2..eceba7f143ab9 100644 --- a/pandas/tests/window/moments/test_moments_ewm.py +++ b/pandas/tests/window/moments/test_moments_ewm.py @@ -226,8 +226,12 @@ def test_ewma_halflife_arg(series): series.ewm() -def test_ewm_alpha(arr): +def test_ewm_alpha(): # GH 10789 + arr = np.random.randn(100) + locs = np.arange(20, 40) + arr[locs] = np.NaN + s = Series(arr) a = s.ewm(alpha=0.61722699889169674).mean() b = s.ewm(com=0.62014947789973052).mean() @@ -254,8 +258,12 @@ def test_ewm_alpha_arg(series): s.ewm(halflife=10.0, alpha=0.5) -def test_ewm_domain_checks(arr): +def test_ewm_domain_checks(): # GH 12492 + arr = np.random.randn(100) + locs = np.arange(20, 40) + arr[locs] = np.NaN + s = Series(arr) msg = "comass must satisfy: comass >= 0" with pytest.raises(ValueError, match=msg):