diff --git a/pandas/tests/window/common.py b/pandas/tests/window/common.py deleted file mode 100644 index 7c8c9de40f7c5..0000000000000 --- a/pandas/tests/window/common.py +++ /dev/null @@ -1,147 +0,0 @@ -import numpy as np - -from pandas import Series -import pandas._testing as tm - - -def moments_consistency_mock_mean(x, mean, mock_mean): - mean_x = mean(x) - # check that correlation of a series with itself is either 1 or NaN - - if mock_mean: - # check that mean equals mock_mean - expected = mock_mean(x) - tm.assert_equal(mean_x, expected.astype("float64")) - - -def moments_consistency_is_constant(x, is_constant, min_periods, count, mean, corr): - count_x = count(x) - mean_x = mean(x) - # check that correlation of a series with itself is either 1 or NaN - corr_x_x = corr(x, x) - - if is_constant: - exp = x.max() if isinstance(x, Series) else x.max().max() - - # check mean of constant series - expected = x * np.nan - expected[count_x >= max(min_periods, 1)] = exp - tm.assert_equal(mean_x, expected) - - # check correlation of constant series with itself is NaN - expected[:] = np.nan - tm.assert_equal(corr_x_x, expected) - - -def moments_consistency_var_debiasing_factors( - x, var_biased, var_unbiased, var_debiasing_factors -): - if var_unbiased and var_biased and var_debiasing_factors: - # check variance debiasing factors - var_unbiased_x = var_unbiased(x) - var_biased_x = var_biased(x) - var_debiasing_factors_x = var_debiasing_factors(x) - tm.assert_equal(var_unbiased_x, var_biased_x * var_debiasing_factors_x) - - -def moments_consistency_var_data( - x, is_constant, min_periods, count, mean, var_unbiased, var_biased -): - count_x = count(x) - mean_x = mean(x) - for var in [var_biased, var_unbiased]: - var_x = var(x) - assert not (var_x < 0).any().any() - - if var is var_biased: - # check that biased var(x) == mean(x^2) - mean(x)^2 - mean_x2 = mean(x * x) - tm.assert_equal(var_x, mean_x2 - (mean_x * mean_x)) - - if is_constant: - # check that variance of constant series is identically 0 - assert not (var_x > 0).any().any() - expected = x * np.nan - expected[count_x >= max(min_periods, 1)] = 0.0 - if var is var_unbiased: - expected[count_x < 2] = np.nan - tm.assert_equal(var_x, expected) - - -def moments_consistency_std_data(x, std_unbiased, var_unbiased, std_biased, var_biased): - for (std, var) in [(std_biased, var_biased), (std_unbiased, var_unbiased)]: - var_x = var(x) - std_x = std(x) - assert not (var_x < 0).any().any() - assert not (std_x < 0).any().any() - - # check that var(x) == std(x)^2 - tm.assert_equal(var_x, std_x * std_x) - - -def moments_consistency_cov_data(x, cov_unbiased, var_unbiased, cov_biased, var_biased): - for (cov, var) in [(cov_biased, var_biased), (cov_unbiased, var_unbiased)]: - var_x = var(x) - assert not (var_x < 0).any().any() - if cov: - cov_x_x = cov(x, x) - assert not (cov_x_x < 0).any().any() - - # check that var(x) == cov(x, x) - tm.assert_equal(var_x, cov_x_x) - - -def moments_consistency_series_data( - x, - corr, - mean, - std_biased, - std_unbiased, - cov_unbiased, - var_unbiased, - var_biased, - cov_biased, -): - if isinstance(x, Series): - y = x - mean_x = mean(x) - if not x.isna().equals(y.isna()): - # can only easily test two Series with similar - # structure - pass - - # check that cor(x, y) is symmetric - corr_x_y = corr(x, y) - corr_y_x = corr(y, x) - tm.assert_equal(corr_x_y, corr_y_x) - - for (std, var, cov) in [ - (std_biased, var_biased, cov_biased), - (std_unbiased, var_unbiased, cov_unbiased), - ]: - var_x = var(x) - std_x = std(x) - - if cov: - # check that cov(x, y) is symmetric - cov_x_y = cov(x, y) - cov_y_x = cov(y, x) - tm.assert_equal(cov_x_y, cov_y_x) - - # check that cov(x, y) == (var(x+y) - var(x) - - # var(y)) / 2 - var_x_plus_y = var(x + y) - var_y = var(y) - tm.assert_equal(cov_x_y, 0.5 * (var_x_plus_y - var_x - var_y)) - - # check that corr(x, y) == cov(x, y) / (std(x) * - # std(y)) - std_y = std(y) - tm.assert_equal(corr_x_y, cov_x_y / (std_x * std_y)) - - if cov is cov_biased: - # check that biased cov(x, y) == mean(x*y) - - # mean(x)*mean(y) - mean_y = mean(y) - mean_x_times_y = mean(x * y) - tm.assert_equal(cov_x_y, mean_x_times_y - (mean_x * mean_y)) diff --git a/pandas/tests/window/conftest.py b/pandas/tests/window/conftest.py index e1d7635b0a686..64e679336abb8 100644 --- a/pandas/tests/window/conftest.py +++ b/pandas/tests/window/conftest.py @@ -10,6 +10,7 @@ @pytest.fixture(params=[True, False]) def raw(request): + """raw keyword argument for rolling.apply""" return request.param @@ -274,43 +275,22 @@ def consistency_data(request): return request.param -def _create_arr(): - """Internal function to mock an array.""" +def _create_series(): + """Internal function to mock Series.""" arr = np.random.randn(100) locs = np.arange(20, 40) arr[locs] = np.NaN - return arr - - -def _create_rng(): - """Internal function to mock date range.""" - rng = bdate_range(datetime(2009, 1, 1), periods=100) - return rng - - -def _create_series(): - """Internal function to mock Series.""" - arr = _create_arr() - series = Series(arr.copy(), index=_create_rng()) + series = Series(arr, index=bdate_range(datetime(2009, 1, 1), periods=100)) return series def _create_frame(): """Internal function to mock DataFrame.""" - rng = _create_rng() - return DataFrame(np.random.randn(100, 10), index=rng, columns=np.arange(10)) - - -@pytest.fixture -def nan_locs(): - """Make a range as loc fixture.""" - return np.arange(20, 40) - - -@pytest.fixture -def arr(): - """Make an array as fixture.""" - return _create_arr() + return DataFrame( + np.random.randn(100, 10), + index=bdate_range(datetime(2009, 1, 1), periods=100), + columns=np.arange(10), + ) @pytest.fixture diff --git a/pandas/tests/window/moments/conftest.py b/pandas/tests/window/moments/conftest.py deleted file mode 100644 index ce4d04a9bcc1e..0000000000000 --- a/pandas/tests/window/moments/conftest.py +++ /dev/null @@ -1,19 +0,0 @@ -import numpy as np -import pytest - -from pandas import Series - - -@pytest.fixture -def binary_ew_data(): - A = Series(np.random.randn(50), index=np.arange(50)) - B = A[2:] + np.random.randn(48) - - A[:10] = np.NaN - B[-10:] = np.NaN - return A, B - - -@pytest.fixture(params=[0, 1, 2]) -def min_periods(request): - return request.param diff --git a/pandas/tests/window/moments/test_moments_consistency_ewm.py b/pandas/tests/window/moments/test_moments_consistency_ewm.py index 2718bdabee96a..aa3453680190b 100644 --- a/pandas/tests/window/moments/test_moments_consistency_ewm.py +++ b/pandas/tests/window/moments/test_moments_consistency_ewm.py @@ -3,15 +3,6 @@ from pandas import DataFrame, Series, concat import pandas._testing as tm -from pandas.tests.window.common import ( - moments_consistency_cov_data, - moments_consistency_is_constant, - moments_consistency_mock_mean, - moments_consistency_series_data, - moments_consistency_std_data, - moments_consistency_var_data, - moments_consistency_var_debiasing_factors, -) @pytest.mark.parametrize("func", ["cov", "corr"]) @@ -25,18 +16,28 @@ def test_ewm_pairwise_cov_corr(func, frame): @pytest.mark.parametrize("name", ["cov", "corr"]) -def test_ewm_corr_cov(name, binary_ew_data): - A, B = binary_ew_data +def test_ewm_corr_cov(name): + A = Series(np.random.randn(50), index=np.arange(50)) + B = A[2:] + np.random.randn(48) + + A[:10] = np.NaN + B[-10:] = np.NaN result = getattr(A.ewm(com=20, min_periods=5), name)(B) assert np.isnan(result.values[:14]).all() assert not np.isnan(result.values[14:]).any() +@pytest.mark.parametrize("min_periods", [0, 1, 2]) @pytest.mark.parametrize("name", ["cov", "corr"]) -def test_ewm_corr_cov_min_periods(name, min_periods, binary_ew_data): +def test_ewm_corr_cov_min_periods(name, min_periods): # GH 7898 - A, B = binary_ew_data + A = Series(np.random.randn(50), index=np.arange(50)) + B = A[2:] + np.random.randn(48) + + A[:10] = np.NaN + B[-10:] = np.NaN + result = getattr(A.ewm(com=20, min_periods=min_periods), name)(B) # binary functions (ewmcov, ewmcorr) with bias=False require at # least two values @@ -56,248 +57,274 @@ def test_ewm_corr_cov_min_periods(name, min_periods, binary_ew_data): @pytest.mark.parametrize("name", ["cov", "corr"]) -def test_different_input_array_raise_exception(name, binary_ew_data): +def test_different_input_array_raise_exception(name): + A = Series(np.random.randn(50), index=np.arange(50)) + A[:10] = np.NaN - A, _ = binary_ew_data msg = "Input arrays must be of the same type!" # exception raised is Exception with pytest.raises(Exception, match=msg): getattr(A.ewm(com=20, min_periods=5), name)(np.random.randn(50)) -@pytest.mark.slow -@pytest.mark.parametrize("min_periods", [0, 1, 2, 3, 4]) -@pytest.mark.parametrize("adjust", [True, False]) -@pytest.mark.parametrize("ignore_na", [True, False]) -def test_ewm_consistency(consistency_data, min_periods, adjust, ignore_na): - def _weights(s, com, adjust, ignore_na): - if isinstance(s, DataFrame): - if not len(s.columns): - return DataFrame(index=s.index, columns=s.columns) - w = concat( - [ - _weights(s.iloc[:, i], com=com, adjust=adjust, ignore_na=ignore_na) - for i, _ in enumerate(s.columns) - ], - axis=1, - ) - w.index = s.index - w.columns = s.columns - return w - - w = Series(np.nan, index=s.index) - alpha = 1.0 / (1.0 + com) - if ignore_na: - w[s.notna()] = _weights( - s[s.notna()], com=com, adjust=adjust, ignore_na=False - ) - elif adjust: - for i in range(len(s)): - if s.iat[i] == s.iat[i]: - w.iat[i] = pow(1.0 / (1.0 - alpha), i) - else: - sum_wts = 0.0 - prev_i = -1 - for i in range(len(s)): - if s.iat[i] == s.iat[i]: - if prev_i == -1: - w.iat[i] = 1.0 - else: - w.iat[i] = alpha * sum_wts / pow(1.0 - alpha, i - prev_i) - sum_wts += w.iat[i] - prev_i = i +def create_mock_weights(obj, com, adjust, ignore_na): + if isinstance(obj, DataFrame): + if not len(obj.columns): + return DataFrame(index=obj.index, columns=obj.columns) + w = concat( + [ + create_mock_series_weights( + obj.iloc[:, i], com=com, adjust=adjust, ignore_na=ignore_na + ) + for i, _ in enumerate(obj.columns) + ], + axis=1, + ) + w.index = obj.index + w.columns = obj.columns return w + else: + return create_mock_series_weights(obj, com, adjust, ignore_na) - def _variance_debiasing_factors(s, com, adjust, ignore_na): - weights = _weights(s, com=com, adjust=adjust, ignore_na=ignore_na) - cum_sum = weights.cumsum().fillna(method="ffill") - cum_sum_sq = (weights * weights).cumsum().fillna(method="ffill") - numerator = cum_sum * cum_sum - denominator = numerator - cum_sum_sq - denominator[denominator <= 0.0] = np.nan - return numerator / denominator - - def _ewma(s, com, min_periods, adjust, ignore_na): - weights = _weights(s, com=com, adjust=adjust, ignore_na=ignore_na) - result = ( - s.multiply(weights).cumsum().divide(weights.cumsum()).fillna(method="ffill") - ) - result[ - s.expanding().count() < (max(min_periods, 1) if min_periods else 1) - ] = np.nan - return result +def create_mock_series_weights(s, com, adjust, ignore_na): + w = Series(np.nan, index=s.index) + alpha = 1.0 / (1.0 + com) + if adjust: + count = 0 + for i in range(len(s)): + if s.iat[i] == s.iat[i]: + w.iat[i] = pow(1.0 / (1.0 - alpha), count) + count += 1 + elif not ignore_na: + count += 1 + else: + sum_wts = 0.0 + prev_i = -1 + count = 0 + for i in range(len(s)): + if s.iat[i] == s.iat[i]: + if prev_i == -1: + w.iat[i] = 1.0 + else: + w.iat[i] = alpha * sum_wts / pow(1.0 - alpha, count - prev_i) + sum_wts += w.iat[i] + prev_i = count + count += 1 + elif not ignore_na: + count += 1 + return w + + +@pytest.mark.parametrize("min_periods", [0, 1, 2, 3, 4]) +def test_ewm_consistency_mean(consistency_data, adjust, ignore_na, min_periods): x, is_constant, no_nans = consistency_data com = 3.0 - moments_consistency_mock_mean( - x=x, - mean=lambda x: x.ewm( - com=com, min_periods=min_periods, adjust=adjust, ignore_na=ignore_na - ).mean(), - mock_mean=lambda x: _ewma( - x, com=com, min_periods=min_periods, adjust=adjust, ignore_na=ignore_na - ), + + result = x.ewm( + com=com, min_periods=min_periods, adjust=adjust, ignore_na=ignore_na + ).mean() + weights = create_mock_weights(x, com=com, adjust=adjust, ignore_na=ignore_na) + expected = ( + x.multiply(weights).cumsum().divide(weights.cumsum()).fillna(method="ffill") ) + expected[ + x.expanding().count() < (max(min_periods, 1) if min_periods else 1) + ] = np.nan + tm.assert_equal(result, expected.astype("float64")) + + +@pytest.mark.parametrize("min_periods", [0, 1, 2, 3, 4]) +def test_ewm_consistency_consistent(consistency_data, adjust, ignore_na, min_periods): + x, is_constant, no_nans = consistency_data + com = 3.0 - moments_consistency_is_constant( - x=x, - is_constant=is_constant, - min_periods=min_periods, - count=lambda x: x.expanding().count(), - mean=lambda x: x.ewm( + if is_constant: + count_x = x.expanding().count() + mean_x = x.ewm( com=com, min_periods=min_periods, adjust=adjust, ignore_na=ignore_na - ).mean(), - corr=lambda x, y: x.ewm( + ).mean() + # check that correlation of a series with itself is either 1 or NaN + corr_x_x = x.ewm( com=com, min_periods=min_periods, adjust=adjust, ignore_na=ignore_na - ).corr(y), - ) + ).corr(x) + exp = x.max() if isinstance(x, Series) else x.max().max() - moments_consistency_var_debiasing_factors( - x=x, - var_unbiased=lambda x: ( - x.ewm( - com=com, min_periods=min_periods, adjust=adjust, ignore_na=ignore_na - ).var(bias=False) - ), - var_biased=lambda x: ( - x.ewm( - com=com, min_periods=min_periods, adjust=adjust, ignore_na=ignore_na - ).var(bias=True) - ), - var_debiasing_factors=lambda x: ( - _variance_debiasing_factors(x, com=com, adjust=adjust, ignore_na=ignore_na) - ), - ) + # check mean of constant series + expected = x * np.nan + expected[count_x >= max(min_periods, 1)] = exp + tm.assert_equal(mean_x, expected) + + # check correlation of constant series with itself is NaN + expected[:] = np.nan + tm.assert_equal(corr_x_x, expected) @pytest.mark.parametrize("min_periods", [0, 1, 2, 3, 4]) -@pytest.mark.parametrize("adjust", [True, False]) -@pytest.mark.parametrize("ignore_na", [True, False]) -def test_ewm_consistency_var(consistency_data, min_periods, adjust, ignore_na): +def test_ewm_consistency_var_debiasing_factors( + consistency_data, adjust, ignore_na, min_periods +): x, is_constant, no_nans = consistency_data com = 3.0 - moments_consistency_var_data( - x=x, - is_constant=is_constant, - min_periods=min_periods, - count=lambda x: x.expanding().count(), - mean=lambda x: x.ewm( - com=com, min_periods=min_periods, adjust=adjust, ignore_na=ignore_na - ).mean(), - var_unbiased=lambda x: ( - x.ewm( - com=com, min_periods=min_periods, adjust=adjust, ignore_na=ignore_na - ).var(bias=False) - ), - var_biased=lambda x: ( - x.ewm( - com=com, min_periods=min_periods, adjust=adjust, ignore_na=ignore_na - ).var(bias=True) - ), - ) + + # check variance debiasing factors + var_unbiased_x = x.ewm( + com=com, min_periods=min_periods, adjust=adjust, ignore_na=ignore_na + ).var(bias=False) + var_biased_x = x.ewm( + com=com, min_periods=min_periods, adjust=adjust, ignore_na=ignore_na + ).var(bias=True) + + weights = create_mock_weights(x, com=com, adjust=adjust, ignore_na=ignore_na) + cum_sum = weights.cumsum().fillna(method="ffill") + cum_sum_sq = (weights * weights).cumsum().fillna(method="ffill") + numerator = cum_sum * cum_sum + denominator = numerator - cum_sum_sq + denominator[denominator <= 0.0] = np.nan + var_debiasing_factors_x = numerator / denominator + + tm.assert_equal(var_unbiased_x, var_biased_x * var_debiasing_factors_x) @pytest.mark.parametrize("min_periods", [0, 1, 2, 3, 4]) -@pytest.mark.parametrize("adjust", [True, False]) -@pytest.mark.parametrize("ignore_na", [True, False]) -def test_ewm_consistency_std(consistency_data, min_periods, adjust, ignore_na): +@pytest.mark.parametrize("bias", [True, False]) +def test_moments_consistency_var( + consistency_data, adjust, ignore_na, min_periods, bias +): x, is_constant, no_nans = consistency_data com = 3.0 - moments_consistency_std_data( - x=x, - var_unbiased=lambda x: ( - x.ewm( - com=com, min_periods=min_periods, adjust=adjust, ignore_na=ignore_na - ).var(bias=False) - ), - std_unbiased=lambda x: ( - x.ewm( - com=com, min_periods=min_periods, adjust=adjust, ignore_na=ignore_na - ).std(bias=False) - ), - var_biased=lambda x: ( - x.ewm( - com=com, min_periods=min_periods, adjust=adjust, ignore_na=ignore_na - ).var(bias=True) - ), - std_biased=lambda x: x.ewm( + + mean_x = x.ewm( + com=com, min_periods=min_periods, adjust=adjust, ignore_na=ignore_na + ).mean() + var_x = x.ewm( + com=com, min_periods=min_periods, adjust=adjust, ignore_na=ignore_na + ).var(bias=bias) + assert not (var_x < 0).any().any() + + if bias: + # check that biased var(x) == mean(x^2) - mean(x)^2 + mean_x2 = ( + (x * x) + .ewm(com=com, min_periods=min_periods, adjust=adjust, ignore_na=ignore_na) + .mean() + ) + tm.assert_equal(var_x, mean_x2 - (mean_x * mean_x)) + + +@pytest.mark.parametrize("min_periods", [0, 1, 2, 3, 4]) +@pytest.mark.parametrize("bias", [True, False]) +def test_moments_consistency_var_constant( + consistency_data, adjust, ignore_na, min_periods, bias +): + x, is_constant, no_nans = consistency_data + com = 3.0 + if is_constant: + count_x = x.expanding(min_periods=min_periods).count() + var_x = x.ewm( com=com, min_periods=min_periods, adjust=adjust, ignore_na=ignore_na - ).std(bias=True), - ) + ).var(bias=bias) + + # check that variance of constant series is identically 0 + assert not (var_x > 0).any().any() + expected = x * np.nan + expected[count_x >= max(min_periods, 1)] = 0.0 + if not bias: + expected[count_x < 2] = np.nan + tm.assert_equal(var_x, expected) @pytest.mark.parametrize("min_periods", [0, 1, 2, 3, 4]) -@pytest.mark.parametrize("adjust", [True, False]) -@pytest.mark.parametrize("ignore_na", [True, False]) -def test_ewm_consistency_cov(consistency_data, min_periods, adjust, ignore_na): +@pytest.mark.parametrize("bias", [True, False]) +def test_ewm_consistency_std(consistency_data, adjust, ignore_na, min_periods, bias): x, is_constant, no_nans = consistency_data com = 3.0 - moments_consistency_cov_data( - x=x, - var_unbiased=lambda x: ( - x.ewm( - com=com, min_periods=min_periods, adjust=adjust, ignore_na=ignore_na - ).var(bias=False) - ), - cov_unbiased=lambda x, y: ( - x.ewm( - com=com, min_periods=min_periods, adjust=adjust, ignore_na=ignore_na - ).cov(y, bias=False) - ), - var_biased=lambda x: ( - x.ewm( - com=com, min_periods=min_periods, adjust=adjust, ignore_na=ignore_na - ).var(bias=True) - ), - cov_biased=lambda x, y: ( - x.ewm( - com=com, min_periods=min_periods, adjust=adjust, ignore_na=ignore_na - ).cov(y, bias=True) - ), - ) + var_x = x.ewm( + com=com, min_periods=min_periods, adjust=adjust, ignore_na=ignore_na + ).var(bias=bias) + std_x = x.ewm( + com=com, min_periods=min_periods, adjust=adjust, ignore_na=ignore_na + ).std(bias=bias) + assert not (var_x < 0).any().any() + assert not (std_x < 0).any().any() + + # check that var(x) == std(x)^2 + tm.assert_equal(var_x, std_x * std_x) -@pytest.mark.slow @pytest.mark.parametrize("min_periods", [0, 1, 2, 3, 4]) -@pytest.mark.parametrize("adjust", [True, False]) -@pytest.mark.parametrize("ignore_na", [True, False]) -def test_ewm_consistency_series_data(consistency_data, min_periods, adjust, ignore_na): +@pytest.mark.parametrize("bias", [True, False]) +def test_ewm_consistency_cov(consistency_data, adjust, ignore_na, min_periods, bias): x, is_constant, no_nans = consistency_data com = 3.0 - moments_consistency_series_data( - x=x, - mean=lambda x: x.ewm( + var_x = x.ewm( + com=com, min_periods=min_periods, adjust=adjust, ignore_na=ignore_na + ).var(bias=bias) + assert not (var_x < 0).any().any() + + cov_x_x = x.ewm( + com=com, min_periods=min_periods, adjust=adjust, ignore_na=ignore_na + ).cov(x, bias=bias) + assert not (cov_x_x < 0).any().any() + + # check that var(x) == cov(x, x) + tm.assert_equal(var_x, cov_x_x) + + +@pytest.mark.parametrize("min_periods", [0, 1, 2, 3, 4]) +@pytest.mark.parametrize("bias", [True, False]) +def test_ewm_consistency_series_cov_corr( + consistency_data, adjust, ignore_na, min_periods, bias +): + x, is_constant, no_nans = consistency_data + com = 3.0 + + if isinstance(x, Series): + var_x_plus_y = ( + (x + x) + .ewm(com=com, min_periods=min_periods, adjust=adjust, ignore_na=ignore_na) + .var(bias=bias) + ) + var_x = x.ewm( com=com, min_periods=min_periods, adjust=adjust, ignore_na=ignore_na - ).mean(), - corr=lambda x, y: x.ewm( + ).var(bias=bias) + var_y = x.ewm( com=com, min_periods=min_periods, adjust=adjust, ignore_na=ignore_na - ).corr(y), - var_unbiased=lambda x: ( - x.ewm( - com=com, min_periods=min_periods, adjust=adjust, ignore_na=ignore_na - ).var(bias=False) - ), - std_unbiased=lambda x: ( - x.ewm( - com=com, min_periods=min_periods, adjust=adjust, ignore_na=ignore_na - ).std(bias=False) - ), - cov_unbiased=lambda x, y: ( - x.ewm( - com=com, min_periods=min_periods, adjust=adjust, ignore_na=ignore_na - ).cov(y, bias=False) - ), - var_biased=lambda x: ( - x.ewm( - com=com, min_periods=min_periods, adjust=adjust, ignore_na=ignore_na - ).var(bias=True) - ), - std_biased=lambda x: x.ewm( + ).var(bias=bias) + cov_x_y = x.ewm( com=com, min_periods=min_periods, adjust=adjust, ignore_na=ignore_na - ).std(bias=True), - cov_biased=lambda x, y: ( - x.ewm( + ).cov(x, bias=bias) + # check that cov(x, y) == (var(x+y) - var(x) - + # var(y)) / 2 + tm.assert_equal(cov_x_y, 0.5 * (var_x_plus_y - var_x - var_y)) + + # check that corr(x, y) == cov(x, y) / (std(x) * + # std(y)) + corr_x_y = x.ewm( + com=com, min_periods=min_periods, adjust=adjust, ignore_na=ignore_na + ).corr(x, bias=bias) + std_x = x.ewm( + com=com, min_periods=min_periods, adjust=adjust, ignore_na=ignore_na + ).std(bias=bias) + std_y = x.ewm( + com=com, min_periods=min_periods, adjust=adjust, ignore_na=ignore_na + ).std(bias=bias) + tm.assert_equal(corr_x_y, cov_x_y / (std_x * std_y)) + + if bias: + # check that biased cov(x, y) == mean(x*y) - + # mean(x)*mean(y) + mean_x = x.ewm( com=com, min_periods=min_periods, adjust=adjust, ignore_na=ignore_na - ).cov(y, bias=True) - ), - ) + ).mean() + mean_y = x.ewm( + com=com, min_periods=min_periods, adjust=adjust, ignore_na=ignore_na + ).mean() + mean_x_times_y = ( + (x * x) + .ewm( + com=com, min_periods=min_periods, adjust=adjust, ignore_na=ignore_na + ) + .mean() + ) + tm.assert_equal(cov_x_y, mean_x_times_y - (mean_x * mean_y)) diff --git a/pandas/tests/window/moments/test_moments_ewm.py b/pandas/tests/window/moments/test_moments_ewm.py index def6d7289fec2..eceba7f143ab9 100644 --- a/pandas/tests/window/moments/test_moments_ewm.py +++ b/pandas/tests/window/moments/test_moments_ewm.py @@ -226,8 +226,12 @@ def test_ewma_halflife_arg(series): series.ewm() -def test_ewm_alpha(arr): +def test_ewm_alpha(): # GH 10789 + arr = np.random.randn(100) + locs = np.arange(20, 40) + arr[locs] = np.NaN + s = Series(arr) a = s.ewm(alpha=0.61722699889169674).mean() b = s.ewm(com=0.62014947789973052).mean() @@ -254,8 +258,12 @@ def test_ewm_alpha_arg(series): s.ewm(halflife=10.0, alpha=0.5) -def test_ewm_domain_checks(arr): +def test_ewm_domain_checks(): # GH 12492 + arr = np.random.randn(100) + locs = np.arange(20, 40) + arr[locs] = np.NaN + s = Series(arr) msg = "comass must satisfy: comass >= 0" with pytest.raises(ValueError, match=msg):