Skip to content

TST: Clean moments consistency #33813

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 11 commits into from
Apr 26, 2020
155 changes: 85 additions & 70 deletions pandas/tests/window/common.py
Original file line number Diff line number Diff line change
Expand Up @@ -252,43 +252,15 @@ def _test_moments_consistency_var_debiasing_factors(
var_debiasing_factors_x = var_debiasing_factors(x)
tm.assert_equal(var_unbiased_x, var_biased_x * var_debiasing_factors_x)

def _test_moments_consistency(
self,
min_periods,
count,
mean,
corr,
var_unbiased=None,
std_unbiased=None,
cov_unbiased=None,
var_biased=None,
std_biased=None,
cov_biased=None,
def _test_moments_consistency_var_data(
self, min_periods, count, mean, var_unbiased, var_biased
):

for (x, is_constant, no_nans) in self.data:
count_x = count(x)
mean_x = mean(x)

for (std, var, cov) in [
(std_biased, var_biased, cov_biased),
(std_unbiased, var_unbiased, cov_unbiased),
]:

# check that var(x), std(x), and cov(x) are all >= 0
for var in [var_biased, var_unbiased]:
var_x = var(x)
std_x = std(x)
assert not (var_x < 0).any().any()
assert not (std_x < 0).any().any()
if cov:
cov_x_x = cov(x, x)
assert not (cov_x_x < 0).any().any()

# check that var(x) == cov(x, x)
tm.assert_equal(var_x, cov_x_x)

# check that var(x) == std(x)^2
tm.assert_equal(var_x, std_x * std_x)

if var is var_biased:
# check that biased var(x) == mean(x^2) - mean(x)^2
Expand All @@ -304,45 +276,88 @@ def _test_moments_consistency(
expected[count_x < 2] = np.nan
tm.assert_equal(var_x, expected)

if isinstance(x, Series):
for (y, is_constant, no_nans) in self.data:
if not x.isna().equals(y.isna()):
# can only easily test two Series with similar
# structure
continue

# check that cor(x, y) is symmetric
corr_x_y = corr(x, y)
corr_y_x = corr(y, x)
tm.assert_equal(corr_x_y, corr_y_x)

if cov:
# check that cov(x, y) is symmetric
cov_x_y = cov(x, y)
cov_y_x = cov(y, x)
tm.assert_equal(cov_x_y, cov_y_x)

# check that cov(x, y) == (var(x+y) - var(x) -
# var(y)) / 2
var_x_plus_y = var(x + y)
var_y = var(y)
tm.assert_equal(
cov_x_y, 0.5 * (var_x_plus_y - var_x - var_y)
)

# check that corr(x, y) == cov(x, y) / (std(x) *
# std(y))
std_y = std(y)
tm.assert_equal(corr_x_y, cov_x_y / (std_x * std_y))

if cov is cov_biased:
# check that biased cov(x, y) == mean(x*y) -
# mean(x)*mean(y)
mean_y = mean(y)
mean_x_times_y = mean(x * y)
tm.assert_equal(
cov_x_y, mean_x_times_y - (mean_x * mean_y)
)
def _test_moments_consistency_std_data(
    self, std_unbiased, var_unbiased, std_biased, var_biased
):
    """
    Check that each std/var estimator pair is non-negative and consistent.

    For every object in ``self.data`` and for both the biased and the
    unbiased pair, assert that neither ``var(x)`` nor ``std(x)`` contains a
    negative value and that ``var(x)`` equals ``std(x)`` squared.

    Parameters
    ----------
    std_unbiased, var_unbiased : callable
        Unbiased estimators, each called with a single data object.
    std_biased, var_biased : callable
        Biased estimators, each called with a single data object.
    """
    estimator_pairs = (
        (std_biased, var_biased),
        (std_unbiased, var_unbiased),
    )
    for obj, _is_constant, _no_nans in self.data:
        for std_func, var_func in estimator_pairs:
            variance = var_func(obj)
            deviation = std_func(obj)

            # neither moment may ever be negative
            assert not (variance < 0).any().any()
            assert not (deviation < 0).any().any()

            # var(x) == std(x)^2
            tm.assert_equal(variance, deviation * deviation)

def _test_moments_consistency_cov_data(
    self, cov_unbiased, var_unbiased, cov_biased, var_biased
):
    """
    Check that each cov/var estimator pair is non-negative and consistent.

    For every object in ``self.data`` and for both the biased and the
    unbiased pair, assert that ``var(x)`` has no negative value. When the
    covariance estimator of the pair is truthy, additionally assert that
    ``cov(x, x)`` has no negative value and equals ``var(x)``.

    Parameters
    ----------
    cov_unbiased, cov_biased : callable
        Covariance estimators, each called as ``cov(x, x)``; a falsy value
        skips the covariance checks for that pair.
    var_unbiased, var_biased : callable
        Variance estimators, each called with a single data object.
    """
    estimator_pairs = (
        (cov_biased, var_biased),
        (cov_unbiased, var_unbiased),
    )
    for obj, _is_constant, _no_nans in self.data:
        for cov_func, var_func in estimator_pairs:
            variance = var_func(obj)
            assert not (variance < 0).any().any()

            if not cov_func:
                # no covariance estimator supplied for this pair
                continue

            self_cov = cov_func(obj, obj)
            assert not (self_cov < 0).any().any()

            # var(x) == cov(x, x)
            tm.assert_equal(variance, self_cov)

def _test_moments_consistency_series_data(
    self,
    corr,
    mean,
    std_biased,
    std_unbiased,
    cov_unbiased,
    var_unbiased,
    var_biased,
    cov_biased,
):
    """
    Check corr/cov/var/std/mean identities on every Series in ``self.data``.

    Each Series ``x`` is paired with itself (``y`` is ``x``), so both
    operands always share the same NaN structure. Non-Series entries of
    ``self.data`` are skipped.

    For each Series the following are asserted:

    * ``corr(x, y) == corr(y, x)``

    and, for both the biased and the unbiased (std, var, cov) triple whose
    ``cov`` is truthy:

    * ``cov(x, y) == cov(y, x)``
    * ``cov(x, y) == (var(x + y) - var(x) - var(y)) / 2``
    * ``corr(x, y) == cov(x, y) / (std(x) * std(y))``
    * for the biased triple only,
      ``cov(x, y) == mean(x * y) - mean(x) * mean(y)``

    Parameters
    ----------
    corr : callable
        Called as ``corr(x, y)``.
    mean : callable
        Called with a single Series.
    std_biased, std_unbiased, var_biased, var_unbiased : callable
        Called with a single Series.
    cov_biased, cov_unbiased : callable
        Called as ``cov(x, y)``; a falsy value skips the covariance
        identities for that triple.
    """
    estimator_triples = (
        (std_biased, var_biased, cov_biased),
        (std_unbiased, var_unbiased, cov_unbiased),
    )

    for x, _is_constant, _no_nans in self.data:
        if not isinstance(x, Series):
            continue

        # x is only ever compared with itself, so the NaN layout of both
        # operands is identical by construction
        y = x
        mean_x = mean(x)

        # check that corr(x, y) is symmetric
        corr_x_y = corr(x, y)
        corr_y_x = corr(y, x)
        tm.assert_equal(corr_x_y, corr_y_x)

        for std, var, cov in estimator_triples:
            var_x = var(x)
            std_x = std(x)

            if not cov:
                continue

            # check that cov(x, y) is symmetric
            cov_x_y = cov(x, y)
            cov_y_x = cov(y, x)
            tm.assert_equal(cov_x_y, cov_y_x)

            # check that cov(x, y) == (var(x+y) - var(x) - var(y)) / 2
            var_x_plus_y = var(x + y)
            var_y = var(y)
            tm.assert_equal(cov_x_y, 0.5 * (var_x_plus_y - var_x - var_y))

            # check that corr(x, y) == cov(x, y) / (std(x) * std(y))
            std_y = std(y)
            tm.assert_equal(corr_x_y, cov_x_y / (std_x * std_y))

            if cov is cov_biased:
                # check that biased cov(x, y) == mean(x*y) - mean(x)*mean(y)
                mean_y = mean(y)
                mean_x_times_y = mean(x * y)
                tm.assert_equal(cov_x_y, mean_x_times_y - (mean_x * mean_y))

def _check_pairwise_moment(self, dispatch, name, **kwargs):
def get_result(obj, obj2=None):
Expand Down
86 changes: 83 additions & 3 deletions pandas/tests/window/moments/test_moments_ewm.py
Original file line number Diff line number Diff line change
Expand Up @@ -398,10 +398,90 @@ def _ewma(s, com, min_periods, adjust, ignore_na):
)
),
)
# test consistency between different ewm* moments
self._test_moments_consistency(
min_periods=min_periods,

@pytest.mark.parametrize("min_periods", [0, 1, 2, 3, 4])
@pytest.mark.parametrize("adjust", [True, False])
@pytest.mark.parametrize("ignore_na", [True, False])
def test_ewm_consistency_var(self, min_periods, adjust, ignore_na):
    """
    Run the variance consistency checks against ``ewm`` with ``com=3.0``.

    Delegates to ``self._test_moments_consistency_var_data``; ``count`` is
    taken from a plain ``expanding().count()`` while mean and both variance
    estimators come from the parametrized exponentially weighted window.
    """
    ewm_kwargs = {
        "com": 3.0,
        "min_periods": min_periods,
        "adjust": adjust,
        "ignore_na": ignore_na,
    }

    def weighted(obj):
        # the exponentially weighted window shared by every estimator
        return obj.ewm(**ewm_kwargs)

    self._test_moments_consistency_var_data(
        min_periods,
        count=lambda x: x.expanding().count(),
        mean=lambda x: weighted(x).mean(),
        var_unbiased=lambda x: weighted(x).var(bias=False),
        var_biased=lambda x: weighted(x).var(bias=True),
    )

@pytest.mark.parametrize("min_periods", [0, 1, 2, 3, 4])
@pytest.mark.parametrize("adjust", [True, False])
@pytest.mark.parametrize("ignore_na", [True, False])
def test_ewm_consistency_std(self, min_periods, adjust, ignore_na):
    """
    Run the std/var consistency checks against ``ewm`` with ``com=3.0``.

    Delegates to ``self._test_moments_consistency_std_data`` with biased
    and unbiased ``var``/``std`` taken from the parametrized exponentially
    weighted window.
    """
    ewm_kwargs = {
        "com": 3.0,
        "min_periods": min_periods,
        "adjust": adjust,
        "ignore_na": ignore_na,
    }

    def weighted(obj):
        # the exponentially weighted window shared by every estimator
        return obj.ewm(**ewm_kwargs)

    self._test_moments_consistency_std_data(
        var_unbiased=lambda x: weighted(x).var(bias=False),
        std_unbiased=lambda x: weighted(x).std(bias=False),
        var_biased=lambda x: weighted(x).var(bias=True),
        std_biased=lambda x: weighted(x).std(bias=True),
    )

@pytest.mark.parametrize("min_periods", [0, 1, 2, 3, 4])
@pytest.mark.parametrize("adjust", [True, False])
@pytest.mark.parametrize("ignore_na", [True, False])
def test_ewm_consistency_cov(self, min_periods, adjust, ignore_na):
    """
    Run the cov/var consistency checks against ``ewm`` with ``com=3.0``.

    Delegates to ``self._test_moments_consistency_cov_data`` with biased
    and unbiased ``var``/``cov`` taken from the parametrized exponentially
    weighted window.
    """
    ewm_kwargs = {
        "com": 3.0,
        "min_periods": min_periods,
        "adjust": adjust,
        "ignore_na": ignore_na,
    }

    def weighted(obj):
        # the exponentially weighted window shared by every estimator
        return obj.ewm(**ewm_kwargs)

    self._test_moments_consistency_cov_data(
        var_unbiased=lambda x: weighted(x).var(bias=False),
        cov_unbiased=lambda x, y: weighted(x).cov(y, bias=False),
        var_biased=lambda x: weighted(x).var(bias=True),
        cov_biased=lambda x, y: weighted(x).cov(y, bias=True),
    )

@pytest.mark.parametrize("min_periods", [0, 1, 2, 3, 4])
@pytest.mark.parametrize("adjust", [True, False])
@pytest.mark.parametrize("ignore_na", [True, False])
def test_ewm_consistency_series_data(self, min_periods, adjust, ignore_na):
com = 3.0
self._test_moments_consistency_series_data(
mean=lambda x: x.ewm(
com=com, min_periods=min_periods, adjust=adjust, ignore_na=ignore_na
).mean(),
Expand Down
55 changes: 41 additions & 14 deletions pandas/tests/window/moments/test_moments_expanding.py
Original file line number Diff line number Diff line change
Expand Up @@ -367,20 +367,6 @@ def test_expanding_consistency(self, min_periods):
/ (x.expanding().count() - 1.0).replace(0.0, np.nan)
),
)
self._test_moments_consistency(
min_periods=min_periods,
count=lambda x: x.expanding(min_periods=min_periods).count(),
mean=lambda x: x.expanding(min_periods=min_periods).mean(),
corr=lambda x, y: x.expanding(min_periods=min_periods).corr(y),
var_unbiased=lambda x: x.expanding(min_periods=min_periods).var(),
std_unbiased=lambda x: x.expanding(min_periods=min_periods).std(),
cov_unbiased=lambda x, y: x.expanding(min_periods=min_periods).cov(y),
var_biased=lambda x: x.expanding(min_periods=min_periods).var(ddof=0),
std_biased=lambda x: x.expanding(min_periods=min_periods).std(ddof=0),
cov_biased=lambda x, y: x.expanding(min_periods=min_periods).cov(
y, ddof=0
),
)

# test consistency between expanding_xyz() and either (a)
# expanding_apply of Series.xyz(), or (b) expanding_apply of
Expand Down Expand Up @@ -418,3 +404,44 @@ def test_expanding_consistency(self, min_periods):
# GH 9422
if name in ["sum", "prod"]:
tm.assert_equal(expanding_f_result, expanding_apply_f_result)

@pytest.mark.parametrize("min_periods", [0, 1, 2, 3, 4])
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Should these get `@pytest.mark.slow`? `test_rolling_consistency_cov` clocks in at 38 seconds for me.

def test_moments_consistency_var(self, min_periods):
    """
    Run the variance consistency checks against an expanding window.

    Delegates to ``self._test_moments_consistency_var_data`` with count,
    mean and both variance estimators taken from
    ``expanding(min_periods=min_periods)``.
    """

    def window(obj):
        # the expanding window shared by every estimator
        return obj.expanding(min_periods=min_periods)

    self._test_moments_consistency_var_data(
        min_periods=min_periods,
        count=lambda x: window(x).count(),
        mean=lambda x: window(x).mean(),
        var_unbiased=lambda x: window(x).var(),
        var_biased=lambda x: window(x).var(ddof=0),
    )

@pytest.mark.parametrize("min_periods", [0, 1, 2, 3, 4])
def test_expanding_consistency_std(self, min_periods):
    """
    Run the std/var consistency checks against an expanding window.

    Delegates to ``self._test_moments_consistency_std_data``; the unbiased
    estimators use the default ``ddof`` and the biased ones ``ddof=0``.
    """

    def window(obj):
        # the expanding window shared by every estimator
        return obj.expanding(min_periods=min_periods)

    self._test_moments_consistency_std_data(
        var_unbiased=lambda x: window(x).var(),
        std_unbiased=lambda x: window(x).std(),
        var_biased=lambda x: window(x).var(ddof=0),
        std_biased=lambda x: window(x).std(ddof=0),
    )

@pytest.mark.parametrize("min_periods", [0, 1, 2, 3, 4])
def test_expanding_consistency_cov(self, min_periods):
    """
    Run the cov/var consistency checks against an expanding window.

    Delegates to ``self._test_moments_consistency_cov_data``; the unbiased
    estimators use the default ``ddof`` and the biased ones ``ddof=0``.
    """

    def window(obj):
        # the expanding window shared by every estimator
        return obj.expanding(min_periods=min_periods)

    self._test_moments_consistency_cov_data(
        var_unbiased=lambda x: window(x).var(),
        cov_unbiased=lambda x, y: window(x).cov(y),
        var_biased=lambda x: window(x).var(ddof=0),
        cov_biased=lambda x, y: window(x).cov(y, ddof=0),
    )

@pytest.mark.parametrize("min_periods", [0, 1, 2, 3, 4])
def test_expanding_consistency_series(self, min_periods):
    """
    Run the Series-level moment identities against an expanding window.

    Delegates to ``self._test_moments_consistency_series_data``; the
    unbiased estimators use the default ``ddof`` and the biased ones
    ``ddof=0``.
    """

    def window(obj):
        # the expanding window shared by every estimator
        return obj.expanding(min_periods=min_periods)

    self._test_moments_consistency_series_data(
        mean=lambda x: window(x).mean(),
        corr=lambda x, y: window(x).corr(y),
        var_unbiased=lambda x: window(x).var(),
        std_unbiased=lambda x: window(x).std(),
        cov_unbiased=lambda x, y: window(x).cov(y),
        var_biased=lambda x: window(x).var(ddof=0),
        std_biased=lambda x: window(x).std(ddof=0),
        cov_biased=lambda x, y: window(x).cov(y, ddof=0),
    )
Loading