Skip to content

Commit a022d7b

Browse files
mroeschke (Matt Roeschke) and Matt Roeschke authored
CLN: test_moments_expanding_consistency.py (#37944)
* Break up slow test
* Refactor series test
* Refactor std and var tests
* Refactor var tests
* Refactor apply sum tests
* Remove unused imports

Co-authored-by: Matt Roeschke <[email protected]>
1 parent 16c693a commit a022d7b

File tree

1 file changed

+140
-121
lines changed

1 file changed

+140
-121
lines changed

pandas/tests/window/moments/test_moments_consistency_expanding.py

+140-121
Original file line numberDiff line numberDiff line change
@@ -1,19 +1,8 @@
1-
import warnings
2-
31
import numpy as np
42
import pytest
53

64
from pandas import DataFrame, Index, MultiIndex, Series, isna, notna
75
import pandas._testing as tm
8-
from pandas.tests.window.common import (
9-
moments_consistency_cov_data,
10-
moments_consistency_is_constant,
11-
moments_consistency_mock_mean,
12-
moments_consistency_series_data,
13-
moments_consistency_std_data,
14-
moments_consistency_var_data,
15-
moments_consistency_var_debiasing_factors,
16-
)
176

187

198
def test_expanding_corr(series):
@@ -171,143 +160,173 @@ def test_expanding_min_periods_apply(engine_and_raw):
171160

172161

173162
@pytest.mark.parametrize("min_periods", [0, 1, 2, 3, 4])
174-
def test_expanding_apply_consistency(
175-
consistency_data, base_functions, no_nan_functions, min_periods
176-
):
163+
@pytest.mark.parametrize("f", [lambda v: Series(v).sum(), np.nansum])
164+
def test_expanding_apply_consistency_sum_nans(consistency_data, min_periods, f):
177165
x, is_constant, no_nans = consistency_data
178166

179-
with warnings.catch_warnings():
180-
warnings.filterwarnings(
181-
"ignore", message=".*(empty slice|0 for slice).*", category=RuntimeWarning
167+
if f is np.nansum and min_periods == 0:
168+
pass
169+
else:
170+
expanding_f_result = x.expanding(min_periods=min_periods).sum()
171+
expanding_apply_f_result = x.expanding(min_periods=min_periods).apply(
172+
func=f, raw=True
182173
)
183-
# test consistency between expanding_xyz() and either (a)
184-
# expanding_apply of Series.xyz(), or (b) expanding_apply of
185-
# np.nanxyz()
186-
functions = base_functions
187-
188-
# GH 8269
189-
if no_nans:
190-
functions = base_functions + no_nan_functions
191-
for (f, require_min_periods, name) in functions:
192-
expanding_f = getattr(x.expanding(min_periods=min_periods), name)
193-
194-
if (
195-
require_min_periods
196-
and (min_periods is not None)
197-
and (min_periods < require_min_periods)
198-
):
199-
continue
200-
201-
if name == "count":
202-
expanding_f_result = expanding_f()
203-
expanding_apply_f_result = x.expanding(min_periods=0).apply(
204-
func=f, raw=True
205-
)
206-
else:
207-
if name in ["cov", "corr"]:
208-
expanding_f_result = expanding_f(pairwise=False)
209-
else:
210-
expanding_f_result = expanding_f()
211-
expanding_apply_f_result = x.expanding(min_periods=min_periods).apply(
212-
func=f, raw=True
213-
)
214-
215-
# GH 9422
216-
if name in ["sum", "prod"]:
217-
tm.assert_equal(expanding_f_result, expanding_apply_f_result)
174+
tm.assert_equal(expanding_f_result, expanding_apply_f_result)
218175

219176

220177
@pytest.mark.parametrize("min_periods", [0, 1, 2, 3, 4])
221-
def test_moments_consistency_var(consistency_data, min_periods):
178+
@pytest.mark.parametrize("f", [lambda v: Series(v).sum(), np.nansum, np.sum])
179+
def test_expanding_apply_consistency_sum_no_nans(consistency_data, min_periods, f):
180+
222181
x, is_constant, no_nans = consistency_data
223-
moments_consistency_var_data(
224-
x=x,
225-
is_constant=is_constant,
226-
min_periods=min_periods,
227-
count=lambda x: x.expanding(min_periods=min_periods).count(),
228-
mean=lambda x: x.expanding(min_periods=min_periods).mean(),
229-
var_unbiased=lambda x: x.expanding(min_periods=min_periods).var(),
230-
var_biased=lambda x: x.expanding(min_periods=min_periods).var(ddof=0),
231-
)
182+
183+
if no_nans:
184+
if f is np.nansum and min_periods == 0:
185+
pass
186+
else:
187+
expanding_f_result = x.expanding(min_periods=min_periods).sum()
188+
expanding_apply_f_result = x.expanding(min_periods=min_periods).apply(
189+
func=f, raw=True
190+
)
191+
tm.assert_equal(expanding_f_result, expanding_apply_f_result)
232192

233193

234194
@pytest.mark.parametrize("min_periods", [0, 1, 2, 3, 4])
235-
def test_expanding_consistency_std(consistency_data, min_periods):
195+
@pytest.mark.parametrize("ddof", [0, 1])
196+
def test_moments_consistency_var(consistency_data, min_periods, ddof):
236197
x, is_constant, no_nans = consistency_data
237-
moments_consistency_std_data(
238-
x=x,
239-
var_unbiased=lambda x: x.expanding(min_periods=min_periods).var(),
240-
std_unbiased=lambda x: x.expanding(min_periods=min_periods).std(),
241-
var_biased=lambda x: x.expanding(min_periods=min_periods).var(ddof=0),
242-
std_biased=lambda x: x.expanding(min_periods=min_periods).std(ddof=0),
243-
)
198+
199+
mean_x = x.expanding(min_periods=min_periods).mean()
200+
var_x = x.expanding(min_periods=min_periods).var(ddof=ddof)
201+
assert not (var_x < 0).any().any()
202+
203+
if ddof == 0:
204+
# check that biased var(x) == mean(x^2) - mean(x)^2
205+
mean_x2 = (x * x).expanding(min_periods=min_periods).mean()
206+
tm.assert_equal(var_x, mean_x2 - (mean_x * mean_x))
244207

245208

246209
@pytest.mark.parametrize("min_periods", [0, 1, 2, 3, 4])
247-
def test_expanding_consistency_cov(consistency_data, min_periods):
210+
@pytest.mark.parametrize("ddof", [0, 1])
211+
def test_moments_consistency_var_constant(consistency_data, min_periods, ddof):
248212
x, is_constant, no_nans = consistency_data
249-
moments_consistency_cov_data(
250-
x=x,
251-
var_unbiased=lambda x: x.expanding(min_periods=min_periods).var(),
252-
cov_unbiased=lambda x, y: x.expanding(min_periods=min_periods).cov(y),
253-
var_biased=lambda x: x.expanding(min_periods=min_periods).var(ddof=0),
254-
cov_biased=lambda x, y: x.expanding(min_periods=min_periods).cov(y, ddof=0),
255-
)
213+
214+
if is_constant:
215+
count_x = x.expanding(min_periods=min_periods).count()
216+
var_x = x.expanding(min_periods=min_periods).var(ddof=ddof)
217+
218+
# check that variance of constant series is identically 0
219+
assert not (var_x > 0).any().any()
220+
expected = x * np.nan
221+
expected[count_x >= max(min_periods, 1)] = 0.0
222+
if ddof == 1:
223+
expected[count_x < 2] = np.nan
224+
tm.assert_equal(var_x, expected)
225+
226+
227+
@pytest.mark.parametrize("min_periods", [0, 1, 2, 3, 4])
@pytest.mark.parametrize("ddof", [0, 1])
def test_expanding_consistency_std(consistency_data, min_periods, ddof):
    """Check that expanding ``var`` equals the square of expanding ``std``.

    ``consistency_data`` is unpacked as ``(x, is_constant, no_nans)``; only
    ``x`` is used here.  Both statistics are computed with the same
    ``min_periods`` and ``ddof`` and neither may contain a negative value.
    """
    x, _is_constant, _no_nans = consistency_data

    # Build the expanding window once; var and std share the same settings.
    window = x.expanding(min_periods=min_periods)
    var_x = window.var(ddof=ddof)
    std_x = window.std(ddof=ddof)
    assert not (var_x < 0).any().any()
    assert not (std_x < 0).any().any()

    # check that var(x) == std(x)^2
    tm.assert_equal(var_x, std_x * std_x)
239+
240+
241+
@pytest.mark.parametrize("min_periods", [0, 1, 2, 3, 4])
@pytest.mark.parametrize("ddof", [0, 1])
def test_expanding_consistency_cov(consistency_data, min_periods, ddof):
    """Check that expanding ``var(x)`` equals expanding ``cov(x, x)``.

    ``consistency_data`` is unpacked as ``(x, is_constant, no_nans)``; only
    ``x`` is used here.  Variance and self-covariance are computed with the
    same ``min_periods`` and ``ddof`` and neither may contain a negative
    value.
    """
    x, _is_constant, _no_nans = consistency_data

    # Build the expanding window once; var and cov share the same settings.
    window = x.expanding(min_periods=min_periods)

    var_x = window.var(ddof=ddof)
    assert not (var_x < 0).any().any()

    cov_x_x = window.cov(x, ddof=ddof)
    assert not (cov_x_x < 0).any().any()

    # check that var(x) == cov(x, x)
    tm.assert_equal(var_x, cov_x_x)
253+
254+
255+
@pytest.mark.parametrize("min_periods", [0, 1, 2, 3, 4])
@pytest.mark.parametrize("ddof", [0, 1])
def test_expanding_consistency_series_cov_corr(consistency_data, min_periods, ddof):
    """Check cov/corr identities for an expanding window over a Series.

    ``consistency_data`` is unpacked as ``(x, is_constant, no_nans)``; only
    ``x`` is used.  The checks run only when ``x`` is a ``Series``; for any
    other input the test returns without asserting anything.

    The second operand ``y`` is ``x`` itself, so every identity below is
    exercised with both operands equal.
    """
    x, _is_constant, _no_nans = consistency_data

    # Non-Series inputs are not covered by this test.
    if not isinstance(x, Series):
        return

    # The second operand is the series itself.
    y = x
    window_x = x.expanding(min_periods=min_periods)
    window_y = y.expanding(min_periods=min_periods)

    var_x_plus_y = (x + y).expanding(min_periods=min_periods).var(ddof=ddof)
    var_x = window_x.var(ddof=ddof)
    var_y = window_y.var(ddof=ddof)
    cov_x_y = window_x.cov(y, ddof=ddof)
    # check that cov(x, y) == (var(x+y) - var(x) -
    # var(y)) / 2
    tm.assert_equal(cov_x_y, 0.5 * (var_x_plus_y - var_x - var_y))

    # check that corr(x, y) == cov(x, y) / (std(x) *
    # std(y))
    corr_x_y = window_x.corr(y)
    std_x = window_x.std(ddof=ddof)
    std_y = window_y.std(ddof=ddof)
    tm.assert_equal(corr_x_y, cov_x_y / (std_x * std_y))

    if ddof == 0:
        # check that biased cov(x, y) == mean(x*y) -
        # mean(x)*mean(y)
        mean_x = window_x.mean()
        mean_y = window_y.mean()
        mean_x_times_y = (x * y).expanding(min_periods=min_periods).mean()
        tm.assert_equal(cov_x_y, mean_x_times_y - (mean_x * mean_y))
256283

257284

258285
@pytest.mark.parametrize("min_periods", [0, 1, 2, 3, 4])
259-
def test_expanding_consistency_series(consistency_data, min_periods):
286+
def test_expanding_consistency_mean(consistency_data, min_periods):
260287
x, is_constant, no_nans = consistency_data
261-
moments_consistency_series_data(
262-
x=x,
263-
mean=lambda x: x.expanding(min_periods=min_periods).mean(),
264-
corr=lambda x, y: x.expanding(min_periods=min_periods).corr(y),
265-
var_unbiased=lambda x: x.expanding(min_periods=min_periods).var(),
266-
std_unbiased=lambda x: x.expanding(min_periods=min_periods).std(),
267-
cov_unbiased=lambda x, y: x.expanding(min_periods=min_periods).cov(y),
268-
var_biased=lambda x: x.expanding(min_periods=min_periods).var(ddof=0),
269-
std_biased=lambda x: x.expanding(min_periods=min_periods).std(ddof=0),
270-
cov_biased=lambda x, y: x.expanding(min_periods=min_periods).cov(y, ddof=0),
288+
289+
result = x.expanding(min_periods=min_periods).mean()
290+
expected = (
291+
x.expanding(min_periods=min_periods).sum()
292+
/ x.expanding(min_periods=min_periods).count()
271293
)
294+
tm.assert_equal(result, expected.astype("float64"))
272295

273296

274-
@pytest.mark.slow
275297
@pytest.mark.parametrize("min_periods", [0, 1, 2, 3, 4])
276-
def test_expanding_consistency(consistency_data, min_periods):
298+
def test_expanding_consistency_constant(consistency_data, min_periods):
277299
x, is_constant, no_nans = consistency_data
278-
# suppress warnings about empty slices, as we are deliberately testing
279-
# with empty/0-length Series/DataFrames
280-
with warnings.catch_warnings():
281-
warnings.filterwarnings(
282-
"ignore", message=".*(empty slice|0 for slice).*", category=RuntimeWarning
283-
)
284300

285-
# test consistency between different expanding_* moments
286-
moments_consistency_mock_mean(
287-
x=x,
288-
mean=lambda x: x.expanding(min_periods=min_periods).mean(),
289-
mock_mean=lambda x: x.expanding(min_periods=min_periods).sum()
290-
/ x.expanding().count(),
291-
)
301+
if is_constant:
302+
count_x = x.expanding().count()
303+
mean_x = x.expanding(min_periods=min_periods).mean()
304+
# check that correlation of a series with itself is either 1 or NaN
305+
corr_x_x = x.expanding(min_periods=min_periods).corr(x)
292306

293-
moments_consistency_is_constant(
294-
x=x,
295-
is_constant=is_constant,
296-
min_periods=min_periods,
297-
count=lambda x: x.expanding().count(),
298-
mean=lambda x: x.expanding(min_periods=min_periods).mean(),
299-
corr=lambda x, y: x.expanding(min_periods=min_periods).corr(y),
300-
)
307+
exp = x.max() if isinstance(x, Series) else x.max().max()
301308

302-
moments_consistency_var_debiasing_factors(
303-
x=x,
304-
var_unbiased=lambda x: x.expanding(min_periods=min_periods).var(),
305-
var_biased=lambda x: x.expanding(min_periods=min_periods).var(ddof=0),
306-
var_debiasing_factors=lambda x: (
307-
x.expanding().count()
308-
/ (x.expanding().count() - 1.0).replace(0.0, np.nan)
309-
),
310-
)
309+
# check mean of constant series
310+
expected = x * np.nan
311+
expected[count_x >= max(min_periods, 1)] = exp
312+
tm.assert_equal(mean_x, expected)
313+
314+
# check correlation of constant series with itself is NaN
315+
expected[:] = np.nan
316+
tm.assert_equal(corr_x_x, expected)
317+
318+
319+
@pytest.mark.parametrize("min_periods", [0, 1, 2, 3, 4])
def test_expanding_consistency_var_debiasing_factors(consistency_data, min_periods):
    """Check that unbiased var equals biased var times ``n / (n - 1)``.

    ``consistency_data`` is unpacked as ``(x, is_constant, no_nans)``; only
    ``x`` is used.  The observation count ``n`` is taken from
    ``x.expanding().count()`` (no explicit ``min_periods``), while both
    variances use the parametrized ``min_periods``.
    """
    x, _is_constant, _no_nans = consistency_data

    # check variance debiasing factors
    window = x.expanding(min_periods=min_periods)
    var_unbiased_x = window.var()
    var_biased_x = window.var(ddof=0)

    # Compute the count once; a zero denominator (n == 1) is replaced by NaN
    # so the factor is NaN there rather than the result of dividing by zero.
    count_x = x.expanding().count()
    var_debiasing_factors_x = count_x / (count_x - 1.0).replace(0.0, np.nan)
    tm.assert_equal(var_unbiased_x, var_biased_x * var_debiasing_factors_x)
311330

312331

313332
@pytest.mark.parametrize(

0 commit comments

Comments
 (0)