|
1 |
| -import warnings |
2 |
| - |
3 | 1 | import numpy as np
|
4 | 2 | import pytest
|
5 | 3 |
|
6 | 4 | from pandas import DataFrame, Index, MultiIndex, Series, isna, notna
|
7 | 5 | import pandas._testing as tm
|
8 |
| -from pandas.tests.window.common import ( |
9 |
| - moments_consistency_cov_data, |
10 |
| - moments_consistency_is_constant, |
11 |
| - moments_consistency_mock_mean, |
12 |
| - moments_consistency_series_data, |
13 |
| - moments_consistency_std_data, |
14 |
| - moments_consistency_var_data, |
15 |
| - moments_consistency_var_debiasing_factors, |
16 |
| -) |
17 | 6 |
|
18 | 7 |
|
19 | 8 | def test_expanding_corr(series):
|
@@ -171,143 +160,173 @@ def test_expanding_min_periods_apply(engine_and_raw):
|
171 | 160 |
|
172 | 161 |
|
173 | 162 | @pytest.mark.parametrize("min_periods", [0, 1, 2, 3, 4])
|
174 |
| -def test_expanding_apply_consistency( |
175 |
| - consistency_data, base_functions, no_nan_functions, min_periods |
176 |
| -): |
| 163 | +@pytest.mark.parametrize("f", [lambda v: Series(v).sum(), np.nansum]) |
| 164 | +def test_expanding_apply_consistency_sum_nans(consistency_data, min_periods, f): |
177 | 165 | x, is_constant, no_nans = consistency_data
|
178 | 166 |
|
179 |
| - with warnings.catch_warnings(): |
180 |
| - warnings.filterwarnings( |
181 |
| - "ignore", message=".*(empty slice|0 for slice).*", category=RuntimeWarning |
| 167 | + if f is np.nansum and min_periods == 0: |
| 168 | + pass |
| 169 | + else: |
| 170 | + expanding_f_result = x.expanding(min_periods=min_periods).sum() |
| 171 | + expanding_apply_f_result = x.expanding(min_periods=min_periods).apply( |
| 172 | + func=f, raw=True |
182 | 173 | )
|
183 |
| - # test consistency between expanding_xyz() and either (a) |
184 |
| - # expanding_apply of Series.xyz(), or (b) expanding_apply of |
185 |
| - # np.nanxyz() |
186 |
| - functions = base_functions |
187 |
| - |
188 |
| - # GH 8269 |
189 |
| - if no_nans: |
190 |
| - functions = base_functions + no_nan_functions |
191 |
| - for (f, require_min_periods, name) in functions: |
192 |
| - expanding_f = getattr(x.expanding(min_periods=min_periods), name) |
193 |
| - |
194 |
| - if ( |
195 |
| - require_min_periods |
196 |
| - and (min_periods is not None) |
197 |
| - and (min_periods < require_min_periods) |
198 |
| - ): |
199 |
| - continue |
200 |
| - |
201 |
| - if name == "count": |
202 |
| - expanding_f_result = expanding_f() |
203 |
| - expanding_apply_f_result = x.expanding(min_periods=0).apply( |
204 |
| - func=f, raw=True |
205 |
| - ) |
206 |
| - else: |
207 |
| - if name in ["cov", "corr"]: |
208 |
| - expanding_f_result = expanding_f(pairwise=False) |
209 |
| - else: |
210 |
| - expanding_f_result = expanding_f() |
211 |
| - expanding_apply_f_result = x.expanding(min_periods=min_periods).apply( |
212 |
| - func=f, raw=True |
213 |
| - ) |
214 |
| - |
215 |
| - # GH 9422 |
216 |
| - if name in ["sum", "prod"]: |
217 |
| - tm.assert_equal(expanding_f_result, expanding_apply_f_result) |
| 174 | + tm.assert_equal(expanding_f_result, expanding_apply_f_result) |
218 | 175 |
|
219 | 176 |
|
220 | 177 | @pytest.mark.parametrize("min_periods", [0, 1, 2, 3, 4])
|
221 |
| -def test_moments_consistency_var(consistency_data, min_periods): |
| 178 | +@pytest.mark.parametrize("f", [lambda v: Series(v).sum(), np.nansum, np.sum]) |
| 179 | +def test_expanding_apply_consistency_sum_no_nans(consistency_data, min_periods, f): |
| 180 | + |
222 | 181 | x, is_constant, no_nans = consistency_data
|
223 |
| - moments_consistency_var_data( |
224 |
| - x=x, |
225 |
| - is_constant=is_constant, |
226 |
| - min_periods=min_periods, |
227 |
| - count=lambda x: x.expanding(min_periods=min_periods).count(), |
228 |
| - mean=lambda x: x.expanding(min_periods=min_periods).mean(), |
229 |
| - var_unbiased=lambda x: x.expanding(min_periods=min_periods).var(), |
230 |
| - var_biased=lambda x: x.expanding(min_periods=min_periods).var(ddof=0), |
231 |
| - ) |
| 182 | + |
| 183 | + if no_nans: |
| 184 | + if f is np.nansum and min_periods == 0: |
| 185 | + pass |
| 186 | + else: |
| 187 | + expanding_f_result = x.expanding(min_periods=min_periods).sum() |
| 188 | + expanding_apply_f_result = x.expanding(min_periods=min_periods).apply( |
| 189 | + func=f, raw=True |
| 190 | + ) |
| 191 | + tm.assert_equal(expanding_f_result, expanding_apply_f_result) |
232 | 192 |
|
233 | 193 |
|
234 | 194 | @pytest.mark.parametrize("min_periods", [0, 1, 2, 3, 4])
|
235 |
| -def test_expanding_consistency_std(consistency_data, min_periods): |
| 195 | +@pytest.mark.parametrize("ddof", [0, 1]) |
| 196 | +def test_moments_consistency_var(consistency_data, min_periods, ddof): |
236 | 197 | x, is_constant, no_nans = consistency_data
|
237 |
| - moments_consistency_std_data( |
238 |
| - x=x, |
239 |
| - var_unbiased=lambda x: x.expanding(min_periods=min_periods).var(), |
240 |
| - std_unbiased=lambda x: x.expanding(min_periods=min_periods).std(), |
241 |
| - var_biased=lambda x: x.expanding(min_periods=min_periods).var(ddof=0), |
242 |
| - std_biased=lambda x: x.expanding(min_periods=min_periods).std(ddof=0), |
243 |
| - ) |
| 198 | + |
| 199 | + mean_x = x.expanding(min_periods=min_periods).mean() |
| 200 | + var_x = x.expanding(min_periods=min_periods).var(ddof=ddof) |
| 201 | + assert not (var_x < 0).any().any() |
| 202 | + |
| 203 | + if ddof == 0: |
| 204 | + # check that biased var(x) == mean(x^2) - mean(x)^2 |
| 205 | + mean_x2 = (x * x).expanding(min_periods=min_periods).mean() |
| 206 | + tm.assert_equal(var_x, mean_x2 - (mean_x * mean_x)) |
244 | 207 |
|
245 | 208 |
|
246 | 209 | @pytest.mark.parametrize("min_periods", [0, 1, 2, 3, 4])
|
247 |
| -def test_expanding_consistency_cov(consistency_data, min_periods): |
| 210 | +@pytest.mark.parametrize("ddof", [0, 1]) |
| 211 | +def test_moments_consistency_var_constant(consistency_data, min_periods, ddof): |
248 | 212 | x, is_constant, no_nans = consistency_data
|
249 |
| - moments_consistency_cov_data( |
250 |
| - x=x, |
251 |
| - var_unbiased=lambda x: x.expanding(min_periods=min_periods).var(), |
252 |
| - cov_unbiased=lambda x, y: x.expanding(min_periods=min_periods).cov(y), |
253 |
| - var_biased=lambda x: x.expanding(min_periods=min_periods).var(ddof=0), |
254 |
| - cov_biased=lambda x, y: x.expanding(min_periods=min_periods).cov(y, ddof=0), |
255 |
| - ) |
| 213 | + |
| 214 | + if is_constant: |
| 215 | + count_x = x.expanding(min_periods=min_periods).count() |
| 216 | + var_x = x.expanding(min_periods=min_periods).var(ddof=ddof) |
| 217 | + |
| 218 | + # check that variance of constant series is identically 0 |
| 219 | + assert not (var_x > 0).any().any() |
| 220 | + expected = x * np.nan |
| 221 | + expected[count_x >= max(min_periods, 1)] = 0.0 |
| 222 | + if ddof == 1: |
| 223 | + expected[count_x < 2] = np.nan |
| 224 | + tm.assert_equal(var_x, expected) |
| 225 | + |
| 226 | + |
| 227 | +@pytest.mark.parametrize("min_periods", [0, 1, 2, 3, 4]) |
| 228 | +@pytest.mark.parametrize("ddof", [0, 1]) |
| 229 | +def test_expanding_consistency_std(consistency_data, min_periods, ddof): |
| 230 | + x, is_constant, no_nans = consistency_data |
| 231 | + |
| 232 | + var_x = x.expanding(min_periods=min_periods).var(ddof=ddof) |
| 233 | + std_x = x.expanding(min_periods=min_periods).std(ddof=ddof) |
| 234 | + assert not (var_x < 0).any().any() |
| 235 | + assert not (std_x < 0).any().any() |
| 236 | + |
| 237 | + # check that var(x) == std(x)^2 |
| 238 | + tm.assert_equal(var_x, std_x * std_x) |
| 239 | + |
| 240 | + |
| 241 | +@pytest.mark.parametrize("min_periods", [0, 1, 2, 3, 4]) |
| 242 | +@pytest.mark.parametrize("ddof", [0, 1]) |
| 243 | +def test_expanding_consistency_cov(consistency_data, min_periods, ddof): |
| 244 | + x, is_constant, no_nans = consistency_data |
| 245 | + var_x = x.expanding(min_periods=min_periods).var(ddof=ddof) |
| 246 | + assert not (var_x < 0).any().any() |
| 247 | + |
| 248 | + cov_x_x = x.expanding(min_periods=min_periods).cov(x, ddof=ddof) |
| 249 | + assert not (cov_x_x < 0).any().any() |
| 250 | + |
| 251 | + # check that var(x) == cov(x, x) |
| 252 | + tm.assert_equal(var_x, cov_x_x) |
| 253 | + |
| 254 | + |
| 255 | +@pytest.mark.parametrize("min_periods", [0, 1, 2, 3, 4]) |
| 256 | +@pytest.mark.parametrize("ddof", [0, 1]) |
| 257 | +def test_expanding_consistency_series_cov_corr(consistency_data, min_periods, ddof): |
| 258 | + x, is_constant, no_nans = consistency_data |
| 259 | + |
| 260 | + if isinstance(x, Series): |
| 261 | + var_x_plus_y = (x + x).expanding(min_periods=min_periods).var(ddof=ddof) |
| 262 | + var_x = x.expanding(min_periods=min_periods).var(ddof=ddof) |
| 263 | + var_y = x.expanding(min_periods=min_periods).var(ddof=ddof) |
| 264 | + cov_x_y = x.expanding(min_periods=min_periods).cov(x, ddof=ddof) |
| 265 | + # check that cov(x, y) == (var(x+y) - var(x) - |
| 266 | + # var(y)) / 2 |
| 267 | + tm.assert_equal(cov_x_y, 0.5 * (var_x_plus_y - var_x - var_y)) |
| 268 | + |
| 269 | + # check that corr(x, y) == cov(x, y) / (std(x) * |
| 270 | + # std(y)) |
| 271 | + corr_x_y = x.expanding(min_periods=min_periods).corr(x) |
| 272 | + std_x = x.expanding(min_periods=min_periods).std(ddof=ddof) |
| 273 | + std_y = x.expanding(min_periods=min_periods).std(ddof=ddof) |
| 274 | + tm.assert_equal(corr_x_y, cov_x_y / (std_x * std_y)) |
| 275 | + |
| 276 | + if ddof == 0: |
| 277 | + # check that biased cov(x, y) == mean(x*y) - |
| 278 | + # mean(x)*mean(y) |
| 279 | + mean_x = x.expanding(min_periods=min_periods).mean() |
| 280 | + mean_y = x.expanding(min_periods=min_periods).mean() |
| 281 | + mean_x_times_y = (x * x).expanding(min_periods=min_periods).mean() |
| 282 | + tm.assert_equal(cov_x_y, mean_x_times_y - (mean_x * mean_y)) |
256 | 283 |
|
257 | 284 |
|
258 | 285 | @pytest.mark.parametrize("min_periods", [0, 1, 2, 3, 4])
|
259 |
| -def test_expanding_consistency_series(consistency_data, min_periods): |
| 286 | +def test_expanding_consistency_mean(consistency_data, min_periods): |
260 | 287 | x, is_constant, no_nans = consistency_data
|
261 |
| - moments_consistency_series_data( |
262 |
| - x=x, |
263 |
| - mean=lambda x: x.expanding(min_periods=min_periods).mean(), |
264 |
| - corr=lambda x, y: x.expanding(min_periods=min_periods).corr(y), |
265 |
| - var_unbiased=lambda x: x.expanding(min_periods=min_periods).var(), |
266 |
| - std_unbiased=lambda x: x.expanding(min_periods=min_periods).std(), |
267 |
| - cov_unbiased=lambda x, y: x.expanding(min_periods=min_periods).cov(y), |
268 |
| - var_biased=lambda x: x.expanding(min_periods=min_periods).var(ddof=0), |
269 |
| - std_biased=lambda x: x.expanding(min_periods=min_periods).std(ddof=0), |
270 |
| - cov_biased=lambda x, y: x.expanding(min_periods=min_periods).cov(y, ddof=0), |
| 288 | + |
| 289 | + result = x.expanding(min_periods=min_periods).mean() |
| 290 | + expected = ( |
| 291 | + x.expanding(min_periods=min_periods).sum() |
| 292 | + / x.expanding(min_periods=min_periods).count() |
271 | 293 | )
|
| 294 | + tm.assert_equal(result, expected.astype("float64")) |
272 | 295 |
|
273 | 296 |
|
274 |
| -@pytest.mark.slow |
275 | 297 | @pytest.mark.parametrize("min_periods", [0, 1, 2, 3, 4])
|
276 |
| -def test_expanding_consistency(consistency_data, min_periods): |
| 298 | +def test_expanding_consistency_constant(consistency_data, min_periods): |
277 | 299 | x, is_constant, no_nans = consistency_data
|
278 |
| - # suppress warnings about empty slices, as we are deliberately testing |
279 |
| - # with empty/0-length Series/DataFrames |
280 |
| - with warnings.catch_warnings(): |
281 |
| - warnings.filterwarnings( |
282 |
| - "ignore", message=".*(empty slice|0 for slice).*", category=RuntimeWarning |
283 |
| - ) |
284 | 300 |
|
285 |
| - # test consistency between different expanding_* moments |
286 |
| - moments_consistency_mock_mean( |
287 |
| - x=x, |
288 |
| - mean=lambda x: x.expanding(min_periods=min_periods).mean(), |
289 |
| - mock_mean=lambda x: x.expanding(min_periods=min_periods).sum() |
290 |
| - / x.expanding().count(), |
291 |
| - ) |
| 301 | + if is_constant: |
| 302 | + count_x = x.expanding().count() |
| 303 | + mean_x = x.expanding(min_periods=min_periods).mean() |
| 304 | + # check that correlation of a series with itself is either 1 or NaN |
| 305 | + corr_x_x = x.expanding(min_periods=min_periods).corr(x) |
292 | 306 |
|
293 |
| - moments_consistency_is_constant( |
294 |
| - x=x, |
295 |
| - is_constant=is_constant, |
296 |
| - min_periods=min_periods, |
297 |
| - count=lambda x: x.expanding().count(), |
298 |
| - mean=lambda x: x.expanding(min_periods=min_periods).mean(), |
299 |
| - corr=lambda x, y: x.expanding(min_periods=min_periods).corr(y), |
300 |
| - ) |
| 307 | + exp = x.max() if isinstance(x, Series) else x.max().max() |
301 | 308 |
|
302 |
| - moments_consistency_var_debiasing_factors( |
303 |
| - x=x, |
304 |
| - var_unbiased=lambda x: x.expanding(min_periods=min_periods).var(), |
305 |
| - var_biased=lambda x: x.expanding(min_periods=min_periods).var(ddof=0), |
306 |
| - var_debiasing_factors=lambda x: ( |
307 |
| - x.expanding().count() |
308 |
| - / (x.expanding().count() - 1.0).replace(0.0, np.nan) |
309 |
| - ), |
310 |
| - ) |
| 309 | + # check mean of constant series |
| 310 | + expected = x * np.nan |
| 311 | + expected[count_x >= max(min_periods, 1)] = exp |
| 312 | + tm.assert_equal(mean_x, expected) |
| 313 | + |
| 314 | + # check correlation of constant series with itself is NaN |
| 315 | + expected[:] = np.nan |
| 316 | + tm.assert_equal(corr_x_x, expected) |
| 317 | + |
| 318 | + |
| 319 | +@pytest.mark.parametrize("min_periods", [0, 1, 2, 3, 4]) |
| 320 | +def test_expanding_consistency_var_debiasing_factors(consistency_data, min_periods): |
| 321 | + x, is_constant, no_nans = consistency_data |
| 322 | + |
| 323 | + # check variance debiasing factors |
| 324 | + var_unbiased_x = x.expanding(min_periods=min_periods).var() |
| 325 | + var_biased_x = x.expanding(min_periods=min_periods).var(ddof=0) |
| 326 | + var_debiasing_factors_x = x.expanding().count() / ( |
| 327 | + x.expanding().count() - 1.0 |
| 328 | + ).replace(0.0, np.nan) |
| 329 | + tm.assert_equal(var_unbiased_x, var_biased_x * var_debiasing_factors_x) |
311 | 330 |
|
312 | 331 |
|
313 | 332 | @pytest.mark.parametrize(
|
|
0 commit comments