|
1 | 1 | import numpy as np
|
2 | 2 | import pytest
|
3 | 3 |
|
4 |
| -from pandas import ( |
5 |
| - DataFrame, |
6 |
| - Index, |
7 |
| - MultiIndex, |
8 |
| - Series, |
9 |
| - isna, |
10 |
| - notna, |
11 |
| -) |
| 4 | +from pandas import Series |
12 | 5 | import pandas._testing as tm
|
13 | 6 |
|
14 | 7 |
|
15 |
| -def test_expanding_corr(series): |
16 |
| - A = series.dropna() |
17 |
| - B = (A + np.random.randn(len(A)))[:-5] |
18 |
| - |
19 |
| - result = A.expanding().corr(B) |
20 |
| - |
21 |
| - rolling_result = A.rolling(window=len(A), min_periods=1).corr(B) |
22 |
| - |
23 |
| - tm.assert_almost_equal(rolling_result, result) |
24 |
| - |
25 |
| - |
26 |
| -def test_expanding_count(series): |
27 |
| - result = series.expanding(min_periods=0).count() |
28 |
| - tm.assert_almost_equal( |
29 |
| - result, series.rolling(window=len(series), min_periods=0).count() |
30 |
| - ) |
31 |
| - |
32 |
| - |
33 |
| -def test_expanding_quantile(series): |
34 |
| - result = series.expanding().quantile(0.5) |
35 |
| - |
36 |
| - rolling_result = series.rolling(window=len(series), min_periods=1).quantile(0.5) |
37 |
| - |
38 |
| - tm.assert_almost_equal(result, rolling_result) |
39 |
| - |
40 |
| - |
41 |
| -def test_expanding_cov(series): |
42 |
| - A = series |
43 |
| - B = (A + np.random.randn(len(A)))[:-5] |
44 |
| - |
45 |
| - result = A.expanding().cov(B) |
46 |
| - |
47 |
| - rolling_result = A.rolling(window=len(A), min_periods=1).cov(B) |
48 |
| - |
49 |
| - tm.assert_almost_equal(rolling_result, result) |
50 |
| - |
51 |
| - |
52 |
| -def test_expanding_cov_pairwise(frame): |
53 |
| - result = frame.expanding().cov() |
54 |
| - |
55 |
| - rolling_result = frame.rolling(window=len(frame), min_periods=1).cov() |
56 |
| - |
57 |
| - tm.assert_frame_equal(result, rolling_result) |
58 |
| - |
59 |
| - |
60 |
| -def test_expanding_corr_pairwise(frame): |
61 |
| - result = frame.expanding().corr() |
62 |
| - |
63 |
| - rolling_result = frame.rolling(window=len(frame), min_periods=1).corr() |
64 |
| - tm.assert_frame_equal(result, rolling_result) |
65 |
| - |
66 |
| - |
67 |
| -@pytest.mark.parametrize( |
68 |
| - "func,static_comp", |
69 |
| - [("sum", np.sum), ("mean", np.mean), ("max", np.max), ("min", np.min)], |
70 |
| - ids=["sum", "mean", "max", "min"], |
71 |
| -) |
72 |
| -def test_expanding_func(func, static_comp, frame_or_series): |
73 |
| - data = frame_or_series(np.array(list(range(10)) + [np.nan] * 10)) |
74 |
| - result = getattr(data.expanding(min_periods=1, axis=0), func)() |
75 |
| - assert isinstance(result, frame_or_series) |
76 |
| - |
77 |
| - if frame_or_series is Series: |
78 |
| - tm.assert_almost_equal(result[10], static_comp(data[:11])) |
79 |
| - else: |
80 |
| - tm.assert_series_equal( |
81 |
| - result.iloc[10], static_comp(data[:11]), check_names=False |
82 |
| - ) |
83 |
| - |
84 |
| - |
85 |
| -@pytest.mark.parametrize( |
86 |
| - "func,static_comp", |
87 |
| - [("sum", np.sum), ("mean", np.mean), ("max", np.max), ("min", np.min)], |
88 |
| - ids=["sum", "mean", "max", "min"], |
89 |
| -) |
90 |
| -def test_expanding_min_periods(func, static_comp): |
91 |
| - ser = Series(np.random.randn(50)) |
92 |
| - |
93 |
| - result = getattr(ser.expanding(min_periods=30, axis=0), func)() |
94 |
| - assert result[:29].isna().all() |
95 |
| - tm.assert_almost_equal(result.iloc[-1], static_comp(ser[:50])) |
96 |
| - |
97 |
| - # min_periods is working correctly |
98 |
| - result = getattr(ser.expanding(min_periods=15, axis=0), func)() |
99 |
| - assert isna(result.iloc[13]) |
100 |
| - assert notna(result.iloc[14]) |
101 |
| - |
102 |
| - ser2 = Series(np.random.randn(20)) |
103 |
| - result = getattr(ser2.expanding(min_periods=5, axis=0), func)() |
104 |
| - assert isna(result[3]) |
105 |
| - assert notna(result[4]) |
106 |
| - |
107 |
| - # min_periods=0 |
108 |
| - result0 = getattr(ser.expanding(min_periods=0, axis=0), func)() |
109 |
| - result1 = getattr(ser.expanding(min_periods=1, axis=0), func)() |
110 |
| - tm.assert_almost_equal(result0, result1) |
111 |
| - |
112 |
| - result = getattr(ser.expanding(min_periods=1, axis=0), func)() |
113 |
| - tm.assert_almost_equal(result.iloc[-1], static_comp(ser[:50])) |
114 |
| - |
115 |
| - |
116 |
| -def test_expanding_apply(engine_and_raw, frame_or_series): |
117 |
| - engine, raw = engine_and_raw |
118 |
| - data = frame_or_series(np.array(list(range(10)) + [np.nan] * 10)) |
119 |
| - result = data.expanding(min_periods=1).apply( |
120 |
| - lambda x: x.mean(), raw=raw, engine=engine |
121 |
| - ) |
122 |
| - assert isinstance(result, frame_or_series) |
123 |
| - |
124 |
| - if frame_or_series is Series: |
125 |
| - tm.assert_almost_equal(result[9], np.mean(data[:11])) |
126 |
| - else: |
127 |
| - tm.assert_series_equal(result.iloc[9], np.mean(data[:11]), check_names=False) |
128 |
| - |
129 |
| - |
130 |
| -def test_expanding_min_periods_apply(engine_and_raw): |
131 |
| - engine, raw = engine_and_raw |
132 |
| - ser = Series(np.random.randn(50)) |
133 |
| - |
134 |
| - result = ser.expanding(min_periods=30).apply( |
135 |
| - lambda x: x.mean(), raw=raw, engine=engine |
136 |
| - ) |
137 |
| - assert result[:29].isna().all() |
138 |
| - tm.assert_almost_equal(result.iloc[-1], np.mean(ser[:50])) |
139 |
| - |
140 |
| - # min_periods is working correctly |
141 |
| - result = ser.expanding(min_periods=15).apply( |
142 |
| - lambda x: x.mean(), raw=raw, engine=engine |
143 |
| - ) |
144 |
| - assert isna(result.iloc[13]) |
145 |
| - assert notna(result.iloc[14]) |
146 |
| - |
147 |
| - ser2 = Series(np.random.randn(20)) |
148 |
| - result = ser2.expanding(min_periods=5).apply( |
149 |
| - lambda x: x.mean(), raw=raw, engine=engine |
150 |
| - ) |
151 |
| - assert isna(result[3]) |
152 |
| - assert notna(result[4]) |
153 |
| - |
154 |
| - # min_periods=0 |
155 |
| - result0 = ser.expanding(min_periods=0).apply( |
156 |
| - lambda x: x.mean(), raw=raw, engine=engine |
157 |
| - ) |
158 |
| - result1 = ser.expanding(min_periods=1).apply( |
159 |
| - lambda x: x.mean(), raw=raw, engine=engine |
160 |
| - ) |
161 |
| - tm.assert_almost_equal(result0, result1) |
162 |
| - |
163 |
| - result = ser.expanding(min_periods=1).apply( |
164 |
| - lambda x: x.mean(), raw=raw, engine=engine |
165 |
| - ) |
166 |
| - tm.assert_almost_equal(result.iloc[-1], np.mean(ser[:50])) |
167 |
| - |
168 |
| - |
169 | 8 | @pytest.mark.parametrize("min_periods", [0, 1, 2, 3, 4])
|
170 | 9 | @pytest.mark.parametrize("f", [lambda v: Series(v).sum(), np.nansum])
|
171 | 10 | def test_expanding_apply_consistency_sum_nans(consistency_data, min_periods, f):
|
@@ -334,202 +173,3 @@ def test_expanding_consistency_var_debiasing_factors(consistency_data, min_perio
|
334 | 173 | x.expanding().count() - 1.0
|
335 | 174 | ).replace(0.0, np.nan)
|
336 | 175 | tm.assert_equal(var_unbiased_x, var_biased_x * var_debiasing_factors_x)
|
337 |
| - |
338 |
| - |
339 |
| -@pytest.mark.parametrize( |
340 |
| - "f", |
341 |
| - [ |
342 |
| - lambda x: (x.expanding(min_periods=5).cov(x, pairwise=True)), |
343 |
| - lambda x: (x.expanding(min_periods=5).corr(x, pairwise=True)), |
344 |
| - ], |
345 |
| -) |
346 |
| -def test_moment_functions_zero_length_pairwise(f): |
347 |
| - |
348 |
| - df1 = DataFrame() |
349 |
| - df2 = DataFrame(columns=Index(["a"], name="foo"), index=Index([], name="bar")) |
350 |
| - df2["a"] = df2["a"].astype("float64") |
351 |
| - |
352 |
| - df1_expected = DataFrame( |
353 |
| - index=MultiIndex.from_product([df1.index, df1.columns]), columns=Index([]) |
354 |
| - ) |
355 |
| - df2_expected = DataFrame( |
356 |
| - index=MultiIndex.from_product([df2.index, df2.columns], names=["bar", "foo"]), |
357 |
| - columns=Index(["a"], name="foo"), |
358 |
| - dtype="float64", |
359 |
| - ) |
360 |
| - |
361 |
| - df1_result = f(df1) |
362 |
| - tm.assert_frame_equal(df1_result, df1_expected) |
363 |
| - |
364 |
| - df2_result = f(df2) |
365 |
| - tm.assert_frame_equal(df2_result, df2_expected) |
366 |
| - |
367 |
| - |
368 |
| -@pytest.mark.parametrize( |
369 |
| - "f", |
370 |
| - [ |
371 |
| - lambda x: x.expanding().count(), |
372 |
| - lambda x: x.expanding(min_periods=5).cov(x, pairwise=False), |
373 |
| - lambda x: x.expanding(min_periods=5).corr(x, pairwise=False), |
374 |
| - lambda x: x.expanding(min_periods=5).max(), |
375 |
| - lambda x: x.expanding(min_periods=5).min(), |
376 |
| - lambda x: x.expanding(min_periods=5).sum(), |
377 |
| - lambda x: x.expanding(min_periods=5).mean(), |
378 |
| - lambda x: x.expanding(min_periods=5).std(), |
379 |
| - lambda x: x.expanding(min_periods=5).var(), |
380 |
| - lambda x: x.expanding(min_periods=5).skew(), |
381 |
| - lambda x: x.expanding(min_periods=5).kurt(), |
382 |
| - lambda x: x.expanding(min_periods=5).quantile(0.5), |
383 |
| - lambda x: x.expanding(min_periods=5).median(), |
384 |
| - lambda x: x.expanding(min_periods=5).apply(sum, raw=False), |
385 |
| - lambda x: x.expanding(min_periods=5).apply(sum, raw=True), |
386 |
| - ], |
387 |
| -) |
388 |
| -def test_moment_functions_zero_length(f): |
389 |
| - # GH 8056 |
390 |
| - s = Series(dtype=np.float64) |
391 |
| - s_expected = s |
392 |
| - df1 = DataFrame() |
393 |
| - df1_expected = df1 |
394 |
| - df2 = DataFrame(columns=["a"]) |
395 |
| - df2["a"] = df2["a"].astype("float64") |
396 |
| - df2_expected = df2 |
397 |
| - |
398 |
| - s_result = f(s) |
399 |
| - tm.assert_series_equal(s_result, s_expected) |
400 |
| - |
401 |
| - df1_result = f(df1) |
402 |
| - tm.assert_frame_equal(df1_result, df1_expected) |
403 |
| - |
404 |
| - df2_result = f(df2) |
405 |
| - tm.assert_frame_equal(df2_result, df2_expected) |
406 |
| - |
407 |
| - |
408 |
| -def test_expanding_apply_empty_series(engine_and_raw): |
409 |
| - engine, raw = engine_and_raw |
410 |
| - ser = Series([], dtype=np.float64) |
411 |
| - tm.assert_series_equal( |
412 |
| - ser, ser.expanding().apply(lambda x: x.mean(), raw=raw, engine=engine) |
413 |
| - ) |
414 |
| - |
415 |
| - |
416 |
| -def test_expanding_apply_min_periods_0(engine_and_raw): |
417 |
| - # GH 8080 |
418 |
| - engine, raw = engine_and_raw |
419 |
| - s = Series([None, None, None]) |
420 |
| - result = s.expanding(min_periods=0).apply(lambda x: len(x), raw=raw, engine=engine) |
421 |
| - expected = Series([1.0, 2.0, 3.0]) |
422 |
| - tm.assert_series_equal(result, expected) |
423 |
| - |
424 |
| - |
425 |
| -def test_expanding_cov_diff_index(): |
426 |
| - # GH 7512 |
427 |
| - s1 = Series([1, 2, 3], index=[0, 1, 2]) |
428 |
| - s2 = Series([1, 3], index=[0, 2]) |
429 |
| - result = s1.expanding().cov(s2) |
430 |
| - expected = Series([None, None, 2.0]) |
431 |
| - tm.assert_series_equal(result, expected) |
432 |
| - |
433 |
| - s2a = Series([1, None, 3], index=[0, 1, 2]) |
434 |
| - result = s1.expanding().cov(s2a) |
435 |
| - tm.assert_series_equal(result, expected) |
436 |
| - |
437 |
| - s1 = Series([7, 8, 10], index=[0, 1, 3]) |
438 |
| - s2 = Series([7, 9, 10], index=[0, 2, 3]) |
439 |
| - result = s1.expanding().cov(s2) |
440 |
| - expected = Series([None, None, None, 4.5]) |
441 |
| - tm.assert_series_equal(result, expected) |
442 |
| - |
443 |
| - |
444 |
| -def test_expanding_corr_diff_index(): |
445 |
| - # GH 7512 |
446 |
| - s1 = Series([1, 2, 3], index=[0, 1, 2]) |
447 |
| - s2 = Series([1, 3], index=[0, 2]) |
448 |
| - result = s1.expanding().corr(s2) |
449 |
| - expected = Series([None, None, 1.0]) |
450 |
| - tm.assert_series_equal(result, expected) |
451 |
| - |
452 |
| - s2a = Series([1, None, 3], index=[0, 1, 2]) |
453 |
| - result = s1.expanding().corr(s2a) |
454 |
| - tm.assert_series_equal(result, expected) |
455 |
| - |
456 |
| - s1 = Series([7, 8, 10], index=[0, 1, 3]) |
457 |
| - s2 = Series([7, 9, 10], index=[0, 2, 3]) |
458 |
| - result = s1.expanding().corr(s2) |
459 |
| - expected = Series([None, None, None, 1.0]) |
460 |
| - tm.assert_series_equal(result, expected) |
461 |
| - |
462 |
| - |
463 |
| -def test_expanding_cov_pairwise_diff_length(): |
464 |
| - # GH 7512 |
465 |
| - df1 = DataFrame([[1, 5], [3, 2], [3, 9]], columns=Index(["A", "B"], name="foo")) |
466 |
| - df1a = DataFrame( |
467 |
| - [[1, 5], [3, 9]], index=[0, 2], columns=Index(["A", "B"], name="foo") |
468 |
| - ) |
469 |
| - df2 = DataFrame( |
470 |
| - [[5, 6], [None, None], [2, 1]], columns=Index(["X", "Y"], name="foo") |
471 |
| - ) |
472 |
| - df2a = DataFrame( |
473 |
| - [[5, 6], [2, 1]], index=[0, 2], columns=Index(["X", "Y"], name="foo") |
474 |
| - ) |
475 |
| - # TODO: xref gh-15826 |
476 |
| - # .loc is not preserving the names |
477 |
| - result1 = df1.expanding().cov(df2, pairwise=True).loc[2] |
478 |
| - result2 = df1.expanding().cov(df2a, pairwise=True).loc[2] |
479 |
| - result3 = df1a.expanding().cov(df2, pairwise=True).loc[2] |
480 |
| - result4 = df1a.expanding().cov(df2a, pairwise=True).loc[2] |
481 |
| - expected = DataFrame( |
482 |
| - [[-3.0, -6.0], [-5.0, -10.0]], |
483 |
| - columns=Index(["A", "B"], name="foo"), |
484 |
| - index=Index(["X", "Y"], name="foo"), |
485 |
| - ) |
486 |
| - tm.assert_frame_equal(result1, expected) |
487 |
| - tm.assert_frame_equal(result2, expected) |
488 |
| - tm.assert_frame_equal(result3, expected) |
489 |
| - tm.assert_frame_equal(result4, expected) |
490 |
| - |
491 |
| - |
492 |
| -def test_expanding_corr_pairwise_diff_length(): |
493 |
| - # GH 7512 |
494 |
| - df1 = DataFrame( |
495 |
| - [[1, 2], [3, 2], [3, 4]], columns=["A", "B"], index=Index(range(3), name="bar") |
496 |
| - ) |
497 |
| - df1a = DataFrame( |
498 |
| - [[1, 2], [3, 4]], index=Index([0, 2], name="bar"), columns=["A", "B"] |
499 |
| - ) |
500 |
| - df2 = DataFrame( |
501 |
| - [[5, 6], [None, None], [2, 1]], |
502 |
| - columns=["X", "Y"], |
503 |
| - index=Index(range(3), name="bar"), |
504 |
| - ) |
505 |
| - df2a = DataFrame( |
506 |
| - [[5, 6], [2, 1]], index=Index([0, 2], name="bar"), columns=["X", "Y"] |
507 |
| - ) |
508 |
| - result1 = df1.expanding().corr(df2, pairwise=True).loc[2] |
509 |
| - result2 = df1.expanding().corr(df2a, pairwise=True).loc[2] |
510 |
| - result3 = df1a.expanding().corr(df2, pairwise=True).loc[2] |
511 |
| - result4 = df1a.expanding().corr(df2a, pairwise=True).loc[2] |
512 |
| - expected = DataFrame( |
513 |
| - [[-1.0, -1.0], [-1.0, -1.0]], columns=["A", "B"], index=Index(["X", "Y"]) |
514 |
| - ) |
515 |
| - tm.assert_frame_equal(result1, expected) |
516 |
| - tm.assert_frame_equal(result2, expected) |
517 |
| - tm.assert_frame_equal(result3, expected) |
518 |
| - tm.assert_frame_equal(result4, expected) |
519 |
| - |
520 |
| - |
521 |
| -def test_expanding_apply_args_kwargs(engine_and_raw): |
522 |
| - def mean_w_arg(x, const): |
523 |
| - return np.mean(x) + const |
524 |
| - |
525 |
| - engine, raw = engine_and_raw |
526 |
| - |
527 |
| - df = DataFrame(np.random.rand(20, 3)) |
528 |
| - |
529 |
| - expected = df.expanding().apply(np.mean, engine=engine, raw=raw) + 20.0 |
530 |
| - |
531 |
| - result = df.expanding().apply(mean_w_arg, engine=engine, raw=raw, args=(20,)) |
532 |
| - tm.assert_frame_equal(result, expected) |
533 |
| - |
534 |
| - result = df.expanding().apply(mean_w_arg, raw=raw, kwargs={"const": 20}) |
535 |
| - tm.assert_frame_equal(result, expected) |
0 commit comments