|
| 1 | +import warnings |
| 2 | + |
1 | 3 | import numpy as np
|
2 | 4 | import pytest
|
3 | 5 |
|
|
7 | 9 | MultiIndex,
|
8 | 10 | Series,
|
9 | 11 | Timestamp,
|
| 12 | + concat, |
10 | 13 | date_range,
|
| 14 | + isna, |
| 15 | + notna, |
11 | 16 | )
|
12 | 17 | import pandas._testing as tm
|
13 | 18 |
|
| 19 | +import pandas.tseries.offsets as offsets |
| 20 | + |
| 21 | + |
def f(x):
    """
    Return the mean of the finite entries of ``x``.

    Non-finite entries (as reported by ``np.isfinite``) are dropped before
    averaging.  ``x`` is whatever the rolling ``apply`` hands over for one
    window; it only needs to support boolean-mask indexing and ``.mean()``.
    """
    # Some windows are empty on purpose (0-length selections are being
    # tested), so silence the RuntimeWarnings about empty slices.
    with warnings.catch_warnings():
        warnings.filterwarnings(
            "ignore",
            message=".*(empty slice|0 for slice).*",
            category=RuntimeWarning,
        )
        finite_mask = np.isfinite(x)
        finite_values = x[finite_mask]
        return finite_values.mean()
| 32 | + |
14 | 33 |
|
15 | 34 | @pytest.mark.parametrize("bad_raw", [None, 1, 0])
|
16 | 35 | def test_rolling_apply_invalid_raw(bad_raw):
|
@@ -158,3 +177,133 @@ def foo(x, par):
|
158 | 177 |
|
159 | 178 | result = gb_rolling.apply(foo, args=args_kwargs[0], kwargs=args_kwargs[1])
|
160 | 179 | tm.assert_series_equal(result, expected)
|
| 180 | + |
| 181 | + |
def test_nans(raw):
    """
    Check rolling ``apply`` on data with leading and trailing NaN runs.

    ``raw`` is forwarded unchanged to ``Rolling.apply`` (presumably supplied
    by a pytest fixture defined outside this file -- confirm in conftest).
    """
    obj = Series(np.random.randn(50))
    # np.nan rather than np.NaN: the capitalised alias was removed in NumPy 2.0
    obj[:10] = np.nan
    obj[-10:] = np.nan

    # The last window spans all 50 rows, of which exactly 30 are valid.
    result = obj.rolling(50, min_periods=30).apply(f, raw=raw)
    tm.assert_almost_equal(result.iloc[-1], np.mean(obj[10:-10]))

    # min_periods is working correctly
    result = obj.rolling(20, min_periods=15).apply(f, raw=raw)
    # window ending at 23 holds rows 10..23 (14 valid); ending at 24 holds 15
    assert isna(result.iloc[23])
    assert not isna(result.iloc[24])

    # window ending at 44 holds rows 25..39 (15 valid); ending at 45 holds 14
    assert not isna(result.iloc[-6])
    assert isna(result.iloc[-5])

    # Without any NaN the first valid result appears once 5 rows are seen.
    obj2 = Series(np.random.randn(20))
    result = obj2.rolling(10, min_periods=5).apply(f, raw=raw)
    assert isna(result.iloc[3])
    assert notna(result.iloc[4])

    # min_periods=0 and min_periods=1 must give the same output here.
    result0 = obj.rolling(20, min_periods=0).apply(f, raw=raw)
    result1 = obj.rolling(20, min_periods=1).apply(f, raw=raw)
    tm.assert_almost_equal(result0, result1)
| 206 | + |
| 207 | + |
def test_center(raw):
    """
    Check that ``center=True`` matches a shifted trailing window.

    The centered result is compared with a trailing rolling ``apply`` over
    the same data padded with 9 NaN rows, after dropping the first 9 outputs
    and resetting the index.  ``raw`` is forwarded unchanged to
    ``Rolling.apply``.
    """
    obj = Series(np.random.randn(50))
    # np.nan rather than np.NaN: the capitalised alias was removed in NumPy 2.0
    obj[:10] = np.nan
    obj[-10:] = np.nan

    result = obj.rolling(20, min_periods=15, center=True).apply(f, raw=raw)
    expected = (
        concat([obj, Series([np.nan] * 9)])
        .rolling(20, min_periods=15)
        .apply(f, raw=raw)[9:]
        .reset_index(drop=True)
    )
    tm.assert_series_equal(result, expected)
| 221 | + |
| 222 | + |
def test_series(raw, series):
    """Rolling ``apply`` on a Series yields a Series with the right last value."""
    window = 50
    rolled = series.rolling(window).apply(f, raw=raw)
    assert isinstance(rolled, Series)

    # The final window covers the last 50 observations of the input.
    expected_last = np.mean(series[-window:])
    tm.assert_almost_equal(rolled.iloc[-1], expected_last)
| 227 | + |
| 228 | + |
def test_frame(raw, frame):
    """Rolling ``apply`` on a DataFrame is checked against per-column means."""
    window = 50
    rolled = frame.rolling(window).apply(f, raw=raw)
    assert isinstance(rolled, DataFrame)

    # Last output row versus the column-wise mean of the last 50 input rows.
    last_row = rolled.iloc[-1, :]
    column_means = frame.iloc[-window:, :].apply(np.mean, axis=0, raw=raw)
    tm.assert_series_equal(last_row, column_means, check_names=False)
| 237 | + |
| 238 | + |
def test_time_rule_series(raw, series):
    """
    Check rolling ``apply`` on a Series resampled to business-day frequency.

    Every other row of ``series`` is resampled with rule ``"B"`` and averaged,
    then rolled with a 25-row window.  The last rolled value must match the
    mean of the un-resampled rows falling inside the final window's dates.
    """
    win = 25
    minp = 10
    ser = series[::2].resample("B").mean()
    series_result = ser.rolling(window=win, min_periods=minp).apply(f, raw=raw)
    last_date = series_result.index[-1]
    # A window of ``win`` business days ending on last_date starts
    # ``win - 1`` business days earlier.
    prev_date = last_date - (win - 1) * offsets.BDay()

    trunc_series = series[::2].truncate(prev_date, last_date)
    # .iloc[-1] instead of [-1]: integer keys on a datetime-indexed Series
    # rely on the deprecated positional fallback of Series.__getitem__.
    tm.assert_almost_equal(series_result.iloc[-1], np.mean(trunc_series))
| 249 | + |
| 250 | + |
def test_time_rule_frame(raw, frame):
    """
    Check rolling ``apply`` on a DataFrame resampled to business-day frequency.

    The row of the rolled result at the last date is compared with the
    column means of the un-resampled rows between ``prev_date`` and
    ``last_date``.
    """
    win = 25
    minp = 10

    every_other = frame[::2]
    resampled = every_other.resample("B").mean()
    roller = resampled.rolling(window=win, min_periods=minp)
    frame_result = roller.apply(f, raw=raw)

    # Date bounds of the final window: 24 business days back from the end.
    last_date = frame_result.index[-1]
    prev_date = last_date - 24 * offsets.BDay()

    actual = frame_result.xs(last_date)
    expected = every_other.truncate(prev_date, last_date).apply(np.mean, raw=raw)
    tm.assert_series_equal(actual, expected, check_names=False)
| 265 | + |
| 266 | + |
@pytest.mark.parametrize("minp", [0, 99, 100])
def test_min_periods(raw, series, minp):
    """
    Compare a window one row longer than the data with a window of its length.

    Both results must be missing in the same places and agree wherever a
    value is present.
    """
    n_obs = len(series)
    result = series.rolling(n_obs + 1, min_periods=minp).apply(f, raw=raw)
    expected = series.rolling(n_obs, min_periods=minp).apply(f, raw=raw)

    # Same missing-value pattern ...
    result_missing = isna(result)
    tm.assert_series_equal(result_missing, isna(expected))

    # ... and identical values everywhere else.
    result_present = ~result_missing
    tm.assert_almost_equal(result[result_present], expected[result_present])
| 276 | + |
| 277 | + |
def test_center_reindex_series(raw, series):
    """
    Check a centered rolling ``apply`` on a Series against a shifted one.

    The expected values come from a trailing window over the series extended
    by 12 extra labels, shifted back by 12 and reindexed to the original
    index.
    """
    # Extra labels appended to the index so the trailing result can be
    # shifted back without losing the tail.
    pad_labels = [f"x{i:d}" for i in range(12)]
    minp = 10

    padded = series.reindex(list(series.index) + pad_labels)
    trailing = padded.rolling(window=25, min_periods=minp).apply(f, raw=raw)
    series_xp = trailing.shift(-12).reindex(series.index)

    centered = series.rolling(window=25, min_periods=minp, center=True)
    series_rs = centered.apply(f, raw=raw)

    tm.assert_series_equal(series_xp, series_rs)
| 294 | + |
| 295 | + |
def test_center_reindex_frame(raw, frame):
    """
    Check a centered rolling ``apply`` on a DataFrame against a shifted one.

    The expected values come from a trailing window over the frame extended
    by 12 extra row labels, shifted back by 12 and reindexed to the original
    index.
    """
    # Extra row labels appended to the index so the trailing result can be
    # shifted back without losing the tail.
    pad_labels = [f"x{i:d}" for i in range(12)]
    minp = 10

    padded = frame.reindex(list(frame.index) + pad_labels)
    trailing = padded.rolling(window=25, min_periods=minp).apply(f, raw=raw)
    frame_xp = trailing.shift(-12).reindex(frame.index)

    centered = frame.rolling(window=25, min_periods=minp, center=True)
    frame_rs = centered.apply(f, raw=raw)

    tm.assert_frame_equal(frame_xp, frame_rs)
0 commit comments