|
4 | 4 | from pandas import (
|
5 | 5 | DataFrame,
|
6 | 6 | Series,
|
| 7 | + MultiIndex, |
| 8 | + concat, |
7 | 9 | date_range,
|
8 | 10 | )
|
9 | 11 | import pandas._testing as tm
|
|
13 | 15 | )
|
14 | 16 | from pandas.core.indexers.objects import (
|
15 | 17 | ExpandingIndexer,
|
| 18 | + FixedWindowIndexer, |
16 | 19 | VariableOffsetWindowIndexer,
|
17 | 20 | )
|
18 | 21 |
|
@@ -293,3 +296,159 @@ def get_window_bounds(self, num_values, min_periods, center, closed):
|
293 | 296 | result = getattr(df.rolling(indexer), func)(*args)
|
294 | 297 | expected = DataFrame({"values": values})
|
295 | 298 | tm.assert_frame_equal(result, expected)
|
| 299 | + |
| 300 | + |
@pytest.mark.parametrize(
    "indexer_class", [FixedWindowIndexer, FixedForwardWindowIndexer, ExpandingIndexer]
)
@pytest.mark.parametrize("window_size", [1, 2, 12])
@pytest.mark.parametrize(
    "df_data",
    [
        {"a": [1, 1], "b": [0, 1]},
        {"a": [1, 2], "b": [0, 1]},
        {"a": [1] * 16, "b": [np.nan, 1, 2, np.nan] + list(range(4, 16))},
    ],
)
def test_indexers_are_reusable_after_groupby_rolling(
    indexer_class, window_size, df_data
):
    # GH 43267
    # A single indexer instance is passed to several consecutive
    # groupby-rolling computations; its ``window_size`` attribute has to read
    # the same after each of them.
    frame = DataFrame(df_data)
    shared_indexer = indexer_class(window_size=window_size)
    size_before = shared_indexer.window_size

    for _ in range(3):
        column = frame.groupby("a")["b"]
        column.rolling(window=shared_indexer, min_periods=1).mean()
        assert shared_indexer.window_size == size_before
| 324 | + |
| 325 | + |
@pytest.mark.parametrize(
    "window_size, num_values, expected_start, expected_end",
    [
        (1, 1, [0], [1]),
        (1, 2, [0, 1], [1, 2]),
        (2, 1, [0], [1]),
        (2, 2, [0, 1], [2, 2]),
        (5, 12, range(12), list(range(5, 12)) + [12] * 5),
        (12, 5, range(5), [5] * 5),
        (0, 0, np.array([]), np.array([])),
        (1, 0, np.array([]), np.array([])),
        (0, 1, [0], [0]),
    ],
)
def test_fixed_forward_indexer_bounds(
    window_size, num_values, expected_start, expected_end
):
    # GH 43267
    # The start/end arrays returned by ``get_window_bounds`` are compared
    # against the parametrized expectations (dtype is deliberately not
    # compared) and must have the same length as each other.
    bounds = FixedForwardWindowIndexer(window_size=window_size).get_window_bounds(
        num_values=num_values
    )
    starts, ends = bounds

    for actual, wanted in ((starts, expected_start), (ends, expected_end)):
        tm.assert_numpy_array_equal(actual, np.array(wanted), check_dtype=False)
    assert len(starts) == len(ends)
| 350 | + |
| 351 | + |
@pytest.mark.parametrize(
    "df, window_size, expected",
    [
        (
            DataFrame({"b": [0, 1, 2], "a": [1, 2, 2]}),
            2,
            Series(
                [0, 1.5, 2.0],
                index=MultiIndex.from_arrays([[1, 2, 2], range(3)], names=["a", None]),
                name="b",
                dtype=np.float64,
            ),
        ),
        (
            DataFrame(
                {
                    "b": [np.nan, 1, 2, np.nan] + list(range(4, 18)),
                    "a": [1] * 7 + [2] * 11,
                    "c": range(18),
                }
            ),
            12,
            Series(
                [
                    3.6,
                    3.6,
                    4.25,
                    5.0,
                    5.0,
                    5.5,
                    6.0,
                    12.0,
                    12.5,
                    13.0,
                    13.5,
                    14.0,
                    14.5,
                    15.0,
                    15.5,
                    16.0,
                    16.5,
                    17.0,
                ],
                index=MultiIndex.from_arrays(
                    [[1] * 7 + [2] * 11, range(18)], names=["a", None]
                ),
                name="b",
                dtype=np.float64,
            ),
        ),
    ],
)
def test_rolling_groupby_with_fixed_forward_specific(df, window_size, expected):
    # GH 43267
    # Forward-looking rolling mean of column "b" within each "a" group,
    # compared against the literal expected Series from the parametrization.
    forward = FixedForwardWindowIndexer(window_size=window_size)
    grouped_b = df.groupby("a")["b"]
    result = grouped_b.rolling(window=forward, min_periods=1).mean()
    tm.assert_series_equal(result, expected)
| 409 | + |
| 410 | + |
@pytest.mark.parametrize(
    "group_keys",
    [
        (1,),
        (1, 2),
        (2, 1),
        (1, 1, 2),
        (1, 2, 1),
        (1, 1, 2, 2),
        (1, 2, 3, 2, 3),
        (1, 1, 2) * 4,
        (1, 2, 3) * 5,
    ],
)
@pytest.mark.parametrize("window_size", [1, 2, 3, 4, 5, 8, 20])
def test_rolling_groupby_with_fixed_forward_many(group_keys, window_size):
    # GH 43267
    # Cross-checks the grouped forward-looking rolling sum against a reference
    # built by slicing each group by position.
    n_rows = len(group_keys)
    df = DataFrame(
        {
            "a": np.array(list(group_keys)),
            "b": np.arange(n_rows, dtype=np.float64) + 17,
            "c": np.arange(n_rows, dtype=np.int64),
        }
    )

    indexer = FixedForwardWindowIndexer(window_size=window_size)
    result = df.groupby("a")["b"].rolling(window=indexer, min_periods=1).sum()
    # Name the second index level "c" so it lines up with the reference
    # series, which is indexed by the "a" and "c" columns below.
    result.index.names = ["a", "c"]

    # "c" has to be part of the selection: the reference frame is indexed by
    # ["a", "c"] further down, and on pandas versions where iterating a
    # column-selected GroupBy yields only the selected columns, leaving it
    # out makes set_index raise KeyError.
    groups = df.groupby("a")[["a", "b", "c"]]
    manual = concat(
        [
            g.assign(
                b=[
                    # Forward window: rows i .. i + window_size - 1 of the
                    # group, truncated at the group's end by the slice.
                    g["b"].iloc[i : i + window_size].sum(min_count=1)
                    for i in range(len(g))
                ]
            )
            for _, g in groups
        ]
    )
    manual = manual.set_index(["a", "c"])["b"]

    tm.assert_series_equal(result, manual)
0 commit comments