Skip to content

Commit b537437

Browse files
twoertwein authored and Kevin D Smith committed
CLN/TST: roll_sum/mean/var/skew/kurt: simplification for non-monotonic indices (pandas-dev#36933)
1 parent d749905 commit b537437

File tree

2 files changed

+146
-34
lines changed

2 files changed

+146
-34
lines changed

pandas/_libs/window/aggregations.pyx

+51 -34
Original file line number | Diff line number | Diff line change
@@ -58,7 +58,7 @@ cdef:
5858
cdef inline int int_max(int a, int b): return a if a >= b else b
5959
cdef inline int int_min(int a, int b): return a if a <= b else b
6060

61-
cdef bint is_monotonic_start_end_bounds(
61+
cdef bint is_monotonic_increasing_start_end_bounds(
6262
ndarray[int64_t, ndim=1] start, ndarray[int64_t, ndim=1] end
6363
):
6464
return is_monotonic(start, False)[0] and is_monotonic(end, False)[0]
@@ -143,9 +143,11 @@ def roll_sum(ndarray[float64_t] values, ndarray[int64_t] start,
143143
int64_t s, e
144144
int64_t nobs = 0, i, j, N = len(values)
145145
ndarray[float64_t] output
146-
bint is_monotonic_bounds
146+
bint is_monotonic_increasing_bounds
147147

148-
is_monotonic_bounds = is_monotonic_start_end_bounds(start, end)
148+
is_monotonic_increasing_bounds = is_monotonic_increasing_start_end_bounds(
149+
start, end
150+
)
149151
output = np.empty(N, dtype=float)
150152

151153
with nogil:
@@ -154,7 +156,7 @@ def roll_sum(ndarray[float64_t] values, ndarray[int64_t] start,
154156
s = start[i]
155157
e = end[i]
156158

157-
if i == 0 or not is_monotonic_bounds:
159+
if i == 0 or not is_monotonic_increasing_bounds:
158160

159161
# setup
160162

@@ -173,9 +175,10 @@ def roll_sum(ndarray[float64_t] values, ndarray[int64_t] start,
173175

174176
output[i] = calc_sum(minp, nobs, sum_x)
175177

176-
if not is_monotonic_bounds:
177-
for j in range(s, e):
178-
remove_sum(values[j], &nobs, &sum_x, &compensation_remove)
178+
if not is_monotonic_increasing_bounds:
179+
nobs = 0
180+
sum_x = 0.0
181+
compensation_remove = 0.0
179182

180183
return output
181184

@@ -244,9 +247,11 @@ def roll_mean(ndarray[float64_t] values, ndarray[int64_t] start,
244247
int64_t s, e
245248
Py_ssize_t nobs = 0, i, j, neg_ct = 0, N = len(values)
246249
ndarray[float64_t] output
247-
bint is_monotonic_bounds
250+
bint is_monotonic_increasing_bounds
248251

249-
is_monotonic_bounds = is_monotonic_start_end_bounds(start, end)
252+
is_monotonic_increasing_bounds = is_monotonic_increasing_start_end_bounds(
253+
start, end
254+
)
250255
output = np.empty(N, dtype=float)
251256

252257
with nogil:
@@ -255,7 +260,7 @@ def roll_mean(ndarray[float64_t] values, ndarray[int64_t] start,
255260
s = start[i]
256261
e = end[i]
257262

258-
if i == 0 or not is_monotonic_bounds:
263+
if i == 0 or not is_monotonic_increasing_bounds:
259264

260265
# setup
261266
for j in range(s, e):
@@ -276,10 +281,11 @@ def roll_mean(ndarray[float64_t] values, ndarray[int64_t] start,
276281

277282
output[i] = calc_mean(minp, nobs, neg_ct, sum_x)
278283

279-
if not is_monotonic_bounds:
280-
for j in range(s, e):
281-
val = values[j]
282-
remove_mean(val, &nobs, &sum_x, &neg_ct, &compensation_remove)
284+
if not is_monotonic_increasing_bounds:
285+
nobs = 0
286+
neg_ct = 0
287+
sum_x = 0.0
288+
compensation_remove = 0.0
283289
return output
284290

285291
# ----------------------------------------------------------------------
@@ -367,10 +373,12 @@ def roll_var(ndarray[float64_t] values, ndarray[int64_t] start,
367373
int64_t s, e
368374
Py_ssize_t i, j, N = len(values)
369375
ndarray[float64_t] output
370-
bint is_monotonic_bounds
376+
bint is_monotonic_increasing_bounds
371377

372378
minp = max(minp, 1)
373-
is_monotonic_bounds = is_monotonic_start_end_bounds(start, end)
379+
is_monotonic_increasing_bounds = is_monotonic_increasing_start_end_bounds(
380+
start, end
381+
)
374382
output = np.empty(N, dtype=float)
375383

376384
with nogil:
@@ -382,7 +390,7 @@ def roll_var(ndarray[float64_t] values, ndarray[int64_t] start,
382390

383391
# Over the first window, observations can only be added
384392
# never removed
385-
if i == 0 or not is_monotonic_bounds:
393+
if i == 0 or not is_monotonic_increasing_bounds:
386394

387395
for j in range(s, e):
388396
add_var(values[j], &nobs, &mean_x, &ssqdm_x, &compensation_add)
@@ -403,10 +411,11 @@ def roll_var(ndarray[float64_t] values, ndarray[int64_t] start,
403411

404412
output[i] = calc_var(minp, ddof, nobs, ssqdm_x)
405413

406-
if not is_monotonic_bounds:
407-
for j in range(s, e):
408-
remove_var(values[j], &nobs, &mean_x, &ssqdm_x,
409-
&compensation_remove)
414+
if not is_monotonic_increasing_bounds:
415+
nobs = 0.0
416+
mean_x = 0.0
417+
ssqdm_x = 0.0
418+
compensation_remove = 0.0
410419

411420
return output
412421

@@ -486,10 +495,12 @@ def roll_skew(ndarray[float64_t] values, ndarray[int64_t] start,
486495
int64_t nobs = 0, i, j, N = len(values)
487496
int64_t s, e
488497
ndarray[float64_t] output
489-
bint is_monotonic_bounds
498+
bint is_monotonic_increasing_bounds
490499

491500
minp = max(minp, 3)
492-
is_monotonic_bounds = is_monotonic_start_end_bounds(start, end)
501+
is_monotonic_increasing_bounds = is_monotonic_increasing_start_end_bounds(
502+
start, end
503+
)
493504
output = np.empty(N, dtype=float)
494505

495506
with nogil:
@@ -501,7 +512,7 @@ def roll_skew(ndarray[float64_t] values, ndarray[int64_t] start,
501512

502513
# Over the first window, observations can only be added
503514
# never removed
504-
if i == 0 or not is_monotonic_bounds:
515+
if i == 0 or not is_monotonic_increasing_bounds:
505516

506517
for j in range(s, e):
507518
val = values[j]
@@ -524,10 +535,11 @@ def roll_skew(ndarray[float64_t] values, ndarray[int64_t] start,
524535

525536
output[i] = calc_skew(minp, nobs, x, xx, xxx)
526537

527-
if not is_monotonic_bounds:
528-
for j in range(s, e):
529-
val = values[j]
530-
remove_skew(val, &nobs, &x, &xx, &xxx)
538+
if not is_monotonic_increasing_bounds:
539+
nobs = 0
540+
x = 0.0
541+
xx = 0.0
542+
xxx = 0.0
531543

532544
return output
533545

@@ -611,10 +623,12 @@ def roll_kurt(ndarray[float64_t] values, ndarray[int64_t] start,
611623
float64_t x = 0, xx = 0, xxx = 0, xxxx = 0
612624
int64_t nobs = 0, i, j, s, e, N = len(values)
613625
ndarray[float64_t] output
614-
bint is_monotonic_bounds
626+
bint is_monotonic_increasing_bounds
615627

616628
minp = max(minp, 4)
617-
is_monotonic_bounds = is_monotonic_start_end_bounds(start, end)
629+
is_monotonic_increasing_bounds = is_monotonic_increasing_start_end_bounds(
630+
start, end
631+
)
618632
output = np.empty(N, dtype=float)
619633

620634
with nogil:
@@ -626,7 +640,7 @@ def roll_kurt(ndarray[float64_t] values, ndarray[int64_t] start,
626640

627641
# Over the first window, observations can only be added
628642
# never removed
629-
if i == 0 or not is_monotonic_bounds:
643+
if i == 0 or not is_monotonic_increasing_bounds:
630644

631645
for j in range(s, e):
632646
add_kurt(values[j], &nobs, &x, &xx, &xxx, &xxxx)
@@ -646,9 +660,12 @@ def roll_kurt(ndarray[float64_t] values, ndarray[int64_t] start,
646660

647661
output[i] = calc_kurt(minp, nobs, x, xx, xxx, xxxx)
648662

649-
if not is_monotonic_bounds:
650-
for j in range(s, e):
651-
remove_kurt(values[j], &nobs, &x, &xx, &xxx, &xxxx)
663+
if not is_monotonic_increasing_bounds:
664+
nobs = 0
665+
x = 0.0
666+
xx = 0.0
667+
xxx = 0.0
668+
xxxx = 0.0
652669

653670
return output
654671

pandas/tests/window/test_rolling.py

+95
Original file line number | Diff line number | Diff line change
@@ -917,3 +917,98 @@ def test_rolling_var_numerical_issues(func, third_value, values):
917917
result = getattr(ds.rolling(2), func)()
918918
expected = Series([np.nan] + values)
919919
tm.assert_series_equal(result, expected)
920+
921+
922+
@pytest.mark.parametrize("method", ["var", "sum", "mean", "skew", "kurt", "min", "max"])
923+
def test_rolling_decreasing_indices(method):
924+
"""
925+
Make sure that decreasing indices give the same results as increasing indices.
926+
927+
GH 36933
928+
"""
929+
df = DataFrame({"values": np.arange(-15, 10) ** 2})
930+
df_reverse = DataFrame({"values": df["values"][::-1]}, index=df.index[::-1])
931+
932+
increasing = getattr(df.rolling(window=5), method)()
933+
decreasing = getattr(df_reverse.rolling(window=5), method)()
934+
935+
assert np.abs(decreasing.values[::-1][:-4] - increasing.values[4:]).max() < 1e-12
936+
937+
938+
@pytest.mark.parametrize(
939+
"method,expected",
940+
[
941+
(
942+
"var",
943+
[
944+
float("nan"),
945+
43.0,
946+
float("nan"),
947+
136.333333,
948+
43.5,
949+
94.966667,
950+
182.0,
951+
318.0,
952+
],
953+
),
954+
("mean", [float("nan"), 7.5, float("nan"), 21.5, 6.0, 9.166667, 13.0, 17.5]),
955+
("sum", [float("nan"), 30.0, float("nan"), 86.0, 30.0, 55.0, 91.0, 140.0]),
956+
(
957+
"skew",
958+
[
959+
float("nan"),
960+
0.709296,
961+
float("nan"),
962+
0.407073,
963+
0.984656,
964+
0.919184,
965+
0.874674,
966+
0.842418,
967+
],
968+
),
969+
(
970+
"kurt",
971+
[
972+
float("nan"),
973+
-0.5916711736073559,
974+
float("nan"),
975+
-1.0028993131317954,
976+
-0.06103844629409494,
977+
-0.254143227116194,
978+
-0.37362637362637585,
979+
-0.45439658241367054,
980+
],
981+
),
982+
],
983+
)
984+
def test_rolling_non_monotonic(method, expected):
985+
"""
986+
Make sure the (rare) branch of non-monotonic indices is covered by a test.
987+
988+
output from 1.1.3 is assumed to be the expected output. Output of sum/mean has
989+
manually been verified.
990+
991+
GH 36933.
992+
"""
993+
# Based on an example found in computation.rst
994+
use_expanding = [True, False, True, False, True, True, True, True]
995+
df = DataFrame({"values": np.arange(len(use_expanding)) ** 2})
996+
997+
class CustomIndexer(pd.api.indexers.BaseIndexer):
998+
def get_window_bounds(self, num_values, min_periods, center, closed):
999+
start = np.empty(num_values, dtype=np.int64)
1000+
end = np.empty(num_values, dtype=np.int64)
1001+
for i in range(num_values):
1002+
if self.use_expanding[i]:
1003+
start[i] = 0
1004+
end[i] = i + 1
1005+
else:
1006+
start[i] = i
1007+
end[i] = i + self.window_size
1008+
return start, end
1009+
1010+
indexer = CustomIndexer(window_size=4, use_expanding=use_expanding)
1011+
1012+
result = getattr(df.rolling(indexer), method)()
1013+
expected = DataFrame({"values": expected})
1014+
tm.assert_frame_equal(result, expected)

0 commit comments

Comments
 (0)