Skip to content

TST/CLN: roll_sum/mean/var/skew/kurt: simplification for non-monotonic indices #36933

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 1 commit into from
Oct 25, 2020
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
85 changes: 51 additions & 34 deletions pandas/_libs/window/aggregations.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -58,7 +58,7 @@ cdef:
cdef inline int int_max(int a, int b): return a if a >= b else b
cdef inline int int_min(int a, int b): return a if a <= b else b

cdef bint is_monotonic_start_end_bounds(
cdef bint is_monotonic_increasing_start_end_bounds(
ndarray[int64_t, ndim=1] start, ndarray[int64_t, ndim=1] end
):
return is_monotonic(start, False)[0] and is_monotonic(end, False)[0]
Expand Down Expand Up @@ -143,9 +143,11 @@ def roll_sum(ndarray[float64_t] values, ndarray[int64_t] start,
int64_t s, e
int64_t nobs = 0, i, j, N = len(values)
ndarray[float64_t] output
bint is_monotonic_bounds
bint is_monotonic_increasing_bounds

is_monotonic_bounds = is_monotonic_start_end_bounds(start, end)
is_monotonic_increasing_bounds = is_monotonic_increasing_start_end_bounds(
start, end
)
output = np.empty(N, dtype=float)

with nogil:
Expand All @@ -154,7 +156,7 @@ def roll_sum(ndarray[float64_t] values, ndarray[int64_t] start,
s = start[i]
e = end[i]

if i == 0 or not is_monotonic_bounds:
if i == 0 or not is_monotonic_increasing_bounds:

# setup

Expand All @@ -173,9 +175,10 @@ def roll_sum(ndarray[float64_t] values, ndarray[int64_t] start,

output[i] = calc_sum(minp, nobs, sum_x)

if not is_monotonic_bounds:
for j in range(s, e):
remove_sum(values[j], &nobs, &sum_x, &compensation_remove)
if not is_monotonic_increasing_bounds:
nobs = 0
sum_x = 0.0
compensation_remove = 0.0

return output

Expand Down Expand Up @@ -244,9 +247,11 @@ def roll_mean(ndarray[float64_t] values, ndarray[int64_t] start,
int64_t s, e
Py_ssize_t nobs = 0, i, j, neg_ct = 0, N = len(values)
ndarray[float64_t] output
bint is_monotonic_bounds
bint is_monotonic_increasing_bounds

is_monotonic_bounds = is_monotonic_start_end_bounds(start, end)
is_monotonic_increasing_bounds = is_monotonic_increasing_start_end_bounds(
start, end
)
output = np.empty(N, dtype=float)

with nogil:
Expand All @@ -255,7 +260,7 @@ def roll_mean(ndarray[float64_t] values, ndarray[int64_t] start,
s = start[i]
e = end[i]

if i == 0 or not is_monotonic_bounds:
if i == 0 or not is_monotonic_increasing_bounds:

# setup
for j in range(s, e):
Expand All @@ -276,10 +281,11 @@ def roll_mean(ndarray[float64_t] values, ndarray[int64_t] start,

output[i] = calc_mean(minp, nobs, neg_ct, sum_x)

if not is_monotonic_bounds:
for j in range(s, e):
val = values[j]
remove_mean(val, &nobs, &sum_x, &neg_ct, &compensation_remove)
if not is_monotonic_increasing_bounds:
nobs = 0
neg_ct = 0
sum_x = 0.0
compensation_remove = 0.0
return output

# ----------------------------------------------------------------------
Expand Down Expand Up @@ -367,10 +373,12 @@ def roll_var(ndarray[float64_t] values, ndarray[int64_t] start,
int64_t s, e
Py_ssize_t i, j, N = len(values)
ndarray[float64_t] output
bint is_monotonic_bounds
bint is_monotonic_increasing_bounds

minp = max(minp, 1)
is_monotonic_bounds = is_monotonic_start_end_bounds(start, end)
is_monotonic_increasing_bounds = is_monotonic_increasing_start_end_bounds(
start, end
)
output = np.empty(N, dtype=float)

with nogil:
Expand All @@ -382,7 +390,7 @@ def roll_var(ndarray[float64_t] values, ndarray[int64_t] start,

# Over the first window, observations can only be added
# never removed
if i == 0 or not is_monotonic_bounds:
if i == 0 or not is_monotonic_increasing_bounds:

for j in range(s, e):
add_var(values[j], &nobs, &mean_x, &ssqdm_x, &compensation_add)
Expand All @@ -403,10 +411,11 @@ def roll_var(ndarray[float64_t] values, ndarray[int64_t] start,

output[i] = calc_var(minp, ddof, nobs, ssqdm_x)

if not is_monotonic_bounds:
for j in range(s, e):
remove_var(values[j], &nobs, &mean_x, &ssqdm_x,
&compensation_remove)
if not is_monotonic_increasing_bounds:
nobs = 0.0
mean_x = 0.0
ssqdm_x = 0.0
compensation_remove = 0.0

return output

Expand Down Expand Up @@ -486,10 +495,12 @@ def roll_skew(ndarray[float64_t] values, ndarray[int64_t] start,
int64_t nobs = 0, i, j, N = len(values)
int64_t s, e
ndarray[float64_t] output
bint is_monotonic_bounds
bint is_monotonic_increasing_bounds

minp = max(minp, 3)
is_monotonic_bounds = is_monotonic_start_end_bounds(start, end)
is_monotonic_increasing_bounds = is_monotonic_increasing_start_end_bounds(
start, end
)
output = np.empty(N, dtype=float)

with nogil:
Expand All @@ -501,7 +512,7 @@ def roll_skew(ndarray[float64_t] values, ndarray[int64_t] start,

# Over the first window, observations can only be added
# never removed
if i == 0 or not is_monotonic_bounds:
if i == 0 or not is_monotonic_increasing_bounds:

for j in range(s, e):
val = values[j]
Expand All @@ -524,10 +535,11 @@ def roll_skew(ndarray[float64_t] values, ndarray[int64_t] start,

output[i] = calc_skew(minp, nobs, x, xx, xxx)

if not is_monotonic_bounds:
for j in range(s, e):
val = values[j]
remove_skew(val, &nobs, &x, &xx, &xxx)
if not is_monotonic_increasing_bounds:
nobs = 0
x = 0.0
xx = 0.0
xxx = 0.0

return output

Expand Down Expand Up @@ -611,10 +623,12 @@ def roll_kurt(ndarray[float64_t] values, ndarray[int64_t] start,
float64_t x = 0, xx = 0, xxx = 0, xxxx = 0
int64_t nobs = 0, i, j, s, e, N = len(values)
ndarray[float64_t] output
bint is_monotonic_bounds
bint is_monotonic_increasing_bounds

minp = max(minp, 4)
is_monotonic_bounds = is_monotonic_start_end_bounds(start, end)
is_monotonic_increasing_bounds = is_monotonic_increasing_start_end_bounds(
start, end
)
output = np.empty(N, dtype=float)

with nogil:
Expand All @@ -626,7 +640,7 @@ def roll_kurt(ndarray[float64_t] values, ndarray[int64_t] start,

# Over the first window, observations can only be added
# never removed
if i == 0 or not is_monotonic_bounds:
if i == 0 or not is_monotonic_increasing_bounds:

for j in range(s, e):
add_kurt(values[j], &nobs, &x, &xx, &xxx, &xxxx)
Expand All @@ -646,9 +660,12 @@ def roll_kurt(ndarray[float64_t] values, ndarray[int64_t] start,

output[i] = calc_kurt(minp, nobs, x, xx, xxx, xxxx)

if not is_monotonic_bounds:
for j in range(s, e):
remove_kurt(values[j], &nobs, &x, &xx, &xxx, &xxxx)
if not is_monotonic_increasing_bounds:
nobs = 0
x = 0.0
xx = 0.0
xxx = 0.0
xxxx = 0.0

return output

Expand Down
95 changes: 95 additions & 0 deletions pandas/tests/window/test_rolling.py
Original file line number Diff line number Diff line change
Expand Up @@ -917,3 +917,98 @@ def test_rolling_var_numerical_issues(func, third_value, values):
result = getattr(ds.rolling(2), func)()
expected = Series([np.nan] + values)
tm.assert_series_equal(result, expected)


@pytest.mark.parametrize("method", ["var", "sum", "mean", "skew", "kurt", "min", "max"])
def test_rolling_decreasing_indices(method):
    """
    Rolling over a decreasing index must agree with rolling over an increasing one.

    GH 36933
    """
    window = 5
    # The comparison below drops ``window - 1`` rows from each result;
    # presumably these are the rows whose window is not yet full.
    n_partial = window - 1

    forward = DataFrame({"values": np.arange(-15, 10) ** 2})
    backward = DataFrame(
        {"values": forward["values"][::-1]},
        index=forward.index[::-1],
    )

    forward_result = getattr(forward.rolling(window=window), method)()
    backward_result = getattr(backward.rolling(window=window), method)()

    # Flip the reversed result back into forward order before comparing.
    flipped = backward_result.values[::-1]
    max_abs_diff = np.abs(
        flipped[:-n_partial] - forward_result.values[n_partial:]
    ).max()

    assert max_abs_diff < 1e-12


@pytest.mark.parametrize(
    "method,expected",
    [
        (
            "var",
            [
                float("nan"),
                43.0,
                float("nan"),
                136.333333,
                43.5,
                94.966667,
                182.0,
                318.0,
            ],
        ),
        ("mean", [float("nan"), 7.5, float("nan"), 21.5, 6.0, 9.166667, 13.0, 17.5]),
        ("sum", [float("nan"), 30.0, float("nan"), 86.0, 30.0, 55.0, 91.0, 140.0]),
        (
            "skew",
            [
                float("nan"),
                0.709296,
                float("nan"),
                0.407073,
                0.984656,
                0.919184,
                0.874674,
                0.842418,
            ],
        ),
        (
            "kurt",
            [
                float("nan"),
                -0.5916711736073559,
                float("nan"),
                -1.0028993131317954,
                -0.06103844629409494,
                -0.254143227116194,
                -0.37362637362637585,
                -0.45439658241367054,
            ],
        ),
    ],
)
def test_rolling_non_monotonic(method, expected):
    """
    Cover the (rare) code path taken when window bounds are non-monotonic.

    The expected values are the output of pandas 1.1.3, which is assumed to be
    correct. The sum/mean values have additionally been verified manually.

    GH 36933.
    """
    # Based on an example found in computation.rst
    use_expanding = [True, False, True, False, True, True, True, True]
    df = DataFrame({"values": np.arange(len(use_expanding)) ** 2})

    class CustomIndexer(pd.api.indexers.BaseIndexer):
        """Mix expanding windows and fixed-size windows, chosen per row."""

        def get_window_bounds(self, num_values, min_periods, center, closed):
            start = np.empty(num_values, dtype=np.int64)
            end = np.empty(num_values, dtype=np.int64)
            for i in range(num_values):
                if self.use_expanding[i]:
                    # Expanding window: rows 0..i inclusive.
                    start[i], end[i] = 0, i + 1
                else:
                    # Fixed-size window starting at row i.
                    start[i], end[i] = i, i + self.window_size
            return start, end

    indexer = CustomIndexer(window_size=4, use_expanding=use_expanding)
    rolling_method = getattr(df.rolling(indexer), method)

    result = rolling_method()
    expected_frame = DataFrame({"values": expected})
    tm.assert_frame_equal(result, expected_frame)