Skip to content

Commit d7c5d26

Browse files
authored
REF: rolling benchmarks to reduce redundant benchmarks (#44475)
1 parent bc70099 commit d7c5d26

File tree

1 file changed

+113
-95
lines changed

1 file changed

+113
-95
lines changed

asv_bench/benchmarks/rolling.py

+113 -95
Original file line number | Diff line number | Diff line change
@@ -9,22 +9,24 @@ class Methods:
99

1010
params = (
1111
["DataFrame", "Series"],
12-
[10, 1000],
12+
[("rolling", {"window": 10}), ("rolling", {"window": 1000}), ("expanding", {})],
1313
["int", "float"],
14-
["median", "mean", "max", "min", "std", "count", "skew", "kurt", "sum"],
14+
["median", "mean", "max", "min", "std", "count", "skew", "kurt", "sum", "sem"],
1515
)
16-
param_names = ["constructor", "window", "dtype", "method"]
16+
param_names = ["constructor", "window_kwargs", "dtype", "method"]
1717

18-
def setup(self, constructor, window, dtype, method):
18+
def setup(self, constructor, window_kwargs, dtype, method):
1919
N = 10 ** 5
20+
window, kwargs = window_kwargs
2021
arr = (100 * np.random.random(N)).astype(dtype)
21-
self.roll = getattr(pd, constructor)(arr).rolling(window)
22+
obj = getattr(pd, constructor)(arr)
23+
self.window = getattr(obj, window)(**kwargs)
2224

23-
def time_rolling(self, constructor, window, dtype, method):
24-
getattr(self.roll, method)()
25+
def time_method(self, constructor, window_kwargs, dtype, method):
26+
getattr(self.window, method)()
2527

26-
def peakmem_rolling(self, constructor, window, dtype, method):
27-
getattr(self.roll, method)()
28+
def peakmem_method(self, constructor, window_kwargs, dtype, method):
29+
getattr(self.window, method)()
2830

2931

3032
class Apply:
@@ -46,148 +48,160 @@ def time_rolling(self, constructor, window, dtype, function, raw):
4648
self.roll.apply(function, raw=raw)
4749

4850

49-
class NumbaEngine:
51+
class NumbaEngineMethods:
5052
params = (
5153
["DataFrame", "Series"],
5254
["int", "float"],
53-
[np.sum, lambda x: np.sum(x) + 5],
55+
[("rolling", {"window": 10}), ("expanding", {})],
5456
["sum", "max", "min", "median", "mean"],
5557
[True, False],
5658
[None, 100],
5759
)
58-
param_names = ["constructor", "dtype", "function", "method", "parallel", "cols"]
60+
param_names = [
61+
"constructor",
62+
"dtype",
63+
"window_kwargs",
64+
"method",
65+
"parallel",
66+
"cols",
67+
]
5968

60-
def setup(self, constructor, dtype, function, method, parallel, cols):
69+
def setup(self, constructor, dtype, window_kwargs, method, parallel, cols):
6170
N = 10 ** 3
71+
window, kwargs = window_kwargs
6272
shape = (N, cols) if cols is not None and constructor != "Series" else N
6373
arr = (100 * np.random.random(shape)).astype(dtype)
6474
data = getattr(pd, constructor)(arr)
6575

6676
# Warm the cache
6777
with warnings.catch_warnings(record=True):
6878
# Catch parallel=True not being applicable e.g. 1D data
69-
self.roll = data.rolling(10)
70-
self.roll.apply(
71-
function, raw=True, engine="numba", engine_kwargs={"parallel": parallel}
72-
)
73-
getattr(self.roll, method)(
79+
self.window = getattr(data, window)(**kwargs)
80+
getattr(self.window, method)(
7481
engine="numba", engine_kwargs={"parallel": parallel}
7582
)
7683

77-
self.expand = data.expanding()
78-
self.expand.apply(
79-
function, raw=True, engine="numba", engine_kwargs={"parallel": parallel}
80-
)
81-
82-
def time_rolling_apply(self, constructor, dtype, function, method, parallel, col):
83-
with warnings.catch_warnings(record=True):
84-
self.roll.apply(
85-
function, raw=True, engine="numba", engine_kwargs={"parallel": parallel}
86-
)
87-
88-
def time_expanding_apply(self, constructor, dtype, function, method, parallel, col):
89-
with warnings.catch_warnings(record=True):
90-
self.expand.apply(
91-
function, raw=True, engine="numba", engine_kwargs={"parallel": parallel}
92-
)
93-
94-
def time_rolling_methods(self, constructor, dtype, function, method, parallel, col):
84+
def test_method(self, constructor, dtype, window_kwargs, method, parallel, cols):
9585
with warnings.catch_warnings(record=True):
96-
getattr(self.roll, method)(
86+
getattr(self.window, method)(
9787
engine="numba", engine_kwargs={"parallel": parallel}
9888
)
9989

10090

101-
class ExpandingMethods:
102-
91+
class NumbaEngineApply:
10392
params = (
10493
["DataFrame", "Series"],
10594
["int", "float"],
106-
["median", "mean", "max", "min", "std", "count", "skew", "kurt", "sum"],
95+
[("rolling", {"window": 10}), ("expanding", {})],
96+
[np.sum, lambda x: np.sum(x) + 5],
97+
[True, False],
98+
[None, 100],
10799
)
108-
param_names = ["constructor", "window", "dtype", "method"]
100+
param_names = [
101+
"constructor",
102+
"dtype",
103+
"window_kwargs",
104+
"function",
105+
"parallel",
106+
"cols",
107+
]
109108

110-
def setup(self, constructor, dtype, method):
111-
N = 10 ** 5
112-
N_groupby = 100
113-
arr = (100 * np.random.random(N)).astype(dtype)
114-
self.expanding = getattr(pd, constructor)(arr).expanding()
115-
self.expanding_groupby = (
116-
pd.DataFrame({"A": arr[:N_groupby], "B": range(N_groupby)})
117-
.groupby("B")
118-
.expanding()
119-
)
109+
def setup(self, constructor, dtype, window_kwargs, function, parallel, cols):
110+
N = 10 ** 3
111+
window, kwargs = window_kwargs
112+
shape = (N, cols) if cols is not None and constructor != "Series" else N
113+
arr = (100 * np.random.random(shape)).astype(dtype)
114+
data = getattr(pd, constructor)(arr)
120115

121-
def time_expanding(self, constructor, dtype, method):
122-
getattr(self.expanding, method)()
116+
# Warm the cache
117+
with warnings.catch_warnings(record=True):
118+
# Catch parallel=True not being applicable e.g. 1D data
119+
self.window = getattr(data, window)(**kwargs)
120+
self.window.apply(
121+
function, raw=True, engine="numba", engine_kwargs={"parallel": parallel}
122+
)
123123

124-
def time_expanding_groupby(self, constructor, dtype, method):
125-
getattr(self.expanding_groupby, method)()
124+
def test_method(self, constructor, dtype, window_kwargs, function, parallel, cols):
125+
with warnings.catch_warnings(record=True):
126+
self.window.apply(
127+
function, raw=True, engine="numba", engine_kwargs={"parallel": parallel}
128+
)
126129

127130

128131
class EWMMethods:
129132

130-
params = (["DataFrame", "Series"], [10, 1000], ["int", "float"], ["mean", "std"])
131-
param_names = ["constructor", "window", "dtype", "method"]
133+
params = (
134+
["DataFrame", "Series"],
135+
[
136+
({"halflife": 10}, "mean"),
137+
({"halflife": 10}, "std"),
138+
({"halflife": 1000}, "mean"),
139+
({"halflife": 1000}, "std"),
140+
(
141+
{
142+
"halflife": "1 Day",
143+
"times": pd.date_range("1900", periods=10 ** 5, freq="23s"),
144+
},
145+
"mean",
146+
),
147+
],
148+
["int", "float"],
149+
)
150+
param_names = ["constructor", "kwargs_method", "dtype"]
132151

133-
def setup(self, constructor, window, dtype, method):
152+
def setup(self, constructor, kwargs_method, dtype):
134153
N = 10 ** 5
154+
kwargs, method = kwargs_method
135155
arr = (100 * np.random.random(N)).astype(dtype)
136-
times = pd.date_range("1900", periods=N, freq="23s")
137-
self.ewm = getattr(pd, constructor)(arr).ewm(halflife=window)
138-
self.ewm_times = getattr(pd, constructor)(arr).ewm(
139-
halflife="1 Day", times=times
140-
)
156+
self.method = method
157+
self.ewm = getattr(pd, constructor)(arr).ewm(**kwargs)
141158

142-
def time_ewm(self, constructor, window, dtype, method):
143-
getattr(self.ewm, method)()
144-
145-
def time_ewm_times(self, constructor, window, dtype, method):
146-
self.ewm_times.mean()
159+
def time_ewm(self, constructor, kwargs_method, dtype):
160+
getattr(self.ewm, self.method)()
147161

148162

149163
class VariableWindowMethods(Methods):
150164
params = (
151165
["DataFrame", "Series"],
152166
["50s", "1h", "1d"],
153167
["int", "float"],
154-
["median", "mean", "max", "min", "std", "count", "skew", "kurt", "sum"],
168+
["median", "mean", "max", "min", "std", "count", "skew", "kurt", "sum", "sem"],
155169
)
156170
param_names = ["constructor", "window", "dtype", "method"]
157171

158172
def setup(self, constructor, window, dtype, method):
159173
N = 10 ** 5
160174
arr = (100 * np.random.random(N)).astype(dtype)
161175
index = pd.date_range("2017-01-01", periods=N, freq="5s")
162-
self.roll = getattr(pd, constructor)(arr, index=index).rolling(window)
176+
self.window = getattr(pd, constructor)(arr, index=index).rolling(window)
163177

164178

165179
class Pairwise:
166180

167-
params = ([10, 1000, None], ["corr", "cov"], [True, False])
168-
param_names = ["window", "method", "pairwise"]
181+
params = (
182+
[({"window": 10}, "rolling"), ({"window": 1000}, "rolling"), ({}, "expanding")],
183+
["corr", "cov"],
184+
[True, False],
185+
)
186+
param_names = ["window_kwargs", "method", "pairwise"]
169187

170-
def setup(self, window, method, pairwise):
188+
def setup(self, kwargs_window, method, pairwise):
171189
N = 10 ** 4
172190
n_groups = 20
191+
kwargs, window = kwargs_window
173192
groups = [i for _ in range(N // n_groups) for i in range(n_groups)]
174193
arr = np.random.random(N)
175194
self.df = pd.DataFrame(arr)
176-
self.df_group = pd.DataFrame({"A": groups, "B": arr}).groupby("A")
195+
self.window = getattr(self.df, window)(**kwargs)
196+
self.window_group = getattr(
197+
pd.DataFrame({"A": groups, "B": arr}).groupby("A"), window
198+
)(**kwargs)
177199

178-
def time_pairwise(self, window, method, pairwise):
179-
if window is None:
180-
r = self.df.expanding()
181-
else:
182-
r = self.df.rolling(window=window)
183-
getattr(r, method)(self.df, pairwise=pairwise)
200+
def time_pairwise(self, kwargs_window, method, pairwise):
201+
getattr(self.window, method)(self.df, pairwise=pairwise)
184202

185-
def time_groupby(self, window, method, pairwise):
186-
if window is None:
187-
r = self.df_group.expanding()
188-
else:
189-
r = self.df_group.rolling(window=window)
190-
getattr(r, method)(self.df, pairwise=pairwise)
203+
def time_groupby(self, kwargs_window, method, pairwise):
204+
getattr(self.window_group, method)(self.df, pairwise=pairwise)
191205

192206

193207
class Quantile:
@@ -274,25 +288,29 @@ def peakmem_rolling(self, constructor, window_size, dtype, method):
274288

275289
class Groupby:
276290

277-
params = ["sum", "median", "mean", "max", "min", "kurt", "sum"]
291+
params = (
292+
["sum", "median", "mean", "max", "min", "kurt", "sum"],
293+
[
294+
("rolling", {"window": 2}),
295+
("rolling", {"window": "30s", "on": "C"}),
296+
("expanding", {}),
297+
],
298+
)
278299

279-
def setup(self, method):
300+
def setup(self, method, window_kwargs):
280301
N = 1000
302+
window, kwargs = window_kwargs
281303
df = pd.DataFrame(
282304
{
283305
"A": [str(i) for i in range(N)] * 10,
284306
"B": list(range(N)) * 10,
285307
"C": pd.date_range(start="1900-01-01", freq="1min", periods=N * 10),
286308
}
287309
)
288-
self.groupby_roll_int = df.groupby("A").rolling(window=2)
289-
self.groupby_roll_offset = df.groupby("A").rolling(window="30s", on="C")
290-
291-
def time_rolling_int(self, method):
292-
getattr(self.groupby_roll_int, method)()
310+
self.groupby_window = getattr(df.groupby("A"), window)(**kwargs)
293311

294-
def time_rolling_offset(self, method):
295-
getattr(self.groupby_roll_offset, method)()
312+
def time_method(self, method, window_kwargs):
313+
getattr(self.groupby_window, method)()
296314

297315

298316
class GroupbyLargeGroups:

0 commit comments

Comments
 (0)