
Commit da8746b

[skip-ci] Add cftime groupby, resample benchmarks (#7795)
* [skip-ci] Add cftime groupby, resample benchmarks xref #7730
* [skip-ci] try setting temp dir
* [skip-ci] try mamba?
* [skip-ci] increase conda verbosity
* [skip-ci] specify channels
* [skip-ci] Update .github/workflows/benchmarks.yml
* [skip-ci] bugfix
* [skip-ci] Parameterize use_flox
* [skip-ci] cleanup
* [skip-ci] fixes
* [skip-ci] fix resample parameterizing
1 parent ca84a1e commit da8746b
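
For context: the new benchmark cases toggle xarray's flox-backed groupby path with xr.set_options(use_flox=...) and build calendar-aware time coordinates with xr.date_range(..., use_cftime=True). Below is a minimal standalone sketch of the pattern being timed, mirroring the GroupByLongTime case added in this commit; the array sizes are illustrative only, and it assumes cftime (and optionally flox) are installed.

    import numpy as np
    import xarray as xr

    # 30 years of daily data on a small spatial grid, with a cftime-backed
    # time coordinate (use_cftime=True), as in the GroupByLongTime benchmark.
    time = xr.date_range("2000", periods=30 * 365, use_cftime=True)
    da = xr.DataArray(
        np.random.randn(10, 10, 30 * 365),
        dims=("y", "x", "time"),
        coords={"time": time},
    )

    # The benchmarks time this groupby-mean with flox enabled and disabled.
    for use_flox in (True, False):
        with xr.set_options(use_flox=use_flox):
            da.groupby("time.year").mean()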

File tree: 2 files changed (+55, -13 lines)

asv_bench/asv.conf.json (+1)

@@ -30,6 +30,7 @@
     // determined by looking for tools on the PATH environment
     // variable.
     "environment_type": "conda",
+    "conda_channels": ["conda-forge"],

     // timeout in seconds for installing any dependencies in environment
     // defaults to 10 min
asv_bench/benchmarks/groupby.py (+54, -13)
@@ -18,23 +18,29 @@ def setup(self, *args, **kwargs):
                 "c": xr.DataArray(np.arange(2 * self.n)),
             }
         )
-        self.ds2d = self.ds1d.expand_dims(z=10)
+        self.ds2d = self.ds1d.expand_dims(z=10).copy()
         self.ds1d_mean = self.ds1d.groupby("b").mean()
         self.ds2d_mean = self.ds2d.groupby("b").mean()

     @parameterized(["ndim"], [(1, 2)])
     def time_init(self, ndim):
         getattr(self, f"ds{ndim}d").groupby("b")

-    @parameterized(["method", "ndim"], [("sum", "mean"), (1, 2)])
-    def time_agg_small_num_groups(self, method, ndim):
+    @parameterized(
+        ["method", "ndim", "use_flox"], [("sum", "mean"), (1, 2), (True, False)]
+    )
+    def time_agg_small_num_groups(self, method, ndim, use_flox):
         ds = getattr(self, f"ds{ndim}d")
-        getattr(ds.groupby("a"), method)().compute()
+        with xr.set_options(use_flox=use_flox):
+            getattr(ds.groupby("a"), method)().compute()

-    @parameterized(["method", "ndim"], [("sum", "mean"), (1, 2)])
-    def time_agg_large_num_groups(self, method, ndim):
+    @parameterized(
+        ["method", "ndim", "use_flox"], [("sum", "mean"), (1, 2), (True, False)]
+    )
+    def time_agg_large_num_groups(self, method, ndim, use_flox):
         ds = getattr(self, f"ds{ndim}d")
-        getattr(ds.groupby("b"), method)().compute()
+        with xr.set_options(use_flox=use_flox):
+            getattr(ds.groupby("b"), method)().compute()

     def time_binary_op_1d(self):
         (self.ds1d.groupby("b") - self.ds1d_mean).compute()
@@ -115,15 +121,21 @@ def setup(self, *args, **kwargs):
     def time_init(self, ndim):
         getattr(self, f"ds{ndim}d").resample(time="D")

-    @parameterized(["method", "ndim"], [("sum", "mean"), (1, 2)])
-    def time_agg_small_num_groups(self, method, ndim):
+    @parameterized(
+        ["method", "ndim", "use_flox"], [("sum", "mean"), (1, 2), (True, False)]
+    )
+    def time_agg_small_num_groups(self, method, ndim, use_flox):
         ds = getattr(self, f"ds{ndim}d")
-        getattr(ds.resample(time="3M"), method)().compute()
+        with xr.set_options(use_flox=use_flox):
+            getattr(ds.resample(time="3M"), method)().compute()

-    @parameterized(["method", "ndim"], [("sum", "mean"), (1, 2)])
-    def time_agg_large_num_groups(self, method, ndim):
+    @parameterized(
+        ["method", "ndim", "use_flox"], [("sum", "mean"), (1, 2), (True, False)]
+    )
+    def time_agg_large_num_groups(self, method, ndim, use_flox):
         ds = getattr(self, f"ds{ndim}d")
-        getattr(ds.resample(time="48H"), method)().compute()
+        with xr.set_options(use_flox=use_flox):
+            getattr(ds.resample(time="48H"), method)().compute()


 class ResampleDask(Resample):
@@ -132,3 +144,32 @@ def setup(self, *args, **kwargs):
         super().setup(**kwargs)
         self.ds1d = self.ds1d.chunk({"time": 50})
         self.ds2d = self.ds2d.chunk({"time": 50, "z": 4})
+
+
+class ResampleCFTime(Resample):
+    def setup(self, *args, **kwargs):
+        self.ds1d = xr.Dataset(
+            {
+                "b": ("time", np.arange(365.0 * 24)),
+            },
+            coords={
+                "time": xr.date_range(
+                    "2001-01-01", freq="H", periods=365 * 24, calendar="noleap"
+                )
+            },
+        )
+        self.ds2d = self.ds1d.expand_dims(z=10)
+        self.ds1d_mean = self.ds1d.resample(time="48H").mean()
+        self.ds2d_mean = self.ds2d.resample(time="48H").mean()
+
+
+@parameterized(["use_cftime", "use_flox"], [[True, False], [True, False]])
+class GroupByLongTime:
+    def setup(self, use_cftime, use_flox):
+        arr = np.random.randn(10, 10, 365 * 30)
+        time = xr.date_range("2000", periods=30 * 365, use_cftime=use_cftime)
+        self.da = xr.DataArray(arr, dims=("y", "x", "time"), coords={"time": time})
+
+    def time_mean(self, use_cftime, use_flox):
+        with xr.set_options(use_flox=use_flox):
+            self.da.groupby("time.year").mean()
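
Usage note (not part of the diff): with the asv configuration above, the new cases should be selectable by benchmark name when running locally, for example via something along the lines of asv run --bench "GroupByLongTime" from the asv_bench directory; the exact invocation depends on the local asv setup.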
