
Commit 7ea5179

mroeschke authored and jreback committed
CLN: ASV long and broken benchmarks (#19113)
1 parent 928affd commit 7ea5179

File tree

2 files changed (+21, -114 lines)


asv_bench/benchmarks/frame_methods.py (+2, -2)

@@ -4,7 +4,7 @@
 from pandas import (DataFrame, Series, MultiIndex, date_range, period_range,
                     isnull, NaT)

-from .pandas_vb_common import setup  # noqa
+from .pandas_vb_common import setup  # noqa


 class GetNumericData(object):

@@ -127,7 +127,7 @@ class ToHTML(object):
     def setup(self):
         nrows = 500
         self.df2 = DataFrame(np.random.randn(nrows, 10))
-        self.df2[0] = period_range('2000', '2010', nrows)
+        self.df2[0] = period_range('2000', periods=nrows)
         self.df2[1] = range(nrows)

     def time_to_html_mixed(self):
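
For context on the frame_methods fix: pandas' period_range accepts exactly two of start, end, and periods, so the old three-argument call presumably failed and broke the ToHTML benchmark; the replacement keeps only start and periods. A minimal standalone sketch of the corrected setup (imports added here for illustration):

import numpy as np
from pandas import DataFrame, period_range

nrows = 500
df2 = DataFrame(np.random.randn(nrows, 10))
# Exactly two of start/end/periods may be given; here: start and periods.
df2[0] = period_range('2000', periods=nrows)
df2[1] = range(nrows)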

asv_bench/benchmarks/groupby.py (+19, -112)

@@ -1,4 +1,4 @@
-from string import ascii_letters, digits
+from string import ascii_letters
 from itertools import product
 from functools import partial

@@ -275,18 +275,12 @@ class GroupStrings(object):

     def setup(self):
         n = 2 * 10**5
-        alpha = list(map(''.join, product((ascii_letters + digits), repeat=4)))
-        self.df = DataFrame({'a': np.repeat(np.random.choice(alpha,
-                                                              (n // 11)), 11),
-                             'b': np.repeat(np.random.choice(alpha,
-                                                              (n // 7)), 7),
-                             'c': np.repeat(np.random.choice(alpha,
-                                                              (n // 5)), 5),
-                             'd': np.repeat(np.random.choice(alpha,
-                                                              (n // 1)), 1)})
+        alpha = list(map(''.join, product(ascii_letters, repeat=4)))
+        data = np.random.choice(alpha, (n // 5, 4), replace=False)
+        data = np.repeat(data, 5, axis=0)
+        self.df = DataFrame(data, columns=list('abcd'))
         self.df['joe'] = (np.random.randn(len(self.df)) * 10).round(3)
-        i = np.random.permutation(len(self.df))
-        self.df = self.df.iloc[i].reset_index(drop=True)
+        self.df = self.df.sample(frac=1).reset_index(drop=True)

     def time_multi_columns(self):
         self.df.groupby(list('abcd')).max()

@@ -356,10 +350,16 @@ class GroupByMethods(object):

     goal_time = 0.2

-    param_names = ['dtype', 'ngroups']
-    params = [['int', 'float'], [100, 10000]]
+    param_names = ['dtype', 'method']
+    params = [['int', 'float'],
+              ['all', 'any', 'count', 'cumcount', 'cummax', 'cummin',
+               'cumprod', 'cumsum', 'describe', 'first', 'head', 'last', 'mad',
+               'max', 'min', 'median', 'mean', 'nunique', 'pct_change', 'prod',
+               'rank', 'sem', 'shift', 'size', 'skew', 'std', 'sum', 'tail',
+               'unique', 'value_counts', 'var']]

-    def setup(self, dtype, ngroups):
+    def setup(self, dtype, method):
+        ngroups = 1000
         size = ngroups * 2
         rng = np.arange(ngroups)
         values = rng.take(np.random.randint(0, ngroups, size=size))

@@ -369,104 +369,11 @@ def setup(self, dtype, ngroups):
         key = np.concatenate([np.random.random(ngroups) * 0.1,
                               np.random.random(ngroups) * 10.0])

-        self.df = DataFrame({'values': values,
-                             'key': key})
+        df = DataFrame({'values': values, 'key': key})
+        self.df_groupby_method = getattr(df.groupby('key')['values'], method)

-    def time_all(self, dtype, ngroups):
-        self.df.groupby('key')['values'].all()
-
-    def time_any(self, dtype, ngroups):
-        self.df.groupby('key')['values'].any()
-
-    def time_count(self, dtype, ngroups):
-        self.df.groupby('key')['values'].count()
-
-    def time_cumcount(self, dtype, ngroups):
-        self.df.groupby('key')['values'].cumcount()
-
-    def time_cummax(self, dtype, ngroups):
-        self.df.groupby('key')['values'].cummax()
-
-    def time_cummin(self, dtype, ngroups):
-        self.df.groupby('key')['values'].cummin()
-
-    def time_cumprod(self, dtype, ngroups):
-        self.df.groupby('key')['values'].cumprod()
-
-    def time_cumsum(self, dtype, ngroups):
-        self.df.groupby('key')['values'].cumsum()
-
-    def time_describe(self, dtype, ngroups):
-        self.df.groupby('key')['values'].describe()
-
-    def time_diff(self, dtype, ngroups):
-        self.df.groupby('key')['values'].diff()
-
-    def time_first(self, dtype, ngroups):
-        self.df.groupby('key')['values'].first()
-
-    def time_head(self, dtype, ngroups):
-        self.df.groupby('key')['values'].head()
-
-    def time_last(self, dtype, ngroups):
-        self.df.groupby('key')['values'].last()
-
-    def time_mad(self, dtype, ngroups):
-        self.df.groupby('key')['values'].mad()
-
-    def time_max(self, dtype, ngroups):
-        self.df.groupby('key')['values'].max()
-
-    def time_mean(self, dtype, ngroups):
-        self.df.groupby('key')['values'].mean()
-
-    def time_median(self, dtype, ngroups):
-        self.df.groupby('key')['values'].median()
-
-    def time_min(self, dtype, ngroups):
-        self.df.groupby('key')['values'].min()
-
-    def time_nunique(self, dtype, ngroups):
-        self.df.groupby('key')['values'].nunique()
-
-    def time_pct_change(self, dtype, ngroups):
-        self.df.groupby('key')['values'].pct_change()
-
-    def time_prod(self, dtype, ngroups):
-        self.df.groupby('key')['values'].prod()
-
-    def time_rank(self, dtype, ngroups):
-        self.df.groupby('key')['values'].rank()
-
-    def time_sem(self, dtype, ngroups):
-        self.df.groupby('key')['values'].sem()
-
-    def time_shift(self, dtype, ngroups):
-        self.df.groupby('key')['values'].shift()
-
-    def time_size(self, dtype, ngroups):
-        self.df.groupby('key')['values'].size()
-
-    def time_skew(self, dtype, ngroups):
-        self.df.groupby('key')['values'].skew()
-
-    def time_std(self, dtype, ngroups):
-        self.df.groupby('key')['values'].std()
-
-    def time_sum(self, dtype, ngroups):
-        self.df.groupby('key')['values'].sum()
-
-    def time_tail(self, dtype, ngroups):
-        self.df.groupby('key')['values'].tail()
-
-    def time_unique(self, dtype, ngroups):
-        self.df.groupby('key')['values'].unique()
-
-    def time_value_counts(self, dtype, ngroups):
-        self.df.groupby('key')['values'].value_counts()
-
-    def time_var(self, dtype, ngroups):
-        self.df.groupby('key')['values'].var()
+    def time_method(self, dtype, method):
+        self.df_groupby_method()


 class Float32(object):
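
For context on the groupby change: the rewritten GroupByMethods leans on asv's parameterization, where asv takes the cross-product of the params lists, passes each combination to setup() and the time_* methods, and reports one timing per combination; pre-binding the groupby method in setup() keeps the timed body to a single call instead of 32 near-identical time_* functions. Below is a minimal, self-contained sketch of that pattern; the class name, attribute name, and the three-method subset are illustrative only, not part of the commit:

import numpy as np
from pandas import DataFrame


class GroupByMethodSketch(object):
    # asv runs setup()/time_method() once per entry in params.
    param_names = ['method']
    params = [['sum', 'mean', 'max']]

    def setup(self, method):
        ngroups = 1000
        size = ngroups * 2
        df = DataFrame({'key': np.random.randint(0, ngroups, size=size),
                        'values': np.random.randn(size)})
        # Pre-bind e.g. df.groupby('key')['values'].sum so the timed body
        # measures only the groupby operation, not DataFrame construction.
        self.bound_method = getattr(df.groupby('key')['values'], method)

    def time_method(self, method):
        self.bound_method()

Such a class would normally be exercised through asv's standard CLI, for example with asv run --bench GroupByMethods from the asv_bench directory, assuming an existing asv configuration.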
