Skip to content

Commit 3cd500f

Browse files
committed
Added object benchmark for applicable GroupBy methods
1 parent 0754294 commit 3cd500f

File tree

1 file changed

+14
-2
lines changed

1 file changed

+14
-2
lines changed

asv_bench/benchmarks/groupby.py

+14 -2
Original file line number | Diff line number | Diff line change
@@ -11,6 +11,13 @@
1111
from .pandas_vb_common import setup # noqa
1212

1313

14+
method_blacklist = {
15+
'object': {'median', 'prod', 'sem', 'cumsum', 'sum', 'cummin', 'mean',
16+
'max', 'skew', 'cumprod', 'cummax', 'rank', 'pct_change', 'min',
17+
'var', 'mad', 'describe', 'std'}
18+
}
19+
20+
1421
class ApplyDictReturn(object):
1522
goal_time = 0.2
1623

@@ -153,6 +160,7 @@ def time_frame_nth_any(self, df):
153160
def time_frame_nth(self, df):
154161
df.groupby(0).nth(0)
155162

163+
156164
def time_series_nth_any(self, df):
157165
df[1].groupby(df[0]).nth(0, dropna='any')
158166

@@ -369,23 +377,27 @@ class GroupByMethods(object):
369377
goal_time = 0.2
370378

371379
param_names = ['dtype', 'method']
372-
params = [['int', 'float'],
380+
params = [['int', 'float', 'object'],
373381
['all', 'any', 'bfill', 'count', 'cumcount', 'cummax', 'cummin',
374382
'cumprod', 'cumsum', 'describe', 'ffill', 'first', 'head',
375383
'last', 'mad', 'max', 'min', 'median', 'mean', 'nunique',
376384
'pct_change', 'prod', 'rank', 'sem', 'shift', 'size', 'skew',
377385
'std', 'sum', 'tail', 'unique', 'value_counts', 'var']]
378386

379387
def setup(self, dtype, method):
388+
if method in method_blacklist.get(dtype, {}):
389+
raise NotImplementedError # skip benchmark
380390
ngroups = 1000
381391
size = ngroups * 2
382392
rng = np.arange(ngroups)
383393
values = rng.take(np.random.randint(0, ngroups, size=size))
384394
if dtype == 'int':
385395
key = np.random.randint(0, size, size=size)
386-
else:
396+
elif dtype == 'float':
387397
key = np.concatenate([np.random.random(ngroups) * 0.1,
388398
np.random.random(ngroups) * 10.0])
399+
elif dtype == 'object':
400+
key = ['foo'] * size
389401

390402
df = DataFrame({'values': values, 'key': key})
391403
self.df_groupby_method = getattr(df.groupby('key')['values'], method)

0 commit comments

Comments
 (0)