|
11 | 11 | from .pandas_vb_common import setup # noqa
|
12 | 12 |
|
13 | 13 |
|
# Maps a benchmark dtype to the set of GroupBy method names that are not
# benchmarked for that dtype.  GroupByMethods.setup looks the requested
# (dtype, method) pair up here and skips the benchmark on a match.
method_blacklist = {
    'object': {
        'cummax', 'cummin', 'cumprod', 'cumsum', 'describe', 'mad', 'max',
        'mean', 'median', 'min', 'pct_change', 'prod', 'rank', 'sem',
        'skew', 'std', 'sum', 'var',
    },
}
| 19 | + |
| 20 | + |
14 | 21 | class ApplyDictReturn(object):
|
15 | 22 | goal_time = 0.2
|
16 | 23 |
|
@@ -153,6 +160,7 @@ def time_frame_nth_any(self, df):
|
def time_frame_nth(self, df):
    """Call ``nth(0)`` on ``df`` grouped by key ``0``.

    The result is discarded; the method exists only so the call can be
    timed.
    """
    df.groupby(0).nth(0)
|
155 | 162 |
|
| 163 | + |
def time_series_nth_any(self, df):
    """Call ``nth(0, dropna='any')`` on ``df[1]`` grouped by ``df[0]``.

    The result is discarded; the method exists only so the call can be
    timed.
    """
    df[1].groupby(df[0]).nth(0, dropna='any')
|
158 | 166 |
|
@@ -369,23 +377,27 @@ class GroupByMethods(object):
|
# Class-level benchmark configuration for GroupByMethods.
goal_time = 0.2

# Every benchmark in the class is parametrized over one value from each
# list in ``params``; ``param_names`` labels them in the same order, and
# ``setup`` receives them as ``(dtype, method)``.
param_names = ['dtype', 'method']
params = [
    # dtype: selects how the grouping key column is generated in setup
    ['int', 'float', 'object'],
    # method: name of the GroupBy method that setup looks up
    ['all', 'any', 'bfill', 'count', 'cumcount', 'cummax', 'cummin',
     'cumprod', 'cumsum', 'describe', 'ffill', 'first', 'head',
     'last', 'mad', 'max', 'min', 'median', 'mean', 'nunique',
     'pct_change', 'prod', 'rank', 'sem', 'shift', 'size', 'skew',
     'std', 'sum', 'tail', 'unique', 'value_counts', 'var'],
]
|
378 | 386 |
|
def setup(self, dtype, method):
    """Build the grouped data and bind the GroupBy method to be timed.

    Parameters
    ----------
    dtype : {'int', 'float', 'object'}
        Selects how the grouping key column is generated.
    method : str
        Name of the method looked up on the grouped ``values`` column.

    Raises
    ------
    NotImplementedError
        If ``method`` is listed in ``method_blacklist`` for ``dtype``;
        raised before any data is built so the benchmark is skipped.
    ValueError
        If ``dtype`` is not one of the supported values.
    """
    # An empty tuple is enough as the default: only membership is tested.
    if method in method_blacklist.get(dtype, ()):
        raise NotImplementedError  # skip benchmark

    ngroups = 1000
    size = ngroups * 2
    rng = np.arange(ngroups)
    values = rng.take(np.random.randint(0, ngroups, size=size))

    if dtype == 'int':
        key = np.random.randint(0, size, size=size)
    elif dtype == 'float':
        key = np.concatenate([np.random.random(ngroups) * 0.1,
                              np.random.random(ngroups) * 10.0])
    elif dtype == 'object':
        # A single repeated label: every row falls into the same group.
        key = ['foo'] * size
    else:
        # Without this branch ``key`` would be unbound and the failure
        # would surface later as an UnboundLocalError.
        raise ValueError('unsupported dtype: {!r}'.format(dtype))

    df = DataFrame({'values': values, 'key': key})
    # Only the bound method is stored here, so constructing the frame and
    # the groupby object stays in setup.
    self.df_groupby_method = getattr(df.groupby('key')['values'], method)
|
|
0 commit comments