@@ -273,3 +273,41 @@ def f(g):
273
273
"""
274
274
275
275
# Benchmark: time a groupby-sum keyed on column 'ii', using the `setup`
# string assigned immediately above.
groupby_sum_booleans = Benchmark(
    "df.groupby('ii').sum()",
    setup,
)
276
+
277
+ #----------------------------------------------------------------------
278
+ # Transform testing
279
+
280
# Setup for the groupby-transform benchmark.
#
# Builds `data`, a DataFrame of standard-normal random values with
# `n_columns` columns ('factor1', 'factor2', ...) and one row for every
# (date, security_id) pair, i.e. n_dates * n_securities rows:
#   * dates are business days from 1997-12-31, encoded as YYYYMMDD integers;
#   * security ids are evenly spaced values between 0x10000000 and
#     0xF0000000, rendered as 8-character upper-case hex strings.
# NaNs are then written into each column every `na_step` rows, starting at
# the row whose position equals the column's position, so the pad-fill in
# `f_fillna` has gaps to fill.
#
# List comprehensions and `range` are used instead of `map` / `xrange`, and
# `range(...)` is wrapped in `list(...)` before being repeated, so the setup
# runs unchanged on both Python 2 and Python 3.
#
# NOTE(review): na_step = int(n_data * share_na) spaces the NaNs
# n_data * share_na rows apart, which yields roughly 1 / share_na NaNs per
# column rather than a `share_na` fraction of the rows. Confirm whether
# int(1 / share_na) was intended; the stride is left as-is here so the
# generated data stays the same.
setup = common_setup + """
n_dates = 1000
n_securities = 500
n_columns = 3
share_na = 0.1

dates = date_range('1997-12-31', periods=n_dates, freq='B')
dates = Index([d.year * 10000 + d.month * 100 + d.day for d in dates])

secid_min = int('10000000', 16)
secid_max = int('F0000000', 16)
secid_step = (secid_max - secid_min) // (n_securities - 1)
security_ids = [hex(x)[2:10].upper()
                for x in range(secid_min, secid_max + 1, secid_step)]

date_labels = [i for i in range(n_dates) for _ in range(n_securities)]
security_labels = list(range(n_securities)) * n_dates
data_index = MultiIndex(levels=[dates.values, security_ids],
                        labels=[date_labels, security_labels],
                        names=['date', 'security_id'])
n_data = len(data_index)

columns = Index(['factor{}'.format(i) for i in range(1, n_columns + 1)])

data = DataFrame(np.random.randn(n_data, n_columns),
                 index=data_index, columns=columns)

na_step = int(n_data * share_na)
for column_index in range(n_columns):
    data.iloc[column_index::na_step, column_index] = np.nan

f_fillna = lambda x: x.fillna(method='pad')
"""

# Benchmark: forward-fill each security's rows via groupby(...).transform.
groupby_transform = Benchmark(
    "data.groupby(level='security_id').transform(f_fillna)", setup)
0 commit comments