Skip to content

Commit 2d81b64

Browse files
committed
PERF: added vb_suite test for groupby_transform
added to RELEASE.rst, issue GH2121
1 parent 7898ec2 commit 2d81b64

File tree

3 files changed

+43
-69
lines changed

3 files changed

+43
-69
lines changed

RELEASE.rst

+5-3
Original file line numberDiff line numberDiff line change
@@ -100,9 +100,6 @@ pandas 0.11.0
100100
the collections.Mapping ABC.
101101
- Allow selection semantics via a string with a datelike index to work in both
102102
Series and DataFrames (GH3070_)
103-
- Improved performance across several core functions by taking memory
104-
ordering of arrays into account. Courtesy of @stephenwlin (GH3130_)
105-
106103

107104
.. ipython:: python
108105
@@ -116,6 +113,10 @@ pandas 0.11.0
116113
for plots. Based on https://gist.github.com/huyng/816622 (GH3075_).
117114

118115

116+
- Improved performance across several core functions by taking memory
117+
ordering of arrays into account. Courtesy of @stephenwlin (GH3130_)
118+
- Improved performance of the groupby ``transform`` method (GH2121_)
119+
119120
**API Changes**
120121

121122
- Do not automatically upcast numeric specified dtypes to ``int64`` or
@@ -234,6 +235,7 @@ pandas 0.11.0
234235
.. _GH622: https://github.com/pydata/pandas/issues/622
235236
.. _GH797: https://github.com/pydata/pandas/issues/797
236237
.. _GH2758: https://github.com/pydata/pandas/issues/2758
238+
.. _GH2121: https://github.com/pydata/pandas/issues/2121
237239
.. _GH2809: https://github.com/pydata/pandas/issues/2809
238240
.. _GH2810: https://github.com/pydata/pandas/issues/2810
239241
.. _GH2837: https://github.com/pydata/pandas/issues/2837

bench/bench_transform.py

-66
This file was deleted.

vb_suite/groupby.py

+38
Original file line numberDiff line numberDiff line change
@@ -273,3 +273,41 @@ def f(g):
273273
"""
274274

275275
groupby_sum_booleans = Benchmark("df.groupby('ii').sum()", setup)
276+
277+
#----------------------------------------------------------------------
278+
# Transform testing
279+
280+
setup = common_setup + """
281+
n_dates = 1000
282+
n_securities = 500
283+
n_columns = 3
284+
share_na = 0.1
285+
286+
dates = date_range('1997-12-31', periods=n_dates, freq='B')
287+
dates = Index(map(lambda x: x.year * 10000 + x.month * 100 + x.day, dates))
288+
289+
secid_min = int('10000000', 16)
290+
secid_max = int('F0000000', 16)
291+
step = (secid_max - secid_min) // (n_securities - 1)
292+
security_ids = map(lambda x: hex(x)[2:10].upper(), range(secid_min, secid_max + 1, step))
293+
294+
data_index = MultiIndex(levels=[dates.values, security_ids],
295+
labels=[[i for i in xrange(n_dates) for _ in xrange(n_securities)], range(n_securities) * n_dates],
296+
names=['date', 'security_id'])
297+
n_data = len(data_index)
298+
299+
columns = Index(['factor{}'.format(i) for i in xrange(1, n_columns + 1)])
300+
301+
data = DataFrame(np.random.randn(n_data, n_columns), index=data_index, columns=columns)
302+
303+
step = int(n_data * share_na)
304+
for column_index in xrange(n_columns):
305+
index = column_index
306+
while index < n_data:
307+
data.set_value(data_index[index], columns[column_index], np.nan)
308+
index += step
309+
310+
f_fillna = lambda x: x.fillna(method='pad')
311+
"""
312+
313+
groupby_transform = Benchmark("data.groupby(level='security_id').transform(f_fillna)", setup)

0 commit comments

Comments
 (0)