Skip to content

Commit 49a3d76

Browse files
committed
TST: vbench for #775
1 parent 13f5db0 commit 49a3d76

File tree

3 files changed

+34
-4
lines changed

3 files changed

+34
-4
lines changed

pandas/core/groupby.py

+13-2
Original file line number | Diff line number | Diff line change
@@ -303,12 +303,20 @@ def mean(self):
303303

304304
def std(self):
305305
"""
306-
Compute mean of groups, excluding missing values
306+
Compute standard deviation of groups, excluding missing values
307307
308308
For multiple groupings, the result index will be a MultiIndex
309309
"""
310310
return self._cython_agg_general('std')
311311

312+
def var(self):
313+
"""
314+
Compute variance of groups, excluding missing values
315+
316+
For multiple groupings, the result index will be a MultiIndex
317+
"""
318+
return self._cython_agg_general('var')
319+
312320
def size(self):
313321
"""
314322
Compute group sizes
@@ -595,6 +603,9 @@ def __init__(self, index, grouper=None, name=None, level=None,
595603
self._group_index = level_index
596604
self.grouper = level_values
597605
else:
606+
if isinstance(self.grouper, (list, tuple)):
607+
self.grouper = com._asarray_tuplesafe(self.grouper)
608+
598609
# no level passed
599610
if not isinstance(self.grouper, np.ndarray):
600611
self.grouper = self.index.map(self.grouper)
@@ -1038,7 +1049,7 @@ def aggregate(self, arg, *args, **kwargs):
10381049
for col, func in arg.iteritems():
10391050
colg = SeriesGroupBy(obj[col], column=col,
10401051
groupings=self.groupings)
1041-
result[col] = colg.agg(func)
1052+
result[col] = colg.aggregate(func)
10421053

10431054
result = DataFrame(result)
10441055
elif isinstance(arg, list):

setup.py

+2-2
Original file line number | Diff line number | Diff line change
@@ -164,9 +164,9 @@
164164
]
165165

166166
MAJOR = 0
167-
MINOR = 7
167+
MINOR = 8
168168
MICRO = 0
169-
ISRELEASED = True
169+
ISRELEASED = False
170170
VERSION = '%d.%d.%d' % (MAJOR, MINOR, MICRO)
171171
QUALIFIER = ''
172172

vb_suite/groupby.py

+19
Original file line number | Diff line number | Diff line change
@@ -72,3 +72,22 @@ def f():
7272
Benchmark('df.groupby(labels).sum()', setup,
7373
start_date=datetime(2011, 8, 1), logy=True)
7474

75+
#----------------------------------------------------------------------
76+
# group with different functions per column
77+
78+
setup = common_setup + """
79+
fac1 = np.array(['A', 'B', 'C'], dtype='O')
80+
fac2 = np.array(['one', 'two'], dtype='O')
81+
82+
df = DataFrame({'key1': fac1.take(np.random.randint(0, 3, size=100000)),
83+
'key2': fac2.take(np.random.randint(0, 2, size=100000)),
84+
'value1' : np.random.randn(100000),
85+
'value2' : np.random.randn(100000),
86+
'value3' : np.random.randn(100000)})
87+
"""
88+
89+
groupby_multi_different_functions = \
90+
Benchmark("""df.groupby(['key1', 'key2']).agg({'value1' : 'mean',
91+
'value2' : 'var',
92+
'value3' : 'sum'})""",
93+
setup, start_date=datetime(2011, 9, 1))

0 commit comments

Comments
 (0)