Skip to content

Commit 6516c53

Browse files
behzadnouri authored and jreback committed
BUG: map builtins min/max to numpy/cython versions
1 parent 2ca6fd8 commit 6516c53

File tree

4 files changed

+41
-3
lines changed

4 files changed

+41
-3
lines changed

doc/source/v0.15.0.txt

+1-1
Original file line number | Diff line number | Diff line change
@@ -668,4 +668,4 @@ Bug Fixes
668668
- Bug in accessing groups from a ``GroupBy`` when the original grouper
669669
was a tuple (:issue:`8121`).
670670

671-
671+
- Bug in ``.agg`` and ``.apply`` where the builtin ``max``/``min`` functions were not mapped to their numpy/cythonized versions (:issue:`7722`)

pandas/core/groupby.py

+5-1
Original file line number | Diff line number | Diff line change
@@ -3618,12 +3618,16 @@ def _reorder_by_uniques(uniques, labels):
36183618

36193619

36203620
_func_table = {
3621-
builtins.sum: np.sum
3621+
builtins.sum: np.sum,
3622+
builtins.max: np.max,
3623+
builtins.min: np.min
36223624
}
36233625

36243626

36253627
_cython_table = {
36263628
builtins.sum: 'sum',
3629+
builtins.max: 'max',
3630+
builtins.min: 'min',
36273631
np.sum: 'sum',
36283632
np.mean: 'mean',
36293633
np.prod: 'prod',

pandas/tests/test_groupby.py

+25-1
Original file line number | Diff line number | Diff line change
@@ -609,7 +609,7 @@ def test_get_group_grouped_by_tuple(self):
609609

610610
dt = pd.to_datetime(['2010-01-01', '2010-01-02', '2010-01-01',
611611
'2010-01-02'])
612-
df = DataFrame({'ids': [(x,) for x in dt]})
612+
df = DataFrame({'ids': [(x,) for x in dt]})
613613
gr = df.groupby('ids')
614614
result = gr.get_group(('2010-01-01',))
615615
expected = DataFrame({'ids': [(dt[0],), (dt[0],)]}, index=[0, 2])
@@ -1696,6 +1696,30 @@ def test_nonsense_func(self):
16961696
df = DataFrame([0])
16971697
self.assertRaises(Exception, df.groupby, lambda x: x + 'foo')
16981698

1699+
def test_builtins_apply(self): # GH8155
1700+
df = pd.DataFrame(np.random.randint(1, 50, (1000, 2)),
1701+
columns=['jim', 'joe'])
1702+
df['jolie'] = np.random.randn(1000)
1703+
1704+
for keys in ['jim', ['jim', 'joe']]: # single key & multi-key
1705+
for f in [max, min, sum]:
1706+
fname = f.__name__
1707+
result = df.groupby(keys).apply(f)
1708+
ngroups = len(df.drop_duplicates(subset=keys))
1709+
assert result.shape == (ngroups, 3), 'invalid frame shape: '\
1710+
'{} (expected ({}, 3))'.format(result.shape, ngroups)
1711+
1712+
assert_frame_equal(result, # numpy's equivalent function
1713+
df.groupby(keys).apply(getattr(np, fname)))
1714+
1715+
if f != sum:
1716+
expected = df.groupby(keys).agg(fname).reset_index()
1717+
expected.set_index(keys, inplace=True, drop=False)
1718+
assert_frame_equal(result, expected, check_dtype=False)
1719+
1720+
assert_series_equal(getattr(result, fname)(),
1721+
getattr(df, fname)())
1722+
16991723
def test_cythonized_aggers(self):
17001724
data = {'A': [0, 0, 0, 0, 1, 1, 1, 1, 1, 1., nan, nan],
17011725
'B': ['A', 'B'] * 6,

vb_suite/groupby.py

+10
Original file line number | Diff line number | Diff line change
@@ -474,3 +474,13 @@ def f(g):
474474
'''
475475
groupby_transform_multi_key3 = Benchmark(stmt, setup)
476476
groupby_transform_multi_key4 = Benchmark(stmt, setup + "df['jim'] = df['joe']")
477+
478+
setup = common_setup + '''
479+
np.random.seed(27182)
480+
n = 100000
481+
df = DataFrame(np.random.randint(1, n / 100, (n, 3)),
482+
columns=['jim', 'joe', 'jolie'])
483+
'''
484+
485+
groupby_agg_builtins1 = Benchmark("df.groupby('jim').agg([sum, min, max])", setup)
486+
groupby_agg_builtins2 = Benchmark("df.groupby(['jim', 'joe']).agg([sum, min, max])", setup)

0 commit comments

Comments
 (0)