BUG: map builtins min/max to numpy/cython versions

behzadnouri · jreback · commit 6516c537dfa5 · 2014-09-03T15:46:48.000-04:00
diff --git a/doc/source/v0.15.0.txt b/doc/source/v0.15.0.txt
@@ -668,4 +668,4 @@ Bug Fixes
 - Bug in accessing groups from a ``GroupBy`` when the original grouper
   was a tuple (:issue:`8121`).
 
-
+- Bug in ``.agg`` and ``.apply`` where the builtins ``max``/``min`` were not mapped to their numpy/cythonized versions (:issue:`7722`)
diff --git a/pandas/core/groupby.py b/pandas/core/groupby.py
@@ -3618,12 +3618,16 @@ def _reorder_by_uniques(uniques, labels):
 
 
 _func_table = {
-    builtins.sum: np.sum
+    builtins.sum: np.sum,
+    builtins.max: np.max,
+    builtins.min: np.min
 }
 
 
 _cython_table = {
     builtins.sum: 'sum',
+    builtins.max: 'max',
+    builtins.min: 'min',
     np.sum: 'sum',
     np.mean: 'mean',
     np.prod: 'prod',
diff --git a/pandas/tests/test_groupby.py b/pandas/tests/test_groupby.py
@@ -609,7 +609,7 @@ def test_get_group_grouped_by_tuple(self):
 
         dt = pd.to_datetime(['2010-01-01', '2010-01-02', '2010-01-01',
                             '2010-01-02'])
-        df = DataFrame({'ids': [(x,) for x in dt]}) 
+        df = DataFrame({'ids': [(x,) for x in dt]})
         gr = df.groupby('ids')
         result = gr.get_group(('2010-01-01',))
         expected = DataFrame({'ids': [(dt[0],), (dt[0],)]}, index=[0, 2])
@@ -1696,6 +1696,30 @@ def test_nonsense_func(self):
         df = DataFrame([0])
         self.assertRaises(Exception, df.groupby, lambda x: x + 'foo')
 
+    def test_builtins_apply(self): # GH8155
+        df = pd.DataFrame(np.random.randint(1, 50, (1000, 2)),
+                          columns=['jim', 'joe'])
+        df['jolie'] = np.random.randn(1000)
+
+        for keys in ['jim', ['jim', 'joe']]:  # single key & multi-key
+            for f in [max, min, sum]:
+                fname = f.__name__
+                result = df.groupby(keys).apply(f)
+                ngroups = len(df.drop_duplicates(subset=keys))
+                assert result.shape == (ngroups, 3), 'invalid frame shape: '\
+                        '{} (expected ({}, 3))'.format(result.shape, ngroups)
+
+                assert_frame_equal(result,  # numpy's equivalent function
+                                   df.groupby(keys).apply(getattr(np, fname)))
+
+                if f != sum:
+                    expected = df.groupby(keys).agg(fname).reset_index()
+                    expected.set_index(keys, inplace=True, drop=False)
+                    assert_frame_equal(result, expected, check_dtype=False)
+
+                assert_series_equal(getattr(result, fname)(),
+                                    getattr(df, fname)())
+
     def test_cythonized_aggers(self):
         data = {'A': [0, 0, 0, 0, 1, 1, 1, 1, 1, 1., nan, nan],
                 'B': ['A', 'B'] * 6,
diff --git a/vb_suite/groupby.py b/vb_suite/groupby.py
@@ -474,3 +474,13 @@ def f(g):
 '''
 groupby_transform_multi_key3 = Benchmark(stmt, setup)
 groupby_transform_multi_key4 = Benchmark(stmt, setup + "df['jim'] = df['joe']")
+
+setup = common_setup + '''
+np.random.seed(27182)
+n = 100000
+df = DataFrame(np.random.randint(1, n / 100, (n, 3)),
+        columns=['jim', 'joe', 'jolie'])
+'''
+
+groupby_agg_builtins1 = Benchmark("df.groupby('jim').agg([sum, min, max])", setup)
+groupby_agg_builtins2 = Benchmark("df.groupby(['jim', 'joe']).agg([sum, min, max])", setup)