diff --git a/doc/source/whatsnew/v0.20.0.txt b/doc/source/whatsnew/v0.20.0.txt
index c82dc370e3e71..e8a3f52975bc0 100644
--- a/doc/source/whatsnew/v0.20.0.txt
+++ b/doc/source/whatsnew/v0.20.0.txt
@@ -332,6 +332,7 @@ Bug Fixes
 
 
 
+- Bug in groupby operations with timedelta64 when passing ``numeric_only=False`` (:issue:`5724`)
 
 
 - Bug in ``DataFrame.to_html`` with ``index=False`` and ``max_rows`` raising in ``IndexError`` (:issue:`14998`)
diff --git a/pandas/compat/numpy/function.py b/pandas/compat/numpy/function.py
index adc17c7514832..895a376457f09 100644
--- a/pandas/compat/numpy/function.py
+++ b/pandas/compat/numpy/function.py
@@ -306,12 +306,18 @@ def validate_expanding_func(name, args, kwargs):
             raise UnsupportedFunctionCall(msg)
 
 
-def validate_groupby_func(name, args, kwargs):
+def validate_groupby_func(name, args, kwargs, allowed=None):
     """
-    'args' and 'kwargs' should be empty because all of
+    'args' and 'kwargs' should be empty, except for the kwargs named
+    in 'allowed', because all of
     their necessary parameters are explicitly listed in
     the function signature
     """
+    if allowed is None:
+        allowed = []
+
+    kwargs = set(kwargs) - set(allowed)
+
     if len(args) + len(kwargs) > 0:
         raise UnsupportedFunctionCall((
             "numpy operations are not valid "
diff --git a/pandas/core/groupby.py b/pandas/core/groupby.py
index 700e279cb0030..ddf6d95fa2ab4 100644
--- a/pandas/core/groupby.py
+++ b/pandas/core/groupby.py
@@ -19,6 +19,7 @@
                                  is_categorical_dtype,
                                  is_datetimelike,
                                  is_datetime_or_timedelta_dtype,
+                                 is_datetime64_any_dtype,
                                  is_bool, is_integer_dtype,
                                  is_complex_dtype,
                                  is_bool_dtype,
@@ -109,10 +110,12 @@ def _groupby_function(name, alias, npfunc, numeric_only=True,
     @Substitution(name='groupby', f=name)
     @Appender(_doc_template)
     @Appender(_local_template)
-    def f(self):
+    def f(self, **kwargs):
+        if 'numeric_only' not in kwargs:
+            kwargs['numeric_only'] = numeric_only
         self._set_group_selection()
         try:
-            return self._cython_agg_general(alias, numeric_only=numeric_only)
+            return self._cython_agg_general(alias, alt=npfunc, **kwargs)
         except AssertionError as e:
             raise SpecificationError(str(e))
         except Exception:
@@ -127,7 +130,9 @@ def f(self):
 
 
 def _first_compat(x, axis=0):
+
     def _first(x):
+
         x = np.asarray(x)
         x = x[notnull(x)]
         if len(x) == 0:
@@ -142,6 +147,7 @@ def _first(x):
 
 def _last_compat(x, axis=0):
     def _last(x):
+
         x = np.asarray(x)
         x = x[notnull(x)]
         if len(x) == 0:
@@ -775,7 +781,7 @@ def _try_cast(self, result, obj):
         return result
 
     def _cython_transform(self, how, numeric_only=True):
-        output = {}
+        output = collections.OrderedDict()
         for name, obj in self._iterate_slices():
             is_numeric = is_numeric_dtype(obj.dtype)
             if numeric_only and not is_numeric:
@@ -783,6 +789,8 @@ def _cython_transform(self, how, numeric_only=True):
 
             try:
                 result, names = self.grouper.transform(obj.values, how)
+            except NotImplementedError:
+                continue
             except AssertionError as e:
                 raise GroupByError(str(e))
             output[name] = self._try_cast(result, obj)
@@ -792,7 +800,7 @@ def _cython_transform(self, how, numeric_only=True):
 
         return self._wrap_transformed_output(output, names)
 
-    def _cython_agg_general(self, how, numeric_only=True):
+    def _cython_agg_general(self, how, alt=None, numeric_only=True):
         output = {}
         for name, obj in self._iterate_slices():
             is_numeric = is_numeric_dtype(obj.dtype)
@@ -1015,26 +1023,26 @@ def mean(self, *args, **kwargs):
 
         For multiple groupings, the result index will be a MultiIndex
         """
-        nv.validate_groupby_func('mean', args, kwargs)
+        nv.validate_groupby_func('mean', args, kwargs, ['numeric_only'])
         try:
-            return self._cython_agg_general('mean')
+            return self._cython_agg_general('mean', **kwargs)
         except GroupByError:
             raise
         except Exception:  # pragma: no cover
             self._set_group_selection()
-            f = lambda x: x.mean(axis=self.axis)
+            f = lambda x: x.mean(axis=self.axis, **kwargs)
             return self._python_agg_general(f)
 
     @Substitution(name='groupby')
     @Appender(_doc_template)
-    def median(self):
+    def median(self, **kwargs):
         """
         Compute median of groups, excluding missing values
 
         For multiple groupings, the result index will be a MultiIndex
         """
         try:
-            return self._cython_agg_general('median')
+            return self._cython_agg_general('median', **kwargs)
         except GroupByError:
             raise
         except Exception:  # pragma: no cover
@@ -1044,7 +1052,7 @@ def median(self):
             def f(x):
                 if isinstance(x, np.ndarray):
                     x = Series(x)
-                return x.median(axis=self.axis)
+                return x.median(axis=self.axis, **kwargs)
             return self._python_agg_general(f)
 
     @Substitution(name='groupby')
@@ -1063,7 +1071,7 @@ def std(self, ddof=1, *args, **kwargs):
 
         # TODO: implement at Cython level?
         nv.validate_groupby_func('std', args, kwargs)
-        return np.sqrt(self.var(ddof=ddof))
+        return np.sqrt(self.var(ddof=ddof, **kwargs))
 
     @Substitution(name='groupby')
     @Appender(_doc_template)
@@ -1080,10 +1088,10 @@ def var(self, ddof=1, *args, **kwargs):
         """
         nv.validate_groupby_func('var', args, kwargs)
         if ddof == 1:
-            return self._cython_agg_general('var')
+            return self._cython_agg_general('var', **kwargs)
         else:
             self._set_group_selection()
-            f = lambda x: x.var(ddof=ddof)
+            f = lambda x: x.var(ddof=ddof, **kwargs)
             return self._python_agg_general(f)
 
     @Substitution(name='groupby')
@@ -1400,39 +1408,39 @@ def cumcount(self, ascending=True):
     @Appender(_doc_template)
     def cumprod(self, axis=0, *args, **kwargs):
         """Cumulative product for each group"""
-        nv.validate_groupby_func('cumprod', args, kwargs)
+        nv.validate_groupby_func('cumprod', args, kwargs, ['numeric_only'])
         if axis != 0:
-            return self.apply(lambda x: x.cumprod(axis=axis))
+            return self.apply(lambda x: x.cumprod(axis=axis, **kwargs))
 
-        return self._cython_transform('cumprod')
+        return self._cython_transform('cumprod', **kwargs)
 
     @Substitution(name='groupby')
     @Appender(_doc_template)
     def cumsum(self, axis=0, *args, **kwargs):
         """Cumulative sum for each group"""
-        nv.validate_groupby_func('cumsum', args, kwargs)
+        nv.validate_groupby_func('cumsum', args, kwargs, ['numeric_only'])
         if axis != 0:
-            return self.apply(lambda x: x.cumsum(axis=axis))
+            return self.apply(lambda x: x.cumsum(axis=axis, **kwargs))
 
-        return self._cython_transform('cumsum')
+        return self._cython_transform('cumsum', **kwargs)
 
     @Substitution(name='groupby')
     @Appender(_doc_template)
-    def cummin(self, axis=0):
+    def cummin(self, axis=0, **kwargs):
         """Cumulative min for each group"""
         if axis != 0:
             return self.apply(lambda x: np.minimum.accumulate(x, axis))
 
-        return self._cython_transform('cummin')
+        return self._cython_transform('cummin', **kwargs)
 
     @Substitution(name='groupby')
     @Appender(_doc_template)
-    def cummax(self, axis=0):
+    def cummax(self, axis=0, **kwargs):
         """Cumulative max for each group"""
         if axis != 0:
             return self.apply(lambda x: np.maximum.accumulate(x, axis))
 
-        return self._cython_transform('cummax')
+        return self._cython_transform('cummax', **kwargs)
 
     @Substitution(name='groupby')
     @Appender(_doc_template)
@@ -1828,6 +1836,28 @@ def wrapper(*args, **kwargs):
     def _cython_operation(self, kind, values, how, axis):
         assert kind in ['transform', 'aggregate']
 
+        # can we do this operation with our cython functions?
+        # if not, raise NotImplementedError
+
+        # we raise NotImplementedError if this is an invalid operation
+        # entirely, e.g. adding datetimes
+
+        # categoricals are only 1d, so we
+        # are not set up for dim transforming
+        if is_categorical_dtype(values):
+            raise NotImplementedError(
+                "categoricals are not support in cython ops ATM")
+        elif is_datetime64_any_dtype(values):
+            if how in ['add', 'prod', 'cumsum', 'cumprod']:
+                raise NotImplementedError(
+                    "datetime64 type does not support {} "
+                    "operations".format(how))
+        elif is_timedelta64_dtype(values):
+            if how in ['prod', 'cumprod']:
+                raise NotImplementedError(
+                    "timedelta64 type does not support {} "
+                    "operations".format(how))
+
         arity = self._cython_arity.get(how, 1)
 
         vdim = values.ndim
@@ -3155,9 +3185,9 @@ def _iterate_slices(self):
                 continue
             yield val, slicer(val)
 
-    def _cython_agg_general(self, how, numeric_only=True):
+    def _cython_agg_general(self, how, alt=None, numeric_only=True):
         new_items, new_blocks = self._cython_agg_blocks(
-            how, numeric_only=numeric_only)
+            how, alt=alt, numeric_only=numeric_only)
         return self._wrap_agged_blocks(new_items, new_blocks)
 
     def _wrap_agged_blocks(self, items, blocks):
@@ -3183,29 +3213,75 @@ def _wrap_agged_blocks(self, items, blocks):
 
     _block_agg_axis = 0
 
-    def _cython_agg_blocks(self, how, numeric_only=True):
-        data, agg_axis = self._get_data_to_aggregate()
+    def _cython_agg_blocks(self, how, alt=None, numeric_only=True):
+        # TODO: the actual managing of mgr_locs is a PITA
+        # here, it should happen via BlockManager.combine
 
-        new_blocks = []
+        data, agg_axis = self._get_data_to_aggregate()
 
         if numeric_only:
             data = data.get_numeric_data(copy=False)
 
+        new_blocks = []
+        new_items = []
+        deleted_items = []
         for block in data.blocks:
 
-            result, _ = self.grouper.aggregate(
-                block.values, how, axis=agg_axis)
+            locs = block.mgr_locs.as_array
+            try:
+                result, _ = self.grouper.aggregate(
+                    block.values, how, axis=agg_axis)
+            except NotImplementedError:
+                # generally if we have numeric_only=False
+                # and non-applicable functions
+                # try to python agg
+
+                if alt is None:
+                    # we cannot perform the operation
+                    # in an alternate way, exclude the block
+                    deleted_items.append(locs)
+                    continue
+
+                # call our grouper again with only this block
+                obj = self.obj[data.items[locs]]
+                s = groupby(obj, self.grouper)
+                result = s.aggregate(lambda x: alt(x, axis=self.axis))
+                result = result._data.blocks[0]
 
             # see if we can cast the block back to the original dtype
             result = block._try_coerce_and_cast_result(result)
 
-            newb = make_block(result, placement=block.mgr_locs)
+            new_items.append(locs)
+            newb = block.make_block_same_class(result)
             new_blocks.append(newb)
 
         if len(new_blocks) == 0:
             raise DataError('No numeric types to aggregate')
 
-        return data.items, new_blocks
+        # reset the locs in the blocks to correspond to our
+        # current ordering
+        indexer = np.concatenate(new_items)
+        new_items = data.items.take(np.sort(indexer))
+
+        if len(deleted_items):
+
+            # we need to adjust the indexer to account for the
+            # items we have removed
+            # really should be done in internals :<
+
+            deleted = np.concatenate(deleted_items)
+            ai = np.arange(len(data))
+            mask = np.zeros(len(data))
+            mask[deleted] = 1
+            indexer = (ai - mask.cumsum())[indexer]
+
+        offset = 0
+        for b in new_blocks:
+            l = len(b.mgr_locs)
+            b.mgr_locs = indexer[offset:(offset + l)]
+            offset += l
+
+        return new_items, new_blocks
 
     def _get_data_to_aggregate(self):
         obj = self._obj_with_exclusions
diff --git a/pandas/tests/groupby/test_groupby.py b/pandas/tests/groupby/test_groupby.py
index b00dc62206f57..f8a1e5a684858 100644
--- a/pandas/tests/groupby/test_groupby.py
+++ b/pandas/tests/groupby/test_groupby.py
@@ -2257,9 +2257,131 @@ def test_max_min_non_numeric(self):
         result = aa.groupby('nn').max()
         self.assertTrue('ss' in result)
 
+        result = aa.groupby('nn').max(numeric_only=False)
+        self.assertTrue('ss' in result)
+
         result = aa.groupby('nn').min()
         self.assertTrue('ss' in result)
 
+        result = aa.groupby('nn').min(numeric_only=False)
+        self.assertTrue('ss' in result)
+
+    def test_arg_passthru(self):
+        # make sure that we are passing thru kwargs
+        # to our agg functions
+
+        # GH3668
+        # GH5724
+        df = pd.DataFrame(
+            {'group': [1, 1, 2],
+             'int': [1, 2, 3],
+             'float': [4., 5., 6.],
+             'string': list('abc'),
+             'category_string': pd.Series(list('abc')).astype('category'),
+             'category_int': [7, 8, 9],
+             'datetime': pd.date_range('20130101', periods=3),
+             'datetimetz': pd.date_range('20130101',
+                                         periods=3,
+                                         tz='US/Eastern'),
+             'timedelta': pd.timedelta_range('1 s', periods=3, freq='s')},
+            columns=['group', 'int', 'float', 'string',
+                     'category_string', 'category_int',
+                     'datetime', 'datetimetz',
+                     'timedelta'])
+
+        expected_columns_numeric = Index(['int', 'float', 'category_int'])
+
+        # mean / median
+        expected = pd.DataFrame(
+            {'category_int': [7.5, 9],
+             'float': [4.5, 6.],
+             'timedelta': [pd.Timedelta('1.5s'),
+                           pd.Timedelta('3s')],
+             'int': [1.5, 3],
+             'datetime': [pd.Timestamp('2013-01-01 12:00:00'),
+                          pd.Timestamp('2013-01-03 00:00:00')],
+             'datetimetz': [
+                 pd.Timestamp('2013-01-01 12:00:00', tz='US/Eastern'),
+                 pd.Timestamp('2013-01-03 00:00:00', tz='US/Eastern')]},
+            index=Index([1, 2], name='group'),
+            columns=['int', 'float', 'category_int',
+                     'datetime', 'datetimetz', 'timedelta'])
+        for attr in ['mean', 'median']:
+            f = getattr(df.groupby('group'), attr)
+            result = f()
+            tm.assert_index_equal(result.columns, expected_columns_numeric)
+
+            result = f(numeric_only=False)
+            assert_frame_equal(result.reindex_like(expected), expected)
+
+        # TODO: min, max *should* handle
+        # categorical (ordered) dtype
+        expected_columns = Index(['int', 'float', 'string',
+                                  'category_int',
+                                  'datetime', 'datetimetz',
+                                  'timedelta'])
+        for attr in ['min', 'max']:
+            f = getattr(df.groupby('group'), attr)
+            result = f()
+            tm.assert_index_equal(result.columns, expected_columns)
+
+            result = f(numeric_only=False)
+            tm.assert_index_equal(result.columns, expected_columns)
+
+        expected_columns = Index(['int', 'float', 'string',
+                                  'category_string', 'category_int',
+                                  'datetime', 'datetimetz',
+                                  'timedelta'])
+        for attr in ['first', 'last']:
+            f = getattr(df.groupby('group'), attr)
+            result = f()
+            tm.assert_index_equal(result.columns, expected_columns)
+
+            result = f(numeric_only=False)
+            tm.assert_index_equal(result.columns, expected_columns)
+
+        expected_columns = Index(['int', 'float', 'string',
+                                  'category_int', 'timedelta'])
+        for attr in ['sum']:
+            f = getattr(df.groupby('group'), attr)
+            result = f()
+            tm.assert_index_equal(result.columns, expected_columns_numeric)
+
+            result = f(numeric_only=False)
+            tm.assert_index_equal(result.columns, expected_columns)
+
+        expected_columns = Index(['int', 'float', 'category_int'])
+        for attr in ['prod', 'cumprod']:
+            f = getattr(df.groupby('group'), attr)
+            result = f()
+            tm.assert_index_equal(result.columns, expected_columns_numeric)
+
+            result = f(numeric_only=False)
+            tm.assert_index_equal(result.columns, expected_columns)
+
+        # like min, max, but don't include strings
+        expected_columns = Index(['int', 'float',
+                                  'category_int',
+                                  'datetime', 'datetimetz',
+                                  'timedelta'])
+        for attr in ['cummin', 'cummax']:
+            f = getattr(df.groupby('group'), attr)
+            result = f()
+            tm.assert_index_equal(result.columns, expected_columns_numeric)
+
+            result = f(numeric_only=False)
+            tm.assert_index_equal(result.columns, expected_columns)
+
+        expected_columns = Index(['int', 'float', 'category_int',
+                                  'timedelta'])
+        for attr in ['cumsum']:
+            f = getattr(df.groupby('group'), attr)
+            result = f()
+            tm.assert_index_equal(result.columns, expected_columns_numeric)
+
+            result = f(numeric_only=False)
+            tm.assert_index_equal(result.columns, expected_columns)
+
     def test_cython_agg_boolean(self):
         frame = DataFrame({'a': np.random.randint(0, 5, 50),
                            'b': np.random.randint(0, 2, 50).astype('bool')})
@@ -3436,6 +3558,7 @@ def test_int64_overflow(self):
         tups = list(map(tuple, df[['A', 'B', 'C', 'D', 'E', 'F', 'G', 'H'
                                    ]].values))
         tups = com._asarray_tuplesafe(tups)
+
         expected = df.groupby(tups).sum()['values']
 
         for k, v in compat.iteritems(expected):