diff --git a/pymc3/backends/base.py b/pymc3/backends/base.py index 1e0f163d9a..af02c8cbc5 100644 --- a/pymc3/backends/base.py +++ b/pymc3/backends/base.py @@ -4,12 +4,16 @@ creating custom backends). """ import itertools as itl +import logging import numpy as np import warnings import theano.tensor as tt from ..model import modelcontext +from .report import SamplerReport, merge_reports + +logger = logging.getLogger('pymc3') class BackendError(Exception): @@ -61,6 +65,10 @@ def __init__(self, name, model=None, vars=None, test_point=None): self.chain = None self._is_base_setup = False self.sampler_vars = None + self._warnings = [] + + def _add_warnings(self, warnings): + self._warnings.extend(warnings) # Sampling methods @@ -174,7 +182,7 @@ def get_sampler_stats(self, varname, sampler_idx=None, burn=0, thin=1): return self._get_sampler_stats(varname, sampler_idx, burn, thin) sampler_idxs = [i for i, s in enumerate(self.sampler_vars) - if varname in s] + if varname in s] if not sampler_idxs: raise KeyError("Unknown sampler stat %s" % varname) @@ -185,20 +193,19 @@ def get_sampler_stats(self, varname, sampler_idx=None, burn=0, thin=1): else: return vals - def _get_sampler_stats(self, varname, sampler_idx, burn, thin): """Get sampler statistics.""" raise NotImplementedError() def _slice(self, idx): """Slice trace object.""" - raise NotImplementedError + raise NotImplementedError() def point(self, idx): """Return dictionary of point values at `idx` for current chain with variables names as keys. """ - raise NotImplementedError + raise NotImplementedError() @property def stat_names(self): @@ -258,6 +265,11 @@ def __init__(self, straces): raise ValueError("Chains are not unique.") self._straces[strace.chain] = strace + self._report = SamplerReport() + for strace in straces: + if hasattr(strace, '_warnings'): + self._report._add_warnings(strace._warnings, strace.chain) + def __repr__(self): template = '<{}: {} chains, {} iterations, {} variables>' return template.format(self.__class__.__name__, @@ -271,6 +283,10 @@ def nchains(self): def chains(self): return list(sorted(self._straces.keys())) + @property + def report(self): + return self._report + def __getitem__(self, idx): if isinstance(idx, slice): return self._slice(idx) @@ -303,7 +319,7 @@ def __getitem__(self, idx): raise KeyError("Unknown variable %s" % var) _attrs = set(['_straces', 'varnames', 'chains', 'stat_names', - 'supports_sampler_stats']) + 'supports_sampler_stats', '_report']) def __getattr__(self, name): # Avoid infinite recursion when called before __init__ @@ -447,10 +463,13 @@ def get_sampler_stats(self, varname, burn=0, thin=1, combine=True, for chain in chains] return _squeeze_cat(results, combine, squeeze) - def _slice(self, idx): - """Return a new MultiTrace object sliced according to `idx`.""" - new_traces = [trace._slice(idx) for trace in self._straces.values()] - return MultiTrace(new_traces) + def _slice(self, slice): + """Return a new MultiTrace object sliced according to `slice`.""" + new_traces = [trace._slice(slice) for trace in self._straces.values()] + trace = MultiTrace(new_traces) + idxs = slice.indices(len(self)) + trace._report = self._report._slice(*idxs) + return trace def point(self, idx, chain=None): """Return a dictionary of point values at `idx`. 
@@ -502,6 +521,7 @@ def merge_traces(mtraces): if new_chain in base_mtrace._straces: raise ValueError("Chains are not unique.") base_mtrace._straces[new_chain] = strace + base_mtrace.report = merge_reports([trace.report for trace in mtraces]) return base_mtrace diff --git a/pymc3/backends/ndarray.py b/pymc3/backends/ndarray.py index 9c07060c40..700722a787 100644 --- a/pymc3/backends/ndarray.py +++ b/pymc3/backends/ndarray.py @@ -116,8 +116,9 @@ def close(self): self.samples = {var: vtrace[:self.draw_idx] for var, vtrace in self.samples.items()} if self._stats is not None: - self._stats = [{var: trace[:self.draw_idx] for var, trace in stats.items()} - for stats in self._stats] + self._stats = [ + {var: trace[:self.draw_idx] for var, trace in stats.items()} + for stats in self._stats] # Selection methods diff --git a/pymc3/backends/report.py b/pymc3/backends/report.py new file mode 100644 index 0000000000..52ca87590f --- /dev/null +++ b/pymc3/backends/report.py @@ -0,0 +1,162 @@ +from collections import namedtuple +import logging +import enum + + +logger = logging.getLogger('pymc3') + + +@enum.unique +class WarningType(enum.Enum): + # For HMC and NUTS + DIVERGENCE = 1 + TUNING_DIVERGENCE = 2 + DIVERGENCES = 3 + TREEDEPTH = 4 + # Problematic sampler parameters + BAD_PARAMS = 5 + # Indications that chains did not converge, eg Rhat + CONVERGENCE = 6 + BAD_ACCEPTANCE = 7 + + +SamplerWarning = namedtuple( + 'SamplerWarning', + "kind, message, level, step, exec_info, extra") + + +_LEVELS = { + 'info': logging.INFO, + 'error': logging.ERROR, + 'warn': logging.WARN, + 'debug': logging.DEBUG, + 'critical': logging.CRITICAL, +} + + +class SamplerReport(object): + def __init__(self): + self._chain_warnings = {} + self._global_warnings = [] + self._effective_n = None + self._gelman_rubin = None + + @property + def _warnings(self): + chains = sum(self._chain_warnings.values(), []) + return chains + self._global_warnings + + @property + def ok(self): + """Whether the automatic convergence checks found serious problems.""" + return all(_LEVELS[warn.level] < _LEVELS['warn'] + for warn in self._warnings) + + def raise_ok(self, level='error'): + errors = [warn for warn in self._warnings + if _LEVELS[warn.level] >= _LEVELS[level]] + if errors: + raise ValueError('Serious convergence issues during sampling.') + + def _run_convergence_checks(self, trace): + if trace.nchains == 1: + msg = ("Only one chain was sampled, this makes it impossible to " + "run some convergence checks") + warn = SamplerWarning(WarningType.BAD_PARAMS, msg, 'info', + None, None, None) + self._add_warnings([warn]) + return + + from pymc3 import diagnostics + + self._effective_n = effective_n = diagnostics.effective_n(trace) + self._gelman_rubin = gelman_rubin = diagnostics.gelman_rubin(trace) + + warnings = [] + rhat_max = max(val.max() for val in gelman_rubin.values()) + if rhat_max > 1.4: + msg = ("The gelman-rubin statistic is larger than 1.4 for some " + "parameters. The sampler did not converge.") + warn = SamplerWarning( + WarningType.CONVERGENCE, msg, 'error', None, None, gelman_rubin) + warnings.append(warn) + elif rhat_max > 1.2: + msg = ("The gelman-rubin statistic is larger than 1.2 for some " + "parameters.") + warn = SamplerWarning( + WarningType.CONVERGENCE, msg, 'warn', None, None, gelman_rubin) + warnings.append(warn) + elif rhat_max > 1.05: + msg = ("The gelman-rubin statistic is larger than 1.05 for some " + "parameters. 
This indicates slight problems during " + "sampling.") + warn = SamplerWarning( + WarningType.CONVERGENCE, msg, 'info', None, None, gelman_rubin) + warnings.append(warn) + + eff_min = min(val.min() for val in effective_n.values()) + n_samples = len(trace) * trace.nchains + if eff_min < 200 and n_samples >= 500: + msg = ("The estimated number of effective samples is smaller than " + "200 for some parameters.") + warn = SamplerWarning( + WarningType.CONVERGENCE, msg, 'error', None, None, effective_n) + warnings.append(warn) + elif eff_min / n_samples < 0.25: + msg = ("The number of effective samples is smaller than " + "25% for some parameters.") + warn = SamplerWarning( + WarningType.CONVERGENCE, msg, 'warn', None, None, effective_n) + warnings.append(warn) + + self._add_warnings(warnings) + + def _add_warnings(self, warnings, chain=None): + if chain is None: + warn_list = self._global_warnings + else: + warn_list = self._chain_warnings.setdefault(chain, []) + warn_list.extend(warnings) + + def _log_summary(self): + + def log_warning(warn): + level = _LEVELS[warn.level] + logger.log(level, warn.message) + + for chain, warns in self._chain_warnings.items(): + for warn in warns: + log_warning(warn) + for warn in self._global_warnings: + log_warning(warn) + + def _slice(self, start, stop, step): + report = SamplerReport() + + def filter_warns(warnings): + filtered = [] + for warn in warnings: + if warn.step is None: + filtered.append(warn) + elif (start <= warn.step < stop and + (warn.step - start) % step == 0): + warn = warn._replace(step=warn.step - start) + filtered.append(warn) + return filtered + + report._add_warnings(filter_warns(self._global_warnings)) + for chain in self._chain_warnings: + report._add_warnings( + filter_warns(self._chain_warnings[chain]), + chain) + + return report + + +def merge_reports(reports): + report = SamplerReport() + for rep in reports: + report._add_warnings(rep._global_warnings) + for chain in rep._chain_warnings: + report._add_warnings(rep._chain_warnings[chain], chain) + return report diff --git a/pymc3/diagnostics.py b/pymc3/diagnostics.py index 322108be37..64188deb43 100644 --- a/pymc3/diagnostics.py +++ b/pymc3/diagnostics.py @@ -250,8 +250,8 @@ def get_neff(x, Vhat): if t % 2: t -= 1 - return min(num_chains * num_samples, - int(num_chains * num_samples / (1. + 2 * rho[1:t-1].sum()))) + neff = num_chains * num_samples / (1. 
+ 2 * rho[1:t-1].sum()) + return min(num_chains * num_samples, np.floor(neff)) def generate_neff(trace_values): x = np.array(trace_values) diff --git a/pymc3/model.py b/pymc3/model.py index cf9c3987ae..a834698fe3 100644 --- a/pymc3/model.py +++ b/pymc3/model.py @@ -10,7 +10,7 @@ from theano import theano, tensor as tt from theano.tensor.var import TensorVariable -from pymc3.theanof import set_theano_conf +from pymc3.theanof import set_theano_conf, floatX import pymc3 as pm from pymc3.math import flatten_list from .memoize import memoize, WithMemoization @@ -1061,13 +1061,13 @@ def _get_scaling(total_size, shape, ndim): scalar """ if total_size is None: - coef = pm.floatX(1) + coef = floatX(1) elif isinstance(total_size, int): if ndim >= 1: denom = shape[0] else: denom = 1 - coef = pm.floatX(total_size) / pm.floatX(denom) + coef = floatX(total_size) / floatX(denom) elif isinstance(total_size, (list, tuple)): if not all(isinstance(i, int) for i in total_size if (i is not Ellipsis and i is not None)): raise TypeError('Unrecognized `total_size` type, expected ' @@ -1085,20 +1085,20 @@ def _get_scaling(total_size, shape, ndim): raise ValueError('Length of `total_size` is too big, ' 'number of scalings is bigger that ndim, got %r' % total_size) elif (len(begin) + len(end)) == 0: - return pm.floatX(1) + return floatX(1) if len(end) > 0: shp_end = shape[-len(end):] else: shp_end = np.asarray([]) shp_begin = shape[:len(begin)] - begin_coef = [pm.floatX(t) / shp_begin[i] for i, t in enumerate(begin) if t is not None] - end_coef = [pm.floatX(t) / shp_end[i] for i, t in enumerate(end) if t is not None] + begin_coef = [floatX(t) / shp_begin[i] for i, t in enumerate(begin) if t is not None] + end_coef = [floatX(t) / shp_end[i] for i, t in enumerate(end) if t is not None] coefs = begin_coef + end_coef coef = tt.prod(coefs) else: raise TypeError('Unrecognized `total_size` type, expected ' 'int or list of ints, got %r' % total_size) - return tt.as_tensor(pm.floatX(coef)) + return tt.as_tensor(floatX(coef)) class FreeRV(Factor, TensorVariable): diff --git a/pymc3/sampling.py b/pymc3/sampling.py index 71a4947a0b..2d0d2771f8 100644 --- a/pymc3/sampling.py +++ b/pymc3/sampling.py @@ -1,23 +1,25 @@ from collections import defaultdict, Iterable from copy import copy import pickle -from six import integer_types +import logging +import warnings +from six import integer_types from joblib import Parallel, delayed import numpy as np -import warnings import theano.gradient as tg -import pymc3 as pm from .backends.base import BaseTrace, MultiTrace from .backends.ndarray import NDArray -from .model import modelcontext, Point +from .model import modelcontext, Point, all_continuous from .step_methods import (NUTS, HamiltonianMC, Metropolis, BinaryMetropolis, BinaryGibbsMetropolis, CategoricalGibbsMetropolis, Slice, CompoundStep, arraystep) from .util import update_start_vals from .vartypes import discrete_types from pymc3.step_methods.hmc import quadpotential +from pymc3 import plots +import pymc3 as pm from tqdm import tqdm import sys @@ -29,6 +31,9 @@ BinaryGibbsMetropolis, Slice, CategoricalGibbsMetropolis) +_log = logging.getLogger('pymc3') + + def instantiate_steppers(model, steps, selected_steps, step_kwargs=None): """Instantiates steppers assigned to the model variables. 
@@ -144,17 +149,17 @@ def assign_step_methods(model, step=None, methods=STEP_METHODS, return instantiate_steppers(model, steps, selected_steps, step_kwargs) -def print_step_hierarchy(s, level=0): +def _print_step_hierarchy(s, level=0): if isinstance(s, (list, tuple)): - pm._log.info('>' * level + 'list') + _log.info('>' * level + 'list') for i in s: - print_step_hierarchy(i, level+1) + _print_step_hierarchy(i, level+1) elif isinstance(s, CompoundStep): - pm._log.info('>' * level + 'CompoundStep') + _log.info('>' * level + 'CompoundStep') for i in s.methods: - print_step_hierarchy(i, level+1) + _print_step_hierarchy(i, level+1) else: - pm._log.info('>' * level + '{}: {}'.format(s.__class__.__name__, s.vars)) + _log.info('>' * level + '{}: {}'.format(s.__class__.__name__, s.vars)) def _cpu_count(): @@ -182,7 +187,7 @@ def sample(draws=500, step=None, init='auto', n_init=200000, start=None, trace=None, chain_idx=0, chains=None, njobs=None, tune=500, nuts_kwargs=None, step_kwargs=None, progressbar=True, model=None, random_seed=None, live_plot=False, discard_tuned_samples=True, - live_plot_kwargs=None, **kwargs): + live_plot_kwargs=None, compute_convergence_checks=True, **kwargs): """Draw samples from the posterior using the given step methods. Multiple step methods are supported via compound step methods. @@ -288,6 +293,9 @@ def sample(draws=500, step=None, init='auto', n_init=200000, start=None, Options for traceplot. Example: live_plot_kwargs={'varnames': ['x']} discard_tuned_samples : bool Whether to discard posterior samples of the tune interval. + compute_convergence_checks : bool, default=True + Whether to compute sampler statistics like gelman-rubin and + effective_n. Returns ------- @@ -331,12 +339,14 @@ def sample(draws=500, step=None, init='auto', n_init=200000, start=None, np.random.seed(random_seed) random_seed = [np.random.randint(2 ** 30) for _ in range(chains)] if not isinstance(random_seed, Iterable): - raise TypeError('Invalid value for `random_seed`. Must be tuple, list or int') + raise TypeError( + 'Invalid value for `random_seed`. Must be tuple, list or int') if 'chain' in kwargs: chain_idx = kwargs['chain'] - warnings.warn("The chain argument has been deprecated. Use chain_idx instead.", - DeprecationWarning) + warnings.warn( + "The chain argument has been deprecated. Use chain_idx instead.", + DeprecationWarning) if start is not None: for start_vals in start: @@ -352,10 +362,10 @@ def sample(draws=500, step=None, init='auto', n_init=200000, start=None, if model.ndim == 0: raise ValueError('The model does not contain any free variables.') - if step is None and init is not None and pm.model.all_continuous(model.vars): + if step is None and init is not None and all_continuous(model.vars): try: # By default, try to use NUTS - pm._log.info('Auto-assigning NUTS sampler...') + _log.info('Auto-assigning NUTS sampler...') args = step_kwargs if step_kwargs is not None else {} args = args.get('nuts', {}) start_, step = init_nuts(init=init, chains=chains, n_init=n_init, @@ -365,9 +375,9 @@ def sample(draws=500, step=None, init='auto', n_init=200000, start=None, start = start_ except (AttributeError, NotImplementedError, tg.NullTypeGradError): # gradient computation failed - pm._log.info("Initializing NUTS failed. " - "Falling back to elementwise auto-assignment.") - pm._log.debug('Exception in init nuts', exec_info=True) + _log.info("Initializing NUTS failed. 
" + "Falling back to elementwise auto-assignment.") + _log.debug('Exception in init nuts', exec_info=True) step = assign_step_methods(model, step, step_kwargs=step_kwargs) else: step = assign_step_methods(model, step, step_kwargs=step_kwargs) @@ -403,33 +413,39 @@ def sample(draws=500, step=None, init='auto', n_init=200000, start=None, ]) parallel = njobs > 1 and chains > 1 and not has_population_samplers if parallel: - pm._log.info('Multiprocess sampling ({} chains in {} jobs)'.format(chains, njobs)) - print_step_hierarchy(step) + _log.info('Multiprocess sampling ({} chains in {} jobs)'.format(chains, njobs)) + _print_step_hierarchy(step) try: trace = _mp_sample(**sample_args) except pickle.PickleError: - pm._log.warn("Could not pickle model, sampling singlethreaded.") - pm._log.debug('Pickling error:', exec_info=True) + _log.warn("Could not pickle model, sampling singlethreaded.") + _log.debug('Pickling error:', exec_info=True) parallel = False except AttributeError as e: if str(e).startswith("AttributeError: Can't pickle"): - pm._log.warn("Could not pickle model, sampling singlethreaded.") - pm._log.debug('Pickling error:', exec_info=True) + _log.warn("Could not pickle model, sampling singlethreaded.") + _log.debug('Pickling error:', exec_info=True) parallel = False else: raise if not parallel: - if has_population_samplers: - pm._log.info('Population sampling ({} chains)'.format(chains)) - print_step_hierarchy(step) + if has_population_samplers: + _log.info('Population sampling ({} chains)'.format(chains)) + _print_step_hierarchy(step) trace = _sample_population(**sample_args) - else: - pm._log.info('Sequential sampling ({} chains in 1 job)'.format(chains)) - print_step_hierarchy(step) + else: + _log.info('Sequential sampling ({} chains in 1 job)'.format(chains)) + _print_step_hierarchy(step) trace = _sample_many(**sample_args) discard = tune if discard_tuned_samples else 0 - return trace[discard:] + trace = trace[discard:] + + if compute_convergence_checks: + trace.report._run_convergence_checks(trace) + + trace.report._log_summary() + return trace def _check_start_shape(model, start): @@ -459,11 +475,11 @@ def _check_start_shape(model, start): raise ValueError("Bad shape for start argument:{}".format(e)) -def _sample_many(draws, chain, chains, start, random_seed, **kwargs): +def _sample_many(draws, chain, chains, start, random_seed, step, **kwargs): traces = [] for i in range(chains): trace = _sample(draws=draws, chain=chain + i, start=start[i], - random_seed=random_seed[i], **kwargs) + step=step, random_seed=random_seed[i], **kwargs) if trace is None: if len(traces) == 0: raise ValueError('Sampling stopped before a sample was created.') @@ -514,9 +530,9 @@ def _sample(chain, progressbar, random_seed, start, draws=None, step=None, if it >= skip_first: trace = MultiTrace([strace]) if it == skip_first: - ax = pm.plots.traceplot(trace, live_plot=False, **live_plot_kwargs) + ax = plots.traceplot(trace, live_plot=False, **live_plot_kwargs) elif (it - skip_first) % refresh_every == 0 or it == draws - 1: - pm.plots.traceplot(trace, ax=ax, live_plot=True, **live_plot_kwargs) + plots.traceplot(trace, ax=ax, live_plot=True, **live_plot_kwargs) except KeyboardInterrupt: pass finally: @@ -575,7 +591,7 @@ def _iter_sample(draws, step, start=None, trace=None, chain=0, tune=None, if random_seed is not None: np.random.seed(random_seed) if draws < 1: - raise ValueError('Argument `draws` should be above 0.') + raise ValueError('Argument `draws` must be greater than 0.') if start is None: start = {} @@ 
-600,6 +616,7 @@ def _iter_sample(draws, step, start=None, trace=None, chain=0, tune=None, strace.setup(draws, chain) try: + step.tune = bool(tune) for i in range(draws): if i == tune: step = stop_tuning(step) @@ -615,16 +632,18 @@ def _iter_sample(draws, step, start=None, trace=None, chain=0, tune=None, yield strace except KeyboardInterrupt: strace.close() - if hasattr(step, 'report'): - step.report._finalize(strace) + if hasattr(step, 'warnings'): + warns = step.warnings(strace) + strace._add_warnings(warns) raise except BaseException: strace.close() raise else: strace.close() - if hasattr(step, 'report'): - step.report._finalize(strace) + if hasattr(step, 'warnings'): + warns = step.warnings(strace) + strace._add_warnings(warns) class PopulationStepper(object): @@ -651,7 +670,7 @@ def __init__(self, steppers, parallelize): if parallelize and sys.version_info >= (3,4): try: # configure a child process for each stepper - pm._log.info('Attempting to parallelize chains.') + _log.info('Attempting to parallelize chains.') import multiprocessing for c, stepper in enumerate(tqdm(steppers)): slave_end, master_end = multiprocessing.Pipe() @@ -670,16 +689,19 @@ def __init__(self, steppers, parallelize): self._master_ends.append(master_end) self._processes.append(process) self.is_parallelized = True - except: - pm._log.info('Population parallelization failed. ' \ - 'Falling back to sequential stepping of chains.') + except Exception: + _log.info('Population parallelization failed. ' + 'Falling back to sequential stepping of chains.') + _log.debug('Error was: ', exec_info=True) else: if parallelize: - warnings.warn('Population parallelization is only supported on Python 3.4 and ' \ - 'higher. All {} chains will step on one process.'.format(self.nchains)) + warnings.warn('Population parallelization is only supported ' + 'on Python 3.4 and higher. All {} chains will ' + 'run sequentially on one process.' + .format(self.nchains)) else: - pm._log.info('Chains are not parallelized. You can enable this by passing ' \ - 'pm.sample(parallelize=True).') + _log.info('Chains are not parallelized. 
You can enable this by passing ' + 'pm.sample(parallelize=True).') return super(PopulationStepper, self).__init__() def __enter__(self): @@ -693,8 +715,8 @@ def __exit__(self, exc_type, exc_val, exc_tb): master_end.send(None) for process in self._processes: process.join(timeout=3) - except: - pm._log.warning('Termination failed.') + except Exception: + _log.warning('Termination failed.') return @staticmethod @@ -737,7 +759,7 @@ def _run_slave(c, stepper_dumps, slave_end): update = stepper.step(population[c]) slave_end.send(update) except Exception: - pm._log.exception('ChainWalker{}'.format(c)) + _log.exception('ChainWalker{}'.format(c)) return def step(self, tune_stop, population): @@ -990,7 +1012,7 @@ def sample_ppc(trace, samples=None, model=None, vars=None, size=None, np.random.seed(random_seed) - indices = np.random.randint(0, nchain*len_trace, samples) + indices = np.random.randint(0, nchain * len_trace, samples) if progressbar: indices = tqdm(indices, total=samples) @@ -1077,7 +1099,7 @@ def sample_ppc_w(traces, samples=None, models=None, weights=None, weights = np.asarray(weights) p = weights / np.sum(weights) - min_tr = min([len(i)*i.nchains for i in traces]) + min_tr = min([len(i) * i.nchains for i in traces]) n = (min_tr * p).astype('int') # ensure n sum up to min_tr @@ -1092,7 +1114,7 @@ def sample_ppc_w(traces, samples=None, models=None, weights=None, except AttributeError: nchain = 1 - indices = np.random.randint(0, nchain*len_trace, j) + indices = np.random.randint(0, nchain * len_trace, j) if nchain > 1: chain_idx, point_idx = np.divmod(indices, len_trace) for idx in zip(chain_idx, point_idx): @@ -1104,7 +1126,8 @@ def sample_ppc_w(traces, samples=None, models=None, weights=None, obs = [x for m in models for x in m.observed_RVs] variables = np.repeat(obs, n) - lenghts = list(set([np.shape(np.atleast_1d(o.distribution.default())) for o in obs])) + lengths = [np.atleast_1d(observed).shape for observed in obs] + lenghts = list(set(lengths)) if len(lenghts) == 1: size = [None for i in variables] @@ -1116,8 +1139,8 @@ def sample_ppc_w(traces, samples=None, models=None, weights=None, y = np.zeros(shape=lenghts[1]) b = np.broadcast(x, y) for var in variables: - l = np.shape(np.atleast_1d(var.distribution.default())) - if l != b.shape: + shape = np.shape(np.atleast_1d(var.distribution.default())) + if shape != b.shape: size.append(b.shape) else: size.append(None) @@ -1170,8 +1193,8 @@ def init_nuts(init='auto', chains=1, n_init=500000, model=None, a diagonal based on the variance of the tuning samples. All chains use the test value (usually the prior mean) as starting point. - * jitter+adapt_diag : Same as `adapt_diag`, but add uniform jitter - in [-1, 1] to the starting point in each chain. + * jitter+adapt_diag : Same as `adapt_diag`, but use uniform jitter + in [-1, 1] as starting point in each chain. * advi+adapt_diag : Run ADVI and then adapt the resulting diagonal mass matrix based on the sample variance of the tuning samples. 
* advi+adapt_diag_grad : Run ADVI and then adapt the resulting @@ -1202,12 +1225,12 @@ def init_nuts(init='auto', chains=1, n_init=500000, model=None, nuts_sampler : pymc3.step_methods.NUTS Instantiated and initialized NUTS sampler object """ - model = pm.modelcontext(model) + model = modelcontext(model) vars = kwargs.get('vars', model.vars) if set(vars) != set(model.vars): raise ValueError('Must use init_nuts on all variables of a model.') - if not pm.model.all_continuous(vars): + if not all_continuous(vars): raise ValueError('init_nuts can only be used for models with only ' 'continuous variables.') @@ -1220,7 +1243,7 @@ def init_nuts(init='auto', chains=1, n_init=500000, model=None, if init == 'auto': init = 'jitter+adapt_diag' - pm._log.info('Initializing NUTS using {}...'.format(init)) + _log.info('Initializing NUTS using {}...'.format(init)) if random_seed is not None: random_seed = int(np.atleast_1d(random_seed)[0]) @@ -1325,7 +1348,8 @@ def init_nuts(init='auto', chains=1, n_init=500000, model=None, start = list(np.random.choice(init_trace, chains)) potential = quadpotential.QuadPotentialFull(cov) else: - raise NotImplementedError('Initializer {} is not supported.'.format(init)) + raise ValueError( + 'Unknown initializer: {}.'.format(init)) step = pm.NUTS(potential=potential, model=model, **kwargs) diff --git a/pymc3/stats.py b/pymc3/stats.py index 3aae537ed4..1c897d6603 100644 --- a/pymc3/stats.py +++ b/pymc3/stats.py @@ -15,7 +15,6 @@ from scipy.stats import dirichlet from scipy.optimize import minimize -from .backends import tracetab as ttab __all__ = ['autocorr', 'autocov', 'dic', 'bpic', 'waic', 'loo', 'hpd', 'quantiles', 'mc_error', 'summary', 'df_summary', 'compare', 'bfmi', 'r2_score'] @@ -865,6 +864,7 @@ def dict2pd(statdict, labelname): """Small helper function to transform a diagnostics output dict into a pandas Series. 
""" + from .backends import tracetab as ttab var_dfs = [] for key, value in statdict.items(): var_df = pd.Series(value.flatten()) @@ -964,6 +964,7 @@ def summary(trace, varnames=None, transform=lambda x: x, stat_funcs=None, mu__0 0.066473 0.000312 0.105039 0.214242 mu__1 0.067513 -0.159097 -0.045637 0.062912 """ + from .backends import tracetab as ttab if varnames is None: varnames = get_default_varnames(trace.varnames, diff --git a/pymc3/step_methods/compound.py b/pymc3/step_methods/compound.py index 0dd55b3492..d8f66171cb 100644 --- a/pymc3/step_methods/compound.py +++ b/pymc3/step_methods/compound.py @@ -31,19 +31,9 @@ def step(self, point): point = method.step(point) return point - @property - def report(self): - reports = [] + def warnings(self, strace): + warns = [] for method in self.methods: - if hasattr(method, 'report'): - reports.append(method.report) - return _CompoundReport(reports) - - -class _CompoundReport(object): - def __init__(self, reports): - self._reports = reports - - def _finalize(self, strace): - for report in self._reports: - report._finalize(strace) + if hasattr(method, 'warnings'): + warns.extend(method.warnings(strace)) + return warns diff --git a/pymc3/step_methods/hmc/base_hmc.py b/pymc3/step_methods/hmc/base_hmc.py index 140495d25b..aebb401d34 100644 --- a/pymc3/step_methods/hmc/base_hmc.py +++ b/pymc3/step_methods/hmc/base_hmc.py @@ -1,3 +1,5 @@ +from collections import namedtuple + import numpy as np from pymc3.model import modelcontext, Point @@ -6,6 +8,18 @@ from pymc3.theanof import inputvars, floatX from pymc3.tuning import guess_scaling from .quadpotential import quad_potential, QuadPotentialDiagAdapt +from pymc3.step_methods import step_sizes +from pymc3.backends.report import SamplerWarning, WarningType + + +HMCStepData = namedtuple( + "HMCStepData", + "end, accept_stat, divergence_info, stats") + + +DivergenceInfo = namedtuple( + 'DivergenceInfo', + 'message, exec_info, state') class BaseHMC(arraystep.GradientSharedStep): @@ -15,7 +29,10 @@ class BaseHMC(arraystep.GradientSharedStep): def __init__(self, vars=None, scaling=None, step_scale=0.25, is_cov=False, model=None, blocked=True, potential=None, - integrator="leapfrog", dtype=None, **theano_kwargs): + integrator="leapfrog", dtype=None, Emax=1000, + target_accept=0.8, gamma=0.05, k=0.75, t0=10, + adapt_step_size=True, step_rand=None, + **theano_kwargs): """Set up Hamiltonian samplers with common structures. 
Parameters @@ -45,8 +62,18 @@ def __init__(self, vars=None, scaling=None, step_scale=0.25, is_cov=False, super(BaseHMC, self).__init__(vars, blocked=blocked, model=model, dtype=dtype, **theano_kwargs) + self.adapt_step_size = adapt_step_size + self.Emax = Emax + self.iter_count = 0 size = self._logp_dlogp_func.size + self.step_size = step_scale / (size ** 0.25) + self.target_accept = target_accept + self.step_adapt = step_sizes.DualAverageAdaptation( + self.step_size, target_accept, gamma, k, t0) + + self.tune = True + if scaling is None and potential is None: mean = floatX(np.zeros(size)) var = floatX(np.ones(size)) @@ -59,10 +86,114 @@ def __init__(self, vars=None, scaling=None, step_scale=0.25, is_cov=False, if scaling is not None and potential is not None: raise ValueError("Can not specify both potential and scaling.") - self.step_size = step_scale / (size ** 0.25) if potential is not None: self.potential = potential else: self.potential = quad_potential(scaling, is_cov) - self.integrator = integration.CpuLeapfrogIntegrator(self.potential, self._logp_dlogp_func) + self.integrator = integration.CpuLeapfrogIntegrator( + self.potential, self._logp_dlogp_func) + + self._step_rand = step_rand + self._warnings = [] + self._samples_after_tune = 0 + self._num_divs_sample = 0 + + def _hamiltonian_step(self, start, p0, step_size): + """Compute one hamiltonian trajectory and return the next state. + + Subclasses must overwrite this method and return a `HMCStepData`. + """ + raise NotImplementedError("Abstract method") + + def astep(self, q0): + """Perform a single HMC iteration.""" + p0 = self.potential.random() + start = self.integrator.compute_state(q0, p0) + + if not np.isfinite(start.energy): + self.potential.raise_ok() + raise ValueError('Bad initial energy: %s. The model ' + 'might be misspecified.' % start.energy) + + adapt_step = self.tune and self.adapt_step_size + step_size = self.step_adapt.current(adapt_step) + self.step_size = step_size + + if self._step_rand is not None: + step_size = self._step_rand(step_size) + + hmc_step = self._hamiltonian_step(start, p0, step_size) + + self.step_adapt.update(hmc_step.accept_stat, adapt_step) + self.potential.update(hmc_step.end.q, hmc_step.end.q_grad, self.tune) + if hmc_step.divergence_info: + info = hmc_step.divergence_info + if self.tune: + kind = WarningType.TUNING_DIVERGENCE + point = None + else: + kind = WarningType.DIVERGENCE + self._num_divs_sample += 1 + # We don't want to fill up all memory with divergence info + if self._num_divs_sample < 100: + point = self._logp_dlogp_func.array_to_dict(info.state.q) + else: + point = None + warning = SamplerWarning( + kind, info.message, 'debug', self.iter_count, + info.exec_info, point) + + self._warnings.append(warning) + + self.iter_count += 1 + if not self.tune: + self._samples_after_tune += 1 + + stats = { + 'tune': self.tune, + 'diverging': bool(hmc_step.divergence_info), + } + + stats.update(hmc_step.stats) + stats.update(self.step_adapt.stats()) + + return hmc_step.end.q, [stats] + + def reset(self, start=None): + self.tune = True + self.potential.reset() + + def warnings(self, strace): + # list.copy() is not available in python2 + warnings = self._warnings[:] + + # Generate a global warning for divergences + n_divs = self._num_divs_sample + if n_divs and self._samples_after_tune == n_divs: + msg = ('The chain contains only diverging samples. 
The model is ' + 'probably misspecified.') + warning = SamplerWarning( + WarningType.DIVERGENCES, msg, 'error', None, None, None) + warnings.append(warning) + elif n_divs > 0: + message = ('Divergences after tuning. Increase `target_accept` or ' + 'reparameterize.') + warning = SamplerWarning( + WarningType.DIVERGENCES, message, 'error', None, None, None) + warnings.append(warning) + + # small trace + if self._samples_after_tune == 0: + msg = "Tuning was enabled throughout the whole trace." + warning = SamplerWarning( + WarningType.BAD_PARAMS, msg, 'error', None, None, None) + warnings.append(warning) + elif self._samples_after_tune < 500: + msg = "Only %s samples in chain." % self._samples_after_tune + warning = SamplerWarning( + WarningType.BAD_PARAMS, msg, 'error', None, None, None) + warnings.append(warning) + + warnings.extend(self.step_adapt.warnings()) + return warnings diff --git a/pymc3/step_methods/hmc/hmc.py b/pymc3/step_methods/hmc/hmc.py index f9c3441ae8..dd5c05ab12 100644 --- a/pymc3/step_methods/hmc/hmc.py +++ b/pymc3/step_methods/hmc/hmc.py @@ -1,17 +1,15 @@ +import math + import numpy as np -from ..arraystep import metrop_select, Competence -from .base_hmc import BaseHMC +from ..arraystep import Competence from pymc3.vartypes import discrete_types -from pymc3.theanof import floatX +from pymc3.step_methods.hmc.integration import IntegrationError +from pymc3.step_methods.hmc.base_hmc import BaseHMC, HMCStepData, DivergenceInfo __all__ = ['HamiltonianMC'] -# TODO: -# add constraint handling via page 37 of Radford's -# http://www.cs.utoronto.ca/~radford/ham-mcmc.abstract.html - def unif(step_size, elow=.85, ehigh=1.15): return np.random.uniform(elow, ehigh) * step_size @@ -25,8 +23,24 @@ class HamiltonianMC(BaseHMC): name = 'hmc' default_blocked = True - - def __init__(self, vars=None, path_length=2., step_rand=unif, **kwargs): + generates_stats = True + stats_dtypes = [{ + 'step_size': np.float64, + 'n_steps': np.int64, + 'tune': np.bool, + 'step_size_bar': np.float64, + 'accept': np.float64, + 'diverging': np.bool, + 'energy_error': np.float64, + 'energy': np.float64, + 'max_energy_error': np.float64, + 'path_length': np.float64, + 'accepted': np.bool, + }] + + def __init__(self, vars=None, path_length=2., + adapt_step_size=True, gamma=0.05, k=0.75, t0=10, + target_accept=0.8, **kwargs): """Set up the Hamiltonian Monte Carlo sampler. Parameters @@ -50,32 +64,71 @@ def __init__(self, vars=None, path_length=2., step_rand=unif, **kwargs): An object that represents the Hamiltonian with methods `velocity`, `energy`, and `random` methods. It can be specified instead of the scaling matrix. + target_accept : float, default .8 + Adapt the step size such that the average acceptance + probability across the trajectories are close to target_accept. + Higher values for target_accept lead to smaller step sizes. + Setting this to higher values like 0.9 or 0.99 can help + with sampling from difficult posteriors. Valid values are + between 0 and 1 (exclusive). + gamma : float, default .05 + k : float, default .75 + Parameter for dual averaging for step size adaptation. Values + between 0.5 and 1 (exclusive) are admissible. Higher values + correspond to slower adaptation. + t0 : int, default 10 + Parameter for dual averaging. Higher values slow initial + adaptation. + adapt_step_size : bool, default=True + Whether step size adaptation should be enabled. If this is + disabled, `k`, `t0`, `gamma` and `target_accept` are ignored. 
model : pymc3.Model The model **kwargs : passed to BaseHMC """ super(HamiltonianMC, self).__init__(vars, **kwargs) self.path_length = path_length - self.step_rand = step_rand - def astep(self, q0): - """Perform a single HMC iteration.""" - e = floatX(self.step_rand(self.step_size)) - n_steps = int(self.path_length / e) - - p0 = self.potential.random() - start = self.integrator.compute_state(q0, p0) - - if not np.isfinite(start.energy): - raise ValueError('Bad initial energy: %s. The model ' - 'might be misspecified.' % start.energy) + def _hamiltonian_step(self, start, p0, step_size): + path_length = np.random.rand() * self.path_length + n_steps = max(1, int(path_length / step_size)) + energy_change = -np.inf state = start - for _ in range(n_steps): - state = self.integrator.step(e, state) - - energy_change = start.energy - state.energy - return metrop_select(energy_change, state.q, start.q)[0] + div_info = None + try: + for _ in range(n_steps): + state = self.integrator.step(step_size, state) + except IntegrationError as e: + div_info = DivergenceInfo('Divergence encountered.', e, state) + else: + if not np.isfinite(state.energy): + div_info = DivergenceInfo( + 'Divergence encountered, bad energy.', None, state) + energy_change = start.energy - state.energy + if np.abs(energy_change) > self.Emax: + div_info = DivergenceInfo( + 'Divergence encountered, large integration error.', + None, state) + + accept_stat = min(1, math.exp(energy_change)) + + if div_info is not None or np.random.rand() >= accept_stat: + end = start + accepted = False + else: + end = state + accepted = True + + stats = { + 'path_length': path_length, + 'n_steps': n_steps, + 'accept': accept_stat, + 'energy_error': energy_change, + 'energy': state.energy, + 'accepted': accepted, + } + return HMCStepData(end, accept_stat, div_info, stats) @staticmethod def competence(var, has_grad): diff --git a/pymc3/step_methods/hmc/integration.py b/pymc3/step_methods/hmc/integration.py index 9ed3f4b8b9..4744d1c046 100644 --- a/pymc3/step_methods/hmc/integration.py +++ b/pymc3/step_methods/hmc/integration.py @@ -7,6 +7,10 @@ State = namedtuple("State", 'q, p, v, q_grad, energy') +class IntegrationError(RuntimeError): + pass + + class CpuLeapfrogIntegrator(object): def __init__(self, potential, logp_dlogp_func): """Leapfrog integrator using CPU.""" @@ -46,6 +50,21 @@ def step(self, epsilon, state, out=None): ------- None if `out` is provided, else a State namedtuple """ + try: + return self._step(epsilon, state, out=None) + except linalg.LinAlgError as err: + msg = "LinAlgError during leapfrog step." + raise IntegrationError(msg) + except ValueError as err: + # Raised by many scipy.linalg functions + scipy_msg = "array must not contain infs or nans" + if len(err.args) > 0 and scipy_msg in err.args[0].lower(): + msg = "Infs or nans in scipy.linalg during leapfrog step." 
+ raise IntegrationError(msg) + else: + raise + + def _step(self, epsilon, state, out=None): pot = self._potential axpy = linalg.blas.get_blas_funcs('axpy', dtype=self._dtype) diff --git a/pymc3/step_methods/hmc/nuts.py b/pymc3/step_methods/hmc/nuts.py index 8102cce81a..6a5815743f 100644 --- a/pymc3/step_methods/hmc/nuts.py +++ b/pymc3/step_methods/hmc/nuts.py @@ -1,14 +1,14 @@ +from __future__ import division + from collections import namedtuple -import warnings import numpy as np import numpy.random as nr -from scipy import stats, linalg -import six from ..arraystep import Competence -from pymc3.exceptions import SamplingError -from .base_hmc import BaseHMC +from .base_hmc import BaseHMC, HMCStepData, DivergenceInfo +from .integration import IntegrationError +from pymc3.backends.report import SamplerWarning, WarningType from pymc3.theanof import floatX from pymc3.vartypes import continuous_types @@ -67,8 +67,8 @@ class NUTS(BaseHMC): References ---------- - .. [1] Hoffman, Matthew D., & Gelman, Andrew. (2011). The No-U-Turn Sampler: - Adaptively Setting Path Lengths in Hamiltonian Monte Carlo. + .. [1] Hoffman, Matthew D., & Gelman, Andrew. (2011). The No-U-Turn + Sampler: Adaptively Setting Path Lengths in Hamiltonian Monte Carlo. """ name = 'nuts' @@ -88,10 +88,7 @@ class NUTS(BaseHMC): 'max_energy_error': np.float64, }] - def __init__(self, vars=None, Emax=1000, target_accept=0.8, - gamma=0.05, k=0.75, t0=10, adapt_step_size=True, - max_treedepth=10, on_error='summary', - early_max_treedepth=8, + def __init__(self, vars=None, max_treedepth=10, early_max_treedepth=8, **kwargs): R"""Set up the No-U-Turn sampler. @@ -101,19 +98,25 @@ def __init__(self, vars=None, Emax=1000, target_accept=0.8, Emax : float, default 1000 Maximum energy change allowed during leapfrog steps. Larger deviations will abort the integration. - target_accept : float (0,1), default .8 - Try to find a step size such that the average acceptance + target_accept : float, default .8 + Adapt the step size such that the average acceptance probability across the trajectories are close to target_accept. Higher values for target_accept lead to smaller step sizes. + Setting this to higher values like 0.9 or 0.99 can help + with sampling from difficult posteriors. Valid values are + between 0 and 1 (exclusive). step_scale : float, default 0.25 Size of steps to take, automatically scaled down by `1/n**(1/4)`. If step size adaptation is switched off, the resulting step size is used. If adaptation is enabled, it is used as initial guess. gamma : float, default .05 - k : float (.5,1) default .75 - scaling of speed of adaptation + k : float, default .75 + Parameter for dual averaging for step size adaptation. Values + between 0.5 and 1 (exclusive) are admissible. Higher values + correspond to slower adaptation. t0 : int, default 10 - slows initial adaptation + Parameter for dual averaging. Higher values slow initial + adaptation. adapt_step_size : bool, default=True Whether step size adaptation should be enabled. If this is disabled, `k`, `t0`, `gamma` and `target_accept` are ignored. @@ -132,12 +135,6 @@ def __init__(self, vars=None, Emax=1000, target_accept=0.8, this will be interpreded as the mass or covariance matrix. is_cov : bool, default=False Treat the scaling as mass or covariance matrix. - on_error : {'summary', 'warn', 'raise'}, default='summary' - How to report problems during sampling. - - * `summary`: Print one warning after sampling. - * `warn`: Print individual warnings as soon as they appear. 
- * `raise`: Raise an error on the first problem. potential : Potential, optional An object that represents the Hamiltonian with methods `velocity`, `energy`, and `random` methods. It can be specified instead @@ -153,42 +150,13 @@ def __init__(self, vars=None, Emax=1000, target_accept=0.8, `pm.sample` to the desired number of tuning steps. """ super(NUTS, self).__init__(vars, **kwargs) - self.Emax = Emax - self.target_accept = target_accept - self.gamma = gamma - self.k = k - self.t0 = t0 - - self.h_bar = 0 - self.mu = np.log(self.step_size * 10) - self.log_step_size = np.log(self.step_size) - self.log_step_size_bar = 0 - self.m = 1 - self.adapt_step_size = adapt_step_size self.max_treedepth = max_treedepth self.early_max_treedepth = early_max_treedepth + self._reached_max_treedepth = 0 - self.tune = True - self.report = NutsReport(on_error, max_treedepth, target_accept) - - def astep(self, q0): - """Perform a single NUTS iteration.""" - p0 = self.potential.random() - start = self.integrator.compute_state(q0, p0) - - if not np.isfinite(start.energy): - raise ValueError('Bad initial energy: %s. The model ' - 'might be misspecified.' % start.energy) - - if not self.adapt_step_size: - step_size = self.step_size - elif self.tune: - step_size = np.exp(self.log_step_size) - else: - step_size = np.exp(self.log_step_size_bar) - - if self.tune and self.m < 200: + def _hamiltonian_step(self, start, p0, step_size): + if self.tune and self.iter_count < 200: max_treedepth = self.early_max_treedepth else: max_treedepth = self.max_treedepth @@ -197,38 +165,16 @@ def astep(self, q0): for _ in range(max_treedepth): direction = logbern(np.log(0.5)) * 2 - 1 - diverging_info, turning = tree.extend(direction) - q, q_grad = tree.proposal.q, tree.proposal.q_grad + divergence_info, turning = tree.extend(direction) - if diverging_info or turning: - if diverging_info: - self.report._add_divergence(self.tune, *diverging_info) + if divergence_info or turning: break + else: + self._reached_max_treedepth += 1 - w = 1. / (self.m + self.t0) - self.h_bar = ((1 - w) * self.h_bar + - w * (self.target_accept - tree.accept_sum * 1. / tree.n_proposals)) - - if self.tune: - self.log_step_size = self.mu - self.h_bar * np.sqrt(self.m) / self.gamma - mk = self.m ** -self.k - self.log_step_size_bar = mk * self.log_step_size + (1 - mk) * self.log_step_size_bar - - self.m += 1 - - if self.tune: - self.potential.adapt(q, q_grad) - - stats = { - 'step_size': step_size, - 'tune': self.tune, - 'step_size_bar': np.exp(self.log_step_size_bar), - 'diverging': bool(diverging_info), - } - - stats.update(tree.stats()) - - return q, [stats] + stats = tree.stats() + accept_stat = stats['mean_tree_accept'] + return HMCStepData(tree.proposal, accept_stat, divergence_info, stats) @staticmethod def competence(var, has_grad): @@ -237,6 +183,17 @@ def competence(var, has_grad): return Competence.IDEAL return Competence.INCOMPATIBLE + def warnings(self, strace): + warnings = super(NUTS, self).warnings(strace) + + if np.mean(self._reached_max_treedepth) > 0.05: + msg = ('The chain reached the maximum tree depth. Increase ' + 'max_treedepth, increase target_accept or reparameterize.') + warn = SamplerWarning(WarningType.TREEDEPTH, msg, 'warn', + None, None, None) + warnings.append(warn) + return warnings + # A proposal for the next position Proposal = namedtuple("Proposal", "q, q_grad, energy, p_accept") @@ -285,10 +242,9 @@ def extend(self, direction): If direction is larger than 0, extend it to the right, otherwise extend it to the left. 
- Return a tuple `(diverging, turning)`. `diverging` indicates if the - tree extension was aborted because the energy change exceeded - `self.Emax`. If so, it is a tuple containing details about the reason. - Otherwise, it will be `False`. `turning` indicates that + Return a tuple `(diverging, turning)` of type (DivergenceInfo, bool). + `diverging` indicates, that the tree extension was aborted because + the energy change exceeded `self.Emax`. `turning` indicates that the tree extension was stopped because the termination criterior was reached (the trajectory is turning back). """ @@ -325,17 +281,9 @@ def _single_step(self, left, epsilon): """Perform a leapfrog step and handle error cases.""" try: right = self.integrator.step(epsilon, left) - except linalg.LinAlgError as err: - error_msg = "LinAlgError during leapfrog step." + except IntegrationError as err: + error_msg = str(err) error = err - except ValueError as err: - # Raised by many scipy.linalg functions - scipy_msg = "array must not contain infs or nans" - if len(err.args) > 0 and scipy_msg in err.args[0].lower(): - error_msg = "Infs or nans in scipy.linalg during leapfrog step." - error = err - else: - raise else: energy_change = right.energy - self.start_energy if np.isnan(energy_change): @@ -346,25 +294,30 @@ def _single_step(self, left, epsilon): if np.abs(energy_change) < self.Emax: p_accept = min(1, np.exp(-energy_change)) log_size = -energy_change - proposal = Proposal(right.q, right.q_grad, right.energy, p_accept) - tree = Subtree(right, right, right.p, proposal, log_size, p_accept, 1) - return tree, False, False + proposal = Proposal( + right.q, right.q_grad, right.energy, p_accept) + tree = Subtree(right, right, right.p, + proposal, log_size, p_accept, 1) + return tree, None, False else: - error_msg = ("Energy change in leapfrog step is too large: %s. " + error_msg = ("Energy change in leapfrog step is too large: %s." 
% energy_change) error = None tree = Subtree(None, None, None, None, -np.inf, 0, 1) - return tree, (error_msg, error, left), False + divergance_info = DivergenceInfo(error_msg, error, left) + return tree, divergance_info, False def _build_subtree(self, left, depth, epsilon): if depth == 0: return self._single_step(left, epsilon) - tree1, diverging, turning = self._build_subtree(left, depth - 1, epsilon) + tree1, diverging, turning = self._build_subtree( + left, depth - 1, epsilon) if diverging or turning: return tree1, diverging, turning - tree2, diverging, turning = self._build_subtree(tree1.right, depth - 1, epsilon) + tree2, diverging, turning = self._build_subtree( + tree1.right, depth - 1, epsilon) left, right = tree1.left, tree2.right @@ -385,7 +338,8 @@ def _build_subtree(self, left, depth, epsilon): accept_sum = tree1.accept_sum + tree2.accept_sum n_proposals = tree1.n_proposals + tree2.n_proposals - tree = Subtree(left, right, p_sum, proposal, log_size, accept_sum, n_proposals) + tree = Subtree(left, right, p_sum, proposal, + log_size, accept_sum, n_proposals) return tree, diverging, turning def stats(self): @@ -397,94 +351,3 @@ def stats(self): 'tree_size': self.n_proposals, 'max_energy_error': self.max_energy_change, } - - -class NutsReport(object): - def __init__(self, on_error, max_treedepth, target_accept): - if on_error not in ['summary', 'raise', 'warn']: - raise ValueError('Invalid value for on_error.') - self._on_error = on_error - self._max_treedepth = max_treedepth - self._target_accept = target_accept - self._chain_id = None - self._divs_tune = [] - self._divs_after_tune = [] - - def _add_divergence(self, tuning, msg, error=None, point=None): - if tuning: - self._divs_tune.append((msg, error, point)) - else: - self._divs_after_tune.append((msg, error, point)) - if self._on_error == 'raise': - err = SamplingError('Divergence after tuning: %s Increase ' - 'target_accept or reparameterize.' % msg) - six.raise_from(err, error) - elif self._on_error == 'warn': - warnings.warn('Divergence detected: %s Increase target_accept ' - 'or reparameterize.' % msg) - - def _check_len(self, tuning): - n = (~tuning).sum() - if n < 500: - warnings.warn('Chain %s contains only %s samples.' - % (self._chain_id, n)) - if np.all(tuning): - warnings.warn('Step size tuning was enabled throughout the whole ' - 'trace. You might want to specify the number of ' - 'tuning steps.') - if n > 0 and n == len(self._divs_after_tune): - warnings.warn('Chain %s contains only diverging samples. ' - 'The model is probably misspecified.' - % self._chain_id) - - def _check_accept(self, accept): - mean_accept = np.mean(accept) - target_accept = self._target_accept - # Try to find a reasonable interval for acceptable acceptance - # probabilities. Finding this was mostry trial and error. - n_bound = min(100, len(accept)) - n_good, n_bad = mean_accept * n_bound, (1 - mean_accept) * n_bound - lower, upper = stats.beta(n_good + 1, n_bad + 1).interval(0.95) - if target_accept < lower or target_accept > upper: - warnings.warn('The acceptance probability in chain %s does not ' - 'match the target. It is %s, but should be close ' - 'to %s. Try to increase the number of tuning steps.' - % (self._chain_id, mean_accept, target_accept)) - - def _check_depth(self, depth): - if len(depth) == 0: - return - if np.mean(depth == self._max_treedepth) > 0.05: - warnings.warn('Chain %s reached the maximum tree depth. Increase ' - 'max_treedepth, increase target_accept or ' - 'reparameterize.' 
% self._chain_id) - - def _check_divergence(self): - n_diverging = len(self._divs_after_tune) - if n_diverging > 0: - warnings.warn("Chain %s contains %s diverging samples after " - "tuning. If increasing `target_accept` does not help " - "try to reparameterize." - % (self._chain_id, n_diverging)) - - def _finalize(self, strace): - """Print warnings for obviously problematic chains.""" - self._chain_id = strace.chain - - if strace.supports_sampler_stats: - tuning = strace.get_sampler_stats('tune') - if tuning.ndim == 2: - tuning = np.any(tuning, axis=-1) - - accept = strace.get_sampler_stats('mean_tree_accept') - if accept.ndim == 2: - accept = np.mean(accept, axis=-1) - - depth = strace.get_sampler_stats('depth') - if depth.ndim == 2: - depth = np.max(depth, axis=-1) - - self._check_len(tuning) - self._check_depth(depth[~tuning]) - self._check_accept(accept[~tuning]) - self._check_divergence() diff --git a/pymc3/step_methods/hmc/quadpotential.py b/pymc3/step_methods/hmc/quadpotential.py index 01b7eb6b1d..1eaf165014 100644 --- a/pymc3/step_methods/hmc/quadpotential.py +++ b/pymc3/step_methods/hmc/quadpotential.py @@ -86,7 +86,7 @@ def random(self, x): def velocity_energy(self, x, v_out): raise NotImplementedError('Abstract method') - def adapt(self, sample, grad): + def update(self, sample, grad, tune): """Inform the potential about a new sample during tuning. This can be used by adaptive potentials to change the @@ -94,6 +94,12 @@ def adapt(self, sample, grad): """ pass + def raise_ok(self): + pass + + def reset(self): + pass + def isquadpotential(value): """Check whether an object might be a QuadPotential object.""" @@ -104,7 +110,7 @@ class QuadPotentialDiagAdapt(QuadPotential): """Adapt a diagonal mass matrix from the sample variances.""" def __init__(self, n, initial_mean, initial_diag=None, initial_weight=0, - adaptation_window=100, dtype=None): + adaptation_window=101, dtype=None): """Set up a diagonal mass matrix.""" if initial_diag is not None and initial_diag.ndim != 1: raise ValueError('Initial diagonal must be one-dimensional.') @@ -162,8 +168,11 @@ def _update_from_weightvar(self, weightvar): np.divide(1, self._stds, out=self._inv_stds) self._var_theano.set_value(self._var) - def adapt(self, sample, grad): + def update(self, sample, grad, tune): """Inform the potential about a new sample during tuning.""" + if not tune: + return + window = self.adaptation_window self._foreground_var.add_sample(sample, weight=1) @@ -176,6 +185,16 @@ def adapt(self, sample, grad): self._n_samples += 1 + def raise_ok(self): + if np.any(self._stds == 0): + raise ValueError('Mass matrix contains zeros on the diagonal. ' + 'Some derivatives might always be zero.') + if np.any(self._stds < 0): + raise ValueError('Mass matrix contains negative values on the ' + 'diagonal.') + if np.any(~np.isfinite(self._stds)): + raise ValueError('Mass matrix contains non-finite values.') + class QuadPotentialDiagAdaptGrad(QuadPotentialDiagAdapt): """Adapt a diagonal mass matrix from the variances of the gradients. 
@@ -196,15 +215,18 @@ def _update(self, var): np.divide(1, self._stds, out=self._inv_stds) self._var_theano.set_value(self._var) - def adapt(self, sample, grad): + def update(self, sample, grad, tune): """Inform the potential about a new sample during tuning.""" + if not tune: + return + self._grads1[:] += np.abs(grad) self._grads2[:] += np.abs(grad) self._ngrads1 += 1 self._ngrads2 += 1 if self._n_samples <= 150: - super().adapt(sample, grad) + super().update(sample, grad) else: self._update((self._ngrads1 / self._grads1) ** 2) diff --git a/pymc3/step_methods/step_sizes.py b/pymc3/step_methods/step_sizes.py new file mode 100644 index 0000000000..b063216baf --- /dev/null +++ b/pymc3/step_methods/step_sizes.py @@ -0,0 +1,67 @@ +import math + +import numpy as np +from scipy import stats + +from pymc3.backends.report import SamplerWarning, WarningType + + +class DualAverageAdaptation(object): + def __init__(self, initial_step, target, gamma, k, t0): + self._log_step = math.log(initial_step) + self._log_bar = self._log_step + self._target = target + self._hbar = 0. + self._k = k + self._t0 = t0 + self._count = 1 + self._mu = math.log(10 * initial_step) + self._gamma = gamma + self._tuned_stats = [] + + def current(self, tune): + if tune: + return math.exp(self._log_step) + else: + return math.exp(self._log_bar) + + def update(self, accept_stat, tune): + if not tune: + self._tuned_stats.append(accept_stat) + return + + count, k, t0 = self._count, self._k, self._t0 + w = 1. / (count + t0) + self._hbar = ((1 - w) * self._hbar + w * (self._target - accept_stat)) + + self._log_step = self._mu - self._hbar * math.sqrt(count) / self._gamma + mk = count ** -k + self._log_bar = mk * self._log_step + (1 - mk) * self._log_bar + self._count += 1 + + def stats(self): + return { + 'step_size': math.exp(self._log_step), + 'step_size_bar': math.exp(self._log_bar), + } + + def warnings(self): + accept = np.array(self._tuned_stats) + mean_accept = np.mean(accept) + target_accept = self._target + # Try to find a reasonable interval for acceptable acceptance + # probabilities. Finding this was mostry trial and error. + n_bound = min(100, len(accept)) + n_good, n_bad = mean_accept * n_bound, (1 - mean_accept) * n_bound + lower, upper = stats.beta(n_good + 1, n_bad + 1).interval(0.95) + if target_accept < lower or target_accept > upper: + msg = ('The acceptance probability does not match the target. It ' + 'is %s, but should be close to %s. Try to increase the ' + 'number of tuning steps.' 
+                   % (mean_accept, target_accept))
+            info = {'target': target_accept, 'actual': mean_accept}
+            warning = SamplerWarning(
+                WarningType.BAD_ACCEPTANCE, msg, 'warn', None, None, info)
+            return [warning]
+        else:
+            return []
diff --git a/pymc3/tests/test_diagnostics.py b/pymc3/tests/test_diagnostics.py
index 31ee57f7d2..382012744d 100644
--- a/pymc3/tests/test_diagnostics.py
+++ b/pymc3/tests/test_diagnostics.py
@@ -44,7 +44,7 @@ def test_bad(self):
 
     def test_right_shape_python_float(self, shape=None, test_shape=None):
         """Check Gelman-Rubin statistic shape is correct w/ python float"""
-        n_jobs = 3
+        chains = 3
         n_samples = 5
 
         with Model():
@@ -55,9 +55,9 @@ def test_right_shape_python_float(self, shape=None, test_shape=None):
 
             # start sampling at the MAP
             start = find_MAP()
-            step = NUTS(scaling=start)
+            step = NUTS(scaling=start, step_scale=0.1)
             ptrace = sample(n_samples, tune=0, step=step, start=start,
-                            njobs=n_jobs, random_seed=42)
+                            chains=chains, random_seed=42)
 
         rhat = gelman_rubin(ptrace)['x']
diff --git a/pymc3/tests/test_examples.py b/pymc3/tests/test_examples.py
index 19557fb38b..dbcbd3aa96 100644
--- a/pymc3/tests/test_examples.py
+++ b/pymc3/tests/test_examples.py
@@ -33,7 +33,8 @@ def get_city_data():
 class TestARM5_4(SeededTest):
     def build_model(self):
         data = pd.read_csv(pm.get_data('wells.dat'),
-                           delimiter=u' ', index_col=u'id', dtype={u'switch': np.int8})
+                           delimiter=u' ', index_col=u'id',
+                           dtype={u'switch': np.int8})
         data.dist /= 100
         data.educ /= 4
         col = data.columns
diff --git a/pymc3/tests/test_sampling.py b/pymc3/tests/test_sampling.py
index 5f2584965f..2cb7c15839 100644
--- a/pymc3/tests/test_sampling.py
+++ b/pymc3/tests/test_sampling.py
@@ -264,18 +264,18 @@ def test_sample_ppc_w(self):
 
         with pm.Model() as model_0:
             mu = pm.Normal('mu', mu=0, sd=1)
-            y = pm.Normal('y', mu=mu, sd=1, observed=data0)
+            y = pm.Normal('y', mu=mu, sd=1, observed=data0, shape=500)
             trace_0 = pm.sample()
 
         with pm.Model() as model_1:
             mu = pm.Normal('mu', mu=0, sd=1, shape=len(data0))
-            y = pm.Normal('y', mu=mu, sd=1, observed=data0)
+            y = pm.Normal('y', mu=mu, sd=1, observed=data0, shape=500)
             trace_1 = pm.sample()
 
         traces = [trace_0, trace_0]
         models = [model_0, model_0]
-        ppc = pm.sample_ppc_w(traces, 1000, models)
-        assert ppc['y'].shape == (1000,)
+        ppc = pm.sample_ppc_w(traces, 100, models)
+        assert ppc['y'].shape == (100, 500)
 
         traces = [trace_0, trace_1]
         models = [model_0, model_1]
diff --git a/pymc3/tests/test_step.py b/pymc3/tests/test_step.py
index 53ee60d4ec..d2e848c815 100644
--- a/pymc3/tests/test_step.py
+++ b/pymc3/tests/test_step.py
@@ -11,7 +11,6 @@
                                 MultivariateNormalProposal, HamiltonianMC, EllipticalSlice, smc, DEMetropolis)
 from pymc3.theanof import floatX
-from pymc3 import SamplingError
 from pymc3.distributions import (
     Binomial, Normal, Bernoulli, Categorical, Beta, HalfNormal)
@@ -62,26 +61,26 @@ class TestStepMethods(object):  # yield test doesn't work subclassing object
             2.41488213e+00, 4.68298379e-01, 4.86342250e-01, -8.43949966e-01]),
         HamiltonianMC: np.array([
-            -0.74925631, -0.2566773 , -2.12480977, 1.64328926, -1.39315913,
-            2.04200003, 0.00706711, 0.34240498, 0.44276674, -0.21368043,
-            -0.76398723, 1.19280082, -1.43030242, -0.44896107, 0.0547087 ,
-            -1.72170938, -0.20443956, 0.35432546, 1.77695096, -0.31053636,
-            -0.26729283, 1.26450201, 0.17049917, 0.27953939, -0.24185153,
-            0.95617117, -0.45707061, 0.75837366, -1.73391277, 1.63331612,
-            -0.68426038, 0.20499991, -0.43866983, 0.31080195, 0.47104548,
-            -0.50331753, 0.7821196 , -1.7544931 , 1.24106497, -1.0152971 ,
-            -0.01949091, -0.33151479, 0.19138253, 0.40349184, 0.31694823,
-            -0.01508142, -0.31330951, 0.40874228, 0.40874228, 0.58078882,
-            0.68378375, 0.84142914, 0.44756075, -0.87297183, 0.59695222,
-            1.96161733, -0.37126652, 0.27552912, 0.74547583, -0.16172925,
-            0.79969568, -0.20501522, -0.36181518, 0.13114261, -0.8461323 ,
-            -0.07749079, -0.07013026, 0.88022116, -0.5546825 , 0.25232708,
-            0.09483573, 0.84910913, 1.33348018, -1.1971401 , 0.49203123,
-            0.22365435, 1.3801812 , 0.06885929, 1.07115053, -1.52225141,
-            1.50179721, -2.01528399, -1.31610679, -0.32298834, -0.80630885,
-            -0.6828592 , 0.2897919 , 1.64608125, -0.71793662, -0.5233058 ,
-            0.53549836, 0.61119221, 0.24235732, -1.3940593 , 0.28380114,
-            -0.22629978, -0.19318957, 1.12543101, -1.40328285, 0.21054137]),
+            0.40608634, 0.40608634, 0.04610354, -0.78588609, 0.03773683,
+            -0.49373368, 0.21708042, 0.21708042, -0.14413517, -0.68284611,
+            0.76299659, 0.24128663, 0.24128663, -0.54835464, -0.84408365,
+            -0.82762589, -0.67429432, -0.67429432, -0.57900517, -0.97138029,
+            -0.37809745, -0.37809745, -0.19333181, -0.40329098, 0.54999765,
+            1.171515  , 0.90279792, 0.90279792, 1.63830503, -0.90436674,
+            -0.02516293, -0.02516293, -0.22177082, -0.28061216, -0.10158021,
+            0.0807234 , 0.16994063, 0.16994063, 0.4141503 , 0.38505666,
+            -0.25936504, 2.12074192, 2.24467132, 0.9628703 , -1.37044749,
+            0.32983336, -0.55317525, -0.55317525, -0.40295662, -0.40295662,
+            -0.40295662, 0.49076931, 0.04234407, -1.0002905 , 0.99823615,
+            0.99823615, 0.24915904, -0.00965061, 0.48519377, 0.21959942,
+            -0.93094702, -0.93094702, -0.76812553, -0.73699981, -0.91834134,
+            -0.91834134, 0.79522886, -0.04267669, -0.04267669, 0.51368761,
+            0.51368761, 0.02255577, 0.70823409, 0.70823409, 0.73921198,
+            0.30295007, 0.30295007, 0.30295007, -0.1300897 , 0.44310964,
+            -1.35839961, -1.55398633, -0.57323153, -0.57323153, -1.15435458,
+            -0.17697793, -0.17697793, 0.2925856 , -0.56119025, -0.15360141,
+            0.83715916, -0.02340449, -0.02340449, -0.63074456, -0.82745942,
+            -0.67626237, 1.13814805, -0.81857813, -0.81857813, 0.26367166]),
         Metropolis: np.array([
             1.62434536, 1.01258895, 0.4844172, -0.58855142, 1.15626034,
             0.39505344, 1.85716138, -0.20297933, -0.20297933, -0.20297933,
             -0.20297933, -1.08083775, -1.08083775,
@@ -425,17 +424,14 @@ def test_parallelized_chains_are_random(self):
 @pytest.mark.xfail(condition=(theano.config.floatX == "float32"),
                    reason="Fails on float32")
 class TestNutsCheckTrace(object):
-    def test_multiple_samplers(self):
+    def test_multiple_samplers(self, caplog):
         with Model():
             prob = Beta('prob', alpha=5., beta=3.)
             Binomial('outcome', n=1, p=prob)
-            # Catching warnings through multiprocessing doesn't work,
-            # so we have to use single threaded sampling.
-            with pytest.warns(None) as warns:
-                sample(3, tune=2, discard_tuned_samples=False,
-                       n_init=None, chains=1)
-            messages = [warn.message.args[0] for warn in warns]
-            assert any("contains only 3" in msg for msg in messages)
+            caplog.clear()
+            sample(3, tune=2, discard_tuned_samples=False,
+                   n_init=None, chains=1)
+            messages = [msg.msg for msg in caplog.records]
             assert all('boolean index did not' not in msg for msg in messages)
 
     def test_bad_init(self):
@@ -445,24 +441,25 @@ def test_bad_init(self):
                 sample(init=None)
             error.match('Bad initial')
 
-    def test_linalg(self):
+    def test_linalg(self, caplog):
         with Model():
             a = Normal('a', shape=2)
             a = tt.switch(a > 0, np.inf, a)
             b = tt.slinalg.solve(floatX(np.eye(2)), a)
             Normal('c', mu=b, shape=2)
-            # Catching warnings through multiprocessing doesn't work,
-            # so we have to use single threaded sampling.
-            with pytest.warns(None) as warns:
-                trace = sample(20, init=None, tune=5, chains=1)
-            warns = [str(warn.message) for warn in warns]
+            caplog.clear()
+            trace = sample(20, init=None, tune=5, chains=2)
+            warns = [msg.msg for msg in caplog.records]
 
         assert np.any(trace['diverging'])
-        assert any('diverging samples after tuning' in warn
-                   for warn in warns)
-        # FIXME This test fails sporadically on py27.
-        # It seems that capturing warnings doesn't work as expected.
-        # assert any('contains only' in warn for warn in warns)
+        assert (
+            any('Divergences after tuning' in warn
+                for warn in warns)
+            or
+            any('only diverging samples' in warn
+                for warn in warns))
 
-        with pytest.raises(SamplingError):
-            sample(20, init=None, nuts_kwargs={'on_error': 'raise'})
+        with pytest.raises(ValueError) as error:
+            trace.report.raise_ok()
+        error.match('issues during sampling')
+        assert not trace.report.ok
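Note on the test changes above: the sampler messages these tests check are no longer surfaced through the warnings module, so the tests switch from pytest.warns to pytest's built-in caplog fixture. A minimal sketch of that pattern, using a stand-in emitter rather than a full pm.sample run; the 'pymc3' logger name is an assumption here:

    # Sketch only: capture log records with pytest's caplog fixture.
    import logging

    logger = logging.getLogger('pymc3')

    def fake_sample():
        # Stand-in for sampler code that logs a warning message.
        logger.warning('Divergences after tuning')

    def test_warning_is_logged(caplog):
        caplog.clear()
        fake_sample()
        messages = [record.msg for record in caplog.records]
        assert any('Divergences after tuning' in msg for msg in messages)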
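The last hunk also shows the report API that replaces raising SamplingError from inside the sampler: the trace returned by pm.sample carries a report whose ok property and raise_ok() method summarize warnings collected during sampling, with raise_ok() raising ValueError when serious issues were recorded. A short usage sketch; the model and draw counts are illustrative only:

    # Sketch only: checking the sampler report after a run, as exercised by
    # test_linalg above. The model below is a placeholder.
    import pymc3 as pm

    with pm.Model():
        mu = pm.Normal('mu', mu=0., sd=1.)
        pm.Normal('obs', mu=mu, sd=1., observed=[0.1, -0.3, 0.2])
        trace = pm.sample(500, tune=500, chains=2)

    if not trace.report.ok:
        # Raises ValueError if serious convergence issues were recorded.
        trace.report.raise_ok()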