From 412d22b8431b8e0edd8d6ed77b867fd5d636807c Mon Sep 17 00:00:00 2001 From: Chris Fonnesbeck Date: Sat, 26 Oct 2019 11:21:54 -0500 Subject: [PATCH 1/8] Replaced tqdm progressbar with fastprogress --- pymc3/parallel_sampling.py | 27 ++++++++++--------------- pymc3/sampling.py | 34 +++++++++---------------------- pymc3/smc/smc.py | 4 ++-- pymc3/tuning/starting.py | 11 +++++----- pymc3/variational/inference.py | 37 +++++++++++++--------------------- requirements.txt | 2 +- 6 files changed, 43 insertions(+), 72 deletions(-) diff --git a/pymc3/parallel_sampling.py b/pymc3/parallel_sampling.py index 36a529cdc1..ae92c6c5dc 100644 --- a/pymc3/parallel_sampling.py +++ b/pymc3/parallel_sampling.py @@ -237,7 +237,6 @@ def __init__(self, draws, tune, step_method, chain, seed, start): tune, seed, ) - # We fork right away, so that the main process can start tqdm threads try: self._process.start() except IOError as e: @@ -346,8 +345,7 @@ def __init__( start_chain_num=0, progressbar=True, ): - if progressbar: - from tqdm import tqdm + from fastprogress import progress_bar if any(len(arg) != chains for arg in [seeds, start_points]): raise ValueError("Number of seeds and start_points must be %s." % chains) @@ -369,14 +367,13 @@ def __init__( self._progress = None self._divergences = 0 + self._total_draws = 0 self._desc = "Sampling {0._chains:d} chains, {0._divergences:,d} divergences" self._chains = chains - if progressbar: - self._progress = tqdm( - total=chains * (draws + tune), - unit="draws", - desc=self._desc.format(self) - ) + self._progress = progress_bar(range(chains * (draws + tune)), + display=progressbar, + auto_update=False) + self._progress.comment = self._desc.format(self) def _make_active(self): while self._inactive and len(self._active) < self._max_active: @@ -393,11 +390,11 @@ def __iter__(self): while self._active: draw = ProcessAdapter.recv_draw(self._active) proc, is_last, draw, tuning, stats, warns = draw - if self._progress is not None: - if not tuning and stats and stats[0].get('diverging'): - self._divergences += 1 - self._progress.set_description(self._desc.format(self)) - self._progress.update() + self._total_draws += 1 + if not tuning and stats and stats[0].get('diverging'): + self._divergences += 1 + self._progress.comment = self._desc.format(self) + self._progress.update(self._total_draws) if is_last: proc.join() @@ -423,8 +420,6 @@ def __enter__(self): def __exit__(self, *args): ProcessAdapter.terminate_all(self._samplers) - if self._progress is not None: - self._progress.close() def _cpu_count(): """Try to guess the number of CPUs in the system. 
diff --git a/pymc3/sampling.py b/pymc3/sampling.py index 89322dda9f..1518840685 100644 --- a/pymc3/sampling.py +++ b/pymc3/sampling.py @@ -26,7 +26,7 @@ from .parallel_sampling import _cpu_count from pymc3.step_methods.hmc import quadpotential import pymc3 as pm -from tqdm import tqdm +from fastprogress import progress_bar import sys @@ -494,8 +494,7 @@ def _sample_population(draws, chain, chains, start, random_seed, step, tune, sampling = _prepare_iter_population(draws, chains, step, start, parallelize, tune=tune, model=model, random_seed=random_seed) - if progressbar: - sampling = tqdm(sampling, total=draws) + sampling = progress_bar(sampling, total=draws, display=progressbar) latest_traces = None for it, traces in enumerate(sampling): @@ -510,10 +509,10 @@ def _sample(chain, progressbar, random_seed, start, draws=None, step=None, sampling = _iter_sample(draws, step, start, trace, chain, tune, model, random_seed) _pbar_data = None - if progressbar: - _pbar_data = {"chain": chain, "divergences": 0} - _desc = "Sampling chain {chain:d}, {divergences:,d} divergences" - sampling = tqdm(sampling, total=draws, desc=_desc.format(**_pbar_data)) + _pbar_data = {"chain": chain, "divergences": 0} + _desc = "Sampling chain {chain:d}, {divergences:,d} divergences" + sampling = progress_bar(sampling, total=draws, display=progressbar) + sampling.comment = _desc.format(**_pbar_data) try: strace = None for it, (strace, diverging) in enumerate(sampling): @@ -521,12 +520,9 @@ def _sample(chain, progressbar, random_seed, start, draws=None, step=None, trace = MultiTrace([strace]) if diverging and _pbar_data is not None: _pbar_data["divergences"] += 1 - sampling.set_description(_desc.format(**_pbar_data)) + sampling.comment = _desc.format(**_pbar_data) except KeyboardInterrupt: pass - finally: - if progressbar: - sampling.close() return strace @@ -663,7 +659,7 @@ def __init__(self, steppers, parallelize): # configure a child process for each stepper _log.info('Attempting to parallelize chains to all cores. 
You can turn this off with `pm.sample(cores=1)`.') import multiprocessing - for c, stepper in enumerate(tqdm(steppers)): + for c, stepper in enumerate(progress_bar(steppers)): slave_end, master_end = multiprocessing.Pipe() stepper_dumps = pickle.dumps(stepper, protocol=4) process = multiprocessing.Process( @@ -1154,8 +1150,7 @@ def sample_posterior_predictive(trace, indices = np.arange(samples) - if progressbar: - indices = tqdm(indices, total=samples) + indices = progress_bar(indices, total=samples, display=progressbar) ppc_trace_t = _DefaultTrace(samples) try: @@ -1173,10 +1168,6 @@ def sample_posterior_predictive(trace, except KeyboardInterrupt: pass - finally: - if progressbar: - indices.close() - ppc_trace = ppc_trace_t.trace_dict if keep_size: for k, ary in ppc_trace.items(): @@ -1299,8 +1290,7 @@ def sample_posterior_predictive_w(traces, samples=None, models=None, weights=Non indices = np.random.randint(0, len_trace, samples) - if progressbar: - indices = tqdm(indices, total=samples) + indices = progress_bar(indices, total=samples, display=progressbar) try: ppc = defaultdict(list) @@ -1317,10 +1307,6 @@ def sample_posterior_predictive_w(traces, samples=None, models=None, weights=Non except KeyboardInterrupt: pass - finally: - if progressbar: - indices.close() - return {k: np.asarray(v) for k, v in ppc.items()} diff --git a/pymc3/smc/smc.py b/pymc3/smc/smc.py index d9abbcd4f6..0e750f6e52 100644 --- a/pymc3/smc/smc.py +++ b/pymc3/smc/smc.py @@ -1,7 +1,7 @@ from collections import OrderedDict import numpy as np -from tqdm import tqdm +from fastprogress import progress_bar import multiprocessing as mp import warnings from theano import function as theano_function @@ -264,7 +264,7 @@ def mutate(self): draw, *parameters ) - for draw in tqdm(range(self.draws), disable=not self.progressbar) + for draw in progress_bar(range(self.draws), display=self.progressbar) ] posterior, acc_list, priors, likelihoods = zip(*results) self.posterior = np.array(posterior) diff --git a/pymc3/tuning/starting.py b/pymc3/tuning/starting.py index 30fa9d7abf..a76dbeaa00 100644 --- a/pymc3/tuning/starting.py +++ b/pymc3/tuning/starting.py @@ -6,7 +6,7 @@ from scipy.optimize import minimize import numpy as np from numpy import isfinite, nan_to_num -from tqdm import tqdm +from fastprogress import progress_bar import pymc3 as pm from ..vartypes import discrete_types, typefilter from ..model import modelcontext, Point @@ -177,8 +177,8 @@ def __init__(self, maxeval=5000, progressbar=True, logp_func=None, dlogp_func=No self.use_gradient = True self.desc = 'logp = {:,.5g}, ||grad|| = {:,.5g}' self.previous_x = None - self.progress = tqdm(total=maxeval, disable=not progressbar) - self.progress.n = 0 + self.progress = progress_bar(total=maxeval, display=progressbar) + self.progress.update(0) def __call__(self, x): neg_value = np.float64(self.logp_func(pm.floatX(x))) @@ -198,7 +198,6 @@ def __call__(self, x): if self.n_eval > self.maxeval: self.update_progress_desc(neg_value, grad) - self.progress.close() raise StopIteration self.n_eval += 1 @@ -211,7 +210,7 @@ def __call__(self, x): def update_progress_desc(self, neg_value, grad=None): if grad is None: - self.progress.set_description(self.desc.format(neg_value)) + self.progress.comment = self.desc.format(neg_value) else: norm_grad = np.linalg.norm(grad) - self.progress.set_description(self.desc.format(neg_value, norm_grad)) + self.progress.comment = self.desc.format(neg_value, norm_grad) diff --git a/pymc3/variational/inference.py b/pymc3/variational/inference.py index 
23185aec7d..8b643a1659 100644 --- a/pymc3/variational/inference.py +++ b/pymc3/variational/inference.py @@ -3,7 +3,7 @@ import collections import numpy as np -import tqdm +from fastprogress import progress_bar import pymc3 as pm from pymc3.variational import test_functions @@ -73,14 +73,12 @@ def run_profiling(self, n=1000, score=None, **kwargs): score=score, fn_kwargs=fn_kwargs, **kwargs ) - progress = tqdm.trange(n) + progress = progress_bar(n) try: for _ in progress: step_func() except KeyboardInterrupt: pass - finally: - progress.close() return step_func.profile def fit(self, n=10000, score=None, callbacks=None, progressbar=True, @@ -129,11 +127,11 @@ def fit(self, n=10000, score=None, callbacks=None, progressbar=True, callbacks = [] score = self._maybe_score(score) step_func = self.objective.step_function(score=score, **kwargs) - with tqdm.trange(n, disable=not progressbar) as progress: - if score: - state = self._iterate_with_loss(0, n, step_func, progress, callbacks) - else: - state = self._iterate_without_loss(0, n, step_func, progress, callbacks) + progress = progress_bar(n, display=progressbar) + if score: + state = self._iterate_with_loss(0, n, step_func, progress, callbacks) + else: + state = self._iterate_without_loss(0, n, step_func, progress, callbacks) # hack to allow pm.fit() access to loss hist self.approx.hist = self.hist @@ -170,11 +168,8 @@ def _iterate_without_loss(self, s, _, step_func, progress, callbacks): for callback in callbacks: callback(self.approx, None, i+s+1) except (KeyboardInterrupt, StopIteration) as e: - progress.close() if isinstance(e, StopIteration): logger.info(str(e)) - finally: - progress.close() return State(i+s, step=step_func, callbacks=callbacks, score=False) @@ -219,15 +214,13 @@ def _infmean(input_array): scores[i] = e if i % 10 == 0: avg_loss = _infmean(scores[max(0, i - 1000):i + 1]) - progress.set_description('Average Loss = {:,.5g}'.format(avg_loss)) + progress.comment = 'Average Loss = {:,.5g}'.format(avg_loss) avg_loss = scores[max(0, i - 1000):i + 1].mean() - progress.set_description( - 'Average Loss = {:,.5g}'.format(avg_loss)) + progress.comment = 'Average Loss = {:,.5g}'.format(avg_loss) for callback in callbacks: callback(self.approx, scores[:i + 1], i+s+1) except (KeyboardInterrupt, StopIteration) as e: # pragma: no cover # do not print log on the same line - progress.close() scores = scores[:i] if isinstance(e, StopIteration): logger.info(str(e)) @@ -246,8 +239,6 @@ def _infmean(input_array): avg_loss = _infmean(scores[max(0, i - 1000):i + 1]) logger.info( 'Finished [100%]: Average Loss = {:,.5g}'.format(avg_loss)) - finally: - progress.close() self.hist = np.concatenate([self.hist, scores]) return State(i+s, step=step_func, callbacks=callbacks, @@ -259,11 +250,11 @@ def refine(self, n, progressbar=True): if self.state is None: raise TypeError('Need to call `.fit` first') i, step, callbacks, score = self.state - with tqdm.trange(n, disable=not progressbar) as progress: - if score: - state = self._iterate_with_loss(i, n, step, progress, callbacks) - else: - state = self._iterate_without_loss(i, n, step, progress, callbacks) + progress = progress_bar(n, display=progressbar) + if score: + state = self._iterate_with_loss(i, n, step, progress, callbacks) + else: + state = self._iterate_without_loss(i, n, step, progress, callbacks) self.state = state diff --git a/requirements.txt b/requirements.txt index 5beda3ba46..489fb83aac 100644 --- a/requirements.txt +++ b/requirements.txt @@ -4,5 +4,5 @@ numpy>=1.13.0 scipy>=0.18.1 
pandas>=0.18.0 patsy>=0.4.0 -tqdm>=4.8.4 +fastprogress>=0.1.21 h5py>=2.7.0 From 32511d62e72510ae49feefda37c7c1eb4f4dee85 Mon Sep 17 00:00:00 2001 From: Chris Fonnesbeck Date: Sat, 26 Oct 2019 11:25:20 -0500 Subject: [PATCH 2/8] Applied black formatting on changed files --- pymc3/parallel_sampling.py | 56 +-- pymc3/sampling.py | 642 +++++++++++++++++++++------------ pymc3/smc/smc.py | 43 ++- pymc3/tuning/starting.py | 83 +++-- pymc3/variational/inference.py | 254 +++++++------ 5 files changed, 665 insertions(+), 413 deletions(-) diff --git a/pymc3/parallel_sampling.py b/pymc3/parallel_sampling.py index ae92c6c5dc..cc1225600b 100644 --- a/pymc3/parallel_sampling.py +++ b/pymc3/parallel_sampling.py @@ -17,28 +17,31 @@ def _get_broken_pipe_exception(): import sys - if sys.platform == 'win32': - return RuntimeError("The communication pipe between the main process " - "and its spawned children is broken.\n" - "In Windows OS, this usually means that the child " - "process raised an exception while it was being " - "spawned, before it was setup to communicate to " - "the main process.\n" - "The exceptions raised by the child process while " - "spawning cannot be caught or handled from the " - "main process, and when running from an IPython or " - "jupyter notebook interactive kernel, the child's " - "exception and traceback appears to be lost.\n" - "A known way to see the child's error, and try to " - "fix or handle it, is to run the problematic code " - "as a batch script from a system's Command Prompt. " - "The child's exception will be printed to the " - "Command Promt's stderr, and it should be visible " - "above this error and traceback.\n" - "Note that if running a jupyter notebook that was " - "invoked from a Command Prompt, the child's " - "exception should have been printed to the Command " - "Prompt on which the notebook is running.") + + if sys.platform == "win32": + return RuntimeError( + "The communication pipe between the main process " + "and its spawned children is broken.\n" + "In Windows OS, this usually means that the child " + "process raised an exception while it was being " + "spawned, before it was setup to communicate to " + "the main process.\n" + "The exceptions raised by the child process while " + "spawning cannot be caught or handled from the " + "main process, and when running from an IPython or " + "jupyter notebook interactive kernel, the child's " + "exception and traceback appears to be lost.\n" + "A known way to see the child's error, and try to " + "fix or handle it, is to run the problematic code " + "as a batch script from a system's Command Prompt. " + "The child's exception will be printed to the " + "Command Promt's stderr, and it should be visible " + "above this error and traceback.\n" + "Note that if running a jupyter notebook that was " + "invoked from a Command Prompt, the child's " + "exception should have been printed to the Command " + "Prompt on which the notebook is running." 
+ ) else: return None @@ -370,9 +373,9 @@ def __init__( self._total_draws = 0 self._desc = "Sampling {0._chains:d} chains, {0._divergences:,d} divergences" self._chains = chains - self._progress = progress_bar(range(chains * (draws + tune)), - display=progressbar, - auto_update=False) + self._progress = progress_bar( + range(chains * (draws + tune)), display=progressbar, auto_update=False + ) self._progress.comment = self._desc.format(self) def _make_active(self): @@ -391,7 +394,7 @@ def __iter__(self): draw = ProcessAdapter.recv_draw(self._active) proc, is_last, draw, tuning, stats, warns = draw self._total_draws += 1 - if not tuning and stats and stats[0].get('diverging'): + if not tuning and stats and stats[0].get("diverging"): self._divergences += 1 self._progress.comment = self._desc.format(self) self._progress.update(self._total_draws) @@ -421,6 +424,7 @@ def __enter__(self): def __exit__(self, *args): ProcessAdapter.terminate_all(self._samplers) + def _cpu_count(): """Try to guess the number of CPUs in the system. diff --git a/pymc3/sampling.py b/pymc3/sampling.py index 1518840685..6be7c593a3 100644 --- a/pymc3/sampling.py +++ b/pymc3/sampling.py @@ -1,4 +1,5 @@ from typing import Dict, List, Optional, TYPE_CHECKING, cast + if TYPE_CHECKING: from typing import Any, Tuple from typing import Iterable as TIterable @@ -17,10 +18,23 @@ from .backends.ndarray import NDArray from .distributions.distribution import draw_values from .model import modelcontext, Point, all_continuous, Model -from .step_methods import (NUTS, HamiltonianMC, Metropolis, BinaryMetropolis, - BinaryGibbsMetropolis, CategoricalGibbsMetropolis, - Slice, CompoundStep, arraystep) -from .util import update_start_vals, get_untransformed_name, is_transformed_name, get_default_varnames +from .step_methods import ( + NUTS, + HamiltonianMC, + Metropolis, + BinaryMetropolis, + BinaryGibbsMetropolis, + CategoricalGibbsMetropolis, + Slice, + CompoundStep, + arraystep, +) +from .util import ( + update_start_vals, + get_untransformed_name, + is_transformed_name, + get_default_varnames, +) from .vartypes import discrete_types from .exceptions import IncorrectArgumentsError from .parallel_sampling import _cpu_count @@ -30,17 +44,32 @@ import sys + sys.setrecursionlimit(10000) -__all__ = ['sample', 'iter_sample', 'sample_posterior_predictive', - 'sample_posterior_predictive_w', 'init_nuts', - 'sample_prior_predictive', 'sample_ppc', 'sample_ppc_w'] +__all__ = [ + "sample", + "iter_sample", + "sample_posterior_predictive", + "sample_posterior_predictive_w", + "init_nuts", + "sample_prior_predictive", + "sample_ppc", + "sample_ppc_w", +] -STEP_METHODS = (NUTS, HamiltonianMC, Metropolis, BinaryMetropolis, - BinaryGibbsMetropolis, Slice, CategoricalGibbsMetropolis) +STEP_METHODS = ( + NUTS, + HamiltonianMC, + Metropolis, + BinaryMetropolis, + BinaryGibbsMetropolis, + Slice, + CategoricalGibbsMetropolis, +) -_log = logging.getLogger('pymc3') +_log = logging.getLogger("pymc3") def instantiate_steppers(model, steps, selected_steps, step_kwargs=None): @@ -81,7 +110,7 @@ def instantiate_steppers(model, steps, selected_steps, step_kwargs=None): unused_args = set(step_kwargs).difference(used_keys) if unused_args: - raise ValueError('Unused step method arguments: %s' % unused_args) + raise ValueError("Unused step method arguments: %s" % unused_args) if len(steps) == 1: steps = steps[0] @@ -89,8 +118,7 @@ def instantiate_steppers(model, steps, selected_steps, step_kwargs=None): return steps -def assign_step_methods(model, step=None, 
methods=STEP_METHODS, - step_kwargs=None): +def assign_step_methods(model, step=None, methods=STEP_METHODS, step_kwargs=None): """Assign model variables to appropriate step methods. Passing a specified model will auto-assign its constituent stochastic @@ -145,14 +173,15 @@ def assign_step_methods(model, step=None, methods=STEP_METHODS, if has_gradient: try: tg.grad(model.logpt, var) - except (AttributeError, - NotImplementedError, - tg.NullTypeGradError): + except (AttributeError, NotImplementedError, tg.NullTypeGradError): has_gradient = False # select the best method - selected = max(methods, key=lambda method, - var=var, has_gradient=has_gradient: - method._competence(var, has_gradient)) + selected = max( + methods, + key=lambda method, var=var, has_gradient=has_gradient: method._competence( + var, has_gradient + ), + ) selected_steps[selected].append(var) return instantiate_steppers(model, steps, selected_steps, step_kwargs) @@ -160,23 +189,43 @@ def assign_step_methods(model, step=None, methods=STEP_METHODS, def _print_step_hierarchy(s, level=0): if isinstance(s, (list, tuple)): - _log.info('>' * level + 'list') + _log.info(">" * level + "list") for i in s: - _print_step_hierarchy(i, level+1) + _print_step_hierarchy(i, level + 1) elif isinstance(s, CompoundStep): - _log.info('>' * level + 'CompoundStep') + _log.info(">" * level + "CompoundStep") for i in s.methods: - _print_step_hierarchy(i, level+1) + _print_step_hierarchy(i, level + 1) else: - varnames = ', '.join([get_untransformed_name(v.name) if is_transformed_name(v.name) - else v.name for v in s.vars]) - _log.info('>' * level + '{}: [{}]'.format(s.__class__.__name__, varnames)) - - -def sample(draws=500, step=None, init='auto', n_init=200000, start=None, trace=None, chain_idx=0, - chains=None, cores=None, tune=500, progressbar=True, - model=None, random_seed=None, discard_tuned_samples=True, - compute_convergence_checks=True, **kwargs): + varnames = ", ".join( + [ + get_untransformed_name(v.name) + if is_transformed_name(v.name) + else v.name + for v in s.vars + ] + ) + _log.info(">" * level + "{}: [{}]".format(s.__class__.__name__, varnames)) + + +def sample( + draws=500, + step=None, + init="auto", + n_init=200000, + start=None, + trace=None, + chain_idx=0, + chains=None, + cores=None, + tune=500, + progressbar=True, + model=None, + random_seed=None, + discard_tuned_samples=True, + compute_convergence_checks=True, + **kwargs +): """Draw samples from the posterior using the given step methods. Multiple step methods are supported via compound step methods. @@ -294,33 +343,38 @@ def sample(draws=500, step=None, init='auto', n_init=200000, start=None, trace=N """ model = modelcontext(model) - nuts_kwargs = kwargs.pop('nuts_kwargs', None) + nuts_kwargs = kwargs.pop("nuts_kwargs", None) if nuts_kwargs is not None: - warnings.warn("The nuts_kwargs argument has been deprecated. Pass step " - "method arguments directly to sample instead", - DeprecationWarning) + warnings.warn( + "The nuts_kwargs argument has been deprecated. Pass step " + "method arguments directly to sample instead", + DeprecationWarning, + ) kwargs.update(nuts_kwargs) - step_kwargs = kwargs.pop('step_kwargs', None) + step_kwargs = kwargs.pop("step_kwargs", None) if step_kwargs is not None: - warnings.warn("The step_kwargs argument has been deprecated. Pass step " - "method arguments directly to sample instead", - DeprecationWarning) + warnings.warn( + "The step_kwargs argument has been deprecated. 
Pass step " + "method arguments directly to sample instead", + DeprecationWarning, + ) kwargs.update(step_kwargs) if cores is None: cores = min(4, _cpu_count()) - - if 'njobs' in kwargs: - cores = kwargs['njobs'] + if "njobs" in kwargs: + cores = kwargs["njobs"] warnings.warn( "The njobs argument has been deprecated. Use cores instead.", - DeprecationWarning) - if 'nchains' in kwargs: - chains = kwargs['nchains'] + DeprecationWarning, + ) + if "nchains" in kwargs: + chains = kwargs["nchains"] warnings.warn( "The nchains argument has been deprecated. Use chains instead.", - DeprecationWarning) + DeprecationWarning, + ) if chains is None: chains = max(2, cores) if isinstance(start, dict): @@ -334,13 +388,13 @@ def sample(draws=500, step=None, init='auto', n_init=200000, start=None, trace=N np.random.seed(random_seed) random_seed = [np.random.randint(2 ** 30) for _ in range(chains)] if not isinstance(random_seed, Iterable): - raise TypeError( - 'Invalid value for `random_seed`. Must be tuple, list or int') - if 'chain' in kwargs: - chain_idx = kwargs['chain'] + raise TypeError("Invalid value for `random_seed`. Must be tuple, list or int") + if "chain" in kwargs: + chain_idx = kwargs["chain"] warnings.warn( "The chain argument has been deprecated. Use chain_idx instead.", - DeprecationWarning) + DeprecationWarning, + ) if start is not None: for start_vals in start: @@ -357,22 +411,30 @@ def sample(draws=500, step=None, init='auto', n_init=200000, start=None, trace=N draws += tune if model.ndim == 0: - raise ValueError('The model does not contain any free variables.') + raise ValueError("The model does not contain any free variables.") if step is None and init is not None and all_continuous(model.vars): try: # By default, try to use NUTS - _log.info('Auto-assigning NUTS sampler...') - start_, step = init_nuts(init=init, chains=chains, n_init=n_init, - model=model, random_seed=random_seed, - progressbar=progressbar, **kwargs) + _log.info("Auto-assigning NUTS sampler...") + start_, step = init_nuts( + init=init, + chains=chains, + n_init=n_init, + model=model, + random_seed=random_seed, + progressbar=progressbar, + **kwargs + ) if start is None: start = start_ except (AttributeError, NotImplementedError, tg.NullTypeGradError): # gradient computation failed - _log.info("Initializing NUTS failed. " - "Falling back to elementwise auto-assignment.") - _log.debug('Exception in init nuts', exec_info=True) + _log.info( + "Initializing NUTS failed. " + "Falling back to elementwise auto-assignment." 
+ ) + _log.debug("Exception in init nuts", exec_info=True) step = assign_step_methods(model, step, step_kwargs=kwargs) else: step = assign_step_methods(model, step, step_kwargs=kwargs) @@ -384,47 +446,53 @@ def sample(draws=500, step=None, init='auto', n_init=200000, start=None, trace=N if isinstance(start, dict): start = [start] * chains - sample_args = {'draws': draws, - 'step': step, - 'start': start, - 'trace': trace, - 'chain': chain_idx, - 'chains': chains, - 'tune': tune, - 'progressbar': progressbar, - 'model': model, - 'random_seed': random_seed, - 'cores': cores, } + sample_args = { + "draws": draws, + "step": step, + "start": start, + "trace": trace, + "chain": chain_idx, + "chains": chains, + "tune": tune, + "progressbar": progressbar, + "model": model, + "random_seed": random_seed, + "cores": cores, + } sample_args.update(kwargs) - has_population_samplers = np.any([isinstance(m, arraystep.PopulationArrayStepShared) - for m in (step.methods if isinstance(step, CompoundStep) else [step])]) + has_population_samplers = np.any( + [ + isinstance(m, arraystep.PopulationArrayStepShared) + for m in (step.methods if isinstance(step, CompoundStep) else [step]) + ] + ) parallel = cores > 1 and chains > 1 and not has_population_samplers if parallel: - _log.info('Multiprocess sampling ({} chains in {} jobs)'.format(chains, cores)) + _log.info("Multiprocess sampling ({} chains in {} jobs)".format(chains, cores)) _print_step_hierarchy(step) try: trace = _mp_sample(**sample_args) except pickle.PickleError: _log.warning("Could not pickle model, sampling singlethreaded.") - _log.debug('Pickling error:', exec_info=True) + _log.debug("Pickling error:", exec_info=True) parallel = False except AttributeError as e: if str(e).startswith("AttributeError: Can't pickle"): _log.warning("Could not pickle model, sampling singlethreaded.") - _log.debug('Pickling error:', exec_info=True) + _log.debug("Pickling error:", exec_info=True) parallel = False else: raise if not parallel: if has_population_samplers: - _log.info('Population sampling ({} chains)'.format(chains)) + _log.info("Population sampling ({} chains)".format(chains)) _print_step_hierarchy(step) trace = _sample_population(**sample_args, parallelize=cores > 1) else: - _log.info('Sequential sampling ({} chains in 1 job)'.format(chains)) + _log.info("Sequential sampling ({} chains in 1 job)".format(chains)) _print_step_hierarchy(step) trace = _sample_many(**sample_args) @@ -432,8 +500,10 @@ def sample(draws=500, step=None, init='auto', n_init=200000, start=None, trace=N trace = trace[discard:] if compute_convergence_checks: - if draws-tune < 100: - warnings.warn("The number of samples is too small to check convergence reliably.") + if draws - tune < 100: + warnings.warn( + "The number of samples is too small to check convergence reliably." 
+ ) else: trace.report._run_convergence_checks(trace, model) @@ -445,7 +515,7 @@ def sample(draws=500, step=None, init='auto', n_init=200000, start=None, trace=N def _check_start_shape(model, start): if not isinstance(start, dict): raise TypeError("start argument must be a dict or an array-like of dicts") - e = '' + e = "" for var in model.vars: if var.name in start.keys(): var_shape = var.shape.tag.test_value @@ -459,23 +529,28 @@ def _check_start_shape(model, start): else: # if model var has a specified shape if var_shape.size > 0: - e += "\nExpected shape {} for var " \ - "'{}', got scalar {}".format( - tuple(var_shape), var.name, start[var.name] - ) + e += "\nExpected shape {} for var " "'{}', got scalar {}".format( + tuple(var_shape), var.name, start[var.name] + ) - if e != '': + if e != "": raise ValueError("Bad shape for start argument:{}".format(e)) def _sample_many(draws, chain, chains, start, random_seed, step, **kwargs): traces = [] for i in range(chains): - trace = _sample(draws=draws, chain=chain + i, start=start[i], - step=step, random_seed=random_seed[i], **kwargs) + trace = _sample( + draws=draws, + chain=chain + i, + start=start[i], + step=step, + random_seed=random_seed[i], + **kwargs + ) if trace is None: if len(traces) == 0: - raise ValueError('Sampling stopped before a sample was created.') + raise ValueError("Sampling stopped before a sample was created.") else: break elif len(trace) < draws: @@ -487,12 +562,31 @@ def _sample_many(draws, chain, chains, start, random_seed, step, **kwargs): return MultiTrace(traces) -def _sample_population(draws, chain, chains, start, random_seed, step, tune, - model, progressbar=None, parallelize=False, **kwargs): +def _sample_population( + draws, + chain, + chains, + start, + random_seed, + step, + tune, + model, + progressbar=None, + parallelize=False, + **kwargs +): # create the generator that iterates all chains in parallel chains = [chain + c for c in range(chains)] - sampling = _prepare_iter_population(draws, chains, step, start, parallelize, - tune=tune, model=model, random_seed=random_seed) + sampling = _prepare_iter_population( + draws, + chains, + step, + start, + parallelize, + tune=tune, + model=model, + random_seed=random_seed, + ) sampling = progress_bar(sampling, total=draws, display=progressbar) @@ -502,12 +596,21 @@ def _sample_population(draws, chain, chains, start, random_seed, step, tune, return MultiTrace(latest_traces) -def _sample(chain, progressbar, random_seed, start, draws=None, step=None, - trace=None, tune=None, model=None, **kwargs): - skip_first = kwargs.get('skip_first', 0) - - sampling = _iter_sample(draws, step, start, trace, chain, - tune, model, random_seed) +def _sample( + chain, + progressbar, + random_seed, + start, + draws=None, + step=None, + trace=None, + tune=None, + model=None, + **kwargs +): + skip_first = kwargs.get("skip_first", 0) + + sampling = _iter_sample(draws, step, start, trace, chain, tune, model, random_seed) _pbar_data = None _pbar_data = {"chain": chain, "divergences": 0} _desc = "Sampling chain {chain:d}, {divergences:,d} divergences" @@ -526,8 +629,16 @@ def _sample(chain, progressbar, random_seed, start, draws=None, step=None, return strace -def iter_sample(draws, step, start=None, trace=None, chain=0, tune=None, - model=None, random_seed=None): +def iter_sample( + draws, + step, + start=None, + trace=None, + chain=0, + tune=None, + model=None, + random_seed=None, +): """Generator that returns a trace on each iteration using the given step method. 
Multiple step methods supported via compound step method returns the amount of time taken. @@ -561,20 +672,27 @@ def iter_sample(draws, step, start=None, trace=None, chain=0, tune=None, for trace in iter_sample(500, step): ... """ - sampling = _iter_sample(draws, step, start, trace, chain, tune, - model, random_seed) + sampling = _iter_sample(draws, step, start, trace, chain, tune, model, random_seed) for i, (strace, _) in enumerate(sampling): - yield MultiTrace([strace[:i + 1]]) - - -def _iter_sample(draws, step, start=None, trace=None, chain=0, tune=None, - model=None, random_seed=None): + yield MultiTrace([strace[: i + 1]]) + + +def _iter_sample( + draws, + step, + start=None, + trace=None, + chain=0, + tune=None, + model=None, + random_seed=None, +): model = modelcontext(model) draws = int(draws) if random_seed is not None: np.random.seed(random_seed) if draws < 1: - raise ValueError('Argument `draws` must be greater than 0.') + raise ValueError("Argument `draws` must be greater than 0.") if start is None: start = {} @@ -601,7 +719,7 @@ def _iter_sample(draws, step, start=None, trace=None, chain=0, tune=None, try: step.tune = bool(tune) for i in range(draws): - if i == 0 and hasattr(step, 'iter_count'): + if i == 0 and hasattr(step, "iter_count"): step.iter_count = 0 if i == tune: step = stop_tuning(step) @@ -609,7 +727,7 @@ def _iter_sample(draws, step, start=None, trace=None, chain=0, tune=None, point, stats = step.step(point) if strace.supports_sampler_stats: strace.record(point, stats) - diverging = i > tune and stats and stats[0].get('diverging') + diverging = i > tune and stats and stats[0].get("diverging") else: strace.record(point) else: @@ -619,7 +737,7 @@ def _iter_sample(draws, step, start=None, trace=None, chain=0, tune=None, yield strace, diverging except KeyboardInterrupt: strace.close() - if hasattr(step, 'warnings'): + if hasattr(step, "warnings"): warns = step.warnings() strace._add_warnings(warns) raise @@ -628,7 +746,7 @@ def _iter_sample(draws, step, start=None, trace=None, chain=0, tune=None, raise else: strace.close() - if hasattr(step, 'warnings'): + if hasattr(step, "warnings"): warns = step.warnings() strace._add_warnings(warns) @@ -657,15 +775,18 @@ def __init__(self, steppers, parallelize): if parallelize: try: # configure a child process for each stepper - _log.info('Attempting to parallelize chains to all cores. You can turn this off with `pm.sample(cores=1)`.') + _log.info( + "Attempting to parallelize chains to all cores. You can turn this off with `pm.sample(cores=1)`." + ) import multiprocessing + for c, stepper in enumerate(progress_bar(steppers)): slave_end, master_end = multiprocessing.Pipe() stepper_dumps = pickle.dumps(stepper, protocol=4) process = multiprocessing.Process( target=self.__class__._run_slave, args=(c, stepper_dumps, slave_end), - name='ChainWalker{}'.format(c) + name="ChainWalker{}".format(c), ) # we want the child process to exit if the parent is terminated process.daemon = True @@ -677,12 +798,16 @@ def __init__(self, steppers, parallelize): self._processes.append(process) self.is_parallelized = True except Exception: - _log.info('Population parallelization failed. ' - 'Falling back to sequential stepping of chains.') - _log.debug('Error was: ', exec_info=True) + _log.info( + "Population parallelization failed. " + "Falling back to sequential stepping of chains." + ) + _log.debug("Error was: ", exec_info=True) else: - _log.info('Chains are not parallelized. 
You can enable this by passing ' - '`pm.sample(cores=n)`, where n > 1.') + _log.info( + "Chains are not parallelized. You can enable this by passing " + "`pm.sample(cores=n)`, where n > 1." + ) return super().__init__() def __enter__(self): @@ -697,7 +822,7 @@ def __exit__(self, exc_type, exc_val, exc_tb): for process in self._processes: process.join(timeout=3) except Exception: - _log.warning('Termination failed.') + _log.warning("Termination failed.") return @staticmethod @@ -721,7 +846,9 @@ def _run_slave(c, stepper_dumps, slave_end): # but rather a CompoundStep. PopulationArrayStepShared.population # has to be updated, therefore we identify the substeppers first. population_steppers = [] - for sm in (stepper.methods if isinstance(stepper, CompoundStep) else [stepper]): + for sm in ( + stepper.methods if isinstance(stepper, CompoundStep) else [stepper] + ): if isinstance(sm, arraystep.PopulationArrayStepShared): population_steppers.append(sm) while True: @@ -740,7 +867,7 @@ def _run_slave(c, stepper_dumps, slave_end): update = stepper.step(population[c]) slave_end.send(update) except Exception: - _log.exception('ChainWalker{}'.format(c)) + _log.exception("ChainWalker{}".format(c)) return def step(self, tune_stop, population): @@ -773,8 +900,9 @@ def step(self, tune_stop, population): return updates -def _prepare_iter_population(draws, chains, step, start, parallelize, tune=None, - model=None, random_seed=None): +def _prepare_iter_population( + draws, chains, step, start, parallelize, tune=None, model=None, random_seed=None +): """Prepares a PopulationStepper and traces for population sampling. Returns @@ -789,7 +917,7 @@ def _prepare_iter_population(draws, chains, step, start, parallelize, tune=None, if random_seed is not None: np.random.seed(random_seed) if draws < 1: - raise ValueError('Argument `draws` should be above 0.') + raise ValueError("Argument `draws` should be above 0.") # The initialization of traces, samplers and points must happen in the right order: # 1. 
traces are initialized and update_start_vals configures variable transforms @@ -821,7 +949,7 @@ def _prepare_iter_population(draws, chains, step, start, parallelize, tune=None, else: chainstep = copy(step) # link population samplers to the shared population state - for sm in (chainstep.methods if isinstance(step, CompoundStep) else [chainstep]): + for sm in chainstep.methods if isinstance(step, CompoundStep) else [chainstep]: if isinstance(sm, arraystep.PopulationArrayStepShared): sm.link_population(population, c) steppers[c] = chainstep @@ -881,7 +1009,7 @@ def _iter_population(draws, tune, popstep, steppers, traces, points): except KeyboardInterrupt: for c, strace in enumerate(traces): strace.close() - if hasattr(steppers[c], 'report'): + if hasattr(steppers[c], "report"): steppers[c].report._finalize(strace) raise except BaseException: @@ -891,7 +1019,7 @@ def _iter_population(draws, tune, popstep, steppers, traces, points): else: for c, strace in enumerate(traces): strace.close() - if hasattr(steppers[c], 'report'): + if hasattr(steppers[c], "report"): steppers[c].report._finalize(strace) @@ -907,19 +1035,32 @@ def _choose_backend(trace, chain, shortcuts=None, **kwds): shortcuts = pm.backends._shortcuts try: - backend = shortcuts[trace]['backend'] - name = shortcuts[trace]['name'] + backend = shortcuts[trace]["backend"] + name = shortcuts[trace]["name"] return backend(name, **kwds) except TypeError: return NDArray(vars=trace, **kwds) except KeyError: - raise ValueError('Argument `trace` is invalid.') - - -def _mp_sample(draws, tune, step, chains, cores, chain, random_seed, - start, progressbar, trace=None, model=None, **kwargs): + raise ValueError("Argument `trace` is invalid.") + + +def _mp_sample( + draws, + tune, + step, + chains, + cores, + chain, + random_seed, + start, + progressbar, + trace=None, + model=None, + **kwargs +): import pymc3.parallel_sampling as ps + # We did draws += tune in pm.sample draws -= tune @@ -939,15 +1080,14 @@ def _mp_sample(draws, tune, step, chains, cores, chain, random_seed, traces.append(strace) sampler = ps.ParallelSampler( - draws, tune, chains, cores, random_seed, start, step, - chain, progressbar) + draws, tune, chains, cores, random_seed, start, step, chain, progressbar + ) try: try: with sampler: for draw in sampler: trace = traces[draw.chain - chain] - if (trace.supports_sampler_stats - and draw.stats is not None): + if trace.supports_sampler_stats and draw.stats is not None: trace.record(draw.point, draw.stats) else: trace.record(draw.point) @@ -982,7 +1122,7 @@ def _choose_chains(traces, tune): lengths = [max(0, len(trace) - tune) for trace in traces] if not sum(lengths): - raise ValueError('Not enough samples to build a trace.') + raise ValueError("Not enough samples to build a trace.") idxs = np.argsort(lengths)[::-1] l_sort = np.array(lengths)[idxs] @@ -1008,8 +1148,9 @@ def stop_tuning(step): step.stop_tuning() return step -class _DefaultTrace(): - ''' + +class _DefaultTrace: + """ This class is a utility for collecting a number of samples into a dictionary. Name comes from its similarity to `defaultdict` -- entries are lazily created. @@ -1026,15 +1167,17 @@ class _DefaultTrace(): A dictionary constituting a trace. 
Should be extracted after a procedure has filled the `_DefaultTrace` using the `insert()` method - ''' - trace_dict = {} # type: Dict[str, np.ndarray] - _len = None # type: int + """ + + trace_dict = {} # type: Dict[str, np.ndarray] + _len = None # type: int + def __init__(self, samples): self._len = samples self.trace_dict = {} def insert(self, k: str, v, idx: int): - ''' + """ Insert `v` as the value of the `idx`th sample for the variable `k`. Parameters @@ -1045,9 +1188,9 @@ def insert(self, k: str, v, idx: int): The value of the `idx`th sample from variable `k` ids : int The index of the sample we are inserting into the trace. - ''' - if hasattr(v, 'shape'): - value_shape = tuple(v.shape) # type: Tuple[int, ...] + """ + if hasattr(v, "shape"): + value_shape = tuple(v.shape) # type: Tuple[int, ...] else: value_shape = () @@ -1060,18 +1203,20 @@ def insert(self, k: str, v, idx: int): if value_shape == (): self.trace_dict[k][idx] = v else: - self.trace_dict[k][idx,:] = v - - -def sample_posterior_predictive(trace, - samples: Optional[int]=None, - model: Optional[Model]=None, - vars: Optional[TIterable[Tensor]]=None, - var_names: Optional[List[str]]=None, - size: Optional[int]=None, - keep_size: Optional[bool]=False, - random_seed=None, - progressbar: bool=True) -> Dict[str, np.ndarray]: + self.trace_dict[k][idx, :] = v + + +def sample_posterior_predictive( + trace, + samples: Optional[int] = None, + model: Optional[Model] = None, + vars: Optional[TIterable[Tensor]] = None, + var_names: Optional[List[str]] = None, + size: Optional[int] = None, + keep_size: Optional[bool] = False, + random_seed=None, + progressbar: bool = True, +) -> Dict[str, np.ndarray]: """Generate posterior predictive samples from a model given a trace. Parameters @@ -1119,28 +1264,37 @@ def sample_posterior_predictive(trace, nchain = 1 if keep_size and samples is not None: - raise IncorrectArgumentsError("Should not specify both keep_size and samples argukments") + raise IncorrectArgumentsError( + "Should not specify both keep_size and samples argukments" + ) if keep_size and size is not None: - raise IncorrectArgumentsError("Should not specify both keep_size and size argukments") + raise IncorrectArgumentsError( + "Should not specify both keep_size and size argukments" + ) if samples is None: samples = sum(len(v) for v in trace._straces.values()) if samples < len_trace * nchain: - warnings.warn("samples parameter is smaller than nchains times ndraws, some draws " - "and/or chains may not be represented in the returned posterior " - "predictive sample") + warnings.warn( + "samples parameter is smaller than nchains times ndraws, some draws " + "and/or chains may not be represented in the returned posterior " + "predictive sample" + ) model = modelcontext(model) if var_names is not None: if vars is not None: - raise IncorrectArgumentsError("Should not specify both vars and var_names arguments.") + raise IncorrectArgumentsError( + "Should not specify both vars and var_names arguments." + ) else: vars = [model[x] for x in var_names] - elif vars is not None: # var_names is None, and vars is not. - warnings.warn("vars argument is deprecated in favor of var_names.", - DeprecationWarning) + elif vars is not None: # var_names is None, and vars is not. 
+ warnings.warn( + "vars argument is deprecated in favor of var_names.", DeprecationWarning + ) if vars is None: vars = model.observed_RVs @@ -1149,7 +1303,6 @@ def sample_posterior_predictive(trace, indices = np.arange(samples) - indices = progress_bar(indices, total=samples, display=progressbar) ppc_trace_t = _DefaultTrace(samples) @@ -1178,13 +1331,14 @@ def sample_posterior_predictive(trace, def sample_ppc(*args, **kwargs): """This method is deprecated. Please use :func:`~sampling.sample_posterior_predictive`""" - message = 'sample_ppc() is deprecated. Please use sample_posterior_predictive()' + message = "sample_ppc() is deprecated. Please use sample_posterior_predictive()" warnings.warn(message, DeprecationWarning, stacklevel=2) return sample_posterior_predictive(*args, **kwargs) -def sample_posterior_predictive_w(traces, samples=None, models=None, weights=None, - random_seed=None, progressbar=True): +def sample_posterior_predictive_w( + traces, samples=None, models=None, weights=None, random_seed=None, progressbar=True +): """Generate weighted posterior predictive samples from a list of models and a list of traces according to a set of weights. @@ -1226,22 +1380,21 @@ def sample_posterior_predictive_w(traces, samples=None, models=None, weights=Non weights = [1] * len(traces) if len(traces) != len(weights): - raise ValueError('The number of traces and weights should be the same') + raise ValueError("The number of traces and weights should be the same") if len(models) != len(weights): - raise ValueError('The number of models and weights should be the same') + raise ValueError("The number of models and weights should be the same") length_morv = len(models[0].observed_RVs) if not all(len(i.observed_RVs) == length_morv for i in models): - raise ValueError( - 'The number of observed RVs should be the same for all models') + raise ValueError("The number of observed RVs should be the same for all models") weights = np.asarray(weights) p = weights / np.sum(weights) min_tr = min([len(i) * i.nchains for i in traces]) - n = (min_tr * p).astype('int') + n = (min_tr * p).astype("int") # ensure n sum up to min_tr idx = np.argmax(n) n[idx] = n[idx] + min_tr - np.sum(n) @@ -1271,7 +1424,7 @@ def sample_posterior_predictive_w(traces, samples=None, models=None, weights=Non if len(lengths) == 1: size = [None for i in variables] elif len(lengths) > 2: - raise ValueError('Observed variables could not be broadcast together') + raise ValueError("Observed variables could not be broadcast together") else: size = [] x = np.zeros(shape=lengths[0]) @@ -1299,10 +1452,7 @@ def sample_posterior_predictive_w(traces, samples=None, models=None, weights=Non var = variables[idx] # TODO sample_posterior_predictive_w is currently only work for model with # one observed. - ppc[var.name].append(draw_values([var], - point=param, - size=size[idx] - )[0]) + ppc[var.name].append(draw_values([var], point=param, size=size[idx])[0]) except KeyboardInterrupt: pass @@ -1312,16 +1462,18 @@ def sample_posterior_predictive_w(traces, samples=None, models=None, weights=Non def sample_ppc_w(*args, **kwargs): """This method is deprecated. Please use :func:`~sampling.sample_posterior_predictive_w`""" - message = 'sample_ppc() is deprecated. Please use sample_posterior_predictive_w()' + message = "sample_ppc() is deprecated. 
Please use sample_posterior_predictive_w()" warnings.warn(message, DeprecationWarning, stacklevel=2) return sample_posterior_predictive_w(*args, **kwargs) -def sample_prior_predictive(samples=500, - model: Optional[Model]=None, - vars: Optional[TIterable[str]] = None, - var_names: Optional[TIterable[str]] = None, - random_seed=None) -> Dict[str, np.ndarray]: +def sample_prior_predictive( + samples=500, + model: Optional[Model] = None, + vars: Optional[TIterable[str]] = None, + var_names: Optional[TIterable[str]] = None, + random_seed=None, +) -> Dict[str, np.ndarray]: """Generate samples from the prior predictive distribution. Parameters @@ -1349,8 +1501,8 @@ def sample_prior_predictive(samples=500, if vars is None and var_names is None: prior_pred_vars = model.observed_RVs prior_vars = ( - get_default_varnames(model.unobserved_RVs, include_transformed=True) + - model.potentials + get_default_varnames(model.unobserved_RVs, include_transformed=True) + + model.potentials ) vars_ = [var.name for var in prior_vars + prior_pred_vars] vars = set(vars_) @@ -1358,12 +1510,13 @@ def sample_prior_predictive(samples=500, vars = var_names vars_ = vars elif vars is not None: - warnings.warn("vars argument is deprecated in favor of var_names.", - DeprecationWarning) + warnings.warn( + "vars argument is deprecated in favor of var_names.", DeprecationWarning + ) vars_ = vars else: raise ValueError("Cannot supply both vars and var_names arguments.") - vars = cast(TIterable[str], vars) # tell mypy that vars cannot be None here. + vars = cast(TIterable[str], vars) # tell mypy that vars cannot be None here. if random_seed is not None: np.random.seed(random_seed) @@ -1373,9 +1526,9 @@ def sample_prior_predictive(samples=500, data = {k: v for k, v in zip(names, values)} if data is None: - raise AssertionError("No variables sampled: attempting to sample %s"%names) + raise AssertionError("No variables sampled: attempting to sample %s" % names) - prior = {} # type: Dict[str, np.ndarray] + prior = {} # type: Dict[str, np.ndarray] for var_name in vars: if var_name in data: prior[var_name] = data[var_name] @@ -1383,12 +1536,20 @@ def sample_prior_predictive(samples=500, untransformed = get_untransformed_name(var_name) if untransformed in data: prior[var_name] = model[untransformed].transformation.forward_val( - data[untransformed]) + data[untransformed] + ) return prior -def init_nuts(init='auto', chains=1, n_init=500000, model=None, - random_seed=None, progressbar=True, **kwargs): +def init_nuts( + init="auto", + chains=1, + n_init=500000, + model=None, + random_seed=None, + progressbar=True, + **kwargs +): """Set up the mass matrix initialization for NUTS. NUTS convergence and sampling speed is extremely dependent on the @@ -1438,42 +1599,40 @@ def init_nuts(init='auto', chains=1, n_init=500000, model=None, """ model = modelcontext(model) - vars = kwargs.get('vars', model.vars) + vars = kwargs.get("vars", model.vars) if set(vars) != set(model.vars): - raise ValueError('Must use init_nuts on all variables of a model.') + raise ValueError("Must use init_nuts on all variables of a model.") if not all_continuous(vars): - raise ValueError('init_nuts can only be used for models with only ' - 'continuous variables.') + raise ValueError( + "init_nuts can only be used for models with only " "continuous variables." 
+ ) if not isinstance(init, str): - raise TypeError('init must be a string.') + raise TypeError("init must be a string.") if init is not None: init = init.lower() - if init == 'auto': - init = 'jitter+adapt_diag' + if init == "auto": + init = "jitter+adapt_diag" - _log.info('Initializing NUTS using {}...'.format(init)) + _log.info("Initializing NUTS using {}...".format(init)) if random_seed is not None: random_seed = int(np.atleast_1d(random_seed)[0]) np.random.seed(random_seed) cb = [ - pm.callbacks.CheckParametersConvergence( - tolerance=1e-2, diff='absolute'), - pm.callbacks.CheckParametersConvergence( - tolerance=1e-2, diff='relative'), + pm.callbacks.CheckParametersConvergence(tolerance=1e-2, diff="absolute"), + pm.callbacks.CheckParametersConvergence(tolerance=1e-2, diff="relative"), ] - if init == 'adapt_diag': + if init == "adapt_diag": start = [model.test_point] * chains mean = np.mean([model.dict_to_array(vals) for vals in start], axis=0) var = np.ones_like(mean) - potential = quadpotential.QuadPotentialDiagAdapt( - model.ndim, mean, var, 10) - elif init == 'jitter+adapt_diag': + potential = quadpotential.QuadPotentialDiagAdapt(model.ndim, mean, var, 10) + elif init == "jitter+adapt_diag": start = [] for _ in range(chains): mean = {var: val.copy() for var, val in model.test_point.items()} @@ -1482,12 +1641,13 @@ def init_nuts(init='auto', chains=1, n_init=500000, model=None, start.append(mean) mean = np.mean([model.dict_to_array(vals) for vals in start], axis=0) var = np.ones_like(mean) - potential = quadpotential.QuadPotentialDiagAdapt( - model.ndim, mean, var, 10) - elif init == 'advi+adapt_diag_grad': + potential = quadpotential.QuadPotentialDiagAdapt(model.ndim, mean, var, 10) + elif init == "advi+adapt_diag_grad": approx = pm.fit( random_seed=random_seed, - n=n_init, method='advi', model=model, + n=n_init, + method="advi", + model=model, callbacks=cb, progressbar=progressbar, obj_optimizer=pm.adagrad_window, @@ -1500,11 +1660,14 @@ def init_nuts(init='auto', chains=1, n_init=500000, model=None, mean = model.dict_to_array(mean) weight = 50 potential = quadpotential.QuadPotentialDiagAdaptGrad( - model.ndim, mean, cov, weight) - elif init == 'advi+adapt_diag': + model.ndim, mean, cov, weight + ) + elif init == "advi+adapt_diag": approx = pm.fit( random_seed=random_seed, - n=n_init, method='advi', model=model, + n=n_init, + method="advi", + model=model, callbacks=cb, progressbar=progressbar, obj_optimizer=pm.adagrad_window, @@ -1516,51 +1679,52 @@ def init_nuts(init='auto', chains=1, n_init=500000, model=None, mean = approx.bij.rmap(approx.mean.get_value()) mean = model.dict_to_array(mean) weight = 50 - potential = quadpotential.QuadPotentialDiagAdapt( - model.ndim, mean, cov, weight) - elif init == 'advi': + potential = quadpotential.QuadPotentialDiagAdapt(model.ndim, mean, cov, weight) + elif init == "advi": approx = pm.fit( random_seed=random_seed, - n=n_init, method='advi', model=model, + n=n_init, + method="advi", + model=model, callbacks=cb, progressbar=progressbar, - obj_optimizer=pm.adagrad_window + obj_optimizer=pm.adagrad_window, ) # type: pm.MeanField start = approx.sample(draws=chains) start = list(start) stds = approx.bij.rmap(approx.std.eval()) cov = model.dict_to_array(stds) ** 2 potential = quadpotential.QuadPotentialDiag(cov) - elif init == 'advi_map': + elif init == "advi_map": start = pm.find_MAP(include_transformed=True) approx = pm.MeanField(model=model, start=start) pm.fit( random_seed=random_seed, - n=n_init, method=pm.KLqp(approx), + n=n_init, + 
method=pm.KLqp(approx), callbacks=cb, progressbar=progressbar, - obj_optimizer=pm.adagrad_window + obj_optimizer=pm.adagrad_window, ) start = approx.sample(draws=chains) start = list(start) stds = approx.bij.rmap(approx.std.eval()) cov = model.dict_to_array(stds) ** 2 potential = quadpotential.QuadPotentialDiag(cov) - elif init == 'map': + elif init == "map": start = pm.find_MAP(include_transformed=True) cov = pm.find_hessian(point=start) start = [start] * chains potential = quadpotential.QuadPotentialFull(cov) - elif init == 'nuts': - init_trace = pm.sample(draws=n_init, step=pm.NUTS(), - tune=n_init // 2, - random_seed=random_seed) + elif init == "nuts": + init_trace = pm.sample( + draws=n_init, step=pm.NUTS(), tune=n_init // 2, random_seed=random_seed + ) cov = np.atleast_1d(pm.trace_cov(init_trace)) start = list(np.random.choice(init_trace, chains)) potential = quadpotential.QuadPotentialFull(cov) else: - raise ValueError( - 'Unknown initializer: {}.'.format(init)) + raise ValueError("Unknown initializer: {}.".format(init)) step = pm.NUTS(potential=potential, model=model, **kwargs) diff --git a/pymc3/smc/smc.py b/pymc3/smc/smc.py index 0e750f6e52..8a614eb895 100644 --- a/pymc3/smc/smc.py +++ b/pymc3/smc/smc.py @@ -91,7 +91,9 @@ def initialize_population(self): var_info = OrderedDict() if self.start is None: init_rnd = sample_prior_predictive( - self.draws, var_names=[v.name for v in self.model.unobserved_RVs], model=self.model + self.draws, + var_names=[v.name for v in self.model.unobserved_RVs], + model=self.model, ) else: init_rnd = self.start @@ -103,7 +105,9 @@ def initialize_population(self): for i in range(self.draws): - point = Point({v.name: init_rnd[v.name][i] for v in self.variables}, model=self.model) + point = Point( + {v.name: init_rnd[v.name][i] for v in self.variables}, model=self.model + ) population.append(self.model.dict_to_array(point)) self.posterior = np.array(floatX(population)) @@ -119,7 +123,9 @@ def setup_kernel(self): if self.kernel.lower() == "abc": warnings.warn(EXPERIMENTAL_WARNING) if len(self.model.observed_RVs) != 1: - warnings.warn("SMC-ABC only works properly with models with one observed variable") + warnings.warn( + "SMC-ABC only works properly with models with one observed variable" + ) simulator = self.model.observed_RVs[0] self.likelihood_logp = PseudoLikelihood( self.epsilon, @@ -131,7 +137,9 @@ def setup_kernel(self): self.sum_stat, ) elif self.kernel.lower() == "metropolis": - self.likelihood_logp = logp_forw([self.model.datalogpt], self.variables, shared) + self.likelihood_logp = logp_forw( + [self.model.datalogpt], self.variables, shared + ) def initialize_logp(self): """ @@ -139,7 +147,9 @@ def initialize_logp(self): """ if self.parallel and self.cores > 1: self.pool = mp.Pool(processes=self.cores) - priors = self.pool.starmap(self.prior_logp, [(sample,) for sample in self.posterior]) + priors = self.pool.starmap( + self.prior_logp, [(sample,) for sample in self.posterior] + ) likelihoods = self.pool.starmap( self.likelihood_logp, [(sample,) for sample in self.posterior] ) @@ -204,14 +214,18 @@ def update_proposal(self): cov = np.atleast_2d(cov) cov += 1e-6 * np.eye(cov.shape[0]) if np.isnan(cov).any() or np.isinf(cov).any(): - raise ValueError('Sample covariances not valid! Likely "draws" is too small!') + raise ValueError( + 'Sample covariances not valid! Likely "draws" is too small!' + ) self.proposal = MultivariateNormalProposal(cov) def tune(self): """ Tune scaling and n_steps based on the acceptance rate. 
""" - ave_scaling = np.exp(np.log(self.scalings.mean()) + (self.acc_per_chain.mean() - 0.234)) + ave_scaling = np.exp( + np.log(self.scalings.mean()) + (self.acc_per_chain.mean() - 0.234) + ) self.scalings = 0.5 * ( ave_scaling + np.exp(np.log(self.scalings) + (self.acc_per_chain - 0.234)) ) @@ -219,7 +233,8 @@ def tune(self): if self.tune_steps: acc_rate = max(1.0 / self.proposed, self.acc_rate) self.n_steps = min( - self.max_steps, max(2, int(np.log(1 - self.p_acc_rate) / np.log(1 - acc_rate))) + self.max_steps, + max(2, int(np.log(1 - self.p_acc_rate) / np.log(1 - acc_rate))), ) self.proposed = self.draws * self.n_steps @@ -335,7 +350,9 @@ def metrop_kernel( new_tempered_logp = pl + ll * beta - q_old, accept = metrop_select(new_tempered_logp - old_tempered_logp, q_new, q_old) + q_old, accept = metrop_select( + new_tempered_logp - old_tempered_logp, q_new, q_old + ) if accept: accepted += 1 @@ -369,7 +386,9 @@ class PseudoLikelihood: Pseudo Likelihood """ - def __init__(self, epsilon, observations, function, model, var_info, distance, sum_stat): + def __init__( + self, epsilon, observations, function, model, var_info, distance, sum_stat + ): """ epsilon : float Standard deviation of the gaussian pseudo likelihood. @@ -419,7 +438,9 @@ def posterior_to_function(self, posterior): def gauss_kernel(self, value): epsilon = self.epsilon - return (-(value ** 2) / epsilon ** 2 + np.log(1 / (2 * np.pi * epsilon ** 2))) / 2.0 + return ( + -(value ** 2) / epsilon ** 2 + np.log(1 / (2 * np.pi * epsilon ** 2)) + ) / 2.0 def absolute_error(self, a, b): if self.sum_stat: diff --git a/pymc3/tuning/starting.py b/pymc3/tuning/starting.py index a76dbeaa00..466ece4c4d 100644 --- a/pymc3/tuning/starting.py +++ b/pymc3/tuning/starting.py @@ -1,8 +1,8 @@ -''' +""" Created on Mar 12, 2011 @author: johnsalvatier -''' +""" from scipy.optimize import minimize import numpy as np from numpy import isfinite, nan_to_num @@ -18,12 +18,21 @@ import warnings from inspect import getargspec -__all__ = ['find_MAP'] - - -def find_MAP(start=None, vars=None, method="L-BFGS-B", - return_raw=False, include_transformed=True, progressbar=True, maxeval=5000, model=None, - *args, **kwargs): +__all__ = ["find_MAP"] + + +def find_MAP( + start=None, + vars=None, + method="L-BFGS-B", + return_raw=False, + include_transformed=True, + progressbar=True, + maxeval=5000, + model=None, + *args, + **kwargs +): """ Finds the local maximum a posteriori point given a model. @@ -58,7 +67,9 @@ def find_MAP(start=None, vars=None, method="L-BFGS-B", wrapped it inside pymc3.sample() and you should thus avoid this method. """ - warnings.warn('find_MAP should not be used to initialize the NUTS sampler, simply call pymc3.sample() and it will automatically initialize NUTS in a better way.') + warnings.warn( + "find_MAP should not be used to initialize the NUTS sampler, simply call pymc3.sample() and it will automatically initialize NUTS in a better way." 
+ ) model = modelcontext(model) if start is None: @@ -67,10 +78,12 @@ def find_MAP(start=None, vars=None, method="L-BFGS-B", update_start_vals(start, model.test_point, model) if not set(start.keys()).issubset(model.named_vars.keys()): - extra_keys = ', '.join(set(start.keys()) - set(model.named_vars.keys())) - valid_keys = ', '.join(model.named_vars.keys()) - raise KeyError('Some start parameters do not appear in the model!\n' - 'Valid keys are: {}, but {} was supplied'.format(valid_keys, extra_keys)) + extra_keys = ", ".join(set(start.keys()) - set(model.named_vars.keys())) + valid_keys = ", ".join(model.named_vars.keys()) + raise KeyError( + "Some start parameters do not appear in the model!\n" + "Valid keys are: {}, but {} was supplied".format(valid_keys, extra_keys) + ) if vars is None: vars = model.cont_vars @@ -90,29 +103,37 @@ def find_MAP(start=None, vars=None, method="L-BFGS-B", compute_gradient = False if disc_vars or not compute_gradient: - pm._log.warning("Warning: gradient not available." + - "(E.g. vars contains discrete variables). MAP " + - "estimates may not be accurate for the default " + - "parameters. Defaulting to non-gradient minimization " + - "'Powell'.") + pm._log.warning( + "Warning: gradient not available." + + "(E.g. vars contains discrete variables). MAP " + + "estimates may not be accurate for the default " + + "parameters. Defaulting to non-gradient minimization " + + "'Powell'." + ) method = "Powell" if "fmin" in kwargs: fmin = kwargs.pop("fmin") - warnings.warn('In future versions, set the optimization algorithm with a string. ' - 'For example, use `method="L-BFGS-B"` instead of ' - '`fmin=sp.optimize.fmin_l_bfgs_b"`.') + warnings.warn( + "In future versions, set the optimization algorithm with a string. " + 'For example, use `method="L-BFGS-B"` instead of ' + '`fmin=sp.optimize.fmin_l_bfgs_b"`.' 
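(Per the deprecation message above, the optimizer is now selected with a `method` string rather than a `fmin` callable. A small sketch under that assumption — the model here is hypothetical:)

```python
import pymc3 as pm

with pm.Model():
    x = pm.Normal("x", mu=0.0, sigma=1.0)

    map_lbfgs = pm.find_MAP(method="L-BFGS-B")   # gradient-based default
    map_powell = pm.find_MAP(method="Powell")    # gradient-free, the fallback used when no gradient is available
```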
+ ) cost_func = CostFuncWrapper(maxeval, progressbar, logp_func) # Check to see if minimization function actually uses the gradient - if 'fprime' in getargspec(fmin).args: + if "fprime" in getargspec(fmin).args: + def grad_logp(point): return nan_to_num(-dlogp_func(point)) - opt_result = fmin(cost_func, bij.map(start), fprime=grad_logp, *args, **kwargs) + + opt_result = fmin( + cost_func, bij.map(start), fprime=grad_logp, *args, **kwargs + ) else: # Check to see if minimization function uses a starting value - if 'x0' in getargspec(fmin).args: + if "x0" in getargspec(fmin).args: opt_result = fmin(cost_func, bij.map(start), *args, **kwargs) else: opt_result = fmin(cost_func, *args, **kwargs) @@ -129,7 +150,9 @@ def grad_logp(point): cost_func = CostFuncWrapper(maxeval, progressbar, logp_func) try: - opt_result = minimize(cost_func, x0, method=method, jac=compute_gradient, *args, **kwargs) + opt_result = minimize( + cost_func, x0, method=method, jac=compute_gradient, *args, **kwargs + ) mx0 = opt_result["x"] # r -> opt_result cost_func.progress.total = cost_func.progress.n + 1 cost_func.progress.update() @@ -142,7 +165,9 @@ def grad_logp(point): cost_func.progress.close() vars = get_default_varnames(model.unobserved_RVs, include_transformed) - mx = {var.name: value for var, value in zip(vars, model.fastfn(vars)(bij.rmap(mx0)))} + mx = { + var.name: value for var, value in zip(vars, model.fastfn(vars)(bij.rmap(mx0))) + } if return_raw: return mx, opt_result @@ -171,11 +196,11 @@ def __init__(self, maxeval=5000, progressbar=True, logp_func=None, dlogp_func=No self.logp_func = logp_func if dlogp_func is None: self.use_gradient = False - self.desc = 'logp = {:,.5g}' + self.desc = "logp = {:,.5g}" else: self.dlogp_func = dlogp_func self.use_gradient = True - self.desc = 'logp = {:,.5g}, ||grad|| = {:,.5g}' + self.desc = "logp = {:,.5g}, ||grad|| = {:,.5g}" self.previous_x = None self.progress = progress_bar(total=maxeval, display=progressbar) self.progress.update(0) @@ -187,7 +212,7 @@ def __call__(self, x): neg_grad = self.dlogp_func(pm.floatX(x)) if np.all(np.isfinite(neg_grad)): self.previous_x = x - grad = nan_to_num(-1.0*neg_grad) + grad = nan_to_num(-1.0 * neg_grad) grad = grad.astype(np.float64) else: self.previous_x = x diff --git a/pymc3/variational/inference.py b/pymc3/variational/inference.py index 8b643a1659..72dcbe03b4 100644 --- a/pymc3/variational/inference.py +++ b/pymc3/variational/inference.py @@ -8,7 +8,10 @@ import pymc3 as pm from pymc3.variational import test_functions from pymc3.variational.approximations import ( - MeanField, FullRank, Empirical, NormalizingFlow + MeanField, + FullRank, + Empirical, + NormalizingFlow, ) from pymc3.variational.operators import KL, KSD from . 
import opvi @@ -16,22 +19,22 @@ logger = logging.getLogger(__name__) __all__ = [ - 'ADVI', - 'FullRankADVI', - 'SVGD', - 'ASVGD', - 'NFVI', - 'Inference', - 'ImplicitGradient', - 'KLqp', - 'fit' + "ADVI", + "FullRankADVI", + "SVGD", + "ASVGD", + "NFVI", + "Inference", + "ImplicitGradient", + "KLqp", + "fit", ] -State = collections.namedtuple('State', 'i,step,callbacks,score') +State = collections.namedtuple("State", "i,step,callbacks,score") class Inference: - R"""**Base class for Variational Inference** + r"""**Base class for Variational Inference** Communicates Operator, Approximation and Test Function to build Objective Function @@ -57,9 +60,10 @@ def _maybe_score(self, score): if score is None: score = returns_loss elif score and not returns_loss: - warnings.warn('method `fit` got `score == True` but %s ' - 'does not return loss. Ignoring `score` argument' - % self.objective.op) + warnings.warn( + "method `fit` got `score == True` but %s " + "does not return loss. Ignoring `score` argument" % self.objective.op + ) score = False else: pass @@ -67,11 +71,10 @@ def _maybe_score(self, score): def run_profiling(self, n=1000, score=None, **kwargs): score = self._maybe_score(score) - fn_kwargs = kwargs.pop('fn_kwargs', dict()) - fn_kwargs['profile'] = True + fn_kwargs = kwargs.pop("fn_kwargs", dict()) + fn_kwargs["profile"] = True step_func = self.objective.step_function( - score=score, fn_kwargs=fn_kwargs, - **kwargs + score=score, fn_kwargs=fn_kwargs, **kwargs ) progress = progress_bar(n) try: @@ -81,8 +84,7 @@ def run_profiling(self, n=1000, score=None, **kwargs): pass return step_func.profile - def fit(self, n=10000, score=None, callbacks=None, progressbar=True, - **kwargs): + def fit(self, n=10000, score=None, callbacks=None, progressbar=True, **kwargs): """Perform Operator Variational Inference Parameters @@ -154,34 +156,37 @@ def _iterate_without_loss(self, s, _, step_func, progress, callbacks): for i in range(slclen): name_slc.append((vmap_.var, i)) index = np.where(np.isnan(current_param))[0] - errmsg = ['NaN occurred in optimization. '] - suggest_solution = 'Try tracking this parameter: ' \ - 'http://docs.pymc.io/notebooks/variational_api_quickstart.html#Tracking-parameters' + errmsg = ["NaN occurred in optimization. 
"] + suggest_solution = ( + "Try tracking this parameter: " + "http://docs.pymc.io/notebooks/variational_api_quickstart.html#Tracking-parameters" + ) try: for ii in index: - errmsg.append('The current approximation of RV `{}`.ravel()[{}]' - ' is NaN.'.format(*name_slc[ii])) + errmsg.append( + "The current approximation of RV `{}`.ravel()[{}]" + " is NaN.".format(*name_slc[ii]) + ) errmsg.append(suggest_solution) except IndexError: pass - raise FloatingPointError('\n'.join(errmsg)) + raise FloatingPointError("\n".join(errmsg)) for callback in callbacks: - callback(self.approx, None, i+s+1) + callback(self.approx, None, i + s + 1) except (KeyboardInterrupt, StopIteration) as e: if isinstance(e, StopIteration): logger.info(str(e)) - return State(i+s, step=step_func, - callbacks=callbacks, - score=False) + return State(i + s, step=step_func, callbacks=callbacks, score=False) def _iterate_with_loss(self, s, n, step_func, progress, callbacks): def _infmean(input_array): """Return the mean of the finite values of the array""" - input_array = input_array[np.isfinite(input_array)].astype('float64') + input_array = input_array[np.isfinite(input_array)].astype("float64") if len(input_array) == 0: return np.nan else: return np.mean(input_array) + scores = np.empty(n) scores[:] = np.nan i = 0 @@ -200,55 +205,61 @@ def _infmean(input_array): for i in range(slclen): name_slc.append((vmap_.var, i)) index = np.where(np.isnan(current_param))[0] - errmsg = ['NaN occurred in optimization. '] - suggest_solution = 'Try tracking this parameter: ' \ - 'http://docs.pymc.io/notebooks/variational_api_quickstart.html#Tracking-parameters' + errmsg = ["NaN occurred in optimization. "] + suggest_solution = ( + "Try tracking this parameter: " + "http://docs.pymc.io/notebooks/variational_api_quickstart.html#Tracking-parameters" + ) try: for ii in index: - errmsg.append('The current approximation of RV `{}`.ravel()[{}]' - ' is NaN.'.format(*name_slc[ii])) + errmsg.append( + "The current approximation of RV `{}`.ravel()[{}]" + " is NaN.".format(*name_slc[ii]) + ) errmsg.append(suggest_solution) except IndexError: pass - raise FloatingPointError('\n'.join(errmsg)) + raise FloatingPointError("\n".join(errmsg)) scores[i] = e if i % 10 == 0: - avg_loss = _infmean(scores[max(0, i - 1000):i + 1]) - progress.comment = 'Average Loss = {:,.5g}'.format(avg_loss) - avg_loss = scores[max(0, i - 1000):i + 1].mean() - progress.comment = 'Average Loss = {:,.5g}'.format(avg_loss) + avg_loss = _infmean(scores[max(0, i - 1000) : i + 1]) + progress.comment = "Average Loss = {:,.5g}".format(avg_loss) + avg_loss = scores[max(0, i - 1000) : i + 1].mean() + progress.comment = "Average Loss = {:,.5g}".format(avg_loss) for callback in callbacks: - callback(self.approx, scores[:i + 1], i+s+1) + callback(self.approx, scores[: i + 1], i + s + 1) except (KeyboardInterrupt, StopIteration) as e: # pragma: no cover # do not print log on the same line scores = scores[:i] if isinstance(e, StopIteration): logger.info(str(e)) if n < 10: - logger.info('Interrupted at {:,d} [{:.0f}%]: Loss = {:,.5g}'.format( - i, 100 * i // n, scores[i])) + logger.info( + "Interrupted at {:,d} [{:.0f}%]: Loss = {:,.5g}".format( + i, 100 * i // n, scores[i] + ) + ) else: - avg_loss = _infmean(scores[min(0, i - 1000):i + 1]) - logger.info('Interrupted at {:,d} [{:.0f}%]: Average Loss = {:,.5g}'.format( - i, 100 * i // n, avg_loss)) + avg_loss = _infmean(scores[min(0, i - 1000) : i + 1]) + logger.info( + "Interrupted at {:,d} [{:.0f}%]: Average Loss = {:,.5g}".format( + i, 100 * i 
// n, avg_loss + ) + ) else: if n < 10: - logger.info( - 'Finished [100%]: Loss = {:,.5g}'.format(scores[-1])) + logger.info("Finished [100%]: Loss = {:,.5g}".format(scores[-1])) else: - avg_loss = _infmean(scores[max(0, i - 1000):i + 1]) - logger.info( - 'Finished [100%]: Average Loss = {:,.5g}'.format(avg_loss)) + avg_loss = _infmean(scores[max(0, i - 1000) : i + 1]) + logger.info("Finished [100%]: Average Loss = {:,.5g}".format(avg_loss)) self.hist = np.concatenate([self.hist, scores]) - return State(i+s, step=step_func, - callbacks=callbacks, - score=True) + return State(i + s, step=step_func, callbacks=callbacks, score=True) def refine(self, n, progressbar=True): """Refine the solution using the last compiled step function """ if self.state is None: - raise TypeError('Need to call `.fit` first') + raise TypeError("Need to call `.fit` first") i, step, callbacks, score = self.state progress = progress_bar(n, display=progressbar) if score: @@ -282,12 +293,13 @@ class KLqp(Inference): Understanding disentangling in :math:`\beta`-VAE arXiv preprint 1804.03599 """ - def __init__(self, approx, beta=1.): + + def __init__(self, approx, beta=1.0): super().__init__(KL, approx, None, beta=beta) class ADVI(KLqp): - R"""**Automatic Differentiation Variational Inference (ADVI)** + r"""**Automatic Differentiation Variational Inference (ADVI)** This class implements the meanfield ADVI, where the variational posterior distribution is assumed to be spherical Gaussian without @@ -435,7 +447,7 @@ def __init__(self, *args, **kwargs): class FullRankADVI(KLqp): - R"""**Full Rank Automatic Differentiation Variational Inference (ADVI)** + r"""**Full Rank Automatic Differentiation Variational Inference (ADVI)** Parameters ---------- @@ -480,12 +492,13 @@ class ImplicitGradient(Inference): only for large number of samples. Larger temperature is needed for small number of samples but there is no theoretical approach to choose the best one in such case. 
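(Because `refine()` above replays the step function stored in `self.state`, fitting can be resumed without recompiling. A rough usage sketch — toy model and iteration counts are made up:)

```python
import pymc3 as pm

with pm.Model():
    mu = pm.Normal("mu", mu=0.0, sigma=1.0)
    pm.Normal("y", mu=mu, sigma=1.0, observed=[0.2, 0.5, -0.1])

    advi = pm.ADVI()
    advi.fit(10000)        # compiles the step function and records it in .state
    advi.refine(5000)      # continues from the stored state, no recompilation
    trace = advi.approx.sample(1000)
```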
""" + def __init__(self, approx, estimator=KSD, kernel=test_functions.rbf, **kwargs): super().__init__(op=estimator, approx=approx, tf=kernel, **kwargs) class SVGD(ImplicitGradient): - R"""**Stein Variational Gradient Descent** + r"""**Stein Variational Gradient Descent** This inference is based on Kernelized Stein Discrepancy it's main idea is to move initial noisy particles so that @@ -535,18 +548,31 @@ class SVGD(ImplicitGradient): arXiv:1704.02399 """ - def __init__(self, n_particles=100, jitter=1, model=None, start=None, - random_seed=None, estimator=KSD, kernel=test_functions.rbf, **kwargs): - if kwargs.get('local_rv') is not None: - raise opvi.AEVBInferenceError('SVGD does not support local groups') + def __init__( + self, + n_particles=100, + jitter=1, + model=None, + start=None, + random_seed=None, + estimator=KSD, + kernel=test_functions.rbf, + **kwargs + ): + if kwargs.get("local_rv") is not None: + raise opvi.AEVBInferenceError("SVGD does not support local groups") empirical = Empirical( - size=n_particles, jitter=jitter, - start=start, model=model, random_seed=random_seed) + size=n_particles, + jitter=jitter, + start=start, + model=model, + random_seed=random_seed, + ) super().__init__(approx=empirical, estimator=estimator, kernel=kernel, **kwargs) class ASVGD(ImplicitGradient): - R"""**Amortized Stein Variational Gradient Descent** + r"""**Amortized Stein Variational Gradient Descent** **not suggested to use** @@ -591,32 +617,45 @@ class ASVGD(ImplicitGradient): """ def __init__(self, approx=None, estimator=KSD, kernel=test_functions.rbf, **kwargs): - warnings.warn('You are using experimental inference Operator. ' - 'It requires careful choice of temperature, default is 1. ' - 'Default temperature works well for low dimensional problems and ' - 'for significant `n_obj_mc`. Temperature > 1 gives more exploration ' - 'power to algorithm, < 1 leads to undesirable results. Please take ' - 'it in account when looking at inference result. Posterior variance ' - 'is often **underestimated** when using temperature = 1.') + warnings.warn( + "You are using experimental inference Operator. " + "It requires careful choice of temperature, default is 1. " + "Default temperature works well for low dimensional problems and " + "for significant `n_obj_mc`. Temperature > 1 gives more exploration " + "power to algorithm, < 1 leads to undesirable results. Please take " + "it in account when looking at inference result. Posterior variance " + "is often **underestimated** when using temperature = 1." 
+ ) if approx is None: approx = FullRank( - model=kwargs.pop('model', None), - local_rv=kwargs.pop('local_rv', None) + model=kwargs.pop("model", None), local_rv=kwargs.pop("local_rv", None) ) super().__init__(estimator=estimator, approx=approx, kernel=kernel, **kwargs) - def fit(self, n=10000, score=None, callbacks=None, progressbar=True, - obj_n_mc=500, **kwargs): + def fit( + self, + n=10000, + score=None, + callbacks=None, + progressbar=True, + obj_n_mc=500, + **kwargs + ): return super().fit( - n=n, score=score, callbacks=callbacks, - progressbar=progressbar, obj_n_mc=obj_n_mc, **kwargs) + n=n, + score=score, + callbacks=callbacks, + progressbar=progressbar, + obj_n_mc=obj_n_mc, + **kwargs + ) def run_profiling(self, n=1000, score=None, obj_n_mc=500, **kwargs): return super().run_profiling(n=n, score=score, obj_n_mc=obj_n_mc, **kwargs) class NFVI(KLqp): - R"""**Normalizing Flow based :class:`KLqp` inference** + r"""**Normalizing Flow based :class:`KLqp` inference** Normalizing flow is a series of invertible transformations on initial distribution. @@ -670,9 +709,17 @@ def __init__(self, *args, **kwargs): super().__init__(NormalizingFlow(*args, **kwargs)) -def fit(n=10000, local_rv=None, method='advi', model=None, - random_seed=None, start=None, inf_kwargs=None, **kwargs): - R"""Handy shortcut for using inference methods in functional way +def fit( + n=10000, + local_rv=None, + method="advi", + model=None, + random_seed=None, + start=None, + inf_kwargs=None, + **kwargs +): + r"""Handy shortcut for using inference methods in functional way Parameters ---------- @@ -740,42 +787,33 @@ def fit(n=10000, local_rv=None, method='advi', model=None, else: inf_kwargs = inf_kwargs.copy() if local_rv is not None: - inf_kwargs['local_rv'] = local_rv + inf_kwargs["local_rv"] = local_rv if random_seed is not None: - inf_kwargs['random_seed'] = random_seed + inf_kwargs["random_seed"] = random_seed if start is not None: - inf_kwargs['start'] = start + inf_kwargs["start"] = start if model is None: model = pm.modelcontext(model) _select = dict( - advi=ADVI, - fullrank_advi=FullRankADVI, - svgd=SVGD, - asvgd=ASVGD, - nfvi=NFVI + advi=ADVI, fullrank_advi=FullRankADVI, svgd=SVGD, asvgd=ASVGD, nfvi=NFVI ) if isinstance(method, str): method = method.lower() - if method.startswith('nfvi='): + if method.startswith("nfvi="): formula = method[5:] - inference = NFVI( - formula, - **inf_kwargs - ) + inference = NFVI(formula, **inf_kwargs) elif method in _select: - inference = _select[method]( - model=model, - **inf_kwargs - ) + inference = _select[method](model=model, **inf_kwargs) else: - raise KeyError('method should be one of %s ' - 'or Inference instance' % - set(_select.keys())) + raise KeyError( + "method should be one of %s " + "or Inference instance" % set(_select.keys()) + ) elif isinstance(method, Inference): inference = method else: - raise TypeError('method should be one of %s ' - 'or Inference instance' % - set(_select.keys())) + raise TypeError( + "method should be one of %s " "or Inference instance" % set(_select.keys()) + ) return inference.fit(n, **kwargs) From f2a5146a8e0ba7bc24858f77298457829d3f8915 Mon Sep 17 00:00:00 2001 From: Osvaldo Martin Date: Mon, 25 Nov 2019 11:04:26 -0300 Subject: [PATCH 3/8] Update starting.py --- pymc3/tuning/starting.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pymc3/tuning/starting.py b/pymc3/tuning/starting.py index 578b7cc507..31845db6e4 100644 --- a/pymc3/tuning/starting.py +++ b/pymc3/tuning/starting.py @@ -199,7 +199,7 @@ def 
__init__(self, maxeval=5000, progressbar=True, logp_func=None, dlogp_func=No self.use_gradient = True self.desc = "logp = {:,.5g}, ||grad|| = {:,.5g}" self.previous_x = None - self.progress = progress_bar(total=maxeval, display=progressbar) + self.progress = progress_bar(total=range(maxeval), display=progressbar) self.progress.update(0) def __call__(self, x): From a308df159621d9dee2531964549c6db0c641fdd7 Mon Sep 17 00:00:00 2001 From: Osvaldo Martin Date: Mon, 25 Nov 2019 11:31:01 -0300 Subject: [PATCH 4/8] Update starting.py --- pymc3/tuning/starting.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pymc3/tuning/starting.py b/pymc3/tuning/starting.py index 31845db6e4..125b02d735 100644 --- a/pymc3/tuning/starting.py +++ b/pymc3/tuning/starting.py @@ -199,7 +199,7 @@ def __init__(self, maxeval=5000, progressbar=True, logp_func=None, dlogp_func=No self.use_gradient = True self.desc = "logp = {:,.5g}, ||grad|| = {:,.5g}" self.previous_x = None - self.progress = progress_bar(total=range(maxeval), display=progressbar) + self.progress = progress_bar(range(maxeval), total=maxeval, display=progressbar) self.progress.update(0) def __call__(self, x): From 89b53ed4e275e44c6c3fe6dab7c1cf2ccbeaf7e2 Mon Sep 17 00:00:00 2001 From: aloctavodia Date: Mon, 25 Nov 2019 15:03:17 -0300 Subject: [PATCH 5/8] fix progress_bar --- pymc3/tuning/starting.py | 17 ++++++----------- pymc3/variational/inference.py | 35 ++++++++-------------------------- 2 files changed, 14 insertions(+), 38 deletions(-) diff --git a/pymc3/tuning/starting.py b/pymc3/tuning/starting.py index 125b02d735..96ce62470f 100644 --- a/pymc3/tuning/starting.py +++ b/pymc3/tuning/starting.py @@ -125,9 +125,7 @@ def find_MAP( def grad_logp(point): return nan_to_num(-dlogp_func(point)) - opt_result = fmin( - cost_func, bij.map(start), fprime=grad_logp, *args, **kwargs - ) + opt_result = fmin(cost_func, bij.map(start), fprime=grad_logp, *args, **kwargs) else: # Check to see if minimization function uses a starting value if "x0" in getargspec(fmin).args: @@ -151,20 +149,17 @@ def grad_logp(point): cost_func, x0, method=method, jac=compute_gradient, *args, **kwargs ) mx0 = opt_result["x"] # r -> opt_result - cost_func.progress.total = cost_func.progress.n + 1 - cost_func.progress.update() except (KeyboardInterrupt, StopIteration) as e: mx0, opt_result = cost_func.previous_x, None - cost_func.progress.close() if isinstance(e, StopIteration): pm._log.info(e) finally: - cost_func.progress.close() + last_v = cost_func.n_eval + cost_func.progress.total = last_v + cost_func.progress.update(last_v) vars = get_default_varnames(model.unobserved_RVs, include_transformed) - mx = { - var.name: value for var, value in zip(vars, model.fastfn(vars)(bij.rmap(mx0))) - } + mx = {var.name: value for var, value in zip(vars, model.fastfn(vars)(bij.rmap(mx0)))} if return_raw: return mx, opt_result @@ -223,7 +218,7 @@ def __call__(self, x): raise StopIteration self.n_eval += 1 - self.progress.update(1) + self.progress.update_bar(self.n_eval) if self.use_gradient: return value, grad diff --git a/pymc3/variational/inference.py b/pymc3/variational/inference.py index 72dcbe03b4..d066fd4621 100644 --- a/pymc3/variational/inference.py +++ b/pymc3/variational/inference.py @@ -73,10 +73,8 @@ def run_profiling(self, n=1000, score=None, **kwargs): score = self._maybe_score(score) fn_kwargs = kwargs.pop("fn_kwargs", dict()) fn_kwargs["profile"] = True - step_func = self.objective.step_function( - score=score, fn_kwargs=fn_kwargs, **kwargs - ) - progress = 
progress_bar(n) + step_func = self.objective.step_function(score=score, fn_kwargs=fn_kwargs, **kwargs) + progress = progress_bar(range(n)) try: for _ in progress: step_func() @@ -129,7 +127,7 @@ def fit(self, n=10000, score=None, callbacks=None, progressbar=True, **kwargs): callbacks = [] score = self._maybe_score(score) step_func = self.objective.step_function(score=score, **kwargs) - progress = progress_bar(n, display=progressbar) + progress = progress_bar(range(n), display=progressbar) if score: state = self._iterate_with_loss(0, n, step_func, progress, callbacks) else: @@ -562,11 +560,7 @@ def __init__( if kwargs.get("local_rv") is not None: raise opvi.AEVBInferenceError("SVGD does not support local groups") empirical = Empirical( - size=n_particles, - jitter=jitter, - start=start, - model=model, - random_seed=random_seed, + size=n_particles, jitter=jitter, start=start, model=model, random_seed=random_seed, ) super().__init__(approx=empirical, estimator=estimator, kernel=kernel, **kwargs) @@ -632,15 +626,7 @@ def __init__(self, approx=None, estimator=KSD, kernel=test_functions.rbf, **kwar ) super().__init__(estimator=estimator, approx=approx, kernel=kernel, **kwargs) - def fit( - self, - n=10000, - score=None, - callbacks=None, - progressbar=True, - obj_n_mc=500, - **kwargs - ): + def fit(self, n=10000, score=None, callbacks=None, progressbar=True, obj_n_mc=500, **kwargs): return super().fit( n=n, score=score, @@ -794,9 +780,7 @@ def fit( inf_kwargs["start"] = start if model is None: model = pm.modelcontext(model) - _select = dict( - advi=ADVI, fullrank_advi=FullRankADVI, svgd=SVGD, asvgd=ASVGD, nfvi=NFVI - ) + _select = dict(advi=ADVI, fullrank_advi=FullRankADVI, svgd=SVGD, asvgd=ASVGD, nfvi=NFVI) if isinstance(method, str): method = method.lower() if method.startswith("nfvi="): @@ -807,13 +791,10 @@ def fit( inference = _select[method](model=model, **inf_kwargs) else: raise KeyError( - "method should be one of %s " - "or Inference instance" % set(_select.keys()) + "method should be one of %s " "or Inference instance" % set(_select.keys()) ) elif isinstance(method, Inference): inference = method else: - raise TypeError( - "method should be one of %s " "or Inference instance" % set(_select.keys()) - ) + raise TypeError("method should be one of %s " "or Inference instance" % set(_select.keys())) return inference.fit(n, **kwargs) From 8d0a9399020ae8fa395cd1a546ad307563e8ffa1 Mon Sep 17 00:00:00 2001 From: aloctavodia Date: Sun, 8 Dec 2019 10:20:39 -0300 Subject: [PATCH 6/8] update --- pymc3/tuning/starting.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/pymc3/tuning/starting.py b/pymc3/tuning/starting.py index 96ce62470f..9794149957 100644 --- a/pymc3/tuning/starting.py +++ b/pymc3/tuning/starting.py @@ -194,7 +194,11 @@ def __init__(self, maxeval=5000, progressbar=True, logp_func=None, dlogp_func=No self.use_gradient = True self.desc = "logp = {:,.5g}, ||grad|| = {:,.5g}" self.previous_x = None +<<<<<<< HEAD self.progress = progress_bar(range(maxeval), total=maxeval, display=progressbar) +======= + self.progress = progress_bar(total=range(maxeval), display=progressbar) +>>>>>>> Update starting.py self.progress.update(0) def __call__(self, x): From 0f975e2d6ae1fe2c53c01ea5c2758b4ff5a5c4c7 Mon Sep 17 00:00:00 2001 From: aloctavodia Date: Sun, 8 Dec 2019 10:25:48 -0300 Subject: [PATCH 7/8] fix conflict --- pymc3/tuning/starting.py | 4 ---- 1 file changed, 4 deletions(-) diff --git a/pymc3/tuning/starting.py b/pymc3/tuning/starting.py index 9794149957..96ce62470f 100644 --- 
a/pymc3/tuning/starting.py +++ b/pymc3/tuning/starting.py @@ -194,11 +194,7 @@ def __init__(self, maxeval=5000, progressbar=True, logp_func=None, dlogp_func=No self.use_gradient = True self.desc = "logp = {:,.5g}, ||grad|| = {:,.5g}" self.previous_x = None -<<<<<<< HEAD self.progress = progress_bar(range(maxeval), total=maxeval, display=progressbar) -======= - self.progress = progress_bar(total=range(maxeval), display=progressbar) ->>>>>>> Update starting.py self.progress.update(0) def __call__(self, x): From ebd86d080386fc4fc685b315910a63dd6e0b6764 Mon Sep 17 00:00:00 2001 From: aloctavodia Date: Mon, 9 Dec 2019 18:44:45 -0300 Subject: [PATCH 8/8] fix strings, import and add release note --- RELEASE-NOTES.md | 7 ++++++- pymc3/parallel_sampling.py | 2 +- pymc3/variational/inference.py | 36 +++++++++++++++++++++++++--------- 3 files changed, 34 insertions(+), 11 deletions(-) diff --git a/RELEASE-NOTES.md b/RELEASE-NOTES.md index 3407e2101e..401044099f 100644 --- a/RELEASE-NOTES.md +++ b/RELEASE-NOTES.md @@ -1,6 +1,11 @@ # Release Notes -## PyMC3 3.8 (on deck) +## PyMC3 3.9 (On deck) + +### New features +- use [fastprogress](https://github.com/fastai/fastprogress) instead of tqdm [#3693](https://github.com/pymc-devs/pymc3/pull/3693) + +## PyMC3 3.8 (November 29 2019) ### New features - Implemented robust u turn check in NUTS (similar to stan-dev/stan#2800). See PR [#3605] diff --git a/pymc3/parallel_sampling.py b/pymc3/parallel_sampling.py index cc1225600b..7dacf5607d 100644 --- a/pymc3/parallel_sampling.py +++ b/pymc3/parallel_sampling.py @@ -9,6 +9,7 @@ import errno import numpy as np +from fastprogress import progress_bar from . import theanof @@ -348,7 +349,6 @@ def __init__( start_chain_num=0, progressbar=True, ): - from fastprogress import progress_bar if any(len(arg) != chains for arg in [seeds, start_points]): raise ValueError("Number of seeds and start_points must be %s." 
% chains) diff --git a/pymc3/variational/inference.py b/pymc3/variational/inference.py index d066fd4621..0ed1d0fa45 100644 --- a/pymc3/variational/inference.py +++ b/pymc3/variational/inference.py @@ -73,7 +73,9 @@ def run_profiling(self, n=1000, score=None, **kwargs): score = self._maybe_score(score) fn_kwargs = kwargs.pop("fn_kwargs", dict()) fn_kwargs["profile"] = True - step_func = self.objective.step_function(score=score, fn_kwargs=fn_kwargs, **kwargs) + step_func = self.objective.step_function( + score=score, fn_kwargs=fn_kwargs, **kwargs + ) progress = progress_bar(range(n)) try: for _ in progress: @@ -555,12 +557,16 @@ def __init__( random_seed=None, estimator=KSD, kernel=test_functions.rbf, - **kwargs + **kwargs, ): if kwargs.get("local_rv") is not None: raise opvi.AEVBInferenceError("SVGD does not support local groups") empirical = Empirical( - size=n_particles, jitter=jitter, start=start, model=model, random_seed=random_seed, + size=n_particles, + jitter=jitter, + start=start, + model=model, + random_seed=random_seed, ) super().__init__(approx=empirical, estimator=estimator, kernel=kernel, **kwargs) @@ -626,14 +632,22 @@ def __init__(self, approx=None, estimator=KSD, kernel=test_functions.rbf, **kwar ) super().__init__(estimator=estimator, approx=approx, kernel=kernel, **kwargs) - def fit(self, n=10000, score=None, callbacks=None, progressbar=True, obj_n_mc=500, **kwargs): + def fit( + self, + n=10000, + score=None, + callbacks=None, + progressbar=True, + obj_n_mc=500, + **kwargs, + ): return super().fit( n=n, score=score, callbacks=callbacks, progressbar=progressbar, obj_n_mc=obj_n_mc, - **kwargs + **kwargs, ) def run_profiling(self, n=1000, score=None, obj_n_mc=500, **kwargs): @@ -703,7 +717,7 @@ def fit( random_seed=None, start=None, inf_kwargs=None, - **kwargs + **kwargs, ): r"""Handy shortcut for using inference methods in functional way @@ -780,7 +794,9 @@ def fit( inf_kwargs["start"] = start if model is None: model = pm.modelcontext(model) - _select = dict(advi=ADVI, fullrank_advi=FullRankADVI, svgd=SVGD, asvgd=ASVGD, nfvi=NFVI) + _select = dict( + advi=ADVI, fullrank_advi=FullRankADVI, svgd=SVGD, asvgd=ASVGD, nfvi=NFVI + ) if isinstance(method, str): method = method.lower() if method.startswith("nfvi="): @@ -791,10 +807,12 @@ def fit( inference = _select[method](model=model, **inf_kwargs) else: raise KeyError( - "method should be one of %s " "or Inference instance" % set(_select.keys()) + f"method should be one of {set(_select.keys())} or Inference instance" ) elif isinstance(method, Inference): inference = method else: - raise TypeError("method should be one of %s " "or Inference instance" % set(_select.keys())) + raise TypeError( + f"method should be one of {set(_select.keys())} or Inference instance" + ) return inference.fit(n, **kwargs)
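(Taken together, these patches settle on driving fastprogress's `progress_bar` explicitly: `update`/`update_bar` calls, a settable `comment`, and a `total` adjusted when the loop stops early. A standalone sketch of that pattern outside PyMC3 — the loop, values, and early stop are invented; the import path matches the one used in the patches:)

```python
from fastprogress import progress_bar

maxeval = 50
progress = progress_bar(range(maxeval), total=maxeval, display=True)
progress.update(0)                             # initialize the bar before any work

for i in range(1, 21):                         # pretend the optimizer stops after 20 evaluations
    progress.comment = "logp = {:,.5g}".format(-0.5 * i)
    progress.update_bar(i)                     # jump the bar straight to the i-th step

progress.total = 20                            # shrink total so the bar finishes at 100%
progress.update(20)
```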