diff --git a/.codecov.yml b/.codecov.yml
index 0b888c6b4a..e7fe4624f7 100644
--- a/.codecov.yml
+++ b/.codecov.yml
@@ -13,13 +13,13 @@ coverage:
         # basic
         target: auto
         threshold: 1%
-        base: auto 
+        base: auto
     patch:
       default:
         # basic
         target: 50%
         threshold: 1%
-        base: auto 
+        base: auto
 
 comment:
   layout: "reach, diff, flags, files"
diff --git a/.github/ISSUE_TEMPLATE.md b/.github/ISSUE_TEMPLATE.md
index 66f104c2a1..c9dfdbc6bf 100644
--- a/.github/ISSUE_TEMPLATE.md
+++ b/.github/ISSUE_TEMPLATE.md
@@ -1,4 +1,4 @@
-If you have questions about a specific use case, or you are not sure whether this is a bug or not, please post it to our discourse channel: https://discourse.pymc.io  
+If you have questions about a specific use case, or you are not sure whether this is a bug or not, please post it to our discourse channel: https://discourse.pymc.io
 
 ## Description of your problem
 
diff --git a/.github/workflows/autoupdate-pre-commit-config.yml b/.github/workflows/autoupdate-pre-commit-config.yml
new file mode 100644
index 0000000000..48bc700af6
--- /dev/null
+++ b/.github/workflows/autoupdate-pre-commit-config.yml
@@ -0,0 +1,33 @@
+name: "Update pre-commit config"
+
+on:
+  schedule:
+    - cron: "0 7 * * 1" # At 07:00 on each Monday.
+  workflow_dispatch:
+
+jobs:
+  update-pre-commit:
+    if: github.repository_owner == 'pymc-devs'
+    name: Autoupdate pre-commit config
+    runs-on: ubuntu-latest
+    steps:
+      - name: Set up Python
+        uses: actions/setup-python@v2
+      - name: Cache multiple paths
+        uses: actions/cache@v2
+        with:
+          path: |
+            ~/.cache/pre-commit
+            ~/.cache/pip
+          key: pre-commit-autoupdate-${{ runner.os }}-build
+      - name: Update pre-commit config packages
+        uses: technote-space/create-pr-action@v2
+        with:
+          GITHUB_TOKEN: ${{ secrets.ACTION_TRIGGER_TOKEN }}
+          EXECUTE_COMMANDS: |
+            pip install pre-commit
+            pre-commit autoupdate || (exit 0);
+            pre-commit run -a || (exit 0);
+          COMMIT_MESSAGE: "⬆️ UPGRADE: Autoupdate pre-commit config"
+          PR_BRANCH_NAME: "pre-commit-config-update-${PR_ID}"
+          PR_TITLE: "⬆️ UPGRADE: Autoupdate pre-commit config"
diff --git a/.github/workflows/pre-commit.yml b/.github/workflows/pre-commit.yml
index 723347913a..6131de7400 100644
--- a/.github/workflows/pre-commit.yml
+++ b/.github/workflows/pre-commit.yml
@@ -8,6 +8,8 @@ on:
 jobs:
   pre-commit:
     runs-on: ubuntu-latest
+    env:
+      SKIP: no-commit-to-branch
     steps:
     - uses: actions/checkout@v2
     - uses: actions/setup-python@v2
diff --git a/.github/workflows/pytest.yml b/.github/workflows/pytest.yml
index eeeb5c322f..e1da3c3bb0 100644
--- a/.github/workflows/pytest.yml
+++ b/.github/workflows/pytest.yml
@@ -29,6 +29,7 @@ jobs:
             --ignore=pymc3/tests/test_shape_handling.py
             --ignore=pymc3/tests/test_shared.py
             --ignore=pymc3/tests/test_smc.py
+            --ignore=pymc3/tests/test_step.py
             --ignore=pymc3/tests/test_updates.py
             --ignore=pymc3/tests/test_variational_inference.py
           - |
@@ -47,6 +48,7 @@ jobs:
           - |
             pymc3/tests/test_distributions_timeseries.py
             pymc3/tests/test_shape_handling.py
+            pymc3/tests/test_step.py
             pymc3/tests/test_updates.py
             pymc3/tests/test_variational_inference.py
           - |
diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml
index b7d088d87a..89da779eef 100644
--- a/.pre-commit-config.yaml
+++ b/.pre-commit-config.yaml
@@ -1,11 +1,19 @@
+exclude: ^(docs/logos|pymc3/examples/data)/
 repos:
 - repo: https://github.com/pre-commit/pre-commit-hooks
   rev: v3.3.0
   hooks:
-    -   id: end-of-file-fixer
+    -   id: check-merge-conflict
     -   id: check-toml
+    -   id: check-yaml
+    -   id: debug-statements
+    -   id: end-of-file-fixer
+    -   id: no-commit-to-branch
+        args: [--branch, master]
+    -   id: requirements-txt-fixer
+    -   id: trailing-whitespace
 - repo: https://github.com/nbQA-dev/nbQA
-  rev: 0.4.1
+  rev: 0.5.4
   hooks:
     - id: nbqa-black
       additional_dependencies: [black==20.8b1]
@@ -13,12 +21,11 @@ repos:
       additional_dependencies: [isort==5.6.4]
     - id: nbqa-pyupgrade
       additional_dependencies: [pyupgrade==2.7.4]
-
 - repo: https://github.com/PyCQA/isort
   rev: 5.6.4
   hooks:
     - id: isort
-      name: isort (python)
+      name: isort
 - repo: https://github.com/asottile/pyupgrade
   rev: v2.7.4
   hooks:
diff --git a/GOVERNANCE.md b/GOVERNANCE.md
index 99b9f0ce2f..68c6240543 100644
--- a/GOVERNANCE.md
+++ b/GOVERNANCE.md
@@ -13,7 +13,7 @@ developed openly and hosted in public GitHub repositories under the
 [GitHub organization](https://github.com/pymc-devs/pymc3). Examples of
 Project Software include the PyMC3 code and the Documentation, etc. The Services run by the
 Project consist of public websites and web-services that are hosted
-at [http://pymc-devs.github.io/pymc3/](http://pymc-devs.github.io/pymc3/) 
+at [http://pymc-devs.github.io/pymc3/](http://pymc-devs.github.io/pymc3/).
 The Project is developed by a team of distributed developers, called
 Contributors. Contributors are individuals who have contributed code,
 documentation, designs or other work to one or more Project repositories.
@@ -131,7 +131,7 @@ The current Steering Council membership comprises:
 - Junpeng Lao
 - Osvaldo Martin
 - Austin Rochford
-- Adrian Seyboldt 
+- Adrian Seyboldt
 - Thomas Wiecki
 
 ### Council membership
diff --git a/README.rst b/README.rst
index de7e457b67..70a37dd4f7 100644
--- a/README.rst
+++ b/README.rst
@@ -21,11 +21,11 @@ The future of PyMC3 & Theano
 There have been many questions and uncertainty around the future of PyMC3 since Theano
 stopped getting developed by the original authors, and we started experiments with PyMC4.
 
-We are happy to announce that PyMC3 on Theano (which we are `developing further <https://github.com/pymc-devs/Theano-PyMC>`__) 
-with a new JAX backend is the future. PyMC4 will not be developed further. 
+We are happy to announce that PyMC3 on Theano (which we are `developing further <https://github.com/pymc-devs/Theano-PyMC>`__)
+with a new JAX backend is the future. PyMC4 will not be developed further.
 
 See the `full announcement <https://pymc-devs.medium.com/the-future-of-pymc3-or-theano-is-dead-long-live-theano-d8005f8a0e9b>`__
-for more details. 
+for more details.
 
 Features
 ========
@@ -119,7 +119,7 @@ Another option is to clone the repository and install PyMC3 using
 Dependencies
 ============
 
-PyMC3 is tested on Python 3.6, 3.7, and 3.8 and depends on `Theano-PyMC <https://github.com/pymc-devs/Theano-PyMC>`__, 
+PyMC3 is tested on Python 3.6, 3.7, and 3.8 and depends on `Theano-PyMC <https://github.com/pymc-devs/Theano-PyMC>`__,
 NumPy, SciPy, and Pandas
 (see `requirements.txt <https://github.com/pymc-devs/pymc3/blob/master/requirements.txt>`__ for version
 information).
diff --git a/binder/requirements.txt b/binder/requirements.txt
index d63469859d..a06c1205d9 100644
--- a/binder/requirements.txt
+++ b/binder/requirements.txt
@@ -1,4 +1,4 @@
 -r ../requirements-dev.txt
-# this installs pymc3 itself. it is funny that this is an absolute path, 
+# this installs pymc3 itself. it is funny that this is an absolute path,
 # but reqirements-dev.txt is relative.
 .
diff --git a/docs/source/Gaussian_Processes.rst b/docs/source/Gaussian_Processes.rst
index 6ca96db1f6..f6913344ca 100644
--- a/docs/source/Gaussian_Processes.rst
+++ b/docs/source/Gaussian_Processes.rst
@@ -21,7 +21,7 @@ choice as priors over functions due to the marginalization and conditioning
 properties of the multivariate normal distribution.  Usually, the marginal
 distribution over :math:`f(x)` is evaluated during the inference step.  The
 conditional distribution is then used for predicting the function values
-:math:`f(x_*)` at new points, :math:`x_*`.  
+:math:`f(x_*)` at new points, :math:`x_*`.
 
 The joint distribution of :math:`f(x)` and :math:`f(x_*)` is multivariate
 normal,
@@ -29,9 +29,9 @@ normal,
 .. math::
 
   \begin{bmatrix} f(x) \\ f(x_*) \\ \end{bmatrix} \sim
-  \text{N}\left( 
+  \text{N}\left(
     \begin{bmatrix} m(x)  \\ m(x_*)    \\ \end{bmatrix} \,,
-    \begin{bmatrix} k(x,x')    & k(x_*, x)    \\ 
+    \begin{bmatrix} k(x,x')    & k(x, x_*)    \\
                     k(x_*, x) &  k(x_*, x_*')  \\ \end{bmatrix}
           \right) \,.
 
@@ -41,21 +41,21 @@ distribution is
 
 .. math::
 
-  f(x_*) \mid f(x) \sim \text{N}\left( k(x_*, x) k(x, x)^{-1} [f(x) - m(x)] + m(x_*) ,\, 
+  f(x_*) \mid f(x) \sim \text{N}\left( k(x_*, x) k(x, x)^{-1} [f(x) - m(x)] + m(x_*) ,\,
     k(x_*, x_*) - k(x, x_*) k(x, x)^{-1} k(x, x_*) \right) \,.
 
 .. note::
 
   For more information on GPs, check out the book `Gaussian Processes for
   Machine Learning <http://www.gaussianprocess.org/gpml/>`_ by Rasmussen &
-  Williams, or `this introduction <https://www.ics.uci.edu/~welling/teaching/KernelsICS273B/gpB.pdf>`_ 
+  Williams, or `this introduction <https://www.ics.uci.edu/~welling/teaching/KernelsICS273B/gpB.pdf>`_
   by D. Mackay.
 
 PyMC3 is a great environment for working with fully Bayesian Gaussian Process
-models.  GPs in PyMC3 have a clear syntax and are highly composable, and many 
-predefined covariance functions (or kernels), mean functions, and several GP 
+models.  GPs in PyMC3 have a clear syntax and are highly composable, and many
+predefined covariance functions (or kernels), mean functions, and several GP
 implementations are included.  GPs are treated as distributions that can be
-used within larger or hierarchical models, not just as standalone regression 
+used within larger or hierarchical models, not just as standalone regression
 models.
 
 Mean and covariance functions
@@ -83,7 +83,7 @@ specify :code:`input_dim`, the total number of columns of :code:`X`, and
 :code:`active_dims`, which of those columns or dimensions the covariance
 function will act on, is because :code:`cov_func` hasn't actually seen the
 input data yet.  The :code:`active_dims` argument is optional, and defaults to
-all columns of the matrix of inputs.  
+all columns of the matrix of inputs.
 
 Covariance functions in PyMC3 closely follow the algebraic rules for kernels,
 which allows users to combine covariance functions into new ones, for example:
@@ -97,13 +97,13 @@ which allows users to combine covariance functions into new ones, for example:
 
 
     cov_func = pm.gp.cov.ExpQuad(...) * pm.gp.cov.Periodic(...)
-    
+
 - The product (or sum) of a covariance function with a scalar is a
   covariance function::
 
-    
+
     cov_func = eta**2 * pm.gp.cov.Matern32(...)
-    
+
 
 
 After the covariance function is defined, it is now a function that is
@@ -133,7 +133,7 @@ is::
 The first argument is the mean function and the second is the covariance
 function.  We've made the GP object, but we haven't made clear which function
 it is to be a prior for, what the inputs are, or what parameters it will be
-conditioned on.  
+conditioned on.
 
 .. note::
 
@@ -145,18 +145,18 @@ conditioned on.
 
 Calling the `prior` method will create a PyMC3 random variable that represents
 the latent function :math:`f(x) = \mathbf{f}`::
-  
+
 	f = gp.prior("f", X)
 
 :code:`f` is a random variable that can be used within a PyMC3 model like any
 other type of random variable.  The first argument is the name of the random
-variable representing the function we are placing the prior over.  
-The second argument is the inputs to the function that the prior is over, 
+variable representing the function we are placing the prior over.
+The second argument is the inputs to the function that the prior is over,
 :code:`X`.  The inputs are usually known and present in the data, but they can
-also be PyMC3 random variables.  If the inputs are a Theano tensor or a 
+also be PyMC3 random variables.  If the inputs are a Theano tensor or a
 PyMC3 random variable, the :code:`shape` needs to be given.
 
-Usually at this point, inference is performed on the model.  The 
+Usually at this point, inference is performed on the model.  The
 :code:`conditional` method creates the conditional, or predictive,
 distribution over the latent function at arbitrary :math:`x_*` input points,
 :math:`f(x_*)`.  To construct the conditional distribution we write::
@@ -166,7 +166,7 @@ distribution over the latent function at arbitrary :math:`x_*` input points,
 Additive GPs
 ============
 
-The GP implementation in PyMC3 is constructed so that it is easy to define 
+The GP implementation in PyMC3 is constructed so that it is easy to define
 additive GPs and sample from individual GP components.  We can write::
 
     gp1 = pm.gp.Marginal(mean_func1, cov_func1)
@@ -183,18 +183,18 @@ Consider two independent GP distributed functions, :math:`f_1(x) \sim
 
 .. math::
 
-  \begin{bmatrix} f_1 \\ f_1^* \\ f_2 \\ f_2^* 
+  \begin{bmatrix} f_1 \\ f_1^* \\ f_2 \\ f_2^*
                \\ f_1 + f_2    \\ f_1^* + f_2^* \end{bmatrix} \sim
-  \text{N}\left( 
+  \text{N}\left(
     \begin{bmatrix} m_1 \\ m_1^* \\ m_2 \\ m_2^* \\
                     m_1 + m_2    \\ m_1^* + m_2^*   \\ \end{bmatrix} \,,\,
-    \begin{bmatrix} 
+    \begin{bmatrix}
       K_1       &  K_1^*     &   0       &    0      & K_1        & K_1^*              \\
       K_1^{*^T} &  K_1^{**}  &   0       &    0      & K_1^*      & K_1^{**}           \\
       0         &  0         & K_2       & K_2^*     & K_2        & K_2^{*}            \\
       0         &  0         & K_2^{*^T} & K_2^{**}  & K_2^{*}    & K_2^{**}           \\
       K_1       &  K_1^{*}   & K_2       & K_2^{*}   & K_1 + K_2  & K_1^{*} + K_2^{*}  \\
-      K_1^{*^T} & K_1^{**} & K_2^{*^T} & K_2^{**} & K_1^{*^T}+K_2^{*^T} & K_1^{**}+K_2^{**} 
+      K_1^{*^T} & K_1^{**} & K_2^{*^T} & K_2^{**} & K_1^{*^T}+K_2^{*^T} & K_1^{**}+K_2^{**}
     \end{bmatrix}
   \right) \,.
 
@@ -220,42 +220,42 @@ other implementations.  The first block fits the GP prior.  We denote
     with pm.Model() as model:
         gp1 = pm.gp.Marginal(mean_func1, cov_func1)
         gp2 = pm.gp.Marginal(mean_func2, cov_func2)
-        
-        # gp represents f1 + f2.  
+
+        # gp represents f1 + f2.
         gp = gp1 + gp2
-        
+
         f = gp.marginal_likelihood("f", X, y, noise)
-        
+
         trace = pm.sample(1000)
 
 
-To construct the conditional distribution of :code:`gp1` or :code:`gp2`, we 
-also need to include the additional arguments, :code:`X`, :code:`y`, and 
+To construct the conditional distribution of :code:`gp1` or :code:`gp2`, we
+also need to include the additional arguments, :code:`X`, :code:`y`, and
 :code:`noise`::
 
     with model:
         # conditional distributions of f1 and f2
-        f1_star = gp1.conditional("f1_star", X_star, 
+        f1_star = gp1.conditional("f1_star", X_star,
                                   given={"X": X, "y": y, "noise": noise, "gp": gp})
-        f2_star = gp2.conditional("f2_star", X_star, 
+        f2_star = gp2.conditional("f2_star", X_star,
                                   given={"X": X, "y": y, "noise": noise, "gp": gp})
 
         # conditional of f1 + f2, `given` not required
         f_star = gp.conditional("f_star", X_star)
 
-This second block produces the conditional distributions.  Notice that extra 
+This second block produces the conditional distributions.  Notice that extra
 arguments are required for conditionals of :math:`f1` and :math:`f2`, but not
-:math:`f`.  This is because those arguments are cached when 
+:math:`f`.  This is because those arguments are cached when
 :code:`.marginal_likelihood` is called on :code:`gp`.
 
 .. note::
   When constructing conditionals, the additional arguments :code:`X`, :code:`y`,
   :code:`noise` and :code:`gp` must be provided as a dict called `given`!
 
-Since the marginal likelihoood method of :code:`gp1` or :code:`gp2` weren't called, 
-their conditionals need to be provided with the required inputs.  In the same 
+Since the marginal likelihood method of :code:`gp1` or :code:`gp2` wasn't called,
+their conditionals need to be provided with the required inputs.  In the same
 fashion as the prior, :code:`f_star`, :code:`f1_star` and :code:`f2_star` are random
-variables that can now be used like any other random variable in PyMC3.  
+variables that can now be used like any other random variable in PyMC3.
 
 Check the notebooks for detailed demonstrations of the usage of GP functionality
 in PyMC3.
diff --git a/docs/source/Probability_Distributions.rst b/docs/source/Probability_Distributions.rst
index af77f28b28..ae6e726570 100644
--- a/docs/source/Probability_Distributions.rst
+++ b/docs/source/Probability_Distributions.rst
@@ -4,24 +4,24 @@
 Probability Distributions in PyMC3
 **********************************
 
-The most fundamental step in building Bayesian models is the specification of a full probability model for the problem at hand. This primarily involves assigning parametric statistical distributions to unknown quantities in the model, in addition to appropriate functional forms for likelihoods to represent the information from the data. To this end, PyMC3 includes a comprehensive set of pre-defined statistical distributions that can be used as model building blocks. 
+The most fundamental step in building Bayesian models is the specification of a full probability model for the problem at hand. This primarily involves assigning parametric statistical distributions to unknown quantities in the model, in addition to appropriate functional forms for likelihoods to represent the information from the data. To this end, PyMC3 includes a comprehensive set of pre-defined statistical distributions that can be used as model building blocks.
 
 For example, if we wish to define a particular variable as having a normal prior, we can specify that using an instance of the ``Normal`` class.
 
 ::
 
     with pm.Model():
-    
+
         x = pm.Normal('x', mu=0, sigma=1)
-        
+
 A variable requires at least a ``name`` argument, and zero or more model parameters, depending on the distribution. Parameter names vary by distribution, using conventional names wherever possible. The example above defines a scalar variable. To make a vector-valued variable, a ``shape`` argument should be provided; for example, a 3x3 matrix of beta random variables could be defined with:
 
 ::
 
     with pm.Model():
-    
+
         p = pm.Beta('p', 1, 1, shape=(3, 3))
-        
+
 Probability distributions are all subclasses of ``Distribution``, which in turn has two major subclasses: ``Discrete`` and ``Continuous``. In terms of data types, a ``Continuous`` random variable is given whichever floating point type is defined by ``theano.config.floatX``, while ``Discrete`` variables are given ``int16`` types when ``theano.config.floatX`` is ``float32``, and ``int64`` otherwise.
 
 All distributions in ``pm.distributions`` will have two important methods: ``random()`` and ``logp()`` with the following signatures:
@@ -29,22 +29,22 @@ All distributions in ``pm.distributions`` will have two important methods: ``ran
 ::
 
     class SomeDistribution(Continuous):
-    
+
         def random(self, point=None, size=None):
             ...
             return random_samples
-            
+
         def logp(self, value):
             ...
             return total_log_prob
-            
+
 PyMC3 expects the ``logp()`` method to return a log-probability evaluated at the passed ``value`` argument. This method is used internally by all of the inference methods to calculate the model log-probability that is used for fitting models. The ``random()`` method is used to simulate values from the variable, and is used internally for posterior predictive checks.
 
 
 Custom distributions
 ====================
 
-Despite the fact that PyMC3 ships with a large set of the most common probability distributions, some problems may require the use of functional forms that are less common, and not available in ``pm.distributions``. One example of this is in survival analysis, where time-to-event data is modeled using probability densities that are designed to accommodate censored data. 
+Despite the fact that PyMC3 ships with a large set of the most common probability distributions, some problems may require the use of functional forms that are less common, and not available in ``pm.distributions``. One example of this is in survival analysis, where time-to-event data is modeled using probability densities that are designed to accommodate censored data.
 
 An exponential survival function, where :math:`c=0` denotes failure (or non-survival), is defined by:
 
@@ -64,7 +64,7 @@ For the exponential survival function, this is:
 
     exp_surv = pm.DensityDist('exp_surv', logp, observed={'failure':failure, 'value':t})
 
-Similarly, if a random number generator is required, a function returning random numbers corresponding to the probability distribution can be passed as the ``random`` argument.    
+Similarly, if a random number generator is required, a function returning random numbers corresponding to the probability distribution can be passed as the ``random`` argument.
 
 
 Using PyMC distributions without a Model
@@ -75,22 +75,22 @@ Distribution objects, as we have defined them so far, are only usable inside of
 ::
 
     y = Binomial('y', n=10, p=0.5)
-    
-    
+
+
 ::
 
     TypeError: No context on context stack
- 
+
 This is because the distribution classes are designed to integrate themselves automatically inside of a PyMC model. When a model cannot be found, it fails. However, each ``Distribution`` has a ``dist`` class method that returns a stripped-down distribution object that can be used outside of a PyMC model.
 
-For example, a standalone binomial distribution can be created by:   
-    
+For example, a standalone binomial distribution can be created by:
+
 ::
 
     y = pm.Binomial.dist(n=10, p=0.5)
-   
+
 This allows for probabilities to be calculated and random numbers to be drawn.
-    
+
 ::
 
     >>> y.logp(4).eval()
@@ -99,7 +99,7 @@ This allows for probabilities to be calculated and random numbers to be drawn.
     >>> y.random(size=3)
     array([5, 4, 3])
 
-            
+
 Auto-transformation
 ===================
 
@@ -112,8 +112,8 @@ For example, the gamma distribution is positive-valued. If we define one for a m
     with pm.Model() as model:
         g = pm.Gamma('g', 1, 1)
 
-We notice a modified variable inside the model ``vars`` attribute, which holds the free variables in the model. 
-        
+We notice a modified variable inside the model ``vars`` attribute, which holds the free variables in the model.
+
 ::
 
     >>> model.vars
@@ -122,7 +122,7 @@ We notice a modified variable inside the model ``vars`` attribute, which holds t
 As the name suggests, the variable ``g`` has been log-transformed, and this is the space over which sampling takes place.
 
 The original variable is simply treated as a deterministic variable, since the value of the transformed variable is simply back-transformed when a sample is drawn in order to recover the original variable. Hence, ``g`` resides in the ``model.deterministics`` list.
-    
+
 ::
 
     >>> model.deterministics
diff --git a/docs/source/api/distributions/continuous.rst b/docs/source/api/distributions/continuous.rst
index 9be0b9183c..fcc49d2e11 100644
--- a/docs/source/api/distributions/continuous.rst
+++ b/docs/source/api/distributions/continuous.rst
@@ -21,7 +21,7 @@ Continuous
    Cauchy
    HalfCauchy
    Gamma
-   InverseGamma 
+   InverseGamma
    Weibull
    Lognormal
    ChiSquared
diff --git a/docs/source/api/math.rst b/docs/source/api/math.rst
index 80741743cb..c548d13216 100644
--- a/docs/source/api/math.rst
+++ b/docs/source/api/math.rst
@@ -2,8 +2,8 @@
 Math
 ====
 
-This submodule contains various mathematical functions. Most of them are imported directly 
-from theano.tensor (see there for more details). Doing any kind of math with PyMC3 random 
+This submodule contains various mathematical functions. Most of them are imported directly
+from theano.tensor (see there for more details). Doing any kind of math with PyMC3 random
 variables, or defining custom likelihoods or priors requires you to use these theano
 expressions rather than NumPy or Python code.
 
diff --git a/pymc3/backends/__init__.py b/pymc3/backends/__init__.py
index d746b4408c..ab0faa3ded 100644
--- a/pymc3/backends/__init__.py
+++ b/pymc3/backends/__init__.py
@@ -52,7 +52,7 @@
     >>> sliced_trace = trace[1000:]
 
 The backend for the new trace is always NDArray, regardless of the
-type of original trace. 
+type of original trace.
 
 Loading a saved backend
 -----------------------
diff --git a/requirements-dev.txt b/requirements-dev.txt
index c5a03f945d..1e127fe8b9 100644
--- a/requirements-dev.txt
+++ b/requirements-dev.txt
@@ -1,22 +1,22 @@
 bokeh>=0.12.13
 coverage>=5.1
+dill
 graphviz>=0.8.3
 ipython
 nbsphinx>=0.4.2
 nose>=1.3.7
 nose-parameterized==0.6.0
 numpydoc>=0.9.1
+parameterized
 pre-commit>=2.8.0
 pycodestyle>=2.3.1
 pyflakes>=1.5.0
 pylint>=1.7.4
-pytest-cov>=2.5.1
 pytest>=3.0.7
+pytest-cov>=2.5.1
 pytest-xdist
 recommonmark>=0.4.0
 seaborn>=0.8.1
-sphinx-autobuild==0.7.1
 sphinx>=1.5.5
+sphinx-autobuild==0.7.1
 watermark
-parameterized
-dill
diff --git a/requirements.txt b/requirements.txt
index 16ddd673aa..f97594daf9 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -1,11 +1,11 @@
 arviz>=0.9.0
-theano-pymc==1.0.11
+contextvars; python_version < '3.7'
+dataclasses; python_version < '3.7'
+dill
+fastprogress>=0.2.0
 numpy>=1.13.0
-scipy>=0.18.1
 pandas>=0.18.0
 patsy>=0.5.1
-fastprogress>=0.2.0
+scipy>=0.18.1
+theano-pymc==1.0.11
 typing-extensions>=3.7.4
-dataclasses; python_version < '3.7'
-contextvars; python_version < '3.7'
-dill