diff --git a/.codecov.yml b/.codecov.yml index 0b888c6b4a..e7fe4624f7 100644 --- a/.codecov.yml +++ b/.codecov.yml @@ -13,13 +13,13 @@ coverage: # basic target: auto threshold: 1% - base: auto + base: auto patch: default: # basic target: 50% threshold: 1% - base: auto + base: auto comment: layout: "reach, diff, flags, files" diff --git a/.github/ISSUE_TEMPLATE.md b/.github/ISSUE_TEMPLATE.md index 66f104c2a1..c9dfdbc6bf 100644 --- a/.github/ISSUE_TEMPLATE.md +++ b/.github/ISSUE_TEMPLATE.md @@ -1,4 +1,4 @@ -If you have questions about a specific use case, or you are not sure whether this is a bug or not, please post it to our discourse channel: https://discourse.pymc.io +If you have questions about a specific use case, or you are not sure whether this is a bug or not, please post it to our discourse channel: https://discourse.pymc.io ## Description of your problem diff --git a/.github/workflows/autoupdate-pre-commit-config.yml b/.github/workflows/autoupdate-pre-commit-config.yml new file mode 100644 index 0000000000..48bc700af6 --- /dev/null +++ b/.github/workflows/autoupdate-pre-commit-config.yml @@ -0,0 +1,33 @@ +name: "Update pre-commit config" + +on: + schedule: + - cron: "0 7 * * 1" # At 07:00 on each Monday. + workflow_dispatch: + +jobs: + update-pre-commit: + if: github.repository_owner == 'pymc-devs' + name: Autoupdate pre-commit config + runs-on: ubuntu-latest + steps: + - name: Set up Python + uses: actions/setup-python@v2 + - name: Cache multiple paths + uses: actions/cache@v2 + with: + path: | + ~/.cache/pre-commit + ~/.cache/pip + key: pre-commit-autoupdate-${{ runner.os }}-build + - name: Update pre-commit config packages + uses: technote-space/create-pr-action@v2 + with: + GITHUB_TOKEN: ${{ secrets.ACTION_TRIGGER_TOKEN }} + EXECUTE_COMMANDS: | + pip install pre-commit + pre-commit autoupdate || (exit 0); + pre-commit run -a || (exit 0); + COMMIT_MESSAGE: "⬆️ UPGRADE: Autoupdate pre-commit config" + PR_BRANCH_NAME: "pre-commit-config-update-${PR_ID}" + PR_TITLE: "⬆️ UPGRADE: Autoupdate pre-commit config" diff --git a/.github/workflows/pre-commit.yml b/.github/workflows/pre-commit.yml index 723347913a..6131de7400 100644 --- a/.github/workflows/pre-commit.yml +++ b/.github/workflows/pre-commit.yml @@ -8,6 +8,8 @@ on: jobs: pre-commit: runs-on: ubuntu-latest + env: + SKIP: no-commit-to-branch steps: - uses: actions/checkout@v2 - uses: actions/setup-python@v2 diff --git a/.github/workflows/pytest.yml b/.github/workflows/pytest.yml index eeeb5c322f..e1da3c3bb0 100644 --- a/.github/workflows/pytest.yml +++ b/.github/workflows/pytest.yml @@ -29,6 +29,7 @@ jobs: --ignore=pymc3/tests/test_shape_handling.py --ignore=pymc3/tests/test_shared.py --ignore=pymc3/tests/test_smc.py + --ignore=pymc3/tests/test_step.py --ignore=pymc3/tests/test_updates.py --ignore=pymc3/tests/test_variational_inference.py - | @@ -47,6 +48,7 @@ jobs: - | pymc3/tests/test_distributions_timeseries.py pymc3/tests/test_shape_handling.py + pymc3/tests/test_step.py pymc3/tests/test_updates.py pymc3/tests/test_variational_inference.py - | diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index b7d088d87a..89da779eef 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -1,11 +1,19 @@ +exclude: ^(docs/logos|pymc3/examples/data)/ repos: - repo: https://github.com/pre-commit/pre-commit-hooks rev: v3.3.0 hooks: - - id: end-of-file-fixer + - id: check-merge-conflict - id: check-toml + - id: check-yaml + - id: debug-statements + - id: end-of-file-fixer + - id: no-commit-to-branch + args: [--branch, master] + - id: requirements-txt-fixer + - id: trailing-whitespace - repo: https://github.com/nbQA-dev/nbQA - rev: 0.4.1 + rev: 0.5.4 hooks: - id: nbqa-black additional_dependencies: [black==20.8b1] @@ -13,12 +21,11 @@ repos: additional_dependencies: [isort==5.6.4] - id: nbqa-pyupgrade additional_dependencies: [pyupgrade==2.7.4] - - repo: https://github.com/PyCQA/isort rev: 5.6.4 hooks: - id: isort - name: isort (python) + name: isort - repo: https://github.com/asottile/pyupgrade rev: v2.7.4 hooks: diff --git a/GOVERNANCE.md b/GOVERNANCE.md index 99b9f0ce2f..68c6240543 100644 --- a/GOVERNANCE.md +++ b/GOVERNANCE.md @@ -13,7 +13,7 @@ developed openly and hosted in public GitHub repositories under the [GitHub organization](https://github.com/pymc-devs/pymc3). Examples of Project Software include the PyMC3 code and the Documentation, etc. The Services run by the Project consist of public websites and web-services that are hosted -at [http://pymc-devs.github.io/pymc3/](http://pymc-devs.github.io/pymc3/) +at [http://pymc-devs.github.io/pymc3/](http://pymc-devs.github.io/pymc3/) The Project is developed by a team of distributed developers, called Contributors. Contributors are individuals who have contributed code, documentation, designs or other work to one or more Project repositories. @@ -131,7 +131,7 @@ The current Steering Council membership comprises: - Junpeng Lao - Osvaldo Martin - Austin Rochford -- Adrian Seyboldt +- Adrian Seyboldt - Thomas Wiecki ### Council membership diff --git a/README.rst b/README.rst index de7e457b67..70a37dd4f7 100644 --- a/README.rst +++ b/README.rst @@ -21,11 +21,11 @@ The future of PyMC3 & Theano There have been many questions and uncertainty around the future of PyMC3 since Theano stopped getting developed by the original authors, and we started experiments with PyMC4. -We are happy to announce that PyMC3 on Theano (which we are `developing further `__) -with a new JAX backend is the future. PyMC4 will not be developed further. +We are happy to announce that PyMC3 on Theano (which we are `developing further `__) +with a new JAX backend is the future. PyMC4 will not be developed further. See the `full announcement `__ -for more details. +for more details. Features ======== @@ -119,7 +119,7 @@ Another option is to clone the repository and install PyMC3 using Dependencies ============ -PyMC3 is tested on Python 3.6, 3.7, and 3.8 and depends on `Theano-PyMC `__, +PyMC3 is tested on Python 3.6, 3.7, and 3.8 and depends on `Theano-PyMC `__, NumPy, SciPy, and Pandas (see `requirements.txt `__ for version information). diff --git a/binder/requirements.txt b/binder/requirements.txt index d63469859d..a06c1205d9 100644 --- a/binder/requirements.txt +++ b/binder/requirements.txt @@ -1,4 +1,4 @@ -r ../requirements-dev.txt -# this installs pymc3 itself. it is funny that this is an absolute path, +# this installs pymc3 itself. it is funny that this is an absolute path, # but reqirements-dev.txt is relative. . diff --git a/docs/source/Gaussian_Processes.rst b/docs/source/Gaussian_Processes.rst index 6ca96db1f6..f6913344ca 100644 --- a/docs/source/Gaussian_Processes.rst +++ b/docs/source/Gaussian_Processes.rst @@ -21,7 +21,7 @@ choice as priors over functions due to the marginalization and conditioning properties of the multivariate normal distribution. Usually, the marginal distribution over :math:`f(x)` is evaluated during the inference step. The conditional distribution is then used for predicting the function values -:math:`f(x_*)` at new points, :math:`x_*`. +:math:`f(x_*)` at new points, :math:`x_*`. The joint distribution of :math:`f(x)` and :math:`f(x_*)` is multivariate normal, @@ -29,9 +29,9 @@ normal, .. math:: \begin{bmatrix} f(x) \\ f(x_*) \\ \end{bmatrix} \sim - \text{N}\left( + \text{N}\left( \begin{bmatrix} m(x) \\ m(x_*) \\ \end{bmatrix} \,, - \begin{bmatrix} k(x,x') & k(x_*, x) \\ + \begin{bmatrix} k(x,x') & k(x_*, x) \\ k(x_*, x) & k(x_*, x_*') \\ \end{bmatrix} \right) \,. @@ -41,21 +41,21 @@ distribution is .. math:: - f(x_*) \mid f(x) \sim \text{N}\left( k(x_*, x) k(x, x)^{-1} [f(x) - m(x)] + m(x_*) ,\, + f(x_*) \mid f(x) \sim \text{N}\left( k(x_*, x) k(x, x)^{-1} [f(x) - m(x)] + m(x_*) ,\, k(x_*, x_*) - k(x, x_*) k(x, x)^{-1} k(x, x_*) \right) \,. .. note:: For more information on GPs, check out the book `Gaussian Processes for Machine Learning `_ by Rasmussen & - Williams, or `this introduction `_ + Williams, or `this introduction `_ by D. Mackay. PyMC3 is a great environment for working with fully Bayesian Gaussian Process -models. GPs in PyMC3 have a clear syntax and are highly composable, and many -predefined covariance functions (or kernels), mean functions, and several GP +models. GPs in PyMC3 have a clear syntax and are highly composable, and many +predefined covariance functions (or kernels), mean functions, and several GP implementations are included. GPs are treated as distributions that can be -used within larger or hierarchical models, not just as standalone regression +used within larger or hierarchical models, not just as standalone regression models. Mean and covariance functions @@ -83,7 +83,7 @@ specify :code:`input_dim`, the total number of columns of :code:`X`, and :code:`active_dims`, which of those columns or dimensions the covariance function will act on, is because :code:`cov_func` hasn't actually seen the input data yet. The :code:`active_dims` argument is optional, and defaults to -all columns of the matrix of inputs. +all columns of the matrix of inputs. Covariance functions in PyMC3 closely follow the algebraic rules for kernels, which allows users to combine covariance functions into new ones, for example: @@ -97,13 +97,13 @@ which allows users to combine covariance functions into new ones, for example: cov_func = pm.gp.cov.ExpQuad(...) * pm.gp.cov.Periodic(...) - + - The product (or sum) of a covariance function with a scalar is a covariance function:: - + cov_func = eta**2 * pm.gp.cov.Matern32(...) - + After the covariance function is defined, it is now a function that is @@ -133,7 +133,7 @@ is:: The first argument is the mean function and the second is the covariance function. We've made the GP object, but we haven't made clear which function it is to be a prior for, what the inputs are, or what parameters it will be -conditioned on. +conditioned on. .. note:: @@ -145,18 +145,18 @@ conditioned on. Calling the `prior` method will create a PyMC3 random variable that represents the latent function :math:`f(x) = \mathbf{f}`:: - + f = gp.prior("f", X) :code:`f` is a random variable that can be used within a PyMC3 model like any other type of random variable. The first argument is the name of the random -variable representing the function we are placing the prior over. -The second argument is the inputs to the function that the prior is over, +variable representing the function we are placing the prior over. +The second argument is the inputs to the function that the prior is over, :code:`X`. The inputs are usually known and present in the data, but they can -also be PyMC3 random variables. If the inputs are a Theano tensor or a +also be PyMC3 random variables. If the inputs are a Theano tensor or a PyMC3 random variable, the :code:`shape` needs to be given. -Usually at this point, inference is performed on the model. The +Usually at this point, inference is performed on the model. The :code:`conditional` method creates the conditional, or predictive, distribution over the latent function at arbitrary :math:`x_*` input points, :math:`f(x_*)`. To construct the conditional distribution we write:: @@ -166,7 +166,7 @@ distribution over the latent function at arbitrary :math:`x_*` input points, Additive GPs ============ -The GP implementation in PyMC3 is constructed so that it is easy to define +The GP implementation in PyMC3 is constructed so that it is easy to define additive GPs and sample from individual GP components. We can write:: gp1 = pm.gp.Marginal(mean_func1, cov_func1) @@ -183,18 +183,18 @@ Consider two independent GP distributed functions, :math:`f_1(x) \sim .. math:: - \begin{bmatrix} f_1 \\ f_1^* \\ f_2 \\ f_2^* + \begin{bmatrix} f_1 \\ f_1^* \\ f_2 \\ f_2^* \\ f_1 + f_2 \\ f_1^* + f_2^* \end{bmatrix} \sim - \text{N}\left( + \text{N}\left( \begin{bmatrix} m_1 \\ m_1^* \\ m_2 \\ m_2^* \\ m_1 + m_2 \\ m_1^* + m_2^* \\ \end{bmatrix} \,,\, - \begin{bmatrix} + \begin{bmatrix} K_1 & K_1^* & 0 & 0 & K_1 & K_1^* \\ K_1^{*^T} & K_1^{**} & 0 & 0 & K_1^* & K_1^{**} \\ 0 & 0 & K_2 & K_2^* & K_2 & K_2^{*} \\ 0 & 0 & K_2^{*^T} & K_2^{**} & K_2^{*} & K_2^{**} \\ K_1 & K_1^{*} & K_2 & K_2^{*} & K_1 + K_2 & K_1^{*} + K_2^{*} \\ - K_1^{*^T} & K_1^{**} & K_2^{*^T} & K_2^{**} & K_1^{*^T}+K_2^{*^T} & K_1^{**}+K_2^{**} + K_1^{*^T} & K_1^{**} & K_2^{*^T} & K_2^{**} & K_1^{*^T}+K_2^{*^T} & K_1^{**}+K_2^{**} \end{bmatrix} \right) \,. @@ -220,42 +220,42 @@ other implementations. The first block fits the GP prior. We denote with pm.Model() as model: gp1 = pm.gp.Marginal(mean_func1, cov_func1) gp2 = pm.gp.Marginal(mean_func2, cov_func2) - - # gp represents f1 + f2. + + # gp represents f1 + f2. gp = gp1 + gp2 - + f = gp.marginal_likelihood("f", X, y, noise) - + trace = pm.sample(1000) -To construct the conditional distribution of :code:`gp1` or :code:`gp2`, we -also need to include the additional arguments, :code:`X`, :code:`y`, and +To construct the conditional distribution of :code:`gp1` or :code:`gp2`, we +also need to include the additional arguments, :code:`X`, :code:`y`, and :code:`noise`:: with model: # conditional distributions of f1 and f2 - f1_star = gp1.conditional("f1_star", X_star, + f1_star = gp1.conditional("f1_star", X_star, given={"X": X, "y": y, "noise": noise, "gp": gp}) - f2_star = gp2.conditional("f2_star", X_star, + f2_star = gp2.conditional("f2_star", X_star, given={"X": X, "y": y, "noise": noise, "gp": gp}) # conditional of f1 + f2, `given` not required f_star = gp.conditional("f_star", X_star) -This second block produces the conditional distributions. Notice that extra +This second block produces the conditional distributions. Notice that extra arguments are required for conditionals of :math:`f1` and :math:`f2`, but not -:math:`f`. This is because those arguments are cached when +:math:`f`. This is because those arguments are cached when :code:`.marginal_likelihood` is called on :code:`gp`. .. note:: When constructing conditionals, the additional arguments :code:`X`, :code:`y`, :code:`noise` and :code:`gp` must be provided as a dict called `given`! -Since the marginal likelihoood method of :code:`gp1` or :code:`gp2` weren't called, -their conditionals need to be provided with the required inputs. In the same +Since the marginal likelihoood method of :code:`gp1` or :code:`gp2` weren't called, +their conditionals need to be provided with the required inputs. In the same fashion as the prior, :code:`f_star`, :code:`f1_star` and :code:`f2_star` are random -variables that can now be used like any other random variable in PyMC3. +variables that can now be used like any other random variable in PyMC3. Check the notebooks for detailed demonstrations of the usage of GP functionality in PyMC3. diff --git a/docs/source/Probability_Distributions.rst b/docs/source/Probability_Distributions.rst index af77f28b28..ae6e726570 100644 --- a/docs/source/Probability_Distributions.rst +++ b/docs/source/Probability_Distributions.rst @@ -4,24 +4,24 @@ Probability Distributions in PyMC3 ********************************** -The most fundamental step in building Bayesian models is the specification of a full probability model for the problem at hand. This primarily involves assigning parametric statistical distributions to unknown quantities in the model, in addition to appropriate functional forms for likelihoods to represent the information from the data. To this end, PyMC3 includes a comprehensive set of pre-defined statistical distributions that can be used as model building blocks. +The most fundamental step in building Bayesian models is the specification of a full probability model for the problem at hand. This primarily involves assigning parametric statistical distributions to unknown quantities in the model, in addition to appropriate functional forms for likelihoods to represent the information from the data. To this end, PyMC3 includes a comprehensive set of pre-defined statistical distributions that can be used as model building blocks. For example, if we wish to define a particular variable as having a normal prior, we can specify that using an instance of the ``Normal`` class. :: with pm.Model(): - + x = pm.Normal('x', mu=0, sigma=1) - + A variable requires at least a ``name`` argument, and zero or more model parameters, depending on the distribution. Parameter names vary by distribution, using conventional names wherever possible. The example above defines a scalar variable. To make a vector-valued variable, a ``shape`` argument should be provided; for example, a 3x3 matrix of beta random variables could be defined with: :: with pm.Model(): - + p = pm.Beta('p', 1, 1, shape=(3, 3)) - + Probability distributions are all subclasses of ``Distribution``, which in turn has two major subclasses: ``Discrete`` and ``Continuous``. In terms of data types, a ``Continuous`` random variable is given whichever floating point type is defined by ``theano.config.floatX``, while ``Discrete`` variables are given ``int16`` types when ``theano.config.floatX`` is ``float32``, and ``int64`` otherwise. All distributions in ``pm.distributions`` will have two important methods: ``random()`` and ``logp()`` with the following signatures: @@ -29,22 +29,22 @@ All distributions in ``pm.distributions`` will have two important methods: ``ran :: class SomeDistribution(Continuous): - + def random(self, point=None, size=None): ... return random_samples - + def logp(self, value): ... return total_log_prob - + PyMC3 expects the ``logp()`` method to return a log-probability evaluated at the passed ``value`` argument. This method is used internally by all of the inference methods to calculate the model log-probability that is used for fitting models. The ``random()`` method is used to simulate values from the variable, and is used internally for posterior predictive checks. Custom distributions ==================== -Despite the fact that PyMC3 ships with a large set of the most common probability distributions, some problems may require the use of functional forms that are less common, and not available in ``pm.distributions``. One example of this is in survival analysis, where time-to-event data is modeled using probability densities that are designed to accommodate censored data. +Despite the fact that PyMC3 ships with a large set of the most common probability distributions, some problems may require the use of functional forms that are less common, and not available in ``pm.distributions``. One example of this is in survival analysis, where time-to-event data is modeled using probability densities that are designed to accommodate censored data. An exponential survival function, where :math:`c=0` denotes failure (or non-survival), is defined by: @@ -64,7 +64,7 @@ For the exponential survival function, this is: exp_surv = pm.DensityDist('exp_surv', logp, observed={'failure':failure, 'value':t}) -Similarly, if a random number generator is required, a function returning random numbers corresponding to the probability distribution can be passed as the ``random`` argument. +Similarly, if a random number generator is required, a function returning random numbers corresponding to the probability distribution can be passed as the ``random`` argument. Using PyMC distributions without a Model @@ -75,22 +75,22 @@ Distribution objects, as we have defined them so far, are only usable inside of :: y = Binomial('y', n=10, p=0.5) - - + + :: TypeError: No context on context stack - + This is because the distribution classes are designed to integrate themselves automatically inside of a PyMC model. When a model cannot be found, it fails. However, each ``Distribution`` has a ``dist`` class method that returns a stripped-down distribution object that can be used outside of a PyMC model. -For example, a standalone binomial distribution can be created by: - +For example, a standalone binomial distribution can be created by: + :: y = pm.Binomial.dist(n=10, p=0.5) - + This allows for probabilities to be calculated and random numbers to be drawn. - + :: >>> y.logp(4).eval() @@ -99,7 +99,7 @@ This allows for probabilities to be calculated and random numbers to be drawn. >>> y.random(size=3) array([5, 4, 3]) - + Auto-transformation =================== @@ -112,8 +112,8 @@ For example, the gamma distribution is positive-valued. If we define one for a m with pm.Model() as model: g = pm.Gamma('g', 1, 1) -We notice a modified variable inside the model ``vars`` attribute, which holds the free variables in the model. - +We notice a modified variable inside the model ``vars`` attribute, which holds the free variables in the model. + :: >>> model.vars @@ -122,7 +122,7 @@ We notice a modified variable inside the model ``vars`` attribute, which holds t As the name suggests, the variable ``g`` has been log-transformed, and this is the space over which sampling takes place. The original variable is simply treated as a deterministic variable, since the value of the transformed variable is simply back-transformed when a sample is drawn in order to recover the original variable. Hence, ``g`` resides in the ``model.deterministics`` list. - + :: >>> model.deterministics diff --git a/docs/source/api/distributions/continuous.rst b/docs/source/api/distributions/continuous.rst index 9be0b9183c..fcc49d2e11 100644 --- a/docs/source/api/distributions/continuous.rst +++ b/docs/source/api/distributions/continuous.rst @@ -21,7 +21,7 @@ Continuous Cauchy HalfCauchy Gamma - InverseGamma + InverseGamma Weibull Lognormal ChiSquared diff --git a/docs/source/api/math.rst b/docs/source/api/math.rst index 80741743cb..c548d13216 100644 --- a/docs/source/api/math.rst +++ b/docs/source/api/math.rst @@ -2,8 +2,8 @@ Math ==== -This submodule contains various mathematical functions. Most of them are imported directly -from theano.tensor (see there for more details). Doing any kind of math with PyMC3 random +This submodule contains various mathematical functions. Most of them are imported directly +from theano.tensor (see there for more details). Doing any kind of math with PyMC3 random variables, or defining custom likelihoods or priors requires you to use these theano expressions rather than NumPy or Python code. diff --git a/pymc3/backends/__init__.py b/pymc3/backends/__init__.py index d746b4408c..ab0faa3ded 100644 --- a/pymc3/backends/__init__.py +++ b/pymc3/backends/__init__.py @@ -52,7 +52,7 @@ >>> sliced_trace = trace[1000:] The backend for the new trace is always NDArray, regardless of the -type of original trace. +type of original trace. Loading a saved backend ----------------------- diff --git a/requirements-dev.txt b/requirements-dev.txt index c5a03f945d..1e127fe8b9 100644 --- a/requirements-dev.txt +++ b/requirements-dev.txt @@ -1,22 +1,22 @@ bokeh>=0.12.13 coverage>=5.1 +dill graphviz>=0.8.3 ipython nbsphinx>=0.4.2 nose>=1.3.7 nose-parameterized==0.6.0 numpydoc>=0.9.1 +parameterized pre-commit>=2.8.0 pycodestyle>=2.3.1 pyflakes>=1.5.0 pylint>=1.7.4 -pytest-cov>=2.5.1 pytest>=3.0.7 +pytest-cov>=2.5.1 pytest-xdist recommonmark>=0.4.0 seaborn>=0.8.1 -sphinx-autobuild==0.7.1 sphinx>=1.5.5 +sphinx-autobuild==0.7.1 watermark -parameterized -dill diff --git a/requirements.txt b/requirements.txt index 16ddd673aa..f97594daf9 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,11 +1,11 @@ arviz>=0.9.0 -theano-pymc==1.0.11 +contextvars; python_version < '3.7' +dataclasses; python_version < '3.7' +dill +fastprogress>=0.2.0 numpy>=1.13.0 -scipy>=0.18.1 pandas>=0.18.0 patsy>=0.5.1 -fastprogress>=0.2.0 +scipy>=0.18.1 +theano-pymc==1.0.11 typing-extensions>=3.7.4 -dataclasses; python_version < '3.7' -contextvars; python_version < '3.7' -dill