diff --git a/pymc3/distributions/multivariate.py b/pymc3/distributions/multivariate.py index a2564b099e..89fdac3747 100644 --- a/pymc3/distributions/multivariate.py +++ b/pymc3/distributions/multivariate.py @@ -1,18 +1,23 @@ -import warnings +#!/usr/bin/env python +# -*- coding: utf-8 -*- +import warnings import numpy as np -import theano.tensor as tt +import scipy import theano +import theano.tensor as tt from scipy import stats from theano.tensor.nlinalg import det, matrix_inverse, trace, eigh from . import transforms from .distribution import Continuous, Discrete, draw_values, generate_samples +from ..model import Deterministic +from .continuous import ChiSquared, Normal from .special import gammaln, multigammaln from .dist_math import bound, logpow, factln -__all__ = ['MvNormal', 'Dirichlet', 'Multinomial', 'Wishart', 'LKJCorr'] +__all__ = ['MvNormal', 'Dirichlet', 'Multinomial', 'Wishart', 'WishartBartlett', 'LKJCorr'] class MvNormal(Continuous): @@ -310,6 +315,65 @@ def logp(self, X): n > (p - 1)) +def WishartBartlett(name, S, nu, is_cholesky=False, return_cholesky=False): + """ + Bartlett decomposition of the Wishart distribution. As the Wishart + distribution requires the matrix to be symmetric positive semi-definite + it is impossible for MCMC to ever propose acceptable matrices. + + Instead, we can use the Barlett decomposition which samples a lower + diagonal matrix. Specifically: + + If L ~ [[sqrt(c_1), 0, ...], + [z_21, sqrt(c_1), 0, ...], + [z_31, z32, sqrt(c3), ...]] + with c_i ~ Chi²(n-i+1) and n_ij ~ N(0, 1), then + L * A * A.T * L.T ~ Wishart(L * L.T, nu) + + See http://en.wikipedia.org/wiki/Wishart_distribution#Bartlett_decomposition + for more information. + + :Parameters: + S : ndarray + p x p positive definite matrix + Or: + p x p lower-triangular matrix that is the Cholesky factor + of the covariance matrix. + nu : int + Degrees of freedom, > dim(S). + is_cholesky : bool (default=False) + Input matrix S is already Cholesky decomposed as S.T * S + return_cholesky : bool (default=False) + Only return the Cholesky decomposed matrix. + + :Note: + This is not a standard Distribution class but follows a similar + interface. Besides the Wishart distribution, it will add RVs + c and z to your model which make up the matrix. + """ + + L = S if is_cholesky else scipy.linalg.cholesky(S) + + diag_idx = np.diag_indices_from(S) + tril_idx = np.tril_indices_from(S, k=-1) + n_diag = len(diag_idx[0]) + n_tril = len(tril_idx[0]) + c = T.sqrt(ChiSquared('c', nu - np.arange(2, 2+n_diag), shape=n_diag)) + print('Added new variable c to model diagonal of Wishart.') + z = Normal('z', 0, 1, shape=n_tril) + print('Added new variable z to model off-diagonals of Wishart.') + # Construct A matrix + A = T.zeros(S.shape, dtype=np.float32) + A = T.set_subtensor(A[diag_idx], c) + A = T.set_subtensor(A[tril_idx], z) + + # L * A * A.T * L.T ~ Wishart(L*L.T, nu) + if return_cholesky: + return Deterministic(name, T.dot(L, A)) + else: + return Deterministic(name, T.dot(T.dot(T.dot(L, A), A.T), L.T)) + + class LKJCorr(Continuous): R""" diff --git a/pymc3/examples/wishart.py b/pymc3/examples/wishart.py new file mode 100644 index 0000000000..0b58ffab03 --- /dev/null +++ b/pymc3/examples/wishart.py @@ -0,0 +1,48 @@ +import pymc3 as pm +import numpy as np +import theano +import theano.tensor as T +import scipy.stats +import matplotlib.pyplot as plt + +# Covariance matrix we want to recover +covariance = np.matrix([[2, .5, -.5], + [.5, 1., 0.], + [-.5, 0., 0.5]]) + +prec = np.linalg.inv(covariance) + +mean = [.5, 1, .2] +data = scipy.stats.multivariate_normal(mean, covariance).rvs(5000) + +plt.scatter(data[:, 0], data[:, 1]) + +with pm.Model() as model: + S = np.eye(3) + nu = 5 + mu = pm.Normal('mu', mu=0, sd=1, shape=3) + + # Use the transformed Wishart distribution + # Under the hood this will do a Cholesky decomposition + # of S and add two RVs to the sampler: c and z + prec = pm.WishartBartlett('prec', S, nu) + + # To be able to compare it to truth, convert precision to covariance + cov = pm.Deterministic('cov', T.nlinalg.matrix_inverse(prec)) + + lp = pm.MvNormal('likelihood', mu=mu, tau=prec, observed=data) + + start = pm.find_MAP() + step = pm.NUTS(scaling=start) + + +def run(n = 3000): + if n == "short": + n = 50 + with model: + trace = pm.sample(n, step, start) + + pm.traceplot(trace); + +if __name__ == '__main__': + run()