diff --git a/RELEASE-NOTES.md b/RELEASE-NOTES.md
index dceefb95a1..89e623e4fa 100644
--- a/RELEASE-NOTES.md
+++ b/RELEASE-NOTES.md
@@ -5,20 +5,26 @@
 ### New features
 - Add `logit_p` keyword to `pm.Bernoulli`, so that users can specify the logit of the success probability. This is faster and more stable than using `p=tt.nnet.sigmoid(logit_p)`.
-- Add `random` keyword to `pm.DensityDist` thus enabling users to pass custom random method which in turn makes sampling from a `DensityDist` possible.
+- Add `random` keyword to `pm.DensityDist`, thus enabling users to pass a custom random method, which in turn makes sampling from a `DensityDist` possible.
 - Effective sample size computation is updated. The estimation uses Geyer's initial positive sequence, which no longer truncates the autocorrelation series inaccurately. `pm.diagnostics.effective_n` can now report N_eff>N.
+- Added `KroneckerNormal` distribution and a corresponding `MarginalKron`
+  Gaussian Process implementation for efficient inference, along with
+  lower-level functions such as `cartesian` and `kronecker` products.
+- Added `Coregion` covariance function.
 
 ### Fixes
 
 - `VonMises` does not overflow for large values of kappa. i0 and i1 have been removed and we now use log_i0 to compute the logp.
 
+
 ### Deprecations
 
 - DIC and BPIC calculations have been removed
 - `njobs` and `nchains` kwarg are deprecated in favor of `cores` and `chains` for `sample`
 - `lag` kwarg in `pm.stats.autocorr` and `pm.stats.autocov` is deprecated.
 
+
 ## PyMC 3.3 (January 9, 2018)
 
 ### New features
diff --git a/pymc3/distributions/__init__.py b/pymc3/distributions/__init__.py
index 8892592f18..932432ce2b 100644
--- a/pymc3/distributions/__init__.py
+++ b/pymc3/distributions/__init__.py
@@ -57,6 +57,7 @@
 from .multivariate import MvNormal
 from .multivariate import MatrixNormal
+from .multivariate import KroneckerNormal
 from .multivariate import MvStudentT
 from .multivariate import Dirichlet
 from .multivariate import Multinomial
@@ -123,6 +124,7 @@
            'TensorType',
            'MvNormal',
            'MatrixNormal',
+           'KroneckerNormal',
            'MvStudentT',
            'Dirichlet',
            'Multinomial',
diff --git a/pymc3/distributions/dist_math.py b/pymc3/distributions/dist_math.py
index 2250cf1287..e7010d60a1 100644
--- a/pymc3/distributions/dist_math.py
+++ b/pymc3/distributions/dist_math.py
@@ -13,6 +13,9 @@
 from .special import gammaln
 from pymc3.theanof import floatX
+from six.moves import xrange
+from functools import partial
+
 f = floatX
 c = - .5 * np.log(2. * np.pi)
@@ -326,3 +329,135 @@ def grad(self, inputs, grads):
         x_grad, = grads
         return [x_grad * self.grad_op(x)]
+
+
+# Custom Eigh, EighGrad, and eigh are required until
+# https://github.com/Theano/Theano/pull/6557 is handled, since lambdas
+# cannot be used with pickling.
+class Eigh(tt.nlinalg.Eig):
+    """
+    Return the eigenvalues and eigenvectors of a Hermitian or symmetric matrix.
+
+    This is a copy of Eigh from theano that calls an EighGrad which uses
+    partial instead of lambda. Once this has been merged with theano this
+    should be removed.
+    """
+
+    _numop = staticmethod(np.linalg.eigh)
+    __props__ = ('UPLO',)
+
+    def __init__(self, UPLO='L'):
+        assert UPLO in ['L', 'U']
+        self.UPLO = UPLO
+
+    def make_node(self, x):
+        x = tt.as_tensor_variable(x)
+        assert x.ndim == 2
+        # NumPy's linalg.eigh may return either double or single
+        # precision eigenvalues depending on the installed version of
+        # LAPACK. Rather than trying to reproduce the (rather
+        # involved) logic, we just probe linalg.eigh with a trivial
+        # input.
+ w_dtype = self._numop([[np.dtype(x.dtype).type()]])[0].dtype.name + w = theano.tensor.vector(dtype=w_dtype) + v = theano.tensor.matrix(dtype=x.dtype) + return theano.gof.Apply(self, [x], [w, v]) + + def perform(self, node, inputs, outputs): + (x,) = inputs + (w, v) = outputs + w[0], v[0] = self._numop(x, self.UPLO) + + def grad(self, inputs, g_outputs): + r"""The gradient function should return + .. math:: \sum_n\left(W_n\frac{\partial\,w_n} + {\partial a_{ij}} + + \sum_k V_{nk}\frac{\partial\,v_{nk}} + {\partial a_{ij}}\right), + where [:math:`W`, :math:`V`] corresponds to ``g_outputs``, + :math:`a` to ``inputs``, and :math:`(w, v)=\mbox{eig}(a)`. + Analytic formulae for eigensystem gradients are well-known in + perturbation theory: + .. math:: \frac{\partial\,w_n} + {\partial a_{ij}} = v_{in}\,v_{jn} + .. math:: \frac{\partial\,v_{kn}} + {\partial a_{ij}} = + \sum_{m\ne n}\frac{v_{km}v_{jn}}{w_n-w_m} + """ + x, = inputs + w, v = self(x) + # Replace gradients wrt disconnected variables with + # zeros. This is a work-around for issue #1063. + gw, gv = tt.nlinalg._zero_disconnected([w, v], g_outputs) + return [EighGrad(self.UPLO)(x, w, v, gw, gv)] + + +class EighGrad(theano.Op): + """ + Gradient of an eigensystem of a Hermitian matrix. + + This is a copy of EighGrad from theano that uses partial instead of lambda. + Once this has been merged with theano this should be removed. + """ + + __props__ = ('UPLO',) + + def __init__(self, UPLO='L'): + assert UPLO in ['L', 'U'] + self.UPLO = UPLO + if UPLO == 'L': + self.tri0 = np.tril + self.tri1 = partial(np.triu, k=1) + else: + self.tri0 = np.triu + self.tri1 = partial(np.tril, k=-1) + + def make_node(self, x, w, v, gw, gv): + x, w, v, gw, gv = map(tt.as_tensor_variable, (x, w, v, gw, gv)) + assert x.ndim == 2 + assert w.ndim == 1 + assert v.ndim == 2 + assert gw.ndim == 1 + assert gv.ndim == 2 + out_dtype = theano.scalar.upcast(x.dtype, w.dtype, v.dtype, + gw.dtype, gv.dtype) + out = theano.tensor.matrix(dtype=out_dtype) + return theano.gof.Apply(self, [x, w, v, gw, gv], [out]) + + def perform(self, node, inputs, outputs): + """ + Implements the "reverse-mode" gradient for the eigensystem of + a square matrix. + """ + x, w, v, W, V = inputs + N = x.shape[0] + outer = np.outer + + def G(n): + return sum(v[:, m] * V.T[n].dot(v[:, m]) / (w[n] - w[m]) + for m in xrange(N) if m != n) + + g = sum(outer(v[:, n], v[:, n] * W[n] + G(n)) + for n in xrange(N)) + + # Numpy's eigh(a, 'L') (eigh(a, 'U')) is a function of tril(a) + # (triu(a)) only. This means that partial derivative of + # eigh(a, 'L') (eigh(a, 'U')) with respect to a[i,j] is zero + # for i < j (i > j). At the same time, non-zero components of + # the gradient must account for the fact that variation of the + # opposite triangle contributes to variation of two elements + # of Hermitian (symmetric) matrix. The following line + # implements the necessary logic. + out = self.tri0(g) + self.tri1(g).T + + # Make sure we return the right dtype even if NumPy performed + # upcasting in self.tri0. 
+        outputs[0][0] = np.asarray(out, dtype=node.outputs[0].dtype)
+
+    def infer_shape(self, node, shapes):
+        return [shapes[0]]
+
+
+def eigh(a, UPLO='L'):
+    """A copy, remove with Eigh and EighGrad when possible"""
+    return Eigh(UPLO)(a)
diff --git a/pymc3/distributions/multivariate.py b/pymc3/distributions/multivariate.py
index 50a36d2166..3af91a8b26 100755
--- a/pymc3/distributions/multivariate.py
+++ b/pymc3/distributions/multivariate.py
@@ -20,12 +20,14 @@
 from ..model import Deterministic
 from .continuous import ChiSquared, Normal
 from .special import gammaln, multigammaln
-from .dist_math import bound, logpow, factln, Cholesky
+from .dist_math import bound, logpow, factln, Cholesky, eigh
+from ..math import kron_dot, kron_diag, kron_solve_lower, kronecker
 
 __all__ = ['MvNormal', 'MvStudentT', 'Dirichlet',
            'Multinomial', 'Wishart', 'WishartBartlett',
-           'LKJCorr', 'LKJCholeskyCov', 'MatrixNormal']
+           'LKJCorr', 'LKJCholeskyCov', 'MatrixNormal',
+           'KroneckerNormal']
 
 
 class _QuadFormBase(Continuous):
@@ -1293,3 +1295,220 @@ def logp(self, value):
         n = self.n
         norm = - 0.5 * m * n * pm.floatX(np.log(2 * np.pi))
         return norm - 0.5*trquaddist - m*half_collogdet - n*half_rowlogdet
+
+
+class KroneckerNormal(Continuous):
+    R"""
+    Multivariate normal log-likelihood with Kronecker-structured covariance.
+
+    .. math::
+
+       f(x \mid \mu, K) =
+           \frac{1}{(2\pi)^{N/2} |K|^{1/2}}
+           \exp\left\{ -\frac{1}{2} (x-\mu)^{\prime} K^{-1} (x-\mu) \right\}
+
+    ========  ==================================================
+    Support   :math:`x \in \mathbb{R}^N`
+    Mean      :math:`\mu`
+    Variance  :math:`K = \bigotimes K_i + \sigma^2 I_N`
+    ========  ==================================================
+
+    Parameters
+    ----------
+    mu : array
+        Vector of means, just as in `MvNormal`.
+    covs : list of arrays
+        The set of covariance matrices :math:`[K_1, K_2, ...]` to be
+        Kroneckered in the order provided :math:`\bigotimes K_i`.
+    chols : list of arrays
+        The set of lower Cholesky matrices :math:`[L_1, L_2, ...]` such that
+        :math:`K_i = L_i L_i'`.
+    evds : list of tuples
+        The set of eigenvalue-vector, eigenvector-matrix pairs
+        :math:`[(v_1, Q_1), (v_2, Q_2), ...]` such that
+        :math:`K_i = Q_i \text{diag}(v_i) Q_i'`. For example::
+
+            v_i, Q_i = tt.nlinalg.eigh(K_i)
+
+    sigma : scalar, Variable
+        Standard deviation of the Gaussian white noise.
+
+    Examples
+    --------
+    Define a multivariate normal variable with a covariance
+    :math:`K = K_1 \otimes K_2`
+
+    .. code:: python
+
+        K1 = np.array([[1., 0.5], [0.5, 2]])
+        K2 = np.array([[1., 0.4, 0.2], [0.4, 2, 0.3], [0.2, 0.3, 1]])
+        covs = [K1, K2]
+        N = 6
+        mu = np.zeros(N)
+        with pm.Model() as model:
+            vals = pm.KroneckerNormal('vals', mu=mu, covs=covs, shape=N)
+
+    Efficiency gains are made by Cholesky-decomposing :math:`K_1` and
+    :math:`K_2` individually rather than the larger :math:`K` matrix. Although
+    only two matrices :math:`K_1` and :math:`K_2` are shown here, an arbitrary
+    number of submatrices can be combined in this way. Cholesky factorizations
+    and eigendecompositions can be provided instead
+
+    .. code:: python
+
+        chols = [np.linalg.cholesky(Ki) for Ki in covs]
+        evds = [np.linalg.eigh(Ki) for Ki in covs]
+        with pm.Model() as model:
+            vals2 = pm.KroneckerNormal('vals2', mu=mu, chols=chols, shape=N)
+            # or
+            vals3 = pm.KroneckerNormal('vals3', mu=mu, evds=evds, shape=N)
+
+    neither of which will be converted. Diagonal noise can also be added to
+    the covariance matrix, :math:`K = K_1 \otimes K_2 + \sigma^2 I_N`.
+ Despite the noise removing the overall Kronecker structure of the matrix, + `KroneckerNormal` can continue to make efficient calculations by + utilizing eigendecompositons of the submatrices behind the scenes [1]. + Thus, + + .. code:: python + + sigma = 0.1 + with pm.Model() as noise_model: + vals = pm.KroneckerNormal('vals', mu=mu, covs=covs, sigma=sigma, shape=N) + vals2 = pm.KroneckerNormal('vals2', mu=mu, chols=chols, sigma=sigma, shape=N) + vals3 = pm.KroneckerNormal('vals3', mu=mu, evds=evds, sigma=sigma, shape=N) + + are identical, with `covs` and `chols` each converted to + eigendecompositions. + + References + ---------- + .. [1] Saatchi, Y. (2011). "Scalable inference for structured Gaussian process models" + """ + + def __init__(self, mu, covs=None, chols=None, evds=None, sigma=None, + *args, **kwargs): + self._setup(covs, chols, evds, sigma) + super(KroneckerNormal, self).__init__(*args, **kwargs) + self.mu = tt.as_tensor_variable(mu) + self.mean = self.median = self.mode = self.mu + + def _setup(self, covs, chols, evds, sigma): + self.cholesky = Cholesky(nofail=False, lower=True) + if len([i for i in [covs, chols, evds] if i is not None]) != 1: + raise ValueError('Incompatible parameterization. ' + 'Specify exactly one of covs, chols, ' + 'or evds.') + self._isEVD = False + self.sigma = sigma + self.is_noisy = self.sigma is not None and self.sigma != 0 + if covs is not None: + self._cov_type = 'cov' + self.covs = covs + if self.is_noisy: + # Noise requires eigendecomposition + eigh_map = map(eigh, covs) + self._setup_evd(eigh_map) + else: + # Otherwise use cholesky as usual + self.chols = list(map(self.cholesky, self.covs)) + self.chol_diags = list(map(tt.nlinalg.diag, self.chols)) + self.sizes = tt.as_tensor_variable( + [chol.shape[0] for chol in self.chols]) + self.N = tt.prod(self.sizes) + elif chols is not None: + self._cov_type = 'chol' + if self.is_noisy: # A strange case... 
+ # Noise requires eigendecomposition + covs = [tt.dot(chol, chol.T) for chol in chols] + eigh_map = map(eigh, covs) + self._setup_evd(eigh_map) + else: + self.chols = chols + self.chol_diags = list(map(tt.nlinalg.diag, self.chols)) + self.sizes = tt.as_tensor_variable( + [chol.shape[0] for chol in self.chols]) + self.N = tt.prod(self.sizes) + else: + self._cov_type = 'evd' + self._setup_evd(evds) + + def _setup_evd(self, eigh_iterable): + self._isEVD = True + eigs_sep, Qs = zip(*eigh_iterable) # Unzip + self.Qs = list(map(tt.as_tensor_variable, Qs)) + self.QTs = list(map(tt.transpose, self.Qs)) + + self.eigs_sep = list(map(tt.as_tensor_variable, eigs_sep)) + self.eigs = kron_diag(*self.eigs_sep) # Combine separate eigs + if self.is_noisy: + self.eigs += self.sigma**2 + self.N = self.eigs.shape[0] + + def _setup_random(self): + if not hasattr(self, 'mv_params'): + self.mv_params = {'mu': self.mu} + if self._cov_type == 'cov': + cov = kronecker(*self.covs) + if self.is_noisy: + cov = cov + self.sigma**2 * tt.identity_like(cov) + self.mv_params['cov'] = cov + elif self._cov_type == 'chol': + if self.is_noisy: + covs = [] + for eig, Q in zip(self.eigs_sep, self.Qs): + cov_i = tt.dot(Q, tt.dot(tt.diag(eig), Q.T)) + covs.append(cov_i) + cov = kronecker(*covs) + if self.is_noisy: + cov = cov + self.sigma**2 * tt.identity_like(cov) + self.mv_params['chol'] = self.cholesky(cov) + else: + self.mv_params['chol'] = kronecker(*self.chols) + elif self._cov_type == 'evd': + covs = [] + for eig, Q in zip(self.eigs_sep, self.Qs): + # print() + cov_i = tt.dot(Q, tt.dot(tt.diag(eig), Q.T)) + covs.append(cov_i) + cov = kronecker(*covs) + if self.is_noisy: + cov = cov + self.sigma**2 * tt.identity_like(cov) + self.mv_params['cov'] = cov + + def random(self, point=None, size=None): + # Expand params into terms MvNormal can understand to force consistency + self._setup_random() + dist = MvNormal.dist(**self.mv_params) + return dist.random(point, size) + + def _quaddist(self, value): + """Computes the quadratic (x-mu)^T @ K^-1 @ (x-mu) and log(det(K))""" + if value.ndim > 2 or value.ndim == 0: + raise ValueError('Invalid dimension for value: %s' % value.ndim) + if value.ndim == 1: + onedim = True + value = value[None, :] + else: + onedim = False + + delta = value - self.mu + if self._isEVD: + sqrt_quad = kron_dot(self.QTs, delta.T) + sqrt_quad = sqrt_quad/tt.sqrt(self.eigs[:, None]) + logdet = tt.sum(tt.log(self.eigs)) + else: + sqrt_quad = kron_solve_lower(self.chols, delta.T) + logdet = 0 + for chol_size, chol_diag in zip(self.sizes, self.chol_diags): + logchol = tt.log(chol_diag) * self.N/chol_size + logdet += tt.sum(2*logchol) + # Square each sample + quad = tt.batched_dot(sqrt_quad.T, sqrt_quad.T) + if onedim: + quad = quad[0] + return quad, logdet + + def logp(self, value): + quad, logdet = self._quaddist(value) + return - (quad + logdet + self.N*tt.log(2*np.pi)) / 2.0 diff --git a/pymc3/gp/__init__.py b/pymc3/gp/__init__.py index ca22f64977..d71db6b83e 100644 --- a/pymc3/gp/__init__.py +++ b/pymc3/gp/__init__.py @@ -1,4 +1,4 @@ from . import cov from . import mean from . 
import util
-from .gp import Latent, Marginal, MarginalSparse, TP
+from .gp import Latent, Marginal, MarginalSparse, TP, MarginalKron
diff --git a/pymc3/gp/cov.py b/pymc3/gp/cov.py
index 9628db91b2..ba903c0076 100644
--- a/pymc3/gp/cov.py
+++ b/pymc3/gp/cov.py
@@ -15,7 +15,8 @@
            'Cosine',
            'Periodic',
            'WarpedInput',
-           'Gibbs']
+           'Gibbs',
+           'Coregion']
 
 
 class Covariance(object):
@@ -143,6 +144,41 @@ def __call__(self, X, Xs=None, diag=False):
         return reduce(mul, self.merge_factors(X, Xs, diag))
 
 
+class Kron(Covariance):
+    R"""Form a covariance object that is the Kronecker product of other covariances.
+
+    In contrast to standard multiplication, where each covariance is given the
+    same inputs X and Xs, Kronecker product covariances first split the inputs
+    into their respective spaces (inferred from the input_dim of each object)
+    before forming their product. Kronecker covariances have a larger
+    input dimension than any of their factors, since the inputs are the
+    concatenated columns of their components.
+
+    Factors must be covariances or their combinations; plain arrays will not work.
+    """
+
+    def __init__(self, factor_list):
+        self.input_dims = [factor.input_dim for factor in factor_list]
+        input_dim = sum(self.input_dims)
+        super(Kron, self).__init__(input_dim=input_dim)
+        self.factor_list = factor_list
+
+    def _split(self, X, Xs):
+        indices = np.cumsum(self.input_dims)
+        X_split = np.hsplit(X, indices)
+        if Xs is not None:
+            Xs_split = np.hsplit(Xs, indices)
+        else:
+            Xs_split = [None] * len(X_split)
+        return X_split, Xs_split
+
+    def __call__(self, X, Xs=None, diag=False):
+        X_split, Xs_split = self._split(X, Xs)
+        covs = [cov(x, xs, diag) for cov, x, xs
+                in zip(self.factor_list, X_split, Xs_split)]
+        return reduce(mul, covs)
+
+
 class Constant(Covariance):
     R"""
     Constant valued covariance function.
@@ -515,3 +551,67 @@ def f(x, args):
                 args = (args,)
             return func(x, *args)
     return f
+
+
+class Coregion(Covariance):
+    R"""Covariance function for intrinsic/linear coregionalization models.
+    Adapted from GPy http://gpy.readthedocs.io/en/deploy/GPy.kern.src.html#GPy.kern.src.coregionalize.Coregionalize.
+
+    This covariance has the form:
+
+    .. math::
+
+       \mathbf{B} = \mathbf{W}\mathbf{W}^\top + \text{diag}(\kappa)
+
+    and calls must use integers associated with the index of the matrix.
+    This allows the API to remain consistent with other covariance objects:
+
+    .. math::
+
+       k(x, x') = \mathbf{B}[x, x'^\top]
+
+    Parameters
+    ----------
+    W : 2D array of shape (num_outputs, rank)
+        a low-rank matrix that determines the correlations between
+        the different outputs (rows)
+    kappa : 1D array of shape (num_outputs, )
+        a vector which allows the outputs to behave independently
+    B : 2D array of shape (num_outputs, num_outputs)
+        the total matrix; exactly one of (W, kappa) and B must be provided
+
+    Notes
+    -----
+    Exactly one dimension must be active for this kernel. Thus, if
+    `input_dim != 1`, then `active_dims` must have a length of one.
+ """ + + def __init__(self, input_dim, W=None, kappa=None, B=None, active_dims=None): + super(Coregion, self).__init__(input_dim, active_dims) + if len(self.active_dims) != 1: + raise ValueError('Coregion requires exactly one dimension to be active') + make_B = W is not None or kappa is not None + if make_B and B is not None: + raise ValueError('Exactly one of (W, kappa) and B must be provided to Coregion') + if make_B: + self.W = tt.as_tensor_variable(W) + self.kappa = tt.as_tensor_variable(kappa) + self.B = tt.dot(self.W, self.W.T) + tt.diag(self.kappa) + elif B is not None: + self.B = tt.as_tensor_variable(B) + else: + raise ValueError('Exactly one of (W, kappa) and B must be provided to Coregion') + + def full(self, X, Xs=None): + X, Xs = self._slice(X, Xs) + index = tt.cast(X, 'int32') + if Xs is None: + index2 = index.T + else: + index2 = tt.cast(Xs, 'int32').T + return self.B[index, index2] + + def diag(self, X): + X, _ = self._slice(X, None) + index = tt.cast(X, 'int32') + return tt.diag(self.B)[index.ravel()] diff --git a/pymc3/gp/gp.py b/pymc3/gp/gp.py index 3a58491dbd..a692166ecc 100644 --- a/pymc3/gp/gp.py +++ b/pymc3/gp/gp.py @@ -9,8 +9,10 @@ from pymc3.gp.util import (conditioned_vars, infer_shape, stabilize, cholesky, solve_lower, solve_upper) from pymc3.distributions import draw_values +from pymc3.distributions.dist_math import eigh +from ..math import cartesian, kron_dot, kron_diag -__all__ = ['Latent', 'Marginal', 'TP', 'MarginalSparse'] +__all__ = ['Latent', 'Marginal', 'TP', 'MarginalSparse', 'MarginalKron'] class Base(object): @@ -787,3 +789,245 @@ def conditional(self, name, Xnew, pred_noise=False, given=None, **kwargs): chol = cholesky(cov) shape = infer_shape(Xnew, kwargs.pop("shape", None)) return pm.MvNormal(name, mu=mu, chol=chol, shape=shape, **kwargs) + + +@conditioned_vars(["Xs", "y", "sigma"]) +class MarginalKron(Base): + R""" + Marginal Gaussian process whose covariance is a tensor product kernel. + + The `gp.MarginalKron` class is an implementation of the sum of a Kronecker + GP prior and additive white noise. It has `marginal_likelihood`, + `conditional` and `predict` methods. This GP implementation can be used to + efficiently implement regression on data that are normally distributed with + a tensor product kernel and are measured on a full grid of inputs: + `cartesian(*Xs)`. `MarginalKron` is based on the `KroneckerNormal` + distribution, see its docstring for more information. For more information + on the `prior` and `conditional` methods, see their docstrings. + + Parameters + ---------- + cov_funcs : list of Covariance objects + The covariance functions that compose the tensor (Kronecker) product. + Defaults to [zero]. + mean_func : None, instance of Mean + The mean function. Defaults to zero. + + Examples + -------- + .. code:: python + + # One dimensional column vectors of inputs + X1 = np.linspace(0, 1, 10)[:, None] + X2 = np.linspace(0, 2, 5)[:, None] + Xs = [X1, X2] + y = np.random.randn(len(X1)*len(X2)) # toy data + with pm.Model() as model: + # Specify the covariance functions for each Xi + cov_func1 = pm.gp.cov.ExpQuad(1, ls=0.1) # Must accept X1 without error + cov_func2 = pm.gp.cov.ExpQuad(1, ls=0.3) # Must accept X2 without error + + # Specify the GP. The default mean function is `Zero`. + gp = pm.gp.MarginalKron(cov_funcs=[cov_func1, cov_func2]) + + # Place a GP prior over the function f. + sigma = pm.HalfCauchy("sigma", beta=3) + y_ = gp.marginal_likelihood("y", Xs=Xs, y=y, sigma=sigma) + + # ... 
+ + # After fitting or sampling, specify the distribution + # at new points with .conditional + # Xnew need not be on a full grid + Xnew1 = np.linspace(-1, 2, 10)[:, None] + Xnew2 = np.linspace(0, 3, 10)[:, None] + Xnew = np.concatenate((Xnew1, Xnew2), axis=1) # Not full grid, works + Xnew = pm.math.cartesian(Xnew1, Xnew2) # Full grid, also works + + with model: + fcond = gp.conditional("fcond", Xnew=Xnew) + """ + + def __init__(self, mean_func=Zero(), cov_funcs=(Constant(0.0))): + try: + self.cov_funcs = list(cov_funcs) + except TypeError: + self.cov_funcs = [cov_funcs] + cov_func = pm.gp.cov.Kron(self.cov_funcs) + super(MarginalKron, self).__init__(mean_func, cov_func) + + def __add__(self, other): + raise TypeError("Efficient implementation of additive, Kronecker-structured processes not implemented") + + def _build_marginal_likelihood(self, Xs): + self.X = cartesian(*Xs) + mu = self.mean_func(self.X) + covs = [f(X) for f, X in zip(self.cov_funcs, Xs)] + return mu, covs + + def _check_inputs(self, Xs, y, sigma): + N = np.prod([len(X) for X in Xs]) + if len(Xs) != len(self.cov_funcs): + raise ValueError('Must provide a covariance function for each X') + if N != len(y): + raise ValueError('Length of y must match cartesian product of Xs') + + def marginal_likelihood(self, name, Xs, y, sigma, is_observed=True, **kwargs): + """ + Returns the marginal likelihood distribution, given the input + locations `cartesian(*Xs)` and the data `y`. + + Parameters + ---------- + name : string + Name of the random variable + Xs : list of array-like + Function input values for each covariance function. Each entry + must be passable to its respective covariance without error. The + total covariance function is measured on the full grid + `cartesian(*Xs)`. + y : array-like + Data that is the sum of the function with the GP prior and Gaussian + noise. Must have shape `(n, )`. + sigma : scalar, Variable + Standard deviation of the white Gaussian noise. + is_observed : bool + Whether to set `y` as an `observed` variable in the `model`. + Default is `True`. + **kwargs + Extra keyword arguments that are passed to `KroneckerNormal` + distribution constructor. 
+ """ + self._check_inputs(Xs, y, sigma) + mu, covs = self._build_marginal_likelihood(Xs) + self.Xs = Xs + self.y = y + self.sigma = sigma + if is_observed: + return pm.KroneckerNormal(name, mu=mu, covs=covs, sigma=sigma, + observed=y, **kwargs) + else: + shape = np.prod([len(X) for X in Xs]) + return pm.KroneckerNormal(name, mu=mu, covs=covs, sigma=sigma, + shape=shape, **kwargs) + + def _build_conditional(self, Xnew, pred_noise, diag): + Xs, y, sigma = self.Xs, self.y, self.sigma + + # Old points + X = cartesian(*Xs) + delta = y - self.mean_func(X) + Kns = [f(x) for f, x in zip(self.cov_funcs, Xs)] + eigs_sep, Qs = zip(*map(eigh, Kns)) # Unzip + QTs = list(map(tt.transpose, Qs)) + eigs = kron_diag(*eigs_sep) # Combine separate eigs + if sigma is not None: + eigs += sigma**2 + + # New points + Km = self.cov_func(Xnew, diag=diag) + Knm = self.cov_func(X, Xnew) + Kmn = Knm.T + + # Build conditional mu + alpha = kron_dot(QTs, delta) + alpha = alpha/eigs[:, None] + alpha = kron_dot(Qs, alpha) + mu = tt.dot(Kmn, alpha).ravel() + self.mean_func(Xnew) + + # Build conditional cov + A = kron_dot(QTs, Knm) + A = A/tt.sqrt(eigs[:, None]) + if diag: + Asq = tt.sum(tt.square(A), 0) + cov = Km - Asq + if pred_noise: + cov += sigma + else: + Asq = tt.dot(A.T, A) + cov = Km - Asq + if pred_noise: + cov += sigma * np.eye(cov.shape) + return mu, cov + + def conditional(self, name, Xnew, pred_noise=False, **kwargs): + """ + Returns the conditional distribution evaluated over new input + locations `Xnew`, just as in `Marginal`. + + `Xnew` will be split + by columns and fed to the relevant covariance functions based on their + `input_dim`. For example, if `cov_func1`, `cov_func2`, and `cov_func3` + have `input_dim` of 2, 1, and 4, respectively, then `Xnew` must have + 7 columns and a covariance between the prediction points + + .. code:: python + + cov_func(Xnew) = cov_func1(Xnew[:, :2]) * cov_func1(Xnew[:, 2:3]) * cov_func1(Xnew[:, 3:]) + + This `cov_func` does not have a Kronecker structure without a full + grid, but the conditional distribution does not have a Kronecker + structure regardless. Thus, the conditional method must fall back to + using `MvNormal` rather than `KroneckerNormal` in either case. + + Parameters + ---------- + name : string + Name of the random variable + Xnew : array-like + Function input values. If one-dimensional, must be a column + vector with shape `(n, 1)`. + pred_noise : bool + Whether or not observation noise is included in the conditional. + Default is `False`. + **kwargs + Extra keyword arguments that are passed to `MvNormal` distribution + constructor. + """ + mu, cov = self._build_conditional(Xnew, pred_noise, False) + chol = cholesky(stabilize(cov)) + shape = infer_shape(Xnew, kwargs.pop("shape", None)) + return pm.MvNormal(name, mu=mu, chol=chol, shape=shape, **kwargs) + + def predict(self, Xnew, point=None, diag=False, pred_noise=False): + R""" + Return the mean vector and covariance matrix of the conditional + distribution as numpy arrays, given a `point`, such as the MAP + estimate or a sample from a `trace`. + + Parameters + ---------- + Xnew : array-like + Function input values. If one-dimensional, must be a column + vector with shape `(n, 1)`. + point : pymc3.model.Point + A specific point to condition on. + diag : bool + If `True`, return the diagonal instead of the full covariance + matrix. Default is `False`. + pred_noise : bool + Whether or not observation noise is included in the conditional. + Default is `False`. 
+ """ + mu, cov = self._build_conditional(Xnew, pred_noise, diag) + return draw_values([mu, cov], point=point) + + def predictt(self, Xnew, diag=False, pred_noise=False): + R""" + Return the mean vector and covariance matrix of the conditional + distribution as symbolic variables. + + Parameters + ---------- + Xnew : array-like + Function input values. If one-dimensional, must be a column + vector with shape `(n, 1)`. + diag : bool + If `True`, return the diagonal instead of the full covariance + matrix. Default is `False`. + pred_noise : bool + Whether or not observation noise is included in the conditional. + Default is `False`. + """ + mu, cov = self._build_conditional(Xnew, pred_noise, diag) + return mu, cov diff --git a/pymc3/math.py b/pymc3/math.py index 79825d70d1..32ed3d7865 100644 --- a/pymc3/math.py +++ b/pymc3/math.py @@ -18,10 +18,84 @@ import scipy.sparse from scipy.linalg import block_diag as scipy_block_diag from pymc3.theanof import floatX, largest_common_dtype, ix_ +from functools import reduce, partial # pylint: enable=unused-import +def kronecker(*Ks): + """Return the Kronecker product of arguments: + K_1 \otimes K_2 \otimes ... \otimes K_D + + Parameters + ---------- + Ks: 2D array-like + """ + return reduce(tt.slinalg.kron, Ks) + + +def cartesian(*arrays): + """Makes the Cartesian product of arrays. + + Parameters + ---------- + arrays: 1D array-like + 1D arrays where earlier arrays loop more slowly than later ones + """ + N = len(arrays) + return np.stack(np.meshgrid(*arrays, indexing='ij'), -1).reshape(-1, N) + + +def kron_matrix_op(krons, m, op): + """Apply op to krons and m in a way that reproduces op(kronecker(*krons), m) + + Parameters + ----------- + krons: list of square 2D array-like objects + D square matrices [A_1, A_2, ..., A_D] to be Kronecker'ed: + A = A_1 \otimes A_2 \otimes ... \otimes A_D + Product of column dimensions must be N + m : NxM array or 1D array (treated as Nx1) + Object that krons act upon + """ + def flat_matrix_op(flat_mat, mat): + Nmat = mat.shape[1] + flat_shape = flat_mat.shape + mat2 = flat_mat.reshape((Nmat, -1)) + return op(mat, mat2).T.reshape(flat_shape) + + def kron_vector_op(v): + return reduce(flat_matrix_op, krons, v) + + if m.ndim == 1: + m = m[:, None] # Treat 1D array as Nx1 matrix + if m.ndim != 2: # Has not been tested otherwise + raise ValueError('m must have ndim <= 2, not {}'.format(mat.ndim)) + m = m.T + res, _ = theano.scan(kron_vector_op, sequences=[m]) + return res.T + + +# Define kronecker functions that work on 1D and 2D arrays +kron_dot = partial(kron_matrix_op, op=tt.dot) +kron_solve_lower = partial(kron_matrix_op, op=tt.slinalg.solve_lower_triangular) + + +def flat_outer(a, b): + return tt.outer(a, b).ravel() + + +def kron_diag(*diags): + """Returns diagonal of a kronecker product. + + Parameters + ---------- + diags: 1D arrays + The diagonals of matrices that are to be Kroneckered + """ + return reduce(flat_outer, diags) + + def tround(*args, **kwargs): """ Temporary function to silence round warning in Theano. 
Please remove diff --git a/pymc3/tests/test_distributions.py b/pymc3/tests/test_distributions.py index f32e9b29c2..05b5aa5e9a 100644 --- a/pymc3/tests/test_distributions.py +++ b/pymc3/tests/test_distributions.py @@ -14,7 +14,8 @@ NegativeBinomial, Geometric, Exponential, ExGaussian, Normal, Flat, LKJCorr, Wald, ChiSquared, HalfNormal, DiscreteUniform, Bound, Uniform, Triangular, Binomial, SkewNormal, DiscreteWeibull, - Gumbel, Logistic, Interpolated, ZeroInflatedBinomial, HalfFlat, AR1) + Gumbel, Logistic, Interpolated, ZeroInflatedBinomial, HalfFlat, AR1, + KroneckerNormal) from ..distributions import continuous from pymc3.theanof import floatX from numpy import array, inf, log, exp @@ -28,7 +29,7 @@ import scipy.stats import theano import theano.tensor as tt - +from ..math import kronecker def get_lkj_cases(): """ @@ -264,6 +265,33 @@ def matrix_normal_logpdf_chol(value, mu, rowchol, colchol): np.dot(colchol, colchol.T)) +def kron_normal_logpdf_cov(value, mu, covs, sigma): + cov = kronecker(*covs).eval() + if sigma is not None: + cov += sigma**2 * np.eye(*cov.shape) + return scipy.stats.multivariate_normal.logpdf(value, mu, cov).sum() + + +def kron_normal_logpdf_chol(value, mu, chols, sigma): + covs = [np.dot(chol, chol.T) for chol in chols] + return kron_normal_logpdf_cov(value, mu, covs, sigma=sigma) + + +def kron_normal_logpdf_evd(value, mu, evds, sigma): + covs = [] + for eigs, Q in evds: + try: + eigs = eigs.eval() + except AttributeError: + pass + try: + Q = Q.eval() + except AttributeError: + pass + covs.append(np.dot(Q, np.dot(np.diag(eigs), Q.T))) + return kron_normal_logpdf_cov(value, mu, covs, sigma) + + def betafn(a): return floatX(scipy.special.gammaln(a).sum(-1) - scipy.special.gammaln(a.sum(-1))) @@ -373,15 +401,23 @@ def PdMatrixCholUpper(n): raise ValueError("n out of bounds") +def RandomPdMatrix(n): + A = np.random.rand(n, n) + return np.dot(A, A.T) + n * np.identity(n) + + class TestMatchesScipy(SeededTest): def pymc3_matches_scipy(self, pymc3_dist, domain, paramdomains, scipy_dist, - decimal=None, extra_args=None): + decimal=None, extra_args=None, scipy_args=None): if extra_args is None: extra_args = {} + if scipy_args is None: + scipy_args = {} model = build_model(pymc3_dist, domain, paramdomains, extra_args) value = model.named_vars['value'] def logp(args): + args.update(scipy_args) return scipy_dist(**args) self.check_logp(model, value, domain, paramdomains, logp, decimal=decimal) @@ -741,6 +777,50 @@ def test_matrixnormal(self, n): matrix_normal_logpdf_chol, decimal=select_by_precision(float64=6, float32=0)) + @pytest.mark.parametrize('n', [2, 3]) + @pytest.mark.parametrize('m', [3]) + @pytest.mark.parametrize('sigma', [None, 1.0]) + def test_kroneckernormal(self, n, m, sigma): + np.random.seed(5) + N = n*m + covs = [RandomPdMatrix(n), RandomPdMatrix(m)] + chols = list(map(np.linalg.cholesky, covs)) + evds = list(map(np.linalg.eigh, covs)) + dom = Domain([np.random.randn(N)*0.1], edges=(None, None), shape=N) + mu = Domain([np.random.randn(N)*0.1], edges=(None, None), shape=N) + + std_args = {'mu': mu} + cov_args = {'covs': covs} + chol_args = {'chols': chols} + evd_args = {'evds': evds} + if sigma is not None and sigma != 0: + std_args['sigma'] = Domain([sigma], edges=(None, None)) + else: + for args in [cov_args, chol_args, evd_args]: + args['sigma'] = sigma + + self.pymc3_matches_scipy( + KroneckerNormal, dom, std_args, kron_normal_logpdf_cov, + extra_args=cov_args, scipy_args=cov_args) + self.pymc3_matches_scipy( + KroneckerNormal, dom, std_args, 
kron_normal_logpdf_chol, + extra_args=chol_args, scipy_args=chol_args) + self.pymc3_matches_scipy( + KroneckerNormal, dom, std_args, kron_normal_logpdf_evd, + extra_args=evd_args, scipy_args=evd_args) + + dom = Domain([np.random.randn(2, N)*0.1], edges=(None, None), shape=(2, N)) + + self.pymc3_matches_scipy( + KroneckerNormal, dom, std_args, kron_normal_logpdf_cov, + extra_args=cov_args, scipy_args=cov_args) + self.pymc3_matches_scipy( + KroneckerNormal, dom, std_args, kron_normal_logpdf_chol, + extra_args=chol_args, scipy_args=chol_args) + self.pymc3_matches_scipy( + KroneckerNormal, dom, std_args, kron_normal_logpdf_evd, + extra_args=evd_args, scipy_args=evd_args) + @pytest.mark.parametrize('n', [1, 2]) def test_mvt(self, n): self.pymc3_matches_scipy(MvStudentT, Vector(R, n), diff --git a/pymc3/tests/test_distributions_random.py b/pymc3/tests/test_distributions_random.py index b622b9b65d..10d731d4c4 100644 --- a/pymc3/tests/test_distributions_random.py +++ b/pymc3/tests/test_distributions_random.py @@ -13,16 +13,20 @@ from .test_distributions import ( build_model, Domain, product, R, Rplus, Rplusbig, Rplusdunif, Unit, Nat, NatSmall, I, Simplex, Vector, PdMatrix, - PdMatrixChol, PdMatrixCholUpper, RealMatrix + PdMatrixChol, PdMatrixCholUpper, RealMatrix, RandomPdMatrix ) def pymc3_random(dist, paramdomains, ref_rand, valuedomain=Domain([0]), - size=10000, alpha=0.05, fails=10, extra_args=None): + size=10000, alpha=0.05, fails=10, extra_args=None, + model_args=None): + if model_args is None: + model_args = {} model = build_model(dist, valuedomain, paramdomains, extra_args) domains = paramdomains.copy() for pt in product(domains, n_samples=100): pt = pm.Point(pt, model=model) + pt.update(model_args) p = alpha # Allow KS test to fail (i.e., the samples be different) # a certain number of times. Crude, but necessary. 
@@ -586,6 +590,54 @@ def ref_rand_uchol(size, mu, rowchol, colchol): # extra_args={'lower': False} # ) + def test_kronecker_normal(self): + def ref_rand(size, mu, covs, sigma): + cov = pm.math.kronecker(covs[0], covs[1]).eval() + cov += sigma**2 * np.identity(cov.shape[0]) + return st.multivariate_normal.rvs(mean=mu, cov=cov, size=size) + + def ref_rand_chol(size, mu, chols, sigma): + covs = [np.dot(chol, chol.T) for chol in chols] + return ref_rand(size, mu, covs, sigma) + + def ref_rand_evd(size, mu, evds, sigma): + covs = [] + for eigs, Q in evds: + covs.append(np.dot(Q, np.dot(np.diag(eigs), Q.T))) + return ref_rand(size, mu, covs, sigma) + + sizes = [2, 3] + sigmas = [0, 1] + for n, sigma in zip(sizes, sigmas): + N = n**2 + covs = [RandomPdMatrix(n), RandomPdMatrix(n)] + chols = list(map(np.linalg.cholesky, covs)) + evds = list(map(np.linalg.eigh, covs)) + dom = Domain([np.random.randn(N)*0.1], edges=(None, None), shape=N) + mu = Domain([np.random.randn(N)*0.1], edges=(None, None), shape=N) + + std_args = {'mu': mu} + cov_args = {'covs': covs} + chol_args = {'chols': chols} + evd_args = {'evds': evds} + if sigma is not None and sigma != 0: + std_args['sigma'] = Domain([sigma], edges=(None, None)) + else: + for args in [cov_args, chol_args, evd_args]: + args['sigma'] = sigma + + pymc3_random( + pm.KroneckerNormal, std_args, valuedomain=dom, + ref_rand=ref_rand, extra_args=cov_args, model_args=cov_args) + pymc3_random( + pm.KroneckerNormal, std_args, valuedomain=dom, + ref_rand=ref_rand_chol, extra_args=chol_args, + model_args=chol_args) + pymc3_random( + pm.KroneckerNormal, std_args, valuedomain=dom, + ref_rand=ref_rand_evd, extra_args=evd_args, + model_args=evd_args) + def test_mv_t(self): def ref_rand(size, nu, Sigma, mu): normal = st.multivariate_normal.rvs(cov=Sigma, size=size).T diff --git a/pymc3/tests/test_gp.py b/pymc3/tests/test_gp.py index c864da858b..1c9e474597 100644 --- a/pymc3/tests/test_gp.py +++ b/pymc3/tests/test_gp.py @@ -1,5 +1,6 @@ # pylint:disable=unused-variable from functools import reduce +from ..math import cartesian, kronecker from operator import add import pymc3 as pm import theano @@ -10,6 +11,7 @@ np.random.seed(101) + class TestZeroMean(object): def test_value(self): X = np.linspace(0, 1, 10)[:, None] @@ -222,6 +224,37 @@ def test_multiops(self): npt.assert_allclose(np.diag(K2), K1d, atol=1e-5) +class TestCovKron(object): + def test_symprod_cov(self): + X1 = np.linspace(0, 1, 10)[:, None] + X2 = np.linspace(0, 1, 10)[:, None] + X = cartesian(X1, X2) + with pm.Model() as model: + cov1 = pm.gp.cov.ExpQuad(1, 0.1) + cov2 = pm.gp.cov.ExpQuad(1, 0.1) + cov = pm.gp.cov.Kron([cov1, cov2]) + K = theano.function([], cov(X))() + npt.assert_allclose(K[0, 1], 1 * 0.53940, atol=1e-3) + npt.assert_allclose(K[0, 11], 0.53940 * 0.53940, atol=1e-3) + # check diagonal + Kd = theano.function([], cov(X, diag=True))() + npt.assert_allclose(np.diag(K), Kd, atol=1e-5) + + def test_multiops(self): + X1 = np.linspace(0, 1, 3)[:, None] + X21 = np.linspace(0, 1, 5)[:, None] + X22 = np.linspace(0, 1, 4)[:, None] + X2 = cartesian(X21, X22) + X = cartesian(X1, X21, X22) + with pm.Model() as model: + cov1 = 3 + pm.gp.cov.ExpQuad(1, 0.1) + pm.gp.cov.ExpQuad(1, 0.1) * pm.gp.cov.ExpQuad(1, 0.1) + cov2 = pm.gp.cov.ExpQuad(1, 0.1) * pm.gp.cov.ExpQuad(2, 0.1) + cov = pm.gp.cov.Kron([cov1, cov2]) + K_true = kronecker(theano.function([], cov1(X1))(), theano.function([], cov2(X2))()).eval() + K = theano.function([], cov(X))() + npt.assert_allclose(K_true, K) + + class TestCovSliceDim(object): 
def test_slice1(self): X = np.linspace(0, 1, 30).reshape(10, 3) @@ -538,6 +571,74 @@ def func_twoarg(x, a, b): assert func_twoarg(x, a, b) == func_twoarg2(x, args=(a, b)) +class TestCoregion(object): + def setup_method(self): + self.nrows = 6 + self.ncols = 3 + self.W = np.random.rand(self.nrows, self.ncols) + self.kappa = np.random.rand(self.nrows) + self.B = np.dot(self.W, self.W.T) + np.diag(self.kappa) + self.rand_rows = np.random.randint(0, self.nrows, size=(20, 1)) + self.rand_cols = np.random.randint(0, self.ncols, size=(10, 1)) + self.X = np.concatenate((self.rand_rows, np.random.rand(20, 1)), axis=1) + self.Xs = np.concatenate((self.rand_cols, np.random.rand(10, 1)), axis=1) + + def test_full(self): + B_mat = self.B[self.rand_rows, self.rand_rows.T] + with pm.Model() as model: + B = pm.gp.cov.Coregion(2, W=self.W, kappa=self.kappa, active_dims=[0]) + npt.assert_allclose( + B(np.array([[2, 1.5], [3, -42]])).eval(), + self.B[2:4, 2:4] + ) + npt.assert_allclose(B(self.X).eval(), B_mat) + + def test_fullB(self): + B_mat = self.B[self.rand_rows, self.rand_rows.T] + with pm.Model() as model: + B = pm.gp.cov.Coregion(1, B=self.B) + npt.assert_allclose( + B(np.array([[2], [3]])).eval(), + self.B[2:4, 2:4] + ) + npt.assert_allclose(B(self.X).eval(), B_mat) + + def test_Xs(self): + B_mat = self.B[self.rand_rows, self.rand_cols.T] + with pm.Model() as model: + B = pm.gp.cov.Coregion(2, W=self.W, kappa=self.kappa, active_dims=[0]) + npt.assert_allclose( + B(np.array([[2, 1.5]]), np.array([[3, -42]])).eval(), + self.B[2, 3] + ) + npt.assert_allclose(B(self.X, self.Xs).eval(), B_mat) + + def test_diag(self): + B_diag = np.diag(self.B)[self.rand_rows.ravel()] + with pm.Model() as model: + B = pm.gp.cov.Coregion(2, W=self.W, kappa=self.kappa, active_dims=[0]) + npt.assert_allclose( + B(np.array([[2, 1.5]]), diag=True).eval(), + np.diag(self.B)[2] + ) + npt.assert_allclose(B(self.X, diag=True).eval(), B_diag) + + def test_raises(self): + with pm.Model() as model: + with pytest.raises(ValueError): + B = pm.gp.cov.Coregion(2, W=self.W, kappa=self.kappa) + + def test_raises2(self): + with pm.Model() as model: + with pytest.raises(ValueError): + B = pm.gp.cov.Coregion(1, W=self.W, kappa=self.kappa, B=self.B) + + def test_raises3(self): + with pm.Model() as model: + with pytest.raises(ValueError): + B = pm.gp.cov.Coregion(1) + + class TestMarginalVsLatent(object): R""" Compare the logp of models Marginal, noise=0 and Latent. 
@@ -579,7 +680,7 @@ def testLatent2(self): chol = np.linalg.cholesky(cov_func(self.X).eval()) y_rotated = np.linalg.solve(chol, self.y - 0.5) latent_logp = model.logp({"f_rotated_": y_rotated, "p": self.pnew}) - npt.assert_allclose(latent_logp, self.logp, atol=0, rtol=1e-2) + npt.assert_allclose(latent_logp, self.logp, atol=5) class TestMarginalVsMarginalSparse(object): @@ -810,13 +911,59 @@ def testAdditiveTPRaises(self): gp1 + gp2 - - - - - - - - - - +class TestMarginalKron(object): + def setup_method(self): + self.Xs = [np.linspace(0, 1, 7)[:, None], + np.linspace(0, 1, 5)[:, None], + np.linspace(0, 1, 6)[:, None]] + self.X = cartesian(*self.Xs) + self.N = np.prod([len(X) for X in self.Xs]) + self.y = np.random.randn(self.N) * 0.1 + self.Xnews = [np.random.randn(5, 1), + np.random.randn(5, 1), + np.random.randn(5, 1)] + self.Xnew = np.concatenate(tuple(self.Xnews), axis=1) + self.sigma = 0.2 + self.pnew = np.random.randn(len(self.Xnew))*0.01 + ls = 0.2 + with pm.Model() as model: + self.cov_funcs = [pm.gp.cov.ExpQuad(1, ls), + pm.gp.cov.ExpQuad(1, ls), + pm.gp.cov.ExpQuad(1, ls)] + cov_func = pm.gp.cov.Kron(self.cov_funcs) + self.mean = pm.gp.mean.Constant(0.5) + gp = pm.gp.Marginal(mean_func=self.mean, cov_func=cov_func) + f = gp.marginal_likelihood("f", self.X, self.y, noise=self.sigma) + p = gp.conditional("p", self.Xnew) + self.mu, self.cov = gp.predict(self.Xnew) + self.logp = model.logp({"p": self.pnew}) + + def testMarginalKronvsMarginalpredict(self): + with pm.Model() as kron_model: + kron_gp = pm.gp.MarginalKron(mean_func=self.mean, + cov_funcs=self.cov_funcs) + f = kron_gp.marginal_likelihood('f', self.Xs, self.y, + sigma=self.sigma, shape=self.N) + p = kron_gp.conditional('p', self.Xnew) + mu, cov = kron_gp.predict(self.Xnew) + npt.assert_allclose(mu, self.mu, atol=0, rtol=1e-2) + npt.assert_allclose(cov, self.cov, atol=0, rtol=1e-2) + + def testMarginalKronvsMarginal(self): + with pm.Model() as kron_model: + kron_gp = pm.gp.MarginalKron(mean_func=self.mean, + cov_funcs=self.cov_funcs) + f = kron_gp.marginal_likelihood('f', self.Xs, self.y, + sigma=self.sigma, shape=self.N) + p = kron_gp.conditional('p', self.Xnew) + kron_logp = kron_model.logp({'p': self.pnew}) + npt.assert_allclose(kron_logp, self.logp, atol=0, rtol=1e-2) + + def testMarginalKronRaises(self): + with pm.Model() as kron_model: + gp1 = pm.gp.MarginalKron(mean_func=self.mean, + cov_funcs=self.cov_funcs) + gp2 = pm.gp.MarginalKron(mean_func=self.mean, + cov_funcs=self.cov_funcs) + with pytest.raises(TypeError): + gp1 + gp2 diff --git a/pymc3/tests/test_math.py b/pymc3/tests/test_math.py index efcbe17eef..b231b08127 100644 --- a/pymc3/tests/test_math.py +++ b/pymc3/tests/test_math.py @@ -5,12 +5,79 @@ from theano.tests import unittest_tools as utt from pymc3.math import ( LogDet, logdet, probit, invprobit, expand_packed_triangular, - log1pexp, log1mexp) + log1pexp, log1mexp, kronecker, cartesian, kron_dot, kron_solve_lower) from .helpers import SeededTest import pytest from pymc3.theanof import floatX +def test_kronecker(): + np.random.seed(1) + # Create random matrices + [a, b, c] = [np.random.rand(3, 3+i) for i in range(3)] + + custom = kronecker(a, b, c) # Custom version + nested = tt.slinalg.kron(a, tt.slinalg.kron(b, c)) + np.testing.assert_array_almost_equal( + custom.eval(), + nested.eval() # Standard nested version + ) + + +def test_cartesian(): + np.random.seed(1) + a = [1, 2, 3] + b = [0, 2] + c = [5, 6] + manual_cartesian = np.array( + [[1, 0, 5], + [1, 0, 6], + [1, 2, 5], + [1, 2, 6], + [2, 0, 5], + 
[2, 0, 6], + [2, 2, 5], + [2, 2, 6], + [3, 0, 5], + [3, 0, 6], + [3, 2, 5], + [3, 2, 6], + ] + ) + auto_cart = cartesian(a, b, c) + np.testing.assert_array_almost_equal(manual_cartesian, auto_cart) + + +def test_kron_dot(): + np.random.seed(1) + # Create random matrices + Ks = [np.random.rand(3, 3) for i in range(3)] + # Create random vector with correct shape + tot_size = np.prod([k.shape[1] for k in Ks]) + x = np.random.rand(tot_size).reshape((tot_size, 1)) + # Construct entire kronecker product then multiply + big = kronecker(*Ks) + slow_ans = tt.dot(big, x) + # Use tricks to avoid construction of entire kronecker product + fast_ans = kron_dot(Ks, x) + np.testing.assert_array_almost_equal(slow_ans.eval(), fast_ans.eval()) + + +def test_kron_solve_lower(): + np.random.seed(1) + # Create random matrices + Ls = [np.tril(np.random.rand(3, 3)) for i in range(3)] + # Create random vector with correct shape + tot_size = np.prod([L.shape[1] for L in Ls]) + x = np.random.rand(tot_size).reshape((tot_size, 1)) + # Construct entire kronecker product then solve + big = kronecker(*Ls) + slow_ans = tt.slinalg.solve_lower_triangular(big, x) + # Use tricks to avoid construction of entire kronecker product + fast_ans = kron_solve_lower(Ls, x) + np.testing.assert_array_almost_equal(slow_ans.eval(), fast_ans.eval()) + + def test_probit(): p = np.array([0.01, 0.25, 0.5, 0.75, 0.99]) np.testing.assert_allclose(invprobit(probit(p)).eval(), p, atol=1e-5)
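Reviewer note (not part of the patch): the following is a minimal end-to-end sketch of the new `MarginalKron` API, assembled from the docstring examples in this diff. The priors, lengthscales, grid sizes, and toy data are illustrative assumptions only.

.. code:: python

    import numpy as np
    import pymc3 as pm

    # Two 1D input grids; the model is observed on their full Cartesian product.
    X1 = np.linspace(0, 1, 20)[:, None]
    X2 = np.linspace(0, 2, 10)[:, None]
    Xs = [X1, X2]
    N = len(X1) * len(X2)
    y = np.random.randn(N) * 0.1  # toy observations on the grid

    with pm.Model() as model:
        # One covariance per grid dimension; MarginalKron never forms the
        # full Kronecker product covariance explicitly.
        ls1 = pm.Gamma('ls1', alpha=2, beta=2)
        ls2 = pm.Gamma('ls2', alpha=2, beta=2)
        cov1 = pm.gp.cov.ExpQuad(1, ls=ls1)
        cov2 = pm.gp.cov.ExpQuad(1, ls=ls2)

        gp = pm.gp.MarginalKron(cov_funcs=[cov1, cov2])
        sigma = pm.HalfCauchy('sigma', beta=3)
        y_ = gp.marginal_likelihood('y', Xs=Xs, y=y, sigma=sigma)

        trace = pm.sample(500, tune=500)

    # Predict at new points, which need not lie on a full grid.
    Xnew = pm.math.cartesian(np.linspace(0, 1, 5)[:, None],
                             np.linspace(0, 2, 5)[:, None])
    with model:
        fcond = gp.conditional('fcond', Xnew=Xnew)
        mu, cov = gp.predict(Xnew, point=trace[-1])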
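Similarly, a small sketch of the new `Coregion` covariance, mirroring the tests above; `W`, `kappa`, and the index column values are placeholders chosen for illustration.

.. code:: python

    import numpy as np
    import theano
    import pymc3 as pm

    # Coregionalization matrix B = W W^T + diag(kappa) over 3 outputs.
    W = np.random.rand(3, 2)
    kappa = np.random.rand(3)

    # The active column holds integer output indices; the second column of
    # real-valued inputs is ignored because active_dims=[0].
    B = pm.gp.cov.Coregion(input_dim=2, W=W, kappa=kappa, active_dims=[0])
    X = np.array([[0, 0.1], [1, -0.3], [2, 2.2], [1, 0.0]])
    K = theano.function([], B(X))()  # K[i, j] == B_full[X[i, 0], X[j, 0]]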