diff --git a/pymc3/distributions/dist_math.py b/pymc3/distributions/dist_math.py
index 2250cf1287..1e5cd73b25 100644
--- a/pymc3/distributions/dist_math.py
+++ b/pymc3/distributions/dist_math.py
@@ -4,17 +4,14 @@
 @author: johnsalvatier
 '''
 from __future__ import division
-
 import numpy as np
-import scipy.linalg
 import theano.tensor as tt
 import theano
+from theano.tensor import slinalg
 
 from .special import gammaln
 from pymc3.theanof import floatX
 
-f = floatX
-c = - .5 * np.log(2. * np.pi)
 
 
 def bound(logp, *conditions, **kwargs):
@@ -139,157 +136,231 @@ def log_normal(x, mean, **kwargs):
         std = rho2sd(rho)
     else:
         std = tau**(-1)
-    std += f(eps)
-    return f(c) - tt.log(tt.abs_(std)) - (x - mean) ** 2 / (2. * std ** 2)
-
+    std += floatX(eps)
+    return floatX(-.5 * np.log(2. * np.pi)) - tt.log(tt.abs_(std)) - (x - mean) ** 2 / (2. * std ** 2)
+
+
+def stabilize(K):
+    """ adds small diagonal to a covariance matrix """
+    return K + floatX(1e-6) * tt.identity_like(K)
+
+
+def CholeskyCheck(mode='cov', return_ldet=True):
+    """Checks whether the given matrix/Cholesky factor is positive definite.
+    Returns a dummy Cholesky replacement if it is not, along with a boolean
+    to assert whether the replacement was needed and, optionally, the log of
+    the determinant of either the real Cholesky factor or its replacement."""
+    is_cholesky = (mode == 'chol')
+    w_ldet = return_ldet
+    # add inplace=True when/if implemented by Theano
+    cholesky = slinalg.Cholesky(lower=True, on_error="nan")
+    _true = tt.as_tensor_variable(np.array(True))
+
+    # Presume a given Cholesky is positive definite and return its logdet
+    def check_chol(cov):
+        diag = tt.ExtractDiag(view=True)(cov)
+        ldet = tt.sum(diag.log()) if w_ldet else None
+        return _true, ldet
+
+    # Check if the Cholesky decomposition worked (ie, the cov or tau
+    # was positive definite)
+    def check_nonchol(cov):
+        ldet = None
+        if w_ldet:
+            # will all be NaN if the Cholesky was no-go, which is fine
+            diag = tt.ExtractDiag(view=True)(cov)
+            ldet = tt.sum(diag.log())
+            if mode == 'tau':
+                ldet = -ldet
+        return ~tt.isnan(cov[0, 0]), ldet
+
+    check = check_chol if is_cholesky else check_nonchol
+    # use the caller-supplied fallback if one is given, else an identity matrix
+    repl = lambda ncov, r: r if r is not None else tt.identity_like(ncov)
+
+    def func(cov, fallback=None):
+        if not is_cholesky:
+            cov = cholesky(cov)
+        ok, ldet = check(cov)
+        chol_cov = tt.switch(ok, tt.unbroadcast(cov, 0, 1), repl(cov, fallback))
+        return [chol_cov, ldet, ok] if w_ldet else [chol_cov, ok]
+
+    return func
+
+
+def MvNormalLogp(mode='cov'):
+    """Constructor for the elementwise log pdf of a multivariate normal distribution.
+
+    The returned function will have parameters:
+    ----------
+    cov : tt.matrix
+        The covariance matrix, its Cholesky decomposition or the precision
+        matrix (according to whether `mode` is 'cov', 'chol' or 'tau').
+    delta : tt.matrix
+        Array of deviations from the mean.
+    """
+    solve_lower = slinalg.Solve(A_structure='lower_triangular', overwrite_b=True)
+    check_chol_wldet = CholeskyCheck(mode, return_ldet=True)
+    def logpf(cov, delta):
+        chol, logdet, ok = check_chol_wldet(cov)
+        _, k = delta.shape
+        k = floatX(k)
+        if mode == 'tau':
+            delta_trans = tt.dot(delta, chol)
+        else:
+            delta_trans = solve_lower(chol, delta.T).T
+        quaddist = (delta_trans ** floatX(2)).sum(axis=-1)
+        result = floatX(-.5) * k * tt.log(floatX(2) * floatX(np.pi))
+        result += floatX(-.5) * quaddist - logdet
+        return tt.switch(ok, floatX(result), floatX(-np.inf * tt.ones_like(result)))
 
-def MvNormalLogp():
-    """Compute the log pdf of a multivariate normal distribution.
+    return logpf
 
-    This should be used in MvNormal.logp once Theano#5908 is released.
 
+def MvNormalLogpSum(mode='cov'):
+    """Compute the sum of log pdf of a multivariate normal distribution.
 
     Parameters
     ----------
     cov : tt.matrix
-        The covariance matrix.
+        The covariance matrix, its Cholesky decomposition or the precision
+        matrix (according to whether `mode` is 'cov', 'chol' or 'tau').
     delta : tt.matrix
         Array of deviations from the mean.
     """
+
     cov = tt.matrix('cov')
     cov.tag.test_value = floatX(np.eye(3))
     delta = tt.matrix('delta')
     delta.tag.test_value = floatX(np.zeros((2, 3)))
 
-    solve_lower = tt.slinalg.Solve(A_structure='lower_triangular')
-    solve_upper = tt.slinalg.Solve(A_structure='upper_triangular')
-    cholesky = Cholesky(nofail=True, lower=True)
+    solve_lower = slinalg.Solve(A_structure='lower_triangular', overwrite_b=True)
+    solve_upper = slinalg.Solve(A_structure='upper_triangular', overwrite_b=True)
+    check_chol = CholeskyCheck(mode, return_ldet=False)
+    check_chol_wldet = CholeskyCheck(mode, return_ldet=True)
 
-    n, k = delta.shape
-    n, k = f(n), f(k)
-    chol_cov = cholesky(cov)
-    diag = tt.nlinalg.diag(chol_cov)
-    ok = tt.all(diag > 0)
+    def logp(cov, delta):
+        chol, logdet, ok = check_chol_wldet(cov)
+
+        if mode == 'tau':
+            delta_trans = tt.dot(delta, chol)
+        else:
+            delta_trans = solve_lower(chol, delta.T).T
+        quaddist = (delta_trans ** floatX(2)).sum()
 
-    chol_cov = tt.switch(ok, chol_cov, tt.fill(chol_cov, 1))
-    delta_trans = solve_lower(chol_cov, delta.T).T
+        n, k = delta.shape
+        n, k = floatX(n), floatX(k)
+        result = n * k * tt.log(floatX(2) * floatX(np.pi))
+        result += floatX(2) * n * logdet
+        result += quaddist
+        result = floatX(-.5) * result
 
-    result = n * k * tt.log(f(2) * np.pi)
-    result += f(2) * n * tt.sum(tt.log(diag))
-    result += (delta_trans ** f(2)).sum()
-    result = f(-.5) * result
-    logp = tt.switch(ok, result, -np.inf)
+        return tt.switch(ok, floatX(result), floatX(-np.inf * tt.ones_like(result)))
 
     def dlogp(inputs, gradients):
         g_logp, = gradients
+        g_logp.tag.test_value = floatX(1.)
         cov, delta = inputs
-        g_logp.tag.test_value = floatX(1.)
 
         n, k = delta.shape
+        I_k = tt.eye(k, dtype=theano.config.floatX)
+        n, k = floatX(n), floatX(k)
+        chol_cov, ok = check_chol(cov, fallback=I_k)
 
-        chol_cov = cholesky(cov)
-        diag = tt.nlinalg.diag(chol_cov)
-        ok = tt.all(diag > 0)
-
-        chol_cov = tt.switch(ok, chol_cov, tt.fill(chol_cov, 1))
         delta_trans = solve_lower(chol_cov, delta.T).T
 
-        inner = n * tt.eye(k) - tt.dot(delta_trans.T, delta_trans)
+        inner = n * I_k - tt.dot(delta_trans.T, delta_trans)
         g_cov = solve_upper(chol_cov.T, inner)
         g_cov = solve_upper(chol_cov.T, g_cov.T)
 
         tau_delta = solve_upper(chol_cov.T, delta_trans.T)
-        g_delta = tau_delta.T
-
-        g_cov = tt.switch(ok, g_cov, -np.nan)
-        g_delta = tt.switch(ok, g_delta, -np.nan)
-        return [-0.5 * g_cov * g_logp, -g_delta * g_logp]
+        g_cov = tt.switch(ok, floatX(g_cov), floatX(-np.nan * tt.zeros_like(g_cov)))
+        g_delta = tt.switch(ok, floatX(tau_delta.T), floatX(-np.nan * tt.zeros_like(tau_delta.T)))
 
-    return theano.OpFromGraph(
-        [cov, delta], [logp], grad_overrides=dlogp, inline=True)
+        return [floatX(-.5) * g_cov * g_logp, -g_delta * g_logp]
 
+    return logp
 
-class Cholesky(theano.Op):
     """
-    Return a triangular matrix square root of positive semi-definite `x`.
-
-    This is a copy of the cholesky op in theano, that doesn't throw an
-    error if the matrix is not positive definite, but instead returns
-    nan.
+    This doesn't seem to really improve performance but causes issues with model.dlogp2
+    if (mode == 'cov'):
+        return theano.OpFromGraph(
+            [cov, delta], [logp], grad_overrides=dlogp, name='MvNormalLogpSum', inline=True)
+    else:
+        return theano.OpFromGraph(
+            [cov, delta], [logp], inline=True)"""
 
-    This has been merged upstream and we should switch to that
-    version after the next theano release.
 
+def MvTLogp(nu):
+    """Constructor for the elementwise log pdf of a multivariate Student-T distribution.
 
-    L = cholesky(X, lower=True) implies dot(L, L.T) == X.
+    The returned function will have parameters:
+    ----------
+    cov : tt.matrix
+        The covariance matrix, its Cholesky decomposition or the precision
+        matrix (according to whether `mode` is 'cov', 'chol' or 'tau').
+    delta : tt.matrix
+        Array of deviations from the mean.
     """
-    __props__ = ('lower', 'destructive', 'nofail')
+    solve_lower = slinalg.Solve(A_structure='lower_triangular', overwrite_b=True)
+    nu = tt.as_tensor_variable(nu)
 
-    def __init__(self, lower=True, nofail=False):
-        self.lower = lower
-        self.destructive = False
-        self.nofail = nofail
+    def constructor(mode='cov'):
+        check_chol_wldet = CholeskyCheck(mode, return_ldet=True)
+        def logpf(cov, delta):
+            chol, logdet, ok = check_chol_wldet(cov)
 
-    def make_node(self, x):
-        x = tt.as_tensor_variable(x)
-        if x.ndim != 2:
-            raise ValueError('Matrix must me two dimensional.')
-        return tt.Apply(self, [x], [x.type()])
-
-    def perform(self, node, inputs, outputs):
-        x = inputs[0]
-        z = outputs[0]
-        try:
-            z[0] = scipy.linalg.cholesky(x, lower=self.lower).astype(x.dtype)
-        except (ValueError, scipy.linalg.LinAlgError):
-            if self.nofail:
-                z[0] = np.eye(x.shape[-1])
-                z[0][0, 0] = np.nan
+            if mode == 'tau':
+                delta_trans = tt.dot(delta, chol)
             else:
-                raise
-
-    def grad(self, inputs, gradients):
-        """
-        Cholesky decomposition reverse-mode gradient update.
-
-        Symbolic expression for reverse-mode Cholesky gradient taken from [0]_
-
-        References
-        ----------
-        .. [0] I. Murray, "Differentiation of the Cholesky decomposition",
-           http://arxiv.org/abs/1602.07527
-
-        """
-
-        x = inputs[0]
-        dz = gradients[0]
-        chol_x = self(x)
-        ok = tt.all(tt.nlinalg.diag(chol_x) > 0)
-        chol_x = tt.switch(ok, chol_x, tt.fill_diagonal(chol_x, 1))
-        dz = tt.switch(ok, dz, floatX(1))
-
-        # deal with upper triangular by converting to lower triangular
-        if not self.lower:
-            chol_x = chol_x.T
-            dz = dz.T
-
-        def tril_and_halve_diagonal(mtx):
-            """Extracts lower triangle of square matrix and halves diagonal."""
-            return tt.tril(mtx) - tt.diag(tt.diagonal(mtx) / 2.)
-
-        def conjugate_solve_triangular(outer, inner):
-            """Computes L^{-T} P L^{-1} for lower-triangular L."""
-            solve = tt.slinalg.Solve(A_structure="upper_triangular")
-            return solve(outer.T, solve(outer.T, inner.T).T)
-
-        s = conjugate_solve_triangular(
-            chol_x, tril_and_halve_diagonal(chol_x.T.dot(dz)))
-
-        if self.lower:
-            grad = tt.tril(s + s.T) - tt.diag(tt.diagonal(s))
-        else:
-            grad = tt.triu(s + s.T) - tt.diag(tt.diagonal(s))
-        return [tt.switch(ok, grad, floatX(np.nan))]
-
+                delta_trans = solve_lower(chol, delta.T).T
+            _, k = delta.shape
+            k = floatX(k)
+
+            quaddist = (delta_trans ** floatX(2)).sum(axis=-1)
+
+            result = gammaln((nu + k) / floatX(2))
+            result -= gammaln(nu / floatX(2))
+            result -= floatX(.5) * k * tt.log(nu * floatX(np.pi))
+            result -= (nu + k) / floatX(2) * tt.log1p(quaddist / nu)
+            result -= logdet
+            return tt.switch(ok, floatX(result), floatX(-np.inf * tt.ones_like(result)))
+
+        return logpf
+    return constructor
+
+
+def MvTLogpSum(nu):
+    """Constructor for the sum of log pdf of a multivariate Student-T distribution.
+    WIP (not sure if this is at all possible)
+
+    The returned function will have parameters:
+    ----------
+    cov : tt.matrix
+        The covariance matrix, its Cholesky decomposition or the precision
+        matrix (according to whether `mode` is 'cov', 'chol' or 'tau').
+    delta : tt.matrix
+        Array of deviations from the mean.
+    """
+    solve_lower = slinalg.Solve(A_structure='lower_triangular', overwrite_b=True)
+    nu = tt.as_tensor_variable(nu)
+    def constructor(mode='cov'):
+        check_chol_wldet = CholeskyCheck(mode, return_ldet=True)
+        def logpf(cov, delta):
+            chol, logdet, ok = check_chol_wldet(cov)
+
+            if mode == 'tau':
+                delta_trans = tt.dot(delta, chol)
+            else:
+                delta_trans = solve_lower(chol, delta.T).T
+            n, k = delta.shape
+            n, k = floatX(n), floatX(k)
+
+            quaddist = (delta_trans ** floatX(2)).sum()
+            ## TODO haven't done the full math yet
+            result = n * (gammaln((nu + k) / 2.) - gammaln(nu / 2.))
+            result -= n * .5 * k * tt.log(nu * floatX(np.pi))
+            result -= (nu + k) / 2. * tt.log1p(quaddist / nu)
+            result -= logdet
+            return tt.switch(ok, floatX(result), floatX(-np.inf * tt.ones_like(result)))
+        return logpf
+    return constructor
 
 
 class SplineWrapper(theano.Op):
     """
diff --git a/pymc3/distributions/multivariate.py b/pymc3/distributions/multivariate.py
index 50a36d2166..3f8ce856e7 100755
--- a/pymc3/distributions/multivariate.py
+++ b/pymc3/distributions/multivariate.py
@@ -8,7 +8,7 @@
 import theano.tensor as tt
 
 from scipy import stats, linalg
-
+from six import raise_from
 from theano.tensor.nlinalg import det, matrix_inverse, trace
 import theano.tensor.slinalg
 import pymc3 as pm
@@ -20,134 +20,97 @@
 from ..model import Deterministic
 from .continuous import ChiSquared, Normal
 from .special import gammaln, multigammaln
-from .dist_math import bound, logpow, factln, Cholesky
+from .dist_math import (bound, logpow, factln, MvNormalLogp, MvNormalLogpSum,
+                        MvTLogp, MvTLogpSum)
 
 __all__ = ['MvNormal', 'MvStudentT', 'Dirichlet', 'Multinomial', 'Wishart',
            'WishartBartlett', 'LKJCorr', 'LKJCholeskyCov', 'MatrixNormal']
 
-
 class _QuadFormBase(Continuous):
     def __init__(self, mu=None, cov=None, chol=None, tau=None, lower=True,
-                 *args, **kwargs):
+                 logphelper=None, logpsumhelper=None, *args, **kwargs):
         super(_QuadFormBase, self).__init__(*args, **kwargs)
+
+        self._initCov(cov, tau, chol, lower)
+
         if len(self.shape) > 2:
             raise ValueError("Only 1 or 2 dimensions are allowed.")
 
-        if chol is not None and not lower:
-            chol = chol.T
+        _cov = getattr(self, self._cov_type)
+
+        if logphelper is None:
+            raise ValueError("`logphelper` not passed during initialization. "
+                             "_QuadFormBase expects a function that will construct "
+                             "a quadratic distance calculator appropriate for the "
+                             "given covariance parameter, with arguments cov "
+                             "and delta (the deviations from the mean).")
+
+        try:
+            _deltalogp = logphelper(self._cov_type)
+            def deltalogp(delta):
+                return _deltalogp(_cov, delta)
+            self._delta_logp = deltalogp
+
+            if logpsumhelper is not None:
+                _delta_logpsum = logpsumhelper(self._cov_type)
+                def deltalogpsum(delta):
+                    return _delta_logpsum(_cov, delta)
+                self._delta_logp_sum = deltalogpsum
+
+        except ValueError:
+            error = '`{}` must be two dimensional.'.format(self._cov_type)
+            raise_from(ValueError(error), None)
+
+        self.mu = tt.as_tensor_variable(mu)
+
+    def _initCov(self, cov=None, tau=None, chol=None, lower=True):
+        if all([val is None for val in [cov, tau, chol]]):
+            raise ValueError('One of cov, tau or chol arguments must be provided.')
+
         if len([i for i in [tau, cov, chol] if i is not None]) != 1:
             raise ValueError('Incompatible parameterization. '
                              'Specify exactly one of tau, cov, '
                              'or chol.')
-        self.mu = mu = tt.as_tensor_variable(mu)
-        self.solve_lower = tt.slinalg.Solve(A_structure="lower_triangular")
-        # Step methods and advi do not catch LinAlgErrors at the
-        # moment. We work around that by using a cholesky op
-        # that returns a nan as first entry instead of raising
-        # an error.
-        cholesky = Cholesky(nofail=True, lower=True)
+
+        if chol is not None and not lower:
+            chol = chol.T
+
+        self.cov = self.tau = self.chol = None
         if cov is not None:
-            self.k = cov.shape[0]
             self._cov_type = 'cov'
-            cov = tt.as_tensor_variable(cov)
-            if cov.ndim != 2:
-                raise ValueError('cov must be two dimensional.')
-            self.chol_cov = cholesky(cov)
-            self.cov = cov
-            self._n = self.cov.shape[-1]
-        elif tau is not None:
-            self.k = tau.shape[0]
-            self._cov_type = 'tau'
-            tau = tt.as_tensor_variable(tau)
-            if tau.ndim != 2:
-                raise ValueError('tau must be two dimensional.')
-            self.chol_tau = cholesky(tau)
-            self.tau = tau
-            self._n = self.tau.shape[-1]
-        else:
-            self.k = chol.shape[0]
+            self.cov = tt.as_tensor_variable(cov)
+        elif chol is not None:
             self._cov_type = 'chol'
-            if chol.ndim != 2:
-                raise ValueError('chol must be two dimensional.')
-            self.chol_cov = tt.as_tensor_variable(chol)
-            self._n = self.chol_cov.shape[-1]
-
-    def _quaddist(self, value):
-        """Compute (x - mu).T @ Sigma^-1 @ (x - mu) and the logdet of Sigma."""
-        mu = self.mu
+            self.chol = tt.as_tensor_variable(chol)
+            if self.chol.ndim != 2:
+                raise ValueError('`chol` must be two dimensional.')
+        else:
+            self._cov_type = 'tau'
+            self.tau = tt.as_tensor_variable(tau)
+
+    def _check_logp_value(self, value):
         if value.ndim > 2 or value.ndim == 0:
             raise ValueError('Invalid dimension for value: %s' % value.ndim)
+        onedim = value.ndim == 1
         if value.ndim == 1:
-            onedim = True
             value = value[None, :]
-        else:
-            onedim = False
+        return value, onedim
 
-        delta = value - mu
+    def logp(self, value):
+        value, onedim = self._check_logp_value(value)
 
-        if self._cov_type == 'cov':
-            # Use this when Theano#5908 is released.
-            # return MvNormalLogp()(self.cov, delta)
-            dist, logdet, ok = self._quaddist_cov(delta)
-        elif self._cov_type == 'tau':
-            dist, logdet, ok = self._quaddist_tau(delta)
-        else:
-            dist, logdet, ok = self._quaddist_chol(delta)
-
-        if onedim:
-            return dist[0], logdet, ok
-        return dist, logdet, ok
-
-    def _quaddist_chol(self, delta):
-        chol_cov = self.chol_cov
-        _, k = delta.shape
-        k = pm.floatX(k)
-        diag = tt.nlinalg.diag(chol_cov)
-        # Check if the covariance matrix is positive definite.
-        ok = tt.all(diag > 0)
-        # If not, replace the diagonal. We return -inf later, but
-        # need to prevent solve_lower from throwing an exception.
-        chol_cov = tt.switch(ok, chol_cov, 1)
-
-        delta_trans = self.solve_lower(chol_cov, delta.T).T
-        quaddist = (delta_trans ** 2).sum(axis=-1)
-        logdet = tt.sum(tt.log(diag))
-        return quaddist, logdet, ok
-
-    def _quaddist_cov(self, delta):
-        return self._quaddist_chol(delta)
-
-    def _quaddist_tau(self, delta):
-        chol_tau = self.chol_tau
-        _, k = delta.shape
-        k = pm.floatX(k)
-
-        diag = tt.nlinalg.diag(chol_tau)
-        ok = tt.all(diag > 0)
-
-        chol_tau = tt.switch(ok, chol_tau, 1)
-        diag = tt.nlinalg.diag(chol_tau)
-        delta_trans = tt.dot(delta, chol_tau)
-        quaddist = (delta_trans ** 2).sum(axis=-1)
-        logdet = -tt.sum(tt.log(diag))
-        return quaddist, logdet, ok
+        logp = self._delta_logp(value - self.mu)
 
-    def _repr_cov_params(self, dist=None):
-        if dist is None:
-            dist = self
-        if self._cov_type == 'chol':
-            chol = get_variable_name(self.chol)
-            return r'\mathit{{chol}}={}'.format(chol)
-        elif self._cov_type == 'cov':
-            cov = get_variable_name(self.cov)
-            return r'\mathit{{cov}}={}'.format(cov)
-        elif self._cov_type == 'tau':
-            tau = get_variable_name(self.tau)
-            return r'\mathit{{tau}}={}'.format(tau)
+        if onedim and logp.ndim != 0:
+            return logp[0]
+        return logp
+
+    def _repr_cov_params(self, dist=None):
+        name = get_variable_name(getattr(self, self._cov_type))
+        return r'\mathit{{{}}}={}'.format(self._cov_type, name)
 
 
 class MvNormal(_QuadFormBase):
     R"""
@@ -218,8 +181,11 @@ class MvNormal(_QuadFormBase):
 
     def __init__(self, mu, cov=None, tau=None, chol=None, lower=True,
                  *args, **kwargs):
+
         super(MvNormal, self).__init__(mu=mu, cov=cov, tau=tau, chol=chol,
-                                       lower=lower, *args, **kwargs)
+                                       lower=lower, logphelper=MvNormalLogp,
+                                       logpsumhelper=MvNormalLogpSum,
+                                       *args, **kwargs)
         self.mean = self.median = self.mode = self.mu = self.mu
 
     def random(self, point=None, size=None):
@@ -235,7 +201,6 @@ def random(self, point=None, size=None):
             mu, cov = draw_values([self.mu, self.cov], point=point)
             if mu.shape[-1] != cov.shape[-1]:
                 raise ValueError("Shapes for mu and cov don't match")
-
             try:
                 dist = stats.multivariate_normal(
                     mean=mu, cov=cov, allow_singular=True)
@@ -244,7 +209,7 @@ def random(self, point=None, size=None):
                 return np.nan * np.zeros(size)
             return dist.rvs(size)
         elif self._cov_type == 'chol':
-            mu, chol = draw_values([self.mu, self.chol_cov], point=point)
+            mu, chol = draw_values([self.mu, self.chol], point=point)
             if mu.shape[-1] != chol[0].shape[-1]:
                 raise ValueError("Shapes for mu and chol don't match")
 
@@ -267,11 +232,15 @@ def random(self, point=None, size=None):
                 chol, standard_normal.T, lower=True)
             return mu + transformed.T
 
-    def logp(self, value):
-        quaddist, logdet, ok = self._quaddist(value)
-        k = value.shape[-1].astype(theano.config.floatX)
-        norm = - 0.5 * k * pm.floatX(np.log(2 * np.pi))
-        return bound(norm - 0.5 * quaddist - logdet, ok)
+    def logp_sum(self, value):
+        value, onedim = self._check_logp_value(value)
+
+        logpsum = self._delta_logp_sum(value - self.mu)
+
+        if onedim and logpsum.ndim != 0:
+            return logpsum[0]
+
+        return logpsum
 
     def _repr_latex_(self, name=None, dist=None):
         if dist is None:
@@ -326,13 +295,17 @@ class MvStudentT(_QuadFormBase):
 
     def __init__(self, nu, Sigma=None, mu=None, cov=None, tau=None, chol=None,
                  lower=None, *args, **kwargs):
+
         if Sigma is not None:
             if cov is not None:
                 raise ValueError('Specify only one of cov and Sigma')
             cov = Sigma
-        super(MvStudentT, self).__init__(mu=mu, cov=cov, tau=tau, chol=chol,
-                                         lower=lower, *args, **kwargs)
+        self.nu = nu = tt.as_tensor_variable(nu)
+        super(MvStudentT, self).__init__(mu=mu, cov=cov, tau=tau, chol=chol,
+                                         lower=lower, logphelper=MvTLogp(self.nu),
+                                         logpsumhelper=MvTLogpSum(self.nu),
+                                         *args, **kwargs)
         self.mean = self.median = self.mode = self.mu = self.mu
 
     def random(self, point=None, size=None):
@@ -344,7 +317,7 @@ def random(self, point=None, size=None):
             tau, = draw_values([self.tau], point=point)
             dist = MvNormal.dist(mu=np.zeros_like(mu), tau=tau)
         else:
-            chol, = draw_values([self.chol_cov], point=point)
+            chol, = draw_values([self.chol], point=point)
             dist = MvNormal.dist(mu=np.zeros_like(mu), chol=chol)
 
         samples = dist.random(point, size)
@@ -352,16 +325,6 @@ def random(self, point=None, size=None):
         chi2 = np.random.chisquare
         return (np.sqrt(nu) * samples.T / chi2(nu, size)).T + mu
 
-    def logp(self, value):
-        quaddist, logdet, ok = self._quaddist(value)
-        k = value.shape[-1].astype(theano.config.floatX)
-
-        norm = (gammaln((self.nu + k) / 2.)
-                - gammaln(self.nu / 2.)
-                - 0.5 * k * floatX(np.log(self.nu * np.pi)))
-        inner = - (self.nu + k) / 2. * tt.log1p(quaddist / self.nu)
-        return bound(norm + inner - logdet, ok)
-
     def _repr_latex_(self, name=None, dist=None):
         if dist is None:
             dist = self
@@ -1191,11 +1154,13 @@ def __init__(self, mu=0, rowcov=None, rowchol=None, rowtau=None,
         super(MatrixNormal, self).__init__(shape=shape, *args, **kwargs)
         self.mu = tt.as_tensor_variable(mu)
         self.mean = self.median = self.mode = self.mu
-        self.solve_lower = tt.slinalg.solve_lower_triangular
-        self.solve_upper = tt.slinalg.solve_upper_triangular
 
     def _setup_matrices(self, colcov, colchol, coltau, rowcov, rowchol, rowtau):
-        cholesky = Cholesky(nofail=False, lower=True)
+
+        cholesky = tt.slinalg.Cholesky(lower=True, on_error="nan")
+
+        self.solve_lower = tt.slinalg.solve_lower_triangular
+        self.solve_upper = tt.slinalg.solve_upper_triangular
 
         # Among-row matrices
         if len([i for i in [rowtau, rowcov, rowchol] if i is not None]) != 1:
@@ -1206,25 +1171,27 @@ def _setup_matrices(self, colcov, colchol, coltau, rowcov, rowchol, rowtau):
             self.m = rowcov.shape[0]
             self._rowcov_type = 'cov'
             rowcov = tt.as_tensor_variable(rowcov)
-            if rowcov.ndim != 2:
-                raise ValueError('rowcov must be two dimensional.')
-            self.rowchol_cov = cholesky(rowcov)
+            try:
+                self._rowchol_cov = cholesky(rowcov)
+            except Exception:
+                raise_from(ValueError('`rowcov` must be two dimensional.'), None)
             self.rowcov = rowcov
         elif rowtau is not None:
-            raise ValueError('rowtau not supported at this time')
+            raise ValueError('`rowtau` not supported at this time')
             self.m = rowtau.shape[0]
             self._rowcov_type = 'tau'
             rowtau = tt.as_tensor_variable(rowtau)
-            if rowtau.ndim != 2:
-                raise ValueError('rowtau must be two dimensional.')
-            self.rowchol_tau = cholesky(rowtau)
+            try:
+                self._rowchol_tau = cholesky(rowtau)
+            except Exception:
+                raise_from(ValueError('`rowtau` must be two dimensional.'), None)
             self.rowtau = rowtau
         else:
             self.m = rowchol.shape[0]
             self._rowcov_type = 'chol'
             if rowchol.ndim != 2:
-                raise ValueError('rowchol must be two dimensional.')
-            self.rowchol_cov = tt.as_tensor_variable(rowchol)
+                raise ValueError('`rowchol` must be two dimensional.')
+            self._rowchol_cov = tt.as_tensor_variable(rowchol)
 
         # Among-column matrices
         if len([i for i in [coltau, colcov, colchol] if i is not None]) != 1:
@@ -1235,32 +1202,34 @@ def _setup_matrices(self, colcov, colchol, coltau, rowcov, rowchol, rowtau):
             self.n = colcov.shape[0]
             self._colcov_type = 'cov'
             colcov = tt.as_tensor_variable(colcov)
-            if colcov.ndim != 2:
-                raise ValueError('colcov must be two dimensional.')
-            self.colchol_cov = cholesky(colcov)
+            try:
+                self._colchol_cov = cholesky(colcov)
+            except Exception:
+                raise_from(ValueError('`colcov` must be two dimensional.'), None)
             self.colcov = colcov
         elif coltau is not None:
             raise ValueError('coltau not supported at this time')
             self.n = coltau.shape[0]
             self._colcov_type = 'tau'
             coltau = tt.as_tensor_variable(coltau)
-            if coltau.ndim != 2:
-                raise ValueError('coltau must be two dimensional.')
-            self.colchol_tau = cholesky(coltau)
+            try:
+                self._colchol_tau = cholesky(coltau)
+            except Exception:
+                raise_from(ValueError('`coltau` must be two dimensional.'), None)
             self.coltau = coltau
         else:
             self.n = colchol.shape[0]
             self._colcov_type = 'chol'
             if colchol.ndim != 2:
                 raise ValueError('colchol must be two dimensional.')
-            self.colchol_cov = tt.as_tensor_variable(colchol)
+            self._colchol_cov = tt.as_tensor_variable(colchol)
 
     def random(self, point=None, size=None):
         if size is None:
             size = list(self.shape)
 
         mu, colchol, rowchol = draw_values(
-            [self.mu, self.colchol_cov, self.rowchol_cov],
+            [self.mu, self._colchol_cov, self._rowchol_cov],
             point=point
         )
         standard_normal = np.random.standard_normal(size)
@@ -1271,8 +1240,8 @@ def _trquaddist(self, value):
         the logdet of colcov and rowcov."""
 
         delta = value - self.mu
-        rowchol_cov = self.rowchol_cov
-        colchol_cov = self.colchol_cov
+        rowchol_cov = self._rowchol_cov
+        colchol_cov = self._colchol_cov
 
         # Find exponent piece by piece
         right_quaddist = self.solve_lower(rowchol_cov, delta)
@@ -1291,5 +1260,5 @@ def logp(self, value):
         trquaddist, half_collogdet, half_rowlogdet = self._trquaddist(value)
         m = self.m
         n = self.n
-        norm = - 0.5 * m * n * pm.floatX(np.log(2 * np.pi))
+        norm = - 0.5 * m * n * floatX(np.log(2 * np.pi))
         return norm - 0.5*trquaddist - m*half_collogdet - n*half_rowlogdet
diff --git a/pymc3/distributions/timeseries.py b/pymc3/distributions/timeseries.py
index 9845199bff..d3bf4e125f 100644
--- a/pymc3/distributions/timeseries.py
+++ b/pymc3/distributions/timeseries.py
@@ -3,7 +3,6 @@
 from pymc3.util import get_variable_name
 
 from .continuous import get_tau_sd, Normal, Flat
-from .dist_math import Cholesky
 from . import multivariate
 from . import distribution
 
@@ -278,49 +277,7 @@ def _repr_latex_(self, name=None, dist=None):
         return r'${} \sim \text{EulerMaruyama}(\mathit{{dt}}={})$'.format(name,
                 get_variable_name(dt))
 
-
-class _CovSet():
-    R"""
-    Convenience class to set Covariance, Inverse Covariance and Cholesky
-    descomposition of Covariance marrices.
- """ - def __initCov__(self, cov=None, tau=None, chol=None, lower=True): - if all([val is None for val in [cov, tau, chol]]): - raise ValueError('One of cov, tau or chol arguments must be provided.') - - self.cov = self.tau = self.chol_cov = None - - cholesky = Cholesky(nofail=True, lower=True) - if cov is not None: - self.k = cov.shape[0] - self._cov_type = 'cov' - cov = tt.as_tensor_variable(cov) - if cov.ndim != 2: - raise ValueError('cov must be two dimensional.') - self.chol_cov = cholesky(cov) - self.cov = cov - self._n = self.cov.shape[-1] - elif tau is not None: - self.k = tau.shape[0] - self._cov_type = 'tau' - tau = tt.as_tensor_variable(tau) - if tau.ndim != 2: - raise ValueError('tau must be two dimensional.') - self.chol_tau = cholesky(tau) - self.tau = tau - self._n = self.tau.shape[-1] - else: - if chol is not None and not lower: - chol = chol.T - self.k = chol.shape[0] - self._cov_type = 'chol' - if chol.ndim != 2: - raise ValueError('chol must be two dimensional.') - self.chol_cov = tt.as_tensor_variable(chol) - self._n = self.chol_cov.shape[-1] - - -class MvGaussianRandomWalk(distribution.Continuous, _CovSet): +class MvGaussianRandomWalk(distribution.Continuous): R""" Multivariate Random Walk with Normal innovations @@ -345,19 +302,15 @@ class MvGaussianRandomWalk(distribution.Continuous, _CovSet): def __init__(self, mu=0., cov=None, tau=None, chol=None, lower=True, init=Flat.dist(), *args, **kwargs): super(MvGaussianRandomWalk, self).__init__(*args, **kwargs) - super(MvGaussianRandomWalk, self).__initCov__(cov, tau, chol, lower) - self.mu = mu = tt.as_tensor_variable(mu) self.init = init self.mean = tt.as_tensor_variable(0.) + self.innov = multivariate.MvNormal.dist(self.mu, cov, tau, chol, lower) def logp(self, x): x_im1 = x[:-1] x_i = x[1:] - - innov_like = multivariate.MvNormal.dist(mu=x_im1 + self.mu, cov=self.cov, - tau=self.tau, chol=self.chol_cov).logp(x_i) - return self.init.logp(x[0]) + tt.sum(innov_like) + return self.init.logp(x[0]) + self.innov.logp_sum(x_i - x_im1) def _repr_latex_(self, name=None, dist=None): if dist is None: @@ -370,7 +323,7 @@ def _repr_latex_(self, name=None, dist=None): get_variable_name(cov)) -class MvStudentTRandomWalk(distribution.Continuous, _CovSet): +class MvStudentTRandomWalk(distribution.Continuous): R""" Multivariate Random Walk with StudentT innovations @@ -391,19 +344,17 @@ class MvStudentTRandomWalk(distribution.Continuous, _CovSet): def __init__(self, nu, mu=0., cov=None, tau=None, chol=None, lower=True, init=Flat.dist(), *args, **kwargs): super(MvStudentTRandomWalk, self).__init__(*args, **kwargs) - super(MvStudentTRandomWalk, self).__initCov__(cov, tau, chol, lower) self.mu = mu = tt.as_tensor_variable(mu) self.nu = nu = tt.as_tensor_variable(nu) self.init = init + self.inov = multivariate.MvStudentT.dist(self.nu, self.mu, cov=cov, + tau=tau, chol=chol) self.mean = tt.as_tensor_variable(0.) 
def logp(self, x): x_im1 = x[:-1] x_i = x[1:] - innov_like = multivariate.MvStudentT.dist(self.nu, mu=x_im1 + self.mu, - cov=self.cov, tau=self.tau, - chol=self.chol_cov).logp(x_i) - return self.init.logp(x[0]) + tt.sum(innov_like) + return self.init.logp(x[0]) + self.innov.logp_sum(x_i - x_im1) def _repr_latex_(self, name=None, dist=None): if dist is None: diff --git a/pymc3/gp/gp.py b/pymc3/gp/gp.py index 3a58491dbd..185c6a050a 100644 --- a/pymc3/gp/gp.py +++ b/pymc3/gp/gp.py @@ -6,12 +6,16 @@ import pymc3 as pm from pymc3.gp.cov import Covariance, Constant from pymc3.gp.mean import Zero -from pymc3.gp.util import (conditioned_vars, - infer_shape, stabilize, cholesky, solve_lower, solve_upper) +from pymc3.gp.util import (conditioned_vars, infer_shape) +from pymc3.distributions.dist_math import stabilize from pymc3.distributions import draw_values - __all__ = ['Latent', 'Marginal', 'TP', 'MarginalSparse'] +cholesky = tt.slinalg.cholesky +# TODO: see if any of the solves might be done inplace +solve_lower = tt.slinalg.Solve(A_structure='lower_triangular') +solve_upper = tt.slinalg.Solve(A_structure='upper_triangular') +solve = tt.slinalg.Solve(A_structure='general') class Base(object): R""" @@ -104,13 +108,13 @@ def __init__(self, mean_func=Zero(), cov_func=Constant(0.0)): def _build_prior(self, name, X, reparameterize=True, **kwargs): mu = self.mean_func(X) - chol = cholesky(stabilize(self.cov_func(X))) + cov = stabilize(self.cov_func(X)) shape = infer_shape(X, kwargs.pop("shape", None)) if reparameterize: v = pm.Normal(name + "_rotated_", mu=0.0, sd=1.0, shape=shape, **kwargs) - f = pm.Deterministic(name, mu + tt.dot(chol, v)) + f = pm.Deterministic(name, mu + tt.dot(cholesky(cov), v)) else: - f = pm.MvNormal(name, mu=mu, chol=chol, shape=shape, **kwargs) + f = pm.MvNormal(name, mu=mu, cov=cov, shape=shape, **kwargs) return f def prior(self, name, X, reparameterize=True, **kwargs): @@ -200,9 +204,9 @@ def conditional(self, name, Xnew, given=None, **kwargs): """ givens = self._get_given_vals(given) mu, cov = self._build_conditional(Xnew, *givens) - chol = cholesky(stabilize(cov)) + cov = stabilize(cov) shape = infer_shape(Xnew, kwargs.pop("shape", None)) - return pm.MvNormal(name, mu=mu, chol=chol, shape=shape, **kwargs) + return pm.MvNormal(name, mu=mu, cov=cov, shape=shape, **kwargs) @conditioned_vars(["X", "f", "nu"]) @@ -246,14 +250,15 @@ def __add__(self, other): def _build_prior(self, name, X, reparameterize=True, **kwargs): mu = self.mean_func(X) - chol = cholesky(stabilize(self.cov_func(X))) + cov = stabilize(self.cov_func(X)) shape = infer_shape(X, kwargs.pop("shape", None)) if reparameterize: chi2 = pm.ChiSquared("chi2_", self.nu) v = pm.Normal(name + "_rotated_", mu=0.0, sd=1.0, shape=shape, **kwargs) - f = pm.Deterministic(name, (tt.sqrt(self.nu) / chi2) * (mu + tt.dot(chol, v))) + a = (tt.sqrt(self.nu) / chi2) * (mu + tt.dot(cholesky(cov), v)) + f = pm.Deterministic(name, a) else: - f = pm.MvStudentT(name, nu=self.nu, mu=mu, chol=chol, shape=shape, **kwargs) + f = pm.MvStudentT(name, nu=self.nu, mu=mu, cov=cov, shape=shape, **kwargs) return f def prior(self, name, X, reparameterize=True, **kwargs): @@ -319,9 +324,9 @@ def conditional(self, name, Xnew, **kwargs): X = self.X f = self.f nu2, mu, covT = self._build_conditional(Xnew, X, f) - chol = cholesky(stabilize(covT)) + cov = stabilize(covT) shape = infer_shape(Xnew, kwargs.pop("shape", None)) - return pm.MvStudentT(name, nu=nu2, mu=mu, chol=chol, shape=shape, **kwargs) + return pm.MvStudentT(name, nu=nu2, mu=mu, cov=cov, 
shape=shape, **kwargs) @conditioned_vars(["X", "y", "noise"]) @@ -415,15 +420,15 @@ def marginal_likelihood(self, name, X, y, noise, is_observed=True, **kwargs): if not isinstance(noise, Covariance): noise = pm.gp.cov.WhiteNoise(noise) mu, cov = self._build_marginal_likelihood(X, noise) - chol = cholesky(stabilize(cov)) + cov = stabilize(cov) self.X = X self.y = y self.noise = noise if is_observed: - return pm.MvNormal(name, mu=mu, chol=chol, observed=y, **kwargs) + return pm.MvNormal(name, mu=mu, cov=cov, observed=y, **kwargs) else: shape = infer_shape(X, kwargs.pop("shape", None)) - return pm.MvNormal(name, mu=mu, chol=chol, shape=shape, **kwargs) + return pm.MvNormal(name, mu=mu, cov=cov, shape=shape, **kwargs) def _get_given_vals(self, given): if given is None: @@ -501,9 +506,8 @@ def conditional(self, name, Xnew, pred_noise=False, given=None, **kwargs): givens = self._get_given_vals(given) mu, cov = self._build_conditional(Xnew, pred_noise, False, *givens) - chol = cholesky(cov) shape = infer_shape(Xnew, kwargs.pop("shape", None)) - return pm.MvNormal(name, mu=mu, chol=chol, shape=shape, **kwargs) + return pm.MvNormal(name, mu=mu, cov=cov, shape=shape, **kwargs) def predict(self, Xnew, point=None, diag=False, pred_noise=False, given=None): R""" @@ -784,6 +788,5 @@ def conditional(self, name, Xnew, pred_noise=False, given=None, **kwargs): givens = self._get_given_vals(given) mu, cov = self._build_conditional(Xnew, pred_noise, False, *givens) - chol = cholesky(cov) shape = infer_shape(Xnew, kwargs.pop("shape", None)) - return pm.MvNormal(name, mu=mu, chol=chol, shape=shape, **kwargs) + return pm.MvNormal(name, mu=mu, cov=cov, shape=shape, **kwargs) diff --git a/pymc3/gp/util.py b/pymc3/gp/util.py index 031bc98034..2906336c8a 100644 --- a/pymc3/gp/util.py +++ b/pymc3/gp/util.py @@ -1,14 +1,7 @@ from scipy.cluster.vq import kmeans import numpy as np -import pymc3 as pm import theano.tensor as tt - - -cholesky = pm.distributions.dist_math.Cholesky(nofail=True, lower=True) -solve_lower = tt.slinalg.Solve(A_structure='lower_triangular') -solve_upper = tt.slinalg.Solve(A_structure='upper_triangular') -solve = tt.slinalg.Solve(A_structure='general') - +from pymc3.theanof import floatX def infer_shape(X, n_points=None): if n_points is None: @@ -18,12 +11,6 @@ def infer_shape(X, n_points=None): raise TypeError("Cannot infer 'shape', provide as an argument") return n_points - -def stabilize(K): - """ adds small diagonal to a covariance matrix """ - return K + 1e-6 * tt.identity_like(K) - - def kmeans_inducing_points(n_inducing, X): # first whiten X if isinstance(X, tt.TensorConstant): @@ -37,7 +24,7 @@ def kmeans_inducing_points(n_inducing, X): "of {}".format(type(X)))) scaling = np.std(X, 0) # if std of a column is very small (zero), don't normalize that column - scaling[scaling <= 1e-6] = 1.0 + scaling[scaling <= 1e-6] = floatX(1.0) Xw = X / scaling Xu, distortion = kmeans(Xw, n_inducing) return Xu * scaling @@ -89,6 +76,3 @@ def plot_gp_dist(ax, samples, x, plot_samples=True, palette="Reds"): # plot a few samples idx = np.random.randint(0, samples.shape[1], 30) ax.plot(x, samples[:,idx], color=cmap(0.9), lw=1, alpha=0.1) - - - diff --git a/pymc3/tests/models.py b/pymc3/tests/models.py index 117d847c3c..6aacd11495 100644 --- a/pymc3/tests/models.py +++ b/pymc3/tests/models.py @@ -114,12 +114,12 @@ def mv_simple_discrete(): def mv_prior_simple(): n = 3 - noise = 0.1 + noise = pm.floatX(.1) X = np.linspace(0, 1, n)[:, None] K = pm.gp.cov.ExpQuad(1, 1)(X).eval() L = np.linalg.cholesky(K) - K_noise 
= K + noise * np.eye(n)
+    K_noise = K + noise * pm.floatX(np.eye(n))
     obs = floatX_array([-0.1, 0.5, 1.1])
 
     # Posterior mean
@@ -129,12 +129,12 @@ def mv_prior_simple():
 
     # Posterior standard deviation
     v = np.linalg.solve(L_noise, K)
-    std_post = (K - np.dot(v.T, v)).diagonal() ** 0.5
+    std_post = (K - np.dot(v.T, v)).diagonal() ** pm.floatX(.5)
 
     with pm.Model() as model:
         x = pm.Flat('x', shape=n)
         x_obs = pm.MvNormal('x_obs', observed=obs, mu=x,
-                            cov=noise * np.eye(n), shape=n)
+                            cov=noise * pm.floatX(np.eye(n)), shape=n)
 
     return model.test_point, model, (K, L, mu_post, std_post, noise)
 
diff --git a/pymc3/tests/test_dist_math.py b/pymc3/tests/test_dist_math.py
index 57e6fdd19a..d3f023c34a 100644
--- a/pymc3/tests/test_dist_math.py
+++ b/pymc3/tests/test_dist_math.py
@@ -10,7 +10,8 @@
 from ..theanof import floatX
 from ..distributions import Discrete
 from ..distributions.dist_math import (
-    bound, factln, alltrue_scalar, MvNormalLogp, SplineWrapper)
+    bound, factln, alltrue_scalar, MvNormalLogp,
+    MvNormalLogpSum, SplineWrapper)
 
 
 def test_bound():
@@ -122,10 +123,64 @@ def test_multinomial_bound():
 
 
 class TestMvNormalLogp():
-    def test_logp(self):
+
+    def test_logp_with_tau(self):
+        np.random.seed(42)
+
+        chol_val = floatX(np.array([[1, 0], [0.9, 2]]))
+        cov_val = floatX(np.dot(chol_val, chol_val.T))
+        tau_val = floatX(np.linalg.inv(cov_val))
+        tau = tt.matrix('tau')
+        tau.tag.test_value = tau_val
+        delta_val = floatX(np.random.randn(5, 2))
+        delta = tt.matrix('delta')
+        delta.tag.test_value = delta_val
+        expect = stats.multivariate_normal(mean=np.zeros(2), cov=cov_val)
+        expect = expect.logpdf(delta_val)
+        logp_tau = MvNormalLogp('tau')(tau, delta)
+        logp_tau_f = theano.function([tau, delta], logp_tau)
+        logp_tau = logp_tau_f(tau_val, delta_val)
+        npt.assert_allclose(logp_tau, expect)
+
+    def test_logp_with_cov(self):
+        np.random.seed(42)
+
+        chol_val = floatX(np.array([[1, 0], [0.9, 2]]))
+        cov_val = floatX(np.dot(chol_val, chol_val.T))
+        cov = tt.matrix('cov')
+        cov.tag.test_value = cov_val
+        delta_val = floatX(np.random.randn(5, 2))
+        delta = tt.matrix('delta')
+        delta.tag.test_value = delta_val
+        expect = stats.multivariate_normal(mean=np.zeros(2), cov=cov_val)
+        expect = expect.logpdf(delta_val)
+        logp_cov = MvNormalLogp()(cov, delta)
+        logp_cov_f = theano.function([cov, delta], logp_cov)
+        logp_cov = logp_cov_f(cov_val, delta_val)
+        npt.assert_allclose(logp_cov, expect)
+
+    @pytest.mark.skip(reason="Not yet implemented")
+    def test_logp_with_chol(self):
+        np.random.seed(42)
+
+        chol_val = floatX(np.array([[1, 0], [0.9, 2]]))
+        cov_val = floatX(np.dot(chol_val, chol_val.T))
+        chol = tt.matrix('chol')
+        chol.tag.test_value = chol_val
+        delta_val = floatX(np.random.randn(5, 2))
+        delta = tt.matrix('delta')
+        delta.tag.test_value = delta_val
+        expect = stats.multivariate_normal(mean=np.zeros(2), cov=cov_val)
+        expect = expect.logpdf(delta_val)
+        logp_chol = MvNormalLogp('chol')(chol, delta)
+        logp_chol_f = theano.function([chol, delta], logp_chol)
+        logp_chol = logp_chol_f(chol_val, delta_val)
+        npt.assert_allclose(logp_chol, expect)
+
+    def test_logpsum_with_cov(self):
         np.random.seed(42)
 
-        chol_val = floatX(np.array([[1, 0.9], [0, 2]]))
+        chol_val = floatX(np.array([[1, 0], [0.9, 2]]))
         cov_val = floatX(np.dot(chol_val, chol_val.T))
         cov = tt.matrix('cov')
         cov.tag.test_value = cov_val
@@ -134,10 +189,27 @@ def test_logp(self):
         delta.tag.test_value = delta_val
         expect = stats.multivariate_normal(mean=np.zeros(2), cov=cov_val)
         expect = expect.logpdf(delta_val).sum()
-        logp = MvNormalLogp()(cov, delta)
-        logp_f = theano.function([cov, delta], logp)
-        logp = logp_f(cov_val, delta_val)
-        npt.assert_allclose(logp, expect)
+        logp_cov = MvNormalLogpSum()(cov, delta)
+        logp_cov_f = theano.function([cov, delta], logp_cov)
+        logp_cov = logp_cov_f(cov_val, delta_val)
+        npt.assert_allclose(logp_cov, expect)
+
+    def test_logpsum_with_chol(self):
+        np.random.seed(42)
+
+        chol_val = floatX(np.array([[1, 0], [0.9, 2]]))
+        cov_val = floatX(np.dot(chol_val, chol_val.T))
+        chol = tt.matrix('chol')
+        chol.tag.test_value = chol_val
+        delta_val = floatX(np.random.randn(5, 2))
+        delta = tt.matrix('delta')
+        delta.tag.test_value = delta_val
+        expect = stats.multivariate_normal(mean=np.zeros(2), cov=cov_val)
+        expect = expect.logpdf(delta_val).sum()
+        logp_chol = MvNormalLogpSum('chol')(chol, delta)
+        logp_chol_f = theano.function([chol, delta], logp_chol)
+        logp_chol = logp_chol_f(chol_val, delta_val)
+        npt.assert_allclose(logp_chol, expect)
 
     @theano.configparser.change_flags(compute_test_value="ignore")
     def test_grad(self):
@@ -148,8 +220,27 @@ def func(chol_vec, delta):
             tt.stack([tt.exp(0.1 * chol_vec[0]), 0]),
             tt.stack([chol_vec[1], 2 * tt.exp(chol_vec[2])]),
         ])
-        cov = tt.dot(chol, chol.T)
-        return MvNormalLogp()(cov, delta)
+        cov = floatX(tt.dot(chol, chol.T))
+        return MvNormalLogpSum()(cov, delta)
+
+        chol_vec_val = floatX(np.array([0.5, 1., -0.1]))
+
+        delta_val = floatX(np.random.randn(1, 2))
+        utt.verify_grad(func, [chol_vec_val, delta_val])
+
+        delta_val = floatX(np.random.randn(5, 2))
+        utt.verify_grad(func, [chol_vec_val, delta_val])
+
+    @theano.configparser.change_flags(compute_test_value="ignore")
+    def test_grad_with_chol(self):
+        np.random.seed(42)
+
+        def func(chol_vec, delta):
+            chol = tt.stack([
+                tt.stack([tt.exp(0.1 * chol_vec[0]), 0]),
+                tt.stack([chol_vec[1], 2 * tt.exp(chol_vec[2])]),
+            ])
+            return MvNormalLogpSum('chol')(floatX(chol), delta)
 
         chol_vec_val = floatX(np.array([0.5, 1., -0.1]))
 
@@ -159,19 +250,18 @@ def func(chol_vec, delta):
         delta_val = floatX(np.random.randn(5, 2))
         utt.verify_grad(func, [chol_vec_val, delta_val])
 
-    @pytest.mark.skip(reason="Fix in theano not released yet: Theano#5908")
     @theano.configparser.change_flags(compute_test_value="ignore")
     def test_hessian(self):
         chol_vec = tt.vector('chol_vec')
-        chol_vec.tag.test_value = np.array([0.1, 2, 3])
+        chol_vec.tag.test_value = floatX(np.array([0.1, 2, 3]))
         chol = tt.stack([
             tt.stack([tt.exp(0.1 * chol_vec[0]), 0]),
             tt.stack([chol_vec[1], 2 * tt.exp(chol_vec[2])]),
         ])
         cov = tt.dot(chol, chol.T)
         delta = tt.matrix('delta')
-        delta.tag.test_value = np.ones((5, 2))
-        logp = MvNormalLogp()(cov, delta)
+        delta.tag.test_value = floatX(np.ones((5, 2)))
+        logp = MvNormalLogpSum()(cov, delta)
         g_cov, g_delta = tt.grad(logp, [cov, delta])
         tt.grad(g_delta.sum() + g_cov.sum(), [delta, cov])
 
diff --git a/pymc3/tests/test_distributions.py b/pymc3/tests/test_distributions.py
index f32e9b29c2..bf5175d5b9 100644
--- a/pymc3/tests/test_distributions.py
+++ b/pymc3/tests/test_distributions.py
@@ -693,14 +693,14 @@ def test_mvnormal_indef(self):
         mu = floatX(np.zeros(2))
         x = tt.vector('x')
         x.tag.test_value = np.zeros(2)
-        logp = MvNormal.dist(mu=mu, cov=cov).logp(x)
+        logp = MvNormal.dist(mu=mu, cov=cov).logp_sum(x)
         f_logp = theano.function([cov, x], logp)
         assert f_logp(cov_val, np.ones(2)) == -np.inf
         dlogp = tt.grad(logp, cov)
         f_dlogp = theano.function([cov, x], dlogp)
         assert not np.all(np.isfinite(f_dlogp(cov_val, np.ones(2))))
 
-        logp = MvNormal.dist(mu=mu, tau=cov).logp(x)
+        logp = MvNormal.dist(mu=mu, tau=cov).logp_sum(x)
         f_logp = 
theano.function([cov, x], logp) assert f_logp(cov_val, np.ones(2)) == -np.inf dlogp = tt.grad(logp, cov) diff --git a/pymc3/tests/test_gp.py b/pymc3/tests/test_gp.py index c864da858b..c6446f7e80 100644 --- a/pymc3/tests/test_gp.py +++ b/pymc3/tests/test_gp.py @@ -447,7 +447,7 @@ class TestLinear(object): def test_1d(self): X = np.linspace(0, 1, 10)[:, None] with pm.Model() as model: - cov = pm.gp.cov.Linear(1, 0.5) + cov = pm.gp.cov.Linear(1, pm.floatX(.5)) K = theano.function([], cov(X))() npt.assert_allclose(K[0, 1], 0.19444, atol=1e-3) K = theano.function([], cov(X, X))() @@ -461,7 +461,7 @@ class TestPolynomial(object): def test_1d(self): X = np.linspace(0, 1, 10)[:, None] with pm.Model() as model: - cov = pm.gp.cov.Polynomial(1, 0.5, 2, 0) + cov = pm.gp.cov.Polynomial(1, pm.floatX(.5), 2, 0) K = theano.function([], cov(X))() npt.assert_allclose(K[0, 1], 0.03780, atol=1e-3) K = theano.function([], cov(X, X))() @@ -543,15 +543,15 @@ class TestMarginalVsLatent(object): Compare the logp of models Marginal, noise=0 and Latent. """ def setup_method(self): - X = np.random.randn(50,3) - y = np.random.randn(50)*0.01 - Xnew = np.random.randn(60, 3) - pnew = np.random.randn(60)*0.01 + X = pm.floatX(np.random.randn(50,3)) + y = pm.floatX(np.random.randn(50)*0.01) + Xnew = pm.floatX(np.random.randn(60, 3)) + pnew = pm.floatX(np.random.randn(60)*0.01) with pm.Model() as model: cov_func = pm.gp.cov.ExpQuad(3, [0.1, 0.2, 0.3]) - mean_func = pm.gp.mean.Constant(0.5) + mean_func = pm.gp.mean.Constant(pm.floatX(.5)) gp = pm.gp.Marginal(mean_func, cov_func) - f = gp.marginal_likelihood("f", X, y, noise=0.0, is_observed=False, observed=y) + f = gp.marginal_likelihood("f", X, y, noise=pm.floatX(0), is_observed=False, observed=y) p = gp.conditional("p", Xnew) self.logp = model.logp({"p": pnew}) self.X = X @@ -562,7 +562,7 @@ def setup_method(self): def testLatent1(self): with pm.Model() as model: cov_func = pm.gp.cov.ExpQuad(3, [0.1, 0.2, 0.3]) - mean_func = pm.gp.mean.Constant(0.5) + mean_func = pm.gp.mean.Constant(pm.floatX(.5)) gp = pm.gp.Latent(mean_func, cov_func) f = gp.prior("f", self.X, reparameterize=False) p = gp.conditional("p", self.Xnew) @@ -572,7 +572,7 @@ def testLatent1(self): def testLatent2(self): with pm.Model() as model: cov_func = pm.gp.cov.ExpQuad(3, [0.1, 0.2, 0.3]) - mean_func = pm.gp.mean.Constant(0.5) + mean_func = pm.gp.mean.Constant(pm.floatX(.5)) gp = pm.gp.Latent(mean_func, cov_func) f = gp.prior("f", self.X, reparameterize=True) p = gp.conditional("p", self.Xnew) @@ -588,15 +588,15 @@ class TestMarginalVsMarginalSparse(object): Should be nearly equal when inducing points are same as inputs. 
""" def setup_method(self): - X = np.random.randn(50,3) - y = np.random.randn(50)*0.01 - Xnew = np.random.randn(60, 3) - pnew = np.random.randn(60)*0.01 + X = pm.floatX(np.random.randn(50,3)) + y = pm.floatX(np.random.randn(50)*0.01) + Xnew = pm.floatX(np.random.randn(60, 3)) + pnew = pm.floatX(np.random.randn(60)*0.01) with pm.Model() as model: cov_func = pm.gp.cov.ExpQuad(3, [0.1, 0.2, 0.3]) - mean_func = pm.gp.mean.Constant(0.5) + mean_func = pm.gp.mean.Constant(pm.floatX(.5)) gp = pm.gp.Marginal(mean_func, cov_func) - sigma = 0.1 + sigma = pm.floatX(.1) f = gp.marginal_likelihood("f", X, y, noise=sigma) p = gp.conditional("p", Xnew) self.logp = model.logp({"p": pnew}) @@ -611,7 +611,7 @@ def setup_method(self): def testApproximations(self, approx): with pm.Model() as model: cov_func = pm.gp.cov.ExpQuad(3, [0.1, 0.2, 0.3]) - mean_func = pm.gp.mean.Constant(0.5) + mean_func = pm.gp.mean.Constant(pm.floatX(.5)) gp = pm.gp.MarginalSparse(mean_func, cov_func, approx=approx) f = gp.marginal_likelihood("f", self.X, self.X, self.y, self.sigma) p = gp.conditional("p", self.Xnew) @@ -622,7 +622,7 @@ def testApproximations(self, approx): def testPredictVar(self, approx): with pm.Model() as model: cov_func = pm.gp.cov.ExpQuad(3, [0.1, 0.2, 0.3]) - mean_func = pm.gp.mean.Constant(0.5) + mean_func = pm.gp.mean.Constant(pm.floatX(.5)) gp = pm.gp.MarginalSparse(mean_func, cov_func, approx=approx) f = gp.marginal_likelihood("f", self.X, self.X, self.y, self.sigma) mu1, var1 = self.gp.predict(self.Xnew, diag=True) @@ -633,7 +633,7 @@ def testPredictVar(self, approx): def testPredictCov(self): with pm.Model() as model: cov_func = pm.gp.cov.ExpQuad(3, [0.1, 0.2, 0.3]) - mean_func = pm.gp.mean.Constant(0.5) + mean_func = pm.gp.mean.Constant(pm.floatX(.5)) gp = pm.gp.MarginalSparse(mean_func, cov_func, approx="DTC") f = gp.marginal_likelihood("f", self.X, self.X, self.y, self.sigma, is_observed=False) mu1, cov1 = self.gp.predict(self.Xnew, pred_noise=True) @@ -644,16 +644,16 @@ def testPredictCov(self): class TestGPAdditive(object): def setup_method(self): - self.X = np.random.randn(50,3) - self.y = np.random.randn(50)*0.01 - self.Xnew = np.random.randn(60, 3) - self.noise = pm.gp.cov.WhiteNoise(0.1) + self.X = pm.floatX(np.random.randn(50,3)) + self.y = pm.floatX(np.random.randn(50)*0.01) + self.Xnew = pm.floatX(np.random.randn(60, 3)) + self.noise = pm.gp.cov.WhiteNoise(.1) self.covs = (pm.gp.cov.ExpQuad(3, [0.1, 0.2, 0.3]), pm.gp.cov.ExpQuad(3, [0.1, 0.2, 0.3]), pm.gp.cov.ExpQuad(3, [0.1, 0.2, 0.3])) - self.means = (pm.gp.mean.Constant(0.5), - pm.gp.mean.Constant(0.5), - pm.gp.mean.Constant(0.5)) + self.means = (pm.gp.mean.Constant(pm.floatX(.5)), + pm.gp.mean.Constant(pm.floatX(.5)), + pm.gp.mean.Constant(pm.floatX(.5))) def testAdditiveMarginal(self): with pm.Model() as model1: @@ -682,8 +682,8 @@ def testAdditiveMarginal(self): @pytest.mark.parametrize('approx', ['FITC', 'VFE', 'DTC']) def testAdditiveMarginalSparse(self, approx): - Xu = np.random.randn(10, 3) - sigma = 0.1 + Xu = pm.floatX(np.random.randn(10, 3)) + sigma = pm.floatX(.1) with pm.Model() as model1: gp1 = pm.gp.MarginalSparse(self.means[0], self.covs[0], approx=approx) gp2 = pm.gp.MarginalSparse(self.means[1], self.covs[1], approx=approx) @@ -764,10 +764,10 @@ class TestTP(object): Compare TP with high degress of freedom to GP """ def setup_method(self): - X = np.random.randn(20,3) - y = np.random.randn(20)*0.01 - Xnew = np.random.randn(50, 3) - pnew = np.random.randn(50)*0.01 + X = pm.floatX(np.random.randn(20,3)) + y = 
pm.floatX(np.random.randn(20)*0.01) + Xnew = pm.floatX(np.random.randn(50, 3)) + pnew = pm.floatX(np.random.randn(50)*0.01) with pm.Model() as model: cov_func = pm.gp.cov.ExpQuad(3, [0.1, 0.2, 0.3]) gp = pm.gp.Latent(cov_func=cov_func) @@ -808,15 +808,3 @@ def testAdditiveTPRaises(self): gp2 = pm.gp.TP(cov_func=cov_func, nu=10) with pytest.raises(Exception) as e_info: gp1 + gp2 - - - - - - - - - - - - diff --git a/pymc3/theanof.py b/pymc3/theanof.py index 2ea40632d0..14202769d3 100644 --- a/pymc3/theanof.py +++ b/pymc3/theanof.py @@ -69,7 +69,7 @@ def floatX(X): def smartfloatX(x): """ - Convert non int types to floatX + Convert non int types to floatX """ if str(x.dtype).startswith('float'): x = floatX(x) diff --git a/pymc3/variational/approximations.py b/pymc3/variational/approximations.py index 0df032378b..fa9c7a2c62 100644 --- a/pymc3/variational/approximations.py +++ b/pymc3/variational/approximations.py @@ -10,7 +10,7 @@ from pymc3.theanof import change_flags from pymc3.math import batched_diag from pymc3.variational import flows - +from pymc3.distributions.dist_math import stabilize __all__ = [ 'MeanField', @@ -188,13 +188,13 @@ def symbolic_logq_not_scaled(self): z = self.symbolic_random if self.batched: def logq(z_b, mu_b, L_b): - return pm.MvNormal.dist(mu=mu_b, chol=L_b).logp(z_b) + return pm.MvNormal.dist(mu=mu_b, chol=stabilize(L_b)).logp(z_b) # it's gonna be so slow # scan is computed over batch and then summed up # output shape is (batch, samples) return theano.scan(logq, [z.swapaxes(0, 1), self.mean, self.L])[0].sum(0) else: - return pm.MvNormal.dist(mu=self.mean, chol=self.L).logp(z) + return pm.MvNormal.dist(mu=self.mean, chol=stabilize(self.L)).logp(z) @node_property def symbolic_random(self):
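
Reviewer note (not part of the patch): a minimal usage sketch of the new
constructors, based on the tests added in test_dist_math.py above. After this
change, `MvNormalLogp(mode)` and `MvNormalLogpSum(mode)` return symbolic
log-density functions of `(cov, delta)`, where `mode` is one of 'cov', 'chol'
or 'tau'. Variable names below are illustrative only, and the example assumes
the default float64 configuration:

    import numpy as np
    import theano
    import theano.tensor as tt
    from pymc3.distributions.dist_math import MvNormalLogp, MvNormalLogpSum

    cov = tt.matrix('cov')      # covariance matrix (mode='cov')
    delta = tt.matrix('delta')  # each row is a deviation from the mean

    logp_elem = MvNormalLogp('cov')(cov, delta)      # one log-density per row
    logp_total = MvNormalLogpSum('cov')(cov, delta)  # scalar sum over rows

    f = theano.function([cov, delta], [logp_elem, logp_total])
    elem, total = f(np.eye(2), np.random.randn(5, 2))
    assert np.isclose(elem.sum(), total)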