Refactor Normal and MvNormal #2847
Closed
Changes from 23 commits (68 commits total)

Commits
- 87c4795 gBokiau: Remove unused variables
- 380253e gBokiau: backyard cleaning
- 0dece51 gBokiau: WIP - switch to Theano implementations
- 9d5c4e4 gBokiau: Harmonize how covariance matrices are initialized
- a7e182e gBokiau: Using `OpFromGraph` for MvNormal logp's
- 60493c9 gBokiau: fix imports
- edf0ae8 gBokiau: delay floatX(k)
- 4ca2caa gBokiau: Fix ifelse statements
- a28d3e1 gBokiau: logp returns a vector, not a matrix
- ead0d1f gBokiau: Fix float mismatches
- 9f9da75 gBokiau: TODO check why logp doesn't always return a vector
- 41fc49a gBokiau: amend tests and fix typos
- 8e8f84f gBokiau: Hopefully solve float errors
- 8b2b217 gBokiau: inelegant solution for mvt for the time being
- 23de07e gBokiau: minor fixes
- 8be16c5 gBokiau: fix typo in test, hopefully final shot at fixing float32-mode errors
- e3dbb16 gBokiau: GP: Delegate cholesky to MvNormal
- b4effab gBokiau: harmonize floatX in mv
- d26d096 gBokiau: more float fixes
- f2fc715 gBokiau: return -inf instead of nan
- 6fb9d8c gBokiau: …more typehinting
- 8e3c7cf gBokiau: erring on the safe side of type hinting
- 596877f gBokiau: again, not sure about this
- 44e7a25 gBokiau: styling and correct tests
- 81a0c78 gBokiau: This might or might not work
- cb738cb gBokiau: typo
- d10c302 gBokiau: Apply same logic to timeseries
- 8afb4b2 gBokiau: fixing and extending tests
- a8de397 gBokiau: one more typo
- 7833d50 gBokiau: fix ommissions in tests
- a81eb16 gBokiau: more typos
- 1e40870 gBokiau: Adjust logic for MvStudent
- 0426333 gBokiau: fix approach to replacement
- 1acae23 gBokiau: Fixing more omissions
- ecff6fc gBokiau: Typos. Not very elegant, will have to review structure.
- 5479445 gBokiau: Not sure why TestScalarParameterSamples::test_mv_t fails on onedim
- d9ccfdf gBokiau: oversights
- 896c0c8 gBokiau: one more oversight
- 2a85594 gBokiau: I don't think want the cholesky to return NaN's in GP
- a28ba4d gBokiau: Small style improvements
- edf2b30 gBokiau: Omitted to remove
- 38f3ed0 gBokiau: Style
- 5e7cf9c gBokiau: some fixes
- c905e8f gBokiau: Cleaning up
- 61fb81d gBokiau: typo
- 5db7527 gBokiau: Fix 1-dim shape params
- 5fb74b0 gBokiau: same typo
- 6025c41 gBokiau: Fix tau + postpone gradients with anything but cov for later
- ceddfce gBokiau: bypass diagonal test when cholesky is given
- 82f56df gBokiau: omissions, tau should now be ok everywhere
- 12435fc gBokiau: Avoiding repetition
- b380f0e gBokiau: Throwing FloatX where I can
- 5131403 gBokiau: more FloatX
- 06c6c90 gBokiau: woops
- 2d97c55 gBokiau: This complains that logp isn't scalar.
- d1feb14 gBokiau: Try FullRangGroup choleksy stabilisation
- bece0d8 gBokiau: floatX's
- 3fff0fa gBokiau: typo
- 577cf92 gBokiau: maybe this
- daee63e gBokiau: and yet more floatX's
- 774a402 gBokiau: and more still
- effa3d2 gBokiau: …then maybe this.
- 54e4a76 gBokiau: typo
- fc8590e gBokiau: another typo
- afaa3aa gBokiau: reverting to tt.switch after all
- 721c24a gBokiau: not doing OpFromGraph atm
- e9e9d05 gBokiau: …right.
- d0035b1 gBokiau: omitted to replace
```diff
@@ -6,9 +6,10 @@
 from __future__ import division

 import numpy as np
-import scipy.linalg
 import theano.tensor as tt
 import theano
+from theano.ifelse import ifelse
+from theano.tensor import slinalg

 from .special import gammaln
 from pymc3.theanof import floatX
```
```diff
@@ -143,15 +144,14 @@ def log_normal(x, mean, **kwargs):
     return f(c) - tt.log(tt.abs_(std)) - (x - mean) ** 2 / (2. * std ** 2)


-def MvNormalLogp():
+def MvNormalLogp(with_choleksy=False):
     """Compute the log pdf of a multivariate normal distribution.

-    This should be used in MvNormal.logp once Theano#5908 is released.
-
     Parameters
     ----------
     cov : tt.matrix
-        The covariance matrix.
+        The covariance matrix or its Cholesky decomposition (the latter if
+        `with_choleksy` is set to True when instantiating the Op).
     delta : tt.matrix
         Array of deviations from the mean.
     """
```
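The quantity this Op computes can be sketched in plain NumPy (a hypothetical reference implementation, not code from this PR): for the Cholesky factor L of cov and n rows of deviations, the summed log pdf is -0.5 * (n*k*log(2π) + 2n*Σ log L_ii + ‖L⁻¹Δᵀ‖²_F).

```python
import numpy as np

def mv_normal_logp(cov, delta):
    """Summed log pdf of N(0, cov) over the rows of `delta`, using the
    same Cholesky-based formula as the Op:
    -0.5 * (n*k*log(2*pi) + 2*n*sum(log(diag(L))) + ||L^{-1} delta^T||_F^2)
    """
    n, k = delta.shape
    chol = np.linalg.cholesky(cov)                  # L with L @ L.T == cov
    delta_trans = np.linalg.solve(chol, delta.T).T  # whitened deviations
    result = n * k * np.log(2 * np.pi)
    result += 2.0 * n * np.log(np.diag(chol)).sum()
    result += (delta_trans ** 2).sum()
    return -0.5 * result
```

The Cholesky form avoids ever building cov⁻¹ explicitly: log|cov| falls out as twice the sum of the log-diagonal of L, and the quadratic form becomes a sum of squares of triangular-solve results.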
```diff
@@ -160,24 +160,37 @@ def MvNormalLogp():
         delta = tt.matrix('delta')
         delta.tag.test_value = floatX(np.zeros((2, 3)))

-        solve_lower = tt.slinalg.Solve(A_structure='lower_triangular')
-        solve_upper = tt.slinalg.Solve(A_structure='upper_triangular')
-        cholesky = Cholesky(nofail=True, lower=True)
+        solve_lower = slinalg.Solve(A_structure='lower_triangular', overwrite_b=True)
+        solve_upper = slinalg.Solve(A_structure='upper_triangular', overwrite_b=True)

         n, k = delta.shape
-        n, k = f(n), f(k)
-        chol_cov = cholesky(cov)
-        diag = tt.nlinalg.diag(chol_cov)
-        ok = tt.all(diag > 0)
+        n = f(n)
+
+        if not with_choleksy:
+            # add inplace=True when/if implemented by Theano
+            cholesky = slinalg.Cholesky(lower=True, on_error="nan")
+            cov = cholesky(cov)
+            # The Cholesky op will return NaNs if the cov is not positive definite
+            # -- checking the first value is sufficient
+            ok = ~tt.isnan(cov[0, 0])
+            # will all be NaN if the Cholesky was no-go, which is fine
+            diag = tt.ExtractDiag(view=True)(cov)
+        else:
+            diag = tt.ExtractDiag(view=True)(cov)
+            # Here we must check if the Cholesky is positive definite
+            ok = tt.all(diag > 0)

-        chol_cov = tt.switch(ok, chol_cov, tt.fill(chol_cov, 1))
+        # `solve_lower` throws errors with NaNs hence we replace the cov with
+        # identity and return -Inf later
+        chol_cov = ifelse(ok, cov, tt.eye(k, dtype=theano.config.floatX))
         delta_trans = solve_lower(chol_cov, delta.T).T

-        result = n * k * tt.log(f(2) * np.pi)
+        result = n * f(k) * tt.log(f(2 * np.pi))
         result += f(2) * n * tt.sum(tt.log(diag))
         result += (delta_trans ** f(2)).sum()
         result = f(-.5) * result
-        logp = tt.switch(ok, result, -np.inf)
+
+        logp = ifelse(ok, f(result), f(-np.inf * tt.ones_like(result)))

         def dlogp(inputs, gradients):
             g_logp, = gradients
```

Review comment on the `tt.ExtractDiag(view=True)` line: "Good point to use view"
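The `on_error="nan"` Cholesky plus the `ifelse` guard implement the PR's "return -inf instead of nan" policy for non-positive-definite inputs. In eager NumPy the same policy can be sketched with exception handling (hypothetical helper names, not part of the PR):

```python
import numpy as np

def chol_or_none(cov):
    """Eager analogue of slinalg.Cholesky(on_error="nan"): signal failure
    instead of raising, so the caller can substitute a sentinel logp."""
    try:
        return np.linalg.cholesky(cov)
    except np.linalg.LinAlgError:
        return None

def logp_or_neginf(cov, delta):
    """Apply the diff's policy: non-positive-definite cov gives logp = -inf."""
    n, k = delta.shape
    chol = chol_or_none(cov)
    if chol is None:
        return -np.inf
    delta_trans = np.linalg.solve(chol, delta.T).T
    return -0.5 * (n * k * np.log(2 * np.pi)
                   + 2.0 * n * np.log(np.diag(chol)).sum()
                   + (delta_trans ** 2).sum())

# [[1, 2], [2, 1]] has a negative eigenvalue, so the Cholesky fails:
logp_or_neginf(np.array([[1.0, 2.0], [2.0, 1.0]]), np.zeros((1, 2)))  # -> -inf
```

Returning -inf keeps samplers well-behaved: a proposal with an invalid covariance is simply rejected, rather than poisoning the chain with NaNs or aborting with an exception.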
```diff
@@ -186,109 +199,33 @@ def dlogp(inputs, gradients):
             g_logp.tag.test_value = floatX(1.)
             n, k = delta.shape

-            chol_cov = cholesky(cov)
-            diag = tt.nlinalg.diag(chol_cov)
-            ok = tt.all(diag > 0)
+            if not with_choleksy:
+                cov = cholesky(cov)
+                ok = ~tt.isnan(cov[0, 0])
+            else:
+                diag = tt.ExtractDiag(view=True)(cov)
+                ok = tt.all(diag > 0)

-            chol_cov = tt.switch(ok, chol_cov, tt.fill(chol_cov, 1))
+            I_k = tt.eye(k, dtype=theano.config.floatX)
+            chol_cov = ifelse(ok, cov, I_k)
             delta_trans = solve_lower(chol_cov, delta.T).T

-            inner = n * tt.eye(k) - tt.dot(delta_trans.T, delta_trans)
+            inner = n * I_k - tt.dot(delta_trans.T, delta_trans)
             g_cov = solve_upper(chol_cov.T, inner)
             g_cov = solve_upper(chol_cov.T, g_cov.T)

             tau_delta = solve_upper(chol_cov.T, delta_trans.T)
             g_delta = tau_delta.T

-            g_cov = tt.switch(ok, g_cov, -np.nan)
-            g_delta = tt.switch(ok, g_delta, -np.nan)
+            g_cov = ifelse(ok, f(g_cov), f(-np.nan * tt.zeros_like(g_cov)))
+            g_delta = ifelse(ok, f(g_delta), f(-np.nan * tt.zeros_like(g_delta)))

             return [-0.5 * g_cov * g_logp, -g_delta * g_logp]

         return theano.OpFromGraph(
             [cov, delta], [logp], grad_overrides=dlogp, inline=True)


-class Cholesky(theano.Op):
-    """
-    Return a triangular matrix square root of positive semi-definite `x`.
-
-    This is a copy of the cholesky op in theano, that doesn't throw an
-    error if the matrix is not positive definite, but instead returns
-    nan.
-
-    This has been merged upstream and we should switch to that
-    version after the next theano release.
-
-    L = cholesky(X, lower=True) implies dot(L, L.T) == X.
-    """
-    __props__ = ('lower', 'destructive', 'nofail')
-
-    def __init__(self, lower=True, nofail=False):
-        self.lower = lower
-        self.destructive = False
-        self.nofail = nofail
-
-    def make_node(self, x):
-        x = tt.as_tensor_variable(x)
-        if x.ndim != 2:
-            raise ValueError('Matrix must me two dimensional.')
-        return tt.Apply(self, [x], [x.type()])
-
-    def perform(self, node, inputs, outputs):
-        x = inputs[0]
-        z = outputs[0]
-        try:
-            z[0] = scipy.linalg.cholesky(x, lower=self.lower).astype(x.dtype)
-        except (ValueError, scipy.linalg.LinAlgError):
-            if self.nofail:
-                z[0] = np.eye(x.shape[-1])
-                z[0][0, 0] = np.nan
-            else:
-                raise
-
-    def grad(self, inputs, gradients):
-        """
-        Cholesky decomposition reverse-mode gradient update.
-
-        Symbolic expression for reverse-mode Cholesky gradient taken from [0]_
-
-        References
-        ----------
-        .. [0] I. Murray, "Differentiation of the Cholesky decomposition",
-           http://arxiv.org/abs/1602.07527
-
-        """
-        x = inputs[0]
-        dz = gradients[0]
-        chol_x = self(x)
-        ok = tt.all(tt.nlinalg.diag(chol_x) > 0)
-        chol_x = tt.switch(ok, chol_x, tt.fill_diagonal(chol_x, 1))
-        dz = tt.switch(ok, dz, floatX(1))
-
-        # deal with upper triangular by converting to lower triangular
-        if not self.lower:
-            chol_x = chol_x.T
-            dz = dz.T
-
-        def tril_and_halve_diagonal(mtx):
-            """Extracts lower triangle of square matrix and halves diagonal."""
-            return tt.tril(mtx) - tt.diag(tt.diagonal(mtx) / 2.)
-
-        def conjugate_solve_triangular(outer, inner):
-            """Computes L^{-T} P L^{-1} for lower-triangular L."""
-            solve = tt.slinalg.Solve(A_structure="upper_triangular")
-            return solve(outer.T, solve(outer.T, inner.T).T)
-
-        s = conjugate_solve_triangular(
-            chol_x, tril_and_halve_diagonal(chol_x.T.dot(dz)))
-
-        if self.lower:
-            grad = tt.tril(s + s.T) - tt.diag(tt.diagonal(s))
-        else:
-            grad = tt.triu(s + s.T) - tt.diag(tt.diagonal(s))
-        return [tt.switch(ok, grad, floatX(np.nan))]
-
-
 class SplineWrapper(theano.Op):
```
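The `dlogp` override computes the covariance gradient as -0.5 · (n·Σ⁻¹ − Σ⁻¹ΔᵀΔΣ⁻¹) using two triangular solves against the Cholesky factor. A NumPy sketch of the full-covariance path, with a directional finite-difference check along a symmetric perturbation (hypothetical helper names, not code from the PR):

```python
import numpy as np

def mvn_logp(cov, delta):
    """Log pdf of N(0, cov) summed over the rows of `delta`."""
    n, k = delta.shape
    chol = np.linalg.cholesky(cov)
    dt = np.linalg.solve(chol, delta.T).T
    return -0.5 * (n * k * np.log(2 * np.pi)
                   + 2.0 * n * np.log(np.diag(chol)).sum()
                   + (dt ** 2).sum())

def mvn_dlogp_dcov(cov, delta):
    """Gradient wrt cov, mirroring the two solve_upper calls in dlogp:
    -0.5 * L^{-T} (n*I - L^{-1} D^T D L^{-T}) L^{-1}."""
    n, k = delta.shape
    chol = np.linalg.cholesky(cov)
    dt = np.linalg.solve(chol, delta.T).T
    inner = n * np.eye(k) - dt.T @ dt
    g = np.linalg.solve(chol.T, inner)   # solve_upper(chol_cov.T, inner)
    g = np.linalg.solve(chol.T, g.T)     # solve_upper(chol_cov.T, g_cov.T)
    return -0.5 * g
```

Since `inner` is symmetric, the two solves expand to n·Σ⁻¹ − Σ⁻¹(ΔᵀΔ)Σ⁻¹, which is exactly the derivative of -0.5·(n·log|Σ| + tr(Σ⁻¹ΔᵀΔ)); the -0.5 factor is applied in the returned expression, matching `return [-0.5 * g_cov * g_logp, ...]` in the diff.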
Review comments

Reviewer: I know this wasn't your idea, but I like `floatX` a lot more. I had to go search for the definition of this.

gBokiau: True. It's a mess, though; I just seem to be wrapping floatX around everything.
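For context on that exchange: assuming `pymc3.theanof.floatX` (imported at the top of the diff) simply coerces values to Theano's configured float dtype, a minimal stand-in looks like this (the `FLOATX` constant is illustrative, standing in for `theano.config.floatX`):

```python
import numpy as np

FLOATX = "float64"  # illustrative stand-in for theano.config.floatX

def floatX(x):
    """Cast an array-like or scalar to the configured float dtype, so that
    float32/float64 mismatches cannot creep into a computation."""
    return np.asarray(x, dtype=FLOATX)

# Many of the commits above amount to wrapping constants like this:
half = floatX(-.5)                  # dtype-stable scalar constant
zeros = floatX(np.zeros((2, 3)))    # dtype-stable array
```

This is why so many commits in the list are "more floatX": every literal that enters the graph must be cast, or Theano silently upcasts intermediate results and float32 test modes break.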