Kronecker GP #2731

Merged 15 commits on Feb 15, 2018
8 changes: 7 additions & 1 deletion RELEASE-NOTES.md
@@ -5,20 +5,26 @@
### New features

- Add `logit_p` keyword to `pm.Bernoulli`, so that users can specify the logit of the success probability. This is faster and more stable than using `p=tt.nnet.sigmoid(logit_p)`.
- Add `random` keyword to `pm.DensityDist`, so that users can pass a custom random method, which in turn makes sampling from a `DensityDist` possible.
- Effective sample size computation is updated. The estimation now uses Geyer's initial positive sequence, so the autocorrelation series is no longer truncated inaccurately. `pm.diagnostics.effective_n` can now report N_eff > N.
- Added `KroneckerNormal` distribution and a corresponding `MarginalKron`
Gaussian Process implementation for efficient inference, along with
lower-level functions such as `cartesian` and `kronecker` products.
- Added `Coregion` covariance function.
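
For the `KroneckerNormal`/`MarginalKron` entries above, a minimal sketch of the intended usage may help. The keyword names (`cov_funcs`, `Xs`, `sigma`) reflect my reading of this PR and could differ from the merged API:

```python
import numpy as np
import pymc3 as pm

# Observations live on the Cartesian product of two 1-D grids, so the
# full covariance is a Kronecker product of two small matrices, which
# MarginalKron exploits for efficient inference.
X1 = np.linspace(0, 1, 10)[:, None]
X2 = np.linspace(0, 2, 15)[:, None]
y = np.random.randn(10 * 15)            # stand-in data on the 10 x 15 grid

with pm.Model() as model:
    ls1 = pm.Gamma('ls1', alpha=2, beta=2)
    ls2 = pm.Gamma('ls2', alpha=2, beta=2)
    cov1 = pm.gp.cov.ExpQuad(1, ls=ls1)  # one covariance function per factor
    cov2 = pm.gp.cov.ExpQuad(1, ls=ls2)

    gp = pm.gp.MarginalKron(cov_funcs=[cov1, cov2])
    sigma = pm.HalfNormal('sigma', sd=1)
    y_ = gp.marginal_likelihood('y', Xs=[X1, X2], y=y, sigma=sigma)

    trace = pm.sample(500, tune=500)
```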


### Fixes

- `VonMises` no longer overflows for large values of kappa. `i0` and `i1` have been removed; `log_i0` is now used to compute the logp.


### Deprecations

- DIC and BPIC calculations have been removed.
- `njobs` and `nchains` kwargs are deprecated in favor of `cores` and `chains` for `sample`.
- `lag` kwarg in `pm.stats.autocorr` and `pm.stats.autocov` is deprecated.


## PyMC 3.3 (January 9, 2018)

### New features
2 changes: 2 additions & 0 deletions pymc3/distributions/__init__.py
@@ -57,6 +57,7 @@

from .multivariate import MvNormal
from .multivariate import MatrixNormal
from .multivariate import KroneckerNormal
from .multivariate import MvStudentT
from .multivariate import Dirichlet
from .multivariate import Multinomial
@@ -123,6 +124,7 @@
'TensorType',
'MvNormal',
'MatrixNormal',
'KroneckerNormal',
'MvStudentT',
'Dirichlet',
'Multinomial',
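
With the export above, the new distribution is reachable directly from the package namespace. A small, hedged sketch; the `mu`/`covs`/`shape` keywords are my assumption about the constructor signature:

```python
import numpy as np
import pymc3 as pm

# Two covariance factors; the joint covariance is kron(K1, K2),
# so the distribution is 3 * 2 = 6 dimensional.
K1 = np.array([[1.0, 0.5, 0.2],
               [0.5, 1.0, 0.5],
               [0.2, 0.5, 1.0]])
K2 = np.array([[1.0, 0.3],
               [0.3, 1.0]])

with pm.Model() as model:
    x = pm.KroneckerNormal('x', mu=np.zeros(6), covs=[K1, K2], shape=6)

# Evaluating the joint log-density at the test point checks that the
# Kronecker-structured covariance was assembled without error.
print(model.logp(model.test_point))
```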
135 changes: 135 additions & 0 deletions pymc3/distributions/dist_math.py
@@ -13,6 +13,9 @@
from .special import gammaln
from pymc3.theanof import floatX

from six.moves import xrange
from functools import partial

f = floatX
c = - .5 * np.log(2. * np.pi)

@@ -326,3 +329,135 @@ def grad(self, inputs, grads):
x_grad, = grads

return [x_grad * self.grad_op(x)]


# Custom Eigh, EighGrad, and eigh are required until
# https://github.com/Theano/Theano/pull/6557 is handled, since lambdas
# cannot be pickled.
class Eigh(tt.nlinalg.Eig):
"""
Return the eigenvalues and eigenvectors of a Hermitian or symmetric matrix.

This is a copy of Eigh from theano that calls an EighGrad which uses
partial instead of lambda. Once this has been merged with theano this
should be removed.
"""

_numop = staticmethod(np.linalg.eigh)
__props__ = ('UPLO',)

def __init__(self, UPLO='L'):
assert UPLO in ['L', 'U']
self.UPLO = UPLO

def make_node(self, x):
x = tt.as_tensor_variable(x)
assert x.ndim == 2
# Numpy's linalg.eigh may return either double or single
# precision eigenvalues depending on installed version of
# LAPACK. Rather than trying to reproduce the (rather
# involved) logic, we just probe linalg.eigh with a trivial
# input.
w_dtype = self._numop([[np.dtype(x.dtype).type()]])[0].dtype.name
w = theano.tensor.vector(dtype=w_dtype)
v = theano.tensor.matrix(dtype=x.dtype)
return theano.gof.Apply(self, [x], [w, v])

def perform(self, node, inputs, outputs):
(x,) = inputs
(w, v) = outputs
w[0], v[0] = self._numop(x, self.UPLO)

def grad(self, inputs, g_outputs):
r"""The gradient function should return
.. math:: \sum_n\left(W_n\frac{\partial\,w_n}
{\partial a_{ij}} +
\sum_k V_{nk}\frac{\partial\,v_{nk}}
{\partial a_{ij}}\right),
where [:math:`W`, :math:`V`] corresponds to ``g_outputs``,
:math:`a` to ``inputs``, and :math:`(w, v)=\mbox{eig}(a)`.
Analytic formulae for eigensystem gradients are well-known in
perturbation theory:
.. math:: \frac{\partial\,w_n}
{\partial a_{ij}} = v_{in}\,v_{jn}
.. math:: \frac{\partial\,v_{kn}}
{\partial a_{ij}} =
\sum_{m\ne n}\frac{v_{km}v_{jn}}{w_n-w_m}
"""
x, = inputs
w, v = self(x)
# Replace gradients wrt disconnected variables with
# zeros. This is a work-around for issue #1063.
gw, gv = tt.nlinalg._zero_disconnected([w, v], g_outputs)
return [EighGrad(self.UPLO)(x, w, v, gw, gv)]


class EighGrad(theano.Op):
"""
Gradient of an eigensystem of a Hermitian matrix.

This is a copy of EighGrad from theano that uses partial instead of lambda.
Once this has been merged with theano this should be removed.
"""

__props__ = ('UPLO',)

def __init__(self, UPLO='L'):
assert UPLO in ['L', 'U']
self.UPLO = UPLO
if UPLO == 'L':
self.tri0 = np.tril
self.tri1 = partial(np.triu, k=1)
else:
self.tri0 = np.triu
self.tri1 = partial(np.tril, k=-1)

def make_node(self, x, w, v, gw, gv):
x, w, v, gw, gv = map(tt.as_tensor_variable, (x, w, v, gw, gv))
assert x.ndim == 2
assert w.ndim == 1
assert v.ndim == 2
assert gw.ndim == 1
assert gv.ndim == 2
out_dtype = theano.scalar.upcast(x.dtype, w.dtype, v.dtype,
gw.dtype, gv.dtype)
out = theano.tensor.matrix(dtype=out_dtype)
return theano.gof.Apply(self, [x, w, v, gw, gv], [out])

def perform(self, node, inputs, outputs):
"""
Implements the "reverse-mode" gradient for the eigensystem of
a square matrix.
"""
x, w, v, W, V = inputs
N = x.shape[0]
outer = np.outer

def G(n):
return sum(v[:, m] * V.T[n].dot(v[:, m]) / (w[n] - w[m])
for m in xrange(N) if m != n)

g = sum(outer(v[:, n], v[:, n] * W[n] + G(n))
for n in xrange(N))

# Numpy's eigh(a, 'L') (eigh(a, 'U')) is a function of tril(a)
# (triu(a)) only. This means that partial derivative of
# eigh(a, 'L') (eigh(a, 'U')) with respect to a[i,j] is zero
# for i < j (i > j). At the same time, non-zero components of
# the gradient must account for the fact that variation of the
# opposite triangle contributes to variation of two elements
# of Hermitian (symmetric) matrix. The following line
# implements the necessary logic.
out = self.tri0(g) + self.tri1(g).T

# Make sure we return the right dtype even if NumPy performed
# upcasting in self.tri0.
outputs[0][0] = np.asarray(out, dtype=node.outputs[0].dtype)

def infer_shape(self, node, shapes):
return [shapes[0]]


def eigh(a, UPLO='L'):
"""A copy, remove with Eigh and EighGrad when possible"""
return Eigh(UPLO)(a)
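
The custom `Eigh`/`EighGrad` pair above exists only so the op can be pickled, so its gradient should agree with a numerical check. A small sketch of how that could be verified (it assumes `eigh` is imported from `pymc3.distributions.dist_math` once this PR is merged):

```python
import numpy as np
import theano
from pymc3.distributions.dist_math import eigh

rng = np.random.RandomState(42)
A = rng.randn(5, 5)
A = 0.5 * (A + A.T)  # symmetrize, as eigh assumes a symmetric input

# verify_grad compares EighGrad's analytic gradient with a finite-difference
# approximation; summing the eigenvalues gives a scalar cost to differentiate.
theano.gradient.verify_grad(lambda a: eigh(a)[0].sum(), [A], rng=rng)
```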